In [3]:
import os
import pandas as pd
import numpy as np
from tqdm import tqdm

In [64]:
# Dataset root directory. Paths in this notebook are built by plain string
# concatenation (e.g. f'{base_url}{pid}/...'), so keep the trailing slash.
base_url = './daic/data/'

In [52]:
def convert_txt_to_csv(base_url, feature):
    """Re-save every ``<pid>_CLNF_<feature>.txt`` as ``.csv`` and delete the ``.txt``.

    Each entry of ``base_url`` whose name is exactly three characters long is
    treated as a pid folder. A missing ``.txt`` file is reported and skipped;
    any exception raised while converting one file is printed and the loop
    moves on to the next folder.

    Args:
        base_url: Dataset root, including the trailing slash (paths are built
            by string concatenation).
        feature: Feature suffix used in the file name (the part after
            ``_CLNF_``).

    Returns:
        None. The work is done through file-system side effects.
    """
    folder_names = [entry for entry in os.listdir(base_url) if len(entry) == 3]

    for folder in tqdm(folder_names, desc='Converting txt to csv', ascii="░▒█"):
        # Shared prefix of the input (.txt) and output (.csv) paths.
        stem = f'{base_url}{folder}/{folder}_CLNF_{feature}'
        source_path = f'{stem}.txt'

        if not os.path.isfile(source_path):
            print(f"File {source_path} does not exist.")
            continue

        try:
            table = pd.read_csv(source_path, delimiter=',', low_memory=False)

            # Everything from the fifth column onwards is pushed through float32.
            # NOTE(review): assigning back through .iloc may keep the columns'
            # existing dtype, in which case this only rounds the values to
            # float32 precision -- confirm that is the intent.
            as_float32 = table.iloc[:, 4:].astype(np.float32)
            table.iloc[:, 4:] = as_float32

            table.to_csv(f'{stem}.csv', index=False)

            # Only reached when the CSV was written without raising.
            os.remove(source_path)
        except Exception as err:
            print(f"Failed to convert {source_path}. Error: {err!s}")

In [58]:
def remove_unsucc(input_dir, feature):
    """Keep only the rows whose ``' success'`` value equals 1 in each CLNF CSV.

    Every entry of ``input_dir`` with a three-character name is treated as a
    pid folder. Its ``<pid>_CLNF_<feature>.csv`` is read, filtered and written
    back to the same path, so the original file content is replaced.

    Args:
        input_dir: Dataset root, including the trailing slash (paths are built
            by string concatenation).
        feature: Feature suffix used in the file name (the part after
            ``_CLNF_``).

    Returns:
        None. The CSV files are rewritten in place.
    """
    folder_names = [entry for entry in os.listdir(input_dir) if len(entry) == 3]
    progress = tqdm(folder_names, desc="Remove unsuccessful frames", ascii="░▒█")

    for folder in progress:
        csv_path = f'{input_dir}{folder}/{folder}_CLNF_{feature}.csv'
        frames = pd.read_csv(csv_path)
        # The column name carries a leading space, exactly as in the CSV header.
        is_successful = frames[' success'] == 1
        frames.loc[is_successful].to_csv(csv_path, index=False)

In [74]:
# Filter each of the three CLNF CSV kinds down to rows with ' success' == 1.
# remove_unsucc rewrites the files in place, so this cell changes the data on disk.
remove_unsucc(base_url, 'features')
remove_unsucc(base_url, 'AUs')
remove_unsucc(base_url, 'gaze')

Remove unsuccessful frames:   0%|░░░░░░░░░░| 0/189 [00:00<?, ?it/s]

Remove unsuccessful frames: 100%|██████████| 189/189 [20:42<00:00,  6.57s/it]
Remove unsuccessful frames: 100%|██████████| 189/189 [01:49<00:00,  1.72it/s]
Remove unsuccessful frames: 100%|██████████| 189/189 [02:03<00:00,  1.52it/s]


In [111]:
def calculate_global_reference(input_dir):
    """Return the mean ``' x33'`` and ``' y33'`` values pooled over all pid folders.

    Every entry of ``input_dir`` with a three-character name is treated as a
    pid folder containing ``<pid>_CLNF_features.csv``. The two columns are
    gathered from every file and averaged over all rows together, so files
    with more rows weigh proportionally more (this is not a mean of per-file
    means).

    Args:
        input_dir: Dataset root directory, with or without a trailing slash.

    Returns:
        tuple: ``(global_nosetip_x, global_nosetip_y)``, the pooled means of
        the ``' x33'`` and ``' y33'`` columns.

    Raises:
        ValueError: If ``input_dir`` contains no three-character entry, so
            there is nothing to average.
        FileNotFoundError: If a pid folder lacks its features CSV (raised by
            ``pandas.read_csv``).
    """
    pids = [f for f in os.listdir(input_dir) if len(f) == 3]

    # One array per file instead of one Python object per frame: far less
    # memory than extending plain lists, and the pooled mean is unchanged.
    x_chunks = []
    y_chunks = []

    for folder_name in tqdm(pids, desc="Calculating global nosetip", ascii="░▒█"):
        csv_path = os.path.join(input_dir, folder_name, f'{folder_name}_CLNF_features.csv')
        # Only two columns are needed, so skip parsing the rest of the file.
        df = pd.read_csv(csv_path, usecols=[' x33', ' y33'])

        x_chunks.append(df[' x33'].to_numpy())
        y_chunks.append(df[' y33'].to_numpy())

    if not x_chunks:
        raise ValueError(
            f"No three-character folders found in {input_dir!r}; "
            "cannot compute a global reference."
        )

    global_nosetip_x = np.mean(np.concatenate(x_chunks))
    global_nosetip_y = np.mean(np.concatenate(y_chunks))

    return global_nosetip_x, global_nosetip_y

In [112]:
# Dataset-wide mean of the ' x33' / ' y33' columns; passed to normalize_landmarks
# below as the common point every frame is shifted onto.
glob_nt_x, glob_nt_y = calculate_global_reference(base_url)

Calculating global nosetip: 100%|██████████| 189/189 [03:03<00:00,  1.03it/s]


In [115]:
def calculate_global_min_max(base_url, feature):
    """Return the dataset-wide min / max of the x and y landmark columns.

    Every entry of ``base_url`` with a three-character name is treated as a
    pid folder; folders without ``<pid>_CLNF_<feature>.csv`` are skipped. In
    each file, columns ``4:72`` are taken as the x block and columns ``72:``
    as the y block, and four running extrema are kept across all files.

    Args:
        base_url: Dataset root directory, with or without a trailing slash.
        feature: Feature suffix used in the file name (the part after
            ``_CLNF_``).

    Returns:
        tuple: ``(global_x_min, global_x_max, global_y_min, global_y_max)``.

    Raises:
        ValueError: If no matching CSV file was found, so no bounds exist.
    """
    # Separate running extrema per axis. Comparing against a never-updated
    # +/-inf sentinel would merely return the last file's own min / max.
    global_x_min = global_y_min = np.inf
    global_x_max = global_y_max = -np.inf
    files_read = 0

    pids = [f for f in os.listdir(base_url) if len(f) == 3]

    for folder_name in tqdm(pids, desc="Calculating global min/max", ascii="░▒█"):
        file_path = os.path.join(base_url, folder_name, f'{folder_name}_CLNF_{feature}.csv')

        if not os.path.isfile(file_path):
            continue

        df = pd.read_csv(file_path, delimiter=',')
        x_block = df.iloc[:, 4:72]
        y_block = df.iloc[:, 72:]

        # The running value stays first so an all-NaN block cannot replace it.
        global_x_min = min(global_x_min, x_block.min().min())
        global_x_max = max(global_x_max, x_block.max().max())
        global_y_min = min(global_y_min, y_block.min().min())
        global_y_max = max(global_y_max, y_block.max().max())
        files_read += 1

    if files_read == 0:
        raise ValueError(
            f"No '*_CLNF_{feature}.csv' files found under {base_url!r}; "
            "cannot compute global bounds."
        )

    return global_x_min, global_x_max, global_y_min, global_y_max

In [116]:
# Bounds returned by calculate_global_min_max for the 'features' CSVs; they are
# passed to normalize_landmarks below as the min-max scaling range for x and y.
glob_x_min, glob_x_max, glob_y_min, glob_y_max = calculate_global_min_max(base_url, 'features')

Calculating global min/max: 100%|██████████| 189/189 [03:06<00:00,  1.01it/s]


In [120]:
# glob_x_min
# glob_x_max
# glob_y_min
# glob_y_max

1017.3300170898438

In [121]:
def normalize_landmarks(df, glob_nt_x, glob_nt_y, glob_x_min, glob_x_max, glob_y_min, glob_y_max):
    """Shift each frame's landmarks onto a common reference point, then min-max scale.

    Columns ``4:72`` are handled as the x block and columns ``72:`` as the y
    block. Per row, the row's own ``' x33'`` / ``' y33'`` value is subtracted
    from the matching block and the global reference is added, so that point
    ends up at ``(glob_nt_x, glob_nt_y)`` in every row. Each block is then
    scaled with ``(value - min) / (max - min)``. Nothing is clipped, so results
    fall outside ``[0, 1]`` whenever a shifted value lies outside the supplied
    bounds.

    Args:
        df: Frame to normalise. It is modified in place.
        glob_nt_x, glob_nt_y: Reference position the ``' x33'`` / ``' y33'``
            point is moved to.
        glob_x_min, glob_x_max: Scaling range for the x block.
        glob_y_min, glob_y_max: Scaling range for the y block.

    Returns:
        The same ``df`` object that was passed in, after modification.
    """
    x_block = slice(4, 72)
    y_block = slice(72, None)

    # Grab the per-row anchor values before any column is overwritten.
    anchor_x = df[' x33'].values
    anchor_y = df[' y33'].values

    # Stage 1: express every landmark relative to the row's own anchor.
    df.iloc[:, x_block] = df.iloc[:, x_block].sub(anchor_x, axis=0)
    df.iloc[:, y_block] = df.iloc[:, y_block].sub(anchor_y, axis=0)

    # Stage 2: move the anchor to the shared global position.
    df.iloc[:, x_block] = df.iloc[:, x_block] + glob_nt_x
    df.iloc[:, y_block] = df.iloc[:, y_block] + glob_nt_y

    # Stage 3: min-max scale each axis with the supplied bounds.
    x_span = glob_x_max - glob_x_min
    y_span = glob_y_max - glob_y_min
    df.iloc[:, x_block] = (df.iloc[:, x_block] - glob_x_min) / x_span
    df.iloc[:, y_block] = (df.iloc[:, y_block] - glob_y_min) / y_span

    return df

In [133]:
# pid = '367'
# test_df = pd.read_csv(f'./daic/data/{pid}/{pid}_CLNF_features.csv')
# normalize_landmarks(test_df, glob_nt_x, glob_nt_y, glob_x_min, glob_x_max, glob_y_min, glob_y_max)[' x33']


In [134]:
# Normalise the landmark columns of every pid's features CSV and write the
# result back over the same file.
# NOTE: the input file is overwritten, so running this cell a second time would
# normalise data that has already been normalised.
pids = [f for f in os.listdir(base_url) if len(f) == 3]
for pid in tqdm(pids, desc='Normalising landmarks feature'):
    file_path = f'{base_url}{pid}/{pid}_CLNF_features.csv'
    df = pd.read_csv(file_path)
    # normalize_landmarks returns the modified frame, which is saved straight back to file_path.
    normalize_landmarks(df, glob_nt_x, glob_nt_y, glob_x_min, glob_x_max, glob_y_min, glob_y_max).to_csv(file_path, index=False) # type: ignore

Normalising landmarks feature: 100%|██████████| 189/189 [21:43<00:00,  6.90s/it]
