In [1]:
import pandas as pd
import numpy as np
import glob
import os

In [2]:
path = "/Users/jiachenyao/Desktop/Thesis/OpenFace_data/Mixed_emotions/"

# Frame-quality thresholds: a frame counts as low-confidence when its
# "confidence" value is below MIN_CONFIDENCE, and a whole recording is rejected
# when the share of such frames reaches MAX_LOW_CONFIDENCE_RATIO.
MIN_CONFIDENCE = 0.98
MAX_LOW_CONFIDENCE_RATIO = 0.15

# Bookkeeping columns that do not contain facial feature information
NON_FEATURE_COLUMNS = ["confidence", "success", "frame", "face_id", "timestamp"]


def clean_openface_frame(raw, min_confidence=MIN_CONFIDENCE,
                         max_low_confidence_ratio=MAX_LOW_CONFIDENCE_RATIO):
    """Filter one recording down to its usable feature rows.

    Parameters
    ----------
    raw : pd.DataFrame
        One CSV as loaded; must contain the columns listed in
        NON_FEATURE_COLUMNS.
    min_confidence : float
        Minimum "confidence" for a row to be kept.
    max_low_confidence_ratio : float
        The recording is rejected when the fraction of rows with
        confidence < min_confidence is >= this value.

    Returns
    -------
    pd.DataFrame or None
        The cleaned feature frame, or None when the recording is rejected
        (too many low-confidence rows, or no rows at all).
    """
    # A recording without rows has no usable frames; reject it instead of
    # dividing by zero below.
    if len(raw) == 0:
        return None

    low_confidence_ratio = (raw["confidence"] < min_confidence).sum() / len(raw)
    if low_confidence_ratio >= max_low_confidence_ratio:
        return None

    # select confidence >= min_confidence & success = 1
    kept = raw.loc[(raw["confidence"] >= min_confidence) & (raw["success"] == 1)]
    kept = kept.drop(columns=NON_FEATURE_COLUMNS)

    # drop binary AU features
    kept = kept.drop(columns=kept.filter(regex='AU.*_c').columns)

    # Forward fill, then backward fill so leading gaps are covered as well.
    # (.ffill()/.bfill() replace the deprecated fillna(method=...) spelling.)
    return kept.ffill().bfill()


# List all the .csv files in the folder; sorted because glob order depends on
# the file system and would otherwise make the row order of later outputs vary.
mix_emo_files = sorted(glob.glob(os.path.join(path, "*.csv")))

mix_emo_dfs = []
kept_files = []
num_deleted = 0

for filename in mix_emo_files:
    df = clean_openface_frame(pd.read_csv(filename, index_col=None, header=0))

    if df is None:
        # print the filename of every rejected recording
        print(filename)
        num_deleted += 1
        continue

    mix_emo_dfs.append(df)
    kept_files.append(filename)

# Later code pairs mix_emo_dfs with mix_emo_files by position, so the file list
# must contain exactly the recordings that were kept, in the same order.
# Leaving rejected files in it shifts every subsequent label onto the wrong frame.
mix_emo_files = kept_files

print(f"{num_deleted} dataframes were deleted.")

print(len(mix_emo_dfs))

/Users/jiachenyao/Desktop/Thesis/OpenFace_data/Mixed_emotions/A404_mix_ang_fea_5050.csv
/Users/jiachenyao/Desktop/Thesis/OpenFace_data/Mixed_emotions/A411_mix_fea_hap_3070.csv
/Users/jiachenyao/Desktop/Thesis/OpenFace_data/Mixed_emotions/A417_mix_fea_hap_7030.csv
/Users/jiachenyao/Desktop/Thesis/OpenFace_data/Mixed_emotions/A220_mix_disg_fea_5050.csv
/Users/jiachenyao/Desktop/Thesis/OpenFace_data/Mixed_emotions/A220_mix_fea_sad_5050.csv
/Users/jiachenyao/Desktop/Thesis/OpenFace_data/Mixed_emotions/A427_mix_ang_sad_7030.csv
/Users/jiachenyao/Desktop/Thesis/OpenFace_data/Mixed_emotions/A410_mix_disg_sad_7030.csv
/Users/jiachenyao/Desktop/Thesis/OpenFace_data/Mixed_emotions/A413_mix_disg_fea_7030.csv
/Users/jiachenyao/Desktop/Thesis/OpenFace_data/Mixed_emotions/A64_mix_ang_fea_5050.csv
/Users/jiachenyao/Desktop/Thesis/OpenFace_data/Mixed_emotions/A408_mix_disg_sad_5050.csv
/Users/jiachenyao/Desktop/Thesis/OpenFace_data/Mixed_emotions/A410_mix_ang_hap_7030.csv
/Users/jiachenyao/Desktop/The

In [3]:
#Extract functional features
def functional_features(frame, label):
    """Summarise every column of ``frame`` with six functionals.

    The returned frame has one row per functional, in this order: mean,
    coefficient of variation (std / mean), 20th percentile, 50th percentile,
    80th percentile, and the 20-80 percentile range. All six rows carry
    ``label`` as their index value.
    """
    mean = frame.mean()
    cv = frame.std() / mean

    # One quantile call for all three levels instead of five separate passes
    quantiles = frame.quantile([0.2, 0.5, 0.8])
    percentile_20 = quantiles.loc[0.2]
    percentile_50 = quantiles.loc[0.5]
    percentile_80 = quantiles.loc[0.8]
    range_20_80 = percentile_80 - percentile_20

    stats = pd.concat(
        [mean, cv, percentile_20, percentile_50, percentile_80, range_20_80],
        axis=1,
    ).T
    stats.index = [label] * 6
    return stats


def emotion_label_from_path(file):
    """Return the part of the file name after the first '_', without extension.

    e.g. ".../A404_mix_ang_fea_5050.csv" -> "mix_ang_fea_5050"
    """
    stem = os.path.splitext(os.path.basename(file))[0]
    _, separator, label = stem.partition('_')
    if not separator:
        raise ValueError(f"Cannot derive a label from file name without '_': {file}")
    return label


# zip() pairs frames and files purely by position and silently stops at the
# shorter list, so a length mismatch means labels would be attached to the
# wrong recordings. Fail loudly instead of producing mislabelled features.
if len(mix_emo_dfs) != len(mix_emo_files):
    raise ValueError(
        f"mix_emo_dfs ({len(mix_emo_dfs)}) and mix_emo_files ({len(mix_emo_files)}) "
        "are not aligned; files rejected during loading must also be removed "
        "from mix_emo_files."
    )

mix_emo_functional_features_results = []

for df, file in zip(mix_emo_dfs, mix_emo_files):
    result = functional_features(df, emotion_label_from_path(file))
    mix_emo_functional_features_results.append(result)

print(len(mix_emo_functional_features_results))

839


In [4]:
# no ratio
# list.copy() is shallow: the new list would still hold the very same DataFrame
# objects, so editing their index later would also change the frames in
# mix_emo_functional_features_results. Copy each frame instead.
mix_emo_noratio_results = [frame.copy() for frame in mix_emo_functional_features_results]

In [5]:
# Drop the trailing "_<ratio>" part of every label
# (e.g. "mix_ang_fea_5050" -> "mix_ang_fea").
# The list is rebuilt from the per-file results with new frames rather than
# edited in place: re-running this cell then always yields the same labels
# (an in-place edit would strip one more "_" segment on every run) and the
# frames in mix_emo_functional_features_results keep their full labels.
mix_emo_noratio_results = [
    frame.set_axis(frame.index.str.rsplit('_', n=1).str[0], axis=0)
    for frame in mix_emo_functional_features_results
]


In [6]:
# Concatenate all the results into one table; the label index becomes the
# "emotion" column.
mix_emo_noratio_output = (
    pd.concat(mix_emo_noratio_results)
    .reset_index()
    .rename(columns={'index': 'emotion'})
    # std / mean is +/-inf when a feature's mean is 0 but its std is not.
    # Treat that like the undefined 0/0 case (NaN), otherwise a single inf
    # breaks any later min/max based scaling of the whole column.
    .replace([np.inf, -np.inf], np.nan)
    # replace all the NaN with 0
    .fillna(0)
)

In [7]:
# Normalize the data (every column except the first one, which holds the label)
cols_to_norm = mix_emo_noratio_output.columns[1:]

# z score normalization
#mix_emo_noratio_output[cols_to_norm] = mix_emo_noratio_output[cols_to_norm].apply(lambda x: (x - x.mean()) / x.std())

# min-max normalization
def min_max_scale(column):
    """Scale ``column`` to [0, 1]; a constant column maps to all zeros."""
    lowest = column.min()
    span = column.max() - lowest
    if span == 0:
        # Constant feature: (x - min) / 0 would turn every value into NaN.
        return column - lowest
    return (column - lowest) / span


mix_emo_noratio_output[cols_to_norm] = mix_emo_noratio_output[cols_to_norm].apply(min_max_scale)


In [9]:
# Independent copy of the normalized table, kept under the name used for export
mix_emo_noratio_notime_6_output = mix_emo_noratio_output.copy(deep=True)

In [10]:
# Write the table to disk without the row index
mix_emo_noratio_notime_6_output.to_csv(
    "/Users/jiachenyao/Desktop/Thesis/Preprocessed_data/Mixed_emotions_noratio_notime_6.csv",
    index=False,
)