In [268]:
import csv
import os

import math
import numpy as np
import re

import pandas as pd
import matplotlib
import matplotlib.pyplot as plt
import seaborn as sns

import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split

from scipy.stats import ttest_rel
from sklearn.decomposition import PCA

# Load Data

In [199]:
# Frames
data_original = {}

for (root, dirs, file) in os.walk("../Model_Input/all_features/"):
    for f in file:
        if ".csv" in f:
            path = root + "/" + f
            df = pd.read_csv(path, index_col=[0,1,2])
            # Remove "Unnamed" columns
            df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
            data_original[f] = df

In [200]:
# Parted Video data
data_parted = {}

for (root, dirs, file) in os.walk("../Model_Input/parted_video/"):
    for f in file:
        if ".csv" in f:
            path = root + "/" + f
            df = pd.read_csv(path)
            df.columns.values[0] = 'part'
            df.set_index('part', inplace=True)
            data_parted[f] = df

print(data_parted.keys())

dict_keys(['libreface_stats_parted.csv', 'me_graph_stats_parted.csv', 'openframe_stats_parted.csv', 'libreface_stats_parted-checkpoint.csv'])


In [201]:
# get ASC / NT info
participant_info_df = pd.read_csv("../Model_Input/participant_info.csv")

class_NT_df = participant_info_df[participant_info_df['ASC'] == 0]
class_ASC_df = participant_info_df[participant_info_df['ASC'] == 1]

NT_list = class_NT_df['id'].tolist()
ASC_list = class_ASC_df['id'].tolist()

NT_list = [f"{s}_concat.csv" for s in NT_list]
ASC_list = [f"{s}_concat.csv" for s in ASC_list]

### Frames in Parts

In [202]:
stamp_overview = pd.read_csv("../Model_Input/open_face_features_timestamps.csv", sep = ';')
new_part_rows = stamp_overview[stamp_overview['speaker'] != stamp_overview['speaker'].shift(1)]
print(new_part_rows)

frame_stamps = {
    "neutral_actress": [0, 1000],
    "neutral_participant": [1001, 1650],
    "joy_actress": [1651, 2425],
    "joy_participant": [2426, 3075],
    "disgust_actress": [3075, 3900],
    "disgust_participant": [3901, 4803]
}

time_stamps = {
    "neutral_actress": 40.0,
    "neutral_participant": 66.00,
    "joy_actress": 97.0,
    "joy_participant": 123.0,
    "disgust_actress": 156.0,
    "disgust_participant": 192.08
}

      frames  timestamps     part      speaker
0          1        0.01  neutral      actress
1000    1001       40.00  neutral  participant
1650    1651       66.00      joy      actress
2425    2426       97.00      joy  participant
3075    3076      123.00  disgust      actress
3900    3901      156.00  disgust  participant


In [220]:
def rename_AU_features(dataframe):
    # Function to rename columns
    def rename_columns(col):
        match = re.match(r"(.*_)(AU)(\d+)(.*)?", col)
        if match:
            return f"{match.group(2)}_{int(match.group(3))}"
        return col

    dataframe.columns = [rename_columns(col) for col in dataframe.columns]
    return dataframe

    # rename to AU_xx only
    #mean.index = mean.index.str.replace(r'me_graph_|openface_|libreface_', '', regex=True)
    #std.index = std.index.str.replace(r'me_graph_|openface_|libreface_', '', regex=True) # will result in i.e. "AU2": 0.08
    #mean = rename_keys(mean)
    #std = rename_keys(std)

# Function to create sub-groups for each part of the video
def create_sub_groups(df):
    sub_groups = {}
    previous_time = 0.0
    for label, time in time_stamps.items():
        sub_group = df[(df['timestamp'] >= previous_time) & (df['timestamp'] <= time)]
        sub_groups[label] = sub_group
        previous_time = time
    return sub_groups

# Function to create sub-groups for each part of the video
def create_parted_dataframe(dataframes):
    sub_groups = {}
    previous_time = 0.0
    for label, time in time_stamps.items():
        participants = {}
        for participant, df in dataframes.items():
            sub_group = df[(df['timestamp'] >= previous_time) & (df['timestamp'] <= time)]
            sub_group = rename_AU_features(sub_group)
            participants[participant] = sub_group
        sub_groups[label] = participants
        previous_time = time
    return sub_groups

def calculate_average_over_participants(dict_part):
    
    
    mean = pd.concat(dict_part).mean()
    mean = mean.drop(labels="timestamp")
    std = pd.concat(dict_part).std()
    std = std.drop(labels="timestamp")
    
    return {"mean":mean, "std":std}

def create_average_by_parts(dictionary):
    # must have video-parts outside and participant-id inside
    averages = {}
    averages_nt = {}
    averages_asc = {}
    for part in dictionary.keys():
        averages[part] = calculate_average_over_participants(dictionary[part])
        # for NT only
        nt_dict = {key: dictionary[part][key] for key in NT_list}
        averages_nt[part] = calculate_average_over_participants(nt_dict)
        # for ASC only
        asc_dict = {key: dictionary[part][key] for key in ASC_list}
        averages_asc[part] = calculate_average_over_participants(asc_dict)
    return averages, averages_nt, averages_asc


In [221]:
# Create a copy of the DataFrame with only columns containing "openface_"
data_of_binary = {}
for key, df in data_original.items():
    data_of_binary[key] = df.loc[:, df.columns.str.contains(r'openface_.*_c|timestamp')].copy()    
parted_of_binary = create_parted_dataframe(data_of_binary)

data_of_intensity = {}
for key, df in data_original.items():
    data_of_intensity[key] = df.loc[:, df.columns.str.contains(r'openface_.*_r|timestamp')].copy()
parted_of_intensity = create_parted_dataframe(data_of_intensity)

In [223]:
# Create a copy of the DataFrame with only columns containing "libreface_"
data_lf_binary = {}
for key, df in data_original.items():
    data_lf_binary[key] = df.loc[:, df.columns.str.contains(r'libreface_.*_d|timestamp')].copy()
parted_lf_binary = create_parted_dataframe(data_lf_binary)

data_lf_intensity = {}
for key, df in data_original.items():
    data_lf_intensity[key] = df.loc[:, df.columns.str.contains(r'libreface_.*_i|timestamp')].copy()
parted_lf_intensity = create_parted_dataframe(data_lf_intensity)

In [224]:
# Create a copy of the DataFrame with only columns containing "me_graph_"
data_me_graph = {}
for key, df in data_original.items():
    data_me_graph[key] = df.loc[:, df.columns.str.contains(r'me_graph_|timestamp')].copy()

parted_me = create_parted_dataframe(data_me_graph)
# Create subgroups for every data

### Split by parts and Sort into ALL / NT / ASC

In [226]:
me_averages, me_averages_nt, me_averages_asc = create_average_by_parts(parted_me)

lf_averages_b, lf_averages_nt_b, lf_averages_asc_b = create_average_by_parts(parted_lf_binary)
of_averages_b, of_averages_nt_b, of_averages_asc_b = create_average_by_parts(parted_of_binary)

lf_averages_i, lf_averages_nt_i, lf_averages_asc_i = create_average_by_parts(parted_lf_intensity)
of_averages_i, of_averages_nt_i, of_averages_asc_i = create_average_by_parts(parted_of_intensity)

In [272]:
methods = ['Libreface (binary)', 'Libreface (intensity)', 'Openface (binary)', 'Openface (intensity)', 'ME-Graph']
parts = me_averages.keys()
action_units = me_averages["neutral_actress"]["mean"].keys()

# Combine data
all_data = {
    'Libreface (binary)': lf_averages_b,
    'Libreface (intensity)': lf_averages_i,
    'Openface (binary)': of_averages_b,
    'Openface (intensity)': of_averages_i,
    'ME-Graph': me_averages
}

# Plotting over participant averages

In [280]:
def plot_for_AU(data, au_to_plot, filename):
    current_methods = []
    means = {}
    for method in methods:
        au_list_for_parts = []
        for part in parts:
            mean = data[method][part]['mean']
            if au_to_plot in mean:
                au_list_for_parts.append(mean[au_to_plot])

        if au_list_for_parts != []:
            means[method] = au_list_for_parts
            current_methods.append(method)
    
    #method_dict = {method: [data[method][part]['mean'][au_to_plot] for part in parts] for method in methods}

    
    # Plotting
    fig, ax = plt.subplots(figsize=(12, 8))
    
    for method in current_methods:
        ax.plot(parts, means[method], marker='o', label=method)
    
    
    # Add labels and title
    ax.set_xlabel('Parts')
    ax.set_ylabel('Mean Values')
    ax.set_title(f'Comparison of Methods by Parts for {au_to_plot}')
    ax.set_xticks([p + width for p in x])
    ax.set_xticklabels(parts)
    ax.legend()
    
    # save in file
    plt.savefig(f"./Plots/features/{filename}{au_to_plot}.png")

In [None]:
for au in action_units:
    plot_for_AU(all_data, au, "Averages_")
    matplotlib.pyplot.close()

##### Plot again but separated by NT / ASC

In [277]:
# Combine data
nt_data = {
    'Libreface (binary)': lf_averages_nt_b,
    'Libreface (intensity)': lf_averages_nt_i,
    'Openface (binary)': of_averages_nt_b,
    'Openface (intensity)': of_averages_nt_i,
    'ME-Graph': me_averages_nt
}
asc_data = {
    'Libreface (binary)': lf_averages_asc_b,
    'Libreface (intensity)': lf_averages_asc_i,
    'Openface (binary)': of_averages_asc_b,
    'Openface (intensity)': of_averages_asc_i,
    'ME-Graph': me_averages_asc
}

In [281]:
for au in action_units:
    plot_for_AU(nt_data, au, "NT_averages")
    matplotlib.pyplot.close()

In [282]:
for au in action_units:
    plot_for_AU(asc_data, au, "ASC_averages")
    matplotlib.pyplot.close()