In [5]:
import csv
import os

import math
import numpy as np

import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import xgboost as xgb
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split

AttributeError: partially initialized module 'pandas' has no attribute '_pandas_parser_CAPI' (most likely due to a circular import)

# Load data

In [None]:
# Load Data
# Walk ../all_features recursively and read every CSV file found there.
# `data` maps the bare file name (e.g. "91-001_part_2_concat.csv") to its
# DataFrame; the first three CSV columns are used as the row index.
data = {}

for root, _dirs, files in os.walk("../all_features"):
    for f in files:
        # Require a real ".csv" extension; a substring test would also accept
        # names such as "x.csv.bak".
        if not f.endswith(".csv"):
            continue
        path = os.path.join(root, f)
        df = pd.read_csv(path, index_col=[0, 1, 2])
        # Remove "Unnamed" columns
        df = df.loc[:, ~df.columns.str.contains('^Unnamed')]
        data[f] = df

In [None]:
# Pick one recording as a running example and show its summary statistics.
example = data["91-001_part_2_concat.csv"]
example.describe()

In [None]:
# example.hist(["libreface_AU04_i"])

In [None]:
# Column-wise mean over all rows of the example recording.
print(example.mean())

In [None]:
# Mean of every column whose name contains "openface", with "_mean"
# appended to each resulting label.
example_mean = (
    example.filter(like="openface")
    .mean()
    .rename(lambda label: label + '_mean')
)
print(example_mean[:5])

## Mean, var, median

In [None]:
def participant_stats(df, method_filter, percentage_filter): #percentage meaning intensity filter
    """
    Summarise one table into a single Series of mean, variance and median values.

    df = table whose column names are matched against the two filters
    method_filter = pattern (matched at the start of the name) selecting the
                    columns that are averaged; it is also stripped from the labels
    percentage_filter = pattern selecting the columns used for variance and median
    returns = Series with labels "<name>_mean", "<name>_var" and "<name>_median",
              concatenated in that order
    """

    def _relabel(series, suffix):
        # Strip the method prefix and append the statistic's suffix.
        return series.rename(lambda name: name.replace(method_filter, "") + suffix)

    method_columns = df.loc[:, df.columns.str.match(method_filter)]
    intensity_columns = df.loc[:, df.columns.str.match(percentage_filter)]

    return pd.concat([
        _relabel(method_columns.mean(), '_mean'),
        _relabel(intensity_columns.var(), '_var'),
        _relabel(intensity_columns.median(), '_median'),
    ])

In [None]:
def get_stats(data, method_filter='openface_', percentage_filter=r'openface_.*_r'):
    """
    Run participant_stats on every table of the dataset.

    data = dict mapping a key to its DataFrame (the whole dataset)
    method_filter = which method's columns to summarise
    percentage_filter = regular expression selecting all relevant AUs
    returns = dict with the same keys, each holding that table's statistics
    """
    print("iterate and filter through every AU beginning with ", percentage_filter)
    return {
        name: participant_stats(data[name], method_filter, percentage_filter)
        for name in list(data.keys())
    }

### get stats of each method

In [None]:
# Per recording, keep an independent copy of only those columns whose name
# contains "openface_" or "timestamp".
data_of = {
    key: df.loc[:, df.columns.str.contains(r'openface_|timestamp')].copy()
    for key, df in data.items()
}

#data_of["91-001_part_2_concat.csv"].describe()

In [None]:
# Mean over all "openface_" columns; variance and median only over the
# columns matching openface_.*_r.
open_face_stats = get_stats(data_of, 'openface_', r'openface_.*_r')

In [None]:
# Inspect the aggregated OpenFace statistics of the example recording.
open_face_stats["91-001_part_2_concat.csv"]#.describe()

##### Some Libreface-values must be converted to binary values first

`_d` columns are BP4D: they indicate AU presence.

`_i` columns are DISFA: they give the AU intensity estimation.

In [None]:
# Per recording: copy the columns whose name contains "libreface_" or
# "timestamp", then overwrite the presence columns (libreface_.*_d) with
# binary values using the threshold 0.5.
data_lf = {}
for key, df in data.items():
    lf_df = df.loc[:, df.columns.str.contains(r'libreface_|timestamp')].copy()
    lf_presence_columns = lf_df.columns[lf_df.columns.str.match(r'libreface_.*_d')]
    # 1 where the value exceeds 0.5, otherwise 0 (missing values also become 0)
    lf_df.loc[:, lf_presence_columns] = (lf_df[lf_presence_columns] > 0.5).astype("int64")
    data_lf[key] = lf_df


In [None]:
# Mean over all "libreface_" columns; variance and median only over the
# columns matching libreface_.*_i.
libre_face_stats = get_stats(data_lf, 'libreface_', r'libreface_.*_i')

In [None]:
# Inspect the aggregated LibreFace statistics of the example recording.
libre_face_stats["91-001_part_2_concat.csv"]

##### Create columns with binary values for ME Graph

In [None]:
# Drop columns containing "presence" from the original DataFrame
#df = df.loc[:, ~df.columns.str.contains('presence')]

# Per recording: copy the columns whose name contains "me_graph_" or
# "timestamp" and append one binary "<column>_presence" column
# (1 if value > 0.5, else 0) for every non-timestamp column.
data_me_graph = {}
for key, df in data.items():
    me_df = df.loc[:, df.columns.str.contains(r'me_graph_|timestamp')].copy()
    # Thresholding the timestamp would only add a meaningless
    # "timestamp_presence" column, so it is left out.
    value_columns = me_df.columns[~me_df.columns.str.contains('timestamp')]
    presence = (me_df[value_columns] > 0.5).astype("int64").add_suffix("_presence")
    # Attach all new columns in one step instead of inserting them one by one,
    # which fragments the frame.
    data_me_graph[key] = pd.concat([me_df, presence], axis=1)

print(type(data_me_graph))
#print(data_me_graph.keys())

In [None]:
# The negative lookahead (?!.*presence) rejects every column name that contains
# "presence" anywhere after the "me_graph_" prefix, so the binary presence
# columns are excluded from variance and median.
me_graph_stats = get_stats(data_me_graph, 'me_graph_', r'^me_graph_(?!.*presence)')

In [None]:
# Inspect the aggregated ME-Graph statistics of the example recording.
me_graph_stats["91-001_part_2_concat.csv"]

#### save in new files

In [None]:
path = "../Model_Input/whole_video/"

# One row per recording: the dict key becomes the 'id' column and the rows
# are sorted by it before writing.
of_frame = (
    pd.DataFrame.from_dict(open_face_stats, orient='index')
    .reset_index()
    .rename(columns={'index': 'id'})
    .sort_values('id')
)
of_frame.to_csv(path + "openface_stats_complete.csv")

In [None]:
# One row per recording: the dict key becomes the 'id' column and the rows
# are sorted by it before writing.
lf_frame = (
    pd.DataFrame.from_dict(libre_face_stats, orient='index')
    .reset_index()
    .rename(columns={'index': 'id'})
    .sort_values('id')
)
lf_frame.to_csv(path + "libreface_stats_complete.csv")

In [None]:
# One row per recording: the dict key becomes the 'id' column and the rows
# are sorted by it before writing.
me_frame = (
    pd.DataFrame.from_dict(me_graph_stats, orient='index')
    .reset_index()
    .rename(columns={'index': 'id'})
    .sort_values('id')
)
me_frame.to_csv(path + "megraph_stats_complete.csv")

In [None]:
# test
#print(lf_frame['libreface_AU01_d_mean']) 
#print(of_frame['openface_AU01_r_mean'])

# Partial sub-grouping

In [4]:
# Overview of the annotated timestamps; keep only the rows where the speaker
# differs from the previous row, i.e. where a new part of the video begins.
stamp_overview = pd.read_csv("../open_face_features_timestamps.csv", sep = ';')
new_part_rows = stamp_overview[stamp_overview['speaker'] != stamp_overview['speaker'].shift(1)]
print(new_part_rows)

# [first frame, last frame] of every video part. Each part starts one frame
# after the previous part ends, so the ranges do not overlap.
frame_stamps = {
    "neutral_actress": [0, 1000],
    "neutral_participant": [1001, 1650],
    "joy_actress": [1651, 2425],
    "joy_participant": [2426, 3075],
    # was 3075, which is already the last frame of "joy_participant"
    "disgust_actress": [3076, 3900],
    "disgust_participant": [3901, 4803]
}

# End time of every video part, in the order the parts occur.
# NOTE(review): the first key is "neu_actress" while frame_stamps uses
# "neutral_actress"; it is left unchanged because it ends up in the labels of
# the parted statistics.
time_stamps = {
    "neu_actress": 40.0,
    "neutral_participant": 66.00,
    "joy_actress": 97.0,
    "joy_participant": 123.0,
    "disgust_actress": 156.0,
    "disgust_participant": 192.08
}
# face=['AU04_r', 'AU06_r', 'AU09_r', 'AU12_r']

NameError: name 'pd' is not defined

In [None]:
# Function to create sub-groups for each part of the video
def create_sub_groups(df, boundaries=None):
    """
    Split a table into consecutive, non-overlapping parts by its 'timestamp' column.

    df = table with a 'timestamp' column
    boundaries = ordered dict mapping a part label to the END time of that part;
                 defaults to the notebook-level ``time_stamps``
    returns = dict mapping every label to the rows of that part

    The first part covers 0.0 <= timestamp <= end. Every later part covers
    previous_end < timestamp <= end, so a row that lies exactly on a boundary
    belongs only to the part that ends there instead of appearing in two parts.
    Rows after the last end time are not part of any group.
    """
    if boundaries is None:
        boundaries = time_stamps

    stamps = df['timestamp']
    sub_groups = {}
    previous_time = None
    for label, end_time in boundaries.items():
        if previous_time is None:
            # First part: the lower bound 0.0 itself is included.
            after_start = stamps >= 0.0
        else:
            # Later parts: the boundary row was already assigned to the previous part.
            after_start = stamps > previous_time
        sub_groups[label] = df[after_start & (stamps <= end_time)]
        previous_time = end_time
    return sub_groups

##### Split each dataframe by timestamps

In [None]:
# Split every OpenFace table into its video parts.
# Resulting shape: {'participant.csv': {'neu_actress': df, ...}, ...}
parted_of = {key: create_sub_groups(df) for key, df in data_of.items()}

In [None]:
# Split every LibreFace table into its video parts.
parted_lf = {key: create_sub_groups(df) for key, df in data_lf.items()}

In [None]:
# Split every ME-Graph table into its video parts.
parted_me = {key: create_sub_groups(df) for key, df in data_me_graph.items()}

In [None]:
#parted_me["91-001_part_2_concat.csv"]["joy_actress"].describe()

### get stats of each part

In [None]:
def get_part_stats(data, method_filter, au_filter):
    """
    Run participant_stats on every video part of one recording.

    data = dict mapping a part label to the DataFrame of that part
    method_filter = which method's columns to summarise
    au_filter = regular expression selecting the columns for variance and median
    returns = dict mapping every part label to its statistics
    """
    return {
        part_label: participant_stats(part_df, method_filter, au_filter)
        for part_label, part_df in data.items()
    }

In [None]:
# Look at the per-part OpenFace tables of the example recording.
# Note: this rebinds `example`, which previously held the full example table.
example = parted_of["91-001_part_2_concat.csv"]
example
#get_part_stats(example, 'openface_', 'openface_')

In [None]:
# get stats for every participant
parted_stats_openface = {}
for key, time_data_dict in parted_of.items():
    # key = participant; time_data_dict = 'neu_actress': df
    parted_stats_openface[key] = get_part_stats(time_data_dict, 'openface_', r'openface_.*_r')
    
parted_stats_libreface = {}
for key, time_data_dict in parted_lf.items():
    parted_stats_libreface[key] = get_part_stats(time_data_dict, 'libreface_', r'libreface_.*_i')
    
parted_stats_megraph = {}
for key, time_data_dict in parted_me.items():
    parted_stats_megraph[key] = get_part_stats(time_data_dict, 'me_graph_', r'^me_graph_(?!.*presence)')

In [None]:
# Print the per-part OpenFace statistics of the example recording.
example = parted_stats_openface["91-001_part_2_concat.csv"]
print(example)

## Restructure df
The result should contain all participants in one file,
with 'AU_part' (the AU combined with the video part) as columns.

### Invert
Invert the nested dict so that the participants are the innermost level, then create a dataframe with participants as rows and AU + video part together as columns.

In [None]:
# {'A': {1: {'b': 'value'}}} -> {1: {'b': {'A': 'value'}}}
def invert_dict_p_inside(d):
    """
    Move the outermost key of a three-level mapping to the innermost level.

    d = mapping of the form {outer: {middle: {inner: value}}}
    returns = dict of the form {middle: {inner: {outer: value}}}

    A middle key whose innermost mapping is empty does not appear in the result.
    """
    inverted = {}
    for outer_key, by_middle in d.items():
        for middle_key, by_inner in by_middle.items():
            for inner_key, value in by_inner.items():
                # Create the two upper levels on first use, then store the value.
                inverted.setdefault(middle_key, {}).setdefault(inner_key, {})[outer_key] = value
    return inverted


In [None]:
# Reorder the OpenFace statistics so that the participant is the innermost key.
inverted_dict_of = invert_dict_p_inside(parted_stats_openface)

In [None]:
# Every (statistic, video part) pair becomes one column; the rows are the participants.
of_frame = pd.DataFrame.from_dict(
    {
        (feature, part): per_participant
        for part, by_feature in inverted_dict_of.items()
        for feature, per_participant in by_feature.items()
    },
    orient='columns',
)

of_frame

In [None]:
# Reorder the LibreFace statistics so that the participant is the innermost key.
inverted_dict_lf = invert_dict_p_inside(parted_stats_libreface)

# Every (statistic, video part) pair becomes one column; the rows are the participants.
lf_frame = pd.DataFrame.from_dict(
    {
        (feature, part): per_participant
        for part, by_feature in inverted_dict_lf.items()
        for feature, per_participant in by_feature.items()
    },
    orient='columns',
)

In [None]:
# Display the parted LibreFace statistics table.
lf_frame

In [None]:
# Reorder the ME-Graph statistics so that the participant is the innermost key.
inverted_dict_me = invert_dict_p_inside(parted_stats_megraph)

# Every (statistic, video part) pair becomes one column; the rows are the participants.
me_frame = pd.DataFrame.from_dict(
    {
        (feature, part): per_participant
        for part, by_feature in inverted_dict_me.items()
        for feature, per_participant in by_feature.items()
    },
    orient='columns',
)

## save files

In [None]:
path = "../Model_Input/parted_video/"

# Write one CSV per feature extractor into the parted-video folder.
for file_name, frame in (
    ("openframe_stats_parted.csv", of_frame),
    ("libreface_stats_parted.csv", lf_frame),
    ("me_graph_stats_parted.csv", me_frame),
):
    frame.to_csv(path + file_name)