# **Processing**

Libraries

In [None]:
import os
import pandas as pd
import numpy as np
import re
from collections import Counter

Loading and Combining Features

In [None]:
# Mount Google Drive (Colab only) so the files under /content/drive are readable
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Root folder on the mounted Drive; its 'Exp*' subfolders hold the per-recording CSV files
root_folder = '/content/drive/MyDrive/AMIGOS'

In [None]:
# Start from an empty DataFrame; the loading loop in the next cell fills main_df
main_df = pd.DataFrame()

In [None]:
# Load every .csv found in the 'Exp*' subfolders of root_folder, tag its rows
# with the source filename (without the '.csv' suffix), down-sample the frames,
# and combine everything into main_df.
#
# The per-file frames are collected in a list and concatenated ONCE at the end:
# calling pd.concat inside the loop re-copies everything loaded so far for each
# new file. Assigning main_df outright also makes this cell safe to re-run
# (it no longer appends the same files a second time).
loaded_frames = []

# Iterate over each folder in the root folder
for subdir in os.listdir(root_folder):
    subdir_path = os.path.join(root_folder, subdir)

    # Only process sub-directories whose name starts with 'Exp'
    if not (os.path.isdir(subdir_path) and subdir.startswith('Exp')):
        continue

    # Iterate through files in the Exp subfolder
    for file in os.listdir(subdir_path):
        # Only .csv files are feature files
        if not file.endswith('.csv'):
            continue

        file_path = os.path.join(subdir_path, file)

        # Load the CSV file with the first row as column names
        df = pd.read_csv(file_path)

        # Print the shape and filename
        print(f"Loaded {file} with shape: {df.shape}")

        # Add a new column 'filename' holding the current filename without '.csv'
        df['filename'] = os.path.splitext(file)[0]

        if file.startswith('P(10,12,11,15)'):
            # Take every frame (no filtering) for this group
            # NOTE(review): the reason this group is exempt from down-sampling
            # is not visible in this notebook -- confirm.
            filtered_df = df
        else:
            # Take every 3rd frame
            filtered_df = df[df['frame'] % 3 == 0]

        loaded_frames.append(filtered_df)

# Single concatenation; fall back to an empty frame when nothing was found
main_df = pd.concat(loaded_frames, ignore_index=True) if loaded_frames else pd.DataFrame()

Loaded P(6,32,4,3)_N1_face.csv with shape: (140830, 714)
Loaded P(6,32,4,3)_P1_face.csv with shape: (112674, 714)
Loaded P(6,32,4,3)_B1_face.csv with shape: (141348, 714)
Loaded P(6,32,4,3)_U1_face.csv with shape: (84812, 714)
Loaded P(29,5,27,21)_B1_face.csv with shape: (141457, 714)
Loaded P(29,5,27,21)_N1_face.csv with shape: (140561, 714)
Loaded P(29,5,27,21)_P1_face.csv with shape: (112312, 714)
Loaded P(29,5,27,21)_U1_face.csv with shape: (85000, 714)


In [None]:
# Check the final shape of the concatenated DataFrame
# (columns = the CSV columns plus the added 'filename' column)
print(f"Final shape of main DataFrame: {main_df.shape}")


Final shape of main DataFrame: (319664, 715)


In [None]:
# Preview the first five rows of the combined feature table
main_df.head(5)

Unnamed: 0,frame,face_id,timestamp,confidence,success,gaze_0_x,gaze_0_y,gaze_0_z,gaze_1_x,gaze_1_y,...,AU14_c,AU15_c,AU17_c,AU20_c,AU23_c,AU25_c,AU26_c,AU28_c,AU45_c,filename
0,3,0,0.08,0.98,1,0.257138,0.013615,-0.966279,0.100244,0.00905,...,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,"P(6,32,4,3)_N1_face"
1,3,1,0.08,0.98,1,-0.439685,0.011282,-0.898081,-0.555332,0.019238,...,1.0,1.0,1.0,0.0,0.0,1.0,1.0,1.0,1.0,"P(6,32,4,3)_N1_face"
2,3,2,0.08,0.98,1,0.48646,0.047286,-0.872422,0.365866,0.110957,...,1.0,1.0,1.0,1.0,1.0,0.0,0.0,0.0,0.0,"P(6,32,4,3)_N1_face"
3,3,3,0.08,0.77,1,0.046915,0.004465,-0.998889,-0.115612,0.011087,...,0.0,0.0,1.0,0.0,0.0,0.0,0.0,1.0,0.0,"P(6,32,4,3)_N1_face"
4,6,0,0.2,0.98,1,0.259105,0.010726,-0.96579,0.101941,0.005828,...,0.0,1.0,1.0,1.0,0.0,0.0,0.0,1.0,0.0,"P(6,32,4,3)_N1_face"


In [None]:
# Summarize the index range, column count, dtypes and memory usage
main_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 319664 entries, 0 to 319663
Columns: 715 entries, frame to filename
dtypes: float64(711), int64(3), object(1)
memory usage: 1.7+ GB


Save / Load DF

In [None]:
# Optionally save the main DataFrame to a new CSV file
# main_df.to_csv('/content/drive/MyDrive/AMIGOS/Exp2_DF_final.csv', index=False)

In [None]:
# Load the main DataFrame from a previously saved CSV file
# main_df = pd.read_csv('/content/drive/MyDrive/AMIGOS/Files_Exp_2/Exp2_DF_final.csv')

In [None]:
# Preview the first rows of main_df (e.g. to verify it after reloading from CSV)
main_df.head()

Rearranging the Names According to Face Indexes (only needed for recordings with multiple faces, because OpenFace shuffles the face indexes)

In [None]:
# Rewrite the participant order inside each filename so that position i in
# 'P(a,b,c,d)' corresponds to face index i of that recording.
#
# The renaming is done with one vectorized Series.map over the 'filename'
# column instead of iterating over every row of the (very wide) DataFrame.
FACE_ORDER_RENAMES = {
    "P(6,32,4,3)_B1_face": "P(4,3,32,6)_B1_face",
    "P(6,32,4,3)_N1_face": "P(32,3,6,4)_N1_face",
    "P(6,32,4,3)_P1_face": "P(4,32,3,6)_P1_face",
    "P(6,32,4,3)_U1_face": "P(32,3,6,4)_U1_face",
    "P(29,5,27,21)_B1_face": "P(21,27,5,29)_B1_face",
    "P(29,5,27,21)_N1_face": "P(27,21,29,5)_N1_face",
    "P(29,5,27,21)_P1_face": "P(27,21,29,5)_P1_face",
    "P(29,5,27,21)_U1_face": "P(29,27,21,5)_U1_face",
    "P(7,1,2,16)_B1_face": "P(16,1,7,2)_B1_face",
    "P(7,1,2,16)_N1_face": "P(1,7,2,16)_N1_face",
    "P(7,1,2,16)_P1_face": "P(16,7,2,1)_P1_face",
    "P(7,1,2,16)_U1_face": "P(7,16,2,1)_U1_face",
    "P(18,14,17,22)_B1_face": "P(17,14,22,18)_B1_face",
    "P(18,14,17,22)_N1_face": "P(14,18,17,22)_N1_face",
    "P(18,14,17,22)_P1_face": "P(17,18,14,22)_P1_face",
    "P(18,14,17,22)_U1_face": "P(14,17,22,18)_U1_face",
    "P(10,12,11,15)_B1_face": "P(11,15,12,10)_B1_face",
    "P(10,12,11,15)_N1_face": "P(12,10,15,11)_N1_face",
    "P(10,12,11,15)_P1_face": "P(11,10,15,12)_P1_face",
    "P(10,12,11,15)_U1_face": "P(10,15,12,11)_U1_face",
}

# An empty main_df (nothing loaded) has no 'filename' column; do nothing then.
if not main_df.empty:
    current_names = main_df['filename']

    # Names without an entry in the table come back as NaN from map()
    renamed = current_names.map(FACE_ORDER_RENAMES)

    # Names that already are a rename target mean this cell was run before;
    # they are left untouched and not reported, so the cell can be re-run.
    already_renamed = current_names.isin(list(FACE_ORDER_RENAMES.values()))

    # Boolean indexing copies the rows, so this survives the assignment below
    unmatched_names = current_names[renamed.isna() & ~already_renamed]

    # Keep the existing name wherever no replacement applies
    main_df['filename'] = renamed.fillna(current_names)

    # If no replacement is found, print the message (one line per row)
    for idx, filename in unmatched_names.items():
        print(f"No replacement found for filename: {filename}, index: {idx}")

In [None]:
# Count rows per filename to check the result of the renaming step
Counter(main_df['filename'])

Counter({'P(32,3,6,4)_N1_face': 46943,
         'P(4,32,3,6)_P1_face': 37559,
         'P(4,3,32,6)_B1_face': 47113,
         'P(32,3,6,4)_U1_face': 28273,
         'P(21,27,5,29)_B1_face': 47152,
         'P(27,21,29,5)_N1_face': 46853,
         'P(27,21,29,5)_P1_face': 37438,
         'P(29,27,21,5)_U1_face': 28333})

Preprocessing and Dealing with Null Values

In [None]:
# Count the missing values in each column
main_df.isna().sum()

Unnamed: 0,0
frame,0
face_id,0
timestamp,0
confidence,0
success,0
...,...
AU25_c,0
AU26_c,0
AU28_c,0
AU45_c,0


In [None]:
# Drop rows with any missing values
# main_df.dropna(inplace=True)

In [None]:
# Iterate over columns to handle missing values based on their data type
# for col in main_df.columns:
#     if main_df[col].dtype == 'float64':
#         main_df[col].fillna(main_df[col].mean(), inplace=True)  # Fill NaN with column mean (for float columns)
#     elif main_df[col].dtype == 'int64':
#         main_df[col].fillna(main_df[col].median(), inplace=True)  # Fill NaN with column median (for int columns)

In [None]:
# Check the data types of each column.
# Note: several column names carry a leading space (e.g. ' face_id'), so they
# must be indexed with that space included.
print(main_df.dtypes)

frame            int64
 face_id         int64
 timestamp     float64
 confidence    float64
 success         int64
                ...   
 AU25_c        float64
 AU26_c        float64
 AU28_c        float64
 AU45_c        float64
filename        object
Length: 715, dtype: object


In [None]:
# Convert columns to the correct type if needed (e.g., convert 'frame' to int)
# main_df['frame'] = main_df['frame'].astype(int)

Loading Labels File

In [None]:
pip install openpyxl

Collecting openpyxl
  Downloading openpyxl-3.1.5-py2.py3-none-any.whl.metadata (2.5 kB)
Collecting et-xmlfile (from openpyxl)
  Downloading et_xmlfile-2.0.0-py3-none-any.whl.metadata (2.7 kB)
Downloading openpyxl-3.1.5-py2.py3-none-any.whl (250 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m250.9/250.9 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading et_xmlfile-2.0.0-py3-none-any.whl (18 kB)
Installing collected packages: et-xmlfile, openpyxl
Successfully installed et-xmlfile-2.0.0 openpyxl-3.1.5


In [None]:
# Load the labels Excel file; its 'UserID' and 'Video_ID' columns are used as
# the lookup key when labels are assigned to the feature rows later on
labels = pd.read_excel('/content/drive/MyDrive/AMIGOS/Files_Exp_2/Exp2_labels.xlsx') # Replace with the actual path

In [None]:
# Display the labels table (pandas truncates the middle rows in the output)
labels

Unnamed: 0,Trial,UserID,Exp2_ID,Video_ID,arousal,valence,dominance,liking,familiarity,neutral,disgust,happiness,surprise,anger,fear,sadness
0,1,9,L1,U1,5.08,4.76,6.04,8.84,7.96,0,0,1,0,0,0,1
1,1,23,L2,N1,6.36,5.08,6.84,6.84,1.88,1,1,0,0,0,1,0
2,1,7,L3,N1,7.80,4.44,2.20,8.36,1.16,1,0,0,1,0,1,0
3,1,1,L3,N1,7.88,6.60,3.24,7.80,1.64,0,0,0,1,0,1,0
4,1,2,L3,N1,6.28,5.00,3.24,7.56,2.76,0,0,1,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
143,4,36,L18,N1,7.48,3.56,2.20,5.08,2.60,0,1,0,0,0,1,0
144,4,19,L19,N1,6.84,2.84,2.68,3.96,2.12,0,0,0,1,0,1,1
145,4,38,L20,U1,5.48,7.32,6.60,8.60,6.76,0,0,1,0,0,0,1
146,4,39,L21,B1,5.00,5.00,7.96,5.88,1.00,1,0,0,1,0,0,0


Assigning Labels to the Respective Feature Rows

In [None]:
# Attach the per-participant labels from `labels` to every feature row.
#
# For each row the filename 'P(u0,u1,u2,u3)_<Video_ID>...' is parsed, the row's
# ' face_id' selects one of the four UserIDs by position, and the first row of
# `labels` with that (UserID, Video_ID) supplies the label values.
#
# Instead of DataFrame.iterrows() with a boolean scan of `labels` and twelve
# scalar writes per row, each distinct filename is parsed once, labels are
# looked up in a dict, and every label column is written in one bulk assignment.
# Printed messages and their order are the same as with the row-by-row version.

# Columns of interest from `labels`:
label_columns = ['arousal', 'valence', 'dominance', 'liking', 'familiarity', 'neutral', 'disgust',
                 'happiness', 'surprise', 'anger', 'fear', 'sadness']

# Four UserIDs in parentheses, then the alphanumeric Video_ID after the underscore
filename_pattern = re.compile(r'P\((\d+),(\d+),(\d+),(\d+)\)_([A-Za-z0-9]+)')

# (UserID, Video_ID) -> label values; setdefault keeps the FIRST matching row
label_lookup = {}
for user_id, video_id, *values in labels[['UserID', 'Video_ID'] + label_columns].itertuples(index=False, name=None):
    label_lookup.setdefault((user_id, video_id), values)

# filename -> (user_ids, video_id), or None when the name does not match
parsed_filenames = {}

matched_positions = []  # positional row numbers that received labels
matched_values = []     # label values for those rows, in label_columns order

# An empty main_df (nothing loaded) has no columns to read; iterate over nothing.
if main_df.empty:
    row_keys = ()
else:
    row_keys = zip(main_df.index, main_df['filename'], main_df[' face_id'])

for position, (idx, filename, face_id) in enumerate(row_keys):
    # Parse each distinct filename only once
    if filename not in parsed_filenames:
        match = filename_pattern.match(filename)
        parsed_filenames[filename] = (
            (list(map(int, match.groups()[:4])), match.group(5)) if match else None
        )
    parsed = parsed_filenames[filename]

    if parsed is None:
        print(f"Filename format is incorrect: {filename}")
        continue
    user_ids, video_id = parsed

    # Map face IDs based on position in user_ids
    if not 0 <= face_id < len(user_ids):
        print(f"Invalid face_id: {face_id} for filename: {filename}")
        continue

    values = label_lookup.get((user_ids[face_id], video_id))
    if values is None:
        print(f"No label found for row: {filename}, index: {idx}")
        continue

    matched_positions.append(position)
    matched_values.append(values)

# Bulk-write the label columns. As with the row-by-row version, the columns are
# only created once at least one row matched, they are float64 (unmatched rows
# hold NaN), and values already present in unmatched rows are kept.
# Assumes the label columns are numeric, as in the labels table shown above.
if matched_values:
    value_matrix = np.asarray(matched_values, dtype=float)
    for col_no, col in enumerate(label_columns):
        if col in main_df.columns:
            column_values = main_df[col].to_numpy(dtype=float, copy=True)
        else:
            column_values = np.full(len(main_df), np.nan)
        column_values[matched_positions] = value_matrix[:, col_no]
        main_df[col] = column_values

In [None]:
# Preview the feature table with the newly added label columns
main_df.head()

Unnamed: 0,frame,face_id,timestamp,confidence,success,gaze_0_x,gaze_0_y,gaze_0_z,gaze_1_x,gaze_1_y,...,dominance,liking,familiarity,neutral,disgust,happiness,surprise,anger,fear,sadness
0,3,0,0.08,0.98,1,0.257138,0.013615,-0.966279,0.100244,0.009050,...,3.24,8.12,3.24,0.0,0.0,0.0,0.0,0.0,1.0,0.0
1,3,1,0.08,0.98,1,-0.439685,0.011282,-0.898081,-0.555332,0.019238,...,6.92,5.64,1.16,1.0,0.0,0.0,1.0,0.0,1.0,0.0
2,3,2,0.08,0.98,1,0.486460,0.047286,-0.872422,0.365866,0.110957,...,2.68,6.36,2.12,0.0,1.0,0.0,1.0,0.0,1.0,1.0
3,3,3,0.08,0.77,1,0.046915,0.004465,-0.998889,-0.115612,0.011087,...,3.40,6.28,6.60,0.0,0.0,0.0,1.0,0.0,1.0,0.0
4,6,0,0.20,0.98,1,0.259105,0.010726,-0.965790,0.101941,0.005828,...,3.24,8.12,3.24,0.0,0.0,0.0,0.0,0.0,1.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
319659,21276,3,851.00,0.93,1,0.097215,-0.109715,-0.989198,-0.012894,-0.136827,...,5.88,7.80,1.96,0.0,0.0,1.0,0.0,0.0,0.0,1.0
319660,21279,0,851.12,0.98,1,0.493929,0.020739,-0.869255,0.376686,0.027592,...,3.24,8.12,1.32,0.0,0.0,1.0,0.0,0.0,0.0,1.0
319661,21279,1,851.12,0.98,1,-0.068890,-0.041851,-0.996746,-0.217434,-0.026081,...,6.36,6.04,6.36,1.0,0.0,1.0,0.0,0.0,0.0,1.0
319662,21279,2,851.12,0.98,1,-0.301181,0.036381,-0.952873,-0.429786,0.021777,...,2.60,8.36,2.28,0.0,0.0,1.0,0.0,0.0,0.0,1.0


In [None]:
# List the last 13 columns: 'filename' followed by the 12 label columns
main_df.columns[-13:]

Index(['filename', 'arousal', 'valence', 'dominance', 'liking', 'familiarity',
       'neutral', 'disgust', 'happiness', 'surprise', 'anger', 'fear',
       'sadness'],
      dtype='object')

Save / Load Labeled DF

In [None]:
# Save the labeled main DataFrame to a new CSV file
# (index=False leaves the row index out of the file)
main_df.to_csv('/content/drive/MyDrive/AMIGOS/Exp2_DF_Labeled.csv', index=False)