# Environment Setup

In [None]:
# Install py-feat toolbox
!pip install -q py-feat

# Libraries

In [None]:
# Import libraries
from glob import glob
import numpy as np
import os
import pandas as pd
from feat.detector import Detector
import csv

# Paths

In [None]:
# Dataset roots: stratified holdout split and stratified cross-validation split
holdout_path = "/kaggle/input/truthlie-clean-split/TruthLie_Holdout_Stratified"
cross_path = "/kaggle/input/truthlie-clean-crossvalidation/TruthLie_CrossVal_Stratified"

# Destination CSV files for the holdout features (train / val / test)
train_path, val_path, test_path = [
    '/kaggle/working/' + csv_name
    for csv_name in ("train_features.csv", "val_features.csv", "test_features.csv")
]

# Destination CSV files for the cross-validation features, one per fold (0-3)
fold_paths = ['/kaggle/working/' + f"fold_{i}_features.csv" for i in range(4)]

# Feature Extraction

In [None]:
# Initialize the py-feat detector with its default settings; only the device
# is overridden so that it targets "cuda". This module-level object is the one
# feature_extraction() calls to process each video.
detector = Detector(device="cuda")

In [None]:
def feature_extraction(folds):
    """Run the detector on every video of the given folders and stack the results.

    Each folder in ``folds`` must contain a ``Statements`` sub-folder with the
    videos and a ``Transcripts/Transcripts.xlsx`` sheet that has (at least)
    the columns ``video name`` and ``label``. Videos are processed in sorted
    path order with the module-level ``detector``.

    Args:
        folds: Iterable of folder paths to process.

    Returns:
        A single ``pandas.DataFrame`` holding the detector output of all
        videos (index reset) plus a ``label`` column taken from the
        transcript sheet. An empty DataFrame is returned when ``folds`` is
        empty or no folder contains any video.

    Raises:
        IndexError: If a video has no matching ``video name`` row in the
            transcript sheet of its folder.
    """
    # Accumulate one DataFrame per processed video
    dfs = []

    # Loop on each folder and process each video it contains
    for fold in folds:
        video_files = sorted(glob(os.path.join(fold, "Statements", "*")))
        transcript_file = os.path.join(fold, "Transcripts", "Transcripts.xlsx")
        transcript_df = pd.read_excel(transcript_file)

        # Loop over and process each video
        for video in video_files:
            # Get the name of the current video
            video_name = os.path.basename(video)

            # Get the label of the current video (first matching row wins)
            labels = transcript_df.loc[
                transcript_df['video name'] == video_name, 'label'
            ]
            if labels.empty:
                raise IndexError(
                    f"No label found for video {video_name!r} in {transcript_file}"
                )
            video_label = labels.values[0]

            # Extract FEX from the current video
            fex = detector.detect_video(video, skip_frames=3, antialias=True)
            df_fex = pd.DataFrame(fex)
            df_fex['label'] = video_label
            dfs.append(df_fex)

    # Nothing processed: pd.concat would raise on an empty list, so return
    # an empty frame instead of failing.
    if not dfs:
        return pd.DataFrame()

    # Concatenate once, after all folders are done, rather than on every fold
    return pd.concat(dfs, ignore_index=True)

## Holdout

In [None]:
# Locate the holdout train folder (sorted glob result) and run the extraction on it
train_folders = np.sort(glob(holdout_path + '/train'))
df_train = feature_extraction(train_folders)

# Write the train features to their CSV file, leaving out the row index
df_train.to_csv(train_path, index=False)

In [None]:
# Locate the holdout val folder (sorted glob result) and run the extraction on it
val_folders = np.sort(glob(holdout_path + '/val'))
df_val = feature_extraction(val_folders)

# Write the val features to their CSV file, leaving out the row index
df_val.to_csv(val_path, index=False)

In [None]:
# Extract test features.
# The result is bound to df_test (not df_train) so the save below refers to a
# defined name and the train features are not overwritten.
df_test = feature_extraction(np.sort(glob(holdout_path + '/test')))

# Save DataFrame in a CSV file (row index omitted)
df_test.to_csv(test_path, index=False)

## Cross-Validation

In [None]:
for i in range(4):
    # Locate the folder of fold i (sorted glob result) and extract its features
    fold_folders = np.sort(glob(cross_path + f'/fold_{i}'))
    df_fold = feature_extraction(fold_folders)

    # Write the fold features to the matching CSV file, leaving out the row index
    fold_csv = fold_paths[i]
    df_fold.to_csv(fold_csv, index=False)