In [None]:
import numpy as np
import scipy as sp
import pandas as pd
import os
from pathlib import Path
import shutil
from sklearn.svm import SVC, SVR
from sklearn.metrics import classification_report, r2_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt
from sklearn.model_selection import KFold
import nltk
from gensim.models.keyedvectors import KeyedVectors
nltk.download('stopwords')
from nltk.corpus import stopwords
import gc 

In [None]:
# Directory holding the split label tables; 'train_split.csv' and 'dev_split.csv'
# are appended to this string directly, so the trailing slash is required.
labels_dir = '../data/DAIC/labels/'
# Directory holding the per-recording OpenFace CSV files
# ('<id>_OpenFace2.1.0_Pose_gaze_AUs.csv'); also used via plain string
# concatenation, so keep the trailing slash.
openface_features_dir = '../data/DAIC/openface_features/'


In [None]:
# Read the train and dev split tables from the labels directory.
train_labels = pd.read_csv(labels_dir + 'train_split.csv', delimiter=',', encoding='utf-8')
dev_labels = pd.read_csv(labels_dir + 'dev_split.csv', delimiter=',', encoding='utf-8')

# The first column of each split table is taken as the recording id.
train_ids = train_labels.to_numpy()[:, 0]
dev_ids = dev_labels.to_numpy()[:, 0]

# Report how many recordings each split contains.
for size_label, split_ids in (('train_size: ', train_ids), ('dev_size: ', dev_ids)):
    print(size_label, len(split_ids))

In [None]:
def get_data_openface(id):
    """Load the OpenFace feature table of one recording and clean it up.

    Reads ``<id>_OpenFace2.1.0_Pose_gaze_AUs.csv`` from
    ``openface_features_dir``, discards rows whose ``success`` flag is 0,
    prepends an ``ID`` column holding ``id``, orders the rows by frame, and
    removes the bookkeeping columns.

    Parameters
    ----------
    id : value interpolated into the CSV file name and stored in the ``ID``
        column of every returned row.

    Returns
    -------
    pandas.DataFrame
        Remaining feature columns with ``ID`` first and a fresh 0..n-1 index.
    """
    csv_path = openface_features_dir + f'{id}_OpenFace2.1.0_Pose_gaze_AUs.csv'
    raw_frames = pd.read_csv(csv_path)

    # Discard the rows flagged with success == 0.
    failed_rows = raw_frames.index[(raw_frames['success'] == 0).to_numpy()]
    tracked_frames = raw_frames.drop(index=failed_rows)

    tracked_frames.insert(0, 'ID', id)
    ordered_frames = tracked_frames.sort_values(by=['ID', 'frame'])

    # Bookkeeping columns are not used as features downstream.
    feature_frames = ordered_frames.drop(
        ['frame', 'timestamp', 'confidence', 'success'], axis=1
    )

    # Renumber the rows 0..n-1 and discard the old index.
    renumbered = feature_frames.reset_index()
    return renumbered.drop(['index'], axis=1)

In [None]:
import math

def angle_between(v1, v2):
    """Return the angle in radians between two vectors.

    Parameters
    ----------
    v1, v2 : array-like of numbers
        Vectors of equal length.

    Returns
    -------
    float
        Angle in ``[0, pi]``. ``nan`` when either vector has zero length
        (the angle is undefined) or contains NaN.
    """
    v1 = np.asarray(v1, dtype=float)
    v2 = np.asarray(v2, dtype=float)

    norm_product = np.sqrt((v1 * v1).sum()) * np.sqrt((v2 * v2).sum())
    if norm_product == 0:
        # Undefined direction; return NaN explicitly instead of dividing 0/0.
        return float('nan')

    cos_angle = np.dot(v1, v2) / norm_product
    # Rounding can yield values like 1.0000000000000002 for (anti)parallel
    # vectors, which would make math.acos raise "math domain error".
    # np.clip keeps NaN as NaN, so invalid inputs still propagate.
    cos_angle = float(np.clip(cos_angle, -1.0, 1.0))
    return math.acos(cos_angle)

def get_gaze_angle(df_openface):
    """Add per-frame gaze-shift angles for both eyes to an OpenFace table.

    For each eye ``e`` in (0, 1), the angle in radians between the gaze
    vector ``(gaze_e_x, gaze_e_y, gaze_e_z)`` of a row and that of the next
    row is stored in a new column ``eye{e}_angle``.

    Parameters
    ----------
    df_openface : pandas.DataFrame
        Must contain ``gaze_0_x/y/z`` and ``gaze_1_x/y/z``. Not modified.

    Returns
    -------
    pandas.DataFrame
        Deep copy of the input with ``eye0_angle`` and ``eye1_angle``
        appended. The last row is NaN because it has no successor. Rows whose
        gaze vector (or successor) has zero length are NaN as well.

    Notes
    -----
    Rows are paired purely by position, so the result is correct for any
    index, not only a 0..n-1 range. Both columns are always created, even for
    inputs with fewer than two rows.
    """
    df_openface_with_angle = df_openface.copy(deep=True)

    for eye in (0, 1):
        gaze_columns = [f'gaze_{eye}_x', f'gaze_{eye}_y', f'gaze_{eye}_z']
        gaze = df_openface_with_angle[gaze_columns].to_numpy(dtype=float)

        # Default NaN covers the last row and the < 2 rows case.
        angles = np.full(len(gaze), np.nan)
        if len(gaze) > 1:
            current, following = gaze[:-1], gaze[1:]
            dots = (current * following).sum(axis=1)
            norm_products = (
                np.sqrt((current * current).sum(axis=1))
                * np.sqrt((following * following).sum(axis=1))
            )
            # Zero-length vectors give 0/0 -> NaN; silence the warning only.
            with np.errstate(divide='ignore', invalid='ignore'):
                cosines = dots / norm_products
            # Clamp rounding overshoot (e.g. 1.0000000000000002) so arccos
            # stays inside its domain; NaN passes through np.clip unchanged.
            angles[:-1] = np.arccos(np.clip(cosines, -1.0, 1.0))

        # Assigning an ndarray is positional, independent of the index labels.
        df_openface_with_angle[f'eye{eye}_angle'] = angles

    return df_openface_with_angle


def get_gaze_angle_features_for_dataset(ids):
    """Build one row of gaze-angle summary statistics per recording id.

    For every id the OpenFace table is loaded with ``get_data_openface``, the
    frame-to-frame gaze angles are added with ``get_gaze_angle``, and the mean
    and standard deviation of ``eye0_angle`` / ``eye1_angle`` are computed.

    Parameters
    ----------
    ids : iterable
        Recording ids accepted by ``get_data_openface``.

    Returns
    -------
    pandas.DataFrame
        Columns ``ID``, ``eye0_angle_mean``, ``eye1_angle_mean``,
        ``eye0_angle_std``, ``eye1_angle_std``; one row per id, in input
        order. An empty ``ids`` yields an empty frame with these columns.

    Notes
    -----
    The rows are collected in a list and turned into a DataFrame once.
    ``DataFrame.append`` was removed in pandas 2.0 and copied the whole frame
    on every call. As a side effect the ``ID`` column keeps the dtype of the
    ids instead of being coerced to float.
    """
    columns = ['ID', 'eye0_angle_mean', 'eye1_angle_mean', 'eye0_angle_std', 'eye1_angle_std']
    records = []

    for sample_id in ids:
        df_openface = get_data_openface(sample_id)
        df_openface_with_angle = get_gaze_angle(df_openface)
        # The final frame has no successor and therefore no angle; slicing by
        # position also works for an empty table, unlike index[len - 1].
        df_openface_with_angle = df_openface_with_angle.iloc[:-1]

        record = {'ID': sample_id}
        for angle_column in ('eye0_angle', 'eye1_angle'):
            if angle_column in df_openface_with_angle.columns:
                angles = df_openface_with_angle[angle_column]
            else:
                # Recordings too short to have any angle produce NaN stats.
                angles = pd.Series(dtype=float)
            record[f'{angle_column}_mean'] = angles.mean()
            record[f'{angle_column}_std'] = angles.std()
        records.append(record)

    return pd.DataFrame(records, columns=columns)

In [None]:
# This cell skips the feature-extraction step in favour of the per-video
# features extracted in our previous work, and trains an SVR model on them.

from sklearn.svm import SVR
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error
from sklearn.model_selection import KFold

# Load the pre-extracted per-video features and drop the stray index column
# written by a previous to_csv call.
df_openface_created_features_per_video = pd.read_csv('../data/DAIC/processed/extracted_visual_features.csv')
df_openface_created_features_per_video = df_openface_created_features_per_video.drop(['Unnamed: 0'], axis=1)

# Columns that are identifiers or labels rather than model inputs.
non_feature_columns = ['ID', 'Gender','PHQ_Binary','PHQ_Score','PCL-C (PTSD)','PTSD Severity']

# Select the rows of each split once, so X and y are guaranteed to come from
# the same rows in the same order.
train_rows = df_openface_created_features_per_video[df_openface_created_features_per_video['ID'].isin(train_ids)]
dev_rows = df_openface_created_features_per_video[df_openface_created_features_per_video['ID'].isin(dev_ids)]

X_train = np.array(train_rows.drop(non_feature_columns, axis=1))
X_dev = np.array(dev_rows.drop(non_feature_columns, axis=1))

# PHQ_Score is the regression target.
y_train = np.array(train_rows['PHQ_Score'])
y_dev = np.array(dev_rows['PHQ_Score'])

# Save X_train, X_dev, y_train and y_dev (previously only the X arrays were
# written, leaving the saved features without their targets).
np.save('../data/DAIC/processed/X_train.npy', X_train)
np.save('../data/DAIC/processed/X_dev.npy', X_dev)
np.save('../data/DAIC/processed/y_train.npy', y_train)
np.save('../data/DAIC/processed/y_dev.npy', y_dev)

# Train an RBF-kernel SVR on the training split.
svr = SVR(kernel='rbf', C=1, gamma=0.1)
svr.fit(X_train, y_train)

# Report MSE and MAE on the training split and then on the dev split.
# After the loop, y_pred / mse / mae hold the dev-split values.
for split_name, X_split, y_split in (('train', X_train, y_train), ('dev', X_dev, y_dev)):
    y_pred = svr.predict(X_split)
    mse = mean_squared_error(y_split, y_pred)
    mae = mean_absolute_error(y_split, y_pred)
    print(f'mse for {split_name}: ', mse)
    print(f'mae for {split_name}: ', mae)

