In [1]:
import os
import subprocess
import pandas as pd
import platform
import pathlib
import numpy as np
from annotations import *
from extract_video_features import *
from extract_audio_features import *
import cv2
import seaborn as sns
from scipy.stats import kurtosis, skew
from sklearn.metrics import f1_score
import importlib
from tqdm import tqdm
import ordinal_classification as o_c
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from util import runGridSearchClassifiers


In [2]:
# Remember the directory the notebook was started from.
cwd = str(pathlib.Path.cwd())

In [3]:
cwd

'/Users/valentinadiproietto/filrouge'

In [4]:
# Root of the local OpenFace checkout. The OPENFACE_FOLDER environment variable
# overrides the default so the notebook can run on machines other than the
# original author's; without it the historical path is used unchanged.
OpenFace_folder = os.environ.get('OPENFACE_FOLDER', '/Users/valentinadiproietto/OpenFace')
# CSV export (tqx=out:csv) of the "Template" sheet of the annotation spreadsheet.
filename_annotations = 'https://docs.google.com/spreadsheets/d/1Rqu1sJiD-ogc4a6R491JTiaYacptOTqh6DKqhwTa8NA/gviz/tq?tqx=out:csv&sheet=Template'

In [5]:
# Folder holding the recorded videos. The VIDEO_FOLDER environment variable
# overrides the default so the notebook is not tied to one user's desktop.
Video_folder = os.environ.get('VIDEO_FOLDER', '/Users/valentinadiproietto/Desktop/video_stress')

In [6]:
# List the videos found under Video_folder. get_videos is a project helper
# (star-imported at the top); it is unpacked into two values here —
# presumably parallel lists of file paths and extension-less names, TODO confirm.
video_paths, video_names = get_videos(Video_folder)

In [7]:
video_names

['WIN_20210331_21_22_52_Pro',
 'WIN_20210329_14_13_45_Pro',
 'WIN_20210406_18_49_10_Pro',
 'WIN_20210408_14_11_32_Pro',
 'WIN_20210408_15_20_51_Pro',
 'WIN_20210404_10_58_27_Pro',
 'WIN_20210414_06_24_52_Pro',
 'WIN_20210406_15_06_15_Pro',
 'WIN_20210417_14_53_12_Pro',
 'WIN_20210413_15_38_01_Pro',
 'WIN_20210408_11_48_58_Pro',
 'WIN_20210408_16_04_32_Pro',
 'WIN_20210329_10_16_02_Pro',
 'WIN_20210323_19_17_40_Pro',
 'WIN_20210409_10_26_11_Pro',
 'Test_pour_AFPA',
 'WIN_20210405_15_09_16_Pro',
 'WIN_20210407_14_54_56_Pro_edit2',
 'WIN_20210406_21_05_52_Pro',
 'WIN_20210403_18_49_15_Pro',
 'WIN_20210408_14_02_19_Pro',
 'WIN_20210415_15_41_24_Pro',
 'WIN_20210406_18_35_52_Pro',
 'WIN_20210402_14_27_50_Pro',
 'WIN_20210407_09_04_05_Pro',
 'WIN_20210402_19_04_53_Pro',
 'WIN_20210416_08_06_54_Pro',
 'Video_1',
 'WIN_20210408_14_00_44_Pro',
 'WIN_20210404_21_41_12_Pro',
 'WIN_20210330_13_10_29_Pro']

In [8]:
# Load the OpenFace output of every video.
# The processed folder is derived from OpenFace_folder instead of repeating the
# absolute path, so changing the configuration cell is enough to relocate the data.
# The trailing separator is kept because the helper receives a directory prefix.
processed_dir = os.path.join(OpenFace_folder, 'processed', '')
list_dataframes = [create_dataframe_video(processed_dir, name) for name in video_names]


In [9]:
# Videos left out of the rest of the analysis.
# NOTE(review): the reason for excluding these two recordings is not written
# down anywhere in the notebook — please document it.
excluded_videos = {'WIN_20210329_14_13_45_Pro', 'WIN_20210402_14_27_50_Pro'}
# Filtering (rather than list.remove) keeps the cell idempotent: running it a
# second time no longer raises ValueError because the names are already gone.
video_names = [name for name in video_names if name not in excluded_videos]


In [10]:
len(video_names)

29

In [11]:
# Build one annotated, feature-filtered frame per video.
# The processed folder is derived from OpenFace_folder instead of repeating the
# absolute path; the trailing separator is kept because the helper receives a
# directory prefix.
processed_dir = os.path.join(OpenFace_folder, 'processed', '')
list_df_max = []
for v_name in tqdm(video_names):
    # "max" is forwarded verbatim to the project helper
    # (presumably the annotation aggregation mode — TODO confirm).
    df_annoted = get_df_video_with_annotations(processed_dir, v_name, filename_annotations, "max")
    list_df_max.append(eliminate_features(df_annoted))


    

100%|██████████| 29/29 [03:22<00:00,  6.97s/it]


In [12]:
list_df_max[0].columns

Index(['frame', 'face_id', 'timestamp', 'confidence', 'success', 'AU01_r',
       'AU02_r', 'AU04_r', 'AU05_r', 'AU06_r', 'AU07_r', 'AU09_r', 'AU10_r',
       'AU12_r', 'AU14_r', 'AU15_r', 'AU17_r', 'AU20_r', 'AU23_r', 'AU25_r',
       'AU26_r', 'AU45_r', 'AU01_c', 'AU02_c', 'AU04_c', 'AU05_c', 'AU06_c',
       'AU07_c', 'AU09_c', 'AU10_c', 'AU12_c', 'AU14_c', 'AU15_c', 'AU17_c',
       'AU20_c', 'AU23_c', 'AU25_c', 'AU26_c', 'AU28_c', 'AU45_c', 'gaze_0_x',
       'gaze_0_y', 'gaze_0_z', 'gaze_1_x', 'gaze_1_y', 'gaze_1_z',
       'gaze_angle_x', 'gaze_angle_y', 'pose_Tx', 'pose_Ty', 'pose_Tz',
       'pose_Rx', 'pose_Ry', 'pose_Rz', 'type_candidat', 'sexe', 'video_name',
       'stress_global', 'stress', 'diapo'],
      dtype='object')

##

## AGGREGATION BY TIME WINDOW AND FIRST RANDOM FOREST ON THE TIME WINDOWS

In [12]:
# Bookkeeping columns that must not become model features.
to_drop = ['frame','face_id','timestamp','confidence','success', 'type_candidat']

# For every video: tag frames with their time window, discard the bookkeeping
# columns, then let the project helper add derivatives / drop spatial columns.
df_with_deriv = []
for video_df in list_df_max:
    windowed_df = add_frameTimeWindow(video_df)
    trimmed_df = windowed_df.drop(columns=to_drop)
    df_with_deriv.append(add_derivatives_drop_spatial(trimmed_df))

In [13]:
# Keys identifying one aggregation group (one time window of one slide of one video).
groupby_features = ['video_name', 'stress_global', 'stress', 'frameTimeWindow', 'sexe', 'diapo']

# Stack the per-video frames into a single table.
df_total = pd.concat(df_with_deriv, axis=0)


In [14]:
# Average every feature inside each group defined by groupby_features.
# The table is rebuilt from df_with_deriv instead of from df_total itself, so
# re-running this cell does not aggregate already-aggregated data (which would
# also produce '<feature>_mean_mean' column names).
df_total = (
    pd.concat(df_with_deriv)
    .groupby(groupby_features)
    .agg(['mean'])
    .reset_index()
)
# Flatten the (feature, 'mean') column MultiIndex into 'feature_mean'. The group
# keys have an empty second level, hence the strip of the dangling underscore.
df_total.columns = df_total.columns.map('_'.join).str.strip('_')
# Class balance of the per-window stress label.
df_total[['stress']].value_counts()

stress
1.0       2419
0.0       2064
2.0        638
3.0         20
dtype: int64

In [15]:
# Feature matrix: everything except identifiers, targets and the window index.
non_feature_columns = ['video_name', 'stress_global', 'stress', 'frameTimeWindow']
x = df_total.drop(columns=non_feature_columns)
x.shape

(5141, 99)

In [16]:
# Encode the 'sexe' labels numerically: 'H' -> 0, 'F' -> 1.
x['sexe'] = x['sexe'].replace({'H': 0, 'F': 1})

# Target: per-window stress level, kept as a one-column DataFrame.
y = df_total.loc[:, ['stress']]
y.shape

(5141, 1)

In [17]:
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import LeaveOneGroupOut

# Single candidate model: a random forest with a grid over size, depth and
# class weighting.
models_list = [RandomForestClassifier(random_state = 42, n_jobs=-1)]
parameters_list = [
                {'n_estimators': [100, 150, 200, 250, 300], 'max_depth':[10, 15, 20, 25,30], 'class_weight':[None,'balanced']}
                ]

# Leave-one-video-out cross-validation: all windows of a video stay in the same fold.
groups = df_total['video_name']
loo = LeaveOneGroupOut()
# split() returns a one-shot generator. It is materialised into a list so the
# folds survive being iterated more than once (several models, a prediction
# pass, or simply re-running the search cell after an interruption).
cv_loo = list(loo.split(x, y, groups))

In [18]:
# Grid-search the window-level classifier with the leave-one-video-out folds.
search_options = dict(output_predict=True, n_jobs=-1, verbose=True)
best_result, y_predict, y_proba, result_list = runGridSearchClassifiers(
    x, y, cv_loo, models_list, parameters_list, **search_options
)

Fitting 29 folds for each of 50 candidates, totalling 1450 fits


KeyboardInterrupt: 

In [None]:
# Saving predictions: put each window's identifying keys next to its predicted class.
window_keys = df_total[['video_name','diapo','frameTimeWindow']]
window_predictions = pd.DataFrame(y_predict, columns=['ypredict'])
predict5s = pd.concat([window_keys, window_predictions], axis=1)


In [None]:
best_result

In [None]:
# Persist the window-level predictions so the (slow) grid search does not have
# to be repeated. With to_csv defaults the row index is written as the first,
# unnamed CSV column.
predict5s.to_csv('all_features_tw5.csv')

In [None]:
# Reload the cached window-level predictions. The file was written with its
# row index as the first column; index_col=0 restores it as the index instead
# of leaving a spurious 'Unnamed: 0' data column in the frame.
predict5s = pd.read_csv('all_features_tw5.csv', index_col=0)

## PIVOT TABLE WITH COUNT OF PREDICTION 5SEC, TO PREDICT STRESS BY DIAPO

In [None]:
# Count, for every (video, slide), how many time windows were predicted in each class.
X = predict5s.pivot_table(values='frameTimeWindow', columns='ypredict', index=['video_name','diapo'], aggfunc='count', fill_value=0)
# Turn counts into per-slide proportions. A single vectorised division replaces
# the column-by-column iloc assignment, which wrote floats into integer columns.
X_sum = X.sum(axis=1)
X = X.div(X_sum, axis=0)
X
 

In [None]:
# Stress by slide (diapo): reshape the wide annotation table into one row per
# (video_name, diapo) pair.
annotations_wide = pd.read_csv('annotations.csv')
# The last column is left out before stacking; the remaining column labels
# become the 'diapo' values.
stress_by_diapo = annotations_wide.iloc[:, :-1].set_index('video_name').stack()
df_annotations_stress = stress_by_diapo.to_frame().reset_index()
df_annotations_stress.columns = ['video_name','diapo','stress']
# Column labels come from the CSV header as text; cast them to integers.
df_annotations_stress['diapo'] = df_annotations_stress['diapo'].astype(int)
df_annotations_stress

In [None]:
# Attach the annotated stress level to every (video_name, diapo) row; the inner
# join keeps only slides present on both sides.
Xy = X.merge(df_annotations_stress, how='inner', on=['video_name','diapo'])
# 'stress' comes from the right-hand frame and is therefore the last column:
# everything before it is a feature, the last column is the target.
# NOTE(review): set_index assumes the merge exposes video_name/diapo as regular
# columns of Xy — confirm with the pandas version in use.
X = Xy.iloc[:,:-1].set_index(['video_name','diapo'])
y = Xy.iloc[:,-1]
Xy

In [None]:
X

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline

# PCA step reused by the two pipelines below.
pca = PCA()

# Candidate slide-level classifiers; parameters_list[i] is the grid for models_list[i].
# (The previously defined but never used `pipe` pipeline has been removed.)
models_list = [
                LogisticRegression(multi_class='multinomial', fit_intercept=True, random_state=42),
                Pipeline(steps=[('pca', pca), ('logistic', LogisticRegression(multi_class='multinomial', fit_intercept=True))]),
                KNeighborsClassifier(),
                Pipeline(steps=[('pca', pca), ('knn', KNeighborsClassifier())]),
                RandomForestClassifier(random_state = 42, n_jobs=-1)
                ]

# Value grids shared by the plain estimators and their PCA pipelines.
c_grid = [0.01, 0.05, 0.1, 0.5, 1, 2, 3, 4 , 5, 10]
class_weight_grid = [None, 'balanced']
n_components_grid = [1, 2, 3, 4]
n_neighbors_grid = [4, 5, 6, 7, 8, 9, 10, 11, 12,  15, 20]
knn_weights_grid = ['uniform', 'distance']
minkowski_p_grid = [1, 2]

parameters_list = [
                    {'C': c_grid, 'class_weight' : class_weight_grid},
                    {'pca__n_components': n_components_grid,
                        'logistic__C': c_grid, 'logistic__class_weight' : class_weight_grid},
                    {'n_neighbors': n_neighbors_grid, 'weights' : knn_weights_grid, 'p': minkowski_p_grid},
                    {'pca__n_components': n_components_grid,
                        'knn__n_neighbors': n_neighbors_grid, 'knn__weights' : knn_weights_grid, 'knn__p': minkowski_p_grid},
                    {'n_estimators': [50, 100, 150, 200], 'max_depth':[3, 4, 5, 6, 10, 15, 20], 'class_weight': class_weight_grid}
                    ]

# Leave-one-video-out cross-validation: all slides of a video stay in the same fold.
groups = X.reset_index()['video_name']
loo = LeaveOneGroupOut()
# split() returns a one-shot generator. With five models searched one after the
# other, the folds must be reusable, so they are materialised into a list.
cv_loo = list(loo.split(X, y, groups))

In [None]:
# Grid-search every slide-level candidate with the leave-one-video-out folds.
search_options = dict(output_predict=True, n_jobs=-1, verbose=True)
best_result_diapo, y_predict_diapo, y_proba, result_list_diapo = runGridSearchClassifiers(
    X, y, cv_loo, models_list, parameters_list, **search_options
)

In [None]:
best_result_diapo

In [None]:
y_predict_diapo
# WARNING: this model never predicts class 2.

## ORA, SEMPRE PARTENDO DALLE PREDIZIONI A 5 SECONDI, AGGIUNGO STATISTICHE (MIN, MAX, ...) SULLE PREDIZIONI E PREDICO SU DIAPO

In [None]:
predict5s['ypredict'].value_counts()

In [None]:
from scipy.stats import kurtosis, skew

def percentil25(x):
    """Return the 25th percentile (first quartile) of the values in ``x``."""
    first_quartile = np.percentile(x, 25)
    return first_quartile

def percentil75(x):
    """Return the 75th percentile (third quartile) of the values in ``x``."""
    third_quartile = np.percentile(x, 75)
    return third_quartile

In [None]:
# Alternative approach: describe each slide by summary statistics of its
# window-level predictions instead of by class proportions.
prediction_stats = ['mean','min','max', 'median', 'std', percentil25, percentil75, kurtosis, skew]
X_diapo_minmax = predict5s.groupby(['video_name','diapo']).agg({'ypredict': prediction_stats})

In [None]:
X_diapo_minmax

In [None]:
# This is the true (annotated) stress level per slide.
y

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline

# PCA step reused by the two pipelines below.
pca = PCA()

# Candidate slide-level classifiers; parameters_list[i] is the grid for models_list[i].
# (The previously defined but never used `pipe` pipeline has been removed.)
models_list = [
                LogisticRegression(multi_class='multinomial', fit_intercept=True, random_state=42),
                Pipeline(steps=[('pca', pca), ('logistic', LogisticRegression(multi_class='multinomial', fit_intercept=True))]),
                KNeighborsClassifier(),
                Pipeline(steps=[('pca', pca), ('knn', KNeighborsClassifier())]),
                RandomForestClassifier(random_state = 42, n_jobs=-1)
                ]

# Value grids shared by the plain estimators and their PCA pipelines.
c_grid = [0.01, 0.05, 0.1, 0.5, 1, 2, 3, 4 , 5, 10]
class_weight_grid = [None, 'balanced']
n_components_grid = [1, 2, 3, 4]
n_neighbors_grid = [4, 5, 6, 7, 8, 9, 10, 11, 12,  15, 20]
knn_weights_grid = ['uniform', 'distance']
minkowski_p_grid = [1, 2]

parameters_list = [
                    {'C': c_grid, 'class_weight' : class_weight_grid},
                    {'pca__n_components': n_components_grid,
                        'logistic__C': c_grid, 'logistic__class_weight' : class_weight_grid},
                    {'n_neighbors': n_neighbors_grid, 'weights' : knn_weights_grid, 'p': minkowski_p_grid},
                    {'pca__n_components': n_components_grid,
                        'knn__n_neighbors': n_neighbors_grid, 'knn__weights' : knn_weights_grid, 'knn__p': minkowski_p_grid},
                    {'n_estimators': [50, 100, 150, 200], 'max_depth':[3, 4, 5, 6, 10, 15, 20], 'class_weight': class_weight_grid}
                    ]

# y was built in an earlier cell from a different table and is matched to
# X_diapo_minmax by row position only, so at least make sure the sizes agree.
assert len(X_diapo_minmax) == len(y), "X_diapo_minmax and y must describe the same (video_name, diapo) rows"

# Leave-one-video-out cross-validation: all slides of a video stay in the same fold.
groups = X_diapo_minmax.reset_index()['video_name']
loo = LeaveOneGroupOut()
# split() returns a one-shot generator. With five models searched one after the
# other, the folds must be reusable, so they are materialised into a list.
cv_loo = list(loo.split(X_diapo_minmax, y, groups))

In [None]:
# Grid-search every candidate on the summary-statistics features.
search_options = dict(output_predict=True, n_jobs=-1, verbose=True)
best_result_diapo_minmax, y_predict_diapo_minmax, y_proba, result_list_diapo = runGridSearchClassifiers(
    X_diapo_minmax, y, cv_loo, models_list, parameters_list, **search_options
)

In [None]:
best_result_diapo_minmax

In [None]:
# Better: this model does manage to predict class 2.
y_predict_diapo_minmax

In [None]:
# Saving predictions: one row per (video_name, diapo) with its predicted stress.
slide_keys = X_diapo_minmax.index.to_frame(index=False)
slide_predictions = pd.DataFrame(y_predict_diapo_minmax, columns=['ypredict'])
df_diapo_minmax = pd.concat([slide_keys, slide_predictions], axis=1)

df_diapo_minmax.columns = ['video_name', 'diapo', 'ypredict']
df_diapo_minmax.to_csv('all_features_tw5_diapo_minmax.csv')

In [None]:
# Reload the cached slide-level predictions. The file was written with its row
# index as the first column; index_col=0 restores it as the index instead of
# leaving a spurious 'Unnamed: 0' data column in the frame.
df_diapo_minmax = pd.read_csv('all_features_tw5_diapo_minmax.csv', index_col=0)

## GLOBAL STRESS, UTILIZZANDO LE PREDIZIONI DI DIAPO MINMAX

In [None]:
# One row per video, one column per slide, holding the predicted slide stress.
stress_diapo_by_video = df_diapo_minmax.pivot_table(
    index='video_name',
    columns='diapo',
    values='ypredict',
    aggfunc='mean',
)
stress_diapo_by_video

In [None]:
# Reload the raw annotation table, this time keyed by video.
df_annotations_stress = pd.read_csv('annotations.csv').set_index('video_name')
df_annotations_stress

In [None]:
# The last annotation column is used as the per-video (global) target.
global_annotation = df_annotations_stress.iloc[:, -1]
Xy = stress_diapo_by_video.merge(global_annotation, how='inner', on='video_name')
# Features: every column but the last; target: the column brought in by the merge.
X_global = Xy.iloc[:, :-1]
y_global = Xy.iloc[:, -1]

In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline

# PCA step reused by the two pipelines below.
pca = PCA()

# Candidate video-level classifiers; parameters_list[i] is the grid for models_list[i].
models_list = [
                LogisticRegression(multi_class='multinomial', fit_intercept=True, random_state=42),
                Pipeline(steps=[('pca', pca), ('logistic', LogisticRegression(multi_class='multinomial', fit_intercept=True))]),
                KNeighborsClassifier(),
                Pipeline(steps=[('pca', pca), ('knn', KNeighborsClassifier())]),
                RandomForestClassifier(random_state = 42, n_jobs=-1)
                ]

# Value grids shared by the plain estimators and their PCA pipelines.
c_grid = [0.01, 0.05, 0.1, 0.5, 1, 2, 3, 4 , 5, 10]
class_weight_grid = [None, 'balanced']
n_components_grid = [1, 2, 3, 4]
n_neighbors_grid = [4, 5, 6, 7, 8, 9, 10, 11, 12,  15, 20]
knn_weights_grid = ['uniform', 'distance']
minkowski_p_grid = [1, 2]

parameters_list = [
                    {'C': c_grid, 'class_weight' : class_weight_grid},
                    {'pca__n_components': n_components_grid,
                        'logistic__C': c_grid, 'logistic__class_weight' : class_weight_grid},
                    {'n_neighbors': n_neighbors_grid, 'weights' : knn_weights_grid, 'p': minkowski_p_grid},
                    {'pca__n_components': n_components_grid,
                        'knn__n_neighbors': n_neighbors_grid, 'knn__weights' : knn_weights_grid, 'knn__p': minkowski_p_grid},
                    {'n_estimators': [50, 100, 150, 200], 'max_depth':[3, 4, 5, 6, 10, 15, 20], 'class_weight': class_weight_grid}
                    ]



In [None]:
# Grid-search the video-level candidates; the third argument (5) is passed in
# the cross-validation slot of the helper instead of explicit folds.
search_options = dict(output_predict=True, n_jobs=-1, verbose=True)
best_result_tw5_diapo_minmax_global, y_predict_tw5_diapo_minmax_global, y_proba, result_list = runGridSearchClassifiers(
    X_global, y_global, 5, models_list, parameters_list, **search_options
)

In [9]:
best_result_tw5_diapo_minmax_global

{'best_estimator': Pipeline(steps=[('pca', PCA(n_components=3)),
                 ('knn', KNeighborsClassifier(n_neighbors=10))]),
 'best_score': 0.6804444444444444,
 'best_params': {'knn__n_neighbors': 10,
  'knn__p': 2,
  'knn__weights': 'uniform',
  'pca__n_components': 3},
 'mean_test_f1_score': 0.6804444444444444,
 'std_test_f1_score': 0.13701869747151346,
 'mean_test_accuracy_score': 0.6866666666666668,
 'std_test_accuracy_score': 0.1309792180292567,
 'mean_test_balanced_accuracy_score': 0.6888888888888889,
 'std_test_balanced_accuracy_score': 0.15153535218873174,
 'mean_test_precision': 0.7511111111111111,
 'std_test_precision': 0.16815630588793434,
 'mean_test_recall': 0.6866666666666668,
 'std_test_recall': 0.1309792180292567}

In [None]:
from sklearn.metrics import accuracy_score, f1_score

# Score the video-level predictions against the annotated global stress.
weighted_f1 = f1_score(y_global, y_predict_tw5_diapo_minmax_global, average='weighted')
print(f"F1 {weighted_f1!s}")
global_accuracy = accuracy_score(y_global, y_predict_tw5_diapo_minmax_global)
print(f"Accuracy {global_accuracy!s}")

In [None]:
y_predict_tw5_diapo_minmax_global

In [None]:
# Show the predicted global stress next to the name of each video.
video_ids = X_global.reset_index()[['video_name']]
global_predictions = pd.DataFrame(
    y_predict_tw5_diapo_minmax_global,
    columns=['y_predict_tw5_diapo_minmax_global'],
)
df_global_5tw_minmax = pd.concat([video_ids, global_predictions], axis=1)

df_global_5tw_minmax.columns = ['video_name', 'y_predict_tw5_diapo_minmax_global']
df_global_5tw_minmax


In [None]:
# Saving predictions: write the video-level predictions to disk
# (with to_csv defaults the row index is stored as the first CSV column).
df_global_5tw_minmax.to_csv('all_features_tw5_diapo_minmax_global.csv')