In [1]:
import os
import subprocess
import pandas as pd
import platform
import pathlib
import numpy as np
from annotations import *
from extract_video_features import *
from extract_audio_features import *
import cv2
import seaborn as sns
from scipy.stats import kurtosis, skew
from sklearn.metrics import f1_score
import importlib
from tqdm import tqdm
import ordinal_classification as o_c
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import GridSearchCV
from util import runGridSearchClassifiers


In [2]:
#get current directory
cwd = os.getcwd()

In [3]:
cwd

'/Users/valentinadiproietto/filrouge'

In [4]:
# Machine-specific absolute path, presumably the root of the local OpenFace
# installation (TODO confirm); adjust when running on another machine.
OpenFace_folder = '/Users/valentinadiproietto/OpenFace'
# Google Sheets URL requesting the sheet named "Template" as CSV
# (tqx=out:csv&sheet=Template); used as the annotation source.
filename_annotations = 'https://docs.google.com/spreadsheets/d/1Rqu1sJiD-ogc4a6R491JTiaYacptOTqh6DKqhwTa8NA/gviz/tq?tqx=out:csv&sheet=Template'

In [5]:
Video_folder = '/Users/valentinadiproietto/Desktop/video_stress'

In [6]:
video_paths, video_names = get_videos(Video_folder)

In [7]:
video_names

['WIN_20210331_21_22_52_Pro',
 'WIN_20210329_14_13_45_Pro',
 'WIN_20210406_18_49_10_Pro',
 'WIN_20210408_14_11_32_Pro',
 'WIN_20210408_15_20_51_Pro',
 'WIN_20210404_10_58_27_Pro',
 'WIN_20210414_06_24_52_Pro',
 'WIN_20210406_15_06_15_Pro',
 'WIN_20210417_14_53_12_Pro',
 'WIN_20210413_15_38_01_Pro',
 'WIN_20210408_11_48_58_Pro',
 'WIN_20210408_16_04_32_Pro',
 'WIN_20210329_10_16_02_Pro',
 'WIN_20210323_19_17_40_Pro',
 'WIN_20210409_10_26_11_Pro',
 'Test_pour_AFPA',
 'WIN_20210405_15_09_16_Pro',
 'WIN_20210407_14_54_56_Pro_edit2',
 'WIN_20210406_21_05_52_Pro',
 'WIN_20210403_18_49_15_Pro',
 'WIN_20210408_14_02_19_Pro',
 'WIN_20210415_15_41_24_Pro',
 'WIN_20210406_18_35_52_Pro',
 'WIN_20210402_14_27_50_Pro',
 'WIN_20210407_09_04_05_Pro',
 'WIN_20210402_19_04_53_Pro',
 'WIN_20210416_08_06_54_Pro',
 'Video_1',
 'WIN_20210408_14_00_44_Pro',
 'WIN_20210404_21_41_12_Pro',
 'WIN_20210330_13_10_29_Pro']

In [8]:
# Build the processed-output folder from the configured OpenFace root so the
# location is set in a single place. The trailing '' makes the joined path end
# with a separator, i.e. '<OpenFace_folder>/processed/'.
processed_dir = os.path.join(OpenFace_folder, 'processed', '')

# One DataFrame per video, in the same order as video_names.
list_dataframes = [
    create_dataframe_video(processed_dir, video_name)
    for video_name in video_names
]


In [9]:
# Videos excluded from the rest of the analysis.
excluded_videos = {
    'WIN_20210329_14_13_45_Pro',
    'WIN_20210402_14_27_50_Pro',
}

# Filter in place (slice assignment keeps the same list object) instead of
# calling list.remove(): remove() raises ValueError once the name is gone, so
# the cell could not be re-run. Filtering is idempotent.
video_names[:] = [name for name in video_names if name not in excluded_videos]


In [10]:
len(video_names)

29

In [11]:
# Build the processed-output folder from the configured OpenFace root so the
# location is set in a single place. The trailing '' makes the joined path end
# with a separator, i.e. '<OpenFace_folder>/processed/'.
processed_dir = os.path.join(OpenFace_folder, 'processed', '')

# For every remaining video: load its frames joined with the annotations
# ("max" is forwarded to the helper as-is; presumably the rule used to merge
# several annotations - TODO confirm), then prune columns with eliminate_features.
list_df_max = []
for v_name in tqdm(video_names):
    df_annoted = get_df_video_with_annotations(processed_dir, v_name, filename_annotations, "max")
    list_df_max.append(eliminate_features(df_annoted))


    

100%|██████████| 29/29 [03:01<00:00,  6.27s/it]


In [12]:
list_df_max[0].columns

Index(['frame', 'face_id', 'timestamp', 'confidence', 'success', 'AU01_r',
       'AU02_r', 'AU04_r', 'AU05_r', 'AU06_r', 'AU07_r', 'AU09_r', 'AU10_r',
       'AU12_r', 'AU14_r', 'AU15_r', 'AU17_r', 'AU20_r', 'AU23_r', 'AU25_r',
       'AU26_r', 'AU45_r', 'AU01_c', 'AU02_c', 'AU04_c', 'AU05_c', 'AU06_c',
       'AU07_c', 'AU09_c', 'AU10_c', 'AU12_c', 'AU14_c', 'AU15_c', 'AU17_c',
       'AU20_c', 'AU23_c', 'AU25_c', 'AU26_c', 'AU28_c', 'AU45_c', 'gaze_0_x',
       'gaze_0_y', 'gaze_0_z', 'gaze_1_x', 'gaze_1_y', 'gaze_1_z',
       'gaze_angle_x', 'gaze_angle_y', 'pose_Tx', 'pose_Ty', 'pose_Tz',
       'pose_Rx', 'pose_Ry', 'pose_Rz', 'type_candidat', 'sexe', 'video_name',
       'stress_global', 'stress', 'diapo'],
      dtype='object')

## Per-video aggregation and video-level prediction

In [22]:
# Columns removed from every per-video frame before the derivative step.
to_drop = ['frame','face_id','timestamp','confidence','success', 'type_candidat']

# Same two steps for each video, expressed as one comprehension:
# drop the columns above, then hand the frame to add_derivatives_drop_spatial.
df_with_deriv = [
    add_derivatives_drop_spatial(video_df.drop(to_drop, axis=1))
    for video_df in list_df_max
]

In [23]:
# Keys that identify one video in the aggregation performed later.
groupby_features = ['video_name', 'stress_global', 'sexe']

# Stack all per-video frames and discard the 'diapo' and 'stress' columns
# in a single chained expression.
df_total = pd.concat(df_with_deriv).drop(columns=['diapo', 'stress'])

# Let pandas print long Index objects without truncation.
pd.options.display.max_seq_items = 2000
df_total.columns

Index(['AU01_r', 'AU02_r', 'AU04_r', 'AU05_r', 'AU06_r', 'AU07_r', 'AU09_r',
       'AU10_r', 'AU12_r', 'AU14_r', 'AU15_r', 'AU17_r', 'AU20_r', 'AU23_r',
       'AU25_r', 'AU26_r', 'AU45_r', 'AU01_c', 'AU02_c', 'AU04_c', 'AU05_c',
       'AU06_c', 'AU07_c', 'AU09_c', 'AU10_c', 'AU12_c', 'AU14_c', 'AU15_c',
       'AU17_c', 'AU20_c', 'AU23_c', 'AU25_c', 'AU26_c', 'AU28_c', 'AU45_c',
       'sexe', 'video_name', 'stress_global', 'AU01_r_de', 'AU01_r_de_de',
       'AU02_r_de', 'AU02_r_de_de', 'AU04_r_de', 'AU04_r_de_de', 'AU05_r_de',
       'AU05_r_de_de', 'AU06_r_de', 'AU06_r_de_de', 'AU07_r_de',
       'AU07_r_de_de', 'AU09_r_de', 'AU09_r_de_de', 'AU10_r_de',
       'AU10_r_de_de', 'AU12_r_de', 'AU12_r_de_de', 'AU14_r_de',
       'AU14_r_de_de', 'AU15_r_de', 'AU15_r_de_de', 'AU17_r_de',
       'AU17_r_de_de', 'AU20_r_de', 'AU20_r_de_de', 'AU23_r_de',
       'AU23_r_de_de', 'AU25_r_de', 'AU25_r_de_de', 'AU26_r_de',
       'AU26_r_de_de', 'AU45_r_de', 'AU45_r_de_de', 'gaze_0_x_de',
     

In [24]:
# Collapse every video to a single row holding the mean and std of each feature.
per_video_stats = df_total.groupby(groupby_features).agg(['mean', 'std'])
df_total = per_video_stats.reset_index()

# Flatten the two-level header to '<feature>_<stat>'. The grouping keys have an
# empty second level after reset_index, so the trailing '_' is stripped.
flat_names = df_total.columns.map('_'.join)
df_total.columns = flat_names.str.strip('_')

# Class balance of the video-level target.
df_total[['stress_global']].value_counts()

stress_global
0.0              12
1.0               9
2.0               8
dtype: int64

In [25]:
pd.options.display.max_seq_items = 2000
df_total.columns

Index(['video_name', 'stress_global', 'sexe', 'AU01_r_mean', 'AU01_r_std',
       'AU02_r_mean', 'AU02_r_std', 'AU04_r_mean', 'AU04_r_std', 'AU05_r_mean',
       'AU05_r_std', 'AU06_r_mean', 'AU06_r_std', 'AU07_r_mean', 'AU07_r_std',
       'AU09_r_mean', 'AU09_r_std', 'AU10_r_mean', 'AU10_r_std', 'AU12_r_mean',
       'AU12_r_std', 'AU14_r_mean', 'AU14_r_std', 'AU15_r_mean', 'AU15_r_std',
       'AU17_r_mean', 'AU17_r_std', 'AU20_r_mean', 'AU20_r_std', 'AU23_r_mean',
       'AU23_r_std', 'AU25_r_mean', 'AU25_r_std', 'AU26_r_mean', 'AU26_r_std',
       'AU45_r_mean', 'AU45_r_std', 'AU01_c_mean', 'AU01_c_std', 'AU02_c_mean',
       'AU02_c_std', 'AU04_c_mean', 'AU04_c_std', 'AU05_c_mean', 'AU05_c_std',
       'AU06_c_mean', 'AU06_c_std', 'AU07_c_mean', 'AU07_c_std', 'AU09_c_mean',
       'AU09_c_std', 'AU10_c_mean', 'AU10_c_std', 'AU12_c_mean', 'AU12_c_std',
       'AU14_c_mean', 'AU14_c_std', 'AU15_c_mean', 'AU15_c_std', 'AU17_c_mean',
       'AU17_c_std', 'AU20_c_mean', 'AU20_c_std', 

In [26]:
# Feature matrix: every aggregated column except the video identifier and the target.
x = df_total.drop(columns=['video_name', 'stress_global'])
# Encode sex numerically in one pass (H -> 0, F -> 1).
x['sexe'] = x['sexe'].replace({'H': 0, 'F': 1})

# Target as a 1-D Series: scikit-learn estimators expect y of shape (n_samples,).
# A single-column DataFrame is accepted but raises a DataConversionWarning on
# every fit; the values themselves are unchanged.
y = df_total['stress_global']

In [27]:
pd.options.display.max_seq_items = 2000
x.shape

(29, 195)

In [28]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline

pca = PCA()
pipe = Pipeline(steps=[('pca', pca), ('logistic', LogisticRegression(multi_class='multinomial', fit_intercept=True, random_state=42))])

# Hyper-parameter values named once, so the plain estimators and their
# PCA-prefixed pipeline variants search exactly the same values.
c_values = [0.01, 0.05, 0.1, 0.5, 1, 2, 3, 4, 5, 10]
class_weight_options = [None, 'balanced']
pca_component_counts = [1, 2, 3, 4]
neighbor_counts = [4, 5, 6, 7, 8, 9, 10, 11, 12, 15, 20]
neighbor_weightings = ['uniform', 'distance']
minkowski_powers = [1, 2]

# Candidate estimators; entry k is tuned with parameters_list[k].
models_list = [
    LogisticRegression(multi_class='multinomial', fit_intercept=True, random_state=42),
    Pipeline(steps=[('pca', pca), ('logistic', LogisticRegression(multi_class='multinomial', fit_intercept=True))]),
    KNeighborsClassifier(),
    Pipeline(steps=[('pca', pca), ('knn', KNeighborsClassifier())]),
    RandomForestClassifier(random_state=42, n_jobs=-1),
]

# Each grid gets its own list copies so editing one grid never affects another.
parameters_list = [
    {
        'C': list(c_values),
        'class_weight': list(class_weight_options),
    },
    {
        'pca__n_components': list(pca_component_counts),
        'logistic__C': list(c_values),
        'logistic__class_weight': list(class_weight_options),
    },
    {
        'n_neighbors': list(neighbor_counts),
        'weights': list(neighbor_weightings),
        'p': list(minkowski_powers),
    },
    {
        'pca__n_components': list(pca_component_counts),
        'knn__n_neighbors': list(neighbor_counts),
        'knn__weights': list(neighbor_weightings),
        'knn__p': list(minkowski_powers),
    },
    {
        'n_estimators': [50, 100, 150, 200],
        'max_depth': [3, 4, 5, 6, 10, 15, 20],
        'class_weight': list(class_weight_options),
    },
]



In [30]:
def runGridSearchClassifiers(X, y, cv, models_list, parameters_list, output_predict = True, n_jobs=1, verbose=1):
    """Grid-search several classifiers on the same data and keep the best one.

    The estimator at position k of ``models_list`` is tuned with ``GridSearchCV``
    over the grid at position k of ``parameters_list``. Five metrics are tracked
    (accuracy, weighted F1, balanced accuracy, weighted precision, weighted
    recall); candidates are ranked, and the winner refitted, on weighted F1.

    Parameters
    ----------
    X, y
        Features and target, passed unchanged to ``GridSearchCV.fit`` and
        ``cross_val_predict``.
    cv
        Cross-validation specification forwarded to scikit-learn. A generator
        is materialised into a list first, because it is consumed once per
        model and once more for the final predictions.
    models_list, parameters_list
        Estimators and their parameter grids; both must have the same length.
    output_predict : bool
        When true, cross-validated predictions of the overall best estimator
        are computed and their weighted F1 / accuracy printed.
    n_jobs, verbose
        Forwarded to ``GridSearchCV``; a truthy ``verbose`` also prints a
        per-model summary.

    Returns
    -------
    tuple ``(best_result, y_predict, y_predict_proba, result_list)``
        ``best_result`` is the result dict with the highest ``best_score``
        (``{}`` when no model was given). ``y_predict`` holds the
        cross-validated predictions, or ``None`` when they were not requested.
        ``y_predict_proba`` is always the placeholder ``''`` (probabilities are
        not computed). ``result_list`` holds one result dict per model.
    int
        ``-1`` when the two lists differ in length (legacy error signal, kept
        for backward compatibility; an error message is printed).

    Notes
    -----
    Warnings are silenced only while this function runs; the caller's warning
    filters are restored on exit. The final predictions reuse the data and
    folds used for tuning, so the printed scores are not an unbiased estimate.
    """
    # Fail fast on inconsistent input, before any heavy import or fit.
    if len(models_list) != len(parameters_list):
        print('Error: models and parameters lists do not have the same length', len(models_list), len(parameters_list))
        return -1

    result_list = []
    best_result = {}
    y_predict = None
    # Placeholder, defined up front so every return path has a value for it.
    y_predict_proba = ''

    # Nothing to search: return the empty outcome instead of failing later
    # on best_result['best_estimator'].
    if len(models_list) == 0:
        return best_result, y_predict, y_predict_proba, result_list

    import types
    import warnings
    from sklearn.model_selection import cross_val_predict, GridSearchCV
    from sklearn.metrics import f1_score, accuracy_score

    if isinstance(cv, types.GeneratorType):
        cv = list(cv)

    scoring = {'accuracy_score' : 'accuracy', 'f1_score' : 'f1_weighted',
               'balanced_accuracy_score' : 'balanced_accuracy',
               'precision' : 'precision_weighted', 'recall' : 'recall_weighted'}
    # Order in which the metrics are stored in each result dict.
    stored_metrics = ('f1_score', 'accuracy_score', 'balanced_accuracy_score', 'precision', 'recall')
    # (label, metric) pairs in the order of the verbose summary.
    printed_metrics = (('accuracy', 'accuracy_score'), ('f1', 'f1_score'),
                       ('balanced accuracy', 'balanced_accuracy_score'),
                       ('precision', 'precision'), ('recall', 'recall'))

    # Scope the "ignore everything" filter to this call so it does not leak
    # into the rest of the session.
    with warnings.catch_warnings():
        warnings.simplefilter("ignore")

        for model, parameters in zip(models_list, parameters_list):
            clf = GridSearchCV(estimator=model,
                               param_grid=parameters,
                               scoring=scoring,
                               refit='f1_score',
                               cv=cv, n_jobs=n_jobs, verbose=verbose)
            clf.fit(X, y)

            winner = clf.best_index_
            result = {
                'best_estimator': clf.best_estimator_,
                'best_score': clf.best_score_,
                'best_params': clf.best_params_,
            }
            for metric in stored_metrics:
                for stat in ('mean', 'std'):
                    key = stat + '_test_' + metric
                    result[key] = clf.cv_results_[key][winner]
            result_list.append(result)

            # The first result always becomes the incumbent, so a best score
            # of 0 (or below) can no longer leave best_result empty. Ties keep
            # the earlier model, as before.
            if not best_result or result['best_score'] > best_result['best_score']:
                best_result = result

            if verbose:
                print('Best estimator', clf.best_estimator_)
                print('Best results', clf.best_score_)
                print('Best params', clf.best_params_)
                for label, metric in printed_metrics:
                    print(label + ' (mean, std)', result['mean_test_' + metric], result['std_test_' + metric])
                print()

        if output_predict:
            y_predict = cross_val_predict(best_result['best_estimator'], X, y, cv=cv)
            print('f1_score (weighted)', f1_score(y, y_predict, average='weighted'))
            print('accuracy', accuracy_score(y, y_predict))

    return best_result, y_predict, y_predict_proba, result_list


In [31]:
# Tune every candidate on the full video-level feature set (5-fold CV) and
# collect cross-validated predictions of the overall winner.
best_result_glob, y_predict_glob, y_proba, result_list_glob = runGridSearchClassifiers(
    x, y, 5, models_list, parameters_list,
    output_predict=True,
    n_jobs=-1,
    verbose=True,
)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best estimator LogisticRegression(C=1, class_weight='balanced', multi_class='multinomial',
                   random_state=42)
Best results 0.5444444444444444
Best params {'C': 1, 'class_weight': 'balanced'}
accuracy (mean, std) 0.5599999999999999 0.15972198067614587
f1 (mean, std) 0.5444444444444444 0.15022616694360505
balanced accuracy (mean, std) 0.5555555555555556 0.12668615834434865
precision (mean, std) 0.5900000000000001 0.18607047649270486
recall (mean, std) 0.5599999999999999 0.15972198067614587

Fitting 5 folds for each of 80 candidates, totalling 400 fits
Best estimator Pipeline(steps=[('pca', PCA(n_components=4)),
                ('logistic',
                 LogisticRegression(C=1, multi_class='multinomial'))])
Best results 0.48965079365079367
Best params {'logistic__C': 1, 'logistic__class_weight': None, 'pca__n_components': 4}
accuracy (mean, std) 0.5199999999999999 0.11274356350191841
f1 (mean, std) 0.4896507

In [32]:
best_result_glob

{'best_estimator': Pipeline(steps=[('pca', PCA(n_components=1)),
                 ('knn', KNeighborsClassifier(p=1))]),
 'best_score': 0.5511111111111111,
 'best_params': {'knn__n_neighbors': 5,
  'knn__p': 1,
  'knn__weights': 'uniform',
  'pca__n_components': 1},
 'mean_test_f1_score': 0.5511111111111111,
 'std_test_f1_score': 0.2935184601004732,
 'mean_test_accuracy_score': 0.6,
 'std_test_accuracy_score': 0.24944382578492943,
 'mean_test_balanced_accuracy_score': 0.5777777777777777,
 'std_test_balanced_accuracy_score': 0.24745619390355653,
 'mean_test_precision': 0.5777777777777777,
 'std_test_precision': 0.3344425987398314,
 'mean_test_recall': 0.6,
 'std_test_recall': 0.24944382578492943}

In [25]:
y_predict_glob

array([1., 1., 1., 1., 0., 2., 0., 2., 1., 0., 0., 0., 0., 0., 1., 0., 0.,
       2., 0., 1., 0., 0., 0., 0., 2., 1., 0., 1., 0.])

In [33]:
## WITHOUT SPATIAL FEATURES (gaze and head-pose columns removed)

In [33]:
# Gaze and head-pose signals whose aggregated columns are listed below.
_spatial_signals = [
    'gaze_0_x', 'gaze_0_y', 'gaze_0_z',
    'gaze_1_x', 'gaze_1_y', 'gaze_1_z',
    'gaze_angle_x', 'gaze_angle_y',
    'pose_Tx', 'pose_Ty', 'pose_Tz',
    'pose_Rx', 'pose_Ry', 'pose_Rz',
]

# For each signal: the '_de' and '_de_de' columns (presumably first and second
# derivatives - TODO confirm), each with its '_mean' and '_std' aggregate.
# The nesting order reproduces the column order signal by signal.
spatial_features = [
    signal + derivative + statistic
    for signal in _spatial_signals
    for derivative in ('_de', '_de_de')
    for statistic in ('_mean', '_std')
]

In [36]:
# Feature matrix without the gaze / head-pose columns listed in spatial_features.
x_only_AUC = x.drop(columns=spatial_features)
x_only_AUC.columns

Index(['sexe', 'AU01_r_mean', 'AU01_r_std', 'AU02_r_mean', 'AU02_r_std',
       'AU04_r_mean', 'AU04_r_std', 'AU05_r_mean', 'AU05_r_std', 'AU06_r_mean',
       'AU06_r_std', 'AU07_r_mean', 'AU07_r_std', 'AU09_r_mean', 'AU09_r_std',
       'AU10_r_mean', 'AU10_r_std', 'AU12_r_mean', 'AU12_r_std', 'AU14_r_mean',
       'AU14_r_std', 'AU15_r_mean', 'AU15_r_std', 'AU17_r_mean', 'AU17_r_std',
       'AU20_r_mean', 'AU20_r_std', 'AU23_r_mean', 'AU23_r_std', 'AU25_r_mean',
       'AU25_r_std', 'AU26_r_mean', 'AU26_r_std', 'AU45_r_mean', 'AU45_r_std',
       'AU01_c_mean', 'AU01_c_std', 'AU02_c_mean', 'AU02_c_std', 'AU04_c_mean',
       'AU04_c_std', 'AU05_c_mean', 'AU05_c_std', 'AU06_c_mean', 'AU06_c_std',
       'AU07_c_mean', 'AU07_c_std', 'AU09_c_mean', 'AU09_c_std', 'AU10_c_mean',
       'AU10_c_std', 'AU12_c_mean', 'AU12_c_std', 'AU14_c_mean', 'AU14_c_std',
       'AU15_c_mean', 'AU15_c_std', 'AU17_c_mean', 'AU17_c_std', 'AU20_c_mean',
       'AU20_c_std', 'AU23_c_mean', 'AU23_c_std', 'A

In [37]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import LeaveOneGroupOut
from sklearn.decomposition import PCA
from sklearn.pipeline import Pipeline

pca = PCA()
pipe = Pipeline(steps=[('pca', pca), ('logistic', LogisticRegression(multi_class='multinomial', fit_intercept=True, random_state=42))])

# Hyper-parameter values named once, so the plain estimators and their
# PCA-prefixed pipeline variants search exactly the same values.
c_values = [0.01, 0.05, 0.1, 0.5, 1, 2, 3, 4, 5, 10]
class_weight_options = [None, 'balanced']
pca_component_counts = [1, 2, 3, 4]
neighbor_counts = [4, 5, 6, 7, 8, 9, 10, 11, 12, 15, 20]
neighbor_weightings = ['uniform', 'distance']
minkowski_powers = [1, 2]

# Candidate estimators; entry k is tuned with parameters_list[k].
models_list = [
    LogisticRegression(multi_class='multinomial', fit_intercept=True, random_state=42),
    Pipeline(steps=[('pca', pca), ('logistic', LogisticRegression(multi_class='multinomial', fit_intercept=True))]),
    KNeighborsClassifier(),
    Pipeline(steps=[('pca', pca), ('knn', KNeighborsClassifier())]),
    RandomForestClassifier(random_state=42, n_jobs=-1),
]

# Each grid gets its own list copies so editing one grid never affects another.
parameters_list = [
    {
        'C': list(c_values),
        'class_weight': list(class_weight_options),
    },
    {
        'pca__n_components': list(pca_component_counts),
        'logistic__C': list(c_values),
        'logistic__class_weight': list(class_weight_options),
    },
    {
        'n_neighbors': list(neighbor_counts),
        'weights': list(neighbor_weightings),
        'p': list(minkowski_powers),
    },
    {
        'pca__n_components': list(pca_component_counts),
        'knn__n_neighbors': list(neighbor_counts),
        'knn__weights': list(neighbor_weightings),
        'knn__p': list(minkowski_powers),
    },
    {
        'n_estimators': [50, 100, 150, 200],
        'max_depth': [3, 4, 5, 6, 10, 15, 20],
        'class_weight': list(class_weight_options),
    },
]


In [38]:
# Same search as for the full feature set, restricted to the columns kept in
# x_only_AUC (spatial gaze / pose columns dropped).
best_result_glob_au, y_predict_glob_au, y_proba, result_list_glob_au = runGridSearchClassifiers(
    x_only_AUC, y, 5, models_list, parameters_list,
    output_predict=True,
    n_jobs=-1,
    verbose=True,
)

Fitting 5 folds for each of 20 candidates, totalling 100 fits
Best estimator LogisticRegression(C=0.5, multi_class='multinomial', random_state=42)
Best results 0.5222222222222223
Best params {'C': 0.5, 'class_weight': None}
accuracy (mean, std) 0.52 0.1869046102516825
f1 (mean, std) 0.5222222222222223 0.18973665961010278
balanced accuracy (mean, std) 0.4666666666666667 0.19436506316151003
precision (mean, std) 0.5700000000000001 0.21162510528097836
recall (mean, std) 0.52 0.1869046102516825

Fitting 5 folds for each of 80 candidates, totalling 400 fits
Best estimator Pipeline(steps=[('pca', PCA(n_components=3)),
                ('logistic',
                 LogisticRegression(C=1, multi_class='multinomial'))])
Best results 0.4337777777777777
Best params {'logistic__C': 1, 'logistic__class_weight': None, 'pca__n_components': 3}
accuracy (mean, std) 0.44666666666666666 0.1641137816678823
f1 (mean, std) 0.4337777777777777 0.174951986358727
balanced accuracy (mean, std) 0.4222222222222222 

In [39]:
best_result_glob_au

{'best_estimator': LogisticRegression(C=0.5, multi_class='multinomial', random_state=42),
 'best_score': 0.5222222222222223,
 'best_params': {'C': 0.5, 'class_weight': None},
 'mean_test_f1_score': 0.5222222222222223,
 'std_test_f1_score': 0.18973665961010278,
 'mean_test_accuracy_score': 0.52,
 'std_test_accuracy_score': 0.1869046102516825,
 'mean_test_balanced_accuracy_score': 0.4666666666666667,
 'std_test_balanced_accuracy_score': 0.19436506316151003,
 'mean_test_precision': 0.5700000000000001,
 'std_test_precision': 0.21162510528097836,
 'mean_test_recall': 0.52,
 'std_test_recall': 0.1869046102516825}

In [40]:
y_predict_glob_au

array([2., 0., 1., 0., 0., 1., 2., 2., 0., 0., 1., 1., 1., 1., 2., 0., 0.,
       1., 1., 1., 0., 2., 0., 0., 0., 1., 0., 2., 0.])