## Applying conventional machine learning to DAiSEE and EmotiW

In [1]:
import pickle
import pandas as pd
import numpy as np

from sklearn.preprocessing import StandardScaler, LabelEncoder
from sklearn.pipeline import Pipeline
from sklearn.ensemble import RandomForestRegressor
from xgboost import XGBRegressor
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsRegressor
from sklearn.tree import DecisionTreeRegressor
from sklearn.svm import SVR
from sklearn.naive_bayes import MultinomialNB, GaussianNB
from sklearn.metrics import mean_absolute_error, mean_squared_error, accuracy_score
from keras.utils import to_categorical
from sklearn.model_selection import KFold, cross_val_score, cross_validate
from sklearn.decomposition import PCA, KernelPCA

import matplotlib.pyplot as plt
import seaborn as sns

Using TensorFlow backend.


In [2]:
# INPUT PATH
# NOTE(review): absolute, machine-specific directory; the notebook only runs
# where this exact folder exists. Kept as a plain string because later cells
# build file names with `path + '<name>'`.
path = ('C:/Users/hasegawa-lab-pc/OneDrive - Japan Advanced Institute of Science and Technology/Documents/Exp_Shofi/Preprocess and Statistical Analysis/extracted/')

# DAISEE
# The pickle is unpacked into six objects: train/val/test features and labels.
# SECURITY: pickle.load can execute arbitrary code; only load files you created
# yourself or otherwise trust.
with open(path+'daisee_28_186.pkl','rb') as f:
    X_train, X_val, X_test, y_train, y_val, y_test = pickle.load(f)

# EMOTIW2018
# The pickle is unpacked into a single (features, labels) pair.
# The same pickle.load caveat applies here.
with open(path+'emotiw_31_56.pkl','rb') as f:
    X, y = pickle.load(f)

# Averaged-data (DAISEE)
def load_data(datafile):
    """Read a CSV file and return it without its first column.

    Parameters
    ----------
    datafile : str or file-like
        Anything accepted by ``pandas.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed table with the column labelled ``columns[0]`` removed.
    """
    table = pd.read_csv(datafile)
    leading_column = table.columns[0]
    return table.drop(columns=leading_column)

# Directory holding the per-split averaged DAiSEE tables.
path_av = ('C:/Users/hasegawa-lab-pc/OneDrive - Japan Advanced Institute of Science and Technology/Documents/Dataset/DAiSEE/OpenFace_2.2.0_win_x64/processed/csv/labels/')

# Load the three splits in train / val / test order.
df_train_av, df_val_av, df_test_av = [
    load_data(path_av + csv_name)
    for csv_name in ('average_train.csv', 'average_val.csv', 'average_test.csv')
]

# Stack the splits row-wise and renumber the rows from 0.
df_all_av = pd.concat([df_train_av, df_val_av, df_test_av], ignore_index=True)

# The averaged EmotiW table is read from the pickle directory (`path`), not `path_av`.
df_emotiw = load_data(path + 'average_emotiw.csv')
# df_emotiw.head()

# SPLIT features and Label
def split_label(df):
    """Split a table into features, encoded labels and one-hot labels.

    The last column is taken as the label; every other column is a feature.

    Parameters
    ----------
    df : array-like (e.g. pandas.DataFrame)
        Two-dimensional data whose final column holds the label.

    Returns
    -------
    tuple
        ``(X, Y_en, Y_cat)``: the feature matrix, the labels after
        ``LabelEncoder().fit_transform``, and ``to_categorical`` of the labels.
    """
    data = np.array(df)
    features, raw_labels = data[:, :-1], data[:, -1]

    # A fresh encoder is fitted on every call, so the mapping depends only on
    # the label values present in this particular table.
    encoded_labels = LabelEncoder().fit_transform(raw_labels)

    # NOTE(review): the one-hot matrix is built from the raw labels, not from
    # the encoded ones -- presumably they are already 0-based integers; confirm.
    one_hot_labels = to_categorical(raw_labels)

    return features, encoded_labels, one_hot_labels

# Split every averaged table into (features, encoded labels, one-hot labels).
# Each call fits its own LabelEncoder inside split_label.
# DAiSEE, one call per split:
X_train_av, y_train_av, y_train_av_cat = split_label(df_train_av)
X_val_av, y_val_av, y_val_av_cat = split_label(df_val_av)
X_test_av, y_test_av, y_test_av_cat = split_label(df_test_av)
# DAiSEE, all splits concatenated (note the capital `Y` in these names):
X_all_av, Y_all_av, Y_all_av_cat = split_label(df_all_av)
# EmotiW:
X_emotiw, y_emotiw, y_emotiw_cat = split_label(df_emotiw)

### PLOTS


In [5]:
sns.set_context('talk')

# A pair plot draws one panel for every pair of plotted variables. The averaged
# DAiSEE tables carry hundreds of feature columns, so plotting all of them at
# once would request a grid far too large to render or read. Only a small,
# configurable subset of feature columns is plotted instead.
PAIRPLOT_N_FEATURES = 5  # raise, or replace the list below with hand-picked columns

# Keep the label column out of the plotted variables; it is used for colouring only.
pairplot_vars = [col for col in df_test_av.columns if col != 'Engagement'][:PAIRPLOT_N_FEATURES]

sns.pairplot(df_test_av, vars=pairplot_vars, hue='Engagement');

In [3]:
# Print a concise summary (index range, column count, dtypes, memory use) of
# the concatenated DAiSEE table.
df_all_av.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8571 entries, 0 to 8570
Columns: 330 entries,  gaze_0_x to Engagement
dtypes: float64(330)
memory usage: 21.6 MB


### Regression

In [None]:
# REGRESSION models
# Every estimator that accepts a seed is seeded with 0 so repeated runs of the
# cross-validation give the same scores.
model_1 = RandomForestRegressor(n_estimators=100, random_state=0)
model_2 = XGBRegressor(random_state=0, learning_rate=0.05)
model_3 = KNeighborsRegressor()
# Decision trees permute the candidate features at each split, so an unseeded
# tree can differ between runs; seed it like the other models.
model_4 = DecisionTreeRegressor(random_state=0)
model_5 = SVR()
# NOTE(review): GaussianNB is a classifier, not a regressor. It predicts one of
# the label values seen during fitting and requires discrete targets -- confirm
# that scoring it with a regression metric alongside the regressors is intended.
model_6 = GaussianNB()

# CLASSIFICATION models
model_1c = LogisticRegression()

In [None]:
# PRE-Processing
# Variant A: standardise, then project onto 90 whitened principal components.
preprocess = Pipeline([('scaled', StandardScaler()), ('pca', PCA(n_components=90, whiten=True))])
# Variant B: kernel PCA down to 90 components, then standardise the projection.
preprocessKPCA = Pipeline([('pca', KernelPCA(n_components=90)),('scaled', StandardScaler())])

# https://www.kaggle.com/rakesh2711/multiple-models-using-pipeline/notebook
# (entry-name suffix, name of the final pipeline step, estimator).
# The step name for model_1 is kept as 'RGB' so existing parameter paths such
# as 'RGB__n_estimators' stay valid.
regressors = [
    ('RFG', 'RGB', model_1),
    ('XGBR', 'XGBR', model_2),
    ('KNN', 'KNN', model_3),
    ('DT', 'DT', model_4),
    ('SVR', 'SVR', model_5),
    ('GNB', 'GNB', model_6),
]

# Each estimator is evaluated behind three pre-processing variants:
#   scaled<suffix>        -> StandardScaler only
#   scaled_pca_<suffix>   -> `preprocess`     (scaler + PCA)
#   kpca_scaled_<suffix>  -> `preprocessKPCA` (kernel PCA + scaler)
# The kpca_scaled_* entries previously wrapped `preprocess` by mistake, which
# made them duplicates of scaled_pca_* and left kernel PCA untested.
pipe = []
for tag, step, est in regressors:
    pipe.append(('scaled' + tag, Pipeline([('scaled', StandardScaler()), (step, est)])))
    pipe.append(('scaled_pca_' + tag, Pipeline([('preprocess', preprocess), (step, est)])))
    pipe.append(('kpca_scaled_' + tag, Pipeline([('preprocessKPCA', preprocessKPCA), (step, est)])))

# model_1c (LogisticRegression) is not added to `pipe`; the earlier attempts
# are kept below for reference.
# # pipe.append(('scaledLR', (Pipeline([('scaled', StandardScaler()), ('LR', model_1c)]))))
# pipe.append(('scaled_pca_LR', (Pipeline([('preprocess', preprocess),('LR', model_1c)]))))

In [None]:
def train_kf(X, Y, pipelines=None, n_splits=5):
    """Cross-validate every pipeline on (X, Y) and plot the MSE per fold.

    Parameters
    ----------
    X : array-like
        Feature matrix.
    Y : array-like
        Target values.
    pipelines : list of (str, estimator) tuples, optional
        Named estimators to evaluate. Defaults to the notebook-level `pipe`
        list, which is what the original two-argument call used.
    n_splits : int, default 5
        Number of folds for the (unshuffled) KFold splitter.

    Returns
    -------
    model_name : list of str
        Pipeline names, in evaluation order.
    results : list of numpy.ndarray
        One array of per-fold mean squared errors per pipeline.
    """
    if pipelines is None:
        pipelines = pipe  # fall back to the list defined in an earlier cell

    # KFold without shuffling is deterministic, so one splitter serves every model.
    kf = KFold(n_splits=n_splits)

    model_name = []
    results = []
    for name, model in pipelines:
        # The scorer returns negated MSE (higher is better); flip the sign back.
        cv_result = -1 * cross_val_score(model, X, Y, cv=kf, scoring='neg_mean_squared_error')

        results.append(cv_result)
        model_name.append(name)
        # Report the current pipeline only (not the whole accumulated name list).
        print("%s: %f: (%f)" % (name, cv_result.mean(), cv_result.std()))

    # PLOT Algorithm comparison
    fig = plt.figure(figsize=(15, 5))
    fig.suptitle('Regression Algorithm Comparison (%d-Fold CrossValidation)' % n_splits)
    ax = fig.add_subplot(111)
    ax.boxplot(results)
    ax.set_xlabel('Model')
    ax.set_ylabel('Mean Squared Error (MSE)')
    ax.set_xticklabels(model_name, rotation=70)
    plt.show()

    return model_name, results

In [None]:
# Cross-validate every pipeline in `pipe` on the concatenated averaged DAiSEE
# data and keep the per-fold MSE arrays for later inspection.
model_name, results = train_kf(X_all_av,Y_all_av)

In [None]:
# Same comparison on the averaged EmotiW data; the results are stored under
# separate names so the DAiSEE results are not overwritten.
emotiw_model_name, emotiw_results = train_kf(X_emotiw, y_emotiw)