In [67]:
import pandas as pd
import numpy as np
import tdml
from imblearn.under_sampling import RandomUnderSampler

import os
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier
from sklearn.tree import DecisionTreeClassifier
from sklearn.model_selection import train_test_split
import xgboost
from xgboost import XGBClassifier
import xlsxwriter
from imblearn.over_sampling import SMOTE

In [54]:
# Load the feature table and discard every row that contains a missing value.
df = (
    pd.read_csv('em_feat_0510.csv')
    .dropna()
)
df.columns


Index(['Angry', 'Disgust', 'Scared', 'Happy', 'Sad', 'Surprised', 'Neutral',
       'eda_mean', 'eda_min', 'eda_max', 'eda_std', 'eda_kurtosis', 'eda_skew',
       'eda_num_peaks', 'eda_amphitude', 'eda_duration', 'hr_mean', 'hr_min',
       'hr_max', 'hr_std', 'hr_rms', 'hr_num_peaks', 'hr_amphitude',
       'hr_duration', 'temp_mean', 'temp_min', 'temp_max', 'temp_mtd',
       'stress', 'user'],
      dtype='object')

In [55]:
# Biometric subset: every eda_*, hr_* and temp_* feature column, plus the
# 'stress' and 'user' columns that machine_learning() relies on.
bio_df = df[[
    # eda_* features
    'eda_mean', 'eda_min', 'eda_max', 'eda_std', 'eda_kurtosis', 'eda_skew',
    'eda_num_peaks', 'eda_amphitude', 'eda_duration',
    # hr_* features
    'hr_mean', 'hr_min', 'hr_max', 'hr_std', 'hr_rms',
    'hr_num_peaks', 'hr_amphitude', 'hr_duration',
    # temp_* features
    'temp_mean', 'temp_min', 'temp_max', 'temp_mtd',
    # label and grouping key
    'stress', 'user',
]]

In [56]:
# Emotion subset: the seven emotion-score columns, plus the 'stress' and
# 'user' columns that machine_learning() relies on.
emo_df = df[['Angry', 'Disgust', 'Scared', 'Happy', 'Sad',
             'Surprised', 'Neutral', 'stress', 'user']]
user_list = df.user.unique()

# (Re)create results.xlsx with a first sheet that lists every user.
# The context manager saves and closes the workbook on exit; the explicit
# writer.save() call it replaces no longer exists in pandas >= 2.0, and the
# `with` form also releases the file handle if to_excel() raises.
with pd.ExcelWriter('results.xlsx', engine='xlsxwriter') as writer:
    pd.DataFrame(user_list, columns=['user_list']).to_excel(
        writer, sheet_name="results.xlsx", index=False)


In [57]:
def results2excel(rf_accuracy, dt_accuracy, et_accuracy, xgb_accuracy, user_list, sheet_name):
    """Write the per-user accuracies of the four classifiers to results.xlsx.

    Parameters
    ----------
    rf_accuracy, dt_accuracy, et_accuracy, xgb_accuracy : sequence of float
        Accuracy per held-out user for the random forest, decision tree,
        extra trees and xgboost models, in the same order as ``user_list``.
    user_list : sequence
        Identifier of the held-out user for each row.
    sheet_name : str
        Name of the worksheet to write into 'results.xlsx'.

    Notes
    -----
    A score sequence shorter than the others (including an empty one) is
    padded with NaN rather than raising, so a missing model shows up as an
    empty column instead of aborting the export.
    """
    # Building from named Series fixes the column labels up front and aligns
    # the sequences on their positional index.
    results = pd.DataFrame({
        'user': pd.Series(user_list),
        'random forest': pd.Series(rf_accuracy, dtype='float64'),
        'decision tree': pd.Series(dt_accuracy, dtype='float64'),
        'extra trees': pd.Series(et_accuracy, dtype='float64'),
        'xgboost': pd.Series(xgb_accuracy, dtype='float64'),
    })

    # Append a sheet when the workbook already exists; otherwise create it
    # (append mode raises if the file is missing).
    mode = 'a' if os.path.exists('results.xlsx') else 'w'

    # The context manager saves and closes the workbook on exit, so no
    # explicit writer.save() is needed (that method is gone in pandas >= 2.0).
    with pd.ExcelWriter('results.xlsx', engine='openpyxl', mode=mode) as writer:
        results.to_excel(writer, sheet_name=sheet_name)


In [61]:
def machine_learning(dataframe, sheet_name):
    """Leave-one-user-out evaluation of four tree-based classifiers.

    For every unique value of ``dataframe['user']`` the rows of that user
    form the test set and all remaining rows form the training set. The
    training set is oversampled with SMOTE (``random_state=42``); the test
    set is left untouched. A random forest, a decision tree, an extra-trees
    ensemble and an XGBoost classifier are fitted on the resampled data, and
    their accuracy on the held-out user is recorded. The per-user accuracies
    are then passed to ``results2excel``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain a 'user' column (grouping key) and a 'stress' column
        (target); every other column is used as a feature.
    sheet_name : str
        Worksheet name forwarded to ``results2excel``.
    """
    xgb_accuracy, et_accuracy, dt_accuracy, rf_accuracy = [], [], [], []
    user_list = []

    for user in dataframe.user.unique():
        user_list.append(user)

        train_set = dataframe[dataframe['user'] != user]
        test_set = dataframe[dataframe['user'] == user]

        # Oversample the training fold only, so no synthetic sample is ever
        # derived from the held-out user.
        su = SMOTE(random_state=42)
        X_train, y_train = su.fit_resample(
            train_set.drop(columns=['user', 'stress']), train_set['stress'])

        # Build the test features/labels once instead of once per model.
        X_test = test_set.drop(columns=['user', 'stress'])
        y_test = test_set['stress']

        # Each model is paired with the list that collects its accuracy.
        models = (
            (RandomForestClassifier(n_estimators=100, max_depth=5,
                                    min_samples_leaf=5, random_state=123),
             rf_accuracy),
            (DecisionTreeClassifier(random_state=123), dt_accuracy),
            # The extra-trees model was missing, leaving et_accuracy empty.
            # TODO confirm the intended hyperparameters; only the seed is
            # taken from the sibling models.
            (ExtraTreesClassifier(n_estimators=100, random_state=123),
             et_accuracy),
            (XGBClassifier(), xgb_accuracy),
        )
        for model, scores in models:
            model.fit(X_train, y_train)
            predictions = model.predict(X_test)
            # accuracy_score's documented argument order is (y_true, y_pred).
            scores.append(accuracy_score(y_test, predictions))

    results2excel(rf_accuracy, dt_accuracy, et_accuracy, xgb_accuracy, user_list, sheet_name)

In [None]:
# Evaluate on the emotion-score columns only; written to the 'Emotions' sheet.
machine_learning(dataframe=emo_df, sheet_name='Emotions')


In [None]:
# Evaluate on the eda_*/hr_*/temp_* columns only; written to the 'biometrics' sheet.
machine_learning(dataframe=bio_df, sheet_name='biometrics')


In [None]:
# Evaluate on every feature column of df; written to the 'full-set' sheet.
machine_learning(dataframe=df, sheet_name='full-set')


In [10]:
from scipy.stats import pearsonr

# p-value of the Pearson correlation between 'stress' and every column of df
# except 'user'. The 'stress' row is the target correlated with itself.
ndf = df.drop(columns='user')
pvals = pd.DataFrame(
    [pearsonr(ndf[column], ndf['stress'])[1] for column in ndf.columns],
    index=ndf.columns,
)
pvals


Unnamed: 0,0
Angry,1.051365e-15
Disgust,4.337172e-10
Scared,0.1663046
Happy,2.162209e-27
Sad,6.355232e-06
Surprised,0.5021822
Neutral,0.0001105051
eda_mean,4.7155659999999995e-20
eda_min,3.384631e-20
eda_max,6.160898e-20


In [65]:
# Reduced feature set evaluated under the 'Pearsons' sheet name.
# NOTE(review): presumably chosen from the Pearson p-value table; the
# selection criterion is not visible here -- confirm.
new_df = df[[
    'Surprised', 'Neutral',
    'eda_kurtosis',
    'hr_std', 'hr_rms', 'hr_num_peaks', 'hr_amphitude', 'hr_duration',
    'stress', 'user',
]]

In [None]:
# Evaluate on the reduced feature set in new_df; written to the 'Pearsons' sheet.
machine_learning(dataframe=new_df, sheet_name='Pearsons')