# Script to build thermal, light, and aural comfort models with groups of participants

In [1]:
import pickle
import pandas as pd
from datetime import datetime

from modeling_functions import *


# Variables

In [2]:
# Seed handed to every classifier below as random_state
seed = 13
# Feature-set identifiers: fs1 ... fs6
dataframes_names = ['fs{}'.format(index) for index in range(1, 7)]
folder_path = 'data-processed-preferences/'
file_date = '2019-11-15'

# Read the first feature-set file only to discover the participant ids
# (presumably every feature set lists the same participants -- confirm).
dataframe_names = pd.read_csv('../{}{}_{}.csv'.format(folder_path, file_date, dataframes_names[0]))
participant_list = dataframe_names['user_id'].unique()
print("Available participants: \n{}".format(participant_list))


Available participants: 
['cresh07' 'cresh10' 'cresh08' 'cresh12' 'cresh09' 'cresh06' 'cresh02'
 'cresh13' 'cresh15' 'cresh03' 'cresh14' 'cresh11' 'cresh05' 'cresh01'
 'cresh04' 'cresh22' 'cresh16' 'cresh19' 'cresh21' 'cresh26' 'cresh29'
 'cresh27' 'cresh23' 'cresh20' 'cresh18' 'cresh30' 'cresh17' 'cresh24'
 'cresh28' 'cresh25']


# FS1: Time + Sensing

In [3]:
# Common path prefix (folder + date + feature-set name) of the FS1 per-participant files
df_file = "{}{}_{}".format(folder_path, file_date, dataframes_names[0])
print("Loading files from: {}".format(df_file))


Loading files from: data-processed-preferences/2019-11-15_fs1


In [4]:
# Read every participant's train/validation split, then concatenate once.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and calling
# it inside a loop re-copies the accumulated frame on every iteration.
train_frames = [pd.read_csv("../" + df_file + "_train_" + participant + ".csv")
                for participant in participant_list]
val_frames = [pd.read_csv("../" + df_file + "_val_" + participant + ".csv")
              for participant in participant_list]

df_train = pd.concat(train_frames, ignore_index=True)
df_val = pd.concat(val_frames, ignore_index=True)

# drop userid so the identifier is not passed on to the model
df_train = df_train.drop(columns=['user_id'])
df_val = df_val.drop(columns=['user_id'])

print(df_train.columns.values)


['light_cozie' 'noise_cozie' 'humidity_sensing' 'light_sensing'
 'noise_sensing' 'temperature_sensing' 'hour_sin' 'hour_cos'
 'day_of_week_sin' 'day_of_week_cos' 'thermal_cozie']


## Thermal Comfort Prediction

In [5]:
# Random forest on the FS1 frames as loaded (thermal_cozie is their last column).
# NOTE(review): model_validate comes from modeling_functions; presumably it uses
# the last column as the label -- confirm.
clf = RandomForestClassifier(random_state=seed, n_estimators=1000)
(micro_rf_1_thermal,
 macro_rf_1_thermal,
 _) = model_validate(df_train, df_val, clf)


Accuracy (f1 micro) on validation set: 0.5823429541595926
F1 micro on validation set: 0.5823429541595926
F1 macro on validation set: 0.39070972509983687
Confusion Matrix: 
[[  7  62  12]
 [ 20 296  49]
 [  9  94  40]]
Classification Metrics: 
              precision    recall  f1-score   support

         9.0       0.19      0.09      0.12        81
        10.0       0.65      0.81      0.72       365
        11.0       0.40      0.28      0.33       143

    accuracy                           0.58       589
   macro avg       0.42      0.39      0.39       589
weighted avg       0.53      0.58      0.55       589



## Light Comfort Prediction

In [6]:
# Re-append the light response so it becomes the last column of both frames.
# NOTE(review): this reorders df_train / df_val in place, so cells executed
# afterwards see light_cozie last; presumably model_validate uses the last
# column as the label -- confirm in modeling_functions.
for frame in (df_train, df_val):
    frame['light_cozie'] = frame.pop('light_cozie')

# train and validate model
clf = RandomForestClassifier(random_state=seed, n_estimators=1000)
(micro_rf_1_light,
 macro_rf_1_light,
 _) = model_validate(df_train, df_val, clf)


Accuracy (f1 micro) on validation set: 0.7640067911714771
F1 micro on validation set: 0.7640067911714772
F1 macro on validation set: 0.4053285070234223
Confusion Matrix: 
[[ 17  54   7]
 [ 19 431   7]
 [  4  48   2]]
Classification Metrics: 
              precision    recall  f1-score   support

         9.0       0.42      0.22      0.29        78
        10.0       0.81      0.94      0.87       457
        11.0       0.12      0.04      0.06        54

    accuracy                           0.76       589
   macro avg       0.45      0.40      0.41       589
weighted avg       0.70      0.76      0.72       589



## Aural Comfort Prediction

In [7]:
# Re-append the noise response so it becomes the last column of both frames.
# NOTE(review): this reorders df_train / df_val in place (on top of any earlier
# reordering); presumably model_validate uses the last column as the label -- confirm.
for frame in (df_train, df_val):
    frame['noise_cozie'] = frame.pop('noise_cozie')

# train and validate model
clf = RandomForestClassifier(random_state=seed, n_estimators=1000)
(micro_rf_1_aural,
 macro_rf_1_aural,
 _) = model_validate(df_train, df_val, clf)


Accuracy (f1 micro) on validation set: 0.8573853989813243
F1 micro on validation set: 0.8573853989813243
F1 macro on validation set: 0.6677143778207608
Confusion Matrix: 
[[475  24]
 [ 60  30]]
Classification Metrics: 
              precision    recall  f1-score   support

        10.0       0.89      0.95      0.92       499
        11.0       0.56      0.33      0.42        90

    accuracy                           0.86       589
   macro avg       0.72      0.64      0.67       589
weighted avg       0.84      0.86      0.84       589



# FS2: Time + Sensing + Heart Rate + mbient

In [8]:
# Common path prefix (folder + date + feature-set name) of the FS2 per-participant files
df_file = "{}{}_{}".format(folder_path, file_date, dataframes_names[1])
print("Loading files from: {}".format(df_file))


Loading files from: data-processed-preferences/2019-11-15_fs2


In [9]:
# Read every participant's train/validation split, then concatenate once.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and calling
# it inside a loop re-copies the accumulated frame on every iteration.
train_frames = [pd.read_csv("../" + df_file + "_train_" + participant + ".csv")
                for participant in participant_list]
val_frames = [pd.read_csv("../" + df_file + "_val_" + participant + ".csv")
              for participant in participant_list]

df_train = pd.concat(train_frames, ignore_index=True)
df_val = pd.concat(val_frames, ignore_index=True)

# drop userid so the identifier is not passed on to the model
df_train = df_train.drop(columns=['user_id'])
df_val = df_val.drop(columns=['user_id'])

print(df_train.columns.values)


['heartRate_cozie' 'light_cozie' 'noise_cozie' 'humidity_sensing'
 'light_sensing' 'noise_sensing' 'temperature_sensing'
 'temperature_mbient' 'hour_sin' 'hour_cos' 'day_of_week_sin'
 'day_of_week_cos' 'thermal_cozie']


## Thermal Comfort Prediction

In [10]:
# Random forest on the FS2 frames as loaded (thermal_cozie is their last column).
# NOTE(review): model_validate comes from modeling_functions; presumably it uses
# the last column as the label -- confirm.
clf = RandomForestClassifier(random_state=seed, n_estimators=1000)
(micro_rf_2_thermal,
 macro_rf_2_thermal,
 _) = model_validate(df_train, df_val, clf)


Accuracy (f1 micro) on validation set: 0.6451612903225806
F1 micro on validation set: 0.6451612903225806
F1 macro on validation set: 0.4545053409204353
Confusion Matrix: 
[[ 10  65   6]
 [ 11 324  30]
 [  3  94  46]]
Classification Metrics: 
              precision    recall  f1-score   support

         9.0       0.42      0.12      0.19        81
        10.0       0.67      0.89      0.76       365
        11.0       0.56      0.32      0.41       143

    accuracy                           0.65       589
   macro avg       0.55      0.44      0.45       589
weighted avg       0.61      0.65      0.60       589



## Light Comfort Prediction

In [11]:
# Re-append the light response so it becomes the last column of both frames.
# NOTE(review): this reorders df_train / df_val in place, so cells executed
# afterwards see light_cozie last; presumably model_validate uses the last
# column as the label -- confirm in modeling_functions.
for frame in (df_train, df_val):
    frame['light_cozie'] = frame.pop('light_cozie')

# train and validate model
clf = RandomForestClassifier(random_state=seed, n_estimators=1000)
(micro_rf_2_light,
 macro_rf_2_light,
 _) = model_validate(df_train, df_val, clf)


Accuracy (f1 micro) on validation set: 0.7741935483870968
F1 micro on validation set: 0.7741935483870968
F1 macro on validation set: 0.3711119626512445
Confusion Matrix: 
[[ 10  63   5]
 [  6 445   6]
 [  3  50   1]]
Classification Metrics: 
              precision    recall  f1-score   support

         9.0       0.53      0.13      0.21        78
        10.0       0.80      0.97      0.88       457
        11.0       0.08      0.02      0.03        54

    accuracy                           0.77       589
   macro avg       0.47      0.37      0.37       589
weighted avg       0.70      0.77      0.71       589



## Aural Comfort Prediction

In [12]:
# Re-append the noise response so it becomes the last column of both frames.
# NOTE(review): this reorders df_train / df_val in place (on top of any earlier
# reordering); presumably model_validate uses the last column as the label -- confirm.
for frame in (df_train, df_val):
    frame['noise_cozie'] = frame.pop('noise_cozie')

# train and validate model
clf = RandomForestClassifier(random_state=seed, n_estimators=1000)
(micro_rf_2_aural,
 macro_rf_2_aural,
 _) = model_validate(df_train, df_val, clf)


Accuracy (f1 micro) on validation set: 0.8573853989813243
F1 micro on validation set: 0.8573853989813243
F1 macro on validation set: 0.6508693688607881
Confusion Matrix: 
[[479  20]
 [ 64  26]]
Classification Metrics: 
              precision    recall  f1-score   support

        10.0       0.88      0.96      0.92       499
        11.0       0.57      0.29      0.38        90

    accuracy                           0.86       589
   macro avg       0.72      0.62      0.65       589
weighted avg       0.83      0.86      0.84       589



# FS3: Time + Sensing + Heart Rate + mbient + room + preference history

In [13]:
# Common path prefix (folder + date + feature-set name) of the FS3 per-participant files
df_file = "{}{}_{}".format(folder_path, file_date, dataframes_names[2])
print("Loading files from: {}".format(df_file))


Loading files from: data-processed-preferences/2019-11-15_fs3


In [14]:
# Read every participant's train/validation split, then concatenate once.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and calling
# it inside a loop re-copies the accumulated frame on every iteration.
train_frames = [pd.read_csv("../" + df_file + "_train_" + participant + ".csv")
                for participant in participant_list]
val_frames = [pd.read_csv("../" + df_file + "_val_" + participant + ".csv")
              for participant in participant_list]

df_train = pd.concat(train_frames, ignore_index=True)
df_val = pd.concat(val_frames, ignore_index=True)

# drop userid and room so these columns are not passed on to the model
df_train = df_train.drop(columns=['user_id', 'room'])
df_val = df_val.drop(columns=['user_id', 'room'])

print(df_train.columns.values)


['heartRate_cozie' 'light_cozie' 'noise_cozie' 'humidity_sensing'
 'light_sensing' 'noise_sensing' 'temperature_sensing'
 'temperature_mbient' 'hour_sin' 'hour_cos' 'day_of_week_sin'
 'day_of_week_cos' 'user_grouped_cooler' 'user_grouped_warmer'
 'user_grouped_dimmer' 'user_grouped_brighter' 'user_grouped_quieter'
 'user_grouped_louder' 'room_grouped_cooler' 'room_grouped_warmer'
 'room_grouped_dimmer' 'room_grouped_brighter' 'room_grouped_quieter'
 'room_grouped_louder' 'thermal_cozie']


## Thermal Comfort Prediction

In [15]:
# Light and noise preference-history columns; the thermal frames leave them out.
other_preferences = ['{}_grouped_{}'.format(scope, preference)
                     for scope in ('user', 'room')
                     for preference in ('dimmer', 'brighter', 'quieter', 'louder')]

# drop() returns new frames, so df_train / df_val stay untouched for the other targets
df_train_thermal = df_train.drop(columns=other_preferences)
df_val_thermal = df_val.drop(columns=other_preferences)

print(df_train_thermal.columns.values)

clf = RandomForestClassifier(random_state=seed, n_estimators=1000)
(micro_rf_3_thermal,
 macro_rf_3_thermal,
 _) = model_validate(df_train_thermal, df_val_thermal, clf)


['heartRate_cozie' 'light_cozie' 'noise_cozie' 'humidity_sensing'
 'light_sensing' 'noise_sensing' 'temperature_sensing'
 'temperature_mbient' 'hour_sin' 'hour_cos' 'day_of_week_sin'
 'day_of_week_cos' 'user_grouped_cooler' 'user_grouped_warmer'
 'room_grouped_cooler' 'room_grouped_warmer' 'thermal_cozie']
Accuracy (f1 micro) on validation set: 0.7028862478777589
F1 micro on validation set: 0.7028862478777589
F1 macro on validation set: 0.5473951577299011
Confusion Matrix: 
[[ 15  63   3]
 [ 17 332  16]
 [  2  74  67]]
Classification Metrics: 
              precision    recall  f1-score   support

         9.0       0.44      0.19      0.26        81
        10.0       0.71      0.91      0.80       365
        11.0       0.78      0.47      0.59       143

    accuracy                           0.70       589
   macro avg       0.64      0.52      0.55       589
weighted avg       0.69      0.70      0.67       589



## Light Comfort Prediction

In [16]:
# Thermal and noise preference-history columns; the light frames leave them out.
other_preferences = ['{}_grouped_{}'.format(scope, preference)
                     for scope in ('user', 'room')
                     for preference in ('cooler', 'warmer', 'quieter', 'louder')]

# Work on copies so df_train / df_val stay untouched for the other targets
df_train_light = df_train.copy()
df_val_light = df_val.copy()

# move light response to the end
df_train_light['light_cozie'] = df_train_light.pop('light_cozie')
df_val_light['light_cozie'] = df_val_light.pop('light_cozie')

# drop other preferences
df_train_light = df_train_light.drop(columns=other_preferences)
df_val_light = df_val_light.drop(columns=other_preferences)

print(df_train_light.columns.values)

# train and validate model
clf = RandomForestClassifier(random_state=seed, n_estimators=1000)
(micro_rf_3_light,
 macro_rf_3_light,
 _) = model_validate(df_train_light, df_val_light, clf)


['heartRate_cozie' 'noise_cozie' 'humidity_sensing' 'light_sensing'
 'noise_sensing' 'temperature_sensing' 'temperature_mbient' 'hour_sin'
 'hour_cos' 'day_of_week_sin' 'day_of_week_cos' 'user_grouped_dimmer'
 'user_grouped_brighter' 'room_grouped_dimmer' 'room_grouped_brighter'
 'thermal_cozie' 'light_cozie']
Accuracy (f1 micro) on validation set: 0.7962648556876061
F1 micro on validation set: 0.7962648556876061
F1 macro on validation set: 0.4752104739752728
Confusion Matrix: 
[[ 25  53   0]
 [ 16 440   1]
 [  4  46   4]]
Classification Metrics: 
              precision    recall  f1-score   support

         9.0       0.56      0.32      0.41        78
        10.0       0.82      0.96      0.88       457
        11.0       0.80      0.07      0.14        54

    accuracy                           0.80       589
   macro avg       0.72      0.45      0.48       589
weighted avg       0.78      0.80      0.75       589



## Aural Comfort Prediction

In [17]:
# Thermal and light preference-history columns; the aural frames leave them out.
other_preferences = ['{}_grouped_{}'.format(scope, preference)
                     for scope in ('user', 'room')
                     for preference in ('cooler', 'warmer', 'dimmer', 'brighter')]

# Work on copies so df_train / df_val stay untouched for the other targets
df_train_aural = df_train.copy()
df_val_aural = df_val.copy()

# move noise response to the end
df_train_aural['noise_cozie'] = df_train_aural.pop('noise_cozie')
df_val_aural['noise_cozie'] = df_val_aural.pop('noise_cozie')

# drop other preferences
df_train_aural = df_train_aural.drop(columns=other_preferences)
df_val_aural = df_val_aural.drop(columns=other_preferences)

print(df_train_aural.columns.values)

# train and validate model
clf = RandomForestClassifier(random_state=seed, n_estimators=1000)
(micro_rf_3_aural,
 macro_rf_3_aural,
 _) = model_validate(df_train_aural, df_val_aural, clf)


['heartRate_cozie' 'light_cozie' 'humidity_sensing' 'light_sensing'
 'noise_sensing' 'temperature_sensing' 'temperature_mbient' 'hour_sin'
 'hour_cos' 'day_of_week_sin' 'day_of_week_cos' 'user_grouped_quieter'
 'user_grouped_louder' 'room_grouped_quieter' 'room_grouped_louder'
 'thermal_cozie' 'noise_cozie']
Accuracy (f1 micro) on validation set: 0.8794567062818336
F1 micro on validation set: 0.8794567062818336
F1 macro on validation set: 0.7445965175861289
Confusion Matrix: 
[[473  26]
 [ 45  45]]
Classification Metrics: 
              precision    recall  f1-score   support

        10.0       0.91      0.95      0.93       499
        11.0       0.63      0.50      0.56        90

    accuracy                           0.88       589
   macro avg       0.77      0.72      0.74       589
weighted avg       0.87      0.88      0.87       589



# FS4: Time + Heart Rate + mbient + room + preference history

In [18]:
# Common path prefix (folder + date + feature-set name) of the FS4 per-participant files
df_file = "{}{}_{}".format(folder_path, file_date, dataframes_names[3])
print("Loading files from: {}".format(df_file))


Loading files from: data-processed-preferences/2019-11-15_fs4


In [19]:
# Read every participant's train/validation split, then concatenate once.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and calling
# it inside a loop re-copies the accumulated frame on every iteration.
train_frames = [pd.read_csv("../" + df_file + "_train_" + participant + ".csv")
                for participant in participant_list]
val_frames = [pd.read_csv("../" + df_file + "_val_" + participant + ".csv")
              for participant in participant_list]

df_train = pd.concat(train_frames, ignore_index=True)
df_val = pd.concat(val_frames, ignore_index=True)

# drop userid and room so these columns are not passed on to the model
df_train = df_train.drop(columns=['user_id', 'room'])
df_val = df_val.drop(columns=['user_id', 'room'])

print(df_train.columns.values)


['heartRate_cozie' 'light_cozie' 'noise_cozie' 'temperature_mbient'
 'hour_sin' 'hour_cos' 'day_of_week_sin' 'day_of_week_cos'
 'user_grouped_cooler' 'user_grouped_warmer' 'user_grouped_dimmer'
 'user_grouped_brighter' 'user_grouped_quieter' 'user_grouped_louder'
 'room_grouped_cooler' 'room_grouped_warmer' 'room_grouped_dimmer'
 'room_grouped_brighter' 'room_grouped_quieter' 'room_grouped_louder'
 'thermal_cozie']


## Thermal Comfort Prediction

In [20]:
# Light and noise preference-history columns; the thermal frames leave them out.
other_preferences = ['{}_grouped_{}'.format(scope, preference)
                     for scope in ('user', 'room')
                     for preference in ('dimmer', 'brighter', 'quieter', 'louder')]

# drop() returns new frames, so df_train / df_val stay untouched for the other targets
df_train_thermal = df_train.drop(columns=other_preferences)
df_val_thermal = df_val.drop(columns=other_preferences)

print(df_train_thermal.columns.values)

clf = RandomForestClassifier(random_state=seed, n_estimators=1000)
(micro_rf_4_thermal,
 macro_rf_4_thermal,
 _) = model_validate(df_train_thermal, df_val_thermal, clf)


['heartRate_cozie' 'light_cozie' 'noise_cozie' 'temperature_mbient'
 'hour_sin' 'hour_cos' 'day_of_week_sin' 'day_of_week_cos'
 'user_grouped_cooler' 'user_grouped_warmer' 'room_grouped_cooler'
 'room_grouped_warmer' 'thermal_cozie']
Accuracy (f1 micro) on validation set: 0.7198641765704584
F1 micro on validation set: 0.7198641765704584
F1 macro on validation set: 0.5714293556684861
Confusion Matrix: 
[[ 18  58   5]
 [ 12 338  15]
 [  4  71  68]]
Classification Metrics: 
              precision    recall  f1-score   support

         9.0       0.53      0.22      0.31        81
        10.0       0.72      0.93      0.81       365
        11.0       0.77      0.48      0.59       143

    accuracy                           0.72       589
   macro avg       0.68      0.54      0.57       589
weighted avg       0.71      0.72      0.69       589



## Light Comfort Prediction

In [21]:
# Thermal and noise preference-history columns; the light frames leave them out.
other_preferences = ['{}_grouped_{}'.format(scope, preference)
                     for scope in ('user', 'room')
                     for preference in ('cooler', 'warmer', 'quieter', 'louder')]

# Work on copies so df_train / df_val stay untouched for the other targets
df_train_light = df_train.copy()
df_val_light = df_val.copy()

# move light response to the end
df_train_light['light_cozie'] = df_train_light.pop('light_cozie')
df_val_light['light_cozie'] = df_val_light.pop('light_cozie')

# drop other preferences
df_train_light = df_train_light.drop(columns=other_preferences)
df_val_light = df_val_light.drop(columns=other_preferences)

print(df_train_light.columns.values)

# train and validate model
clf = RandomForestClassifier(random_state=seed, n_estimators=1000)
(micro_rf_4_light,
 macro_rf_4_light,
 _) = model_validate(df_train_light, df_val_light, clf)


['heartRate_cozie' 'noise_cozie' 'temperature_mbient' 'hour_sin'
 'hour_cos' 'day_of_week_sin' 'day_of_week_cos' 'user_grouped_dimmer'
 'user_grouped_brighter' 'room_grouped_dimmer' 'room_grouped_brighter'
 'thermal_cozie' 'light_cozie']
Accuracy (f1 micro) on validation set: 0.7860780984719864
F1 micro on validation set: 0.7860780984719865
F1 macro on validation set: 0.4833816589914151
Confusion Matrix: 
[[ 20  58   0]
 [ 19 435   3]
 [  6  40   8]]
Classification Metrics: 
              precision    recall  f1-score   support

         9.0       0.44      0.26      0.33        78
        10.0       0.82      0.95      0.88       457
        11.0       0.73      0.15      0.25        54

    accuracy                           0.79       589
   macro avg       0.66      0.45      0.48       589
weighted avg       0.76      0.79      0.75       589



## Aural Comfort Prediction

In [22]:
# Thermal and light preference-history columns; the aural frames leave them out.
other_preferences = ['{}_grouped_{}'.format(scope, preference)
                     for scope in ('user', 'room')
                     for preference in ('cooler', 'warmer', 'dimmer', 'brighter')]

# Work on copies so df_train / df_val stay untouched for the other targets
df_train_aural = df_train.copy()
df_val_aural = df_val.copy()

# move noise response to the end
df_train_aural['noise_cozie'] = df_train_aural.pop('noise_cozie')
df_val_aural['noise_cozie'] = df_val_aural.pop('noise_cozie')

# drop other preferences
df_train_aural = df_train_aural.drop(columns=other_preferences)
df_val_aural = df_val_aural.drop(columns=other_preferences)

print(df_train_aural.columns.values)

# train and validate model
clf = RandomForestClassifier(random_state=seed, n_estimators=1000)
(micro_rf_4_aural,
 macro_rf_4_aural,
 _) = model_validate(df_train_aural, df_val_aural, clf)


['heartRate_cozie' 'light_cozie' 'temperature_mbient' 'hour_sin'
 'hour_cos' 'day_of_week_sin' 'day_of_week_cos' 'user_grouped_quieter'
 'user_grouped_louder' 'room_grouped_quieter' 'room_grouped_louder'
 'thermal_cozie' 'noise_cozie']
Accuracy (f1 micro) on validation set: 0.8743633276740238
F1 micro on validation set: 0.8743633276740239
F1 macro on validation set: 0.7205165692007798
Confusion Matrix: 
[[476  23]
 [ 51  39]]
Classification Metrics: 
              precision    recall  f1-score   support

        10.0       0.90      0.95      0.93       499
        11.0       0.63      0.43      0.51        90

    accuracy                           0.87       589
   macro avg       0.77      0.69      0.72       589
weighted avg       0.86      0.87      0.86       589



# FS5: Time + Heart Rate + room + preference history

In [23]:
# Common path prefix (folder + date + feature-set name) of the FS5 per-participant files
df_file = "{}{}_{}".format(folder_path, file_date, dataframes_names[4])
print("Loading files from: {}".format(df_file))


Loading files from: data-processed-preferences/2019-11-15_fs5


In [24]:
# Read every participant's train/validation split, then concatenate once.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and calling
# it inside a loop re-copies the accumulated frame on every iteration.
train_frames = [pd.read_csv("../" + df_file + "_train_" + participant + ".csv")
                for participant in participant_list]
val_frames = [pd.read_csv("../" + df_file + "_val_" + participant + ".csv")
              for participant in participant_list]

df_train = pd.concat(train_frames, ignore_index=True)
df_val = pd.concat(val_frames, ignore_index=True)

# drop userid and room so these columns are not passed on to the model
df_train = df_train.drop(columns=['user_id', 'room'])
df_val = df_val.drop(columns=['user_id', 'room'])

print(df_train.columns.values)


['heartRate_cozie' 'light_cozie' 'noise_cozie' 'hour_sin' 'hour_cos'
 'day_of_week_sin' 'day_of_week_cos' 'user_grouped_cooler'
 'user_grouped_warmer' 'user_grouped_dimmer' 'user_grouped_brighter'
 'user_grouped_quieter' 'user_grouped_louder' 'room_grouped_cooler'
 'room_grouped_warmer' 'room_grouped_dimmer' 'room_grouped_brighter'
 'room_grouped_quieter' 'room_grouped_louder' 'thermal_cozie']


## Thermal Comfort Prediction

In [25]:
# Light and noise preference-history columns; the thermal frames leave them out.
other_preferences = ['{}_grouped_{}'.format(scope, preference)
                     for scope in ('user', 'room')
                     for preference in ('dimmer', 'brighter', 'quieter', 'louder')]

# drop() returns new frames, so df_train / df_val stay untouched for the other targets
df_train_thermal = df_train.drop(columns=other_preferences)
df_val_thermal = df_val.drop(columns=other_preferences)

print(df_train_thermal.columns.values)

clf = RandomForestClassifier(random_state=seed, n_estimators=1000)
(micro_rf_5_thermal,
 macro_rf_5_thermal,
 _) = model_validate(df_train_thermal, df_val_thermal, clf)


['heartRate_cozie' 'light_cozie' 'noise_cozie' 'hour_sin' 'hour_cos'
 'day_of_week_sin' 'day_of_week_cos' 'user_grouped_cooler'
 'user_grouped_warmer' 'room_grouped_cooler' 'room_grouped_warmer'
 'thermal_cozie']
Accuracy (f1 micro) on validation set: 0.6825127334465195
F1 micro on validation set: 0.6825127334465195
F1 macro on validation set: 0.5509984512698217
Confusion Matrix: 
[[ 21  54   6]
 [ 25 314  26]
 [  5  71  67]]
Classification Metrics: 
              precision    recall  f1-score   support

         9.0       0.41      0.26      0.32        81
        10.0       0.72      0.86      0.78       365
        11.0       0.68      0.47      0.55       143

    accuracy                           0.68       589
   macro avg       0.60      0.53      0.55       589
weighted avg       0.66      0.68      0.66       589



## Light Comfort Prediction

In [26]:
# Thermal and noise preference-history columns; the light frames leave them out.
other_preferences = ['{}_grouped_{}'.format(scope, preference)
                     for scope in ('user', 'room')
                     for preference in ('cooler', 'warmer', 'quieter', 'louder')]

# Work on copies so df_train / df_val stay untouched for the other targets
df_train_light = df_train.copy()
df_val_light = df_val.copy()

# move light response to the end
df_train_light['light_cozie'] = df_train_light.pop('light_cozie')
df_val_light['light_cozie'] = df_val_light.pop('light_cozie')

# drop other preferences
df_train_light = df_train_light.drop(columns=other_preferences)
df_val_light = df_val_light.drop(columns=other_preferences)

print(df_train_light.columns.values)

# train and validate model
clf = RandomForestClassifier(random_state=seed, n_estimators=1000)
(micro_rf_5_light,
 macro_rf_5_light,
 _) = model_validate(df_train_light, df_val_light, clf)


['heartRate_cozie' 'noise_cozie' 'hour_sin' 'hour_cos' 'day_of_week_sin'
 'day_of_week_cos' 'user_grouped_dimmer' 'user_grouped_brighter'
 'room_grouped_dimmer' 'room_grouped_brighter' 'thermal_cozie'
 'light_cozie']
Accuracy (f1 micro) on validation set: 0.7860780984719864
F1 micro on validation set: 0.7860780984719865
F1 macro on validation set: 0.49557731611682043
Confusion Matrix: 
[[ 24  54   0]
 [ 21 431   5]
 [  7  39   8]]
Classification Metrics: 
              precision    recall  f1-score   support

         9.0       0.46      0.31      0.37        78
        10.0       0.82      0.94      0.88       457
        11.0       0.62      0.15      0.24        54

    accuracy                           0.79       589
   macro avg       0.63      0.47      0.50       589
weighted avg       0.76      0.79      0.75       589



## Aural Comfort Prediction

In [27]:
# Thermal and light preference-history columns; the aural frames leave them out.
other_preferences = ['{}_grouped_{}'.format(scope, preference)
                     for scope in ('user', 'room')
                     for preference in ('cooler', 'warmer', 'dimmer', 'brighter')]

# Work on copies so df_train / df_val stay untouched for the other targets
df_train_aural = df_train.copy()
df_val_aural = df_val.copy()

# move noise response to the end
df_train_aural['noise_cozie'] = df_train_aural.pop('noise_cozie')
df_val_aural['noise_cozie'] = df_val_aural.pop('noise_cozie')

# drop other preferences
df_train_aural = df_train_aural.drop(columns=other_preferences)
df_val_aural = df_val_aural.drop(columns=other_preferences)

print(df_train_aural.columns.values)

# train and validate model
clf = RandomForestClassifier(random_state=seed, n_estimators=1000)
(micro_rf_5_aural,
 macro_rf_5_aural,
 _) = model_validate(df_train_aural, df_val_aural, clf)


['heartRate_cozie' 'light_cozie' 'hour_sin' 'hour_cos' 'day_of_week_sin'
 'day_of_week_cos' 'user_grouped_quieter' 'user_grouped_louder'
 'room_grouped_quieter' 'room_grouped_louder' 'thermal_cozie'
 'noise_cozie']
Accuracy (f1 micro) on validation set: 0.8675721561969439
F1 micro on validation set: 0.8675721561969439
F1 macro on validation set: 0.7054093567251463
Confusion Matrix: 
[[474  25]
 [ 53  37]]
Classification Metrics: 
              precision    recall  f1-score   support

        10.0       0.90      0.95      0.92       499
        11.0       0.60      0.41      0.49        90

    accuracy                           0.87       589
   macro avg       0.75      0.68      0.71       589
weighted avg       0.85      0.87      0.86       589



# FS6: Time + room + preference history

In [28]:
# Common path prefix (folder + date + feature-set name) of the FS6 per-participant files
df_file = "{}{}_{}".format(folder_path, file_date, dataframes_names[5])
print("Loading files from: {}".format(df_file))


Loading files from: data-processed-preferences/2019-11-15_fs6


In [29]:
# Read every participant's train/validation split, then concatenate once.
# DataFrame.append was deprecated in pandas 1.4 and removed in 2.0, and calling
# it inside a loop re-copies the accumulated frame on every iteration.
train_frames = [pd.read_csv("../" + df_file + "_train_" + participant + ".csv")
                for participant in participant_list]
val_frames = [pd.read_csv("../" + df_file + "_val_" + participant + ".csv")
              for participant in participant_list]

df_train = pd.concat(train_frames, ignore_index=True)
df_val = pd.concat(val_frames, ignore_index=True)

# drop userid and room so these columns are not passed on to the model
df_train = df_train.drop(columns=['user_id', 'room'])
df_val = df_val.drop(columns=['user_id', 'room'])

print(df_train.columns.values)


['light_cozie' 'noise_cozie' 'hour_sin' 'hour_cos' 'day_of_week_sin'
 'day_of_week_cos' 'user_grouped_cooler' 'user_grouped_warmer'
 'user_grouped_dimmer' 'user_grouped_brighter' 'user_grouped_quieter'
 'user_grouped_louder' 'room_grouped_cooler' 'room_grouped_warmer'
 'room_grouped_dimmer' 'room_grouped_brighter' 'room_grouped_quieter'
 'room_grouped_louder' 'thermal_cozie']


## Thermal Comfort Prediction

In [30]:
# Light and noise preference-history columns; the thermal frames leave them out.
other_preferences = ['{}_grouped_{}'.format(scope, preference)
                     for scope in ('user', 'room')
                     for preference in ('dimmer', 'brighter', 'quieter', 'louder')]

# drop() returns new frames, so df_train / df_val stay untouched for the other targets
df_train_thermal = df_train.drop(columns=other_preferences)
df_val_thermal = df_val.drop(columns=other_preferences)

print(df_train_thermal.columns.values)

clf = RandomForestClassifier(random_state=seed, n_estimators=1000)
(micro_rf_6_thermal,
 macro_rf_6_thermal,
 _) = model_validate(df_train_thermal, df_val_thermal, clf)


['light_cozie' 'noise_cozie' 'hour_sin' 'hour_cos' 'day_of_week_sin'
 'day_of_week_cos' 'user_grouped_cooler' 'user_grouped_warmer'
 'room_grouped_cooler' 'room_grouped_warmer' 'thermal_cozie']
Accuracy (f1 micro) on validation set: 0.6383701188455009
F1 micro on validation set: 0.6383701188455009
F1 macro on validation set: 0.5146724817888643
Confusion Matrix: 
[[ 19  57   5]
 [ 45 289  31]
 [  5  70  68]]
Classification Metrics: 
              precision    recall  f1-score   support

         9.0       0.28      0.23      0.25        81
        10.0       0.69      0.79      0.74       365
        11.0       0.65      0.48      0.55       143

    accuracy                           0.64       589
   macro avg       0.54      0.50      0.51       589
weighted avg       0.63      0.64      0.63       589



## Light Comfort Prediction

In [31]:
# Thermal and noise preference-history columns; the light frames leave them out.
other_preferences = ['{}_grouped_{}'.format(scope, preference)
                     for scope in ('user', 'room')
                     for preference in ('cooler', 'warmer', 'quieter', 'louder')]

# Work on copies so df_train / df_val stay untouched for the other targets
df_train_light = df_train.copy()
df_val_light = df_val.copy()

# move light response to the end
df_train_light['light_cozie'] = df_train_light.pop('light_cozie')
df_val_light['light_cozie'] = df_val_light.pop('light_cozie')

# drop other preferences
df_train_light = df_train_light.drop(columns=other_preferences)
df_val_light = df_val_light.drop(columns=other_preferences)

print(df_train_light.columns.values)

# train and validate model
clf = RandomForestClassifier(random_state=seed, n_estimators=1000)
(micro_rf_6_light,
 macro_rf_6_light,
 _) = model_validate(df_train_light, df_val_light, clf)


['noise_cozie' 'hour_sin' 'hour_cos' 'day_of_week_sin' 'day_of_week_cos'
 'user_grouped_dimmer' 'user_grouped_brighter' 'room_grouped_dimmer'
 'room_grouped_brighter' 'thermal_cozie' 'light_cozie']
Accuracy (f1 micro) on validation set: 0.801358234295416
F1 micro on validation set: 0.801358234295416
F1 macro on validation set: 0.5585860349680144
Confusion Matrix: 
[[ 33  44   1]
 [ 24 427   6]
 [  9  33  12]]
Classification Metrics: 
              precision    recall  f1-score   support

         9.0       0.50      0.42      0.46        78
        10.0       0.85      0.93      0.89       457
        11.0       0.63      0.22      0.33        54

    accuracy                           0.80       589
   macro avg       0.66      0.53      0.56       589
weighted avg       0.78      0.80      0.78       589



## Aural Comfort Prediction

In [32]:
# Thermal and light preference-history columns; the aural frames leave them out.
other_preferences = ['{}_grouped_{}'.format(scope, preference)
                     for scope in ('user', 'room')
                     for preference in ('cooler', 'warmer', 'dimmer', 'brighter')]

# Work on copies so df_train / df_val stay untouched for the other targets
df_train_aural = df_train.copy()
df_val_aural = df_val.copy()

# move noise response to the end
df_train_aural['noise_cozie'] = df_train_aural.pop('noise_cozie')
df_val_aural['noise_cozie'] = df_val_aural.pop('noise_cozie')

# drop other preferences
df_train_aural = df_train_aural.drop(columns=other_preferences)
df_val_aural = df_val_aural.drop(columns=other_preferences)

print(df_train_aural.columns.values)

# train and validate model
clf = RandomForestClassifier(random_state=seed, n_estimators=1000)
(micro_rf_6_aural,
 macro_rf_6_aural,
 _) = model_validate(df_train_aural, df_val_aural, clf)


['light_cozie' 'hour_sin' 'hour_cos' 'day_of_week_sin' 'day_of_week_cos'
 'user_grouped_quieter' 'user_grouped_louder' 'room_grouped_quieter'
 'room_grouped_louder' 'thermal_cozie' 'noise_cozie']
Accuracy (f1 micro) on validation set: 0.8556876061120543
F1 micro on validation set: 0.8556876061120543
F1 macro on validation set: 0.6909968460878528
Confusion Matrix: 
[[467  32]
 [ 53  37]]
Classification Metrics: 
              precision    recall  f1-score   support

        10.0       0.90      0.94      0.92       499
        11.0       0.54      0.41      0.47        90

    accuracy                           0.86       589
   macro avg       0.72      0.67      0.69       589
weighted avg       0.84      0.86      0.85       589



# Results

In [33]:
# Result keys "fs<N>_<target>": feature sets 1-6 in order, each with
# thermal, light, aural. The value lists below follow the same order.
result_keys = ["fs{}_{}".format(fs_number, target)
               for fs_number in range(1, 7)
               for target in ("thermal", "light", "aural")]

# First value returned by model_validate for each feature set / target
dict_results_grouped_micro = dict(zip(result_keys, [
    micro_rf_1_thermal, micro_rf_1_light, micro_rf_1_aural,
    micro_rf_2_thermal, micro_rf_2_light, micro_rf_2_aural,
    micro_rf_3_thermal, micro_rf_3_light, micro_rf_3_aural,
    micro_rf_4_thermal, micro_rf_4_light, micro_rf_4_aural,
    micro_rf_5_thermal, micro_rf_5_light, micro_rf_5_aural,
    micro_rf_6_thermal, micro_rf_6_light, micro_rf_6_aural,
]))

# Second value returned by model_validate for each feature set / target
dict_results_grouped_macro = dict(zip(result_keys, [
    macro_rf_1_thermal, macro_rf_1_light, macro_rf_1_aural,
    macro_rf_2_thermal, macro_rf_2_light, macro_rf_2_aural,
    macro_rf_3_thermal, macro_rf_3_light, macro_rf_3_aural,
    macro_rf_4_thermal, macro_rf_4_light, macro_rf_4_aural,
    macro_rf_5_thermal, macro_rf_5_light, macro_rf_5_aural,
    macro_rf_6_thermal, macro_rf_6_light, macro_rf_6_aural,
]))

In [34]:
# save dictionaries with results
# The original passed bare open(...) handles to pickle.dump and never closed
# them; the context managers flush and close each file even if dumping raises.
results_prefix = "../" + folder_path + file_date

with open(results_prefix + "_grouped_micro" + ".pickle", "wb") as micro_file:
    pickle.dump(dict_results_grouped_micro, micro_file)

with open(results_prefix + "_grouped_macro" + ".pickle", "wb") as macro_file:
    pickle.dump(dict_results_grouped_macro, macro_file)
