# Fine Tuning the baseline models
This notebook runs a series of experiments to find the best architecture for each of our baseline models.
As baseline models, we use Dense, LSTM, BiLSTM, CNN, ResNet, Transformer, Ensemble (LSTM-SVR-RF) and LightGBM-Ensemble.

All evaluations are performed over 3 training rounds with at most 100 epochs each, using the MSE as the evaluation metric.
As the final result, the mean MSE over the 3 training rounds is reported for each of 3 exemplary users, together with the standard deviation across those rounds.

In [2]:
#Imports
import pandas as pd
import os
import tensorflow as tf
from keras import layers, models

from utils.modelgenerator import *
from utils.modelhandler import *
from utils.datahandler import *

In [3]:
# Load the feature table; the 'Date' column becomes a DatetimeIndex.
cwd = os.path.normpath(os.getcwd())
df = pd.read_csv(cwd+'/data/df_with_final_features.csv', index_col='Date')
df.index = pd.to_datetime(df.index)
df = df.fillna(0)  # missing values are replaced by 0

# Only three users are used for testing. Every user frame holds the user's own
# column first, then the shared features, then the user's 24 h lag column.
shared_features = ['temp', 'rhum', 'wspd', 'PC1', 'hour sin', 'hour cos']
df_array = [
    df[[f'User{user_no}', *shared_features, f'User{user_no}_lag_24hrs']]
    for user_no in (10, 11, 12)
]
df_user10, df_user11, df_user12 = df_array
df_array[1].head(3)

Unnamed: 0_level_0,User11,temp,rhum,wspd,PC1,hour sin,hour cos,User11_lag_24hrs
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2012-07-08 00:00:00,0.312,9.8,85.0,0.0,-2.453691,0.0,1.0,0.667
2012-07-08 01:00:00,0.263,9.8,85.0,0.0,-2.453691,0.258819,0.965926,0.316
2012-07-08 02:00:00,0.257,9.8,85.0,0.0,-2.453691,0.5,0.866025,0.356


In [4]:
# Train, validation and test datasets
sequence_length = 25
batch_size = 16
num_features = df_array[0].shape[1]

dh = Datahandler()

# One entry per user, keyed 'user10', 'user11', 'user12'.
X_train, y_train, X_val, y_val, X_test, y_test = {}, {}, {}, {}, {}, {}

# Build the train/validation/test data for every user frame.
# The loop variable is called `user_df` (not `df`) so that the full feature
# table loaded earlier is not silently replaced by the last user's frame.
for idx, user_df in enumerate(df_array):
    user_key = f'user1{idx}'

    # Chronological 70 % / 20 % / 10 % split by row position (no shuffling).
    n = len(user_df)
    train_end, val_end = int(n*0.7), int(n*0.9)
    train_df = user_df.iloc[:train_end]
    val_df = user_df.iloc[train_end:val_end]
    test_df = user_df.iloc[val_end:]

    # Min-max scaling
    # NOTE(review): each split is passed to dh.min_max_scaling separately. If the
    # helper derives min/max from the frame it receives, validation and test data
    # are scaled with their own statistics rather than the training ones - confirm
    # that this is intended.
    train_df = dh.min_max_scaling(train_df)
    val_df = dh.min_max_scaling(val_df)
    test_df = dh.min_max_scaling(test_df)

    # Sequencing
    train_sequences = dh.create_sequences(train_df, sequence_length)
    val_sequences = dh.create_sequences(val_df, sequence_length)
    test_sequences = dh.create_sequences(test_df, sequence_length)

    # Split into features and labels
    X_train[user_key], y_train[user_key] = dh.prepare_data(train_sequences, batch_size)
    X_val[user_key], y_val[user_key] = dh.prepare_data(val_sequences, batch_size)
    X_test[user_key], y_test[user_key] = dh.prepare_data(test_sequences, batch_size)

In [5]:
# General hyperparameters shared by all benchmark models
horizon = 1        # passed to every model builder as `horizon`
max_epochs = 100   # upper bound on epochs per training round
m1 = ModelGenerator()
mh = Modelhandler()

# Training loss plus the additional metrics tracked for each model.
loss = tf.keras.losses.MeanSquaredError()
metrics = [
    metric_cls()
    for metric_cls in (
        tf.keras.metrics.RootMeanSquaredError,
        tf.keras.metrics.MeanAbsolutePercentageError,
        tf.keras.metrics.MeanAbsoluteError,
    )
]

# Callbacks handed to every training run; early stopping watches the
# validation loss with a patience of 10 epochs.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    mode='min',
)
timing_callback = TimingCallback()
custom_callback = CustomCallback()
callbacks = [early_stopping, timing_callback, custom_callback]

In [11]:
# Load the saved transformer benchmark table, discard the stored index and
# 'sum' columns, and rank the architectures by their mean MSE over the users.
results = pd.read_csv('evaluations/transformer_benchmark_results.csv')
results = results.drop(columns=['Unnamed: 0', 'sum'])
loss_columns = ['Loss@User10', 'Loss@User11', 'Loss@User12']
results["mean"] = results[loss_columns].mean(axis="columns")
results

Unnamed: 0,architecture,Loss@User10,std@User10,Loss@User11,std@User11,Loss@User12,std@User12,mean
0,ED1_h2_d16,0.029387,0.005596,0.025911,0.004667,0.021001,0.00254,0.025433
1,ED1_h4_d16,0.026294,0.000956,0.026504,0.004303,0.020197,0.001528,0.024332
2,ED1_h8_d16,0.0297,0.001989,0.026099,0.002576,0.01865,0.001502,0.024816
3,ED1_h4_d8,0.029206,0.006346,0.02599,0.00089,0.020228,0.00042,0.025141
4,ED1_h4_d32,0.028328,0.001884,0.024695,0.002986,0.019923,0.002258,0.024315
5,ED2_h4_d32,0.02542,0.003399,0.025083,0.004055,0.020464,0.000871,0.023656


### Dense benchmark model - Fine tuning

Best architecture: **L3_U16**

In [5]:
# Empty summary table: one row per tested dense architecture, holding the
# mean MSE ("Loss") and its standard deviation ("std") for each user.
dense_results = pd.DataFrame(
    columns=['architecture'] + [
        f'{stat}@User{user_no}'
        for user_no in (10, 11, 12)
        for stat in ('Loss', 'std')
    ]
)

In [78]:
#dense_architectures to test: 

#dense_architectures - tested: L1_U4, L2_U4, L3_U4, L4_U4, L5_U4, L1_U8, L2_U8, L3_U8, L4_U8, L5_U8, L1_U16, L2_U16, L3_U16, L4_U16, L5_U16, L1_U32
#L2_U32, L3_U32, L1_U64, L2_U64, L1_U128, L2_U128, L1_U256, L2_U256, L1_U512, L2_U512

# Dense hyperparameters of the architecture evaluated by this run
dense_architecture = "L2_U512"
dense_layers = 2
dense_units = 512

# Train and evaluate the architecture 3 times for each of the 3 users.
# The per-round result frames are collected in a list and concatenated once:
# an outer pd.merge on all shared columns (the previous approach) collapses
# rounds that produce identical rows, which shrinks the sample behind the
# mean/std below.
# Assumes compile_fit_evaluate_model returns its results as a DataFrame with
# 'user', 'architecture' and 'mse' columns - verify against utils.modelhandler.
dense_round_results = []
for idx in range(3):
    user_key = f'user1{idx}'
    print("User: ", idx)
    # `round_idx` instead of `round`, so the builtin round() is not shadowed.
    for round_idx in range(3):
        dense_model = m1.build_dense_model(X_train[user_key], horizon, num_layers=dense_layers, units=dense_units, batch_size=batch_size)
        dense_histroy, dense_user_results = mh.compile_fit_evaluate_model(
            model=dense_model,
            loss=loss,
            metrics=metrics,
            X_train=X_train[user_key],
            y_train=y_train[user_key],
            max_epochs=max_epochs,
            batch_size=batch_size,
            X_val=X_val[user_key],
            y_val=y_val[user_key],
            X_test=X_test[user_key],
            y_test=y_test[user_key],
            callbacks=callbacks,
            user=user_key,
            hyper=dense_architecture
        )
        dense_round_results.append(dense_user_results)

dense_all_results = pd.concat(dense_round_results, ignore_index=True)

# Summarise the rounds: mean MSE and its standard deviation per user.
new_row = {'architecture': dense_all_results["architecture"].iloc[0]}
for user_no in (10, 11, 12):
    user_mse = dense_all_results.loc[dense_all_results["user"] == f"user{user_no}", "mse"]
    new_row[f'Loss@User{user_no}'] = user_mse.mean()
    new_row[f'std@User{user_no}'] = user_mse.std()
dense_results.loc[len(dense_results)] = new_row
print(dense_results)

User:  0
User:  1
User:  2
   architecture  Loss@User10  std@User10  Loss@User11  std@User11  \
0         L1_U4     0.022642    0.006582     0.018430    0.001025   
1         L2_U4     0.028477    0.003563     0.019371    0.000457   
2         L3_U4     0.026329    0.007921     0.017782    0.001519   
3         L4_U4     0.028015    0.004525     0.019194    0.001119   
4         L5_U4     0.030057    0.000382     0.025844    0.006254   
5         L1_U8     0.016253    0.002085     0.018391    0.000983   
6         L2_U8     0.019794    0.002767     0.018931    0.002294   
7         L3_U8     0.030561    0.000315     0.020084    0.002126   
8         L4_U8     0.020795    0.008234     0.018708    0.001682   
9         L5_U8     0.030263    0.000460     0.022531    0.004850   
10       L1_U16     0.016676    0.003243     0.016976    0.001311   
11       L2_U16     0.015801    0.001627     0.015413    0.000714   
12       L3_U16     0.015302    0.001213     0.016745    0.001031   
13     

In [88]:
#dense_results_test = dense_results
#dense_results_test["sum"] = dense_results_test[['Loss@User10', 'Loss@User11', 'Loss@User12']].sum(axis=1)
#dense_results_test.to_csv('evaluations/dense_benchmark_results.csv')


### LSTM benchmark model (+ Bidirectional LSTM) - Fine Tuning

In [5]:
# Resume from the saved LSTM benchmark table when it exists; otherwise start
# with an empty table so the notebook also runs when no results were saved yet.
lstm_results_path = 'evaluations/lstm_benchmark_results.csv'
if os.path.exists(lstm_results_path):
    lstm_results = pd.read_csv(lstm_results_path)
else:
    lstm_results = pd.DataFrame(columns=['architecture', 'Loss@User10','std@User10', 'Loss@User11','std@User11', 'Loss@User12','std@User12'])
lstm_results

Unnamed: 0.1,Unnamed: 0,architecture,Loss@User10,std@User10,Loss@User11,std@User11,Loss@User12,std@User12,sum
0,0,L1_U2,0.022779,0.001011,0.02025,0.001907,0.013213,0.000112,0.056242
1,1,L2_U2,0.023101,0.00309,0.021098,0.000543,0.012715,0.000976,0.056913
2,2,L3_U2,0.024302,0.000476,0.021639,0.001903,0.015428,0.000726,0.061368
3,3,L1_U4,0.021916,0.001188,0.018435,0.001525,0.01247,0.000875,0.052821
4,4,L2_U4,0.022831,0.000402,0.020067,0.001568,0.012214,0.001645,0.055112
5,5,L1_U8,0.022136,0.000835,0.018238,0.001372,0.013381,0.001106,0.053755
6,6,L2_U8,0.020889,0.002551,0.017214,0.001334,0.012031,0.000871,0.050135
7,7,L1_U16,0.021762,0.000518,0.0169,0.00047,0.013258,0.002364,0.05192
8,8,L2_U20,0.02126,0.001295,0.016263,0.000463,0.01149,0.000283,0.049013
9,9,L2_U100,0.019492,0.002465,0.01632,0.00049,0.01265,0.0009,0.048462


In [8]:
#lstm_architectures to test: , , , , , , ,  
#L1_U16, L2_U16, L1_U32, L2_U32, 

#lstm_architectures - tested: L1_U2, L2_U2, L3_U2, L1_U4, L2_U4, L1_U8, L2_U8, L1_U16, L2_U20, Bi_L2_U8, Bi_L2_U20

# LSTM hyperparameters of the architecture evaluated by this run
lstm_architecture = "Bi_L2_U20"
lstm_layers = 2
lstm_units = 20

# Train and evaluate the architecture 3 times for each of the 3 users.
# The per-round result frames are collected in a list and concatenated once:
# an outer pd.merge on all shared columns (the previous approach) collapses
# rounds that produce identical rows, which shrinks the sample behind the
# mean/std below.
# Assumes compile_fit_evaluate_model returns its results as a DataFrame with
# 'user', 'architecture' and 'mse' columns - verify against utils.modelhandler.
lstm_round_results = []
for idx in range(3):
    user_key = f'user1{idx}'
    print("User: ", idx)
    # `round_idx` instead of `round`, so the builtin round() is not shadowed.
    for round_idx in range(3):
        print("Round: ", round_idx)
        # Unidirectional alternative:
        #lstm_model = m1.build_lstm_model(X_train[f'user1{idx}'], horizon, lstm_layers, lstm_units, batch_size)
        lstm_model = m1.build_bilstm_model(X_train[user_key], horizon, lstm_layers, lstm_units, batch_size)
        lstm_histroy, lstm_user_results = mh.compile_fit_evaluate_model(
            model=lstm_model,
            loss=loss,
            metrics=metrics,
            X_train=X_train[user_key],
            y_train=y_train[user_key],
            max_epochs=max_epochs,
            batch_size=batch_size,
            X_val=X_val[user_key],
            y_val=y_val[user_key],
            X_test=X_test[user_key],
            y_test=y_test[user_key],
            callbacks=callbacks,
            user=user_key,
            hyper=lstm_architecture
        )
        lstm_round_results.append(lstm_user_results)

lstm_all_results = pd.concat(lstm_round_results, ignore_index=True)

# Summarise the rounds: mean MSE and its standard deviation per user.
new_row = {'architecture': lstm_all_results["architecture"].iloc[0]}
for user_no in (10, 11, 12):
    user_mse = lstm_all_results.loc[lstm_all_results["user"] == f"user{user_no}", "mse"]
    new_row[f'Loss@User{user_no}'] = user_mse.mean()
    new_row[f'std@User{user_no}'] = user_mse.std()
lstm_results.loc[len(lstm_results)] = new_row
print(lstm_results)

User:  0
Round:  0
Round:  1
Round:  2
User:  1
Round:  0
Round:  1
Round:  2
User:  2
Round:  0
Round:  1
Round:  2
    Unnamed: 0 architecture  Loss@User10  std@User10  Loss@User11  std@User11  \
0          0.0        L1_U2     0.022779    0.001011     0.020250    0.001907   
1          1.0        L2_U2     0.023101    0.003090     0.021098    0.000543   
2          2.0        L3_U2     0.024302    0.000476     0.021639    0.001903   
3          3.0        L1_U4     0.021916    0.001188     0.018435    0.001525   
4          4.0        L2_U4     0.022831    0.000402     0.020067    0.001568   
5          5.0        L1_U8     0.022136    0.000835     0.018238    0.001372   
6          6.0        L2_U8     0.020889    0.002551     0.017214    0.001334   
7          7.0       L1_U16     0.021762    0.000518     0.016900    0.000470   
8          8.0       L2_U20     0.021260    0.001295     0.016263    0.000463   
9          9.0      L2_U100     0.019492    0.002465     0.016320    0.00

In [11]:
# Add the summed MSE over the three users as a single ranking score.
loss_columns = ['Loss@User10', 'Loss@User11', 'Loss@User12']
lstm_results["sum"] = lstm_results[loss_columns].sum(axis="columns")
lstm_results

Unnamed: 0,architecture,Loss@User10,std@User10,Loss@User11,std@User11,Loss@User12,std@User12,sum
0,L1_U2,0.022779,0.001011,0.02025,0.001907,0.013213,0.000112,0.056242
1,L2_U2,0.023101,0.00309,0.021098,0.000543,0.012715,0.000976,0.056913
2,L3_U2,0.024302,0.000476,0.021639,0.001903,0.015428,0.000726,0.061368
3,L1_U4,0.021916,0.001188,0.018435,0.001525,0.01247,0.000875,0.052821
4,L2_U4,0.022831,0.000402,0.020067,0.001568,0.012214,0.001645,0.055112
5,L1_U8,0.022136,0.000835,0.018238,0.001372,0.013381,0.001106,0.053755
6,L2_U8,0.020889,0.002551,0.017214,0.001334,0.012031,0.000871,0.050135
7,L1_U16,0.021762,0.000518,0.0169,0.00047,0.013258,0.002364,0.05192
8,L2_U20,0.02126,0.001295,0.016263,0.000463,0.01149,0.000283,0.049013
9,L2_U100,0.019492,0.002465,0.01632,0.00049,0.01265,0.0009,0.048462


In [12]:
# Persist the table without its row index. With the default index=True the index
# is written as an unnamed first column, which the plain pd.read_csv used to
# reload this file turns into an extra 'Unnamed: 0' column on every save/load cycle.
lstm_results.to_csv('evaluations/lstm_benchmark_results.csv', index=False)

### CNN benchmark model (+ ResNet) - Fine Tuning

In [5]:
# Resume from the saved CNN benchmark table when it exists; otherwise start
# with an empty table so the notebook also runs when no results were saved yet.
cnn_results_path = 'evaluations/cnn_benchmark_results.csv'
if os.path.exists(cnn_results_path):
    cnn_results = pd.read_csv(cnn_results_path)
else:
    cnn_results = pd.DataFrame(columns=['architecture', 'Loss@User10','std@User10', 'Loss@User11','std@User11', 'Loss@User12','std@User12'])
#cnn_results

In [18]:
#cnn_architectures to test: 

#cnn_architectures - tested: 

# CNN hyperparameters of the architecture evaluated by this run
cnn_architecture = "resnet10_f8k1_d16"
#num_layers = 6
filter_size = 8
kernel_size = 1
dense_units = 16
# ResNet
resnet_blocks = 10

# Train and evaluate the architecture 3 times for each of the 3 users.
# The per-round result frames are collected in a list and concatenated once:
# an outer pd.merge on all shared columns (the previous approach) collapses
# rounds that produce identical rows, which shrinks the sample behind the
# mean/std below.
# Assumes compile_fit_evaluate_model returns its results as a DataFrame with
# 'user', 'architecture' and 'mse' columns - verify against utils.modelhandler.
cnn_round_results = []
for idx in range(3):
    user_key = f'user1{idx}'
    print("User: ", idx)
    # `round_idx` instead of `round`, so the builtin round() is not shadowed.
    for round_idx in range(3):
        print("Round: ", round_idx)
        # Plain CNN alternative (needs num_layers to be set above):
        #cnn_model = m1.build_cnn_model(X_train[f'user1{idx}'], horizon, num_layers, filter_size, kernel_size, dense_units, batch_size)
        cnn_model = m1.build_resnet_model(X_train[user_key], horizon, resnet_blocks, filter_size, kernel_size, dense_units, batch_size, m1)
        cnn_histroy, cnn_user_results = mh.compile_fit_evaluate_model(
            model=cnn_model,
            loss=loss,
            metrics=metrics,
            X_train=X_train[user_key],
            y_train=y_train[user_key],
            max_epochs=max_epochs,
            batch_size=batch_size,
            X_val=X_val[user_key],
            y_val=y_val[user_key],
            X_test=X_test[user_key],
            y_test=y_test[user_key],
            callbacks=callbacks,
            user=user_key,
            hyper=cnn_architecture
        )
        cnn_round_results.append(cnn_user_results)

cnn_all_results = pd.concat(cnn_round_results, ignore_index=True)

# Summarise the rounds: mean MSE and its standard deviation per user.
new_row = {'architecture': cnn_all_results["architecture"].iloc[0]}
for user_no in (10, 11, 12):
    user_mse = cnn_all_results.loc[cnn_all_results["user"] == f"user{user_no}", "mse"]
    new_row[f'Loss@User{user_no}'] = user_mse.mean()
    new_row[f'std@User{user_no}'] = user_mse.std()
cnn_results.loc[len(cnn_results)] = new_row
print(cnn_results)

User:  0
Round:  0
Round:  1
Round:  2
User:  1
Round:  0
Round:  1
Round:  2
User:  2
Round:  0
Round:  1
Round:  2
         architecture  Loss@User10  std@User10  Loss@User11  std@User11  \
0         L1_f1k1_d16     0.030180    0.001122     0.029047    0.000455   
1         L1_f3k1_d16     0.030563    0.001254     0.028760    0.000512   
2         L1_f3k3_d16     0.030319    0.000927     0.029366    0.000970   
3         L1_f5k1_d16     0.030211    0.000195     0.029168    0.000507   
4         L1_f8k1_d16     0.030082    0.000074     0.028578    0.000356   
5        L1_f16k1_d16     0.030653    0.000885     0.028552    0.000322   
6        L1_f32k1_d16     0.029825    0.000626     0.029376    0.001066   
7        L1_f64k1_d16     0.030381    0.000408     0.029009    0.001052   
8        L1_f32k3_d16     0.030704    0.001989     0.030018    0.001562   
9        L1_f64k3_d16     0.029936    0.001023     0.028893    0.000611   
10         L1_f8k1_d8     0.030150    0.000087     0.02943

In [19]:
# Add the summed MSE over the three users as a single ranking score.
cnn_loss_columns = ['Loss@User10', 'Loss@User11', 'Loss@User12']
cnn_results["sum"] = cnn_results.loc[:, cnn_loss_columns].sum(axis="columns")
cnn_results

Unnamed: 0,architecture,Loss@User10,std@User10,Loss@User11,std@User11,Loss@User12,std@User12,sum
0,L1_f1k1_d16,0.03018,0.001122,0.029047,0.000455,0.024364,0.00017,0.083592
1,L1_f3k1_d16,0.030563,0.001254,0.02876,0.000512,0.023961,2.5e-05,0.083285
2,L1_f3k3_d16,0.030319,0.000927,0.029366,0.00097,0.024243,0.000224,0.083928
3,L1_f5k1_d16,0.030211,0.000195,0.029168,0.000507,0.024134,0.000106,0.083512
4,L1_f8k1_d16,0.030082,7.4e-05,0.028578,0.000356,0.024556,0.000267,0.083216
5,L1_f16k1_d16,0.030653,0.000885,0.028552,0.000322,0.024399,0.000348,0.083604
6,L1_f32k1_d16,0.029825,0.000626,0.029376,0.001066,0.024114,0.000202,0.083315
7,L1_f64k1_d16,0.030381,0.000408,0.029009,0.001052,0.024354,0.000489,0.083744
8,L1_f32k3_d16,0.030704,0.001989,0.030018,0.001562,0.024515,0.000322,0.085238
9,L1_f64k3_d16,0.029936,0.001023,0.028893,0.000611,0.024446,0.000351,0.083275


In [20]:
# Persist the table without its row index. With the default index=True the index
# is written as an unnamed first column, which the plain pd.read_csv used to
# reload this file turns into an extra 'Unnamed: 0' column on every save/load cycle.
cnn_results.to_csv('evaluations/cnn_benchmark_results.csv', index=False)

### Transformer benchmark model - Fine Tuning

In [5]:
# Fresh summary table for the transformer runs: mean MSE ("Loss") and its
# standard deviation ("std") per user, one row per tested architecture.
# To continue from saved results instead, load the CSV:
#transformer_results = pd.read_csv('evaluations/transformer_benchmark_results.csv')
summary_columns = ['architecture']
for user_no in (10, 11, 12):
    summary_columns += [f'Loss@User{user_no}', f'std@User{user_no}']
transformer_results = pd.DataFrame(columns=summary_columns)

In [18]:
#transformer_architectures to test: 

#transformer_architectures - tested: 

# Transformer hyperparameters of the architecture evaluated by this run
transformer_architecture = "ED2_h4_d32"
num_layers = 2
num_heads = 4
dense_units = 32

# Train and evaluate the architecture 3 times for each of the 3 users.
# The per-round result frames are collected in a list and concatenated once:
# an outer pd.merge on all shared columns (the previous approach) collapses
# rounds that produce identical rows, which shrinks the sample behind the
# mean/std below.
# Assumes compile_fit_evaluate_model returns its results as a DataFrame with
# 'user', 'architecture' and 'mse' columns - verify against utils.modelhandler.
transformer_round_results = []
for idx in range(3):
    user_key = f'user1{idx}'
    print("User: ", idx)
    # `round_idx` instead of `round`, so the builtin round() is not shadowed.
    for round_idx in range(3):
        print("Round: ", round_idx)
        transformer_model = m1.build_transformer_model(X_train[user_key], horizon, batch_size, sequence_length, num_layers, num_features, num_heads, dense_units, m1)
        transformer_histroy, transformer_user_results = mh.compile_fit_evaluate_model(
            model=transformer_model,
            loss=loss,
            metrics=metrics,
            X_train=X_train[user_key],
            y_train=y_train[user_key],
            max_epochs=max_epochs,
            batch_size=batch_size,
            X_val=X_val[user_key],
            y_val=y_val[user_key],
            X_test=X_test[user_key],
            y_test=y_test[user_key],
            callbacks=callbacks,
            user=user_key,
            hyper=transformer_architecture
        )
        transformer_round_results.append(transformer_user_results)

transformer_all_results = pd.concat(transformer_round_results, ignore_index=True)

# Summarise the rounds: mean MSE and its standard deviation per user.
new_row = {'architecture': transformer_all_results["architecture"].iloc[0]}
for user_no in (10, 11, 12):
    user_mse = transformer_all_results.loc[transformer_all_results["user"] == f"user{user_no}", "mse"]
    new_row[f'Loss@User{user_no}'] = user_mse.mean()
    new_row[f'std@User{user_no}'] = user_mse.std()
transformer_results.loc[len(transformer_results)] = new_row
print(transformer_results)

User:  0
Round:  0
Round:  1
Round:  2
User:  1
Round:  0
Round:  1
Round:  2
User:  2
Round:  0
Round:  1
Round:  2
  architecture  Loss@User10  std@User10  Loss@User11  std@User11  Loss@User12  \
0   ED1_h2_d16     0.029387    0.005596     0.025911    0.004667     0.021001   
1   ED1_h4_d16     0.026294    0.000956     0.026504    0.004303     0.020197   
2   ED1_h8_d16     0.029700    0.001989     0.026099    0.002576     0.018650   
3    ED1_h4_d8     0.029206    0.006346     0.025990    0.000890     0.020228   
4   ED1_h4_d32     0.028328    0.001884     0.024695    0.002986     0.019923   
5   ED2_h4_d32     0.025420    0.003399     0.025083    0.004055     0.020464   

   std@User12       sum  
0    0.002540  0.076299  
1    0.001528  0.072995  
2    0.001502  0.074449  
3    0.000420  0.075424  
4    0.002258  0.072945  
5    0.000871       NaN  


In [19]:
# Add the summed MSE over the three users as a single ranking score.
transformer_loss_columns = ['Loss@User10', 'Loss@User11', 'Loss@User12']
transformer_results["sum"] = transformer_results[transformer_loss_columns].sum(axis="columns")
transformer_results

Unnamed: 0,architecture,Loss@User10,std@User10,Loss@User11,std@User11,Loss@User12,std@User12,sum
0,ED1_h2_d16,0.029387,0.005596,0.025911,0.004667,0.021001,0.00254,0.076299
1,ED1_h4_d16,0.026294,0.000956,0.026504,0.004303,0.020197,0.001528,0.072995
2,ED1_h8_d16,0.0297,0.001989,0.026099,0.002576,0.01865,0.001502,0.074449
3,ED1_h4_d8,0.029206,0.006346,0.02599,0.00089,0.020228,0.00042,0.075424
4,ED1_h4_d32,0.028328,0.001884,0.024695,0.002986,0.019923,0.002258,0.072945
5,ED2_h4_d32,0.02542,0.003399,0.025083,0.004055,0.020464,0.000871,0.070967


In [20]:
# Save the transformer benchmark table. The row index is written on purpose:
# the results cell that reloads this file drops the resulting 'Unnamed: 0' column.
transformer_results.to_csv(
    'evaluations/transformer_benchmark_results.csv',
    index=True,
)

### Ensemble benchmark model - Fine Tuning

###### LSTM-SVR-RF Ensemble

In [13]:
# Resume from the saved ensemble benchmark table when it exists; otherwise start
# with an empty table. (Previously the empty table was always created and then
# immediately overwritten by the CSV load, which fails if the file is missing.)
ensemble_results_path = 'evaluations/ensemble_benchmark_results.csv'
if os.path.exists(ensemble_results_path):
    ensemble_results = pd.read_csv(ensemble_results_path)
else:
    ensemble_results = pd.DataFrame(columns=['architecture', 'Loss@User10','std@User10', 'Loss@User11','std@User11', 'Loss@User12','std@User12'])
#ensemble_results

In [6]:

# Build, train and evaluate the LSTM-SVR-RF ensemble 3 times for each of the 3 users.
# The per-round result frames are collected in a list and concatenated once:
# an outer pd.merge on all shared columns (the previous approach) collapses
# rounds that produce identical rows, which shrinks the sample behind the
# mean/std below.
# Assumes build_compile_evaluate_ensemble_model returns a DataFrame with
# 'user', 'architecture' and 'mse' columns - verify against utils.modelgenerator.
ensemble_round_results = []
for idx in range(3):
    user_key = f'user1{idx}'
    print("User: ", idx)
    # `round_idx` instead of `round`, so the builtin round() is not shadowed.
    for round_idx in range(3):
        print("Round: ", round_idx)
        ensemble_user_results = m1.build_compile_evaluate_ensemble_model(
            X_train=X_train[user_key],
            y_train=y_train[user_key],
            X_val=X_val[user_key],
            y_val=y_val[user_key],
            X_test=X_test[user_key],
            y_test=y_test[user_key],
            horizon=horizon,
            batch_size=batch_size,
            sequence_length=sequence_length,
            num_features=num_features,
            callbacks=callbacks,
            user=user_key,
        )
        ensemble_round_results.append(ensemble_user_results)

ensemble_all_results = pd.concat(ensemble_round_results, ignore_index=True)

# Summarise the rounds: mean MSE and its standard deviation per user.
new_row = {'architecture': ensemble_all_results["architecture"].iloc[0]}
for user_no in (10, 11, 12):
    user_mse = ensemble_all_results.loc[ensemble_all_results["user"] == f"user{user_no}", "mse"]
    new_row[f'Loss@User{user_no}'] = user_mse.mean()
    new_row[f'std@User{user_no}'] = user_mse.std()
ensemble_results.loc[len(ensemble_results)] = new_row
print(ensemble_results)

User:  0
Round:  0
Round:  1
Round:  2
User:  1
Round:  0
Round:  1
Round:  2
User:  2
Round:  0
Round:  1
Round:  2
  architecture  Loss@User10  std@User10  Loss@User11  std@User11  Loss@User12  \
0  LSTM_SVR_RF       0.0139    0.000378     0.014611    0.000085     0.011104   

   std@User12  
0    0.000365  


In [14]:
# Add the mean MSE over the three users as a single ranking score.
ensemble_loss_columns = ['Loss@User10', 'Loss@User11', 'Loss@User12']
ensemble_results["mean"] = ensemble_results[ensemble_loss_columns].mean(axis="columns")
ensemble_results

Unnamed: 0.1,Unnamed: 0,architecture,Loss@User10,std@User10,Loss@User11,std@User11,Loss@User12,std@User12,mean
0,0,LSTM_SVR_RF,0.0139,0.000378,0.014611,8.5e-05,0.011104,0.000365,0.013205


In [8]:
# Persist the table without its row index. With the default index=True the index
# is written as an unnamed first column, which the plain pd.read_csv used to
# reload this file turns into an extra 'Unnamed: 0' column on every save/load cycle.
ensemble_results.to_csv('evaluations/ensemble_benchmark_results.csv', index=False)

###### LightGBM Ensemble

In [15]:
# Resume from the saved LightGBM benchmark table when it exists; otherwise start
# with an empty table so the notebook also runs when no results were saved yet.
lightgbm_results_path = 'evaluations/lightgbm_benchmark_results.csv'
if os.path.exists(lightgbm_results_path):
    lightgbm_results = pd.read_csv(lightgbm_results_path)
else:
    lightgbm_results = pd.DataFrame(columns=['architecture', 'Loss@User10','std@User10', 'Loss@User11','std@User11', 'Loss@User12','std@User12'])
#lightgbm_results

In [18]:
# Train and evaluate the LightGBM model 3 times for each of the 3 users.
# LightGBM works on flat (non-sequenced) feature tables, so the data is split and
# scaled again here. All arrays use `lgbm_`-prefixed names: reusing X_train, y_train,
# X_val, ... would overwrite the per-user dictionaries that the other training cells
# index with 'user10'/'user11'/'user12' and break them on a re-run.
lightgbm_records = []

for idx in range(3):
    print("User: ", idx)
    lgbm_frame = df_array[idx]

    # Chronological 70 % / 20 % / 10 % split by row position.
    n = len(lgbm_frame)
    lgbm_train_df = lgbm_frame[0:int(n*0.7)]
    lgbm_val_df = lgbm_frame[int(n*0.7):int(n*0.9)]
    lgbm_test_df = lgbm_frame[int(n*0.9):]

    # Min-max scaling
    lgbm_train_df = dh.min_max_scaling(lgbm_train_df)
    lgbm_val_df = dh.min_max_scaling(lgbm_val_df)
    lgbm_test_df = dh.min_max_scaling(lgbm_test_df)

    target_column = f'User1{idx}'
    features = ['temp', 'rhum', 'wspd', 'PC1', 'hour sin', 'hour cos', f'User1{idx}_lag_24hrs']

    # Split the data into features and target.
    lgbm_X_train = lgbm_train_df[features].values
    lgbm_y_train = lgbm_train_df[target_column].values
    lgbm_X_val = lgbm_val_df[features].values
    lgbm_y_val = lgbm_val_df[target_column].values
    lgbm_X_test = lgbm_test_df[features].values
    lgbm_y_test = lgbm_test_df[target_column].values

    # NOTE(review): `Dataset` is not imported explicitly in this notebook; it is
    # presumably lightgbm.Dataset made available by one of the wildcard imports - confirm.
    train_data = Dataset(lgbm_X_train, label=lgbm_y_train)
    valid_data = Dataset(lgbm_X_val, label=lgbm_y_val, reference=train_data)

    # `round_idx` instead of `round`, so the builtin round() is not shadowed.
    for round_idx in range(3):
        print("Round: ", round_idx)
        lightgbm_user_results = m1.build_compile_evaluate_lightgbm_model(train_data, valid_data, lgbm_X_test, lgbm_y_test, user=f'User1{idx}')
        # Keep one plain record per round; the frame is built once after the loop
        # instead of being grown row by row.
        lightgbm_records.append({
            'user': lightgbm_user_results["user"][0],
            'architecture': lightgbm_user_results["architecture"][0],
            'mse': lightgbm_user_results["mse"][0],
        })

lightgbm_all_results = pd.DataFrame(lightgbm_records, columns=["user", "architecture", "mse"])

# Summarise the rounds: mean MSE and its standard deviation per user.
# Note that the user labels are capitalised here ('User10'), unlike the
# 'user10' keys used by the other benchmark cells.
new_row = {'architecture': lightgbm_all_results["architecture"].iloc[0]}
for user_no in (10, 11, 12):
    user_mse = lightgbm_all_results.loc[lightgbm_all_results["user"] == f"User{user_no}", "mse"]
    new_row[f'Loss@User{user_no}'] = user_mse.mean()
    new_row[f'std@User{user_no}'] = user_mse.std()
lightgbm_results.loc[len(lightgbm_results)] = new_row

'lightgbm_all_results = pd.DataFrame(columns=["user", "architecture", "mse"])\n\n#For each of the 3 user\nfor idx in range(3):\n    print("User: ", idx)\n    n = len(df_array[idx])\n    train_df = df_array[idx][0:int(n*0.7)]\n    val_df = df_array[idx][int(n*0.7):int(n*0.9)]\n    test_df = df_array[idx][int(n*0.9):]\n\n    # Min max sclaing\n    train_df = dh.min_max_scaling(train_df)\n    val_df = dh.min_max_scaling(val_df)\n    test_df = dh.min_max_scaling(test_df)\n\n    target_column = f\'User1{idx}\'\n    features = [\'temp\', \'rhum\', \'wspd\', \'PC1\', \'hour sin\', \'hour cos\', f\'User1{idx}_lag_24hrs\']\n\n    # Split the data into features and target.\n    X_train = train_df[features].values\n    y_train = train_df[target_column].values\n    X_val = val_df[features].values\n    y_val = val_df[target_column].values\n    X_test = test_df[features].values\n    y_test = test_df[target_column].values\n\n    train_data = Dataset(X_train, label=y_train)\n    valid_data = Dataset(X

In [16]:
# Display the LightGBM benchmark table as is; the optional 'sum' column and the
# clean-up of the stored index column below are currently disabled.
#lightgbm_results["sum"] = lightgbm_results[['Loss@User10', 'Loss@User11', 'Loss@User12']].sum(axis=1)
#lightgbm_results.drop(labels=['Unnamed: 0'], axis=1,  inplace=True)
lightgbm_results

Unnamed: 0,architecture,Loss@User10,std@User10,Loss@User11,std@User11,Loss@User12,std@User12
0,LightGBM,0.020893,0.0,0.020253,0.0,0.020101,0.0


In [16]:
# Add the mean MSE over the three users as a single ranking score.
lightgbm_loss_columns = ['Loss@User10', 'Loss@User11', 'Loss@User12']
lightgbm_results["mean"] = lightgbm_results[lightgbm_loss_columns].mean(axis="columns")
lightgbm_results

Unnamed: 0.1,Unnamed: 0,architecture,Loss@User10,std@User10,Loss@User11,std@User11,Loss@User12,std@User12,mean
0,0,LightGBM,0.020893,0.0,0.020253,0.0,0.020101,0.0,0.020416


In [17]:
#lightgbm_results.to_csv('evaluations/lightgbm_benchmark_results.csv')