In [1]:
#Imports
import os

# Logging-related environment variables are exported BEFORE tensorflow / wandb
# are imported. TF_CPP_MIN_LOG_LEVEL in particular only silences TensorFlow's
# native start-up messages when it is already set at the time the library is
# loaded; assigning it after `import tensorflow` is too late for those messages.
os.environ['WANDB_SILENT'] = 'true'
os.environ['WANDB_CONSOLE'] = 'off'
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '2'

import pandas as pd
import tensorflow as tf

# Make the project's `utils` package importable. The path is relative, so it is
# resolved against the working directory the kernel was started in.
import sys
sys.path.append("../../../")
# NOTE(review): the wildcard imports are kept because later cells use names
# (callbacks, scaling/sequencing helpers, run_* functions) without a module
# prefix, and this notebook does not show which module defines which name.
from utils.models import *
from utils.datahandling import *
from utils.modelrunner import *

# Imported after the wildcard imports (as in the original cell) so these two
# names always refer to the real modules, whatever `utils` happens to export.
import wandb
import logging

# Only surface errors from the Python-side loggers of wandb and TensorFlow.
logging.getLogger("wandb").setLevel(logging.ERROR)
logging.getLogger('tensorflow').setLevel(logging.ERROR)




In [2]:
#Get data
num_users = 30

# Locate the directory that contains `data/`: four levels above the working directory.
data_root = os.getcwd()
for _level in range(4):
    data_root = os.path.dirname(data_root)
cwd = os.path.normpath(data_root)

# Load the dataset with a datetime index; missing values are replaced by 0.
df = pd.read_csv(f'{cwd}/data/3final_data/Final_Grossload_dataset.csv', index_col='Date')
df.index = pd.to_datetime(df.index)
df = df.fillna(0)

# Restrict to the first year of data: [first timestamp, first timestamp + 1 year).
start_date = df.index.min()
end_date = start_date + pd.DateOffset(years=1)
in_first_year = (df.index >= start_date) & (df.index < end_date)
df = df[in_first_year]

# One frame per user: the user's own column, the shared feature columns,
# and that user's 24-hour lag column (in this column order).
shared_columns = ['temp', 'rhum', 'wspd', 'PC1', 'hour sin', 'hour cos']
df_array = []
for user_number in range(1, num_users + 1):
    user_columns = [f'User{user_number}', *shared_columns, f'User{user_number}_lag_24hrs']
    df_array.append(df[user_columns])

df_array[0]

Unnamed: 0_level_0,User1,temp,rhum,wspd,PC1,hour sin,hour cos,User1_lag_24hrs
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-07-02 00:00:00,0.111,2.5,92.0,0.0,-2.641741,0.000000,1.000000,0.125
2010-07-02 01:00:00,0.346,2.5,92.0,0.0,-2.641741,0.258819,0.965926,0.471
2010-07-02 02:00:00,0.079,2.5,92.0,0.0,-2.641741,0.500000,0.866025,0.121
2010-07-02 03:00:00,0.107,1.0,95.0,3.6,-1.884345,0.707107,0.707107,0.079
2010-07-02 04:00:00,0.120,1.0,95.0,3.6,-1.884345,0.866025,0.500000,0.098
...,...,...,...,...,...,...,...,...
2011-07-01 19:00:00,0.118,12.6,74.0,0.0,-1.499168,-0.965926,0.258819,0.241
2011-07-01 20:00:00,0.276,12.6,74.0,0.0,-1.499168,-0.866025,0.500000,0.256
2011-07-01 21:00:00,0.168,11.2,84.0,0.0,-1.863761,-0.707107,0.707107,0.246
2011-07-01 22:00:00,0.164,11.2,84.0,0.0,-1.863761,-0.500000,0.866025,0.228


In [3]:
#Hyperparameters
sequence_length = 25
horizon = 1
batch_size = 16
max_epochs = 100
# Number of columns in a per-user frame (all user frames are built with the same column count).
num_features = df_array[0].shape[1]

# Training objective and the metrics reported alongside it.
# NOTE(review): the saved results further down show very large MAPE values;
# worth checking whether MAPE is meaningful for targets at or near zero.
loss = tf.keras.losses.MeanSquaredError()
metrics = [
    tf.keras.metrics.RootMeanSquaredError(),
    tf.keras.metrics.MeanAbsolutePercentageError(),
    tf.keras.metrics.MeanAbsoluteError(),
]

# Callbacks handed to every run_* call below.
# NOTE(review): no random seed is set in the visible cells.
early_stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=10,
    mode='min',
)
timing_callback = TimingCallback()
custom_callback = CustomCallback()

callbacks = [early_stopping, timing_callback, custom_callback]

# Column layouts shared by the result tables of all four architectures.
per_run_result_columns = ["user", "architecture", "train_time", "avg_time_epoch", "mse", "rmse", "mape", "mae"]
summary_result_columns = [
    'architecture', 'train_time', 'avg_time_epoch',
    'mse', 'mse_std', 'rmse', 'rmse_std', 'mape', 'mape_std', 'mae', 'mae_std',
]

#Soft dense MoE
dense_smoe_units = 16
dense_smoe_expert_units = 8
dense_smoe_num_experts = 4
dense_smoe_all_results = pd.DataFrame(columns=per_run_result_columns)
dense_smoe_results = pd.DataFrame(columns=summary_result_columns)

#Soft lstm MoE
lstm_smoe_units = 20
lstm_smoe_expert_units = 8
lstm_smoe_num_experts = 4
lstm_smoe_all_results = pd.DataFrame(columns=per_run_result_columns)
lstm_smoe_results = pd.DataFrame(columns=summary_result_columns)

#topK dense MoE
dense_topmoe_units = 16
dense_topmoe_num_experts = 5
dense_topmoe_top_k = 2
dense_topmoe_expert_units = 8
dense_topmoe_all_results = pd.DataFrame(columns=per_run_result_columns)
dense_topmoe_results = pd.DataFrame(columns=summary_result_columns)

#topK lstm MoE
lstm_topmoe_units = 20
lstm_topmoe_num_experts = 5
lstm_topmoe_top_k = 2
lstm_topmoe_expert_units = 8
lstm_topmoe_all_results = pd.DataFrame(columns=per_run_result_columns)
lstm_topmoe_results = pd.DataFrame(columns=summary_result_columns)

#Train, Validation and Test datasets
# Each dict maps 'user<N>' (1-based) to that user's features / labels.
X_train, y_train, X_val, y_val, X_test, y_test = {}, {}, {}, {}, {}, {}

#Create Train, Validation and Test datasets
# The loop variable is deliberately not named `df`: doing so would rebind the
# notebook-level `df` (the full dataset loaded earlier) to the last user's frame.
for idx, user_df in enumerate(df_array):
    user_key = f'user{idx+1}'

    # Contiguous split by row position: first 70 % train, next 20 % validation, last 10 % test.
    n = len(user_df)
    train_end = int(n*0.7)
    val_end = int(n*0.9)
    train_df = user_df.iloc[:train_end]
    val_df = user_df.iloc[train_end:val_end]
    test_df = user_df.iloc[val_end:]

    # Min max scaling
    # NOTE(review): each split is handed to min_max_scaling on its own. If that
    # helper derives min/max from the frame it receives, validation and test are
    # scaled with their own statistics instead of the training ones — confirm
    # that this is intended.
    train_df = min_max_scaling(train_df)
    val_df = min_max_scaling(val_df)
    test_df = min_max_scaling(test_df)

    # Sequencing
    train_sequences = create_sequences(train_df, sequence_length)
    val_sequences = create_sequences(val_df, sequence_length)
    test_sequences = create_sequences(test_df, sequence_length)

    #Split into feature and label
    X_train[user_key], y_train[user_key] = prepare_data(train_sequences, batch_size)
    X_val[user_key], y_val[user_key] = prepare_data(val_sequences, batch_size)
    X_test[user_key], y_test[user_key] = prepare_data(test_sequences, batch_size)

In [4]:
# Soft-gated dense MoE run, with use_new_loss disabled (run name "dense_soft_no_loss").
run_soft_dense_moe_model(
    # Run identification and output location
    wb_project_name="TS_LL_Grossload_Forecasting_Moe",
    wb_project="TS_LL_Grossload",
    wb_model_name="dense_soft_no_loss",
    save_path=os.getcwd(),
    # Architecture
    dense_smoe_units=dense_smoe_units,
    dense_smoe_num_experts=dense_smoe_num_experts,
    dense_smoe_expert_units=dense_smoe_expert_units,
    horizon=horizon,
    # Training setup
    max_epochs=max_epochs,
    batch_size=batch_size,
    loss=loss,
    metrics=metrics,
    callbacks=callbacks,
    use_new_loss=False,
    # Data (per-user frames and the per-user split dictionaries)
    df_array=df_array,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    X_test=X_test,
    y_test=y_test,
    # Result tables handed to the runner
    results=dense_smoe_results,
    all_results=dense_smoe_all_results,
)

-----User:  1
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  2
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  3
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  4
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  5
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  6
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  7
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  8
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----Use

In [5]:
# Top-k gated dense MoE run, with use_new_loss disabled (run name "dense_topk_no_loss").
run_topk_dense_moe_model(
    # Run identification and output location
    wb_project_name="TS_LL_Grossload_Forecasting_Moe",
    wb_project="TS_LL_Grossload",
    wb_model_name="dense_topk_no_loss",
    save_path=os.getcwd(),
    # Architecture
    dense_topmoe_units=dense_topmoe_units,
    dense_topmoe_num_experts=dense_topmoe_num_experts,
    dense_topmoe_top_k=dense_topmoe_top_k,
    dense_topmoe_expert_units=dense_topmoe_expert_units,
    horizon=horizon,
    # Training setup
    max_epochs=max_epochs,
    batch_size=batch_size,
    loss=loss,
    metrics=metrics,
    callbacks=callbacks,
    use_new_loss=False,
    # Data (per-user frames and the per-user split dictionaries)
    df_array=df_array,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    X_test=X_test,
    y_test=y_test,
    # Result tables handed to the runner
    results=dense_topmoe_results,
    all_results=dense_topmoe_all_results,
)

-----User:  1
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  2
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  3
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  4
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  5
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  6
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  7
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  8
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----Use

In [6]:
# Soft-gated LSTM MoE run, with use_new_loss disabled (run name "lstm_soft_no_loss").
run_soft_lstm_moe_model(
    # Run identification and output location
    wb_project_name="TS_LL_Grossload_Forecasting_Moe",
    wb_project="TS_LL_Grossload",
    wb_model_name="lstm_soft_no_loss",
    save_path=os.getcwd(),
    # Architecture
    lstm_smoe_units=lstm_smoe_units,
    lstm_smoe_num_experts=lstm_smoe_num_experts,
    lstm_smoe_expert_units=lstm_smoe_expert_units,
    horizon=horizon,
    # Training setup
    max_epochs=max_epochs,
    batch_size=batch_size,
    loss=loss,
    metrics=metrics,
    callbacks=callbacks,
    use_new_loss=False,
    # Data (per-user frames and the per-user split dictionaries)
    df_array=df_array,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    X_test=X_test,
    y_test=y_test,
    # Result tables handed to the runner
    results=lstm_smoe_results,
    all_results=lstm_smoe_all_results,
)

-----User:  1
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  2
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  3
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  4
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  5
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  6
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  7
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  8
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----Use

In [7]:
# Top-k gated LSTM MoE run, with use_new_loss disabled (run name "lstm_topk_no_loss").
run_topk_lstm_moe_model(
    # Run identification and output location
    wb_project_name="TS_LL_Grossload_Forecasting_Moe",
    wb_project="TS_LL_Grossload",
    wb_model_name="lstm_topk_no_loss",
    save_path=os.getcwd(),
    # Architecture
    lstm_topmoe_units=lstm_topmoe_units,
    lstm_topmoe_num_experts=lstm_topmoe_num_experts,
    lstm_topmoe_top_k=lstm_topmoe_top_k,
    lstm_topmoe_expert_units=lstm_topmoe_expert_units,
    horizon=horizon,
    # Training setup
    max_epochs=max_epochs,
    batch_size=batch_size,
    loss=loss,
    metrics=metrics,
    callbacks=callbacks,
    use_new_loss=False,
    # Data (per-user frames and the per-user split dictionaries)
    df_array=df_array,
    X_train=X_train,
    y_train=y_train,
    X_val=X_val,
    y_val=y_val,
    X_test=X_test,
    y_test=y_test,
    # Result tables handed to the runner
    results=lstm_topmoe_results,
    all_results=lstm_topmoe_all_results,
)

-----User:  1
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  2
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  3
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  4
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  5
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  6
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  7
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----User:  8
Round:  0
saved model
Round:  1
saved model
Round:  2
saved model
Round:  3
saved model
Round:  4
saved model
-----Use

In [18]:

def load_all_results(run_name):
    """Read ``wandb/<run_name>_all_results.csv`` (path relative to the working directory)."""
    return pd.read_csv(f"wandb/{run_name}_all_results.csv")


# One saved results table per architecture; the run names match the
# `wb_model_name` values used in the training cells.
dense_soft = load_all_results("dense_soft_no_loss")
dense_topk = load_all_results("dense_topk_no_loss")
lstm_soft = load_all_results("lstm_soft_no_loss")
lstm_topk = load_all_results("lstm_topk_no_loss")

In [22]:
# Mean and standard deviation of the `rmse` column of each results table,
# rounded to 5 decimals and printed one architecture per line.
tables_by_label = (
    ("dense_soft", dense_soft),
    ("dense_topk", dense_topk),
    ("lstm_soft", lstm_soft),
    ("lstm_topk", lstm_topk),
)
for label, table in tables_by_label:
    rmse_column = table["rmse"]
    print(f"{label}: ", rmse_column.mean().round(5), rmse_column.std().round(5))

dense_soft:  0.12403 0.02657
dense_topk:  0.12529 0.02539
lstm_soft:  0.12097 0.02563
lstm_topk:  0.12628 0.02847


In [16]:
# Display the dense soft-MoE results table.
# NOTE(review): this cell's execution count (16) is lower than that of the cell
# that reloads `dense_soft` (18), so the saved output may come from an earlier
# assignment — re-run top to bottom to confirm.
dense_soft

Unnamed: 0.1,Unnamed: 0,architecture,train_time,avg_time_epoch,mse,mse_std,rmse,rmse_std,mape,mape_std,mae,mae_std
0,0,dense_soft_no_loss,18.181963,0.773382,0.022937,0.000642,0.151438,0.002103,335469.41875,47417.190689,0.09894,0.003242
1,1,dense_soft_no_loss,17.357387,0.784721,0.014585,0.000452,0.120757,0.00187,101037.378906,34413.862716,0.075208,0.004779
2,2,dense_soft_no_loss,17.930094,0.768176,0.013229,0.000281,0.115013,0.001226,89365.679688,26542.46202,0.070905,0.004907
3,3,dense_soft_no_loss,12.784554,0.791747,0.006111,0.002002,0.077381,0.012423,53328.359375,12467.131957,0.053469,0.014264
4,4,dense_soft_no_loss,12.915472,0.792674,0.005076,0.000471,0.071182,0.003331,248873.765625,151389.341905,0.048808,0.008094
5,5,dense_soft_no_loss,17.231239,0.780717,0.019969,0.001268,0.141256,0.004448,83747.592188,37781.783301,0.099718,0.00943
6,6,dense_soft_no_loss,19.123566,0.774294,0.010086,0.000219,0.100425,0.001091,35895.675586,16883.678133,0.064279,0.006069
7,7,dense_soft_no_loss,17.358756,0.779022,0.018688,0.001696,0.136593,0.006172,25644.726758,10968.768164,0.091298,0.003608
8,8,dense_soft_no_loss,20.599294,0.781696,0.013578,0.000401,0.116515,0.001723,96738.923437,58884.177519,0.05801,0.006776
9,9,dense_soft_no_loss,19.328935,0.777558,0.014455,0.001203,0.120149,0.004919,36432.070312,17598.865314,0.08404,0.004213
