In [1]:
#Imports
import pandas as pd
import os
import tensorflow as tf
from sklearn.preprocessing import MinMaxScaler
import numpy as np
import time

"""import sys  
sys.path.append("../../")  
from utils.modelgenerator import *
from utils.modelhandler import *
from utils.datahandler import *"""




'import sys  \nsys.path.append("../../")  \nfrom utils.modelgenerator import *\nfrom utils.modelhandler import *\nfrom utils.datahandler import *'

# Helper functions

In [2]:
#min_max_scaling
#Sclaes all columns of the dataframe df to the rang (0,1)
def min_max_scaling(df): #normailizing
    #Min Max Sclaing
    col_names = df.columns
    features = df[col_names]
    scaler = MinMaxScaler().fit(features.values)
    features = scaler.transform(features.values)
    df_scaled = pd.DataFrame(features, columns = col_names, index=df.index)
    return df_scaled

#create_sequences
#Split the dataframe into datasets with sequences of lngth=Sequence_length
def create_sequences(df, sequence_length):
    sequences = []
    for i in range(len(df) - sequence_length + 1):
        sequence = df.iloc[i:i+sequence_length, :]  # Take all columns
        sequences.append(sequence.values)
    return np.array(sequences)

#prepare_data
# Split each sequence into X (features) and Y (labels). 
# The label Y must be the FIRST column! The last batch is discarded, when < batch_size
def prepare_data(sequences, batch_size):
    X = sequences[:, :-1, :].astype('float32') #For all sequences, Exclude last row of the sequence, take all columns
    y = sequences[:, -1, 0].astype('float32') #For all sequences, Take the last row of the sequence, take the first column

    #As some models need to reshape the inputs, the correct batch_size is important
    #Adjust the dataset_size to be divisible by batch_size by discarding the remaining data points not fitting a complete batch.
    num_batches = len(X) // batch_size
    adjusted_X = X[:num_batches * batch_size]
    adjusted_y = y[:num_batches * batch_size]

    return adjusted_X, adjusted_y

class TimingCallback(tf.keras.callbacks.Callback):
    def on_train_begin(self, logs=None):
        self.start_time = time.time()
        self.epoch_times = []

    def on_epoch_begin(self, epoch, logs=None):
        self.epoch_start_time = time.time()

    def on_epoch_end(self, epoch, logs=None):
        epoch_end_time = time.time()
        epoch_time = epoch_end_time - self.epoch_start_time
        self.epoch_times.append(epoch_time)

    def get_training_times_df(self):
        total_training_time = time.time() - self.start_time
        average_epoch_times = [sum(self.epoch_times[:i+1]) / (i + 1) for i in range(len(self.epoch_times))]
        data = {
            'Epoch': list(range(1, len(self.epoch_times) + 1)),
            'Epoch Train_time': self.epoch_times,
            'Epoch Avg Train_time': average_epoch_times,
            'Total Training Time': total_training_time
        }
        return pd.DataFrame(data)
    

class CustomCallback(tf.keras.callbacks.Callback):
    def on_train_begin(self, logs=None):
        self.start_time = time.time()
        self.epoch_times = []
        self.losses = {
            'epoch': [],
            'train_loss': [],
            'val_loss': [],
            'test_loss': []
        }

    def on_epoch_begin(self, epoch, logs=None):
        self.epoch_start_time = time.time()

    def on_epoch_end(self, epoch, logs=None):
        epoch_end_time = time.time()
        epoch_time = epoch_end_time - self.epoch_start_time
        self.epoch_times.append(epoch_time)

        self.losses['epoch'].append(epoch)
        self.losses['train_loss'].append(logs['loss'])
        self.losses['val_loss'].append(logs['val_loss'])

    def on_test_end(self, logs=None):
        self.losses['test_loss'].append(logs['loss'])

    def get_loss_df(self):
        total_training_time = time.time() - self.start_time
        average_epoch_times = [sum(self.epoch_times[:i+1]) / (i + 1) for i in range(len(self.epoch_times))]
        self.losses['avg_epoch_time'] = average_epoch_times
        self.losses['total_training_time'] = total_training_time
        return pd.DataFrame(self.losses)
    
#Helper functions for models
from keras import layers, models

def build_dense_model(X_train, horizon, num_layers, units, batch_size):

    input_data = layers.Input(shape=(X_train.shape[1], X_train.shape[2]), batch_size=batch_size) 
    x =  layers.Dense(units, activation='relu')(input_data)
    for _ in range(num_layers-1):
        x = layers.Dense(units, activation='relu')(x)
    x = layers.Dropout(0.2)(x)
    x = layers.Flatten()(x)
    output = layers.Dense(horizon)(x) 

    dense_model = tf.keras.Model(inputs=input_data, outputs=output, name="Dense_model")

    return dense_model

#This method compiles the model using Adam optimizer, fits the model, and evaluates it
def compile_fit_evaluate_model(model, loss, metrics, X_train, y_train, max_epochs, batch_size, X_val, y_val, X_test, y_test, callbacks, user= "", hyper="", optimizer=tf.keras.optimizers.Adam(learning_rate=0.001)):
    #Compile the model
    model.compile(loss=loss, optimizer=optimizer, metrics=metrics)

    # Train the model
    history = model.fit(X_train, y_train, epochs=max_epochs, batch_size=batch_size, validation_data=(X_val, y_val), callbacks=callbacks, verbose=0,)
    #model = tf.keras.models.load_model('models/best_model.h5')
    #Evaluate the model on test dataset
    test_loss = model.evaluate(X_test, y_test, batch_size=batch_size, verbose=0)

    train_times = callbacks[1].get_training_times_df()
    total_train_time = train_times["Total Training Time"][0]
    avg_time_epoch = train_times["Epoch Avg Train_time"].iloc[-1]

    model_user_result = pd.DataFrame(
        data=[[user, hyper, total_train_time, avg_time_epoch, test_loss[0], test_loss[1], test_loss[2], test_loss[3]]], 
        columns=["user", "architecture", "train_time", "avg_time_epoch", "mse", "rmse", "mape", "mae"]
    )

    return history, model_user_result

# Data processing

In [3]:
#Get data 
cwd = os.path.normpath(os.path.dirname(os.path.dirname(os.path.dirname(os.getcwd()))))
df = pd.read_csv(cwd+'/data/3final_data/Final_Grossload_dataset.csv', index_col='Date') #df = pd.read_csv('user5.csv')
df.index = pd.to_datetime(df.index)
#df = df[['User5', 'temp', 'rhum']]
df.fillna(0, inplace=True)

df_array = []
for idx in range(1):
    df_array.append(df[[f'User{idx+1}', 'temp', 'rhum', 'wspd', 'PC1', 'hour sin', 'hour cos', f'User{idx+1}_lag_24hrs']])

df_array[0].head(3)


Unnamed: 0_level_0,User1,temp,rhum,wspd,PC1,hour sin,hour cos,User1_lag_24hrs
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
2010-07-02 00:00:00,0.111,2.5,92.0,0.0,-2.641741,0.0,1.0,0.125
2010-07-02 01:00:00,0.346,2.5,92.0,0.0,-2.641741,0.258819,0.965926,0.471
2010-07-02 02:00:00,0.079,2.5,92.0,0.0,-2.641741,0.5,0.866025,0.121


In [4]:
#Train, Validation and Test datasets
sequence_length = 25
batch_size = 16
num_features = df_array[0].shape[1]

X_train, y_train, X_val, y_val, X_test, y_test = {}, {}, {}, {}, {}, {}

#Create Train, Validation and Test datasets
for idx, df in enumerate(df_array):
    n = len(df)
    train_df = df[0:int(n*0.7)]
    val_df = df[int(n*0.7):int(n*0.9)]
    test_df = df[int(n*0.9):]

    # Min max sclaing
    train_df = min_max_scaling(train_df)
    val_df = min_max_scaling(val_df)
    test_df = min_max_scaling(test_df)

    # Sequencing
    train_sequences = create_sequences(train_df, sequence_length)
    val_sequences = create_sequences(val_df, sequence_length)
    test_sequences = create_sequences(test_df, sequence_length)

    #Split into feature and label
    X_train[f'user{idx+1}'], y_train[f'user{idx+1}'] = prepare_data(train_sequences, batch_size)
    X_val[f'user{idx+1}'], y_val[f'user{idx+1}'] = prepare_data(val_sequences, batch_size)
    X_test[f'user{idx+1}'], y_test[f'user{idx+1}'] = prepare_data(test_sequences, batch_size)

In [5]:
#General Hyperparameters
# #All models
horizon = 1
max_epochs = 100

loss = tf.keras.losses.MeanSquaredError()
metrics=[
    tf.keras.metrics.RootMeanSquaredError(), 
    tf.keras.metrics.MeanAbsolutePercentageError(),
    tf.keras.metrics.MeanAbsoluteError(),
]

early_stopping = tf.keras.callbacks.EarlyStopping(monitor='val_loss',patience=10,mode='min')
timing_callback = TimingCallback()
custom_callback = CustomCallback()
#model_checkpoint = ModelCheckpoint('models/best_model.h5', save_best_only=True, monitor='val_loss', mode='min')
callbacks=[early_stopping, timing_callback, custom_callback] #model_checkpoint




In [6]:
dense_results = pd.DataFrame(columns=['architecture', 'train_time', 'avg_time_epoch', 'mse','mse_std', 'rmse','rmse_std','mape','mape_std','mae','mae_std'])

In [7]:
#Dense 1 -------------------------------------------------------------

#Dense Hyperparameter
dense_architecture = "L3_U16"
dense_layers = 3
dense_units = 16
dense_all_results = pd.DataFrame(columns=["user", "architecture", "train_time", "avg_time_epoch", "mse", "rmse", "mape", "mae"])

#For each of the users
for idx in range(len(df_array)):
    print("User: ", idx+1)
    for round in range(5):
        #print("Round: ", round)
        dense_model = build_dense_model(X_train[f'user{idx+1}'], horizon, num_layers=dense_layers, units=dense_units, batch_size=batch_size)
        dense_histroy, dense_user_results = compile_fit_evaluate_model(
            model=dense_model, 
            loss=loss, 
            metrics=metrics, 
            X_train=X_train[f'user{idx+1}'],
            y_train = y_train[f'user{idx+1}'], 
            max_epochs = max_epochs, 
            batch_size=batch_size, 
            X_val=X_val[f'user{idx+1}'], 
            y_val=y_val[f'user{idx+1}'], 
            X_test=X_test[f'user{idx+1}'], 
            y_test=y_test[f'user{idx+1}'], 
            callbacks=callbacks, 
            user=f'user{idx+1}', 
            hyper=dense_architecture,
            optimizer=tf.keras.optimizers.Adam(learning_rate=0.001)
        )
        # Add the 'architecture' column from dense_user_results to dense_results
        dense_all_results = pd.merge(dense_all_results, dense_user_results, how='outer')   

    #dense_model.save(cwd + f"/models/Local_learning/Dense/{dense_architecture}/User{idx}")
    print("Saved Soft_Dense_MoE")  


for idx in range(len(df_array)):
    new_row = {
        'architecture': dense_architecture,
        'train_time': dense_all_results[dense_all_results["user"]==f"user{idx+1}"]["train_time"].mean(), 
        'avg_time_epoch' : dense_all_results[dense_all_results["user"]==f"user{idx+1}"]["avg_time_epoch"].mean(),
        'mse': dense_all_results[dense_all_results["user"]==f"user{idx+1}"]["mse"].mean(),
        'mse_std' : dense_all_results[dense_all_results["user"]==f"user{idx+1}"]["mse"].std(),
        'rmse': dense_all_results[dense_all_results["user"]==f"user{idx+1}"]["rmse"].mean(),
        'rmse_std' : dense_all_results[dense_all_results["user"]==f"user{idx+1}"]["rmse"].std(),
        'mape': dense_all_results[dense_all_results["user"]==f"user{idx+1}"]["mape"].mean(),
        'mape_std' : dense_all_results[dense_all_results["user"]==f"user{idx+1}"]["mape"].std(),
        'mae': dense_all_results[dense_all_results["user"]==f"user{idx+1}"]["mae"].mean(),
        'mae_std' : dense_all_results[dense_all_results["user"]==f"user{idx+1}"]["mae"].std(),
    }
    dense_results.loc[len(dense_results)] = new_row

User:  1

Saved Soft_Dense_MoE


In [8]:
#dense_results.to_csv(f'../../evaluations/Test_Datastream_Dense.csv')
dense_results

Unnamed: 0,architecture,train_time,avg_time_epoch,mse,mse_std,rmse,rmse_std,mape,mape_std,mae,mae_std
0,L3_U16,23.917387,1.121345,0.020921,0.000388,0.144634,0.001337,35257.523047,3342.083789,0.092031,0.000817
