In [1]:
import tensorflow as tf
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import LSTM, Dense, Dropout
from tensorflow.keras.optimizers import Adam
from tensorflow import convert_to_tensor

VERBOSE = 1  # Set verbose to 1 to see training progress

In [2]:
def get_model():
    model = Sequential()
    model.add(LSTM(50, return_sequences=False, input_shape=(None, 1))) # This adds an LSTM layer with 50 neurons (units)
    model.add(Dropout(0.2))
    model.add(Dense(1))

    model.compile(optimizer=Adam(learning_rate=0.01), loss='mean_squared_error', metrics=['mean_absolute_error'])
    return model

In [None]:
hourly_data = pd.read_csv("Preprocessed_data2013.csv", dtype={'LCLid': np.int16, 'KWH/hh (per hour) ': np.float64, 'dayoftheyear': np.int16,
       'hour': np.int8, 'is_weekend': np.int8})

hourly_data.info()

In [3]:
# Define number of meters to be used in the project
NUM_OF_METERS = hourly_data['LCLid'].max()
# write hourly_data['LCLid'].max() to include all meters available in the dataset

In [5]:
# Function to split data into train, validation, and test sets
def train_test_validate(data):

    for i in range(0, NUM_OF_METERS):

        
        tmp_data = data[data['LCLid'] == i] # Get the data for current meter

        
        val_split = int(len(tmp_data) * 0.8)
        test_split = int(len(tmp_data) * 0.9)

        # Set initial splits for current meter
        train_ = tmp_data[:val_split]
        vali_ = tmp_data[val_split:test_split]
        test_ = tmp_data[test_split:]

        # Concatanate the test data 
        if (i > 0):
            train = pd.concat([train, train_], ignore_index=True)
            valid = pd.concat([valid, vali_], ignore_index=True)
            test = pd.concat([test, test_], ignore_index=True)
        else:
            train = train_
            valid = vali_
            test = test_

    return {"train": train, "test": test, "validation": valid}

dataset_splits = train_test_validate(hourly_data)

# Convert data to tensors
x_train_tensor = tf.convert_to_tensor(np.asarray(dataset_splits["train"].drop(columns=['KWH/hh (per hour) '])))
y_train_tensor = tf.convert_to_tensor(np.asarray(dataset_splits["train"]['KWH/hh (per hour) ']))
x_val_tensor = tf.convert_to_tensor(np.asarray(dataset_splits["validation"].drop(columns=['KWH/hh (per hour) '])))
y_val_tensor = tf.convert_to_tensor(np.asarray(dataset_splits["validation"]['KWH/hh (per hour) ']))
x_test_tensor = tf.convert_to_tensor(np.asarray(dataset_splits["test"].drop(columns=['KWH/hh (per hour) '])))
y_test_tensor = tf.convert_to_tensor(np.asarray(dataset_splits["test"]['KWH/hh (per hour) ']))

In [6]:
# Initialize and train the model
model = get_model()
history = model.fit(x_train_tensor, y_train_tensor, validation_data=(x_val_tensor, y_val_tensor), epochs=10, batch_size=512, verbose=VERBOSE)


Epoch 1/10


  super().__init__(**kwargs)


[1m2112/2112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 4ms/step - loss: 0.8091 - mean_absolute_error: 0.5076 - val_loss: 0.6529 - val_mean_absolute_error: 0.4652
Epoch 2/10
[1m2112/2112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - loss: 0.7530 - mean_absolute_error: 0.4884 - val_loss: 0.6464 - val_mean_absolute_error: 0.4659
Epoch 3/10
[1m2112/2112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - loss: 0.7273 - mean_absolute_error: 0.4841 - val_loss: 0.6578 - val_mean_absolute_error: 0.4581
Epoch 4/10
[1m2112/2112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - loss: 0.7365 - mean_absolute_error: 0.4846 - val_loss: 0.6444 - val_mean_absolute_error: 0.4718
Epoch 5/10
[1m2112/2112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4ms/step - loss: 0.7348 - mean_absolute_error: 0.4831 - val_loss: 0.6319 - val_mean_absolute_error: 0.4565
Epoch 6/10
[1m2112/2112[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 4

In [7]:
# Evaluate the model
results = model.evaluate(x_test_tensor, y_test_tensor, verbose=VERBOSE)
print(f"Test MSE: {results[0]}, Test MAE: {results[1]}")

[1m4225/4225[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 658us/step - loss: 1.3315 - mean_absolute_error: 0.5663
Test MSE: 1.4369703531265259, Test MAE: 0.6195842027664185


In [8]:
import plotly.express as px
import plotly.graph_objects as go
from plotly.subplots import make_subplots

rounds = np.arange(0,len(history.history['loss']))
loss = history.history['loss']
acc = history.history['mean_absolute_error']

fig = make_subplots(rows=2, cols=1, shared_xaxes=True, 
                    subplot_titles=("MEAN ABSOLUTE ERROR", "MEAN SQUARED ERROR"))

# Add scatter plot for accuracy
fig.add_trace(go.Scatter(x=rounds, y=acc, mode='markers', name='MAE'), row=1, col=1)
# Add line plot for accuracy
fig.add_trace(go.Scatter(x=rounds, y=acc, mode='lines', name='MAE Line'), row=1, col=1)

# Add scatter plot for loss
fig.add_trace(go.Scatter(x=rounds, y=loss, mode='markers', name='MSE'), row=2, col=1)
# Add line plot for loss
fig.add_trace(go.Scatter(x=rounds, y=loss, mode='lines', name='MSE Line'), row=2, col=1)

# Update layout
fig.update_layout(
    height=800,  # Height of the figure
    title_text="Centralized Model",
)

# Update x-axis for all subplots
fig.update_xaxes(title_text="Round", row=2, col=1)
# Update y-axis for each subplot
fig.update_yaxes(title_text="MEAN ABSOLUTE ERROR", row=1, col=1)
fig.update_yaxes(title_text="MEAN SQUARED ERROR", row=2, col=1)

# Show the plot
fig.show()