In [None]:
import os
from pathlib import Path
import pandas as pd
import numpy as np
import tensorflow as tf

In [None]:
import plotly.express as px
import plotly.graph_objects as go

In [None]:
# Read the interpolated Friday CSV into a DataFrame and preview its first two rows.
dataset = pd.read_csv(filepath_or_buffer="/content/Friday_Interpolated.csv")
dataset.head(n=2)

Unnamed: 0,CONNECT TIME,hours test,kWhDelivered,userInputs/0/kWhRequested,kwh test
0,4/25/23 0:00,12:00:00 AM,2.2,12.0,3.4
1,4/25/23 0:00,12:00:47 AM,15.575,34.64,3.347691


In [None]:
# Keep only the time-of-day and kWh columns for the inspection cells that follow.
filtered_df = dataset.filter(items=['hours test', 'kwh test'], axis='columns')

In [None]:
# Preview the first two rows of the two-column frame.
filtered_df.head(n=2)

Unnamed: 0,hours test,kwh test
0,12:00:00 AM,3.4
1,12:00:47 AM,3.347691


In [None]:
# Count missing values per column.
filtered_df.isna().sum()

hours test    0
kwh test      0
dtype: int64

In [None]:
# Show the dtype pandas assigned to each column.
filtered_df.dtypes

hours test     object
kwh test      float64
dtype: object

In [None]:
# Number of non-null entries in each column.
filtered_df.count(axis='index')

hours test    2854
kwh test      2854
dtype: int64

In [None]:
# Rewrite the time-of-day strings as 24-hour "HH:MM". Seconds are dropped, so
# rows that fall within the same minute end up with identical labels.
# Note: this overwrites the 'hours test' column of `dataset` in place.
dataset['hours test'] = dataset['hours test'].map(
    lambda raw_time: pd.to_datetime(raw_time).strftime('%H:%M')
)
dataset.head(n=2)

Unnamed: 0,CONNECT TIME,hours test,kWhDelivered,userInputs/0/kWhRequested,kwh test
0,4/25/23 0:00,00:00,2.2,12.0,3.4
1,4/25/23 0:00,00:00,15.575,34.64,3.347691


Grouping - Optional

In [None]:
# Total kWh per "HH:MM" label: rows sharing the same minute are summed together.
sf_df = (
    dataset
    .groupby('hours test', as_index=False)['kwh test']
    .agg('sum')
)
sf_df.head(n=2)

Unnamed: 0,hours test,kwh test
0,00:00,6.747691
1,00:01,3.297608


In [None]:
# Ungrouped variant: rebind sf_df to the plain two-column selection of `dataset`.
sf_df = dataset.loc[:, ['hours test', 'kwh test']]

In [None]:
# (rows, columns) of the frame.
sf_df.shape

(2854, 2)

In [None]:
# Extract both series as NumPy arrays for positional slicing and windowing.
hours, kwh = (dataset[column].to_numpy() for column in ('hours test', 'kwh test'))

In [None]:
from sklearn.model_selection import train_test_split

# Chronological 80/20 split (no shuffling): the first 80% of the rows form the
# training part and the remainder forms the test part.
split_size = int(0.8*len(kwh))

x_train, x_test = hours[:split_size], hours[split_size:]
y_train, y_test = kwh[:split_size], kwh[split_size:]

len(x_train), len(x_test), len(y_train), len(y_test)

(2283, 571, 2283, 571)

In [None]:
# Plot the chronological train/test split of the kWh series.
viz_train = go.Scatter(
    x = x_train,
    y = y_train,
    mode = 'lines',
    name = 'Actual Train'
)

viz_test = go.Scatter(
    x = x_test,
    y = y_test,
    mode = 'lines',
    name = 'Actual Test'
)

# Labels describe what is actually plotted: the 'hours test' time-of-day labels
# on x and the 'kwh test' series on y.
layout = go.Layout(
    title = 'Friday kWh: Train/Test Split',
    xaxis = {'title' : "Time"},
    yaxis = {'title' : "Excess kWh"}
)
fig = go.Figure(data=[viz_train, viz_test], layout=layout)

fig.show()

In [None]:
# HORIZON: how many future values each sample has to predict.
# WINDOW_SIZE: how many consecutive past values make up one input sample.
HORIZON, WINDOW_SIZE = 1, 7

In [None]:
def get_labelled_windows(x, horizon=1):
  """Split every row of a 2-D array into an input window and its label.

  Args:
    x: 2-D array whose rows are consecutive values of a series.
    horizon: number of trailing columns of each row used as the label.

  Returns:
    Tuple ``(windows, labels)``: all columns before the last `horizon` ones,
    and the last `horizon` columns.
  """
  split_at = -horizon
  windows = x[:, :split_at]
  labels = x[:, split_at:]
  return windows, labels

In [None]:
def make_windows(x, window_size=7, horizon=1):
  """Slice a 1-D array into overlapping windows with their labels.

  Each sample covers ``window_size + horizon`` consecutive values and starts one
  position after the previous sample.

  Args:
    x: 1-D NumPy array holding the series.
    window_size: number of values in each input window.
    horizon: number of values following the window that form the label.

  Returns:
    Tuple ``(windows, labels)`` as produced by `get_labelled_windows`.
  """
  span = window_size + horizon
  # Row vector of offsets inside one sample: [[0, 1, ..., span - 1]].
  offsets = np.arange(span)[np.newaxis, :]
  # Column vector with the start position of every sample that fits in x.
  starts = np.arange(len(x) - (span - 1))[:, np.newaxis]
  # Broadcasting start + offset gives an (n_samples, span) index matrix.
  windowed = x[starts + offsets]
  return get_labelled_windows(windowed, horizon=horizon)

In [None]:
# Window the whole kWh series: WINDOW_SIZE inputs paired with HORIZON targets per row.
full_windows, full_labels = make_windows(x=kwh, horizon=HORIZON, window_size=WINDOW_SIZE)

len(full_windows), len(full_labels)

(2847, 2847)

In [None]:
# Print the first three (window, label) pairs as a sanity check of the windowing.
print("\n".join(f"Window: {full_windows[i]} -> Label: {full_labels[i]}" for i in range(3)))

Window: [3.4        3.34769127 3.29760845 3.18965214 3.18742624 2.94368982
 2.88359043] -> Label: [2.87052092]
Window: [3.34769127 3.29760845 3.18965214 3.18742624 2.94368982 2.88359043
 2.87052092] -> Label: [2.86275498]
Window: [3.29760845 3.18965214 3.18742624 2.94368982 2.88359043 2.87052092
 2.86275498] -> Label: [2.86048202]


In [None]:
def make_train_test_splits(windows, labels, test_split=0.2):
  """Split windows and labels into leading train and trailing test parts.

  The split is positional (no shuffling): the first ``1 - test_split`` fraction
  of the samples is used for training and the rest for testing.

  Args:
    windows: sliceable sequence of input windows.
    labels: sliceable sequence of labels, aligned with `windows`.
    test_split: fraction of samples placed in the test part.

  Returns:
    Tuple ``(train_windows, test_windows, train_labels, test_labels)``.
  """
  cut = int(len(windows) * (1-test_split))
  return windows[:cut], windows[cut:], labels[:cut], labels[cut:]

In [None]:
# Positional split of the windowed data using the default test_split of 0.2.
train_windows, test_windows, train_labels, test_labels = make_train_test_splits(
    windows=full_windows,
    labels=full_labels,
)

tuple(len(part) for part in (train_windows, test_windows, train_labels, test_labels))

(2277, 570, 2277, 570)

In [None]:
# Inspect the first five training windows; consecutive rows are shifted by one step.
train_windows[:5]

array([[3.4       , 3.34769127, 3.29760845, 3.18965214, 3.18742624,
        2.94368982, 2.88359043],
       [3.34769127, 3.29760845, 3.18965214, 3.18742624, 2.94368982,
        2.88359043, 2.87052092],
       [3.29760845, 3.18965214, 3.18742624, 2.94368982, 2.88359043,
        2.87052092, 2.86275498],
       [3.18965214, 3.18742624, 2.94368982, 2.88359043, 2.87052092,
        2.86275498, 2.86048202],
       [3.18742624, 2.94368982, 2.88359043, 2.87052092, 2.86275498,
        2.86048202, 2.85820906]])

**EVALUATION METRICS**

Models are scored with MAE and RMSE (via `evaluate_metrics`); MAPE is additionally reported for each model.

In [None]:
from sklearn.metrics import mean_absolute_percentage_error
from sklearn.metrics import mean_squared_error
from sklearn.metrics import mean_absolute_error

In [None]:
def evaluate_metrics(y_true, y_pred):
  """Compute MAE and RMSE between true and predicted values.

  Args:
    y_true: ground-truth target values.
    y_pred: predicted target values.

  Returns:
    Dict with keys ``"mae"`` and ``"rmse"``.
  """
  mae = mean_absolute_error(y_true, y_pred)
  # The `squared=False` flag of mean_squared_error was deprecated in
  # scikit-learn 1.4 and removed in 1.6. Taking the root of the per-output MSE
  # and averaging reproduces the same value on every version.
  per_output_mse = mean_squared_error(y_true, y_pred, multioutput="raw_values")
  rmse = np.mean(np.sqrt(per_output_mse))

  return { "mae": mae, "rmse": rmse }

**Support Vector Regression**

In [None]:
from sklearn.svm import SVR

# Support vector regressor with an RBF kernel; no other hyperparameters are set here.
svr_regr = SVR(kernel='rbf')

In [None]:
# scikit-learn expects a 1-D target here. The windowed labels form a single
# column (HORIZON == 1), so flatten them to avoid the column-vector
# DataConversionWarning.
svr_regr.fit(train_windows, train_labels.ravel())


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().



In [None]:
# One-step-ahead SVR predictions for every test window.
y_svr_pred = svr_regr.predict(test_windows)

In [None]:
# Peek at the first two SVR predictions.
y_svr_pred[:2]

array([4.88773117, 4.40542432])

In [None]:
# MAE / RMSE of the SVR predictions on the test windows.
evaluate_metrics(y_true=test_labels, y_pred=y_svr_pred)

{'mae': 1.4577838833438965, 'rmse': 3.2962062546781508}

In [None]:
# MAPE of the SVR predictions, returned as a fraction rather than a percentage.
mean_absolute_percentage_error(y_true=test_labels, y_pred=y_svr_pred)

0.12031121244860046

**Decision Tree**

In [None]:
#from sklearn.tree import DecisionTreeRegressor
#from sklearn.ensemble import AdaBoostRegressor

#dt_regr = DecisionTreeRegressor(max_depth=2)

In [None]:
#dt_regr.fit(train_windows, train_labels)

In [None]:
#y_dt_pred = dt_regr.predict(test_windows)

In [None]:
#y_dt_pred[0:2]

In [None]:
#evaluate_metrics(test_labels, y_dt_pred)

In [None]:
#mean_absolute_percentage_error(test_labels, y_dt_pred)


**Decision Tree with AdaBoost**



In [None]:
#from sklearn.ensemble import AdaBoostRegressor
#from sklearn.tree import DecisionTreeRegressor

In [None]:
#dt1_regr = AdaBoostRegressor(
#    DecisionTreeRegressor(max_depth=4), n_estimators=300
#)

In [None]:
#dt1_regr.fit(train_windows, train_labels)

In [None]:
#y_dt1_pred = dt1_regr.predict(test_windows)

In [None]:
#y_dt1_pred[0:2]

In [None]:
#evaluate_metrics(test_labels, y_dt1_pred)

In [None]:
#mean_absolute_percentage_error(test_labels, y_dt1_pred)

**LSTM**

In [None]:
from keras.models import Sequential
from keras.layers import Dense, Input
from keras.layers import LSTM
from tensorflow.keras import layers
from keras.callbacks import ModelCheckpoint, EarlyStopping

In [None]:
# Seed TensorFlow so weight initialisation is repeatable.
tf.random.set_seed(42)

# `shape` has to be a tuple: `(WINDOW_SIZE)` is just the int 7, whereas
# `(WINDOW_SIZE,)` is the one-element shape tuple the Keras API documents.
inputs = Input(shape=(WINDOW_SIZE,))

# The LSTM layer needs 3-D input, so insert a length-1 time axis:
# (batch, WINDOW_SIZE) -> (batch, 1, WINDOW_SIZE). The whole window therefore
# enters the LSTM as a single timestep with WINDOW_SIZE features.
layer_inp = layers.Lambda(lambda x: tf.expand_dims(x, axis=1))(inputs)
layer_inp = layers.LSTM(64, activation="relu")(layer_inp)
# One output unit per forecast step.
output = layers.Dense(HORIZON)(layer_inp)

lstm_model = tf.keras.Model(inputs=inputs,
                         outputs=output,
                         name="model_2_lstm")

lstm_model.compile(loss="mae",
                optimizer=tf.keras.optimizers.Adam(),
                metrics=["mae"])

lstm_model.fit(train_windows,
            train_labels,
            epochs=100,
            verbose=1,
            batch_size=32,
            callbacks=[
              # No validation data is passed to fit(), so early stopping watches
              # the training loss and stops after 10 epochs without improvement.
              EarlyStopping(monitor='loss', patience=10),
            ]
)

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

<keras.src.callbacks.History at 0x7c2488e8c790>

In [None]:
# Print the layer-by-layer architecture and parameter counts.
lstm_model.summary()

Model: "model_2_lstm"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_1 (InputLayer)        [(None, 7)]               0         
                                                                 
 lambda (Lambda)             (None, 1, 7)              0         
                                                                 
 lstm (LSTM)                 (None, 64)                18432     
                                                                 
 dense (Dense)               (None, 1)                 65        
                                                                 
Total params: 18497 (72.25 KB)
Trainable params: 18497 (72.25 KB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


In [None]:
# LSTM predictions for every test window; the Dense(HORIZON) head yields one column per step.
y_lstm_pred = lstm_model.predict(test_windows)



In [None]:
# Drop size-1 dimensions so the predictions become a flat vector for plotting.
y_lstm_pred_out = tf.squeeze(y_lstm_pred)

In [None]:
# MAE / RMSE of the LSTM predictions on the test windows.
evaluate_metrics(y_true=test_labels, y_pred=y_lstm_pred)

{'mae': 0.2518768865683685, 'rmse': 0.5950146902572887}

In [None]:
# MAPE of the LSTM predictions, returned as a fraction rather than a percentage.
mean_absolute_percentage_error(y_true=test_labels, y_pred=y_lstm_pred)

0.0427845275907897

**Gradient Boosting Regressor**

In [None]:
from sklearn.ensemble import GradientBoostingRegressor

In [None]:
# Gradient boosting over 100 depth-1 trees with squared-error loss;
# random_state fixes the estimator's randomness for repeatable results.
est = GradientBoostingRegressor(
    loss='squared_error',
    n_estimators=100,
    learning_rate=0.1,
    max_depth=1,
    random_state=0,
)

In [None]:
# Flatten the single-column labels (HORIZON == 1) to the 1-D target
# scikit-learn expects, which avoids the column-vector DataConversionWarning.
est.fit(train_windows, train_labels.ravel())

# Predict the test windows and peek at the first two values.
y_gbr_pred = est.predict(test_windows)
y_gbr_pred[0:2]


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples, ), for example using ravel().



array([5.13110765, 4.60253131])

In [None]:
# MAE / RMSE of the gradient-boosting predictions on the test windows.
evaluate_metrics(y_true=test_labels, y_pred=y_gbr_pred)

{'mae': 0.8240397443186656, 'rmse': 1.6910597380300112}

In [None]:
# MAPE of the gradient-boosting predictions, returned as a fraction rather than a percentage.
mean_absolute_percentage_error(y_true=test_labels, y_pred=y_gbr_pred)

0.08392119976077367

**RANDOM FOREST**

In [None]:
from sklearn.ensemble import RandomForestRegressor

# Forest of 100 trees; random_state fixes the estimator's randomness for repeatable results.
rf_regr = RandomForestRegressor(
    random_state=5,
    n_estimators=100,
)

In [None]:
# A single-column target should be passed as 1-D to avoid the column-vector
# DataConversionWarning. Labels with several columns are passed through unchanged.
rf_regr.fit(
    train_windows,
    train_labels.ravel() if train_labels.shape[1] == 1 else train_labels,
)


A column-vector y was passed when a 1d array was expected. Please change the shape of y to (n_samples,), for example using ravel().



In [None]:
# Random-forest predictions for every test window.
y_rf_pred = rf_regr.predict(test_windows)

In [None]:
# Peek at the first five random-forest predictions.
y_rf_pred[:5]

array([4.09409572, 4.33663178, 4.24694752, 3.53794288, 5.45367115])

In [None]:
# MAE / RMSE of the random-forest predictions on the test windows.
evaluate_metrics(y_true=test_labels, y_pred=y_rf_pred)

{'mae': 0.8944509873542101, 'rmse': 1.891445865504761}

In [None]:
# MAPE of the random-forest predictions, returned as a fraction rather than a percentage.
mean_absolute_percentage_error(y_true=test_labels, y_pred=y_rf_pred)

0.08247514867611846

**Prediction Visualization**

In [None]:
# Align the x-axis with the windowed test labels. With HORIZON == 1 every label
# is the value right after its window, so the test labels (and the predictions)
# correspond to the last len(test_labels) timestamps of the series. `x_test`
# comes from the raw 80/20 split and is longer than the windowed test set,
# so using it directly would shift every trace.
x_pred = hours[-len(test_labels):]


def _line_trace(values, label):
  """Build a line trace of `values` against the aligned test timestamps."""
  return go.Scatter(
      x = x_pred,
      y = values,
      mode = 'lines',
      name = label
  )


actual_test = _line_trace(test_labels[:, 0], 'Actual Test Data')
rf_test_pred = _line_trace(y_rf_pred, 'Random Forest Prediction')
svr_test_pred = _line_trace(y_svr_pred, 'SVR Prediction')
lstm_test_pred = _line_trace(y_lstm_pred_out, 'LSTM Prediction')
gbr_test_pred = _line_trace(y_gbr_pred, 'GBR Prediction')

layout = go.Layout(
    #title = 'Load Forecasting',
    xaxis = {'title' : "Time"},
    yaxis = {'title' : "Excess kWh"}
)
fig = go.Figure(data=[actual_test,
                      svr_test_pred,
                      gbr_test_pred,
                      rf_test_pred,
                      lstm_test_pred],
                layout=layout)
# Place the legend inside the plot area; as the cell's last expression, the
# returned figure is what gets displayed.
fig.update_layout(
    legend=dict(
        x=.6,
        y=.95,
        traceorder="normal",
        font=dict(
            family="sans-serif",
            size=16,
            color="black"
        ),
    )
)