In [77]:
from sklearn.preprocessing import MinMaxScaler
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
import numpy as np
import math
import datetime 
from joblib import load

%load_ext tensorboard
print(tf.version.VERSION)

The tensorboard extension is already loaded. To reload it, use:
  %reload_ext tensorboard
2.11.0


In [78]:
def tf_dataset(series_x, series_y, batch_size, shuffle_buffer, shuffle=True):
    """Wrap feature/label arrays in a cached, batched, endlessly repeating pipeline.

    Args:
        series_x: Features; sliced along the first axis into one element per sample.
        series_y: Labels, paired element-wise with ``series_x``.
        batch_size: Number of samples per emitted batch.
        shuffle_buffer: Buffer size handed to ``Dataset.shuffle``; unused when
            ``shuffle`` is false.
        shuffle: Whether to shuffle the samples before batching.

    Returns:
        A ``tf.data.Dataset`` of ``(x, y)`` batches. It repeats indefinitely, so
        the consumer has to bound iteration itself (e.g. with a step count).
    """
    pipeline = tf.data.Dataset.from_tensor_slices((series_x, series_y)).cache()
    if shuffle:
        # Shuffling happens before batching, so single samples are mixed
        # rather than whole batches.
        pipeline = pipeline.shuffle(shuffle_buffer)
    return pipeline.batch(batch_size).repeat()

def create_window_dataset(ds, lb, window_size):
    """Build overlapping sliding windows over ``ds`` with one label per window.

    Window ``k`` covers rows ``k`` .. ``k + window_size - 1`` of ``ds`` and is
    labelled with ``lb[k + window_size - 1]``, i.e. the label that sits on the
    window's last row.

    Args:
        ds: Array indexed by row along its first axis (must expose ``.shape``).
        lb: Sequence of labels indexed in step with the rows of ``ds``.
        window_size: Number of consecutive rows per window; must be >= 1.

    Returns:
        ``(windows, labels)`` as NumPy arrays. With ``n`` rows there are
        ``n - window_size + 1`` windows; both arrays are empty when
        ``window_size`` exceeds ``n``.

    Raises:
        ValueError: If ``window_size`` is smaller than 1.
    """
    if window_size < 1:
        # A zero or negative size would otherwise yield empty or wrapped slices
        # and pair the first window with lb[-1] instead of failing loudly.
        raise ValueError(f"window_size must be >= 1, got {window_size}")

    # Each value is the exclusive end row of one window.
    window_ends = range(window_size, ds.shape[0] + 1)
    windows = [ds[end - window_size:end] for end in window_ends]
    labels = [lb[end - 1] for end in window_ends]
    return np.array(windows), np.array(labels)

def get_metrics_result(metrics, true_labels, predicted_labels):
    """Evaluate each metric object on one set of labels and predictions.

    Every metric is cleared first, so the reported value reflects only this call.

    Args:
        metrics: Iterable of Keras-style metric objects exposing ``update_state``,
            ``result`` and ``reset_state`` (or the older ``reset_states``).
        true_labels: Ground-truth values passed to ``update_state``.
        predicted_labels: Predicted values passed to ``update_state``.

    Returns:
        List holding ``metric.result().numpy()`` for each metric, in input order.
    """
    metrics_result = []
    for metric in metrics:
        # `reset_states` is the deprecated spelling; prefer `reset_state` and
        # fall back only for metric objects that do not provide it.
        reset = getattr(metric, "reset_state", None) or metric.reset_states
        reset()
        metric.update_state(true_labels, predicted_labels)
        metrics_result.append(metric.result().numpy())

    return metrics_result

In [79]:
# NOTE(review): presumably the MinMaxScaler fitted during preprocessing — confirm.
# joblib.load unpickles the file, so only load artifacts from a trusted source.
scaler = load('MinMaxScaler.joblib')

# Both CSV files are read the same way: "date" is parsed and used as the index.
_csv_options = dict(parse_dates=["date"], index_col="date")
Train_set = pd.read_csv("train.csv", **_csv_options)
Test_set = pd.read_csv("test.csv", **_csv_options)

# NumPy copies of the frame values, used by the windowing helper.
Train_set_np = Train_set.to_numpy()
Test_set_np = Test_set.to_numpy()

In [80]:
# Windowing / batching hyper-parameters.
BATCH_SIZE = 32
WINDOW_SIZE = 7 * 24  # rows per window; presumably one week of hourly rows — TODO confirm
SHUFFLE_BUFFER = 1000

# The last column is used as the target; every preceding column is a model input.
windowed_Train, labels_Train = create_window_dataset(
    Train_set_np[:, :-1], Train_set_np[:, -1], window_size=WINDOW_SIZE
)
train_set = tf_dataset(
    windowed_Train,
    labels_Train,
    batch_size=BATCH_SIZE,
    shuffle_buffer=SHUFFLE_BUFFER,
    shuffle=True,
)

# Same training windows in their original order, so predictions line up with
# labels_Train. Keyword arguments replace the positional call that passed
# BATCH_SIZE in the shuffle_buffer slot (harmless with shuffle=False, but misleading).
unshuffled_train_set = tf_dataset(
    windowed_Train,
    labels_Train,
    batch_size=BATCH_SIZE,
    shuffle_buffer=SHUFFLE_BUFFER,
    shuffle=False,
)

windowed_Test, labels_Test = create_window_dataset(
    Test_set_np[:, :-1], Test_set_np[:, -1], window_size=WINDOW_SIZE
)
test_set = tf_dataset(
    windowed_Test,
    labels_Test,
    batch_size=BATCH_SIZE,
    shuffle_buffer=SHUFFLE_BUFFER,
    shuffle=False,
)

# Create Model

In [81]:
# Reset Keras' global state before building the model, so re-running this cell
# in the same kernel starts from a clean slate.
tf.keras.backend.clear_session()

def create_model(input_shape=None):
    """Build and compile the Conv1D + LSTM regression network.

    Architecture, in order: three causal Conv1D layers (64, 64, 32 filters,
    kernel size 3) with batch normalisation after the first two, max pooling
    over 3 steps, two sequence-returning LSTMs (100 and 50 units), a flatten,
    and a single-unit dense output.

    Args:
        input_shape: ``(timesteps, features)`` of one input window. When omitted
            it falls back to ``windowed_Train.shape[-2:]`` from the notebook
            scope, so existing ``create_model()`` calls behave as before.

    Returns:
        A compiled ``tf.keras`` Sequential model using MSE loss, Adam with an
        exponential-decay learning-rate schedule, and MAE as metric.
    """
    if input_shape is None:
        input_shape = windowed_Train.shape[-2:]

    # Settings shared by all three convolution layers.
    conv_kwargs = dict(kernel_size=3, strides=1, padding="causal", activation="relu")

    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv1D(filters=64, input_shape=input_shape, **conv_kwargs),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv1D(filters=64, **conv_kwargs),
        tf.keras.layers.BatchNormalization(),
        tf.keras.layers.Conv1D(filters=32, **conv_kwargs),
        tf.keras.layers.MaxPooling1D(pool_size=3),
        tf.keras.layers.LSTM(100, dropout=0.2, return_sequences=True),
        tf.keras.layers.LSTM(50, dropout=0.3, return_sequences=True),
        tf.keras.layers.Flatten(),
        # ReLU on the output clamps predictions to be non-negative.
        tf.keras.layers.Dense(1, activation="relu"),
    ])

    # NOTE(review): a decay rate of 0.9999 per 1000 steps leaves the learning
    # rate almost constant — confirm these values are intended.
    lr_schedule = tf.keras.optimizers.schedules.ExponentialDecay(
        1e-3, decay_rate=0.9999, decay_steps=1000
    )

    model.compile(
        loss=tf.keras.losses.MeanSquaredError(),
        optimizer=tf.keras.optimizers.Adam(learning_rate=lr_schedule),
        metrics=['mae'],
    )
    return model


# Build the compiled network and print its layer-by-layer summary.
model = create_model()

model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv1d (Conv1D)             (None, 168, 64)           3712      
                                                                 
 batch_normalization (BatchN  (None, 168, 64)          256       
 ormalization)                                                   
                                                                 
 conv1d_1 (Conv1D)           (None, 168, 64)           12352     
                                                                 
 batch_normalization_1 (Batc  (None, 168, 64)          256       
 hNormalization)                                                 
                                                                 
 conv1d_2 (Conv1D)           (None, 168, 32)           6176      
                                                                 
 max_pooling1d (MaxPooling1D  (None, 56, 32)           0

In [89]:
# Number of windows built from the test data.
windowed_Test.shape[0]

6846

In [82]:
# Batches needed to cover every window once; ceil keeps the final,
# possibly smaller, batch.
TRAIN_STEP, VALIDATION_STEP = (
    math.ceil(len(windows) / BATCH_SIZE)
    for windows in (windowed_Train, windowed_Test)
)

In [83]:
# Timestamped TensorBoard run directory. A forward slash is used because it is a
# valid separator on Windows as well as POSIX, unlike a hard-coded backslash.
log_dir = "logs_lag_7/" + datetime.datetime.now().strftime("%d-%m-%Y_%H%M%S")
checkpoint_path = "weights_lag_7/cp.ckpt"

# TensorBoard logging is currently disabled; to enable it, uncomment the line
# below and add the callback to the list passed to fit().
# tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

# NOTE(review): this watches the *training* loss although validation data is
# passed to fit(); confirm that "val_loss" is not what was intended.
early_stopping_cb = tf.keras.callbacks.EarlyStopping(
    monitor="loss",
    min_delta=1e-3,
    patience=50,
    verbose=1,
    restore_best_weights=True,
)

# Weights are written after every epoch to the same path (save_best_only is not
# set), so the file holds the most recent epoch rather than the best one.
check_point_cb = tf.keras.callbacks.ModelCheckpoint(
    filepath=checkpoint_path,
    save_weights_only=True,
    verbose=1,
)

# Both datasets repeat forever, so the step counts define where an epoch and a
# validation pass end.
history = model.fit(
    train_set,
    epochs=200,
    steps_per_epoch=TRAIN_STEP,
    validation_data=test_set,
    validation_steps=VALIDATION_STEP,
    verbose=1,
    callbacks=[early_stopping_cb, check_point_cb],
)

Epoch 1/200
Epoch 1: saving model to training_1\cp.ckpt
Epoch 2/200
Epoch 2: saving model to training_1\cp.ckpt
Epoch 3/200
Epoch 3: saving model to training_1\cp.ckpt
Epoch 4/200
122/872 [===>..........................] - ETA: 2:28 - loss: 1093.1802 - mae: 19.6658

KeyboardInterrupt: 

# Model Evaluation

In [84]:
# Build a fresh model with the same architecture and restore the weights
# stored at checkpoint_path.
model = create_model()
model.load_weights(checkpoint_path)

<tensorflow.python.checkpoint.checkpoint.CheckpointLoadStatus at 0x24e422b4f70>

In [108]:
# Predict on the datasets built with shuffle=False so the outputs stay in the
# same order as the label arrays; `steps` bounds the repeating pipelines.
forecast_train = model.predict(unshuffled_train_set, steps=TRAIN_STEP)
forecast_validation = model.predict(test_set, steps=VALIDATION_STEP)



In [109]:
from sklearn.metrics import mean_absolute_error , r2_score , mean_squared_error

In [110]:
# Regression metrics on the test windows; RMSE is the square root of the MSE.
val_MAE = mean_absolute_error(y_true=labels_Test, y_pred=forecast_validation)
val_R2 = r2_score(y_true=labels_Test, y_pred=forecast_validation)
val_RMSE = mean_squared_error(y_true=labels_Test, y_pred=forecast_validation) ** 0.5

In [111]:
# Regression metrics on the training windows; RMSE is the square root of the MSE.
train_MAE = mean_absolute_error(y_true=labels_Train, y_pred=forecast_train)
train_R2 = r2_score(y_true=labels_Train, y_pred=forecast_train)
train_RMSE = mean_squared_error(y_true=labels_Train, y_pred=forecast_train) ** 0.5

In [114]:
# Report every metric for both splits, rounded to two decimals.
print(f"Mean Absolute Error for Validation data: {round(val_MAE,2)}")
# The train MAE was rounded to an integer; it now uses two decimals like the rest.
print(f"Mean Absolute Error for Train data: {round(train_MAE,2)}")

print(f"R2 for Validation data: {round(val_R2,2)}")
print(f"R2 for Train data: {round(train_R2,2)}")

print(f"Root Mean Squared Error for Validation data: {round(val_RMSE,2)}")
print(f"Root Mean Squared Error for Train data: {round(train_RMSE,2)}")

Mean Absolute Error for Validation data: 22.55
Mean Absolute Error for Train data: 22
R2 for Validation data: 0.79
R2 for Train data: 0.81
Root Mean Squared Error for Validation data: 38.21
Root Mean Squared Error for Train data: 35.84
