#### Test input steps

Compute the losses and accuracy of the models for different input window lengths (input steps), ranging from 24 hours to 7 × 24 = 168 hours in 24-hour increments.

In [1]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import TimeSeriesSplit
from cnn_model_yin import CNN, cross_validate, train
import torch
import torch.nn as nn
import random
import os, sys

# load data through the data preprocessor
sys.path.append(os.path.abspath('..'))  # add parent directory to sys.path
from data_cleanup import DataProcessor

# Best-effort reproducibility: seed every RNG source used in this notebook
# (Python's `random`, NumPy, and PyTorch) with the same value.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
# Kept as the final expression so the cell still displays the Generator repr.
torch.manual_seed(SEED)

<torch._C.Generator at 0x11f752830>

### Experiment No.1
Test different input steps while keeping the convolution kernel size fixed (`kernel_size=3`).

In [None]:
# Experiment 1: sweep the input window length while the forecast horizon
# (OUTPUT_STEPS) and the convolution kernel size (3) stay fixed.
OUTPUT_STEPS = 24
INPUT_STEPS_ARRAY = [24, 48, 72, 96, 120, 144, 168]
histories = []  # one train(...) result per entry of INPUT_STEPS_ARRAY, in order

for INPUT_STEPS in INPUT_STEPS_ARRAY:

    print("\n-------------- TESTING INPUT STEPS: ", INPUT_STEPS, "----------------\n")

    # Rebuild the dataset for this window length; the processor is
    # parameterised by input_steps, so it is re-created on every iteration.
    processor = DataProcessor(input_steps=INPUT_STEPS, output_steps=OUTPUT_STEPS)
    Train, Val, Test = processor.load_and_process_data()
    (X_train, y_train), (X_val, y_val), (X_test, y_test) = Train, Val, Test

    # 8 input features (all of them), no pooling, no padding.
    model = CNN(
        8,
        INPUT_STEPS,
        OUTPUT_STEPS,
        kernel_size=3,
        pool_kernel=0,
        padding=False,
    )

    # Report how many trainable parameters this configuration has.
    trainable = [p for p in model.parameters() if p.requires_grad]
    num_params = sum(p.numel() for p in trainable)
    print("Number of parameters: ", num_params)

    history = train(model, X_train, y_train, X_val, y_val, epochs=20)
    histories.append(history)


-------------- TESTING INPUT STEPS:  24 ----------------

Step 1/5: Fetching, cleaning, and engineering features...


  df = pd.read_csv(data_url)
  df = df.fillna(method='ffill')


Step 2/5: Resampling data to hourly and setting 'Global_active_power' as target...


  df_hourly = df.resample('H').agg(agg_dict)
  df_hourly = df_hourly.fillna(method='ffill')


Step 3/5: Splitting data and applying scaler...
Step 4/5: Creating time-series windows...
Step 5/5: Data processing complete.
Number of parameters:  627960
Epoch 1/20 - train_loss: 0.016564 - val_loss: 0.014551
Epoch 2/20 - train_loss: 0.014239 - val_loss: 0.013645
Epoch 3/20 - train_loss: 0.013533 - val_loss: 0.013305
Epoch 4/20 - train_loss: 0.013236 - val_loss: 0.012903
Epoch 5/20 - train_loss: 0.013005 - val_loss: 0.012889
Epoch 6/20 - train_loss: 0.012828 - val_loss: 0.013061
Epoch 7/20 - train_loss: 0.012614 - val_loss: 0.013108
Epoch 8/20 - train_loss: 0.012425 - val_loss: 0.012864
Epoch 9/20 - train_loss: 0.012252 - val_loss: 0.012785
Epoch 10/20 - train_loss: 0.012051 - val_loss: 0.013019
Epoch 11/20 - train_loss: 0.011744 - val_loss: 0.013102
Epoch 12/20 - train_loss: 0.011451 - val_loss: 0.013284
Epoch 13/20 - train_loss: 0.011180 - val_loss: 0.013660
Epoch 14/20 - train_loss: 0.010902 - val_loss: 0.013866


### Experiment No.2

Scale the kernel size with the input steps (`kernel_size = INPUT_STEPS // 8`), so that the model can take advantage of longer-range temporal dependencies.

In [None]:
# Experiment 2: sweep the input window length and scale the convolution
# kernel with it (kernel_size = INPUT_STEPS // 8), keeping the forecast
# horizon (OUTPUT_STEPS) fixed.
OUTPUT_STEPS = 24
INPUT_STEPS_ARRAY = [24, 48, 72, 96, 120, 144, 168]
exp2_histories = []  # one train(...) result per entry of INPUT_STEPS_ARRAY, in order

for INPUT_STEPS in INPUT_STEPS_ARRAY:

    print("\n-------------- TESTING INPUT STEPS: ", INPUT_STEPS, "----------------\n")

    # Rebuild the dataset for this window length; the processor is
    # parameterised by input_steps, so it is re-created on every iteration.
    processor = DataProcessor(input_steps=INPUT_STEPS, output_steps=OUTPUT_STEPS)
    Train, Val, Test = processor.load_and_process_data()

    X_train, y_train = Train
    X_val, y_val = Val
    X_test, y_test = Test

    # Kernel grows proportionally with the window: 24 -> 3, 48 -> 6, ..., 168 -> 21.
    kernel_size = INPUT_STEPS // 8

    model = CNN(8, INPUT_STEPS, OUTPUT_STEPS, kernel_size=kernel_size, pool_kernel=0, padding=False) # input all features, no pooling, no padding

    # print model num_params
    num_params = sum(p.numel() for p in model.parameters() if p.requires_grad)
    print("Number of parameters: ", num_params)

    # Store results in this experiment's own list. The original appended to
    # `histories`, which mixed these runs into Experiment 1's results and
    # left `exp2_histories` empty.
    exp2_histories.append(
        train(model, X_train, y_train, X_val, y_val, epochs=20)
    )