### Simulations
The objective is to run the model on its own predicted data, so that we get a continuously running simulation. 
We'll run an hour-by-hour simulation for 24 hours and compare it to the benchmark model that computes the 24-hour prediction at once. 


In [None]:
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import TimeSeriesSplit
from cnn_model_yin import CNN, cross_validate, train
import torch
import torch.nn as nn
import random
import os, sys

sys.path.append(os.path.abspath('..'))  # add parent directory to sys.path
from data_cleanup import DataProcessor

# Seed every RNG this notebook touches so reruns are comparable (best-effort)
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

To run a simulation we need to compute a prediction for every input feature. That is why we don't use a flatten layer; instead, we let the final convolutional layer compute the prediction for every feature.

In [None]:
class HourlyCNN(nn.Module):
    """1-D CNN that turns a window of hourly features into a one-step forecast.

    Input:  tensor of shape [batch, 8, T] (8 feature channels, T time steps).
    Output: tensor of shape [batch, 8, 1], i.e. one predicted value per
    feature channel, so a prediction can be appended to the input window and
    fed back into the network.

    The three unpadded convolutions (kernel sizes 6, 5 and 4) shorten the time
    axis by 5 + 4 + 3 = 12 steps, so T must be at least 13. Whatever length
    remains is averaged down to a single step by the adaptive pooling layer.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv1d(in_channels=8, out_channels=32, kernel_size=6)
        self.conv2 = nn.Conv1d(in_channels=32, out_channels=64, kernel_size=5)
        self.conv3 = nn.Conv1d(in_channels=64, out_channels=32, kernel_size=4)
        # Kernel size 1: mixes channels only, mapping back to the 8 features
        self.conv4 = nn.Conv1d(in_channels=32, out_channels=8, kernel_size=1)
        # Collapse time dimension → exactly 1 step
        self.pool = nn.AdaptiveAvgPool1d(output_size=1)

        # Dry run on a 24-step window to report the output shape
        with torch.no_grad():
            test_output = torch.randn(1, 8, 24)
            for layer in (self.conv1, self.conv2, self.conv3, self.conv4, self.pool):
                test_output = layer(test_output)

            print(test_output.shape)  # [1, 8, 1]

    def forward(self, x):
        """Return the next-step prediction, shape [batch, 8, 1], for x of shape [batch, 8, T]."""
        # torch.relu is used because `F` (torch.nn.functional) is not imported
        # in this notebook; it is the same function as F.relu.
        x = torch.relu(self.conv1(x))
        x = torch.relu(self.conv2(x))
        x = torch.relu(self.conv3(x))
        # NOTE(review): the ReLU after conv4 keeps every prediction >= 0;
        # confirm the features are scaled to a non-negative range.
        x = torch.relu(self.conv4(x))
        return self.pool(x)

In [None]:
def sim_hourly(input_steps, output_steps_size, model):
    """Roll a one-step model forward autoregressively.

    Each iteration feeds the current window to ``model``, records the
    prediction, then slides the window: the prediction is appended and the
    oldest time step is dropped, so later steps are driven by the model's own
    outputs.

    Args:
        input_steps: tensor of shape [C, T], the seed window (left unmodified).
        output_steps_size: number of steps to simulate.
        model: callable taking a [1, C, T] tensor and returning the next step
            as [1, C, 1] or [1, C].

    Returns:
        Tensor of shape [C, output_steps_size]. If ``output_steps_size`` is
        zero or negative, an empty [C, 0] tensor is returned.

    Gradients are tracked unless the caller wraps the call in torch.no_grad().
    """
    window = input_steps.clone()  # shape [C, T]

    if output_steps_size <= 0:
        # torch.cat raises on an empty list, so handle "nothing to simulate" here
        return window.new_empty((window.shape[0], 0))

    predictions = []
    for _ in range(output_steps_size):
        # model expects shape [1, C, T]
        step = model(window.unsqueeze(0)).squeeze(0)

        # normalise a flat [C] prediction to a [C, 1] column
        if step.ndim == 1:
            step = step.unsqueeze(1)

        predictions.append(step)

        # append new prediction and remove oldest timestep
        window = torch.cat([window, step], dim=1)[:, 1:]

    # concatenate all simulated timesteps → [C, output_steps_size]
    return torch.cat(predictions, dim=1)

    
# build a cumulative error function
def point_mse_error(y_true, y_pred, channel=1):
    """Mean squared error of a single feature channel, averaged over time.

    Args:
        y_true, y_pred: tensors shaped [channels, time].
        channel: index of the channel to score. Defaults to 1, the channel
            this function previously had hard-coded.

    Returns:
        The MSE as a Python float.
    """
    residual = y_true[channel] - y_pred[channel]
    return residual.pow(2).mean().item()

In [None]:
# Train the one-step-ahead model

# Window sizes: 24 time steps in, 1 time step out
INPUT_STEPS, OUTPUT_STEPS = 24, 1

# NOTE(review): this processor is built without get_all_label_features=True,
# while the later evaluation cell passes it. HourlyCNN predicts all 8
# features, so confirm y_train really holds a label for every feature.
processor = DataProcessor(input_steps=INPUT_STEPS, output_steps=OUTPUT_STEPS)
Train, Val, Test = processor.load_and_process_data()
(X_train, y_train), (X_val, y_val), (X_test, y_test) = Train, Val, Test

model = HourlyCNN()

# Report how many trainable parameters the network has
num_params = sum(
    weights.numel() for weights in model.parameters() if weights.requires_grad
)
print("Number of parameters: ", num_params)

history = train(model, X_train, y_train, X_val, y_val, epochs=20)

# Learning curves: training vs. validation loss per epoch
plt.plot(history["train_loss"], label="train")
plt.plot(history["val_loss"], label="val")
plt.legend()
plt.show()

Now, to test the model, we need to generate 24-hour labels for every feature, to be compared with the simulated ones.

In [None]:
# Rebuild the dataset for evaluation: 24 steps in, 24 steps out, with labels
# requested for all features (get_all_label_features=True).
# Note that this rebinds processor, Train/Val/Test and the X_*/y_* arrays.
processor = DataProcessor(
    input_steps=24,
    output_steps=24,
    get_all_label_features=True,
)
Train, Val, Test = processor.load_and_process_data()
(X_train, y_train), (X_val, y_val), (X_test, y_test) = Train, Val, Test

# Test split as float32 tensors
X_test_t = torch.tensor(X_test, dtype=torch.float32)
y_test_t = torch.tensor(y_test, dtype=torch.float32)

# Deterministic loader: small batches, original order preserved
test_loader = torch.utils.data.DataLoader(
    torch.utils.data.TensorDataset(X_test_t, y_test_t),
    batch_size=4,
    shuffle=False,
)

In [None]:
# Score the 24-step closed-loop simulation on every test window.
# total_cumulative_error is the sum of per-window MSEs; mean_error divides
# that sum by the number of test windows.

total_cumulative_error = 0.0
model.eval()
with torch.no_grad():
    for inputs, labels in test_loader:
        # swap the last two axes; assumes the loader yields
        # [batch, time, channels] and the model wants [batch, channels, time]
        inputs = inputs.permute(0, 2, 1)
        labels = labels.permute(0, 2, 1)
        for window, target in zip(inputs, labels):
            sim_result = sim_hourly(window, 24, model)
            # argument order follows point_mse_error(y_true, y_pred)
            total_cumulative_error += point_mse_error(target, sim_result)


mean_error = total_cumulative_error / len(test_loader.dataset)
model.train()  # go back to training mode

print("Cumulative Error: %.3f" % total_cumulative_error)
print("Mean Error: %.3f" % mean_error)

In [None]:
## Let's look at a single sample: simulate the first window of the first batch

sample_batch_input, sample_batch_label = next(iter(test_loader))
# swap the last two axes, as in the error computation
sample_batch_input = sample_batch_input.permute(0, 2, 1)
sample_batch_label = sample_batch_label.permute(0, 2, 1)
print(sample_batch_input.shape)
print(sample_batch_label.shape)

with torch.no_grad():
    # Use the trained `model`; the previous name `hourly_net` is never defined
    # in this notebook and raised NameError on a fresh kernel.
    sim = sim_hourly(sample_batch_input[0], 24, model)
print(sim.shape)

# NOTE(review): this plots channel 0, whereas point_mse_error scores
# channel 1 by default; confirm which channel is the target.
plt.plot(sim[0].numpy(), label="Sim")
plt.plot(sample_batch_label[0][0].numpy(), label="Label")
plt.xlabel("Simulated step")
plt.ylabel("Channel 0 value")
plt.title("Simulation vs. label (first test window)")
plt.legend()
plt.show()