In [None]:
# Enable IPython's autoreload extension in mode 2: imported modules are reloaded
# before each cell executes, so edits to the project's .py files are picked up
# without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [None]:
import numpy as np
import pandas as pd
from model import *
from viz import *
from dataset import *
from data import *

In [None]:
# Load the two inputs used throughout the notebook: the per-zone terminal loads
# of the GATES building and the weather series, both from the 2024 dataset folder.
# NOTE(review): load_tload / load_weather come from the star imports above;
# presumably both return DataFrames indexed by timestamp — confirm.
df = load_tload('data/buildings/datasets/2024/GATES_zone-tloads.csv')
weather = load_weather('data/buildings/datasets/2024/weather.csv')

In [None]:
# Run the project's nan_viz helper on both frames before modelling.
# NOTE(review): presumably this visualises missing values — confirm against its definition.
nan_viz(df)
nan_viz(weather)

In [None]:
# Visualise the terminal-load frame with the project's tload_viz helper
# (its exact output is not visible from this notebook).
tload_viz(df)

We have a time series for a given building, corresponding to each zone's terminal load, a measure of how needy a zone is in terms of cooling or reheat.

We'll denote this:

$$T_{load}^{(z,t)}$$

where $t$ indexes time and $z$ denotes the zone of the building. We have 382 zones in this building, so at each time step the building-level load is a vector
$$T_{load, building}^{t} \in \mathbb{R}^{382 \times 1}$$

We also have an input feature time series, which is common to the whole building. It includes features such as outside air temperature and solar irradiation.

$$W^{t}$$

In the code the weather dataframe is formatted like so:
| Date                | temperature | RH  | Tdew | wind | sun_rad | daily_rain |
|---------------------|-------------|-----|------|------|---------|------------|
| 2023-05-01 00:00:00 | 52.6        | 75.0| 44.8 | 9.2  | 0.0     | 0.00       |
| 2023-05-01 01:00:00 | 52.4        | 75.0| 44.7 | 7.4  | 0.0     | 0.00       |
| 2023-05-01 02:00:00 | 52.2        | 75.0| 44.4 | 9.0  | 0.0     | 0.00       |

And the terminal load dataframe (one column per zone) is formatted like so:

| Date                | VAV2-33 | VAV2-17   | VAV3-18 | VAV4-22    | VAV2-20 | VAV1-18 | VAV2-03   | VAV3-06 | VAV2-31 | VAV2-29 | ... | VAV2-11 | VAV2-32 | VAV4-25    | VAV4-08 | VAV1-02   | VAV0-00-4  | VAV0-04 | VAV1-12 | VAV2-18 | VAV3-15 |
|---------------------|---------|-----------|---------|------------|---------|---------|-----------|---------|---------|---------|-----|---------|---------|------------|---------|-----------|------------|---------|---------|---------|---------|
| 2023-05-01 00:00:00 | 0.0     | 0.000000  | 0.0     | -10.687083 | 0.0     | 0.0     | -5.494500 | 0.000000| 0.0     | -100.0  | ... | 0.0     | 0.0     | -16.662916 | 0.0     | -0.435333 | -8.518917  | 0.0     | 0.0     | 0.0     | 0.000000|
| 2023-05-01 01:00:00 | 0.0     | -0.355917 | 0.0     | -10.881667 | 0.0     | 0.0     | -7.798417 | 0.000000| 0.0     | -100.0  | ... | 0.0     | 0.0     | -16.492833 | 0.0     | -7.894917 | -7.576250  | 0.0     | 0.0     | 0.0     | 0.000000|
| 2023-05-01 02:00:00 | 0.0     | -0.559250 | 0.0     | -10.827083 | 0.0     | 0.0     | -6.123417 | 0.000000| 0.0     | -100.0  | ... | 0.0     | 0.0     | -16.670916 | 0.0     | -6.497750 | -8.176333  | 0.0     | 0.0     | 0.0     | -1.364750|

I want to train an LSTM architecture that, given the past 2 weeks of the terminal load vector as input, tries to predict the next week. I also have the exact weather for the two past weeks and for the week being predicted (this could be a forecast in real-life use) to help the model's prediction, as the two should be correlated in some way.

Give me a way to start formatting my code to have such a model, train it and test it. I want to have September as my validation set.


# Quick look at the data

In [None]:
# Overlay the May terminal loads of the first ten zones to eyeball the signal.
# NOTE(review): `plt` is not imported explicitly in this notebook; it is presumably
# provided by one of the star imports — confirm.
is_may = df.index.month == 5  # loop-invariant row mask, computed once
plt.figure(figsize=(15, 5))
for col in df.columns[:10]:
    plt.plot(df.loc[is_may, col], label=col)
# The labels passed to plot() are only rendered once a legend is requested.
plt.title('Terminal load of the first 10 zones (May)')
plt.xlabel('Date')
plt.ylabel('Terminal load')
plt.legend(loc='upper right', ncol=2)
plt.show()

# Hyperparameters choice

In [None]:
# Number of training epochs; the same value is passed to every
# get_trained_model call below (base model and dropout model).
n_epochs = 400

# Base model

In [None]:
# Both frames must carry a DatetimeIndex; pd.to_datetime leaves an index that
# is already datetime-typed unchanged, so re-running this cell is harmless.
for frame in (df, weather):
    frame.index = pd.to_datetime(frame.index)

# Train the base model (no dropout argument given). get_trained_model hands back
# the model, the training/validation losses, and the datasets and loaders it built.
(
    model,
    train_losses,
    val_losses,
    train_dataset,
    val_dataset,
    train_loader,
    val_loader,
) = get_trained_model(df, weather, n_epochs=n_epochs)

# Saving the models

In [None]:
import torch
import pickle

# Persist the base model together with its losses, datasets and loaders.
# NOTE(review): presumably the weights go to model_path and the remaining
# objects to data_path — confirm in save_all.
model_path = 'model/model.pth'
data_path = 'model/data.pkl'
save_all(
    model,
    train_losses,
    val_losses,
    train_dataset,
    val_dataset,
    train_loader,
    val_loader,
    model_path,
    data_path,
)

# Loading the model

In [None]:

# Restore the base model and its artefacts from the files written by save_all.
# NOTE(review): data_path is a .pkl file; if load_all unpickles it, only load
# files you produced yourself.
model_path = 'model/model.pth'
data_path = 'model/data.pkl'
model_class = TerminalLoadPredictor  # model class handed to load_all
(
    model,
    train_losses,
    val_losses,
    train_dataset,
    val_dataset,
    train_loader,
    val_loader,
) = load_all(model_class, model_path, data_path)

In [None]:
# Visualise the base model on the validation dataset, passing in the recorded
# training/validation losses and the zone names taken from df's columns.
# NOTE(review): the scaler comes from train_dataset here, whereas the dropout
# section reads it from the validation dataset — confirm both expose the same
# fitted scaler.
visualize_results(
    model=model,
    dataset=val_dataset,  # The dataset object
    load_scaler=train_dataset.load_scaler,  # The actual scaler object
    train_losses=train_losses,
    val_losses=val_losses,
    zones_names=df.columns,
    sample_idx=0,
    n_samples=3,
)

# Dropout model

In [None]:
# Train a second model with the same data and epoch budget as the base model,
# this time with p_dropout=0.2. Every returned object gets a `_dropout` suffix
# so the base model's variables stay available for the comparison below.
(
    model_dropout,
    train_losses_dropout,
    val_losses_dropout,
    train_dataset_dropout,
    val_dataset_dropout,
    train_loader_dropout,
    val_loader_dropout,
) = get_trained_model(
    df,
    weather,
    n_epochs=n_epochs,
    p_dropout=0.2,
)

# Saving the models

In [None]:
# Example usage
model_path = 'model/model_dropout.pth'
data_path = 'model/data.pkl'
save_all(model_dropout, train_losses_dropout, val_losses_dropout, train_dataset_dropout, val_dataset_dropout, train_loader_dropout, val_loader_dropout, model_path, data_path)

In [None]:
model_path = 'model/model_dropout.pth'
data_path = 'model/data.pkl'
model_dropout, train_losses_dropout, val_losses_dropout, train_dataset_dropout, val_dataset_dropout, train_loader_dropout, val_loader_dropout = load_all(model_path, data_path)

In [None]:
# Visualise the dropout model on its validation dataset, passing in the recorded
# training/validation losses and the zone names taken from df's columns.
# NOTE(review): the scaler is read from the validation dataset here, whereas the
# base-model call reads it from train_dataset — confirm both expose the same
# fitted scaler.
visualize_results(
    model=model_dropout,
    dataset=val_dataset_dropout,  # The dataset object
    load_scaler=val_dataset_dropout.load_scaler,  # The actual scaler object
    train_losses=train_losses_dropout,
    val_losses=val_losses_dropout,
    zones_names=df.columns,
    sample_idx=0,
    n_samples=3,
)

# Compare models

In [None]:
# Parallel lists for the comparison: position i in each list refers to the
# same model (0 = base model, 1 = dropout model).
models = [model, model_dropout]
train_losses_all = [train_losses, train_losses_dropout]
val_losses_all = [val_losses, val_losses_dropout]
model_names = ['No Dropout', 'Dropout']

In [None]:
# Compare the two models using the parallel lists of models, losses and
# display names (built in the cell just before this one).
compare_results(
    models=models,
    train_losses=train_losses_all,
    val_losses=val_losses_all,
    model_names=model_names,
)

# Error Analysis

In [None]:
# Evaluate the dropout model on its validation dataset; calculate_errors returns
# the errors, the predictions and the targets, in that order.
# NOTE(review): the scaler is presumably used to map values back to their
# original units — confirm in calculate_errors.
all_errors, all_predictions, all_targets = calculate_errors(model_dropout, val_dataset_dropout, val_dataset_dropout.load_scaler)

In [None]:
from viz import plot_dataset_error, plot_bad_samples_predictions

# Plot the error over the whole validation dataset.
plot_dataset_error(all_errors)

# Plot predictions against targets for 3 samples and 10 zones, twice: once with
# the helper's default zone selection and once with good_zones=True.
# NOTE(review): presumably the default picks the worst-predicted zones and
# good_zones=True the best-predicted ones — confirm in viz.
plot_bad_samples_predictions(all_errors, all_predictions, all_targets, N_samples=3, N_zones=10)
plot_bad_samples_predictions(all_errors, all_predictions, all_targets, good_zones=True,N_samples=3, N_zones=10)

# Building wide prediction

In [None]:
# Recompute errors, predictions and targets for the dropout model on its
# validation dataset. This repeats the call made in the Error Analysis section
# with identical arguments, so this section can be run on its own.
all_errors, all_predictions, all_targets = calculate_errors(model_dropout, val_dataset_dropout, val_dataset_dropout.load_scaler)

In [None]:
# Stack the per-sample prediction blocks end to end along the first axis, so
# the sample dimension disappears and the rows of all samples follow each other.
n_windows = all_predictions.shape[0]
per_window = [all_predictions[i] for i in range(n_windows)]
concatenated_predictions = np.concatenate(per_window, axis=0)
concatenated_predictions.shape

In [None]:
# Wrap the stacked predictions in a frame with one column per zone.
predictions_df = pd.DataFrame(concatenated_predictions, columns=df.columns)
# Floor the predictions at -100 (the lowest load value visible in the sample
# table earlier in this notebook; presumably the physical lower bound — confirm).
# clip() is vectorised, works on pandas versions without DataFrame.map, and
# leaves NaN predictions as NaN instead of silently turning them into -100.
predictions_df = predictions_df.clip(lower=-100)
# Label the rows with the last timestamps of df.
# NOTE(review): this assumes the stacked validation samples line up one-to-one
# with the final rows of df (no overlapping windows, validation period at the
# end of the data) — confirm against how the validation dataset is built.
predictions_df.index = df.iloc[-concatenated_predictions.shape[0]:].index

In [None]:
from data import error_viz
# Signed error per zone and timestamp: prediction minus the measured load over
# the same trailing rows of df that the predictions were indexed with.
n_rows = concatenated_predictions.shape[0]
actual_tail = df.iloc[-n_rows:]
error_df = predictions_df - actual_tail
# Order the zones from smallest to largest mean absolute error.
zone_order = error_df.abs().mean().sort_values().index
error_df = error_df[zone_order]
error_viz(error_df)