# LSTM

In [4]:
import torch.nn as nn

import sys
# Extend the module search path with the parent directory so the project-local
# `models` module imported below can be found. The entry is a relative path, so
# it is resolved against the kernel's current working directory, not against
# this notebook's location.
# NOTE(review): the recorded output of this cell is
# "ModuleNotFoundError: No module named 'statsmodels'" - presumably a transitive
# import of `models`; confirm the environment has it installed.
sys.path.append('../')  # Go up to src/ directory
from models import LSTM_model

ModuleNotFoundError: No module named 'statsmodels'

# Setting up data and dataloader

In [None]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader
from sklearn.preprocessing import StandardScaler

from dataset import EnergyPriceDataset, load_and_preprocess_energy_data

# Read the preprocessed data set through the project helper.
df = load_and_preprocess_energy_data('../../data/energy_data.csv')

# Input columns fed to the model, and the column it has to predict.
feature_cols = [
    # calendar information
    'Hour', 'day_nr', 'week_nr', 'year', 'month',
    'day_of_year_sin', 'day_of_year_cos',
    # day-ahead forecasts
    'wind_forecast_dah_mw', 'consumption_forecast_dah_mw',
    'temp_forecast_dah_celcius', 'temp_norm_celcius',
    # derived temperature features
    'heating_demand_interaction', 'temp_deviation',
    # previous value of the target
    'spot_lag1',
]
target_col = 'spot'

# First 80% of the rows are used for training, the remainder for testing.
# (Assumes the rows are already in time order - TODO confirm in the loader.)
split_idx = int(len(df) * 0.8)
train_df = df.iloc[:split_idx]
test_df = df.iloc[split_idx:]

# Standardise inputs and target separately. Both scalers are fitted on the
# training rows only and then re-used unchanged for the test rows.
scaler_X = StandardScaler().fit(train_df[feature_cols])
scaler_y = StandardScaler().fit(train_df[[target_col]])

train_features = scaler_X.transform(train_df[feature_cols])
train_targets = scaler_y.transform(train_df[[target_col]])

test_features = scaler_X.transform(test_df[feature_cols])
test_targets = scaler_y.transform(test_df[[target_col]])

# Window length handed to the dataset: 24 = one day, 168 = one week.
sequence_length = 48
train_dataset = EnergyPriceDataset(train_features, train_targets, sequence_length)
test_dataset = EnergyPriceDataset(test_features, test_targets, sequence_length)

# Both loaders iterate in stored order (no shuffling) with the same batch size.
loader_kwargs = {'batch_size': 32, 'shuffle': False}
train_loader = DataLoader(train_dataset, **loader_kwargs)
test_loader = DataLoader(test_dataset, **loader_kwargs)

# LSTM Training loop

In [None]:
from tqdm import tqdm

# Build the network, the loss and the optimiser. The input width is taken from
# the feature list (14 columns) so the two cannot drift apart when features are
# added or removed.
model = LSTM_model(input_size=len(feature_cols))
loss_func = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

# NOTE: this fixed epoch count is only a rough approximation.
# A proper epoch range is found by running expanding-window cross-validation
# (see other script).
num_epochs = 31

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0

    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")

    for X_batch, y_batch in progress_bar:
        optimizer.zero_grad()
        # Call the module itself instead of .forward() so that any registered
        # forward/backward hooks are executed.
        predictions = model(X_batch)
        # NOTE(review): MSELoss broadcasts when the two shapes differ - confirm
        # that predictions and y_batch have the same shape.
        loss = loss_func(predictions, y_batch)
        loss.backward()
        optimizer.step()

        batch_loss = loss.item()
        total_loss += batch_loss
        # The bar shows the loss of the most recent batch only.
        progress_bar.set_postfix({'loss': batch_loss})

    # Report the average over all batches; the per-batch value on the progress
    # bar is too noisy to compare epochs.
    mean_loss = total_loss / max(len(train_loader), 1)
    print(f"Epoch {epoch+1}/{num_epochs} mean train loss: {mean_loss:.4f}")

Epoch 1/50: 100%|██████████| 219/219 [00:02<00:00, 101.53it/s, loss=0.0946]
Epoch 2/50: 100%|██████████| 219/219 [00:01<00:00, 120.44it/s, loss=0.074] 
Epoch 3/50: 100%|██████████| 219/219 [00:01<00:00, 115.99it/s, loss=0.0677]
Epoch 4/50: 100%|██████████| 219/219 [00:01<00:00, 120.82it/s, loss=0.0619]
Epoch 5/50: 100%|██████████| 219/219 [00:01<00:00, 120.34it/s, loss=0.0569]
Epoch 6/50: 100%|██████████| 219/219 [00:01<00:00, 109.64it/s, loss=0.0513]
Epoch 7/50: 100%|██████████| 219/219 [00:01<00:00, 110.55it/s, loss=0.0458]
Epoch 8/50: 100%|██████████| 219/219 [00:01<00:00, 117.10it/s, loss=0.0417] 
Epoch 9/50: 100%|██████████| 219/219 [00:02<00:00, 99.85it/s, loss=0.0393] 
Epoch 10/50: 100%|██████████| 219/219 [00:02<00:00, 107.06it/s, loss=0.039]  
Epoch 11/50: 100%|██████████| 219/219 [00:01<00:00, 117.20it/s, loss=0.0381] 
Epoch 12/50: 100%|██████████| 219/219 [00:01<00:00, 119.11it/s, loss=0.0383]
Epoch 13/50: 100%|██████████| 219/219 [00:01<00:00, 118.36it/s, loss=0.0384] 
Epoc

# Save for later use

In [None]:
import pickle
# Persist the trained weights for later use.
torch.save(model.state_dict(), 'lstm_model.pth')

# Store the fitted scalers next to the weights: undoing the scaling of the data
# later requires the scaling parameters that were used here. The context
# manager guarantees the file is flushed and closed.
# NOTE: only unpickle this file from a trusted location - pickle.load can
# execute arbitrary code.
with open('scalers.pkl', 'wb') as scaler_file:
    pickle.dump((scaler_X, scaler_y), scaler_file)
