# LSTM

In [5]:
import torch.nn as nn

import sys
# Extend the module search path with the parent directory so that the
# project-local `models` module below can be imported. The entry is a relative
# path, so it is resolved against the current working directory -- presumably
# the folder this notebook lives in; confirm when launching from elsewhere.
sys.path.append('../')  # Go up to src/ directory
from models import LSTM_model

# Setting up the data and dataloaders

In [6]:
import numpy as np
import pandas as pd
import torch
from torch.utils.data import DataLoader
from sklearn.preprocessing import StandardScaler

from dataset import EnergyPriceDataset, load_and_preprocess_energy_data

# --- Load -------------------------------------------------------------------
DATA_PATH = '../../data/energy_data.csv'
df = load_and_preprocess_energy_data(DATA_PATH)

# Columns fed to the model, and the column it has to predict.
feature_cols = [
    'Hour',
    'day_nr',
    'week_nr',
    'year',
    'wind_forecast_dah_mw',
    'consumption_forecast_dah_mw',
    'temp_forecast_dah_celcius',
    'temp_norm_celcius',
    'spot_lag1',
]
target_col = 'spot'

# --- Train / test split -----------------------------------------------------
# The split keeps row order: the first 80% of rows form the training set and
# the remaining rows form the test set.
TRAIN_FRACTION = 0.8
split_idx = int(len(df) * TRAIN_FRACTION)
train_df, test_df = df[:split_idx], df[split_idx:]

# --- Scaling ----------------------------------------------------------------
# Both scalers are fitted on the training split only and then re-applied to
# the test split, so no statistics of the test data enter the fit.
scaler_X, scaler_y = StandardScaler(), StandardScaler()

train_features = scaler_X.fit_transform(train_df[feature_cols])
test_features = scaler_X.transform(test_df[feature_cols])

# Double brackets keep the target as a single-column 2-D frame for the scaler.
train_targets = scaler_y.fit_transform(train_df[[target_col]])
test_targets = scaler_y.transform(test_df[[target_col]])

# --- Datasets and loaders ---------------------------------------------------
sequence_length = 24  # 24 = one day, 168 = one week
train_dataset = EnergyPriceDataset(train_features, train_targets, sequence_length)
test_dataset = EnergyPriceDataset(test_features, test_targets, sequence_length)

# Same batching for both splits; samples are served in dataset order.
loader_kwargs = dict(batch_size=32, shuffle=False)
train_loader = DataLoader(train_dataset, **loader_kwargs)
test_loader = DataLoader(test_dataset, **loader_kwargs)

# LSTM Training loop

In [7]:
from tqdm import tqdm  # just for a nice progress bar

model = LSTM_model()  # using default values
loss_func = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

num_epochs = 100
epoch_losses = []  # mean training loss of every completed epoch

for epoch in range(num_epochs):
    model.train()
    total_loss = 0.0
    num_batches = 0

    # tqdm wraps the loader: iterating the bar yields the loader's batches
    # while the bar itself keeps track of progress.
    progress_bar = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs}")

    for X_batch, y_batch in progress_bar:
        optimizer.zero_grad()

        # Call the module itself rather than .forward(): going through
        # nn.Module.__call__ makes sure registered hooks are executed.
        predictions = model(X_batch)
        loss = loss_func(predictions, y_batch)
        loss.backward()
        optimizer.step()

        # Read the scalar once and reuse it for both the running sum and
        # the progress-bar display.
        batch_loss = loss.item()
        total_loss += batch_loss
        num_batches += 1
        progress_bar.set_postfix({'loss': batch_loss})

    # Keep the per-epoch average so the loss history can be inspected after
    # training; skipped when the loader produced no batches.
    if num_batches:
        epoch_losses.append(total_loss / num_batches)
    

Epoch 1/100: 100%|██████████| 219/219 [00:02<00:00, 96.72it/s, loss=0.0944] 
Epoch 2/100: 100%|██████████| 219/219 [00:01<00:00, 110.26it/s, loss=0.0794]
Epoch 3/100: 100%|██████████| 219/219 [00:02<00:00, 108.41it/s, loss=0.0725]
Epoch 4/100: 100%|██████████| 219/219 [00:02<00:00, 97.09it/s, loss=0.0629] 
Epoch 5/100: 100%|██████████| 219/219 [00:02<00:00, 107.12it/s, loss=0.0624]
Epoch 6/100: 100%|██████████| 219/219 [00:01<00:00, 112.02it/s, loss=0.0594]
Epoch 7/100: 100%|██████████| 219/219 [00:02<00:00, 108.56it/s, loss=0.0572]
Epoch 8/100: 100%|██████████| 219/219 [00:02<00:00, 109.22it/s, loss=0.053] 
Epoch 9/100: 100%|██████████| 219/219 [00:02<00:00, 109.31it/s, loss=0.0489]
Epoch 10/100: 100%|██████████| 219/219 [00:01<00:00, 111.59it/s, loss=0.0445]
Epoch 11/100: 100%|██████████| 219/219 [00:02<00:00, 108.74it/s, loss=0.041] 
Epoch 12/100: 100%|██████████| 219/219 [00:02<00:00, 107.18it/s, loss=0.0385] 
Epoch 13/100: 100%|██████████| 219/219 [00:02<00:00, 109.00it/s, loss=0.

# Save for later use

In [9]:
import pickle

# Store the trained parameters of the model.
torch.save(model.state_dict(), 'lstm_model.pth')

# When we want to undo the scaling of the data, we need to know the used
# scaling parameters, so the fitted scalers are stored as well. The `with`
# block guarantees the file is flushed and closed once the dump is done.
with open('scalers.pkl', 'wb') as scaler_file:
    pickle.dump((scaler_X, scaler_y), scaler_file)
