#### Model type: Markovian RNN


In [None]:
%load_ext autoreload
%autoreload 2

from tweedejaars_project import *
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence
import matplotlib.pyplot as plt

In [None]:
# Columns used as model inputs. They are passed to get_splits, and the list
# length is used as the RNN input size.
# NOTE(review): 'publish_info' (added by difference_published) is not listed
# here -- confirm it is meant to be left out of the model inputs.
features = [
    "import_capacity",
    "mid_price_published",
    "upward_dispatch_published",
    "downward_dispatch_published",
    'min_ptu_price_known',
    "max_ptu_price_known",
    "settlement_price_bestguess",
    'PTU',
    'forecast_wind',
    'forecast_solar',
    'forecast_demand',
    'time_since_last_two_sided',
    'two_sided_daily_count',
    'ptu_id',
    'naive_strategy_action',
    'minute_in_ptu',
    "hvq_delta",
    "residual_load",
    "dispatch_diff",
    "igcc_diff",
    "is_balanced",
    "weekday",
    "workday",
    "hour",
    "month",
    "day",
    "minute",

]
# Column passed to get_splits as the prediction target
# (original note on this line: "already used" -- meaning unclear, TODO confirm)
target = 'fix_two_sided_ptu_alt'

In [None]:
# Load the working dataframe. load_df is not defined in this file --
# presumably it comes from the tweedejaars_project star import; verify.
df = load_df()

In [None]:
# Forward-fill gaps (NaN) in the selected columns
def interpolate_feature(df, features):
    """Forward-fill NaN values in each of the given columns.

    Each listed column is replaced by its forward-filled version, so a NaN
    takes the last preceding non-NaN value of the same column. Leading NaNs
    (with no earlier value) stay NaN. The frame is modified in place and
    also returned.

    Args:
        df: DataFrame containing every column named in ``features``.
        features: Iterable of column names to forward-fill.

    Returns:
        The same ``df`` object, with the listed columns forward-filled.
    """
    for column in features:
        filled = df[column].ffill()
        df[column] = filled
    return df


# Forward-fill the forecast, dispatch and derived columns listed below.
# Several of them (e.g. "vwap_avg" and the "*_delta" forecast columns) do not
# appear in the `features` list, so this also touches columns the model does not read.
df = interpolate_feature(df, 
                        ['forecast_wind', 'forecast_solar', 
                         'forecast_demand', "upward_dispatch_published", 
                         "downward_dispatch_published", "vwap_avg",
                        "forecast_wind_delta", "forecast_solar_delta",
                        "forecast_demand_delta", "residual_load",
                        "dispatch_diff", "igcc_diff", 'hvq_delta' ])

In [None]:
# Summarise which of two columns carry a value in one four-state feature
def difference_published(df, features):
    """Add a ``publish_info`` column encoding which of two columns are set.

    Codes written per row (as floats):
        0.0 -- only ``features[0]`` has a value
        1.0 -- only ``features[1]`` has a value
        2.0 -- both are NaN
        3.0 -- both have a value

    The two source columns are left in the frame; only ``publish_info`` is
    added (or overwritten). The frame is modified in place and returned.

    Args:
        df: DataFrame containing both columns.
        features: Sequence whose first two entries are the column names.

    Returns:
        The same ``df`` object with the ``publish_info`` column set.
    """
    first_missing = df[features[0]].isna()
    second_missing = df[features[1]].isna()

    # (row mask, code) pairs; the four masks are mutually exclusive and
    # together cover every row.
    code_by_mask = (
        (~first_missing & second_missing, 0.),
        (first_missing & ~second_missing, 1.),
        (first_missing & second_missing, 2.),
        (~first_missing & ~second_missing, 3.),
    )
    masks = [mask for mask, _ in code_by_mask]
    codes = [code for _, code in code_by_mask]

    df['publish_info'] = np.select(masks, codes)
    return df

# Encode, per row, which of the published min/max price columns hold a value
# into the new 'publish_info' column.
df = difference_published(df, ['min_price_published', 'max_price_published'])

In [None]:
# Replace missing values with zero in the selected columns
def fill_vals_0(df, features):
    """Replace NaN with 0 in each of the given columns.

    The frame is modified in place and also returned.

    Args:
        df: DataFrame containing every column named in ``features``.
        features: Iterable of column names to zero-fill.

    Returns:
        The same ``df`` object, with NaNs in the listed columns set to 0.
    """
    for column in features:
        zero_filled = df[column].fillna(0)
        df[column] = zero_filled
    return df

# Zero-fill NaNs in the known min/max PTU price and best-guess settlement price columns
df = fill_vals_0(df, ['min_ptu_price_known', 'max_ptu_price_known', 'settlement_price_bestguess'])


In [None]:
# Hyperparameters
ptu_window = 10 # Number of past PTUs taken as history for each sample
batch_size = 15000  # Samples per DataLoader batch
input_size = len(features)  # Input size == number of feature columns
hidden_size = 2  # Hidden units per RNN layer
num_layers = 2  # Number of stacked RNN layers
output_size = 1 # Always 1: a single logit for the boolean target
num_epochs = 1  # Full passes over the training loader
learning_rate = 0.0001  # Step size handed to the optimizer

In [None]:
# Split the data into train / validation / test sets.
# get_splits is not defined in this file. Elsewhere in this file the splits are
# indexed with 'in' and 'out' (and 'df' on the validation split) --
# presumably every split carries all three keys; verify against get_splits.
splits = get_splits(df, features, target)
train_data = splits['train']
valid_data = splits['valid']
test_data = splits['test']

In [None]:
class TimeSeriesDataset(Dataset):
    """
    Dataset that yields, for every row, a fixed-size window of that row plus its history.

    Each item is a tuple ``(sequence, target, length)``:

    * ``sequence`` -- float32 tensor with ``ptu_history + ptu_length`` rows.
      Row 0 is the current row, followed by progressively older rows, followed
      by ``-inf`` padding rows.
    * ``target`` -- float32 scalar tensor: the ``data['out']`` value of the current row.
    * ``length`` -- ``row_idx - start_idx``, i.e. the number of valid rows
      MINUS ONE. The training and test loops in this file add 1 before packing.

    NOTE(review): because the window is newest-first, a unidirectional RNN that
    reads its last valid step finishes on the OLDEST row of the window --
    confirm that this ordering is intended.

    Args:
        data: Mapping with ``'in'`` (2-D features, one row per time step) and
            ``'out'`` (1-D targets); both are cast to float32.
        ptu_window: Number of whole PTUs of history to include.
        ptu_length: Number of rows per PTU (default 15).
    """
    def __init__(self, data, ptu_window, ptu_length=15):
        self.data_in = pd.DataFrame(data['in']).astype(np.float32)
        self.data_out = pd.Series(data['out']).astype(np.float32)
        self.ptu_window = ptu_window
        self.ptu_length = ptu_length

        self.ptu_history = self.ptu_window * self.ptu_length  # Entire window

        # Convert to NumPy once, so __getitem__ slices plain arrays instead of
        # paying pandas .iloc overhead for every single sample.
        self._in_values = self.data_in.to_numpy()
        self._out_values = self.data_out.to_numpy()

        self.sequence_indices, self.sequence_lengths = self.create_sequences()

    def create_sequences(self):
        """Compute the ``(start_idx, row_idx)`` window and its length for every row.

        The window start stays at 0 until ``ptu_history`` rows are available.
        From then on it advances by one whole PTU (``ptu_length`` rows) after
        every ``ptu_length`` rows, so the window grows within a PTU and then
        jumps forward.

        Returns:
            ``(sequence_indices, sequence_lengths)`` -- two lists with one
            entry per row; each length is ``row_idx - start_idx``.
        """
        sequence_indices, sequence_lengths = [], []

        start_idx = 0  # Index of the furthest row in history
        counter = self.ptu_length  # Rows left before the window start advances

        for row_idx in range(len(self.data_in)):
            sequence_indices.append((start_idx, row_idx))
            sequence_lengths.append(row_idx - start_idx)

            # Only start counting down once a full history is available.
            if row_idx >= self.ptu_history:
                counter -= 1
                if counter == 0:
                    start_idx += self.ptu_length
                    counter = self.ptu_length

        return sequence_indices, sequence_lengths

    def __len__(self):
        """Return the number of samples (one per input row)."""
        return len(self.sequence_indices)

    def __getitem__(self, idx):
        """Return ``(sequence, target, length)`` for sample ``idx``; see the class docstring."""
        start_idx, row_idx = self.sequence_indices[idx]
        length = self.sequence_lengths[idx]

        # Rows start_idx..row_idx inclusive, flipped so the current row comes first.
        window = self._in_values[start_idx:row_idx + 1]
        sequence = torch.tensor(window, dtype=torch.float32).flip(0)
        target = self._out_values[row_idx]

        # Every window holds length + 1 valid rows and is padded at the end up
        # to ptu_history + ptu_length rows. This single formula covers both the
        # warm-up rows (short history) and the rows inside a partially filled PTU.
        pad_rows = self.ptu_history + self.ptu_length - (length + 1)
        sequence = F.pad(sequence, (0, 0, 0, pad_rows), mode='constant', value=-np.inf)

        return sequence, torch.tensor(target, dtype=torch.float32), length

In [None]:
class MarkovModel(nn.Module):
    """Stacked ``nn.RNN`` followed by a linear head producing one output row per sequence.

    Args:
        input_size: Number of features per time step.
        hidden_size: Hidden units per RNN layer.
        num_layers: Number of stacked RNN layers.
        output_size: Width of the linear head's output.
    """
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super().__init__()
        # Simple RNN with dropout. The attribute is called `lstm` although it
        # holds a plain nn.RNN; the name is kept so the parameter names (and
        # any saved state_dict keys) stay unchanged.
        self.lstm = nn.RNN(input_size, hidden_size, num_layers, dropout=0.5, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, lengths):
        """Run the RNN over variable-length sequences and project the last valid step.

        Args:
            x: Batch-first tensor ``(batch, max_steps, input_size)``; steps at
                index >= ``lengths[i]`` are treated as padding and never reach
                the RNN.
            lengths: Number of valid steps per sequence (each >= 1). Passed to
                ``pack_padded_sequence`` as given.

        Returns:
            Tensor ``(batch, output_size)`` computed from each sequence's last
            valid RNN output.
        """
        # Packing tells the RNN which rows are padding so it skips them.
        packed_input = pack_padded_sequence(x, lengths, batch_first=True, enforce_sorted=False)
        packed_output, _ = self.lstm(packed_input)
        output, _ = pad_packed_sequence(packed_output, batch_first=True)

        # Index of the last valid step per sequence. `lengths` is kept on the CPU
        # for packing, so build the gather index on the device of `output`
        # to stay valid when the model runs on an accelerator.
        last_step = torch.as_tensor(lengths, dtype=torch.long, device=output.device) - 1
        idx = last_step.view(-1, 1, 1).expand(output.size(0), 1, output.size(2))
        output = output.gather(1, idx).squeeze(1)
        return self.fc(output)


In [None]:
# Initialize the model. No device transfer happens here, so the model stays
# on the default device.

model = MarkovModel(input_size, hidden_size, num_layers, output_size)

# Create a float tensor of the training targets and derive a positive-class weight
# for the imbalanced classes: (number of samples / sum of targets) * 0.43.
# The sum is the positive count if targets are 0/1. The 0.43 factor is not
# explained here -- presumably hand-tuned, TODO confirm.
tensor_target = torch.tensor(train_data['out']).float()
criterion = nn.BCEWithLogitsLoss(pos_weight=(len(tensor_target)/ tensor_target.sum())*0.43)

# Initialize optimizer with AdamW and AMSGrad variant

optimizer = torch.optim.AdamW(model.parameters(), lr=learning_rate, amsgrad=True, weight_decay=0.1)


In [None]:
# Wrap one data split in a windowed dataset and a DataLoader
def prepare_data_loader(data, ptu_window, batch_size):
    """Build a non-shuffling DataLoader over a ``TimeSeriesDataset``.

    Args:
        data: Split handed straight to ``TimeSeriesDataset``.
        ptu_window: Number of PTUs of history per sample.
        batch_size: Samples per batch.

    Returns:
        A ``DataLoader`` that iterates the dataset in order (``shuffle=False``).
    """
    return DataLoader(
        TimeSeriesDataset(data, ptu_window),
        batch_size=batch_size,
        shuffle=False,
    )

In [None]:
# One loader per split, all sharing the same history window and batch size
train_loader = prepare_data_loader(train_data, ptu_window, batch_size)
valid_loader = prepare_data_loader(valid_data, ptu_window, batch_size)
test_loader = prepare_data_loader(test_data, ptu_window, batch_size)

# len(loader) is the number of batches each loader will yield
print(f'expected batches training: {len(train_loader)}')
print(f'expected batches validation: {len(valid_loader)}')
print(f'expected batches testing: {len(test_loader)}')

In [None]:
# Training loop for the RNN
def _prepare_batch(sequences, targets, lengths):
    """Cast one loader batch to the dtypes and shapes the model and loss expect.

    The lengths coming from the loader are one short of the number of valid
    steps (the dataset in this file stores ``row_idx - start_idx``), hence the
    +1. The increment is done out-of-place so the tensor owned by the loader is
    never modified; an in-place ``+=`` would compound whenever a batch object
    is iterated more than once.
    """
    sequences = sequences.float()
    targets = targets.float().view(-1, 1)
    lengths = (lengths + 1).to(torch.int64).cpu()
    return sequences, targets, lengths


def _mean_loss(total, n_batches):
    """Average ``total`` over ``n_batches``; NaN when no batch was seen."""
    return total / n_batches if n_batches else float('nan')


def train_rnn(model, train_loader, val_loader, criterion, optimizer, num_epochs):
    """Train ``model`` and evaluate it on ``val_loader`` after every epoch.

    Args:
        model: Module called as ``model(sequences, lengths)``.
        train_loader: Iterable of ``(sequences, targets, lengths)`` batches.
        val_loader: Iterable of the same form, used without gradient tracking.
        criterion: Loss called as ``criterion(outputs, targets)`` with targets
            reshaped to ``(batch, 1)``.
        optimizer: Optimizer over the model's parameters.
        num_epochs: Number of passes over ``train_loader``.

    Returns:
        ``(train_losses, val_losses)`` -- per-batch loss values, concatenated
        over all epochs. An empty loader contributes no entries, and its epoch
        mean is reported as NaN instead of raising ``ZeroDivisionError``.

    The model is left in training mode on return. Progress is printed per batch
    and per epoch.
    """
    model.train()

    train_losses = []
    val_losses = []

    for epoch in range(num_epochs):
        train_loss = 0.0
        val_loss = 0.0
        train_batches = 0
        val_batches = 0

        # Training phase
        for batch_idx, (sequences, targets, lengths) in enumerate(train_loader):

            print(f"Training batch {batch_idx}, Sequence shape: {sequences.shape}, Lengths: {lengths}")

            sequences, targets, lengths = _prepare_batch(sequences, targets, lengths)

            # Forward pass
            outputs = model(sequences, lengths)
            loss = criterion(outputs, targets)

            # Backward pass
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            train_loss += batch_loss
            train_losses.append(batch_loss)
            train_batches += 1

        # Validation phase
        model.eval()
        with torch.no_grad():
            for batch_idx, (sequences, targets, lengths) in enumerate(val_loader):

                print(f"Validation batch {batch_idx}, Sequence shape: {sequences.shape}, Lengths: {lengths}")

                sequences, targets, lengths = _prepare_batch(sequences, targets, lengths)

                outputs = model(sequences, lengths)
                loss = criterion(outputs, targets)

                batch_loss = loss.item()
                val_loss += batch_loss
                val_losses.append(batch_loss)
                val_batches += 1

        # Unweighted mean of the per-batch losses seen this epoch
        train_loss = _mean_loss(train_loss, train_batches)
        val_loss = _mean_loss(val_loss, val_batches)

        print(f'Epoch [{epoch+1}/{num_epochs}], Training Loss: {train_loss:.4f}, Validation Loss: {val_loss:.4f}')

        # Switch back to training mode
        model.train()

    return train_losses, val_losses

In [None]:
# Run training; the returned lists hold one loss value per processed batch
train_losses, val_losses = train_rnn(model, train_loader, valid_loader, criterion, optimizer, num_epochs)

In [None]:
# Compare the losses: one figure per phase, per-batch loss on the y-axis
for loss_history, curve_label, plot_title in (
    (train_losses, 'Training Loss', 'Training Loss Over all Batches'),
    (val_losses, 'Validation Loss', 'Validation Loss Over all Batches'),
):
    plt.figure(figsize=(10, 5))
    plt.plot(loss_history, label=curve_label)
    plt.xlabel('Batch')
    plt.ylabel('Loss')
    plt.title(plot_title)
    plt.legend()
    plt.show()

In [None]:
# Name and folder to save the model under (only used by the save call below)
save_name = 'markovian_model_1'
folder_name = 'rnn'

# Saving is disabled by default; uncomment to save the model.
# save_model is not defined in this file -- presumably provided by the
# tweedejaars_project star import; verify.
# save_model(model, save_name, folder_name)

In [None]:
# testing loop
def test_model(model, data_loader):
    model.eval()
    all_outputs = []
    all_targets = []
    with torch.no_grad():
        for batch_idx, (sequences, targets, lengths) in enumerate(data_loader):
            print(f"batch {batch_idx}, Sequence shape: {sequences.shape}, Lengths: {lengths}")

            sequences = sequences.float()
            targets = targets.float()
            lengths += 1

            # Ensure lengths is a 1D CPU int64 tensor
            lengths = lengths.to(torch.int64).cpu()

            # Forward pass
            outputs = model(sequences, lengths)
            all_outputs.append(outputs)
            all_targets.append(targets)
            
    return torch.cat(all_outputs), torch.cat(all_targets)

In [None]:
# Place to test model

# Change these two variables TOGETHER to evaluate a different split: the loader
# and the unaltered dataframe must come from the same split, otherwise the
# predictions are scored against the wrong rows.
chosen_data_loader = valid_loader
unaltered_df = valid_data['df']

# Use the chosen loader. This call previously hard-coded valid_loader, which
# made the switch above a silent no-op.
outputs, targets = test_model(model, chosen_data_loader)
probabilities = torch.sigmoid(outputs)
# Threshold the probabilities at 0.5 to get boolean predictions
predictions = (probabilities > 0.5).float()

# Score against the dataframe of the chosen split. recast_pred and
# show_metrics_adjusted are not defined in this file -- presumably provided by
# the tweedejaars_project star import; verify.
recasted_pred = recast_pred(predictions.flatten())
show_metrics_adjusted(unaltered_df, recasted_pred)
