#### Model type: Autoregressive RNN


In [None]:
%load_ext autoreload
%autoreload 2

from tweedejaars_project import *
import os
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence, pad_sequence
import matplotlib.pyplot as plt

In [None]:
# Input columns fed to the model; their count also determines the model's
# input size further down in the notebook.
features = [
    "import_capacity",
    "mid_price_published",
    "upward_dispatch_published",
    "downward_dispatch_published",
    "min_ptu_price_known",
    "max_ptu_price_known",
    "settlement_price_bestguess",
    "PTU",
    "forecast_wind",
    "forecast_solar",
    "forecast_demand",
    "time_since_last_two_sided",
    "two_sided_daily_count",
    "ptu_id",
    "naive_strategy_action",
    "minute_in_ptu",
    "hvq_delta",
    "residual_load",
    "dispatch_diff",
    "igcc_diff",
    "is_balanced",
    "weekday",
    "workday",
    "hour",
    "month",
    "day",
    "minute",
]
# Label column the model is trained to predict.
# NOTE(review): the original comment here only said "already used" -- intent unclear.
target = "fix_two_sided_ptu_alt"

In [None]:
# Load the dataset into a DataFrame. load_df is not defined in this notebook;
# presumably it is provided by the tweedejaars_project star import -- verify.
df = load_df()

In [None]:
# Forward-fill NaN gaps in the selected columns
def interpolate_feature(df, features):
    """Forward-fill missing values in the given columns of ``df``.

    Each NaN is replaced by the closest preceding non-NaN value of the same
    column; NaNs that precede the first valid value are left untouched.

    Args:
        df: DataFrame to modify. It is changed in place.
        features: Iterable of column names to forward-fill.

    Returns:
        The same ``df`` object, for call chaining.
    """
    for column in features:
        forward_filled = df[column].ffill()
        df[column] = forward_filled
    return df


# Forward-fill the forecast, dispatch and derived delta/diff columns.
df = interpolate_feature(
    df,
    [
        "forecast_wind",
        "forecast_solar",
        "forecast_demand",
        "upward_dispatch_published",
        "downward_dispatch_published",
        "vwap_avg",
        "forecast_wind_delta",
        "forecast_solar_delta",
        "forecast_demand_delta",
        "residual_load",
        "dispatch_diff",
        "igcc_diff",
        "hvq_delta",
    ],
)

In [None]:
# Summarise which of two columns are populated as one categorical code column
def difference_published(df, features):
    """Add a ``publish_info`` column encoding which of two columns are NaN.

    The two source columns are left in place. Codes written per row:

    * ``0.0`` -- first column has a value, second is NaN
    * ``1.0`` -- first column is NaN, second has a value
    * ``2.0`` -- both columns are NaN
    * ``3.0`` -- both columns have a value

    Args:
        df: DataFrame to modify. It is changed in place.
        features: Sequence whose first two entries are the column names to
            compare.

    Returns:
        The same ``df`` object with the extra ``publish_info`` column.
    """
    first_present = df[features[0]].notna()
    second_present = df[features[1]].notna()
    first_missing = ~first_present
    second_missing = ~second_present

    codes = [0., 1., 2., 3.]
    masks = [
        first_present & second_missing,
        first_missing & second_present,
        first_missing & second_missing,
        first_present & second_present,
    ]
    df['publish_info'] = np.select(masks, codes)
    return df

# Add df['publish_info'], a 0-3 code describing which of the published
# min/max price columns are missing on each row.
df = difference_published(df, ['min_price_published', 'max_price_published'])

In [None]:
# Replace NaN values with 0 in the selected columns
def fill_vals_0(df, features):
    """Replace every NaN in the given columns of ``df`` with 0.

    Args:
        df: DataFrame to modify. It is changed in place.
        features: Iterable of column names to fill.

    Returns:
        The same ``df`` object, for call chaining.
    """
    for column in features:
        zero_filled = df[column].fillna(0)
        df[column] = zero_filled
    return df

# Replace missing values with 0 in the known PTU price bounds and the
# best-guess settlement price columns.
df = fill_vals_0(df, ['min_ptu_price_known', 'max_ptu_price_known', 'settlement_price_bestguess'])


In [None]:
# --- Model and training hyperparameters ---
input_size = len(features)  # one input channel per selected feature column
hidden_size = 2
num_layers = 2
output_size = 1
seq_length = 15  # 15 for the PTU window, i.e. 15 rows per input sequence
batch_size = 15 * seq_length
num_epochs = 1
learning_rate = 0.001
# 75% of the epoch budget, truncated towards zero; this evaluates to 0 while
# num_epochs is 1.
patience = int(0.75 * num_epochs)


# --- Train / validation / test partitions ---
splits = get_splits(df, features, target)
train_data, valid_data, test_data = splits['train'], splits['valid'], splits['test']

In [None]:
class TimeSeriesDataset(Dataset):
    """Sliding-window dataset: sample ``idx`` is the window of rows ending at ``idx``.

    Each item pairs up to ``seq_length`` consecutive input rows (the window
    ends at row ``idx``, inclusive) with the target value of row ``idx``.
    Windows near the start of the data are shorter than ``seq_length``.

    ``using_train`` and ``probability`` are stored but not read by
    ``__getitem__``; they belonged to a training-time subsampling step that is
    currently disabled.
    """

    def __init__(self, data, seq_length, using_train, probability=0.25):
        """Store the inputs/targets as float32 pandas objects.

        Args:
            data: Mapping with key ``'in'`` (2-D inputs) and ``'out'`` (targets).
            seq_length: Maximum number of rows per returned sequence.
            using_train: Flag marking the training split (currently unused).
            probability: Subsampling keep-probability (currently unused).
        """
        self.seq_length = seq_length
        self.using_train = using_train
        self.probability = probability
        self.data_in = pd.DataFrame(data['in']).astype(np.float32)
        self.data_out = pd.Series(data['out']).astype(np.float32)

    def __len__(self):
        """Return the number of rows, which equals the number of samples."""
        return len(self.data_in)

    def __getitem__(self, idx):
        """Return ``(sequence, target)`` float32 tensors for row ``idx``."""
        stop = idx + 1
        # Clamp at the first row so early samples get a truncated window.
        start = max(0, stop - self.seq_length)
        window = self.data_in.iloc[start:stop].to_numpy()
        label = self.data_out.iloc[idx]

        sequence_tensor = torch.tensor(window, dtype=torch.float32)
        target_tensor = torch.tensor(label, dtype=torch.float32)
        return sequence_tensor, target_tensor

# Collate variable-length (sequence, target) samples into one padded batch
def collate_fn(batch):
    """Pad a list of samples to a common length and stack their targets.

    Samples whose sequence tensor has no elements are dropped first.

    Args:
        batch: List of tuples whose first entry is a sequence tensor and whose
            second entry is a target tensor.

    Returns:
        ``(padded_sequences, targets, lengths)`` where ``padded_sequences`` is
        zero-padded with the batch dimension first, ``targets`` is the stacked
        target tensor and ``lengths`` lists the unpadded sequence lengths.
        If no sample survives the filter, two empty tensors and an empty list
        are returned instead.
    """
    kept = [sample for sample in batch if sample[0].numel() > 0]
    if not kept:
        return torch.empty(0), torch.empty(0), []

    seqs, labels = zip(*kept)
    seq_lengths = [len(seq) for seq in seqs]

    # Shorter sequences are right-padded with zeros up to the longest one.
    padded = pad_sequence(seqs, batch_first=True, padding_value=0)
    stacked_labels = torch.stack(labels)
    return padded, stacked_labels, seq_lengths

In [None]:
def prepare_data_loader(data, seq_length, batch_size, train=False):
    """Wrap one data split in a ``TimeSeriesDataset`` and return its ``DataLoader``.

    Args:
        data: Split mapping handed to ``TimeSeriesDataset``.
        seq_length: Maximum window length per sample.
        batch_size: Number of samples per batch.
        train: Forwarded to the dataset as its ``using_train`` flag.

    Returns:
        A non-shuffling ``DataLoader`` that batches with ``collate_fn``.
    """
    windowed = TimeSeriesDataset(data, seq_length, using_train=train)
    # shuffle stays off so batches follow the row order of the split.
    return DataLoader(
        windowed,
        batch_size=batch_size,
        collate_fn=collate_fn,
        shuffle=False,
    )

# Build one loader per split; only the training loader sets the train flag.
train_loader = prepare_data_loader(train_data, seq_length=seq_length, batch_size=batch_size, train=True)
valid_loader = prepare_data_loader(valid_data, seq_length=seq_length, batch_size=batch_size)
test_loader = prepare_data_loader(test_data, seq_length=seq_length, batch_size=batch_size)

# Report how many batches each loader yields per pass.
print(f'expected batches training: {len(train_loader)}')
print(f'expected batches validation: {len(valid_loader)}')
print(f'expected batches testing: {len(test_loader)}')

In [None]:
class LSTMModel(nn.Module):
    """Stacked LSTM followed by a linear head, for padded variable-length batches.

    The head is applied to the top layer's hidden state at the last *valid*
    time step of every sequence, so sequences shorter than the longest one in
    the batch are decoded from real data rather than from padding.
    """

    def __init__(self, input_size, hidden_size, num_layers, output_size):
        """Build the network.

        Args:
            input_size: Number of features per time step.
            hidden_size: Width of the LSTM hidden state.
            num_layers: Number of stacked LSTM layers.
            output_size: Number of values produced per sequence.
        """
        super().__init__()
        self.hidden_size = hidden_size
        self.num_layers = num_layers

        # LSTM with dropout applied between stacked layers
        self.lstm = nn.LSTM(input_size, hidden_size, num_layers, batch_first=True, dropout=0.5)

        # Fully connected layer mapping the final hidden state to the output
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, lengths):
        """Run the model on a zero-padded batch.

        Args:
            x: Padded input tensor with the batch dimension first.
            lengths: Unpadded length of every sequence in ``x`` (list or tensor).

        Returns:
            Tensor of shape ``(batch_size, output_size)``.
        """
        # pack_padded_sequence requires the lengths on the CPU.
        lengths = torch.as_tensor(lengths, dtype=torch.int64).cpu()
        packed_inputs = pack_padded_sequence(x, lengths, batch_first=True, enforce_sorted=False)

        # No explicit (h0, c0): nn.LSTM defaults both to zeros.
        _, (h_n, _) = self.lstm(packed_inputs)

        # For packed input, h_n holds each sequence's state at its own last
        # valid step (in the original batch order); h_n[-1] is the top layer.
        # Indexing the unpacked output at position -1 would instead read zero
        # padding for every sequence shorter than the batch maximum.
        return self.fc(h_n[-1])

In [None]:
def train(model, train_loader, valid_loader, optimizer, criterion, num_epochs, device, scheduler=None, patience=10):
    """Train ``model`` with per-epoch validation, early stopping and best-weight restore.

    Args:
        model: Module called as ``model(inputs, lengths)``.
        train_loader: Iterable of ``(inputs, targets, lengths)`` training batches.
        valid_loader: Iterable of validation batches, passed to ``evaluate``.
        optimizer: Optimizer updating ``model``'s parameters.
        criterion: Loss called as ``criterion(outputs, targets)``.
        num_epochs: Maximum number of epochs.
        device: Device the inputs and targets are moved to.
        scheduler: Optional scheduler stepped once per epoch with the
            validation loss (i.e. a ReduceLROnPlateau-style ``step(metric)``).
        patience: Number of consecutive non-improving epochs after which
            training stops early.

    Returns:
        ``(train_losses, valid_losses)``: two lists with one average loss per
        completed epoch.
    """
    train_losses = []
    valid_losses = []
    best_valid_loss = float('inf')
    best_model_state_dict = None
    epochs_no_improve = 0

    for epoch in range(num_epochs):
        model.train()  # Set model to training mode
        train_loss = 0.0
        batches_seen = 0

        for i, (inputs, targets, lengths) in enumerate(train_loader):
            if len(inputs) == 0:  # Skip if batch is empty
                continue

            print(f"Training batch {i}, Sequence shape: {inputs.shape}, Lengths: {lengths}")
            inputs, targets = inputs.to(device), targets.to(device)
            optimizer.zero_grad()
            outputs = model(inputs, lengths)
            # squeeze(-1) drops only the output dimension. A bare squeeze()
            # would also collapse a batch of one to a scalar, which no longer
            # matches the target shape.
            loss = criterion(outputs.squeeze(-1), targets)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()
            batches_seen += 1

        # Average over the batches that actually produced a loss; skipped
        # (empty) batches must not dilute the mean.
        avg_train_loss = train_loss / batches_seen if batches_seen else float('nan')

        # Calculate validation loss after every epoch
        valid_loss = evaluate(model, valid_loader, criterion, device)

        print(f'Epoch [{epoch+1}/{num_epochs}], Train Loss: {avg_train_loss:.4f}, Valid Loss: {valid_loss:.4f}')
        train_losses.append(avg_train_loss)
        valid_losses.append(valid_loss)

        # Step the scheduler if provided
        if scheduler:
            scheduler.step(valid_loss)

        if valid_loss < best_valid_loss:
            best_valid_loss = valid_loss
            # state_dict() hands out references to the live parameters, so the
            # snapshot must be cloned or later optimizer steps overwrite it.
            best_model_state_dict = {
                name: tensor.detach().clone()
                for name, tensor in model.state_dict().items()
            }
            epochs_no_improve = 0
        else:
            epochs_no_improve += 1
            # >= rather than == so that patience=0 also triggers the stop.
            if epochs_no_improve >= patience:
                print(f'Early stopping after {epoch+1} epochs.')
                break

    # Restore the best weights, if any epoch produced a usable validation loss
    # (none is recorded for num_epochs == 0 or an all-NaN validation loss).
    if best_model_state_dict is not None:
        print(f'Loading best model!')
        model.load_state_dict(best_model_state_dict)

    return train_losses, valid_losses

def evaluate(model, data_loader, criterion, device):
    """Return the mean per-batch loss of ``model`` over ``data_loader``.

    Args:
        model: Module called as ``model(inputs, lengths)``.
        data_loader: Iterable of ``(inputs, targets, lengths)`` batches.
        criterion: Loss called as ``criterion(outputs, targets)``.
        device: Device the inputs and targets are moved to.

    Returns:
        The loss averaged over the non-empty batches, or ``nan`` when no batch
        contained any data.
    """
    model.eval()  # Set model to evaluation mode
    total_loss = 0.0
    batches_seen = 0

    with torch.no_grad():  # Disable gradient calculation
        for i, (inputs, targets, lengths) in enumerate(data_loader):
            if len(inputs) == 0:  # Skip if batch is empty
                continue

            print(f"Validation batch {i}, Sequence shape: {inputs.shape}, Lengths: {lengths}")
            inputs, targets = inputs.to(device), targets.to(device)
            outputs = model(inputs, lengths)
            # squeeze(-1) drops only the output dimension. A bare squeeze()
            # would also collapse a batch of one to a scalar, which no longer
            # matches the target shape.
            loss = criterion(outputs.squeeze(-1), targets)
            total_loss += loss.item()
            batches_seen += 1

    # Average over processed batches only; this also avoids a division by zero
    # for an empty loader.
    if batches_seen == 0:
        return float('nan')
    return total_loss / batches_seen


In [None]:
# Train on the GPU when one is available, otherwise on the CPU
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Build the network and place it on the chosen device
model = LSTMModel(input_size, hidden_size, num_layers, output_size).to(device)

# Weight the positive class by (number of samples / sum of targets) * 0.68 to
# counter class imbalance.
# NOTE(review): the criterion itself is never moved to `device`; confirm the
# pos_weight tensor behaves as intended when training on CUDA.
tensor_target = torch.tensor(train_data['out']).float()
criterion = nn.BCEWithLogitsLoss(
    pos_weight=(len(tensor_target) / tensor_target.sum() * 0.68)
)

# AdamW optimizer with the AMSGrad variant enabled
optimizer = optim.AdamW(model.parameters(), lr=learning_rate, amsgrad=True)

# Cut the learning rate by 10x once the monitored loss stops decreasing
scheduler = optim.lr_scheduler.ReduceLROnPlateau(
    optimizer,
    mode='min',
    factor=0.1,
    patience=patience,
)


In [None]:
# Run training; the two returned lists hold one average training loss and one
# validation loss per completed epoch.
train_losses, valid_losses = train(model, train_loader, valid_loader, optimizer, criterion, num_epochs, device, scheduler, patience)


In [None]:
# Compare the losses.
# train() appends exactly one averaged value per epoch to each list, so the
# x-axis is the epoch index, not the batch index. The marker keeps a run with
# a single epoch visible, since a one-point line draws nothing on its own.
plt.figure(figsize=(10, 5))
plt.plot(train_losses, marker='o', label='Training Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Training Loss per Epoch')
plt.legend()
plt.show()


plt.figure(figsize=(10, 5))
plt.plot(valid_losses, marker='o', label='Validation Loss')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.title('Validation Loss per Epoch')
plt.legend()
plt.show()

In [None]:
# File name and folder that the (currently disabled) save_model call below
# would use for the trained model.
save_name = 'autoregressive_rnn'
folder_name = 'rnn'

# Uncomment to save the model
# save_model(model, save_name, folder_name)

In [None]:
# Define the testing loop
def test(model, test_loader, criterion, device):
    # List to store all outputs
    all_outputs = []

    model.eval()  # Set the model to evaluation mode
    
    test_loss = 0.0  # Initialize the test loss
    with torch.no_grad():  # Disable gradient calculation
        for i, (inputs, targets, lengths) in enumerate(test_loader):
            if len(inputs) == 0:  # Skip if batch is empty
                continue

            print(f"Test batch {i}, Sequence shape: {inputs.shape}, Lengths: {targets.shape}")

            # Move inputs and targets to the specified device
            inputs, targets = inputs.to(device), targets.to(device)
            
            # Forward pass
            outputs = model(inputs, lengths)
            
            # Compute the loss
            loss = criterion(outputs.squeeze(), targets)
            test_loss += loss.item()  # Accumulate the test loss

            # Append the outputs to the list
            all_outputs.append(outputs)

    # Print the average test loss
    print(f'Test Loss: {test_loss / len(test_loader):.4f}')
    
    # Concatenate all outputs and return
    return torch.cat(all_outputs)

In [None]:
# Evaluation cell.

# To score a different split, change BOTH variables below so that the loader
# and the unaltered DataFrame refer to the same split.
chosen_data_loader = valid_loader
unaltered_df = valid_data['df']

# Raw model outputs (logits) -> probabilities -> boolean predictions at 0.5
outputs = test(model, chosen_data_loader, criterion, device)
probabilities = outputs.sigmoid()
predictions = probabilities > 0.5

# Recast the flattened predictions and report metrics against the chosen split
recasted_pred = recast_pred(predictions.flatten())
show_metrics_adjusted(unaltered_df, recasted_pred)
