In [40]:
%load_ext autoreload
%autoreload 2

from tweedejaars_project import *
import os
import pandas as pd
import numpy as np
from sklearn.ensemble import *
from sklearn.tree import *
from sklearn.metrics import *
from sklearn.linear_model import *
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import RobustScaler, StandardScaler, LabelEncoder, MinMaxScaler
from sklearn.model_selection import TimeSeriesSplit, GridSearchCV
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence, pad_sequence




The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [41]:
features = [
    'import_capacity',
    'min_price_published',
    'mid_price_published',
    'max_price_published',
    # 'min_ptu_price_known',
    # 'max_ptu_price_known',
    # 'settlement_price_bestguess',
    'time_since_last_two_sided',
    'two_sided_daily_count',
    'PTU',
    'naive_strategy_action',
    'forecast_wind',
    'forecast_solar',
    'forecast_demand',
    'ptu_id',
    'fix_two_sided_ptu_realtime'
]

# already used
target = 'target_two_sided_ptu'

In [42]:
df = load_df()
splits = get_splits(df, features)

In [43]:
# Hyperparameters
sequence_length = 50 # Dit is het PTU aantal dus ptu_id's niet row aantal
batch_size = 32
input_size = len(features)
hidden_size = 64
num_layers = 2
output_size = 1
num_epochs = 10
learning_rate = 0.001


# Split the data in vars
train_data = splits['train']
test_data = splits['test']


In [52]:
class TimeSeriesDataset(Dataset):
    def __init__(self, data, seq_length, ptu_length=15, nan_val1 = 100000, nan_val2 = -100000):
        self.data_in = data['in'].astype(np.float32)
        self.data_out = data['out'].astype(np.float32)
        self.seq_length = seq_length
        self.ptu_length = ptu_length
        self.sequence_indices = self.create_sequences()
        self.max_length = (self.seq_length * self.ptu_length) + self.ptu_length

        # DIT IS EEN HACK DUS WSS VERANDEREN
        self.data_in = self.data_in.fillna({'min_price_published':nan_val1, 'max_price_published':nan_val2}).astype(np.float32)

    def create_sequences(self):
        sequence_indices = []
        ptu_ids_total = self.data_in['ptu_id']
        ptu_ids = ptu_ids_total.unique()
        
        for ptu_id in ptu_ids:
            last_idx_start_ptu = np.searchsorted(ptu_ids_total, ptu_id, side='left')

            for row_idx in range(self.ptu_length):
                last_idx = last_idx_start_ptu + row_idx
                first_idx = last_idx - (self.seq_length * self.ptu_length) + 1
                
                if first_idx < 0:
                    first_idx = 0
                    
                sequence_indices.append((first_idx, last_idx))

        return sequence_indices
    
    def pad_to_length(self, sequence, padding_value=np.nan):
        padded_sequences = []
        for seq in sequence:
            if seq.shape[0] < self.max_length :
                pad_length = self.max_length - seq.shape[0]
                padded_seq = F.pad(seq, (0, 0, pad_length, 0), mode='constant', value=padding_value)
            else:
                padded_seq = seq[:self.max_length]
            padded_sequences.append(padded_seq)
        return pad_sequence(padded_sequences, batch_first=True, padding_value=padding_value)


    def __len__(self):
        return len(self.sequence_indices)
    
    def __getitem__(self, idx):
        start_idx, end_idx = self.sequence_indices[idx]
        sequence = self.data_in.iloc[start_idx:end_idx + 1].drop('ptu_id', axis=1).values
        target = self.data_out.iloc[start_idx:end_idx + 1].values

        sequence_tensor = torch.tensor(sequence, dtype=torch.float32)
        padded_sequence = self.pad_to_length([sequence_tensor])

        print("Padded sequence shape:", padded_sequence.squeeze(0).shape)

        return padded_sequence.squeeze(0), torch.tensor(target, dtype=torch.float32)



In [53]:
class RNNModel(nn.Module):
    def __init__(self, input_size, hidden_size, num_layers, output_size):
        super(RNNModel, self).__init__()
        self.rnn = nn.RNN(input_size, hidden_size, num_layers, batch_first=True)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x, lengths):
        packed_input = pack_padded_sequence(x, lengths, batch_first=True, enforce_sorted=False)
        packed_output, _ = self.rnn(packed_input)
        output, _ = pad_packed_sequence(packed_output, batch_first=True)
        output = self.fc(output)
        return output


In [54]:
def prepare_data_loader(data, sequence_length, batch_size):
    dataset = TimeSeriesDataset(data, sequence_length)
    return DataLoader(dataset, batch_size=batch_size, shuffle=False)

In [56]:
train_loader = prepare_data_loader(train_data, sequence_length, batch_size)
model = RNNModel(input_size, hidden_size, num_layers, output_size)
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)



In [58]:
for f, t in train_loader:
    print('hallo')

Padded sequence shape: torch.Size([765, 12])
Padded sequence shape: torch.Size([765, 12])
Padded sequence shape: torch.Size([765, 12])
Padded sequence shape: torch.Size([765, 12])
Padded sequence shape: torch.Size([765, 12])
Padded sequence shape: torch.Size([765, 12])
Padded sequence shape: torch.Size([765, 12])
Padded sequence shape: torch.Size([765, 12])
Padded sequence shape: torch.Size([765, 12])
Padded sequence shape: torch.Size([765, 12])
Padded sequence shape: torch.Size([765, 12])
Padded sequence shape: torch.Size([765, 12])
Padded sequence shape: torch.Size([765, 12])
Padded sequence shape: torch.Size([765, 12])
Padded sequence shape: torch.Size([765, 12])
Padded sequence shape: torch.Size([765, 12])
Padded sequence shape: torch.Size([765, 12])
Padded sequence shape: torch.Size([765, 12])
Padded sequence shape: torch.Size([765, 12])
Padded sequence shape: torch.Size([765, 12])
Padded sequence shape: torch.Size([765, 12])
Padded sequence shape: torch.Size([765, 12])
Padded seq

RuntimeError: stack expects each tensor to be equal size, but got [1] at entry 0 and [2] at entry 1

In [None]:
# Training loop (placeholder, implement training logic)
for epoch in range(num_epochs):
    for sequences, targets in train_loader:
        lengths = [min(len(seq), sequence_length) for seq in sequences]
        optimizer.zero_grad()
        outputs = model(sequences, lengths)
        loss = criterion(outputs, targets)
        loss.backward()
        optimizer.step()
