In [1]:
import sys
sys.path.append("../src")

from models.mamba import MambaConfig, MambaSSM, MambaModelForHourlySeries
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.preprocessing import StandardScaler
from torch.utils.data import DataLoader, Dataset
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error, mean_absolute_error

# Load the merged dataset in chronological order and derive the time-of-day
# and lagged-price columns in a single chained expression.
df = (
    pd.read_csv("../data/merged_data.csv", parse_dates=["datetime"])
    .sort_values("datetime")
    # Temporary helper column with the hour of day; dropped again below.
    .assign(hour=lambda frame: frame["datetime"].dt.hour)
    .assign(
        # 1 for hours 7..18 inclusive (i.e. 07:00 up to, not including, 19:00), else 0.
        is_daytime=lambda frame: frame["hour"].between(7, 18).astype(int),
        # Cyclical encoding of the hour so that 23h and 0h end up close together.
        sin_hour=lambda frame: np.sin(2 * np.pi * frame["hour"] / 24),
        cos_hour=lambda frame: np.cos(2 * np.pi * frame["hour"] / 24),
        # Price of the previous row; assumes consecutive rows are one hour
        # apart -- TODO confirm the data has no gaps.
        price_lag_1h=lambda frame: frame["price_EUR_MWh"].shift(1),
    )
    .drop(columns=["hour"])
    # shift(1) leaves a NaN in the first row; dropna() removes it together
    # with any other row that has a missing value in any column.
    .dropna()
)

# features = [
#     "temperature_2m", "wind_speed_180m", "wind_speed_120m", "direct_radiation",
#     "quantity_biomass", "quantity_fossil_gas", "quantity_fossil_hard_coal",
#     "quantity_hydro_run_of_river", "quantity_nuclear", "quantity_solar",
#     "quantity_waste", "quantity_wind_offshore", "quantity_wind_onshore",
#     "quantity_other", "quantity_MW"
# ]
# target = "price_EUR_MWh"

# Column to predict.
target = "price_EUR_MWh"

# Input columns, one per line. The order here is the column order of the
# scaled feature matrix built from df[features].
features = [
    "temperature_2m",
    "wind_speed_180m",
    "wind_speed_120m",
    "direct_radiation",
    "quantity_solar",
    "quantity_other",
    "price_lag_1h",
    "is_daytime",
    "sin_hour",
    "cos_hour",
    "quantity_MW",
]

# Hold out the last `val_size` rows for validation (30 days, assuming one
# row per hour). The split is made before scaling.
val_days = 30
val_size = val_days * 24

train_df, val_df = df.iloc[:-val_size], df.iloc[-val_size:]

# Scaling statistics are estimated from the training rows only, so nothing
# from the validation period influences the mean/std used by the model.
scaler_x = StandardScaler().fit(train_df[features])
scaler_y = StandardScaler().fit(train_df[[target]])

# Apply the train-fitted scalers to every row (training and validation).
X, y = scaler_x.transform(df[features]), scaler_y.transform(df[[target]])

# scaler_x = StandardScaler()
# scaler_y = StandardScaler()

# # X = scaler_x.fit_transform(df[features])
# # y = scaler_y.fit_transform(df[[target]])


In [2]:
class TimeSeriesDataset(Dataset):
    """
    Sliding-window dataset over a time series with a fixed window length.

    Sample ``i`` pairs the ``seq_len`` consecutive feature rows
    ``X[i : i + seq_len]`` with the single target row ``y[i + seq_len]``,
    i.e. the target row that immediately follows the window.

    Args:
        X: Array-like of feature rows; copied into a float32 tensor.
        y: Array-like of target rows; copied into a float32 tensor. It is
            expected to have one row per row of ``X`` (not checked here).
        seq_len: Number of consecutive rows in each input window.
    """
    def __init__(self, X, y, seq_len):
        self.X = torch.tensor(X, dtype=torch.float32)
        self.y = torch.tensor(y, dtype=torch.float32)
        self.seq_len = seq_len

    def __len__(self):
        """Number of complete (window, next-row target) pairs; never negative."""
        # A series that is not longer than the window yields no samples.
        # Without the clamp the result would be negative and len() would
        # raise ValueError instead of reporting an empty dataset.
        return max(0, len(self.X) - self.seq_len)

    def __getitem__(self, idx):
        """
        Return ``(window, target)`` for sample ``idx``.

        Negative indices count from the end, as for Python sequences.

        Raises:
            IndexError: if ``idx`` is outside ``[-len(self), len(self))``.
        """
        n_samples = len(self)
        if idx < 0:
            # Without this, a negative idx would slice an empty or
            # misaligned window instead of addressing from the end.
            idx += n_samples
        if not 0 <= idx < n_samples:
            raise IndexError(
                f"index out of range for TimeSeriesDataset with {n_samples} samples"
            )
        window_end = idx + self.seq_len
        return (
            self.X[idx:window_end],
            self.y[window_end]
        )

In [3]:
class HourAheadWrapper(nn.Module):
    """
    Adapter that reduces a wrapped model's output to its last column.

    The wrapped model is called with the input only, and the final entry
    along the second axis of its output is returned for every batch row.

    Args:
        base_model: The model to wrap; stored as ``self.base``.
    """
    def __init__(self, base_model):
        super().__init__()
        self.base = base_model

    def forward(self, x, **kwargs):
        """
        Run the wrapped model on ``x`` and keep only the last output column.

        Extra keyword arguments are accepted but are not passed on to the
        wrapped model.
        """
        # Expected to be [B, 24] according to the original author's note;
        # the wrapped model is defined outside this file -- TODO confirm.
        outputs = self.base(x)
        last_column = outputs[:, -1]
        return last_column

In [4]:
# Train/validation split of the scaled arrays.
# The data is assumed to be hourly; the last 30 days are used for validation.
sequence_length = 24  # rows per input window (24 hours = 1 day)
val_days = 30
val_size = val_days * 24

# Everything before the validation period is used for training.
train_X, train_y = X[:-val_size], y[:-val_size]

# The validation arrays start `sequence_length` rows early so that the first
# validation window has a full history and its target is the first of the
# last `val_size` rows.
val_X, val_y = X[-val_size - sequence_length:], y[-val_size - sequence_length:]

train_dataset = TimeSeriesDataset(train_X, train_y, sequence_length)
val_dataset = TimeSeriesDataset(val_X, val_y, sequence_length)

# Both loaders iterate in chronological order (no shuffling).
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=False)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

In [7]:
# Model configuration
config = MambaConfig(
    state_size=64,
    hidden_size=32,
    expansion_factor=2,
    dt_rank=3,
    conv_kernel_size=3,
)

n_epochs = 30

# Use the GPU when one is available, otherwise fall back to the CPU so the
# cell does not fail on machines without CUDA.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Model, optimizer, loss function and learning-rate schedule.
# One input channel per feature column (11 with the current `features` list).
model = HourAheadWrapper(
    MambaModelForHourlySeries(config, in_features=len(features))
).to(device)
optimizer = optim.AdamW(model.parameters(), lr=2e-3, weight_decay=1e-4)
criterion = nn.L1Loss()
# T_max is expressed in epochs, so the scheduler is stepped once per epoch
# below. Stepping it once per batch would run through the whole cosine
# schedule every few dozen batches instead of annealing over the full run.
scheduler = torch.optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=n_epochs)

# NOTE(review): a previous run of this cell ended with
# "Expected all tensors to be on the same device ... cuda:0 and cpu".
# The model and both batch tensors are moved to `device` here, so the stray
# CPU tensor is presumably created inside models.mamba -- verify there.

# Training the model
for epoch in range(n_epochs):
    # ---- training pass ----
    model.train()
    epoch_loss = 0.0
    for x, t in train_loader:
        x, t = x.to(device), t.to(device)
        optimizer.zero_grad()
        preds = model(x)
        # Each target is one row of y with a trailing dimension of size 1;
        # squeeze it so the target lines up with the per-sample predictions.
        loss = criterion(preds, t.squeeze(-1))
        loss.backward()
        optimizer.step()
        epoch_loss += loss.item()

    # ---- validation pass (no gradient tracking) ----
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for x, t in val_loader:
            x, t = x.to(device), t.to(device)
            preds = model(x)
            loss = criterion(preds, t.squeeze(-1))
            val_loss += loss.item()

    # Advance the cosine schedule once per epoch, matching T_max=n_epochs.
    scheduler.step()

    print(f"Epoch {epoch+1}, Train Loss: {epoch_loss/len(train_loader):.4f}, Val Loss: {val_loss/len(val_loader):.4f}")

RuntimeError: Expected all tensors to be on the same device, but found at least two devices, cuda:0 and cpu!