# In [1]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics.pairwise import haversine_distances

# --- PARAMETERS ---
# Windowing and neighbourhood settings used when building the LSTM inputs.
history_len = 48        # number of consecutive past rows in each input window
pred_horizon = 4        # number of consecutive future rows predicted per sample
k_neighbors = 2         # nearest stations whose series are added as extra features
# Fractions of each station's samples reserved (in order, at the end of the
# series) for validation and for the final holdout; the rest is training data.
val_ratio = 0.15
holdout_ratio = 0.15
# Training hyperparameters.
epochs = 80             # maximum number of training epochs
batch_size = 32         # mini-batch size for the DataLoaders
hidden_dim = 64         # LSTM hidden-state size
learning_rate = 0.001   # Adam learning rate
patience = 10           # epochs without validation improvement before early stopping

# --- LOAD DATA ---
train_df = pd.read_csv('../data/bicikelj_train.csv')
meta = pd.read_csv('../data/bicikelj_metadata.csv')
# Every column after the first is treated as one station's series
# (the 'timestamp' column is read by name later on).
station_cols = train_df.columns[1:]

# Coerce anything non-numeric to NaN so the gap filling below can handle it.
for col in station_cols:
    train_df[col] = pd.to_numeric(train_df[col], errors='coerce')
# Fill gaps forward in time first, then backward for leading gaps.
# DataFrame.ffill()/bfill() replace the deprecated fillna(method=...) form,
# which emits a FutureWarning on recent pandas versions.
train_df[station_cols] = train_df[station_cols].ffill().bfill()
# Drop rows in which every station value is still missing.
train_df = train_df.dropna(subset=station_cols, how='all').reset_index(drop=True)

# --- NEIGHBORS ---
# Map every station name to the names of its k_neighbors closest stations.
station_names = meta['name'].tolist()
coords = np.deg2rad(meta[['latitude', 'longitude']].values)
# haversine_distances works on (lat, lon) in radians; the factor 6371 is
# presumably the mean Earth radius in km, turning the result into kilometres.
dists = haversine_distances(coords, coords) * 6371
neighbors = {}
for i, name in enumerate(station_names):
    ranked = np.argsort(dists[i])
    # Remove the station itself, then keep the k closest remaining stations.
    closest = ranked[ranked != i][:k_neighbors]
    neighbors[name] = [station_names[j] for j in closest]

# --- FEATURE ENGINEERING FOR LSTM ---
def make_seq_features_targets(df, station_cols, neighbors, history_len, pred_horizon):
    """Build sliding-window LSTM inputs and multi-step targets per station.

    For every station, sample ``s`` uses rows ``s .. s + history_len - 1`` as
    the input window and the station's own values in the following
    ``pred_horizon`` rows as the target.

    Per-timestep features, in column order:
        1. the station's own value,
        2. the value of each neighbour listed in ``neighbors[station]``,
        3. hour of day divided by 23,
        4. day of week divided by 6.

    Args:
        df: DataFrame with a ``'timestamp'`` column (parseable by
            ``pd.to_datetime``) and one numeric column per station.
        station_cols: Iterable of station column names to build samples for.
        neighbors: Mapping from station name to a list of neighbour column
            names present in ``df``.
        history_len: Number of past rows in each input window.
        pred_horizon: Number of future rows in each target.

    Returns:
        ``(features_dict, targets_dict)`` keyed by station name, holding float
        arrays of shape ``(n_samples, history_len, n_features)`` and
        ``(n_samples, pred_horizon)`` respectively, where
        ``n_samples = len(df) - history_len - pred_horizon + 1``.

    Raises:
        ValueError: If ``df`` is too short to yield a single sample.
    """
    T = len(df)
    n_samples = T - history_len - pred_horizon + 1
    if n_samples <= 0:
        raise ValueError(
            f"need at least history_len + pred_horizon = "
            f"{history_len + pred_horizon} rows to build one sample, got {T}"
        )

    timestamps = pd.to_datetime(df['timestamp'])
    hours = (timestamps.dt.hour / 23.0).values
    dows = (timestamps.dt.dayofweek / 6.0).values

    # Row indices of every window/target, shared by all stations. Fancy
    # indexing with these replaces the per-element Python loops.
    starts = np.arange(n_samples)[:, None]
    hist_idx = starts + np.arange(history_len)                 # (n_samples, history_len)
    tgt_idx = starts + history_len + np.arange(pred_horizon)   # (n_samples, pred_horizon)

    features_dict = {}
    targets_dict = {}
    for station in station_cols:
        own_vals = df[station].values.astype(float)
        columns = [own_vals]
        columns.extend(df[nb].values.astype(float) for nb in neighbors[station])
        columns.extend([hours, dows])
        per_step = np.column_stack(columns)                    # (T, n_features)
        features_dict[station] = per_step[hist_idx]
        targets_dict[station] = own_vals[tgt_idx]
    return features_dict, targets_dict

# Build the windowed inputs and targets for every station once, up front.
features_dict, targets_dict = make_seq_features_targets(
    df=train_df,
    station_cols=station_cols,
    neighbors=neighbors,
    history_len=history_len,
    pred_horizon=pred_horizon,
)

# --- SPLIT FUNCTION ---
def get_splits(n, val_ratio, holdout_ratio):
    """Return contiguous (train, val, holdout) slices covering ``range(n)``.

    The slices are in order: training data first, then validation, then the
    holdout tail. Boundaries are truncated to integers with ``int``.
    """
    train_fraction = 1 - val_ratio - holdout_ratio
    train_end = int(n * train_fraction)
    val_end = int(n * (1 - holdout_ratio))
    boundaries = (0, train_end, val_end, n)
    # Pair each boundary with the next one to form the three slices.
    return tuple(slice(lo, hi) for lo, hi in zip(boundaries, boundaries[1:]))

# Per-station index slices, keyed by split name.
splits = {}
for station in station_cols:
    n_samples = features_dict[station].shape[0]
    station_slices = get_splits(n_samples, val_ratio, holdout_ratio)
    splits[station] = dict(zip(("train", "val", "holdout"), station_slices))

# --- DATASET ---
class BikeSeqDataset(Dataset):
    """Torch dataset pairing input arrays with their targets.

    Both NumPy arrays are converted to float32 tensors once at construction;
    indexing returns the ``(X[idx], y[idx])`` pair.
    """

    def __init__(self, X, y):
        inputs, targets = (torch.from_numpy(arr).float() for arr in (X, y))
        self.X = inputs
        self.y = targets

    def __len__(self):
        # Number of samples is the length of the first axis of X.
        return len(self.X)

    def __getitem__(self, idx):
        sample = (self.X[idx], self.y[idx])
        return sample

# --- LSTM MODEL ---
class LSTMModel(nn.Module):
    """LSTM encoder followed by a small head producing ``output_dim`` values.

    The final hidden state of the last LSTM layer is passed through
    LayerNorm -> ReLU -> Linear.

    Args:
        input_dim: Number of features per timestep.
        hidden_dim: LSTM hidden-state size.
        output_dim: Number of values predicted per sample.
        num_layers: Number of stacked LSTM layers.
        dropout: Dropout probability between stacked LSTM layers. It is only
            forwarded to ``nn.LSTM`` when ``num_layers > 1``; with a single
            layer it has no effect.
    """

    def __init__(self, input_dim, hidden_dim=64, output_dim=4, num_layers=1, dropout=0.3):
        super().__init__()
        # nn.LSTM applies dropout only *between* stacked layers. Passing a
        # non-zero value with a single layer does nothing except raise a
        # UserWarning, so it is forwarded only when it can take effect.
        lstm_dropout = dropout if num_layers > 1 else 0.0
        self.lstm = nn.LSTM(input_dim, hidden_dim, num_layers=num_layers,
                            batch_first=True, dropout=lstm_dropout)
        self.fc = nn.Sequential(
            nn.LayerNorm(hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim)
        )

    def forward(self, x):
        """Map a batch-first input ``x`` to predictions of size ``output_dim``."""
        _, (hn, _) = self.lstm(x)  # hn: [num_layers, batch, hidden]
        # Only the last layer's final hidden state feeds the head.
        return self.fc(hn[-1])

# --- TRAINING FUNCTION ---
def train_lstm(X_train, y_train, X_val, y_val, input_dim, output_dim,
               epochs=80, batch_size=32, hidden_dim=64, lr=1e-3, dropout=0.3,
               patience=10, device='cpu'):
    """Train an ``LSTMModel`` with Adam and MSE loss, with early stopping.

    After each epoch the mean per-batch validation loss is computed. The
    weights of the best epoch are snapshotted and restored before returning.
    Training stops early once the validation loss has failed to improve for
    ``patience`` consecutive epochs.

    Args:
        X_train, y_train: NumPy arrays with training inputs and targets
            (wrapped in ``BikeSeqDataset``).
        X_val, y_val: NumPy arrays with validation inputs and targets.
        input_dim: Number of features per timestep.
        output_dim: Number of values predicted per sample.
        epochs: Maximum number of epochs.
        batch_size: Mini-batch size for both loaders.
        hidden_dim: LSTM hidden-state size.
        lr: Adam learning rate.
        dropout: Dropout value forwarded to ``LSTMModel``.
        patience: Early-stopping patience in epochs.
        device: Device the model and batches are moved to.

    Returns:
        The trained model (on ``device``) carrying the best-validation
        weights. If the validation loss never improved (e.g. the validation
        set is empty, giving a NaN loss), the last-epoch weights are kept.
    """
    model = LSTMModel(input_dim, hidden_dim, output_dim, dropout=dropout).to(device)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    loss_fn = nn.MSELoss()
    train_loader = DataLoader(BikeSeqDataset(X_train, y_train), batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(BikeSeqDataset(X_val, y_val), batch_size=batch_size)
    best_val_loss = float('inf')
    patience_counter = 0
    best_state = None

    for epoch in range(epochs):
        model.train()
        train_losses = []
        for xb, yb in train_loader:
            xb, yb = xb.to(device), yb.to(device)
            optimizer.zero_grad()
            preds = model(xb)
            loss = loss_fn(preds, yb)
            loss.backward()
            optimizer.step()
            train_losses.append(loss.item())

        model.eval()
        val_losses = []
        with torch.no_grad():
            for xb, yb in val_loader:
                xb, yb = xb.to(device), yb.to(device)
                preds = model(xb)
                loss = loss_fn(preds, yb)
                val_losses.append(loss.item())

        avg_train = np.mean(train_losses)
        avg_val = np.mean(val_losses)
        print(f"Epoch {epoch+1:03d} | Train: {avg_train:.4f} | Val: {avg_val:.4f}")
        if avg_val < best_val_loss:
            best_val_loss = avg_val
            # state_dict() returns references to the live parameter tensors,
            # which the optimizer keeps updating in place. Clone them so the
            # snapshot really holds this epoch's weights.
            best_state = {key: value.detach().clone()
                          for key, value in model.state_dict().items()}
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print("Early stopping.")
                break
    # best_state stays None when no epoch improved on +inf (NaN validation
    # loss); keep the current weights in that case instead of crashing.
    if best_state is not None:
        model.load_state_dict(best_state)
    return model

# --- MAIN LOOP ---
# Train one model per selected station and score it on that station's holdout.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
lstm_models = {}
holdout_mse = {}

# Use a fixed subset to train faster
# NOTE(review): the first name looks like a mis-decoded 'LIDL BEŽIGRAD';
# confirm it matches the column name in the training CSV.
selected_stations = [
    'LIDL BEÅ½IGRAD',
    'CITYPARK',
]

for station in selected_stations:
    feat, targ = features_dict[station], targets_dict[station]
    station_split = splits[station]
    train_idx = station_split["train"]
    val_idx = station_split["val"]
    holdout_idx = station_split["holdout"]
    X_train, y_train = feat[train_idx], targ[train_idx]
    X_val, y_val = feat[val_idx], targ[val_idx]
    X_hold, y_hold = feat[holdout_idx], targ[holdout_idx]

    print(f"\n--- Training station {station} ---")
    print("Train samples:", len(X_train), "| Val samples:", len(X_val), "| Holdout samples:", len(X_hold))

    model = train_lstm(
        X_train, y_train, X_val, y_val,
        input_dim=X_train.shape[2],
        output_dim=pred_horizon,
        epochs=epochs,
        batch_size=batch_size,
        hidden_dim=hidden_dim,
        lr=learning_rate,
        dropout=0.3,
        patience=patience,
        device=device,
    )

    # Keep the trained model on the CPU; the holdout evaluation runs there too.
    lstm_models[station] = model.cpu()

    # Evaluate on holdout
    model.eval()
    with torch.no_grad():
        hold_pred = model(torch.from_numpy(X_hold).float())
    squared_error = (hold_pred.numpy() - y_hold) ** 2
    mse = squared_error.mean()
    holdout_mse[station] = mse
    print(f"Holdout MSE for {station}: {mse:.4f}")

print("\nMean Holdout MSE across stations:", np.mean(list(holdout_mse.values())))


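# --- CELL OUTPUT (captured from the notebook run; not code) ---
# Source line echoed by a warning (warning text itself was not captured):
#   train_df[station_cols] = train_df[station_cols].fillna(method="ffill").fillna(method="bfill")
#
# KeyboardInterrupt: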