In [7]:
!pip install torch numpy scikit-learn



In [1]:
import itertools
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.metrics.pairwise import haversine_distances
from tqdm import tqdm
import holidays
import random

# --- Static Params ---
K_NEIGHBORS = 2          # nearest stations used as extra inputs per station
PRED_HORIZON = 4         # number of future rows predicted per sample
EPOCHS = 20
PATIENCE = 5             # epochs without validation improvement before stopping
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
MAX_COMBINATIONS = 20    # cap on the number of sampled hyper-parameter combos
TRAIN_FRACTION = 0.01    # share of all samples the grid search works with
EMBED_DIM = 8

# --- Load data ---
df = pd.read_csv('bicikelj_train.csv')
meta = pd.read_csv('bicikelj_metadata.csv')
station_cols = df.columns[1:]  # every column after the first is treated as a station

# Coerce station readings to numbers (unparseable cells become NaN), then fill
# gaps from the previous row and, where there is none, from the next row.
for station in station_cols:
    df[station] = pd.to_numeric(df[station], errors='coerce')
df[station_cols] = df[station_cols].ffill().bfill()
df = df.dropna(subset=station_cols, how='all').reset_index(drop=True)

# --- Load weather ---
# Strip the unit suffixes from the weather column names.
weather_df = pd.read_csv("weather_ljubljana.csv", skiprows=2).rename(columns={
    'temperature_2m (¬∞C)': 'temperature_2m',
    'precipitation (mm)': 'precipitation',
    'windspeed_10m (km/h)': 'windspeed_10m',
    'cloudcover (%)': 'cloudcover'
})
weather_df['time'] = pd.to_datetime(weather_df['time'])
# Drop any timezone so the join keys on both sides are timezone-naive.
df['timestamp'] = pd.to_datetime(df['timestamp']).dt.tz_localize(None)
# Exact-timestamp left join: bike rows without a matching weather row get NaN
# weather values, which are filled from neighbouring rows just below.
df_merged = df.merge(weather_df, how='left', left_on='timestamp', right_on='time')

weather_features = ['temperature_2m', 'precipitation', 'windspeed_10m', 'cloudcover']
df_merged[weather_features] = df_merged[weather_features].ffill().bfill()

# Normalize: per-column z-score; a zero std is replaced by 1 to avoid dividing by zero.
station_means = df_merged[station_cols].mean()
station_stds = df_merged[station_cols].std().replace(0, 1)
weather_means = df_merged[weather_features].mean()
weather_stds = df_merged[weather_features].std().replace(0, 1)

df_norm = df_merged.copy()
df_norm[station_cols] = (df_merged[station_cols] - station_means) / station_stds
df_norm[weather_features] = (df_merged[weather_features] - weather_means) / weather_stds

# --- Neighbors ---
# Haversine distances are returned for a unit sphere; multiplying by 6371
# (Earth radius in km) converts them to kilometres.
coords = np.deg2rad(meta[['latitude', 'longitude']].values)
station_names = meta['name'].tolist()
dists = haversine_distances(coords, coords) * 6371
# For each station keep the K_NEIGHBORS closest other stations, nearest first.
neighbors = {
    name: [station_names[j] for j in np.argsort(dists[i]) if j != i][:K_NEIGHBORS]
    for i, name in enumerate(station_names)
}

# --- Dataset (t-1 only) ---
class MLPStationDataset(Dataset):
    """Per-station samples that use only the previous row (t-1) as input.

    Each sample is a tuple ``(x, y, station_index)`` where ``x`` is laid out as
    ``[station(t-1), neighbours(t-1)..., 8 time features(t-1),
    weather(t-1)..., station_index]`` and ``y`` holds the station's values for
    rows ``t .. t + pred_horizon - 1``.

    Args:
        df: frame with a ``timestamp`` column plus the station and weather columns.
        station_cols: station column names; their order defines the station index.
        neighbors: mapping from station name to a list of neighbour station names.
        pred_horizon: number of future rows to predict per sample.
        weather_features: names of the weather columns to append to ``x``.
    """

    def __init__(self, df, station_cols, neighbors, pred_horizon, weather_features):
        self.samples = []
        self.station_to_idx = {name: i for i, name in enumerate(station_cols)}
        timestamps = pd.to_datetime(df['timestamp'])

        # Cyclical encodings of hour, day of week and month.
        hour_angle = 2 * np.pi * timestamps.dt.hour / 24
        dow_angle = 2 * np.pi * timestamps.dt.dayofweek / 7
        month_angle = 2 * np.pi * timestamps.dt.month / 12
        is_weekend = (timestamps.dt.dayofweek >= 5).astype(float)

        # A holidays object created without `years` starts empty and is only
        # filled lazily on lookups, so iterating it yields nothing and the flag
        # would always be 0. Populate it for the years present in the data.
        years = timestamps.dt.year.dropna().astype(int).unique().tolist()
        slo_holidays = holidays.Slovenia(years=years)
        holiday_dates = {str(d) for d in slo_holidays}
        is_holiday = timestamps.dt.date.astype(str).isin(holiday_dates).astype(float)

        time_feats = np.stack([np.sin(hour_angle), np.cos(hour_angle),
                               np.sin(dow_angle), np.cos(dow_angle),
                               np.sin(month_angle), np.cos(month_angle),
                               is_weekend, is_holiday], axis=1)
        bikes = df[station_cols].values.astype(np.float32)
        weather = df[weather_features].values.astype(np.float32)
        # Time and weather are identical for every station, so join them once.
        context = np.hstack([time_feats, weather]).astype(np.float32)
        N = len(df)

        for s_name in station_cols:
            s_idx = self.station_to_idx[s_name]
            nn_idx = [self.station_to_idx[nn] for nn in neighbors[s_name]]
            feat_cols = [s_idx] + nn_idx
            # Honour the `pred_horizon` argument (the global PRED_HORIZON was
            # used here before, silently ignoring the parameter).
            for i in range(1, N - pred_horizon + 1):
                x = np.concatenate([
                    bikes[i - 1, feat_cols],   # station and neighbours at t-1
                    context[i - 1],            # time + weather at t-1
                    [s_idx],                   # raw station index as a feature
                ]).astype(np.float32)
                y = bikes[i:i + pred_horizon, s_idx]
                self.samples.append((x, y, s_idx))

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return sample ``idx`` as ``(float32 x, float32 y, long station index)`` tensors."""
        x, y, sid = self.samples[idx]
        return (torch.tensor(x, dtype=torch.float32),
                torch.tensor(y, dtype=torch.float32),
                torch.tensor(sid, dtype=torch.long))

# --- MLP Model ---
class MLP(nn.Module):
    """Feed-forward regressor: Linear -> ReLU -> Dropout -> Linear -> ReLU -> Linear.

    Args:
        input_dim: length of the flat input feature vector.
        output_dim: number of values predicted per sample.
        hidden_dim: width of both hidden layers.
        dropout: dropout probability applied after the first hidden layer.
    """

    def __init__(self, input_dim, output_dim, hidden_dim=64, dropout=0.2):
        super().__init__()
        stack = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(dropout),
            nn.Linear(hidden_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, output_dim),
        ]
        self.net = nn.Sequential(*stack)

    def forward(self, x, sid=None):
        """Run ``x`` through the network; ``sid`` is accepted but not used."""
        prediction = self.net(x)
        return prediction

# --- Training ---
def train_mlp(model, train_loader, val_loader, lr, weight_decay):
    """Train ``model`` with Adam and MSE loss, early-stopping on validation loss.

    Runs for at most ``EPOCHS`` epochs and stops once the validation loss has
    failed to improve for ``PATIENCE`` consecutive epochs. The weights from the
    best validation epoch are restored before returning.

    Args:
        model: module called as ``model(xb)``.
        train_loader: iterable of ``(xb, yb, sid)`` training batches.
        val_loader: iterable of ``(xb, yb, sid)`` validation batches.
        lr: Adam learning rate.
        weight_decay: Adam weight decay.

    Returns:
        ``(model, best_loss)`` where ``best_loss`` is the lowest mean per-batch
        validation loss and ``model`` carries the weights that produced it.

    Raises:
        ValueError: if ``val_loader`` yields no batches.
    """
    if len(val_loader) == 0:
        raise ValueError("val_loader must yield at least one batch")

    model = model.to(DEVICE)
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)
    criterion = nn.MSELoss()
    best_loss = float('inf')
    best_state = None
    patience_counter = 0

    for epoch in range(EPOCHS):
        model.train()
        for xb, yb, sid in train_loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            optimizer.zero_grad()
            loss = criterion(model(xb), yb)
            loss.backward()
            optimizer.step()

        model.eval()
        val_loss = 0.0
        with torch.no_grad():
            for xb, yb, sid in val_loader:
                xb, yb = xb.to(DEVICE), yb.to(DEVICE)
                val_loss += criterion(model(xb), yb).item()
        val_loss /= len(val_loader)

        if val_loss < best_loss:
            best_loss = val_loss
            # state_dict() hands back references to the live parameter tensors,
            # which later optimizer steps overwrite in place. Clone them so the
            # snapshot really is the best epoch and matches `best_loss`.
            best_state = {k: v.detach().clone() for k, v in model.state_dict().items()}
            patience_counter = 0
        else:
            patience_counter += 1
            if patience_counter >= PATIENCE:
                break

    # best_state stays None when no epoch produced a comparable loss (e.g. NaN);
    # in that case keep the current weights instead of crashing.
    if best_state is not None:
        model.load_state_dict(best_state)
    return model, best_loss

# --- Grid search ---
# Seed every RNG involved so the sampled combinations, the data split, the
# batch order and the weight initialisation are repeatable across runs.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

param_grid = {
    'hidden_dim':    [32, 64, 128, 256],
    'dropout':       [0.0, 0.1, 0.2, 0.3],
    'lr':            [1e-2, 1e-3, 5e-4, 1e-4],
    'weight_decay':  [0.0, 1e-5, 1e-4, 1e-3]
}

# Randomly sample at most MAX_COMBINATIONS points from the full Cartesian grid.
param_combos = list(itertools.product(*param_grid.values()))
random.shuffle(param_combos)
param_combos = param_combos[:MAX_COMBINATIONS]

# --- Dataset ---
dataset = MLPStationDataset(df_norm, station_cols, neighbors, PRED_HORIZON, weather_features)
N = len(dataset)
reduced_N = int(N * TRAIN_FRACTION)  # only this many samples are used overall
indices = list(range(N))
random.shuffle(indices)

# 70 / 15 / 15 split of the reduced budget; the holdout takes the remainder.
train_size = int(reduced_N * 0.7)
val_size = int(reduced_N * 0.15)
holdout_size = reduced_N - train_size - val_size

train_set = Subset(dataset, indices[:train_size])
val_set = Subset(dataset, indices[train_size:train_size + val_size])
holdout_set = Subset(dataset, indices[train_size + val_size:train_size + val_size + holdout_size])

train_loader = DataLoader(train_set, batch_size=64, shuffle=True)
val_loader = DataLoader(val_set, batch_size=64)
holdout_loader = DataLoader(holdout_set, batch_size=64)

# --- Run ---
input_dim = 1 + K_NEIGHBORS + 8 + len(weather_features) + 1  # add 1 for station_id (or remove for "strict")
output_dim = PRED_HORIZON

results = []
print(f"‚è≥ Running grid search over {len(param_combos)} combinations...")
for i, (hdim, dr, lr, wd) in enumerate(param_combos):
    print(f"\nüîç Combo {i+1}: hidden_dim={hdim}, dropout={dr}, lr={lr}, weight_decay={wd}")
    model = MLP(input_dim=input_dim, output_dim=output_dim, hidden_dim=hdim, dropout=dr)
    # The validation set drives early stopping inside train_mlp; the holdout
    # set is scored only after training and is what the ranking below uses.
    model, val_loss = train_mlp(model, train_loader, val_loader, lr, wd)

    model.eval()
    holdout_loss = 0.0
    criterion = nn.MSELoss()
    with torch.no_grad():
        for xb, yb, sid in holdout_loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            holdout_loss += criterion(model(xb), yb).item()
    holdout_loss /= len(holdout_loader)

    print(f"‚úÖ Val Loss: {val_loss:.4f}, Holdout Loss: {holdout_loss:.4f}")
    results.append({
        "hidden_dim": hdim,
        "dropout": dr,
        "lr": lr,
        "weight_decay": wd,
        "val_loss": val_loss,
        "holdout_loss": holdout_loss
    })
    # Checkpoint after every combination so an interrupted run keeps the
    # results gathered so far.
    pd.DataFrame(results).sort_values(by="holdout_loss").to_csv(
        "grid_search_mlp_results.csv", index=False)

# --- Save ---
results_df = pd.DataFrame(results)
results_df = results_df.sort_values(by="holdout_loss")
results_df.to_csv("grid_search_mlp_results.csv", index=False)
print("\nüìä Top 5 Results:")
print(results_df.head())


‚è≥ Running grid search over 20 combinations...

üîç Combo 1: hidden_dim=256, dropout=0.2, lr=0.0005, weight_decay=1e-05
‚úÖ Val Loss: 0.4100, Holdout Loss: 0.3968

üîç Combo 2: hidden_dim=256, dropout=0.3, lr=0.0001, weight_decay=0.001


KeyboardInterrupt: 

In [2]:
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.metrics.pairwise import haversine_distances
from tqdm import tqdm
import holidays
import random

# --- Hyperparameters ---
HISTORY_LEN = 1           # Only t-1
PRED_HORIZON = 4          # number of future rows predicted per sample
K_NEIGHBORS = 2           # nearest stations used as extra inputs per station
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
MLP_HIDDEN = 128
MLP_LAYERS = 2
MLP_DROPOUT = 0.1
EPOCHS = 50
PATIENCE = 8              # epochs without validation improvement before stopping
BATCH_SIZE = 128
LR = 0.001
WEIGHT_DECAY = 0.001

# --- Load data ---
df = pd.read_csv("bicikelj_train.csv")
meta = pd.read_csv("bicikelj_metadata.csv")
station_cols = df.columns[1:]  # every column after the first is treated as a station

# Turn station readings into numbers (bad cells become NaN) and close the gaps
# with the previous row's value, falling back to the next row's value.
for station_col in station_cols:
    df[station_col] = pd.to_numeric(df[station_col], errors="coerce")
df[station_cols] = df[station_cols].ffill().bfill()
df = df.dropna(subset=station_cols, how='all').reset_index(drop=True)

# --- Load weather ---
weather_renames = {
    'temperature_2m (¬∞C)': 'temperature_2m',
    'precipitation (mm)': 'precipitation',
    'windspeed_10m (km/h)': 'windspeed_10m',
    'cloudcover (%)': 'cloudcover'
}
weather_df = pd.read_csv("weather_ljubljana.csv", skiprows=2)
weather_df = weather_df.rename(columns=weather_renames)
weather_df['time'] = pd.to_datetime(weather_df['time'])
# Remove timezone information so both join keys are timezone-naive.
df['timestamp'] = pd.to_datetime(df['timestamp']).dt.tz_localize(None)
# Left join on exact timestamps; unmatched bike rows receive NaN weather,
# which is filled from adjacent rows right after.
df_merged = df.merge(weather_df, how='left', left_on='timestamp', right_on='time')

weather_features = ['temperature_2m', 'precipitation', 'windspeed_10m', 'cloudcover']
df_merged[weather_features] = df_merged[weather_features].ffill().bfill()

# --- Normalize ---
# Column-wise z-scores; a zero std is swapped for 1 so constant columns do not
# produce a division by zero.
station_means = df_merged[station_cols].mean()
station_stds = df_merged[station_cols].std().replace(0, 1)
weather_means = df_merged[weather_features].mean()
weather_stds = df_merged[weather_features].std().replace(0, 1)

df_norm = df_merged.copy()
df_norm[station_cols] = (df_merged[station_cols] - station_means) / station_stds
df_norm[weather_features] = (df_merged[weather_features] - weather_means) / weather_stds

# --- Neighbors ---
# haversine_distances works on radians and a unit sphere; 6371 is the Earth
# radius in km, so `dists` is in kilometres.
coords = np.deg2rad(meta[['latitude', 'longitude']].values)
station_names = meta['name'].tolist()
dists = haversine_distances(coords, coords) * 6371
# Map every station to its K_NEIGHBORS closest other stations, nearest first.
neighbors = {
    name: [station_names[j] for j in np.argsort(dists[i]) if j != i][:K_NEIGHBORS]
    for i, name in enumerate(station_names)
}

# --- Reproducibility ---
# Seed every RNG involved so the train/val split, batch order and weight
# initialisation are repeatable across runs.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)


# --- Shared time features ---
def build_time_features(timestamps):
    """Build the 8 calendar features used for both training and prediction.

    Keeping this in one function guarantees that the training dataset and the
    prediction code below feed the model identically constructed columns.

    Args:
        timestamps: pandas Series of datetimes (or values ``pd.to_datetime`` accepts).

    Returns:
        Array of shape ``[len(timestamps), 8]`` with columns
        ``hour_sin, hour_cos, dow_sin, dow_cos, month_sin, month_cos,
        is_weekend, is_holiday``.
    """
    timestamps = pd.to_datetime(timestamps)
    hour_angle = 2 * np.pi * timestamps.dt.hour / 24
    dow_angle = 2 * np.pi * timestamps.dt.dayofweek / 7
    month_angle = 2 * np.pi * timestamps.dt.month / 12
    is_weekend = (timestamps.dt.dayofweek >= 5).astype(float)

    # A holidays object created without `years` starts empty and is only filled
    # lazily on lookups, so iterating it yields nothing and the flag would
    # always be 0. Populate it for the years that occur in the data.
    years = timestamps.dt.year.dropna().astype(int).unique().tolist()
    slo_holidays = holidays.Slovenia(years=years)
    holiday_dates = {str(d) for d in slo_holidays}
    is_holiday = timestamps.dt.date.astype(str).isin(holiday_dates).astype(float)

    # month_cos was previously computed with np.sin, duplicating month_sin.
    return np.stack([np.sin(hour_angle), np.cos(hour_angle),
                     np.sin(dow_angle), np.cos(dow_angle),
                     np.sin(month_angle), np.cos(month_angle),
                     is_weekend, is_holiday], axis=1)


# --- Dataset using only t-1 ---
class LastStepMLPDataset(Dataset):
    """Per-station samples that use only the previous row (t-1) as input.

    Each sample is ``(x, y, station_index)`` with ``x`` laid out as
    ``[station(t-1), neighbours(t-1)..., 8 time features(t-1), weather(t-1)...]``
    and ``y`` the station's values for rows ``t .. t + pred_horizon - 1``.

    Args:
        df: frame with a ``timestamp`` column plus the station and weather columns.
        station_cols: station column names; their order defines the station index.
        neighbors: mapping from station name to a list of neighbour station names.
        pred_horizon: number of future rows to predict per sample.
        weather_features: names of the weather columns to append to ``x``.
    """

    def __init__(self, df, station_cols, neighbors, pred_horizon, weather_features):
        self.samples = []
        self.station_to_idx = {name: i for i, name in enumerate(station_cols)}

        time_feats = build_time_features(df['timestamp'])   # [N, 8]
        weather_array = df[weather_features].values          # [N, W]
        # Time and weather are identical for every station, so join them once.
        context = np.hstack([time_feats, weather_array])

        bikes = df[station_cols].values.astype(np.float32)
        N = len(df)
        for s_name in station_cols:
            s_idx = self.station_to_idx[s_name]
            nn_idx = [self.station_to_idx[nn] for nn in neighbors[s_name]]
            series = bikes[:, [s_idx] + nn_idx]
            # For each prediction, use only t-1
            for i in range(1, N - pred_horizon + 1):
                x = np.concatenate([
                    series[i-1],              # [station_t-1, neighbors_t-1]
                    context[i-1],             # time feats + weather at t-1
                ])
                y = bikes[i:i + pred_horizon, s_idx]
                self.samples.append((x, y, s_idx))

    def __len__(self):
        """Return the number of stored samples."""
        return len(self.samples)

    def __getitem__(self, idx):
        """Return sample ``idx`` as ``(float32 x, float32 y, long station index)`` tensors."""
        x, y, sid = self.samples[idx]
        return (torch.tensor(x, dtype=torch.float32),
                torch.tensor(y, dtype=torch.float32),
                torch.tensor(sid, dtype=torch.long))

# --- Model ---
class MLP(nn.Module):
    """Configurable MLP with an optional station-embedding head.

    The trunk is ``n_layers`` repetitions of Linear -> ReLU -> Dropout followed
    by a final Linear. When both ``num_stations`` and ``embed_dim`` are given,
    a station embedding is concatenated to the trunk output and a separate
    ``head`` layer produces the prediction instead of the trunk's final Linear.

    Args:
        input_dim: length of the flat input feature vector.
        output_dim: number of values predicted per sample.
        hidden_dim: width of every hidden layer.
        n_layers: number of hidden blocks.
        dropout: dropout probability inside each hidden block.
        num_stations: number of distinct station ids (enables the embedding).
        embed_dim: embedding size (enables the embedding).
    """

    def __init__(self, input_dim, output_dim, hidden_dim=128, n_layers=2, dropout=0.1, num_stations=None, embed_dim=None):
        super().__init__()
        layers = []
        in_dim = input_dim
        for i in range(n_layers):
            layers.append(nn.Linear(in_dim, hidden_dim))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            in_dim = hidden_dim
        # Use the trunk's actual output width so n_layers=0 also yields a
        # correctly shaped final layer (it equals hidden_dim otherwise).
        layers.append(nn.Linear(in_dim, output_dim))
        self.mlp = nn.Sequential(*layers)
        # Optional: embedding if you want to match TCN
        if num_stations is not None and embed_dim is not None:
            self.embedding = nn.Embedding(num_stations, embed_dim)
            self.head = nn.Linear(in_dim + embed_dim, output_dim)
            self.use_embedding = True
        else:
            self.use_embedding = False

    def forward(self, x, station_id=None):
        """Predict from ``x``; uses the embedding head only if enabled and ``station_id`` is given."""
        z = self.mlp[:-1](x)  # trunk without its final Linear
        if self.use_embedding and station_id is not None:
            emb = self.embedding(station_id)
            z = torch.cat([z, emb], dim=1)
            out = self.head(z)
        else:
            out = self.mlp[-1](z)
        return out

# --- Prepare Data ---
dataset = LastStepMLPDataset(df_norm, station_cols, neighbors, PRED_HORIZON, weather_features)
N = len(dataset)
indices = list(range(N))
random.shuffle(indices)
# Random 90 / 10 split over individual samples.
# NOTE(review): samples from adjacent rows share target values, so a random
# split lets near-duplicates land on both sides - consider a time-based split.
val_size = int(0.1 * N)
train_size = N - val_size
train_indices = indices[:train_size]
val_indices = indices[train_size:]
train_set = Subset(dataset, train_indices)
val_set = Subset(dataset, val_indices)
train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=4, pin_memory=True)
val_loader = DataLoader(val_set, batch_size=BATCH_SIZE, num_workers=4, pin_memory=True)

input_dim = 1 + K_NEIGHBORS + 8 + len(weather_features)
output_dim = PRED_HORIZON

model = MLP(input_dim=input_dim, output_dim=output_dim,
            hidden_dim=MLP_HIDDEN, n_layers=MLP_LAYERS, dropout=MLP_DROPOUT).to(DEVICE)
optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)
criterion = nn.MSELoss()

# --- Training ---
best_loss = float('inf')
best_state = None
patience_counter = 0

for epoch in range(EPOCHS):
    model.train()
    running_loss = 0.0
    for xb, yb, sid in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}"):
        xb, yb = xb.to(DEVICE), yb.to(DEVICE)
        optimizer.zero_grad()
        loss = criterion(model(xb), yb)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    avg_train_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1}: Train Loss = {avg_train_loss:.4f}")

    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for xb, yb, sid in val_loader:
            xb, yb = xb.to(DEVICE), yb.to(DEVICE)
            val_loss += criterion(model(xb), yb).item()
    avg_val_loss = val_loss / len(val_loader)
    print(f"Epoch {epoch+1}: Val Loss = {avg_val_loss:.4f}")

    if avg_val_loss < best_loss:
        best_loss = avg_val_loss
        # state_dict() returns references to the live parameter tensors, which
        # later optimizer steps overwrite in place. Clone them so the snapshot
        # really holds the best epoch's weights.
        best_state = {k: v.detach().clone() for k, v in model.state_dict().items()}
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= PATIENCE:
            print("Early stopping!")
            break

# best_state stays None when no epoch produced a comparable loss (e.g. NaN);
# keep the current weights in that case rather than crashing.
if best_state is not None:
    model.load_state_dict(best_state)
torch.save(model.state_dict(), "mlp_t1_model_final_weather.pt")
print("‚úÖ Saved model to 'mlp_t1_model_final_weather.pt'")

# --- PREDICTION ---
# --- Prepare test set ---
test_df = pd.read_csv("bicikelj_test.csv")
test_feats = test_df[station_cols].values.astype(np.float32)
weather_test_df = pd.read_csv("weather_ljubljana_test.csv", skiprows=2)
weather_test_df = weather_test_df.rename(columns={
    'temperature_2m (¬∞C)': 'temperature_2m',
    'precipitation (mm)': 'precipitation',
    'windspeed_10m (km/h)': 'windspeed_10m',
    'cloudcover (%)': 'cloudcover'
})
weather_test_df['time'] = pd.to_datetime(weather_test_df['time'])
test_df['timestamp'] = pd.to_datetime(test_df['timestamp']).dt.tz_localize(None)
timestamps = test_df['timestamp']
test_df_merged = pd.merge(test_df, weather_test_df, left_on='timestamp', right_on='time', how='left')
test_df_merged[weather_features] = test_df_merged[weather_features].ffill().bfill()

# --- Time features ---
# Same builder as the training dataset, so the columns match exactly.
time_feats = build_time_features(timestamps)

# --- Normalize test_feats and weather ---
# Reuse the training statistics; their order follows station_cols / weather_features.
test_feats_norm = (test_feats - station_means.values) / station_stds.values
weather_feats_norm = (test_df_merged[weather_features].values - weather_means.values) / weather_stds.values

name_to_idx = {name: i for i, name in enumerate(station_cols)}
pred_matrix = np.full_like(test_feats, np.nan)

# Row s lists the columns feeding station s: the station itself, then its neighbours.
feature_cols = np.array(
    [[name_to_idx[station]] + [name_to_idx[nn] for nn in neighbors[station]]
     for station in station_cols],
    dtype=np.int64,
)
n_stations = len(station_cols)
std_col = station_stds.values[:, None]    # [S, 1]
mean_col = station_means.values[:, None]  # [S, 1]

model.eval()
with torch.no_grad():
    for i in range(1, len(test_df) - PRED_HORIZON + 1):
        # Only predict windows in which every station is missing for all
        # PRED_HORIZON rows.
        if not np.isnan(test_feats[i:i + PRED_HORIZON]).all():
            continue
        # features at t-1, for all stations at once
        station_part = test_feats_norm[i - 1][feature_cols]                       # [S, 1+K]
        shared_part = np.concatenate([time_feats[i - 1], weather_feats_norm[i - 1]])
        batch = np.hstack([station_part,
                           np.tile(shared_part, (n_stations, 1))]).astype(np.float32)
        pred_norm = model(torch.from_numpy(batch).to(DEVICE)).cpu().numpy()       # [S, H]
        # Undo the z-score per station and write the H future rows.
        pred_matrix[i:i + PRED_HORIZON] = (pred_norm * std_col + mean_col).T

# --- Save predictions ---
pred_df = pd.DataFrame(pred_matrix, columns=station_cols)
pred_df.insert(0, "timestamp", test_df["timestamp"])
# Output only the rows where every station value is missing in the test file.
rows_to_output = test_df[station_cols].isna().all(axis=1)
pred_df_filtered = pred_df[rows_to_output].copy()
pred_df_filtered.to_csv("bicikelj_test_predictions_mlp_t1_weather.csv", index=False)
print("‚úÖ Saved predictions to 'bicikelj_test_predictions_mlp_t1_weather.csv'")


Epoch 1/50: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 12084/12084 [00:33<00:00, 360.82it/s]

Epoch 1: Train Loss = 0.4036





Epoch 1: Val Loss = 0.4014


Epoch 2/50: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 12084/12084 [00:33<00:00, 362.55it/s]

Epoch 2: Train Loss = 0.4000





Epoch 2: Val Loss = 0.3991


Epoch 3/50: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 12084/12084 [00:33<00:00, 365.43it/s]

Epoch 3: Train Loss = 0.3998





Epoch 3: Val Loss = 0.4036


Epoch 4/50:   7%|‚ñã         | 903/12084 [00:02<00:29, 374.44it/s]Exception in thread Thread-15 (_pin_memory_loop):
Traceback (most recent call last):
  File "/usr/lib/python3.11/threading.py", line 1045, in _bootstrap_inner
    self.run()
  File "/usr/lib/python3.11/threading.py", line 982, in run
    self._target(*self._args, **self._kwargs)
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/_utils/pin_memory.py", line 59, in _pin_memory_loop
    do_one_step()
  File "/usr/local/lib/python3.11/dist-packages/torch/utils/data/_utils/pin_memory.py", line 35, in do_one_step
    r = in_queue.get(timeout=MP_STATUS_CHECK_INTERVAL)
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/lib/python3.11/multiprocessing/queues.py", line 122, in get
    return _ForkingPickler.loads(res)
           ^^^^^^^^^^^^^^^^^^^^^^^^^^
  File "/usr/local/lib/python3.11/dist-packages/torch/multiprocessing/reductions.py", line 541, in rebuild_storage_fd
    fd = df.detach()
         

KeyboardInterrupt: 

# Holdout tcn eval