In [None]:
# --- TCN Bicikelj final training + test prediction with speedups ---

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader, Subset
from sklearn.metrics.pairwise import haversine_distances
from tqdm import tqdm
import holidays
import random

# --- Hyperparameters ---
# Windowing: HISTORY_LEN past rows go in, PRED_HORIZON future rows come out.
HISTORY_LEN, PRED_HORIZON = 48, 4
# How many nearest stations are appended to each station's input channels.
K_NEIGHBORS = 2

# Model capacity.
EMBED_DIM, HIDDEN_DIM, N_LAYERS = 8, 64, 3
DROPOUT = 0.2

# Optimisation schedule.
LR, WEIGHT_DECAY = 5e-4, 1e-4
EPOCHS, PATIENCE = 50, 8
BATCH_SIZE = 128  # increased safely

# Use the GPU when torch can see one, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# --- Load data ---
meta = pd.read_csv("bicikelj_metadata.csv")
df = pd.read_csv("bicikelj_train.csv")
# Every column after the first is treated as one station's series.
station_cols = df.columns[1:]

# Clean and fill: cells that do not parse as numbers become NaN, then gaps are
# closed by carrying values forward and, for leading gaps, backward.
df[station_cols] = df[station_cols].apply(pd.to_numeric, errors="coerce")
df[station_cols] = df[station_cols].ffill().bfill()
# Drop any row in which every station is still missing.
df = df.dropna(subset=station_cols, how="all").reset_index(drop=True)

# Station normalization: per-station z-score. A zero std is replaced by 1 so
# a constant column does not divide by zero.
station_means = df[station_cols].mean()
station_stds = df[station_cols].std().replace(0, 1)
df_norm = df.copy()
df_norm[station_cols] = df[station_cols].sub(station_means).div(station_stds)

# Neighbors: map every station name to its K_NEIGHBORS closest other stations.
station_names = meta['name'].tolist()
# haversine_distances takes (lat, lon) in radians and returns distances on the
# unit sphere; 6371 (~Earth's radius in km) rescales them to kilometres.
coords = np.deg2rad(meta[['latitude', 'longitude']].values)
dists = haversine_distances(coords, coords) * 6371
neighbors = {}
for i, name in enumerate(station_names):
    # Ascending distance, with the station itself removed from its own ranking.
    closest = [j for j in np.argsort(dists[i]) if j != i]
    neighbors[name] = [station_names[j] for j in closest[:K_NEIGHBORS]]

# --- Dataset ---
class SharedTCNDataset(Dataset):
    """Sliding-window forecasting samples for all stations in one dataset.

    For each station, and for each row index ``i`` that has ``history_len``
    rows before it and ``pred_horizon`` rows from it onward, one sample is
    stored:

    * ``x`` -- ``(history_len, 1 + n_neighbors + 8)``: the station's series,
      its neighbours' series and 8 calendar features for rows ``i-history_len``
      to ``i-1``;
    * ``y`` -- ``(pred_horizon,)``: the station's own values for rows ``i`` to
      ``i+pred_horizon-1``;
    * the station's integer index (position in ``station_cols``).

    Args:
        df: Frame with a ``'timestamp'`` column and one column per station.
        station_cols: Station column names; their order defines station indices.
        neighbors: Mapping station name -> list of neighbour station names.
        history_len: Number of past rows in each input window.
        pred_horizon: Number of future rows in each target.
    """

    def __init__(self, df, station_cols, neighbors, history_len, pred_horizon):
        self.samples = []
        self.station_to_idx = {name: i for i, name in enumerate(station_cols)}
        timestamps = pd.to_datetime(df['timestamp'])
        time_feats = self._build_time_features(timestamps, holidays.Slovenia())

        bikes = df[station_cols].values.astype(np.float32)
        n_rows = len(df)

        for s_name in station_cols:
            s_idx = self.station_to_idx[s_name]
            nb_idx = [self.station_to_idx[nb] for nb in neighbors[s_name]]
            series = bikes[:, [s_idx] + nb_idx]
            # Samples are served as float32 anyway; casting once here avoids
            # keeping a float64 feature matrix alive for every station.
            full_feats = np.concatenate([series, time_feats], axis=1).astype(np.float32)

            for i in range(history_len, n_rows - pred_horizon + 1):
                # Slices are views, so the windows share memory with full_feats/bikes.
                x = full_feats[i - history_len:i]
                y = bikes[i:i + pred_horizon, s_idx]
                self.samples.append((x, y, s_idx))

    @staticmethod
    def _build_time_features(timestamps, holiday_calendar):
        """Return an ``(n_rows, 8)`` array of calendar features.

        Columns, in order: hour sin/cos, day-of-week sin/cos, month sin/cos,
        weekend flag, holiday flag.

        Args:
            timestamps: Datetime Series, one entry per data row.
            holiday_calendar: Iterable of dates regarded as holidays.
        """
        hour_angle = 2 * np.pi * timestamps.dt.hour / 24
        dow_angle = 2 * np.pi * timestamps.dt.dayofweek / 7
        month_angle = 2 * np.pi * timestamps.dt.month / 12
        is_weekend = (timestamps.dt.dayofweek >= 5).astype(float)
        # NOTE(review): the calendar passed in by __init__ is holidays.Slovenia()
        # created without `years`; the holidays package appears to populate
        # years lazily on lookup, so iterating it here may yield no dates and
        # leave this flag all zeros -- verify. The inference code builds the
        # flag the same way, so change training and inference together.
        holiday_strs = [str(d) for d in holiday_calendar]
        is_holiday = timestamps.dt.date.astype(str).isin(holiday_strs).astype(float)
        return np.stack(
            [
                np.sin(hour_angle), np.cos(hour_angle),
                np.sin(dow_angle), np.cos(dow_angle),
                # month_cos must be a cosine: with a second sine the month
                # encoding collapses to one value and mismatches inference.
                np.sin(month_angle), np.cos(month_angle),
                is_weekend, is_holiday,
            ],
            axis=1,
        )

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        x, y, sid = self.samples[idx]
        return (torch.tensor(x, dtype=torch.float32),
                torch.tensor(y, dtype=torch.float32),
                torch.tensor(sid, dtype=torch.long))

# --- TCN Block ---
class TemporalBlock(nn.Module):
    """Residual block of two dilated, causal 1-D convolutions.

    Each convolution pads both ends by ``(kernel_size - 1) * dilation`` and the
    surplus steps at the right end are trimmed afterwards, so the output keeps
    the input length and step ``t`` only depends on inputs up to ``t``.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels produced by both convolutions.
        kernel_size: Convolution kernel width.
        dilation: Spacing between kernel taps.
        dropout: Dropout probability applied after each activation.
    """

    def __init__(self, in_channels, out_channels, kernel_size, dilation, dropout):
        super().__init__()
        self.padding = (kernel_size - 1) * dilation
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size,
                               padding=self.padding, dilation=dilation)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size,
                               padding=self.padding, dilation=dilation)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        # A 1x1 convolution matches channel counts on the skip path when needed.
        self.downsample = nn.Conv1d(in_channels, out_channels, 1) if in_channels != out_channels else None
        self.init_weights()

    def init_weights(self):
        """Re-initialise every Conv1d weight in this block with Kaiming-normal."""
        for m in self.modules():
            if isinstance(m, nn.Conv1d):
                nn.init.kaiming_normal_(m.weight)

    def _chomp(self, out):
        """Trim the ``padding`` extra steps a padded convolution adds on the right."""
        # `out[:, :, :-0]` is an empty slice, so zero padding (kernel_size == 1)
        # must skip the trim instead of wiping out the whole sequence.
        return out[:, :, :-self.padding] if self.padding else out

    def forward(self, x):
        """Apply the block to ``x`` of shape (batch, in_channels, length)."""
        out = self.dropout(self.relu(self._chomp(self.conv1(x))))
        out = self.dropout(self.relu(self._chomp(self.conv2(out))))

        res = x if self.downsample is None else self.downsample(x)
        return out + res

class TCN(nn.Module):
    """Stacked TemporalBlocks plus a station embedding feeding a small MLP head.

    Args:
        input_size: Number of input features per time step.
        output_size: Number of values predicted per sample.
        num_channels: Output width of each TemporalBlock, in stacking order.
        kernel_size: Kernel width shared by all blocks.
        dropout: Dropout probability shared by all blocks.
        num_stations: Size of the station embedding table.
        embed_dim: Width of each station embedding vector.
    """

    def __init__(self, input_size, output_size, num_channels, kernel_size, dropout, num_stations, embed_dim):
        super().__init__()
        # Block `level` reads the previous block's width (the raw feature count
        # for the first block) and uses dilation 2**level.
        widths_in = [input_size, *num_channels[:-1]]
        blocks = [
            TemporalBlock(c_in, c_out, kernel_size, 2 ** level, dropout)
            for level, (c_in, c_out) in enumerate(zip(widths_in, num_channels))
        ]
        self.tcn = nn.Sequential(*blocks)
        self.embedding = nn.Embedding(num_stations, embed_dim)
        head_in = num_channels[-1] + embed_dim
        self.head = nn.Sequential(
            nn.Linear(head_in, 64),
            nn.ReLU(),
            nn.Linear(64, output_size),
        )

    def forward(self, x, station_id):
        """Predict from ``x`` (batch, time, features) and integer ``station_id`` (batch,)."""
        # Conv1d wants channels on axis 1, so swap the time and feature axes.
        features = self.tcn(x.permute(0, 2, 1))
        # Only the representation at the final time step is passed to the head.
        last_step = features[:, :, -1]
        station_vec = self.embedding(station_id)
        return self.head(torch.cat([last_step, station_vec], dim=1))

# --- Create Dataset and split ---
dataset = SharedTCNDataset(df_norm, station_cols, neighbors, HISTORY_LEN, PRED_HORIZON)
N = len(dataset)

# Random 90/10 split over window indices.
# NOTE(review): the shuffle is unseeded, so the split differs between runs, and
# windows that start on adjacent rows overlap almost entirely, so validation
# windows are near-copies of training windows. Treat the validation loss (and
# the early stopping driven by it) as optimistic.
indices = list(range(N))
random.shuffle(indices)
val_size = int(0.1 * N)  # 10% for val
train_size = N - val_size
train_indices, val_indices = indices[:train_size], indices[train_size:]
train_set, val_set = Subset(dataset, train_indices), Subset(dataset, val_indices)

# --- DataLoaders with speedups ---
# Both loaders share batch size, worker count and pinned memory; only the
# training loader reshuffles every epoch.
loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=4, pin_memory=True)
train_loader = DataLoader(train_set, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_set, **loader_kwargs)

# --- Model ---
# Input channels per time step: the station itself, its K_NEIGHBORS
# neighbours and the 8 calendar features.
model_config = dict(
    input_size=1 + K_NEIGHBORS + 8,
    output_size=PRED_HORIZON,
    num_channels=[HIDDEN_DIM] * N_LAYERS,
    kernel_size=3,
    dropout=DROPOUT,
    num_stations=len(station_cols),
    embed_dim=EMBED_DIM,
)
model = TCN(**model_config).to(DEVICE)

# --- Optimizer and Loss ---
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(model.parameters(), lr=LR, weight_decay=WEIGHT_DECAY)

# --- Training loop ---
# Trains for at most EPOCHS epochs, tracks the lowest mean validation loss and
# stops once it has failed to improve for PATIENCE consecutive epochs.
best_loss = float('inf')
best_state = None
patience_counter = 0

for epoch in range(EPOCHS):
    # --- Train ---
    model.train()
    running_loss = 0.0
    for xb, yb, sid in tqdm(train_loader, desc=f"Epoch {epoch+1}/{EPOCHS}"):
        xb, yb, sid = xb.to(DEVICE), yb.to(DEVICE), sid.to(DEVICE)
        optimizer.zero_grad()
        loss = criterion(model(xb, sid), yb)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    # Mean of the per-batch losses.
    avg_train_loss = running_loss / len(train_loader)
    print(f"Epoch {epoch+1}: Train Loss = {avg_train_loss:.4f}")

    # --- Validation ---
    model.eval()
    val_loss = 0.0
    with torch.no_grad():
        for xb, yb, sid in val_loader:
            xb, yb, sid = xb.to(DEVICE), yb.to(DEVICE), sid.to(DEVICE)
            val_loss += criterion(model(xb, sid), yb).item()
    avg_val_loss = val_loss / len(val_loader)
    print(f"Epoch {epoch+1}: Val Loss = {avg_val_loss:.4f}")

    # --- Early stopping on val loss ---
    if avg_val_loss < best_loss:
        best_loss = avg_val_loss
        # state_dict() returns the live parameter tensors; without cloning, the
        # "best" snapshot would keep changing as later epochs update the model.
        best_state = {k: v.detach().clone() for k, v in model.state_dict().items()}
        patience_counter = 0
    else:
        patience_counter += 1
        if patience_counter >= PATIENCE:
            print("Early stopping!")
            break

# --- Save best model ---
if best_state is not None:
    model.load_state_dict(best_state)
else:
    # No epoch ever beat the initial +inf (e.g. the validation loss was NaN or
    # EPOCHS is 0); keep the current weights rather than crash on None.
    print("Warning: no improving epoch was recorded; saving the current weights.")
torch.save(model.state_dict(), "tcn_model_final.pt")
print("✅ Saved model to 'tcn_model_final.pt'")

# --- Predict on bicikelj_test.csv ---
test_df = pd.read_csv("bicikelj_test.csv")
timestamps = pd.to_datetime(test_df["timestamp"])
test_feats = test_df[station_cols].values.astype(np.float32)

# Time features: each calendar cycle is encoded as a sin/cos pair.
hour_angle = 2 * np.pi * timestamps.dt.hour / 24
dow_angle = 2 * np.pi * timestamps.dt.dayofweek / 7
month_angle = 2 * np.pi * timestamps.dt.month / 12
hour_sin, hour_cos = np.sin(hour_angle), np.cos(hour_angle)
dow_sin, dow_cos = np.sin(dow_angle), np.cos(dow_angle)
month_sin, month_cos = np.sin(month_angle), np.cos(month_angle)
is_weekend = (timestamps.dt.dayofweek >= 5).astype(float)
# NOTE(review): holidays.Slovenia() is built without `years`; the holidays
# package appears to populate years lazily on lookup, so iterating it here may
# yield no dates and leave is_holiday all zeros -- verify. The training dataset
# builds this flag the same way, so any change must be made in both places.
slo_holidays = holidays.Slovenia()
holiday_strs = [str(d) for d in slo_holidays]
is_holiday = timestamps.dt.date.astype(str).isin(holiday_strs).astype(float)

# Column order: hour, day-of-week, month (sin then cos each), weekend, holiday.
time_feats = np.stack(
    [hour_sin, hour_cos, dow_sin, dow_cos, month_sin, month_cos, is_weekend, is_holiday],
    axis=1,
)

# Station name -> column position within station_cols.
name_to_idx = dict(zip(station_cols, range(len(station_cols))))

# Inference mode: disables dropout in the trained model.
model.eval()

# The model was trained on per-station z-scores (df_norm), so the history fed
# to it has to be scaled with the same training means/stds. Feeding raw counts
# and then de-normalising the output gives predictions on the wrong scale.
test_feats_norm = (test_feats - station_means.values) / station_stds.values

# Predictions in original units; rows never predicted stay NaN.
pred_matrix = np.full_like(test_feats, np.nan)

with torch.no_grad():
    for i in range(HISTORY_LEN, len(test_df) - PRED_HORIZON + 1):
        # Forecast only where the next PRED_HORIZON rows are missing for every
        # station; the gap test uses the raw values.
        if not np.isnan(test_feats[i:i + PRED_HORIZON]).all():
            continue
        history = slice(i - HISTORY_LEN, i)
        for station in station_cols:
            s_idx = name_to_idx[station]
            nb_idx = [name_to_idx[nb] for nb in neighbors[station]]

            # Same channel layout as in training: station, neighbours, time features.
            window = np.concatenate(
                [test_feats_norm[history][:, [s_idx] + nb_idx], time_feats[history]],
                axis=1,
            )
            seq = torch.tensor(window[None], dtype=torch.float32, device=DEVICE)
            sid_tensor = torch.tensor([s_idx], dtype=torch.long, device=DEVICE)

            pred_norm = model(seq, sid_tensor).cpu().numpy().flatten()
            # Undo the z-score to get back to original units.
            pred = pred_norm * station_stds[station] + station_means[station]
            pred_matrix[i:i + PRED_HORIZON, s_idx] = pred

# Save predictions
# Only rows where every station value is missing in the input are written out.
rows_to_output = test_df[station_cols].isna().all(axis=1)
pred_df = pd.DataFrame(pred_matrix, columns=station_cols)
pred_df.insert(0, "timestamp", test_df["timestamp"])
pred_df_filtered = pred_df.loc[rows_to_output].copy()
pred_df_filtered.to_csv("bicikelj_test_predictions_tcn.csv", index=False)
print("✅ Saved predictions to 'bicikelj_test_predictions_tcn.csv'")


In [None]:
# --- Predict only the unknown rows in bicikelj_test.csv using final TCN model ---

import numpy as np
import pandas as pd
import torch
import torch.nn as nn
from sklearn.metrics.pairwise import haversine_distances
import holidays

# --- Constants ---
# The architecture values below size the model that receives the saved
# checkpoint, so they have to match what that checkpoint was trained with.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Window geometry: rows of history in, rows of forecast out.
HISTORY_LEN = 48
PRED_HORIZON = 4

# Network shape.
K_NEIGHBORS = 2
EMBED_DIM = 8
HIDDEN_DIM = 64
N_LAYERS = 3
DROPOUT = 0.2

BATCH_SIZE = 128

# --- Load metadata ---
meta = pd.read_csv("bicikelj_metadata.csv")
station_names = meta['name'].tolist()
# Station columns are every column of the test file after the first.
station_cols = pd.read_csv("bicikelj_test.csv").columns[1:]

# --- Neighbors ---
# Pairwise great-circle distances: haversine_distances works on radians and a
# unit sphere; 6371 (~Earth's radius in km) rescales the result to kilometres.
coords = np.deg2rad(meta[['latitude', 'longitude']].values)
dists = haversine_distances(coords, coords) * 6371
neighbors = {}
for i, name in enumerate(station_names):
    # Rank all stations by distance and skip the station itself.
    ranked = [j for j in np.argsort(dists[i]) if j != i]
    neighbors[name] = [station_names[j] for j in ranked[:K_NEIGHBORS]]

# --- Load training stats for normalization ---
# Repeat the cleaning applied before training (numeric coercion, forward then
# backward fill, drop rows with no data) so these means/stds equal the ones
# the saved model was trained with. Stats taken from the raw CSV differ
# whenever the file contains gaps or non-numeric cells.
df_train = pd.read_csv("bicikelj_train.csv")
df_train[station_cols] = df_train[station_cols].apply(pd.to_numeric, errors="coerce")
df_train[station_cols] = df_train[station_cols].ffill().bfill()
df_train = df_train.dropna(subset=station_cols, how="all").reset_index(drop=True)
station_means = df_train[station_cols].mean()
# A zero std is replaced by 1 so constant stations do not divide by zero.
station_stds = df_train[station_cols].std().replace(0, 1)

# --- TCN Model definition ---
class TemporalBlock(nn.Module):
    """Residual block of two dilated, causal 1-D convolutions.

    Both convolutions pad each end by ``(kernel_size - 1) * dilation``; the
    extra steps at the right end are removed afterwards, so the sequence length
    is preserved and step ``t`` never sees inputs after ``t``.

    Args:
        in_channels: Channels of the input tensor.
        out_channels: Channels produced by both convolutions.
        kernel_size: Convolution kernel width.
        dilation: Spacing between kernel taps.
        dropout: Dropout probability applied after each activation.
    """

    def __init__(self, in_channels, out_channels, kernel_size, dilation, dropout):
        super().__init__()
        self.padding = (kernel_size - 1) * dilation
        self.conv1 = nn.Conv1d(in_channels, out_channels, kernel_size,
                               padding=self.padding, dilation=dilation)
        self.conv2 = nn.Conv1d(out_channels, out_channels, kernel_size,
                               padding=self.padding, dilation=dilation)
        self.relu = nn.ReLU()
        self.dropout = nn.Dropout(dropout)
        # A 1x1 convolution aligns channel counts on the skip path when they differ.
        self.downsample = nn.Conv1d(in_channels, out_channels, 1) if in_channels != out_channels else None

    def _trim_right(self, out):
        """Remove the ``padding`` surplus steps at the end of a padded conv output."""
        if self.padding == 0:
            # kernel_size == 1 adds nothing to trim, and `[:-0]` would return
            # an empty tensor instead of the full sequence.
            return out
        return out[:, :, :-self.padding]

    def forward(self, x):
        """Apply the block to ``x`` of shape (batch, in_channels, length)."""
        out = self._trim_right(self.conv1(x))
        out = self.dropout(self.relu(out))

        out = self._trim_right(self.conv2(out))
        out = self.dropout(self.relu(out))

        res = x if self.downsample is None else self.downsample(x)
        return out + res

class TCN(nn.Module):
    """Temporal convolutional network with a per-station embedding.

    A stack of TemporalBlocks encodes the input window; the encoding at the
    final time step is concatenated with the station's embedding and mapped to
    ``output_size`` values by a two-layer MLP.
    """

    def __init__(self, input_size, output_size, num_channels, kernel_size, dropout, num_stations, embed_dim):
        super().__init__()
        blocks = []
        prev_width = input_size
        for level, width in enumerate(num_channels):
            # The dilation doubles with every level of the stack.
            blocks.append(TemporalBlock(prev_width, width, kernel_size, 2 ** level, dropout))
            prev_width = width
        self.tcn = nn.Sequential(*blocks)
        self.embedding = nn.Embedding(num_stations, embed_dim)
        self.head = nn.Sequential(
            nn.Linear(num_channels[-1] + embed_dim, 64),
            nn.ReLU(),
            nn.Linear(64, output_size),
        )

    def forward(self, x, station_id):
        """Return predictions for ``x`` (batch, time, features) and ``station_id`` (batch,)."""
        # Move features to axis 1, where Conv1d expects its channels.
        channels_first = x.permute(0, 2, 1)
        encoded = self.tcn(channels_first)
        # Keep only the final time step of the encoding.
        summary = encoded[:, :, -1]
        emb = self.embedding(station_id)
        return self.head(torch.cat([summary, emb], dim=1))

# --- Load model ---
# Rebuild the architecture with the same sizes as the saved checkpoint, then
# restore its weights.
model = TCN(input_size=1 + K_NEIGHBORS + 8,
            output_size=PRED_HORIZON,
            num_channels=[HIDDEN_DIM] * N_LAYERS,
            kernel_size=3,
            dropout=DROPOUT,
            num_stations=len(station_cols),
            embed_dim=EMBED_DIM).to(DEVICE)

# map_location remaps the stored tensors onto the current device, so a
# checkpoint written on a GPU machine still loads on a CPU-only one.
model.load_state_dict(torch.load("tcn_model_final.pt", map_location=DEVICE))
# Inference mode: disables dropout.
model.eval()

# --- Load test set ---
test_df = pd.read_csv("bicikelj_test.csv")
timestamps = pd.to_datetime(test_df["timestamp"])
test_feats = test_df[station_cols].values.astype(np.float32)

# --- Time features ---
# Each calendar cycle is encoded as a sin/cos pair of its phase angle.
hour_angle = 2 * np.pi * timestamps.dt.hour / 24
dow_angle = 2 * np.pi * timestamps.dt.dayofweek / 7
month_angle = 2 * np.pi * timestamps.dt.month / 12
hour_sin, hour_cos = np.sin(hour_angle), np.cos(hour_angle)
dow_sin, dow_cos = np.sin(dow_angle), np.cos(dow_angle)
month_sin, month_cos = np.sin(month_angle), np.cos(month_angle)
is_weekend = (timestamps.dt.dayofweek >= 5).astype(float)
# NOTE(review): holidays.Slovenia() is built without `years`; the holidays
# package appears to populate years lazily on lookup, so iterating it here may
# yield no dates and leave is_holiday all zeros -- verify. Training builds the
# flag the same way, so any change must be mirrored there.
slo_holidays = holidays.Slovenia()
holiday_strs = [str(d) for d in slo_holidays]
is_holiday = timestamps.dt.date.astype(str).isin(holiday_strs).astype(float)

# Column order: hour, day-of-week, month (sin then cos each), weekend, holiday.
time_feats = np.stack(
    [hour_sin, hour_cos, dow_sin, dow_cos, month_sin, month_cos, is_weekend, is_holiday],
    axis=1,
)

# --- Normalize test_feats using training stats ---
# Column-wise z-score; the stats' order follows station_cols, as do the columns.
test_feats_norm = (test_feats - station_means.values) / station_stds.values

# --- Predict ---
# Station name -> column position within station_cols.
name_to_idx = dict(zip(station_cols, range(len(station_cols))))

# Predictions in original units; rows never predicted stay NaN.
pred_matrix = np.full_like(test_feats, np.nan)

with torch.no_grad():
    for i in range(HISTORY_LEN, len(test_df) - PRED_HORIZON + 1):
        # Skip positions unless the next PRED_HORIZON rows are missing for
        # every station.
        if not np.isnan(test_feats[i:i + PRED_HORIZON]).all():
            continue
        history = slice(i - HISTORY_LEN, i)
        for station in station_cols:
            s_idx = name_to_idx[station]
            nb_idx = [name_to_idx[nb] for nb in neighbors[station]]

            # Channel layout per time step: station, neighbours, time features.
            window = np.concatenate(
                [test_feats_norm[history][:, [s_idx] + nb_idx], time_feats[history]],
                axis=1,
            )
            seq = torch.tensor(window[None], dtype=torch.float32).to(DEVICE)
            sid_tensor = torch.tensor([s_idx], dtype=torch.long, device=DEVICE)

            pred_norm = model(seq, sid_tensor).cpu().numpy().flatten()
            # Undo the z-score to get back to original units.
            pred = pred_norm * station_stds[station] + station_means[station]
            pred_matrix[i:i + PRED_HORIZON, s_idx] = pred

# --- Save predictions ---
# Keep only the rows where every station value is missing in the input.
rows_to_output = test_df[station_cols].isna().all(axis=1)

pred_df = pd.DataFrame(pred_matrix, columns=station_cols)
pred_df.insert(0, "timestamp", test_df["timestamp"])
pred_df_filtered = pred_df.loc[rows_to_output].copy()

pred_df_filtered.to_csv("bicikelj_test_predictions_tcn.csv", index=False)
print("✅ Saved predictions to 'bicikelj_test_predictions_tcn.csv'")
