In [3]:
import wandb
from pathlib import Path
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pickle
import os
import sys
from pytorch_tcn import TCN


# Reproducibility

In [4]:
# One shared seed for NumPy and PyTorch so repeated runs draw the same random numbers.
SEED = 42

np.random.seed(SEED)
torch.manual_seed(SEED)
# Seed every CUDA device as well, but only when a GPU is actually present.
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(SEED)


# Model

In [5]:

# Prefer the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")


class TimeSeriesDataset(Dataset):
    """Sliding-window dataset: ``seq_length`` consecutive feature rows -> the next target.

    Sample ``i`` is the pair ``(X[i:i + seq_length], [y[i + seq_length]])``, both
    returned as float32 tensors.

    Args:
        X: Array-like of feature rows, indexed along its first axis.
        y: Array-like of targets aligned row-for-row with ``X``.
        seq_length: Number of consecutive rows in each input window.
    """

    def __init__(self, X, y, seq_length):
        # Convert once up front instead of on every __getitem__ call. Going through
        # NumPy with an explicit dtype also accepts object-dtype arrays (e.g. mixed
        # bool/float columns), which torch.FloatTensor cannot consume directly.
        self.X = torch.from_numpy(np.ascontiguousarray(X, dtype=np.float32))
        self.y = torch.from_numpy(np.ascontiguousarray(y, dtype=np.float32))
        self.seq_length = seq_length

    def __len__(self):
        # Clamp at zero: len() must never be negative, which the plain subtraction
        # produced whenever there were fewer rows than seq_length.
        usable_rows = min(len(self.X), len(self.y))
        return max(0, usable_rows - self.seq_length)

    def __getitem__(self, idx):
        window_end = idx + self.seq_length
        # Feature window covers rows [idx, window_end).
        X_seq = self.X[idx:window_end].clone()
        # Target is the value right after the window; indexing (not slicing) keeps
        # the IndexError for out-of-range positions.
        y_value = self.y[window_end].unsqueeze(0).clone()
        return X_seq, y_value
    


class TCNModel(nn.Module):
    """Causal TCN encoder followed by a linear head that reads the last time step.

    Args:
        input_size: Number of features per time step.
        output_size: Width of the linear head's output.
        num_channels: Number of filters used in every TCN layer.
        kernel_size: Convolution kernel size handed to the TCN.
        dropout: Dropout rate handed to the TCN.
        num_layers: Number of stacked TCN layers. Defaults to 8, the depth that
            was previously hard-coded, so existing callers are unaffected.

    Raises:
        ValueError: if ``num_layers`` is smaller than 1.
    """

    def __init__(self, input_size, output_size, num_channels, kernel_size, dropout=0.2, num_layers=8):
        super().__init__()

        if num_layers < 1:
            raise ValueError(f"num_layers must be at least 1, got {num_layers}")

        self.tcn = TCN(
            num_inputs=input_size,
            num_channels=[num_channels] * num_layers,  # same filter count in every layer
            kernel_size=kernel_size,                   # temporal extent of each convolution
            dropout=dropout,
            causal=True,                               # causal convolutions: never look into the future
            use_skip_connections=True                  # skip connections for better gradient flow
        )

        # Maps the per-channel features of one time step to the prediction.
        self.linear = nn.Linear(num_channels, output_size)

    def forward(self, x):
        """Predict from a batch shaped [batch_size, seq_len, input_size]."""
        # The TCN expects [batch_size, input_size, seq_len].
        x = x.transpose(1, 2)

        # TCN output: [batch_size, num_channels, seq_len].
        y = self.tcn(x)

        # Keep only the last time step and project it through the linear head.
        y = y[:, :, -1]

        return self.linear(y)


Using device: cuda


# Helper functions

In [6]:

def preprocess_data(df, target_ride=None):
    """
    Reduce the raw table to the rows and columns used for one ride.

    Drops ``time_bucket`` when present, keeps only rows whose
    ``ride_name_<target_ride>`` indicator equals 1 (when that column exists),
    removes every ``ride_name_*`` column, and replaces missing values with 0.
    The input frame is left untouched; a new frame is returned.
    """
    # errors='ignore' makes the drop a no-op when the column is absent.
    frame = df.drop(columns=['time_bucket'], errors='ignore')

    print(f"Building model for ride: {target_ride}")

    indicator = f'ride_name_{target_ride}'
    if indicator in frame.columns:
        is_target_ride = frame[indicator] == 1
        frame = frame[is_target_ride].copy()

    # The one-hot ride indicators carry no information once a single ride is selected.
    one_hot_cols = [name for name in frame.columns if name.startswith('ride_name_')]

    return frame.drop(columns=one_hot_cols).fillna(0)

def create_features(df):
    """
    Return the frame unchanged together with the names of its feature columns.

    Every column other than the target ``wait_time`` counts as a feature; the
    original column order is kept.
    """
    target = 'wait_time'
    feature_cols = list(filter(lambda name: name != target, df.columns))
    return df, feature_cols

def build_linear_model(X_train, y_train):
    """
    Fit an ordinary scikit-learn ``LinearRegression`` on the training data
    and return the fitted estimator.
    """
    from sklearn.linear_model import LinearRegression

    # fit() returns the estimator itself, so construction and fitting chain.
    return LinearRegression().fit(X_train, y_train)


# Train

In [7]:
def _load_ride_splits(config):
    """Load the ride's table and cut it into train/validation/test frames.

    Reads ``config.data_path``, runs ``preprocess_data`` for
    ``config.target_ride`` and selects rows by the ``original_index`` values
    stored for that ride in the three ``*_indices.parquet`` files found under
    ``config.splits_output_dir``.

    Returns:
        ``(train_df, val_df, test_df, feature_cols)``.

    Raises:
        ValueError: if any of the three splits has no indices for the ride.
    """
    target_ride = config.target_ride
    df = preprocess_data(pd.read_parquet(config.data_path), target_ride)

    ride_name_normalized = target_ride.replace(' ', '_')
    split_files = ("train_indices.parquet", "validation_indices.parquet", "test_indices.parquet")
    split_indices = []
    for filename in split_files:
        index_table = pd.read_parquet(os.path.join(config.splits_output_dir, filename))
        ride_rows = index_table[index_table['ride_name'] == ride_name_normalized]
        split_indices.append(ride_rows['original_index'].values)
    train_indices, val_indices, test_indices = split_indices

    if len(train_indices) == 0 or len(val_indices) == 0 or len(test_indices) == 0:
        raise ValueError(f"No indices found for ride {target_ride}. Check ride name or indices files.")

    print(f"Found {len(train_indices)} train, {len(val_indices)} validation, and {len(test_indices)} test samples")

    df, feature_cols = create_features(df)

    # NOTE(review): iloc treats 'original_index' as row *positions* in the
    # preprocessed frame. If preprocess_data filtered rows out, positions may no
    # longer line up with the split files -- confirm against how they were built.
    # NOTE(review): the windowing downstream assumes the selected rows are in
    # chronological order -- confirm the index files are sorted by time.
    train_df = df.iloc[train_indices].copy()
    val_df = df.iloc[val_indices].copy()
    test_df = df.iloc[test_indices].copy()
    return train_df, val_df, test_df, feature_cols


def _fit_tcn(tcn_model, train_loader, val_loader, learning_rate, epochs, patience, t_max, eta_min):
    """Train the TCN with Adam, cosine LR annealing and early stopping.

    Logs ``train_loss``, ``val_loss``, ``epoch`` and ``learning_rate`` to W&B
    after every epoch. When training ends, the weights from the epoch with the
    lowest validation loss are loaded back into ``tcn_model`` (left on ``device``).

    Returns:
        The lowest validation loss observed.
    """
    tcn_model.to(device)

    criterion = nn.MSELoss()
    optimizer = optim.Adam(tcn_model.parameters(), lr=learning_rate)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=t_max, eta_min=eta_min)

    best_val_loss = float('inf')
    best_state = None
    epochs_without_improvement = 0

    for epoch in range(epochs):
        tcn_model.train()
        train_loss = 0.0

        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)

            optimizer.zero_grad()
            outputs = tcn_model(inputs)
            loss = criterion(outputs, targets)

            loss.backward()
            optimizer.step()

            train_loss += loss.item()

        # Mean of the per-batch losses.
        train_loss /= len(train_loader)

        # The scheduler advances once per epoch, after the optimizer steps.
        scheduler.step()
        current_lr = scheduler.get_last_lr()[0]

        tcn_model.eval()
        val_loss = 0.0

        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = tcn_model(inputs)
                val_loss += criterion(outputs, targets).item()

        val_loss /= len(val_loader)

        wandb.log({
            "train_loss": train_loss,
            "val_loss": val_loss,
            "epoch": epoch,
            "learning_rate": current_lr
        })

        print(f'Epoch {epoch+1}/{epochs} - Train loss: {train_loss:.4f}, Val loss: {val_loss:.4f}, LR: {current_lr:.6f}')

        if val_loss < best_val_loss:
            best_val_loss = val_loss
            # Clone every tensor: state_dict() hands out references to the live
            # parameters, so a shallow dict copy would keep changing as training
            # continues and the "best" snapshot would just be the final weights.
            best_state = {
                name: tensor.detach().clone()
                for name, tensor in tcn_model.state_dict().items()
            }
            epochs_without_improvement = 0
        else:
            epochs_without_improvement += 1
            if epochs_without_improvement >= patience:
                print(f"Early stopping at epoch {epoch+1}")
                break

    if best_state is not None:
        tcn_model.load_state_dict(best_state)

    return best_val_loss


def _predict_on_cpu(tcn_model, loader):
    """Move the model to the CPU and return its predictions for ``loader`` as a flat array."""
    tcn_model.to(torch.device("cpu"))
    tcn_model.eval()

    batches = []
    with torch.no_grad():
        for inputs, _ in loader:
            batches.append(tcn_model(inputs).numpy().flatten())
    return np.concatenate(batches)


def _regression_metrics(y_true, y_pred, prefix):
    """Return MAE, RMSE and R^2 keyed as ``<prefix>_mae`` / ``_rmse`` / ``_r2``."""
    return {
        f"{prefix}_mae": mean_absolute_error(y_true, y_pred),
        f"{prefix}_rmse": np.sqrt(mean_squared_error(y_true, y_pred)),
        f"{prefix}_r2": r2_score(y_true, y_pred),
    }


def _save_and_log_models(linear_model, tcn_model, ride_name_normalized):
    """Write both models under ``models/`` and upload each as a W&B model artifact."""
    os.makedirs("models", exist_ok=True)
    linear_model_filename = f"{ride_name_normalized}_linear_model.pkl"
    tcn_model_filename = f"{ride_name_normalized}_tcn_model.pt"

    with open(f"models/{linear_model_filename}", "wb") as f:
        pickle.dump(linear_model, f)

    torch.save(tcn_model.state_dict(), f"models/{tcn_model_filename}")

    linear_artifact = wandb.Artifact(f"linear_model_{wandb.run.id}", type="model")
    linear_artifact.add_file(f"models/{linear_model_filename}")
    wandb.log_artifact(linear_artifact)

    tcn_artifact = wandb.Artifact(f"tcn_model_{wandb.run.id}", type="model")
    tcn_artifact.add_file(f"models/{tcn_model_filename}")
    wandb.log_artifact(tcn_artifact)


def _run_trial(config):
    """Run one trial: linear baseline, TCN on its residuals, test evaluation, model export."""
    target_ride = config.target_ride
    seq_length = config.seq_length
    batch_size = config.batch_size
    epochs = config.epochs

    # Scheduler settings are optional in the sweep config.
    scheduler_type = config.get('scheduler_type', 'CosineAnnealingLR')
    t_max = config.get('t_max', epochs)    # default: one cosine half-period over all epochs
    eta_min = config.get('eta_min', 1e-6)  # minimum learning rate
    if scheduler_type != 'CosineAnnealingLR':
        # Only cosine annealing is implemented; say so instead of silently ignoring the setting.
        print(f"Warning: scheduler_type '{scheduler_type}' is not implemented; using CosineAnnealingLR.")

    train_df, val_df, test_df, feature_cols = _load_ride_splits(config)

    # Each split needs more rows than one window, otherwise it yields no samples
    # and the loss averaging below would fail with an unhelpful error.
    for split_name, split_df in (("train", train_df), ("validation", val_df), ("test", test_df)):
        if len(split_df) <= seq_length:
            raise ValueError(
                f"The {split_name} split has {len(split_df)} rows, which is not more than "
                f"seq_length={seq_length}; no windows can be built from it."
            )

    # Features and target per split.
    X_train = train_df[feature_cols].values
    y_train = train_df['wait_time'].values
    X_val = val_df[feature_cols].values
    y_val = val_df['wait_time'].values
    X_test = test_df[feature_cols].values
    y_test = test_df['wait_time'].values

    # Stage 1: linear baseline fitted on the training split only.
    linear_model = build_linear_model(X_train, y_train)
    y_test_pred_linear = linear_model.predict(X_test)

    # Stage 2: the TCN learns the residuals (actual - linear prediction).
    train_residuals = y_train - linear_model.predict(X_train)
    val_residuals = y_val - linear_model.predict(X_val)
    test_residuals = y_test - y_test_pred_linear

    train_dataset = TimeSeriesDataset(X_train, train_residuals, seq_length)
    val_dataset = TimeSeriesDataset(X_val, val_residuals, seq_length)
    test_dataset = TimeSeriesDataset(X_test, test_residuals, seq_length)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    tcn_model = TCNModel(
        input_size=X_train.shape[1],  # number of features
        output_size=1,                # a single residual value
        num_channels=config.num_channels,
        kernel_size=config.kernel_size,
        dropout=config.dropout
    )

    best_val_loss = _fit_tcn(
        tcn_model,
        train_loader,
        val_loader,
        learning_rate=config.learning_rate,
        epochs=epochs,
        patience=config.patience,
        t_max=t_max,
        eta_min=eta_min,
    )

    tcn_preds = _predict_on_cpu(tcn_model, test_loader)

    # The first prediction targets row `seq_length`, so skip the rows that only
    # ever served as window input when aligning predictions with the test frame.
    window_end = seq_length + len(tcn_preds)
    y_test_seq_linear = y_test_pred_linear[seq_length:window_end]

    test_eval_df = test_df.iloc[seq_length:window_end].reset_index(drop=True)
    test_eval_df['linear_pred'] = y_test_seq_linear
    test_eval_df['tcn_pred'] = tcn_preds
    test_eval_df['combined_pred'] = y_test_seq_linear + tcn_preds

    # Rows where the ride is closed are excluded from the metrics.
    if 'closed' in test_eval_df.columns:
        print(f"\nExcluding {test_eval_df['closed'].sum()} data points where ride is closed from evaluation")
        open_ride_df = test_eval_df[test_eval_df['closed'] == 0]
    else:
        print("Warning: 'closed' column not found in the data. Evaluating on all test data.")
        open_ride_df = test_eval_df

    y_test_open_actual = open_ride_df['wait_time'].values
    linear_metrics = _regression_metrics(y_test_open_actual, open_ride_df['linear_pred'].values, "linear")
    combined_metrics = _regression_metrics(y_test_open_actual, open_ride_df['combined_pred'].values, "combined")

    wandb.log({
        **linear_metrics,
        **combined_metrics,
        "best_val_loss": best_val_loss,
    })

    _save_and_log_models(linear_model, tcn_model, target_ride.replace(' ', '_'))


def train_with_wandb(config=None):
    """Run one W&B trial: fit a linear baseline plus a TCN on its residuals.

    Starts a W&B run with ``config``, reads all hyperparameters and paths back
    from ``wandb.config``, trains and evaluates the two-stage model, logs the
    metrics and uploads both models as artifacts.

    Args:
        config: Optional configuration handed to ``wandb.init``.

    Raises:
        ValueError: if a split has no rows for the ride or has too few rows to
            build a single window of ``seq_length``.
    """
    # Using the run as a context manager closes it even when training raises,
    # so a failed trial does not leave a dangling run behind in the notebook.
    with wandb.init(config=config):
        _run_trial(wandb.config)

def setup_wandb_sweep(project_name="queue-prediction"):
    """Register a Bayesian hyperparameter sweep with W&B and return its id.

    Args:
        project_name: W&B project the sweep is created in.

    Returns:
        The sweep id returned by ``wandb.sweep``.
    """
    sweep_config = {
        'method': 'bayes',  # Bayesian optimization
        'metric': {
            'name': 'combined_mae',  # metric to optimize
            'goal': 'minimize'       # lower MAE is better
        },
        'parameters': {
            # --- searched hyperparameters ---
            'seq_length': {
                'values': [24, 48, 96, 192, 384, 768]
            },
            'batch_size': {
                # NOTE(review): 1048 breaks the power-of-two pattern; possibly meant 1024 -- confirm.
                'values': [128, 256, 512, 1048]
            },
            'num_channels': {
                'values': [32, 64, 128, 256]
            },
            'kernel_size': {
                'values': [2, 3, 5, 8]
            },
            'dropout': {
                'values': [0.1, 0.2, 0.3]
            },
            'learning_rate': {
                'values': [1e-3, 10**-3.5, 1e-4, 10**-4.5, 1e-5]
            },
            # --- fixed training settings ---
            'epochs': {
                'value': 100
            },
            'patience': {
                'value': 10
            },
            # --- learning-rate schedule ---
            'scheduler_type': {
                'value': 'CosineAnnealingLR'
            },
            't_max': {
                'values': [10, 25, 50, 100]
            },
            'eta_min': {
                # The learning rate follows a cosine curve from its initial value
                # down to eta_min over T_max epochs.
                'values': [0, 1e-7, 1e-6]
            },
            # --- data locations ---
            'data_path': {
                'value': '../data/processed/ep/rides/poseidon.parquet'
            },
            'splits_output_dir': {
                'value': '../data/processed/splits'
            },
            'target_ride': {
                'value': 'poseidon'
            }
        }
    }

    sweep_id = wandb.sweep(sweep_config, project=project_name)
    return sweep_id

In [None]:

# Authenticate with Weights & Biases before any sweep calls are made.
wandb.login()

# W&B destination. `entity` is assigned here but not passed to any call below.
project_name = "queue-prediction-sweeps"
entity = "maikotrede-hochschule-luzern"

# Register the sweep, then let one agent execute a single trial.
sweep_id = setup_wandb_sweep(project_name)
wandb.agent(sweep_id, function=train_with_wandb, count=1)



[34m[1mwandb[0m: Currently logged in as: [33mmaikotrede[0m ([33mmaikotrede-hochschule-luzern[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin
