In [1]:
import wandb
from pathlib import Path
import pandas as pd
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import pickle
import os
import sys
from pytorch_tcn import TCN
from sklearn.ensemble import GradientBoostingRegressor

# Reproducability

In [2]:
np.random.seed(42)
torch.manual_seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)


# Model

In [3]:

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


class TimeSeriesDataset(Dataset):
    def __init__(self, X, y, seq_length):
        self.X = X
        self.y = y
        self.seq_length = seq_length
        
    def __len__(self):
        return len(self.X) - self.seq_length
    
    def __getitem__(self, idx):
        # Get sequence of features
        X_seq = self.X[idx:idx + self.seq_length]
        # Get target value (next value after sequence)
        y_value = self.y[idx + self.seq_length]
        
        return torch.FloatTensor(X_seq), torch.FloatTensor([y_value])
    


class TCNModel(nn.Module):
    def __init__(self, input_size, output_size, num_channels, kernel_size, dropout=0.2):
        super(TCNModel, self).__init__()
        

        self.tcn = TCN(
            num_inputs=input_size,         
            num_channels=[num_channels] * 8,  # (number of filters  in each convolutional layer)
            kernel_size=kernel_size,        # (temporal receptive field)
            dropout=dropout,                
            causal=True,                    # Causal convolutions (dont look into future)
            use_skip_connections=True       # Use skip connections for better gradient flow
        )
        

        self.linear = nn.Linear(num_channels, output_size)
        
    def forward(self, x):
        # Input shape: [batch_size, seq_len, input_size]
        # TCN expects shape: [batch_size, input_size, seq_len]
        x = x.transpose(1, 2)
        
        # Apply TCN - output will be [batch_size, num_channels, seq_len]
        y = self.tcn(x)
        
        # Get the last time step output and apply the linear layer
        y = y[:, :, -1]
        
        return self.linear(y)


Using device: cuda


# Helperfunctions

In [5]:

def preprocess_data(df, target_ride=None):
    """
    Preprocess the data for a single ride
    """
    # Drop time_bucket column as not needed
    if 'time_bucket' in df.columns:
        df = df.drop(columns=['time_bucket'])
    
    print(f"Building model for ride: {target_ride}")
    

    ride_col = f'ride_name_{target_ride}'
    if ride_col in df.columns:
        df = df[df[ride_col] == 1].copy()
    
    ride_cols = [col for col in df.columns if col.startswith('ride_name_')]
    df = df.drop(columns=ride_cols)
    
    df = df.fillna(0)
    
    return df

def create_features(df):
    """
    Create features for the model
    """
    # The features are everything except wait_time (target)
    feature_cols = [col for col in df.columns if col != 'wait_time' and col != "timestamp"]
    
    return df, feature_cols

def build_linear_model(X_train, y_train):
    """
    Build and train a linear regression model
    """

    model = GradientBoostingRegressor(
            n_estimators=100,
            learning_rate=0.1,
            max_depth=6,
            min_samples_split=10,
            min_samples_leaf=5,
            random_state=42
     )
    
    model.fit(X_train, y_train)
        
    #from sklearn.linear_model import LinearRegression
    #model = LinearRegression()
    #model.fit(X_train, y_train)
    return model


# Train

In [6]:
def train_with_wandb(config=None):
    run = wandb.init(config=config)
    config = wandb.config
    
    data_path = config.data_path
    splits_output_dir = config.splits_output_dir
    target_ride = config.target_ride
    seq_length = config.seq_length
    batch_size = config.batch_size
    num_channels = config.num_channels
    kernel_size = config.kernel_size
    dropout = config.dropout
    learning_rate = config.learning_rate
    epochs = config.epochs
    
    # Add scheduler parameters
    scheduler_type = config.get('scheduler_type', 'CosineAnnealingLR')  # Default to CosineAnnealingLR
    t_max = config.get('t_max', epochs)  # Default to total epochs
    eta_min = config.get('eta_min', 1e-6)  # Minimum learning rate
    
    df = pd.read_parquet(data_path)
    
    df = preprocess_data(df, target_ride)
    
    train_indices = pd.read_parquet(os.path.join(splits_output_dir, "train_indices.parquet"))
    val_indices = pd.read_parquet(os.path.join(splits_output_dir, "validation_indices.parquet"))
    test_indices = pd.read_parquet(os.path.join(splits_output_dir, "test_indices.parquet"))
    
    ride_name_normalized = target_ride.replace(' ', '_')
    train_indices = train_indices[train_indices['ride_name'] == ride_name_normalized]['original_index'].values
    val_indices = val_indices[val_indices['ride_name'] == ride_name_normalized]['original_index'].values
    test_indices = test_indices[test_indices['ride_name'] == ride_name_normalized]['original_index'].values
    
    if len(train_indices) == 0 or len(val_indices) == 0 or len(test_indices) == 0:
        raise ValueError(f"No indices found for ride {target_ride}. Check ride name or indices files.")
    
    print(f"Found {len(train_indices)} train, {len(val_indices)} validation, and {len(test_indices)} test samples")
    
    df, feature_cols = create_features(df)
    
    train_df = df.iloc[train_indices].copy()
    val_df = df.iloc[val_indices].copy()
    test_df = df.iloc[test_indices].copy()
        
    # Prepare features and target
    X_train = train_df[feature_cols].values
    y_train = train_df['wait_time'].values
    X_val = val_df[feature_cols].values
    y_val = val_df['wait_time'].values
    X_test = test_df[feature_cols].values
    y_test = test_df['wait_time'].values
    
    linear_model = build_linear_model(X_train, y_train)
    
    # Get predictions from linear model
    y_train_pred_linear = linear_model.predict(X_train)
    y_val_pred_linear = linear_model.predict(X_val)
    y_test_pred_linear = linear_model.predict(X_test)
    
    # Calculate residuals (actual - predicted)
    train_residuals = y_train - y_train_pred_linear
    val_residuals = y_val - y_val_pred_linear
    test_residuals = y_test - y_test_pred_linear
    
    train_dataset = TimeSeriesDataset(X_train, train_residuals, seq_length)
    val_dataset = TimeSeriesDataset(X_val, val_residuals, seq_length)
    test_dataset = TimeSeriesDataset(X_test, test_residuals, seq_length)

    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

    input_size = X_train.shape[1]  # Number of features
    output_size = 1  # Predicting a single value (residual)
    
    tcn_model = TCNModel(
        input_size=input_size,
        output_size=output_size,
        num_channels=num_channels,
        kernel_size=kernel_size,
        dropout=dropout
    )
    
    criterion = nn.MSELoss()
    optimizer = optim.Adam(tcn_model.parameters(), lr=learning_rate)
    
    # Add the learning rate scheduler
    from torch.optim.lr_scheduler import CosineAnnealingLR
    scheduler = CosineAnnealingLR(
        optimizer,
        T_max=t_max,
        eta_min=eta_min
    )
    
    tcn_model.to(device)
    
    best_val_loss = float('inf')
    counter = 0
    best_model = None
    patience = config.patience
    
    for epoch in range(epochs):
        tcn_model.train()
        train_loss = 0.0
        
        for inputs, targets in train_loader:
            inputs, targets = inputs.to(device), targets.to(device)
            
            optimizer.zero_grad()
            outputs = tcn_model(inputs)
            loss = criterion(outputs, targets)
            
            loss.backward()
            optimizer.step()
            
            train_loss += loss.item()
            
        train_loss /= len(train_loader)
        
        # Step the scheduler after each epoch
        scheduler.step()
        current_lr = scheduler.get_last_lr()[0]
        
        tcn_model.eval()
        val_loss = 0.0
        
        with torch.no_grad():
            for inputs, targets in val_loader:
                inputs, targets = inputs.to(device), targets.to(device)
                outputs = tcn_model(inputs)
                loss = criterion(outputs, targets)
                val_loss += loss.item()
                
        val_loss /= len(val_loader)
        
        wandb.log({
            "train_loss": train_loss,
            "val_loss": val_loss,
            "epoch": epoch,
            "learning_rate": current_lr  # Log the current learning rate
        })
        
        print(f'Epoch {epoch+1}/{epochs} - Train loss: {train_loss:.4f}, Val loss: {val_loss:.4f}, LR: {current_lr:.6f}')
        
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            best_model = tcn_model.state_dict().copy()
            counter = 0
        else:
            counter += 1
            if counter >= patience:
                print(f"Early stopping at epoch {epoch+1}")
                break
    
    if best_model is not None:
        tcn_model.load_state_dict(best_model)
    
    # Model evaluation on test set
    tcn_model.to(torch.device("cpu"))
    tcn_model.eval()
    
    # Get TCN predictions on test data
    all_tcn_preds = []
    
    with torch.no_grad():
        for i, (inputs, _) in enumerate(test_loader):
            outputs = tcn_model(inputs)
            all_tcn_preds.extend(outputs.numpy().flatten())
    
    # Get the corresponding test data (accounting for sequence length)
    y_test_seq_linear = y_test_pred_linear[seq_length:][:len(all_tcn_preds)]
    y_test_seq_actual = y_test[seq_length:][:len(all_tcn_preds)]
    
    test_eval_df = test_df.iloc[seq_length:].reset_index(drop=True).iloc[:len(all_tcn_preds)].copy()
    test_eval_df['linear_pred'] = y_test_seq_linear
    test_eval_df['tcn_pred'] = all_tcn_preds
    test_eval_df['combined_pred'] = y_test_seq_linear + np.array(all_tcn_preds)
    
    # Filter out rows where closed = 1
    if 'closed' in test_eval_df.columns:
        print(f"\nExcluding {test_eval_df['closed'].sum()} data points where ride is closed from evaluation")
        open_ride_df = test_eval_df[test_eval_df['closed'] == 0]
    else:
        print("Warning: 'closed' column not found in the data. Evaluating on all test data.")
        open_ride_df = test_eval_df
    
    y_test_open_actual = open_ride_df['wait_time'].values
    y_test_open_linear = open_ride_df['linear_pred'].values
    y_test_open_combined = open_ride_df['combined_pred'].values

    linear_mae = mean_absolute_error(y_test_open_actual, y_test_open_linear)
    linear_mse = mean_squared_error(y_test_open_actual, y_test_open_linear)
    linear_rmse = np.sqrt(linear_mse)
    linear_r2 = r2_score(y_test_open_actual, y_test_open_linear)
    
    combined_mae = mean_absolute_error(y_test_open_actual, y_test_open_combined)
    combined_mse = mean_squared_error(y_test_open_actual, y_test_open_combined)
    combined_rmse = np.sqrt(combined_mse)
    combined_r2 = r2_score(y_test_open_actual, y_test_open_combined)
    
    wandb.log({
        "linear_mae": linear_mae,
        "combined_mae": combined_mae,
        "combined_rmse": combined_rmse,
        "combined_r2": combined_r2,
        "best_val_loss": best_val_loss,
    })
    
    os.makedirs("models", exist_ok=True)
    linear_model_filename = f"{target_ride.replace(' ', '_')}_linear_model.pkl"
    tcn_model_filename = f"{target_ride.replace(' ', '_')}_tcn_model.pt"
    
    with open(f"models/{linear_model_filename}", "wb") as f:
        pickle.dump(linear_model, f)
    
    torch.save(tcn_model.state_dict(), f"models/{tcn_model_filename}")

    linear_artifact = wandb.Artifact(f"linear_model_{wandb.run.id}", type="model")
    linear_artifact.add_file(f"models/{linear_model_filename}")
    wandb.log_artifact(linear_artifact)
    
    tcn_artifact = wandb.Artifact(f"tcn_model_{wandb.run.id}", type="model")
    tcn_artifact.add_file(f"models/{tcn_model_filename}")
    wandb.log_artifact(tcn_artifact)

    wandb.finish()


def setup_wandb_sweep(project_name="waitless-hslu-dspro2-fs25", entity="waitless-hslu-dspro2-fs25"):
    sweep_config = {
        'method': 'bayes',  # Bayesian optimization
        'metric': {
            'name': 'combined_mae',  # Metric to optimize
            'goal': 'minimize'  # We want to minimize RMSE
        },
        'parameters': {
            'seq_length': {
                'values': [24, 48, 96, 192, 384] 
            },
            'batch_size': {
                'values': [128, 256, 512, 1048] 
            },
            'num_channels': {
                'values': [32, 64, 128, 256] 
            },
            'kernel_size': {
                'values': [2, 3, 5, 8] 
            },
            'dropout': {
                'values': [0.1, 0.2, 0.3]  
            },
            'learning_rate': {
                'values': [1e-3, 10**-3.5, 1e-4, 10**-4.5, 1e-5]  
            },
            'epochs': {
                'value': 100 
            },
            'patience': {
                'value': 10  
            },
            'scheduler_type': {
                'value': 'CosineAnnealingLR'  
            },
            't_max': {
                'values': [10, 25, 50, 100]  
            },
            'eta_min': {
                'values': [0, 1e-7, 1e-6]  # learning rate will follow a cosine curve from the initial learning rate to eta_min over T_max epochs
            },
            'data_path': {
                'value': '../data/processed/ep/rides/poseidon.parquet' 
            },
            'splits_output_dir': {
                'value': '../data/processed/splits' 
            },
            'target_ride': {
                'value': 'poseidon' 
            }
        }
    }

    sweep_id = wandb.sweep(sweep_config, project=project_name, entity=entity)
    return sweep_id

In [None]:

project_name = "waitless-tcn-hslu-dspro2-fs25"
entity = "waitless-hslu-dspro2-fs25" 
wandb.login()
sweep_id = setup_wandb_sweep(project_name=project_name, entity=entity)
wandb.agent(sweep_id, train_with_wandb, count=10)  



[34m[1mwandb[0m: Currently logged in as: [33mmaikotrede[0m ([33mmaikotrede-hochschule-luzern[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Create sweep with ID: cbguxinw
Sweep URL: https://wandb.ai/waitless-hslu-dspro2-fs25/waitless-tcn-hslu-dspro2-fs25/sweeps/cbguxinw


[34m[1mwandb[0m: Agent Starting Run: ihxhk0gy with config:
[34m[1mwandb[0m: 	batch_size: 512
[34m[1mwandb[0m: 	data_path: ../data/processed/ep/rides/poseidon.parquet
[34m[1mwandb[0m: 	dropout: 0.1
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	eta_min: 0
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 0.001
[34m[1mwandb[0m: 	num_channels: 128
[34m[1mwandb[0m: 	patience: 10
[34m[1mwandb[0m: 	scheduler_type: CosineAnnealingLR
[34m[1mwandb[0m: 	seq_length: 192
[34m[1mwandb[0m: 	splits_output_dir: ../data/processed/splits
[34m[1mwandb[0m: 	t_max: 50
[34m[1mwandb[0m: 	target_ride: poseidon
[34m[1mwandb[0m: Currently logged in as: [33mmaikotrede[0m ([33mwaitless-hslu-dspro2-fs25[0m) to [32mhttps://api.wandb.ai[0m. Use [1m`wandb login --relogin`[0m to force relogin


Building model for ride: poseidon
Found 190333 train, 43138 validation, and 38050 test samples
Epoch 1/100 - Train loss: 8.6087, Val loss: 10.0373, LR: 0.000999
Epoch 2/100 - Train loss: 7.9839, Val loss: 9.7861, LR: 0.000996
Epoch 3/100 - Train loss: 7.5825, Val loss: 9.3961, LR: 0.000991
Epoch 4/100 - Train loss: 7.2454, Val loss: 8.8069, LR: 0.000984
Epoch 5/100 - Train loss: 6.9013, Val loss: 9.3143, LR: 0.000976
Epoch 6/100 - Train loss: 6.7309, Val loss: 9.3118, LR: 0.000965
Epoch 7/100 - Train loss: 6.4160, Val loss: 8.9895, LR: 0.000952
Epoch 8/100 - Train loss: 6.1270, Val loss: 9.5060, LR: 0.000938
Epoch 9/100 - Train loss: 5.8700, Val loss: 9.7607, LR: 0.000922
Epoch 10/100 - Train loss: 5.6879, Val loss: 9.4333, LR: 0.000905
Epoch 11/100 - Train loss: 5.4467, Val loss: 8.9909, LR: 0.000885
Epoch 12/100 - Train loss: 5.2432, Val loss: 9.3258, LR: 0.000864
Epoch 13/100 - Train loss: 4.9397, Val loss: 9.4802, LR: 0.000842
Epoch 14/100 - Train loss: 4.7155, Val loss: 9.2557, LR

[34m[1mwandb[0m: [32m[41mERROR[0m The nbformat package was not found. It is required to save notebook history.


0,1
best_val_loss,▁
combined_mae,▁
combined_r2,▁
combined_rmse,▁
epoch,▁▂▂▃▃▄▄▅▅▆▆▇▇█
learning_rate,███▇▇▇▆▆▅▄▄▃▂▁
linear_mae,▁
train_loss,█▇▆▆▅▅▄▄▃▃▂▂▁▁
val_loss,█▇▄▁▄▄▂▅▆▅▂▄▅▄

0,1
best_val_loss,8.8069
combined_mae,6.49438
combined_r2,0.66031
combined_rmse,9.81331
epoch,13.0
learning_rate,0.00082
linear_mae,6.58764
train_loss,4.7155
val_loss,9.25571


[34m[1mwandb[0m: Agent Starting Run: hl4xx63y with config:
[34m[1mwandb[0m: 	batch_size: 256
[34m[1mwandb[0m: 	data_path: ../data/processed/ep/rides/poseidon.parquet
[34m[1mwandb[0m: 	dropout: 0.3
[34m[1mwandb[0m: 	epochs: 100
[34m[1mwandb[0m: 	eta_min: 1e-07
[34m[1mwandb[0m: 	kernel_size: 5
[34m[1mwandb[0m: 	learning_rate: 1e-05
[34m[1mwandb[0m: 	num_channels: 32
[34m[1mwandb[0m: 	patience: 10
[34m[1mwandb[0m: 	scheduler_type: CosineAnnealingLR
[34m[1mwandb[0m: 	seq_length: 24
[34m[1mwandb[0m: 	splits_output_dir: ../data/processed/splits
[34m[1mwandb[0m: 	t_max: 50
[34m[1mwandb[0m: 	target_ride: poseidon


Building model for ride: poseidon
Found 190333 train, 43138 validation, and 38050 test samples
Epoch 1/100 - Train loss: 8.8674, Val loss: 10.3797, LR: 0.000010
Epoch 2/100 - Train loss: 8.5418, Val loss: 10.3329, LR: 0.000010
Epoch 3/100 - Train loss: 8.4718, Val loss: 10.3179, LR: 0.000010
Epoch 4/100 - Train loss: 8.4264, Val loss: 10.3085, LR: 0.000010
Epoch 5/100 - Train loss: 8.4211, Val loss: 10.3107, LR: 0.000010
Epoch 6/100 - Train loss: 8.3936, Val loss: 10.3041, LR: 0.000010
Epoch 7/100 - Train loss: 8.3800, Val loss: 10.3042, LR: 0.000010
Epoch 8/100 - Train loss: 8.3742, Val loss: 10.2950, LR: 0.000009
Epoch 9/100 - Train loss: 8.3730, Val loss: 10.2940, LR: 0.000009
Epoch 10/100 - Train loss: 8.3595, Val loss: 10.2918, LR: 0.000009
Epoch 11/100 - Train loss: 8.3521, Val loss: 10.2875, LR: 0.000009
Epoch 12/100 - Train loss: 8.3539, Val loss: 10.2922, LR: 0.000009
Epoch 13/100 - Train loss: 8.3453, Val loss: 10.2760, LR: 0.000008
Epoch 14/100 - Train loss: 8.3396, Val loss

# Evaluation

In [None]:
import wandb
import pandas as pd
import numpy as np
import os
import torch
import pickle
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score


# Set up device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

def evaluate_best_model():
    # Initialize wandb
    api = wandb.Api()
    
    # Get the project and find the best run based on combined_mae metric
    project = api.project("maikotrede-hochschule-luzern/queue-prediction-sweeps")
    runs = api.runs("maikotrede-hochschule-luzern/queue-prediction-sweeps")
    
    # Find the best run with the lowest combined_mae
    best_run = None
    best_mae = float('inf')
    
    for run in runs:
        if run.state == "finished" and "combined_mae" in run.summary:
            mae = run.summary["combined_mae"]
            if mae < best_mae:
                best_mae = mae
                best_run = run
    
    if best_run is None:
        print("Could not find any completed runs with combined_mae metric")
        return
    
    print(f"Best run: {best_run.name}, combined_mae: {best_mae}")
    
    # Get the config from the best run
    config = best_run.config
    
    # Download the artifacts for the best run
    linear_artifacts = best_run.logged_artifacts()
    linear_model_artifact = None
    tcn_model_artifact = None
    
    for artifact in linear_artifacts:
        if "linear_model" in artifact.name:
            linear_model_artifact = artifact
        elif "tcn_model" in artifact.name:
            tcn_model_artifact = artifact
    
    if linear_model_artifact is None or tcn_model_artifact is None:
        print("Could not find model artifacts")
        return
    
    # Download artifacts
    os.makedirs("downloaded_models", exist_ok=True)
    linear_model_path = linear_model_artifact.download("downloaded_models")
    tcn_model_path = tcn_model_artifact.download("downloaded_models")
    
    # Load models
    linear_model_file = os.path.join(linear_model_path, os.listdir(linear_model_path)[0])
    tcn_model_file = os.path.join(tcn_model_path, os.listdir(tcn_model_path)[0])
    
    with open(linear_model_file, "rb") as f:
        linear_model = pickle.load(f)
    
    # Load data
    data_path = config["data_path"]
    splits_output_dir = config["splits_output_dir"]
    target_ride = config["target_ride"]
    seq_length = config["seq_length"]
    batch_size = config.get("batch_size", 256)
    
    # Load and prepare test data
    df = pd.read_parquet(data_path)
    df = preprocess_data(df, target_ride)
    
    test_indices = pd.read_parquet(os.path.join(splits_output_dir, "test_indices.parquet"))
    
    ride_name_normalized = target_ride.replace(' ', '_')
    test_indices = test_indices[test_indices['ride_name'] == ride_name_normalized]['original_index'].values
    
    df, feature_cols = create_features(df)
    test_df = df.iloc[test_indices].copy()
    
    # Prepare features and target
    X_test = test_df[feature_cols].values
    y_test = test_df['wait_time'].values
    
    # Get predictions from linear model
    y_test_pred_linear = linear_model.predict(X_test)
    
    # Calculate residuals (actual - predicted)
    test_residuals = y_test - y_test_pred_linear
    
    # Create dataset for TCN model
    test_dataset = TimeSeriesDataset(X_test, test_residuals, seq_length)
    test_loader = torch.utils.data.DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    
    # Initialize TCN model with the same architecture
    input_size = X_test.shape[1]
    output_size = 1
    
    tcn_model = TCNModel(
        input_size=input_size,
        output_size=output_size,
        num_channels=config["num_channels"],
        kernel_size=config["kernel_size"],
        dropout=config["dropout"]
    )
    
    # Load the state dict
    tcn_model.load_state_dict(torch.load(tcn_model_file, map_location=device))
    tcn_model.to(device)
    tcn_model.eval()
    
    # Get TCN predictions on test data
    all_tcn_preds = []
    
    with torch.no_grad():
        for inputs, _ in test_loader:
            inputs = inputs.to(device)
            outputs = tcn_model(inputs)
            all_tcn_preds.extend(outputs.cpu().numpy().flatten())
    
    # Get the corresponding test data (accounting for sequence length)
    y_test_seq_linear = y_test_pred_linear[seq_length:][:len(all_tcn_preds)]
    y_test_seq_actual = y_test[seq_length:][:len(all_tcn_preds)]
    
    # Combine predictions
    y_test_combined = y_test_seq_linear + np.array(all_tcn_preds)
    
    # Create evaluation dataframe
    test_eval_df = test_df.iloc[seq_length:].reset_index(drop=True).iloc[:len(all_tcn_preds)].copy()
    
    # Filter out rows where ride is closed
    if 'closed' in test_eval_df.columns:
        open_ride_df = test_eval_df[test_eval_df['closed'] == 0]
        y_test_open_actual = open_ride_df['wait_time'].values
        y_test_open_linear = y_test_seq_linear[open_ride_df.index]
        y_test_open_combined = y_test_combined[open_ride_df.index]
    else:
        y_test_open_actual = y_test_seq_actual
        y_test_open_linear = y_test_seq_linear
        y_test_open_combined = y_test_combined

    linear_mae = mean_absolute_error(y_test_open_actual, y_test_open_linear)
    linear_mse = mean_squared_error(y_test_open_actual, y_test_open_linear)
    linear_rmse = np.sqrt(linear_mse)
    linear_r2 = r2_score(y_test_open_actual, y_test_open_linear)
    
    combined_mae = mean_absolute_error(y_test_open_actual, y_test_open_combined)
    combined_mse = mean_squared_error(y_test_open_actual, y_test_open_combined)
    combined_rmse = np.sqrt(combined_mse)
    combined_r2 = r2_score(y_test_open_actual, y_test_open_combined)
    
    print("\nTest Set Evaluation:")
    print(f"Linear Model - MAE: {linear_mae:.2f}, RMSE: {linear_rmse:.2f}, R²: {linear_r2:.4f}")
    print(f"Combined Model - MAE: {combined_mae:.2f}, RMSE: {combined_rmse:.2f}, R²: {combined_r2:.4f}")
    print(f"Improvement - MAE: {linear_mae - combined_mae:.2f} ({(1 - combined_mae/linear_mae) * 100:.2f}%)")

    try:
        import matplotlib.pyplot as plt
        
        plt.figure(figsize=(12, 6))
        plt.plot(y_test_open_actual[:100], label='Actual')
        plt.plot(y_test_open_linear[:100], label='Linear Prediction')
        plt.plot(y_test_open_combined[:100], label='Combined Prediction')
        plt.legend()
        plt.title(f'Actual vs Predicted Wait Times - {target_ride}')
        plt.xlabel('Time Step')
        plt.ylabel('Wait Time (minutes)')
        plt.savefig(f"{target_ride}_predictions.png")
        plt.close()
        
        print(f"Saved plot to {target_ride}_predictions.png")
    except Exception as e:
        print(f"Could not create plot: {e}")

if __name__ == "__main__":
    evaluate_best_model()

Best run: mild-sweep-1, combined_mae: 6.421060816451057


[34m[1mwandb[0m:   1 of 1 files downloaded.  
[34m[1mwandb[0m:   1 of 1 files downloaded.  


Building model for ride: poseidon




UnpicklingError: Weights only load failed. In PyTorch 2.6, we changed the default value of the `weights_only` argument in `torch.load` from `False` to `True`. Re-running `torch.load` with `weights_only` set to `False` will likely succeed, but it can result in arbitrary code execution. Do it only if you got the file from a trusted source.
Please file an issue with the following so that we can make `weights_only=True` compatible with your use case: WeightsUnpickler error: Unsupported operand 149

Check the documentation of torch.load to learn more about types accepted by default with weights_only https://pytorch.org/docs/stable/generated/torch.load.html.