## (Tuning) Second version of the model (adds early stopping, a learning-rate scheduler, dropout, and multiple LSTM layers)

In [None]:
import itertools
import os

import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from sklearn.preprocessing import StandardScaler
from torch.nn.utils.rnn import pad_sequence, pack_padded_sequence, pad_packed_sequence
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torch.utils.data import Dataset, DataLoader

def set_seed(seed=42):
    """Seed the PyTorch (CPU and all CUDA devices) and NumPy generators.

    Also switches cuDNN to deterministic mode and disables its benchmark
    autotuner.

    Args:
        seed: Integer seed handed to every generator. Defaults to 42.
    """
    seeders = (torch.manual_seed, torch.cuda.manual_seed_all, np.random.seed)
    for apply_seed in seeders:
        apply_seed(seed)

    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

def pad_sequence_data_2024(sequences, targets, sequence_lengths_2024, pad_length=87):
    """Pad or truncate every sequence/target pair to exactly ``pad_length`` steps.

    Args:
        sequences: Iterable of array-likes; each is converted to a float32
            tensor and padded/truncated along its first dimension.
            (assumes each is 2-D ``[time, features]`` — TODO confirm)
        targets: Iterable of 1-D array-likes, one per sequence, converted to
            float32 tensors and padded/truncated the same way.
        sequence_lengths_2024: True (unpadded) length of each sequence, as a
            list of ints or a tensor.
        pad_length: Fixed number of time steps in the output. Defaults to 87.

    Returns:
        Tuple ``(padded_sequences, padded_targets, lengths)`` where the first
        two are stacked tensors with ``pad_length`` steps per row and
        ``lengths`` holds the input lengths clamped to ``pad_length``.
    """
    def _fit_to_length(tensor):
        """Zero-pad at the end of dim 0, or truncate, so dim 0 == pad_length."""
        missing = pad_length - tensor.size(0)
        if missing <= 0:
            return tensor[:pad_length]
        # F.pad lists pads from the LAST dimension backwards, so every
        # trailing dimension gets (0, 0) and only dim 0 receives `missing`.
        pad_spec = (0, 0) * (tensor.dim() - 1) + (0, missing)
        return F.pad(tensor, pad_spec)

    padded_sequences = torch.stack(
        [_fit_to_length(torch.tensor(seq, dtype=torch.float32)) for seq in sequences],
        dim=0,
    )
    padded_targets = torch.stack(
        [_fit_to_length(torch.tensor(tgt, dtype=torch.float32)) for tgt in targets],
        dim=0,
    )

    # A truncated sequence only has `pad_length` valid steps left; reporting
    # its original length would make downstream masks/packing index past the
    # end of the padded tensors.
    if isinstance(sequence_lengths_2024, torch.Tensor):
        clamped_lengths = sequence_lengths_2024.clamp(max=pad_length)
    else:
        clamped_lengths = [min(int(length), pad_length) for length in sequence_lengths_2024]

    return padded_sequences, padded_targets, clamped_lengths


def evaluate_2024_season(model):
    """Return the mean per-batch masked MSE of ``model`` on the 2024 season.

    Reads 'laptimestest3.csv', keeps the rows whose ``year`` is 2024, scales
    them with ``custom_scaler`` and builds one sequence per
    ``(raceId, driverId)`` group ordered by ``lap``. Relies on the
    module-level names ``features``, ``target``, ``device``,
    ``custom_scaler`` and ``TestRaceDataset``.

    Args:
        model: Callable taking ``(batch_X, batch_lengths)`` and returning one
            prediction per time step; it is switched to eval mode here.

    Returns:
        float: Sum of the per-batch MSE values divided by the number of
        batches. The loss is computed on the scaled target values, i.e.
        whatever ``custom_scaler`` produces for the ``target`` column.
    """
    season_df = pd.read_csv('laptimestest3.csv')
    season_df = season_df.sort_values(by=['raceId', 'driverId', 'lap'])
    season_df = season_df[season_df['year'] == 2024]
    season_df = custom_scaler(season_df)

    # One sequence (and its true length) per driver per race.
    sequences_2024 = []
    targets_2024 = []
    sequence_lengths_2024 = []
    for _, group in season_df.groupby(['raceId', 'driverId']):
        group = group.sort_values(by='lap')
        seq_data = group[features].values
        sequences_2024.append(seq_data)
        targets_2024.append(group[target].values)
        sequence_lengths_2024.append(len(seq_data))

    padded_sequences_2024, padded_targets_2024, sequence_lengths_2024 = pad_sequence_data_2024(
        sequences_2024, targets_2024, sequence_lengths_2024
    )
    test_dataset_2024 = TestRaceDataset(
        padded_sequences_2024, padded_targets_2024, sequence_lengths_2024
    )
    test_data_loader_2024 = DataLoader(test_dataset_2024, batch_size=32, shuffle=False)

    model.eval()
    criterion = torch.nn.MSELoss()
    total_loss = 0

    with torch.no_grad():  # inference only, no gradients needed
        for batch_X, batch_y, batch_lengths in test_data_loader_2024:
            batch_X = batch_X.to(device)
            batch_y = batch_y.to(device)
            batch_lengths = batch_lengths.to(device)

            outputs = model(batch_X, batch_lengths)

            # True where the time step index is below the sequence's real
            # length, so padded steps are excluded from the loss.
            step_index = torch.arange(outputs.size(1), device=batch_lengths.device)
            mask = step_index[None, :] < batch_lengths[:, None]

            total_loss += criterion(outputs[mask], batch_y[mask]).item()

    # Average of the per-batch losses (each batch counts equally).
    total_loss /= len(test_data_loader_2024)

    return total_loss


def pad_sequence_data_2024_round(sequences, sequence_lengths_2024):
    """Zero-pad ``sequences`` to the longest one and pass the lengths through.

    Args:
        sequences: Iterable of array-likes, each converted to a float32 tensor.
        sequence_lengths_2024: Lengths supplied by the caller; returned as-is.

    Returns:
        Tuple ``(padded_sequences, sequence_lengths_2024)`` with the padded
        tensor laid out batch-first.
    """
    as_tensors = []
    for raw_sequence in sequences:
        as_tensors.append(torch.tensor(raw_sequence, dtype=torch.float32))
    batch = pad_sequence(as_tensors, batch_first=True)
    return batch, sequence_lengths_2024


def predict_round_22(model):
    """Predict per-lap values for every driver in one 2024 round.

    NOTE(review): despite the function name, the filter below selects
    ``round == 1`` of 2024 — confirm which round is intended.

    Reads 'laptimestest3.csv' and 'f1db_csv/drivers.csv' and relies on the
    module-level names ``features``, ``device``, ``custom_scaler``,
    ``pad_sequence_data_2024_round`` and ``TestRaceDatasetRound``.

    Args:
        model: Callable taking ``(batch_X, lengths)``; switched to eval mode.

    Returns:
        pandas.DataFrame with one row per driver (indexed by ``driverRef``,
        or by the raw driver id when the driver is not in drivers.csv) and
        one column per lap; rows shorter than the longest are NaN-padded by
        the DataFrame constructor. Values are the raw model outputs, i.e. in
        the model's (scaled) target units.
    """
    laps_df = pd.read_csv('laptimestest3.csv')
    laps_df = laps_df.sort_values(by=['raceId', 'driverId', 'lap'])
    round_df = laps_df[(laps_df['year'] == 2024) & (laps_df['round'] == 1)]

    # Collect the raw driver ids BEFORE scaling, because custom_scaler divides
    # driverId by a constant. groupby yields keys in sorted order and the
    # scaling is a positive division, so this order matches the sequence
    # order built from the scaled frame below.
    driver_ids = [driver_id for (_, driver_id), _ in round_df.groupby(['raceId', 'driverId'])]

    round_df = custom_scaler(round_df)

    # One feature sequence per driver, ordered by lap.
    sequences_2024 = []
    sequence_lengths_2024 = []
    for _, group in round_df.groupby(['raceId', 'driverId']):
        group = group.sort_values(by='lap')
        seq_data = group[features].values
        sequences_2024.append(seq_data)
        sequence_lengths_2024.append(len(seq_data))

    padded_sequences_2024, sequence_lengths_2024 = pad_sequence_data_2024_round(
        sequences_2024, sequence_lengths_2024
    )
    test_dataset_2024 = TestRaceDatasetRound(padded_sequences_2024, sequence_lengths_2024)

    model.eval()
    all_predictions = []

    print("Outputs")
    with torch.no_grad():
        # Drivers are run one at a time (batch of 1).
        for index in range(len(test_dataset_2024)):
            batch_X, length = test_dataset_2024[index]
            length = torch.tensor([length], dtype=torch.int64)

            outputs = model(batch_X.unsqueeze(0).to(device), length.to(device))
            print(f'O: {outputs}')
            all_predictions.append(outputs)

    all_predictions = torch.cat(all_predictions, dim=0).cpu().numpy()

    print('Mask')
    print(all_predictions[0])
    # Keep only the first `length` steps of each row; the rest is padding.
    masked_preds = [
        all_predictions[index][:int(test_dataset_2024[index][1])]
        for index in range(len(test_dataset_2024))
    ]
    print('END Mask')

    print(all_predictions)

    predictions_df = pd.DataFrame(masked_preds)

    driverinfo = pd.read_csv('f1db_csv/drivers.csv')

    driver_id_name = []
    for driver_id in driver_ids:
        name = driverinfo.loc[driverinfo['driverId'] == int(driver_id), 'driverRef']
        print(f'{driver_id} Name: {name}')
        # Always emit exactly one label per driver: skipping unknown drivers
        # would leave fewer labels than prediction rows and make the index
        # assignment below fail.
        driver_id_name.append(str(int(driver_id)) if name.empty else name.values[0])

    predictions_df.index = driver_id_name

    return predictions_df
    
def custom_scaler(df):
    """Return a copy of ``df`` with selected columns divided by fixed constants.

    Only columns that are present in ``df`` are touched; every other column
    is carried over unchanged. The input frame itself is not modified.

    Args:
        df: pandas.DataFrame to scale.

    Returns:
        pandas.DataFrame: The scaled copy.
    """
    divisors = {
        'raceId': 100,
        'driverId': 10,
        'constructorId': 10,
        'year': 100,
        'q1milli': 10,
        'q2milli': 10,
        'q3milli': 10,
        'Driver_Season_Points': 10,
        'Races_before': 10,
        'milliseconds_y': 10000,
    }

    scaled = df.copy()
    columns_to_scale = [name for name in divisors if name in scaled.columns]
    for name in columns_to_scale:
        scaled[name] = scaled[name] / divisors[name]

    return scaled

def pad_sequence_data(sequences, targets):
    """Zero-pad sequences and targets to the longest sequence, batch-first.

    Args:
        sequences: Iterable of array-likes, each converted to a float32 tensor.
        targets: Iterable of array-likes, one per sequence, converted to
            float32 tensors.

    Returns:
        Tuple ``(padded_sequences, padded_targets, sequence_lengths)`` where
        ``sequence_lengths`` is a list with the unpadded first-dimension
        length of every sequence, in input order.
    """
    sequence_tensors = [torch.tensor(seq, dtype=torch.float32) for seq in sequences]
    target_tensors = [torch.tensor(tgt, dtype=torch.float32) for tgt in targets]

    # Derive the lengths from the data itself instead of reading a
    # module-level `sequence_lengths`, so the function works (and stays
    # correct) regardless of what the caller has defined globally.
    lengths = [tensor.size(0) for tensor in sequence_tensors]

    padded_sequences = pad_sequence(sequence_tensors, batch_first=True)
    padded_targets = pad_sequence(target_tensors, batch_first=True)
    return padded_sequences, padded_targets, lengths


def calculate_race_positions(lap_times, mask):
    """
    Sum each row's valid lap times and order the rows by that total.

    lap_times: tensor of shape [batch_size, num_laps]
    mask: tensor of shape [batch_size, num_laps]; entries that convert to 0.0
        drop the corresponding lap from the total

    Returns (total_times, positions): ``total_times`` has shape [batch_size];
    ``positions`` is ``argsort(total_times)``, i.e. element k is the row index
    of the k-th smallest total (smallest total first).
    """
    total_times = (lap_times * mask.float()).sum(dim=1)
    finishing_order = total_times.argsort()
    return total_times, finishing_order

def get_driver_features(batch_X, features):
    """
    Build one historical-performance score per sequence in the batch.

    The score is read from the first time step only (the original author
    notes these columns stay constant within a sequence) and equals
    ``Driver_Season_Points + 25 * driverwins + 15 * Podiums``.

    batch_X: tensor indexed as [sequence, time step, feature]
    features: list of feature names giving the column order of batch_X
    """
    first_step = batch_X[:, 0]

    # Look the three columns up by name so the function follows `features`.
    season_points, wins, podiums = (
        first_step[:, features.index(column)]
        for column in ('Driver_Season_Points', 'driverwins', 'Podiums')
    )

    win_bonus = wins * 25        # wins carry the largest weight
    podium_bonus = podiums * 15  # podiums count for less than wins
    return season_points + win_bonus + podium_bonus

def historical_performance_penalty(pred_positions, historical_performance):
    """
    Penalize predicted orderings that deviate from the historical ordering.

    pred_positions: 1-D integer tensor, as returned by ``torch.argsort``
    historical_performance: 1-D tensor of scores, higher meaning better

    Returns a scalar tensor: the mean squared difference between the two
    orderings after each is divided by ``n - 1`` (so values lie in [0, 1]).
    For a single-element batch the penalty is 0.
    """
    # Ordering implied by the scores (highest score first).
    expected_positions = torch.argsort(historical_performance, descending=True)

    # With one driver `n - 1` is 0 and 0 / 0 would yield NaN, which then
    # contaminates the whole epoch's loss average; clamp the divisor to 1.
    pred_scale = max(pred_positions.size(0) - 1, 1)
    expected_scale = max(expected_positions.size(0) - 1, 1)

    pred_positions_norm = pred_positions.float() / pred_scale
    expected_positions_norm = expected_positions.float() / expected_scale

    return torch.mean((pred_positions_norm - expected_positions_norm) ** 2)

def calculate_time_decay_weights(years, current_year=2024, old_decay=0.5, recent_decay=0.1):
    """
    Compute normalized exponential-decay weights for one or more years.

    Each input value is multiplied by 100 before use, so callers pass years
    divided by 100 (e.g. 20.18 for 2018).

    old_decay: decay rate for years before 2019, measured back from 2018
    recent_decay: decay rate for 2019 onwards, measured back from current_year

    A constant 0.01 is added to every raw weight and the result is divided by
    its sum. Returns a scalar when exactly one year is given (which, after
    normalization, is always 1.0), otherwise a float array.
    """
    year_values = np.atleast_1d(np.array(years))

    def _raw_weight(scaled_year):
        calendar_year = scaled_year * 100
        if calendar_year < 2019:
            # Older seasons: steeper decay, anchored at 2018.
            return np.exp(-old_decay * (2018 - calendar_year))
        # Recent seasons: gentler decay, anchored at current_year.
        return np.exp(-recent_decay * (current_year - calendar_year))

    weights = np.array([_raw_weight(value) for value in year_values], dtype=float)

    # Offset keeps every weight strictly positive before normalizing.
    weights = weights + 0.01
    weights = weights / weights.sum()

    if len(weights) == 1:
        return weights[0]
    return weights

def combined_loss_function(outputs, targets, mask, batch_X, features, 
                         laptime_weight=0.3, position_weight=0.4, historical_weight=0.3):
    """
    Combine laptime MSE, ordering MSE and a historical-performance penalty.

    outputs / targets: tensors of shape [batch_size, num_laps]
    mask: boolean tensor of the same shape marking the valid (unpadded) laps
    batch_X: input features, indexed as [sequence, time step, feature]
    features: list of feature names giving the column order of batch_X
    laptime_weight / position_weight / historical_weight: mixing coefficients

    Returns (total_loss, weighted_laptime_loss, weighted_position_loss,
    weighted_historical_loss), all scalar tensors.

    Only the laptime term is built from differentiable operations on
    ``outputs``; the two ordering terms go through ``argsort``, whose integer
    result carries no gradient.
    """
    # Per-sequence time weights from the (scaled) year at the first time step.
    # NOTE(review): calculate_time_decay_weights normalizes its result, and a
    # single year normalizes to 1.0, so as called here every weight is 1.0
    # and the "time decay" leaves the losses unchanged — confirm the intent.
    years = batch_X[:, 0, features.index('year')].cpu().numpy()
    time_weights = torch.tensor([calculate_time_decay_weights(year) for year in years], 
                              dtype=torch.float32, 
                              device=outputs.device)
    
    # Laptime MSE over valid laps only. F.mse_loss (mean reduction) is used
    # directly so the function does not depend on a global `criterion`.
    laptime_loss = F.mse_loss(outputs[mask], targets[mask])
    
    # Ordering loss: MSE between the argsort orderings of summed lap times.
    _, pred_positions = calculate_race_positions(outputs, mask)
    _, true_positions = calculate_race_positions(targets, mask)
    position_loss = F.mse_loss(pred_positions.float(), true_positions.float())
    
    # Historical performance loss
    historical_performance = get_driver_features(batch_X, features)
    historical_loss = historical_performance_penalty(pred_positions, historical_performance)
    
    # Each loss is a scalar; multiplying by the weight vector and averaging
    # scales it by the mean weight.
    weighted_laptime_loss = torch.mean(laptime_loss * time_weights)
    weighted_position_loss = torch.mean(position_loss * time_weights)
    weighted_historical_loss = torch.mean(historical_loss * time_weights)
    
    total_loss = (
        laptime_weight * weighted_laptime_loss + 
        position_weight * weighted_position_loss +
        historical_weight * weighted_historical_loss
    )
    
    return total_loss, weighted_laptime_loss, weighted_position_loss, weighted_historical_loss
    
# Custom PyTorch Dataset used for training
class RaceDataset(Dataset):
    """Index-aligned container of inputs ``X``, targets ``y`` and ``lengths``.

    Item ``i`` is the tuple ``(X[i], y[i], lengths[i])``; the dataset size is
    ``len(X)``.
    """

    def __init__(self, X, y, lengths):
        self.X, self.y, self.lengths = X, y, lengths

    def __len__(self):
        sample_count = len(self.X)
        return sample_count

    def __getitem__(self, i):
        sample = (self.X[i], self.y[i], self.lengths[i])
        return sample
    
class LSTMModel(torch.nn.Module):
    """Stacked LSTM that emits one prediction per time step.

    Pipeline: packed LSTM -> BatchNorm1d over the hidden channels ->
    dropout -> linear layer applied at every time step.

    Args:
        input_size: Number of features per time step.
        hidden_size: LSTM hidden state size.
        output_size: Size of the per-step output; the last dimension is
            squeezed away when it is 1.
        num_layers: Number of stacked LSTM layers. Defaults to 2.
        dropout: Dropout probability, used between LSTM layers (only when
            ``num_layers > 1``) and on the normalized LSTM output.
        total_length: Number of time steps the output is padded back to,
            independent of the longest sequence in the batch. Defaults to 87,
            the value that used to be hard-coded in ``forward``. ``None``
            pads only to the longest sequence in the batch.
    """

    def __init__(self, input_size, hidden_size, output_size, num_layers=2, dropout=0.2,
                 total_length=87):
        super().__init__()

        self.total_length = total_length

        self.lstm = torch.nn.LSTM(
            input_size=input_size,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            # torch only applies inter-layer dropout when there is more than
            # one layer; pass 0 otherwise to avoid its warning.
            dropout=dropout if num_layers > 1 else 0
        )

        # Dropout on the output of the last LSTM layer
        self.dropout = torch.nn.Dropout(dropout)

        # Batch normalization over the hidden channels
        self.batch_norm = torch.nn.BatchNorm1d(hidden_size)

        # Per-step projection to the output size
        self.fc = torch.nn.Linear(hidden_size, output_size)

    def forward(self, x, lengths):
        """Run the model on a padded batch.

        Args:
            x: Batch-first padded input tensor ``[batch, time, input_size]``.
            lengths: 1-D tensor with the valid length of every sequence.

        Returns:
            Tensor ``[batch, total_length]`` when ``output_size`` is 1
            (``[batch, total_length, output_size]`` otherwise).
        """
        # pack_padded_sequence requires the lengths on the CPU.
        packed_input = pack_padded_sequence(x, lengths.cpu(), batch_first=True, enforce_sorted=False)

        packed_output, _ = self.lstm(packed_input)

        # Fall back to 87 for model objects pickled before `total_length`
        # became an attribute.
        total_length = getattr(self, 'total_length', 87)
        output, _ = pad_packed_sequence(packed_output, batch_first=True, total_length=total_length)

        # BatchNorm1d expects [batch, channels, time]; transpose in and out.
        output = self.batch_norm(output.transpose(1, 2)).transpose(1, 2)

        output = self.dropout(output)
        output = self.fc(output)

        # Drop the trailing singleton dimension to match the target shape.
        return output.squeeze(-1)

class TestRaceDataset(Dataset):
    """Evaluation dataset yielding ``(X[i], y[i], lengths[i])`` per index.

    The dataset size is ``len(X)``.
    """

    def __init__(self, X, y, lengths):
        self.X, self.y, self.lengths = X, y, lengths

    def __len__(self):
        sample_count = len(self.X)
        return sample_count

    def __getitem__(self, i):
        sample = (self.X[i], self.y[i], self.lengths[i])
        return sample
    

class TestRaceDatasetRound(Dataset):
    """Target-less dataset yielding ``(X[i], lengths[i])`` per index.

    The dataset size is ``len(X)``.
    """

    def __init__(self, X, lengths):
        self.X, self.lengths = X, lengths

    def __len__(self):
        sample_count = len(self.X)
        return sample_count

    def __getitem__(self, i):
        sample = (self.X[i], self.lengths[i])
        return sample


# Hyperparameter grid: every combination of the lists below is trained once.
batch_sizes = [5, 8, 16]          # alternative: [5, 8, 16, 32]
num_epochs = [150]
learning_rates = [0.0001]         # alternative: [0.0001, 0.0005, 0.001]
hidden_sizes = [256, 512]         # alternative: [128, 256, 512]
lstm_layers = [1, 2, 3]
dropout_rates = [0.1, 0.2, 0.3]

# Loss mixing weights as (laptime, position, historical); each tuple sums to 1.
# alternative: add (0.3, 0.4, 0.3)
weight_distributions = [
    (0.6, 0.3, 0.1),
    (0.4, 0.4, 0.2),
]

results = []

# Resume from earlier runs when a results file is present; otherwise start
# with an empty table that already has the expected columns.
if not os.path.exists('./2024/model_results2024.csv'):
    results_df = pd.DataFrame(columns=[
        'model_name', 'batch_size', 'epochs', 'learning_rate',
        'hidden_size', 'lstm_layers', 'dropout',
        'laptime_weight', 'position_weight', 'historical_weight',
        'train_loss', 'test_loss',
    ])
else:
    results_df = pd.read_csv('./2024/model_results2024.csv')

# Names of already-trained configurations, kept in a set for O(1) lookup.
existing_model_names = set() if results_df.empty else set(results_df['model_name'])

# ---------------------------------------------------------------------------
# Training data. It is identical for every configuration, so it is prepared
# once here instead of once per grid point. These names are kept at module
# level because helper functions in this file look some of them up as globals
# (e.g. `features`, `target` and `device` in the evaluation helpers).
# ---------------------------------------------------------------------------
features = ['raceId','circuitId','driverId','constructorId', 'grid', 'year', 'round', 'lap', 'q1milli', 'q2milli', 'q3milli', 
            'Driver_Season_Points', 'driverwins', 'YOB', 'Races_before', 'Races_won', 'Podiums', 'isSafetyCar', 'isSafetyCarPrev', 
            'isPitting','tyre_age', 'tyre_compound', 'isVET', 'isZHO', 'isVER', 'isTSU', 'isSTR', 'isMSC', 'isSAR', 'isRIC', 
            'isSAI', 'isRUS', 'isPIA', 'isPER', 'isOCO', 'isNOR', 'isMAG', 'isLEC', 'isLAW', 'isLAT', 'isHUL', 'isHAM', 
            'isGAS', 'isDEV', 'isBOT', 'isBEA', 'isALO', 'isALB', 'isRBR', 'isFER', 'isMER', 'isALP', 'isMCL', 'isALF', 
            'isAST', 'isHAA', 'isATR', 'isWIL']

target = 'milliseconds_y'

combined_df = pd.read_csv('laptimestest3.csv')
combined_df = combined_df.sort_values(by=['raceId', 'driverId', 'lap']) 

# Train on seasons before 2024 only; 2024 is held out for evaluation.
combined_df = combined_df[combined_df['year'] < 2024]
combined_df = custom_scaler(combined_df)

# One sequence per driver per race, ordered by lap.
sequences = []
targets = []
sequence_lengths = []
for _, group in combined_df.groupby(['raceId', 'driverId']):
    group = group.sort_values(by='lap')
    seq_data = group[features].values
    sequences.append(seq_data)
    targets.append(group[target].values)
    sequence_lengths.append(len(seq_data))

padded_sequences, padded_targets, sequence_lengths = pad_sequence_data(sequences, targets)
dataset = RaceDataset(padded_sequences, padded_targets, sequence_lengths)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
criterion = torch.nn.MSELoss()

# ---------------------------------------------------------------------------
# Grid search. itertools.product iterates in the same order as the equivalent
# nested for-loops (leftmost list varies slowest).
# ---------------------------------------------------------------------------
for batch_size, epochs, lr, hidden_size, num_layers, dropout, weights in itertools.product(
        batch_sizes, num_epochs, learning_rates, hidden_sizes,
        lstm_layers, dropout_rates, weight_distributions):
    # The configuration name doubles as the resume key in the results CSV.
    model_name = f"model_b{batch_size}_e{epochs}_lr{lr}_h{hidden_size}_l{num_layers}_d{dropout}_w{weights[0]}_{weights[1]}_{weights[2]}.pth"
    print(f"\n\nTraining model: {model_name}")

    if model_name in existing_model_names:
        print(f"Skipping {model_name} as it already exists.")
        continue

    # Re-seed per configuration so every model starts from the same RNG state.
    set_seed(42)

    model = LSTMModel(
        input_size=len(features),
        hidden_size=hidden_size,
        output_size=1,
        num_layers=num_layers,
        dropout=dropout
    ).to(device)

    optimizer = torch.optim.Adam(model.parameters(), lr=lr)
    # Halve the learning rate after 3 epochs without improvement.
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)

    data_loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    model.train()

    best_loss = float('inf')
    patience = 15  # epochs without improvement before stopping early
    patience_counter = 0
    for epoch in range(epochs):
        epoch_loss = 0
        epoch_laptime_loss = 0
        epoch_position_loss = 0
        epoch_hist_loss = 0
        batch_count = 0

        for batch_X, batch_y, batch_lengths in data_loader:
            optimizer.zero_grad()

            batch_X = batch_X.to(device)
            batch_y = batch_y.to(device)
            batch_lengths = batch_lengths.to(device)

            outputs = model(batch_X, batch_lengths)
            # True for real time steps, False for padding.
            mask = torch.arange(outputs.size(1), device=batch_lengths.device)[None, :] < batch_lengths[:, None]

            loss, laptime_loss, pos_loss, hist_loss = combined_loss_function(
                outputs, batch_y, mask, batch_X, features, weights[0], weights[1], weights[2]
            )

            loss.backward()
            torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
            optimizer.step()

            epoch_loss += loss.item()
            epoch_laptime_loss += laptime_loss.item()
            epoch_position_loss += pos_loss.item()
            epoch_hist_loss += hist_loss.item()
            batch_count += 1

        avg_epoch_loss = epoch_loss / batch_count
        avg_laptime_loss = epoch_laptime_loss / batch_count
        avg_position_loss = epoch_position_loss / batch_count
        avg_hist_loss = epoch_hist_loss / batch_count

        scheduler.step(avg_epoch_loss)

        # Early stopping on the training loss; checkpoint every improvement.
        if avg_epoch_loss < best_loss:
            best_loss = avg_epoch_loss
            patience_counter = 0
            torch.save(model, './2024/best_model.pth')
            print('best_model saved')
        else:
            patience_counter += 1
            if patience_counter >= patience:
                print(f"Early stopping triggered at epoch {epoch}")
                break

        print('--------------------')
        print(f"Epoch {epoch+1}")
        print(f"Average Loss: {avg_epoch_loss:.6f}")
        print(f"Laptime Loss: {avg_laptime_loss:.6f}")
        print(f"Position Loss: {avg_position_loss:.6f}")
        print(f"Historical Loss: {avg_hist_loss:.6f}")

    # Reload the best checkpoint. The file holds a whole pickled module
    # written by this script a moment ago, so full unpickling is requested
    # explicitly; newer torch versions default to weights_only=True, which
    # rejects pickled modules. Never load untrusted files this way.
    model = torch.load('./2024/best_model.pth', weights_only=False)
    model.eval()

    # Held-out evaluation on the 2024 season plus per-driver predictions.
    test_loss = evaluate_2024_season(model)
    print(f'Test loss: {test_loss}')
    round_22_preds = predict_round_22(model)

    new_result = {
        'model_name': model_name,
        'batch_size': batch_size,
        'epochs': epochs,
        'learning_rate': lr,
        'hidden_size': hidden_size,
        'lstm_layers': num_layers,
        'dropout': dropout,
        'laptime_weight': weights[0],
        'position_weight': weights[1],
        'historical_weight': weights[2],
        'train_loss': best_loss,  # best average training loss seen
        'test_loss': test_loss
    }
    new_row = pd.DataFrame([new_result])
    # Concatenating with an empty frame is deprecated in pandas; start the
    # table from the first row instead.
    if results_df.empty:
        results_df = new_row
    else:
        results_df = pd.concat([results_df, new_row], ignore_index=True)

    # NOTE: model_name already ends in ".pth", so these paths end in
    # ".pth.pth" / ".pth.csv"; kept as-is so existing artefacts keep matching.
    torch.save(model, f'./2024/test/{model_name}.pth')

    # Persist after every configuration so an interrupted run can resume.
    results_df.to_csv('./2024/model_results2024.csv', index=False)

    round_22_preds.to_csv(f'./2024/Pred-{model_name}.csv')



Training model: model_b5_e150_lr0.0001_h256_l1_d0.1_w0.6_0.3_0.1.pth
best_model saved
--------------------
Epoch 1
Average Loss: 61.705839
Laptime Loss: 101.412381
Position Loss: 2.779234
Historical Loss: 0.246384
best_model saved
--------------------
Epoch 2
Average Loss: 45.277839
Laptime Loss: 74.082872
Position Loss: 2.676391
Historical Loss: 0.251978
best_model saved
--------------------
Epoch 3
Average Loss: 44.837854
Laptime Loss: 73.394351
Position Loss: 2.589370
Historical Loss: 0.244314
best_model saved
--------------------
Epoch 4
Average Loss: 43.823428
Laptime Loss: 71.670018
Position Loss: 2.655624
Historical Loss: 0.247281
best_model saved
--------------------
Epoch 5
Average Loss: 43.308506
Laptime Loss: 70.776675
Position Loss: 2.728307
Historical Loss: 0.240080
best_model saved
--------------------
Epoch 6
Average Loss: 42.755959
Laptime Loss: 69.905891
Position Loss: 2.627441
Historical Loss: 0.241904
best_model saved
--------------------
Epoch 7
Average Loss: 42.2

  model = torch.load('./2024/best_model.pth')


Test loss: 64.73300966964318
Outputs
O: tensor([[9.0134, 8.9607, 8.7337, 8.5750, 8.4719, 8.4056, 8.3623, 8.3335, 8.3141,
         8.3009, 8.2919, 9.0811, 9.9602, 9.0408, 8.7018, 8.5278, 8.4273, 8.3656,
         8.3260, 8.3003, 8.2833, 8.2722, 8.2651, 8.2607, 8.2581, 8.2568, 8.2564,
         8.2565, 8.2571, 8.2579, 8.2589, 8.2600, 9.0711, 9.6018, 8.7772, 8.4930,
         8.3555, 8.2803, 8.2364, 8.2097, 8.1932, 8.1831, 8.1772, 8.1741, 8.1729,
         8.1729, 8.1737, 8.1750, 8.1766, 8.1783, 8.1801, 8.1818, 8.1835, 8.1850,
         8.1863, 8.1874, 8.1881, 1.6830, 1.6830, 1.6830, 1.6830, 1.6830, 1.6830,
         1.6830, 1.6830, 1.6830, 1.6830, 1.6830, 1.6830, 1.6830, 1.6830, 1.6830,
         1.6830, 1.6830, 1.6830, 1.6830, 1.6830, 1.6830, 1.6830, 1.6830, 1.6830,
         1.6830, 1.6830, 1.6830, 1.6830, 1.6830, 1.6830]], device='cuda:0')
O: tensor([[ 9.1588,  9.0908,  8.8895,  8.7606,  8.6829,  8.6369,  8.6088,  8.5913,
          8.5803,  8.5733,  8.5689,  8.5662,  8.5646,  8.5637,  9.3088,

  results_df = pd.concat([results_df, pd.DataFrame([new_result])], ignore_index=True)


best_model saved
--------------------
Epoch 1
Average Loss: 41.725889
Laptime Loss: 101.412294
Position Loss: 2.779234
Historical Loss: 0.246384
best_model saved
--------------------
Epoch 2
Average Loss: 30.717480
Laptime Loss: 74.015578
Position Loss: 2.651174
Historical Loss: 0.253894
best_model saved
--------------------
Epoch 3
Average Loss: 30.471289
Laptime Loss: 73.425378
Position Loss: 2.630902
Historical Loss: 0.243881
best_model saved
--------------------
Epoch 4
Average Loss: 29.777141
Laptime Loss: 71.651580
Position Loss: 2.667491
Historical Loss: 0.247559
best_model saved
--------------------
Epoch 5
Average Loss: 29.406205
Laptime Loss: 70.701296
Position Loss: 2.694190
Historical Loss: 0.240049
best_model saved
--------------------
Epoch 6
Average Loss: 29.095292
Laptime Loss: 69.953633
Position Loss: 2.663041
Historical Loss: 0.243109
best_model saved
--------------------
Epoch 7
Average Loss: 28.687810
Laptime Loss: 69.012910
Position Loss: 2.582942
Historical Loss: 

  model = torch.load('./2024/best_model.pth')


Test loss: 65.20688824699475
Outputs
O: tensor([[8.0321, 8.4595, 8.2922, 8.1697, 8.1045, 8.0666, 8.0425, 8.0260, 8.0141,
         8.0049, 7.9976, 9.0477, 9.7121, 8.4076, 8.1870, 8.0743, 8.0157, 7.9835,
         7.9652, 7.9548, 7.9492, 7.9466, 7.9459, 7.9462, 7.9469, 7.9477, 7.9483,
         7.9484, 7.9480, 7.9470, 7.9454, 7.9431, 8.9803, 9.5572, 8.1869, 8.0039,
         7.9114, 7.8651, 7.8422, 7.8324, 7.8307, 7.8342, 7.8412, 7.8503, 7.8607,
         7.8715, 7.8822, 7.8923, 7.9014, 7.9093, 7.9158, 7.9210, 7.9247, 7.9270,
         7.9279, 7.9276, 7.9261, 1.5745, 1.5745, 1.5745, 1.5745, 1.5745, 1.5745,
         1.5745, 1.5745, 1.5745, 1.5745, 1.5745, 1.5745, 1.5745, 1.5745, 1.5745,
         1.5745, 1.5745, 1.5745, 1.5745, 1.5745, 1.5745, 1.5745, 1.5745, 1.5745,
         1.5745, 1.5745, 1.5745, 1.5745, 1.5745, 1.5745]], device='cuda:0')
O: tensor([[7.9774, 8.3141, 8.1428, 8.0308, 7.9777, 7.9526, 7.9416, 7.9381, 7.9388,
         7.9419, 7.9463, 7.9512, 7.9560, 7.9604, 8.9844, 9.6439, 8.1986

  model = torch.load('./2024/best_model.pth')


Test loss: 64.75757303375464
Outputs
O: tensor([[ 9.1735,  9.3268,  9.0457,  8.8456,  8.7274,  8.6535,  8.6047,  8.5712,
          8.5478,  8.5311,  8.5190,  9.2772, 10.4995,  9.3044,  8.9547,  8.7776,
          8.6750,  8.6113,  8.5702,  8.5429,  8.5248,  8.5129,  8.5052,  8.5005,
          8.4979,  8.4969,  8.4969,  8.4976,  8.4989,  8.5005,  8.5023,  8.5042,
          9.2908, 10.1663,  9.0182,  8.7376,  8.6023,  8.5268,  8.4818,  8.4541,
          8.4372,  8.4272,  8.4220,  8.4203,  8.4209,  8.4234,  8.4271,  8.4318,
          8.4373,  8.4432,  8.4495,  8.4561,  8.4629,  8.4697,  8.4766,  8.4835,
          8.4903,  1.7977,  1.7977,  1.7977,  1.7977,  1.7977,  1.7977,  1.7977,
          1.7977,  1.7977,  1.7977,  1.7977,  1.7977,  1.7977,  1.7977,  1.7977,
          1.7977,  1.7977,  1.7977,  1.7977,  1.7977,  1.7977,  1.7977,  1.7977,
          1.7977,  1.7977,  1.7977,  1.7977,  1.7977,  1.7977,  1.7977]],
       device='cuda:0')
O: tensor([[ 8.9083,  9.0967,  8.8544,  8.6830,  8.5

  model = torch.load('./2024/best_model.pth')


Test loss: 64.89579495329123
Outputs
O: tensor([[8.7816, 8.9665, 8.7194, 8.5831, 8.5168, 8.4811, 8.4609, 8.4492, 8.4422,
         8.4381, 8.4355, 9.1519, 9.9823, 8.9639, 8.6746, 8.5463, 8.4817, 8.4467,
         8.4269, 8.4157, 8.4092, 8.4057, 8.4039, 8.4033, 8.4033, 8.4039, 8.4049,
         8.4061, 8.4076, 8.4093, 8.4113, 8.4133, 9.1461, 9.6662, 8.8057, 8.5639,
         8.4588, 8.4071, 8.3801, 8.3656, 8.3582, 8.3548, 8.3537, 8.3541, 8.3554,
         8.3573, 8.3596, 8.3621, 8.3649, 8.3678, 8.3709, 8.3740, 8.3773, 8.3806,
         8.3840, 8.3875, 8.3911, 1.6884, 1.6884, 1.6884, 1.6884, 1.6884, 1.6884,
         1.6884, 1.6884, 1.6884, 1.6884, 1.6884, 1.6884, 1.6884, 1.6884, 1.6884,
         1.6884, 1.6884, 1.6884, 1.6884, 1.6884, 1.6884, 1.6884, 1.6884, 1.6884,
         1.6884, 1.6884, 1.6884, 1.6884, 1.6884, 1.6884]], device='cuda:0')
O: tensor([[8.8710, 8.9322, 8.6627, 8.5402, 8.4807, 8.4473, 8.4271, 8.4144, 8.4062,
         8.4011, 8.3982, 8.3968, 8.3965, 8.3970, 9.0975, 9.8349, 8.7924

  model = torch.load('./2024/best_model.pth')


Test loss: 64.6551240762839
Outputs
O: tensor([[ 9.6066,  9.6141,  9.2736,  9.0526,  8.9343,  8.8653,  8.8216,  8.7917,
          8.7699,  8.7528,  8.7387,  9.2972, 10.6261,  9.6016,  9.2165,  9.0227,
          8.9187,  8.8579,  8.8196,  8.7937,  8.7749,  8.7605,  8.7488,  8.7388,
          8.7301,  8.7223,  8.7151,  8.7085,  8.7023,  8.6965,  8.6910,  8.6858,
          9.2695, 10.2599,  9.3743,  9.0517,  8.8885,  8.8012,  8.7506,  8.7192,
          8.6985,  8.6839,  8.6730,  8.6643,  8.6572,  8.6510,  8.6456,  8.6406,
          8.6361,  8.6318,  8.6278,  8.6240,  8.6204,  8.6169,  8.6135,  8.6103,
          8.6071,  1.5914,  1.5914,  1.5914,  1.5914,  1.5914,  1.5914,  1.5914,
          1.5914,  1.5914,  1.5914,  1.5914,  1.5914,  1.5914,  1.5914,  1.5914,
          1.5914,  1.5914,  1.5914,  1.5914,  1.5914,  1.5914,  1.5914,  1.5914,
          1.5914,  1.5914,  1.5914,  1.5914,  1.5914,  1.5914,  1.5914]],
       device='cuda:0')
O: tensor([[ 9.6375,  9.6546,  9.3182,  9.1041,  8.98