# In [None]:
# ModelHelpers - shared utilities for model training and evaluation
import pandas as pd
import numpy as np
import copy

from sklearn.ensemble import RandomForestRegressor, BaggingRegressor
from sklearn.model_selection import cross_val_predict, KFold, cross_val_score, GroupKFold
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader


def evaluate_model(model, features, data, target='duration'):
    """Run shuffled 5-fold cross-validation and print the per-fold and mean RMSE.

    Rows with a missing value in any feature column or in the target are
    dropped before the model is evaluated.

    Args:
        model: Estimator passed as ``estimator`` to ``cross_val_score``.
        features: Iterable of column names in ``data`` used as inputs.
        data: DataFrame containing the feature columns and the target column.
        target: Name of the target column.

    Returns:
        None. The scores are printed, not returned.
    """
    # Accept any iterable of column names (list, tuple, ...), not only lists.
    feature_cols = list(features)

    eval_data = data.dropna(subset=feature_cols + [target])
    X = eval_data[feature_cols]
    y = eval_data[target]

    # Fixed seed keeps the fold assignment identical across calls so that
    # different models / feature sets are compared on the same splits.
    cv = KFold(n_splits=5, shuffle=True, random_state=99)

    neg_rmse = cross_val_score(
        estimator=model,
        X=X,
        y=y,
        cv=cv,
        scoring='neg_root_mean_squared_error'
    )

    # scikit-learn scorers follow "greater is better", so this scorer yields
    # the *negated* RMSE. Flip the sign so the printed numbers are real RMSEs.
    scores = -neg_rmse

    print(f"CV Scores (RMSE): {scores}")
    print(f"Mean CV RMSE: {np.mean(scores):.4f}\n")


# Hero pick/ban lists are brought to one fixed length so that they can be
# turned into equally sized tensors.
HERO_LIST_MAX_LEN: int = 5  # number of slots kept per hero list
PADDING_VALUE: int = 0  # ID written into unused slots
TARGET_COLUMN: str = 'duration'  # target column name (not referenced in this section)


class MatchDataset(Dataset):
    """PyTorch Dataset for match data with hero picks/bans padded to fixed length.

    The DataFrame must provide the columns ``team1_id``, ``team2_id``,
    ``team1_side_id``, ``team2_side_id``, ``team1_picks_ids``,
    ``team2_picks_ids``, ``team1_bans_ids``, ``team2_bans_ids`` and the target
    column. The four ``*_ids`` list columns hold one sequence of hero IDs per row.

    Args:
        dataframe: Source DataFrame; rows are addressed by position, not by index label.
        target_column: Name of the column returned as the target.
        max_len: Length every hero list is padded/truncated to. ``None``
            (default) means "use the module constant ``HERO_LIST_MAX_LEN``".
        padding_value: ID used to fill unused slots. ``None`` (default) means
            "use the module constant ``PADDING_VALUE``".
    """

    def __init__(self, dataframe, target_column, max_len=None, padding_value=None):
        self.df = dataframe
        # Kept as given; None is resolved against the module constants at
        # padding time so later changes to those constants are still honoured.
        self._max_len = max_len
        self._padding_value = padding_value

        # Pull every column out as a NumPy array once, so __getitem__ does
        # plain positional indexing instead of DataFrame lookups.
        self.team1_ids = self.df['team1_id'].values
        self.team2_ids = self.df['team2_id'].values
        self.team1_side_ids = self.df['team1_side_id'].values
        self.team2_side_ids = self.df['team2_side_id'].values
        self.t1_picks = self.df['team1_picks_ids'].values
        self.t2_picks = self.df['team2_picks_ids'].values
        self.t1_bans = self.df['team1_bans_ids'].values
        self.t2_bans = self.df['team2_bans_ids'].values
        self.target = self.df[target_column].values

    def __len__(self):
        return len(self.df)

    def _pad_hero_list(self, hero_list):
        """Return ``hero_list`` as a plain list of exactly ``max_len`` items.

        Longer inputs are truncated, shorter ones are right-padded with the
        padding value. Any sequence is accepted (list, tuple, NumPy array);
        concatenating with ``+`` would fail for the latter two.
        """
        max_len = getattr(self, '_max_len', None)
        if max_len is None:
            max_len = HERO_LIST_MAX_LEN
        pad = getattr(self, '_padding_value', None)
        if pad is None:
            pad = PADDING_VALUE

        heroes = list(hero_list)[:max_len]
        return heroes + [pad] * (max_len - len(heroes))

    def _hero_tensor(self, hero_list):
        """Pad one hero list and wrap it in a long tensor."""
        return torch.tensor(self._pad_hero_list(hero_list), dtype=torch.long)

    def __getitem__(self, idx):
        """Return ``(features, target)`` for the row at position ``idx``.

        ``features`` maps feature names to long tensors (scalar IDs and
        fixed-length hero lists); ``target`` is a float tensor.
        """
        features = {
            'team1_id': torch.tensor(self.team1_ids[idx], dtype=torch.long),
            'team2_id': torch.tensor(self.team2_ids[idx], dtype=torch.long),
            'team1_side_id': torch.tensor(self.team1_side_ids[idx], dtype=torch.long),
            'team2_side_id': torch.tensor(self.team2_side_ids[idx], dtype=torch.long),
            't1_picks': self._hero_tensor(self.t1_picks[idx]),
            't2_picks': self._hero_tensor(self.t2_picks[idx]),
            't1_bans': self._hero_tensor(self.t1_bans[idx]),
            't2_bans': self._hero_tensor(self.t2_bans[idx]),
        }
        target = torch.tensor(self.target[idx], dtype=torch.float)

        return features, target


class SimpleModel(nn.Module):
    """
    Order-agnostic draft model.

    Every pick/ban list is collapsed into the sum of its hero embeddings by
    an ``nn.EmbeddingBag`` in ``'sum'`` mode, so permuting a list does not
    change the output. ID 0 is the padding index of every embedding table:
    its vector is all zeros and receives no gradient updates.
    """

    # Feature-dict keys per team, in the order their vectors are concatenated:
    # (team id, side id, picks, bans).
    _TEAM_FEATURE_KEYS = (
        ('team1_id', 'team1_side_id', 't1_picks', 't1_bans'),
        ('team2_id', 'team2_side_id', 't2_picks', 't2_bans'),
    )

    def __init__(self, n_teams, n_heroes, n_sides,
                 team_embed_dim=16, hero_embed_dim=16, side_embed_dim=2):
        super().__init__()

        # Lookup tables for the single-valued categorical inputs.
        self.team_embed = nn.Embedding(n_teams, team_embed_dim, padding_idx=0)
        self.side_embed = nn.Embedding(n_sides, side_embed_dim, padding_idx=0)

        # One table shared by picks and bans of both teams; 'sum' mode reduces
        # each hero list to a single vector.
        self.hero_embed_bag = nn.EmbeddingBag(
            n_heroes,
            hero_embed_dim,
            mode='sum',
            padding_idx=0,
        )

        # Each team contributes team + side + picks + bans vectors.
        per_team_width = team_embed_dim + side_embed_dim + 2 * hero_embed_dim
        input_size = 2 * per_team_width

        # Fully connected head ending in one scalar per sample.
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)

        self.dropout = nn.Dropout(0.3)

    def forward(self, features):
        """Map a batch of feature tensors to one scalar per sample.

        ``features`` is a dict holding the keys listed in
        ``_TEAM_FEATURE_KEYS``. Per the shapes this module is written for,
        id tensors are ``[batch_size]`` and hero lists are
        ``[batch_size, list_len]``; the result is ``[batch_size]``.
        """
        pieces = []
        for team_key, side_key, picks_key, bans_key in self._TEAM_FEATURE_KEYS:
            pieces.append(self.team_embed(features[team_key]))
            pieces.append(self.side_embed(features[side_key]))
            pieces.append(self.hero_embed_bag(features[picks_key]))
            pieces.append(self.hero_embed_bag(features[bans_key]))

        # Join along the feature axis -> [batch_size, input_size].
        hidden = torch.cat(pieces, dim=1)

        hidden = self.dropout(F.relu(self.fc1(hidden)))
        hidden = self.dropout(F.relu(self.fc2(hidden)))

        # [batch_size, 1] -> [batch_size] so the output lines up with the targets.
        return self.fc3(hidden).squeeze(dim=1)

class PositionalModel(nn.Module):
    """Draft model in which each hero's slot in its pick/ban list matters.

    Every hero embedding is added to the embedding of its position ID before
    the list is summed into one vector. ID 0 is the padding index of all four
    embedding tables.

    ``forward`` needs, next to the hero-ID tensors, the matching position-ID
    tensors under ``t1_picks_pos``, ``t1_bans_pos``, ``t2_picks_pos`` and
    ``t2_bans_pos``.
    """

    # (team id, side id) keys per team.
    _TEAM_KEYS = (
        ('team1_id', 'team1_side_id'),
        ('team2_id', 'team2_side_id'),
    )
    # (picks, picks positions, bans, bans positions) keys per team.
    _DRAFT_KEYS = (
        ('t1_picks', 't1_picks_pos', 't1_bans', 't1_bans_pos'),
        ('t2_picks', 't2_picks_pos', 't2_bans', 't2_bans_pos'),
    )

    def __init__(self, n_teams, n_heroes, n_sides,
                 team_embed_dim=16, hero_embed_dim=16,
                 side_embed_dim=2):
        super().__init__()

        # Echo the vocabulary sizes the model was built with (debug aid).
        print(f"  [Model Init] n_teams: {n_teams}")
        print(f"  [Model Init] n_heroes: {n_heroes}")
        print(f"  [Model Init] n_sides: {n_sides}")

        # Embedding tables for the categorical inputs.
        self.team_embed = nn.Embedding(n_teams, team_embed_dim, padding_idx=0)
        self.side_embed = nn.Embedding(n_sides, side_embed_dim, padding_idx=0)
        self.hero_embed = nn.Embedding(n_heroes, hero_embed_dim, padding_idx=0)

        # One row per list slot plus row 0, which is the padding index.
        n_positions = HERO_LIST_MAX_LEN + 1
        print(f"  [Model Init] n_positions: {n_positions}")
        # Same width as the hero embedding because the two are added together.
        self.position_embed = nn.Embedding(n_positions, hero_embed_dim, padding_idx=0)

        # Each team contributes team + side + picks + bans vectors.
        per_team_width = team_embed_dim + side_embed_dim + 2 * hero_embed_dim
        input_size = 2 * per_team_width

        # Fully connected head ending in one scalar per sample.
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.dropout = nn.Dropout(0.3)

    def _get_draft_vector(self, hero_ids, pos_ids):
        """Sum (hero embedding + position embedding) over dim 1 of the inputs."""
        slot_vecs = self.hero_embed(hero_ids) + self.position_embed(pos_ids)
        return slot_vecs.sum(dim=1)

    def forward(self, features):
        """Map a dict of feature tensors to one scalar per sample."""
        # Single-valued inputs first: team and side vectors for both teams.
        team_side = [
            (self.team_embed(features[team_key]), self.side_embed(features[side_key]))
            for team_key, side_key in self._TEAM_KEYS
        ]

        # Then the position-aware pick and ban vectors for both teams.
        drafts = [
            (
                self._get_draft_vector(features[picks_key], features[picks_pos_key]),
                self._get_draft_vector(features[bans_key], features[bans_pos_key]),
            )
            for picks_key, picks_pos_key, bans_key, bans_pos_key in self._DRAFT_KEYS
        ]

        # Per team: team, side, picks, bans - team 1 before team 2.
        pieces = []
        for (team_vec, side_vec), (picks_vec, bans_vec) in zip(team_side, drafts):
            pieces += [team_vec, side_vec, picks_vec, bans_vec]
        hidden = torch.cat(pieces, dim=1)

        hidden = self.dropout(F.relu(self.fc1(hidden)))
        hidden = self.dropout(F.relu(self.fc2(hidden)))

        # Drop the trailing size-1 feature axis.
        return self.fc3(hidden).squeeze(dim=1)

# Module-level training settings. None of them is read by the functions in
# this section; presumably the training driver elsewhere consumes them.
N_SPLITS = 5
N_EPOCHS = 10
BATCH_SIZE = 64
LEARNING_RATE = 0.001
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

def train_loop(model, loader, criterion, optimizer, device):
    """Run one epoch of training and return the mean per-batch loss.

    Args:
        model: Module called as ``model(features)`` with a dict of tensors.
        loader: Iterable yielding ``(features, targets)`` batches, where
            ``features`` is a dict of tensors. It does not need to implement
            ``__len__``; batches are counted while iterating.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
        optimizer: Optimizer stepped once per batch.
        device: Device every batch tensor is moved to.

    Returns:
        The unweighted mean of the batch losses as a float, or ``nan`` when
        the loader yields no batches (instead of dividing by zero).
    """
    model.train()
    total_loss = 0.0
    n_batches = 0

    for features, targets in loader:
        features = {name: tensor.to(device) for name, tensor in features.items()}
        targets = targets.to(device)

        # Clear gradients left over from the previous step, then
        # forward -> backward -> parameter update.
        optimizer.zero_grad()
        loss = criterion(model(features), targets)
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        n_batches += 1

    if n_batches == 0:
        return float("nan")
    return total_loss / n_batches

def get_predictions(model, loader, device):
    """Run the model over a loader without gradients and collect its outputs.

    The model is switched to eval mode and left in that mode.

    Args:
        model: Module called as ``model(features)`` with a dict of tensors.
        loader: Iterable yielding ``(features, targets)`` batches, where
            ``features`` is a dict of tensors.
        device: Device the feature tensors are moved to before the forward pass.

    Returns:
        ``(predictions, targets)`` as NumPy arrays, each the concatenation of
        the per-batch arrays in loader order. The raw model output is used as
        the prediction. Both arrays are empty when the loader yields no batches.
    """
    model.eval()
    pred_chunks = []
    target_chunks = []

    with torch.no_grad():
        for features, targets in loader:
            features = {name: tensor.to(device) for name, tensor in features.items()}

            outputs = model(features)

            pred_chunks.append(outputs.cpu().numpy())
            # Targets are only collected, never fed to the model, so they go
            # straight to host memory instead of via `device`.
            target_chunks.append(targets.cpu().numpy())

    # np.concatenate raises ValueError on an empty list.
    if not pred_chunks:
        return np.empty(0, dtype=np.float32), np.empty(0, dtype=np.float32)

    return np.concatenate(pred_chunks), np.concatenate(target_chunks)