# In [None]:
import pandas as pd
import numpy as np
import copy

from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import cross_val_predict, KFold, cross_val_score, GroupKFold
from sklearn.metrics import mean_squared_error, mean_absolute_error
from sklearn.linear_model import LinearRegression
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import make_pipeline

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader

# Constants
# Fixed length every hero pick/ban list is padded or truncated to
# (see MatchDataset._pad_hero_list); PositionalModel also sizes its position
# embedding table as HERO_LIST_MAX_LEN + 1.
HERO_LIST_MAX_LEN = 5 
# Filler id appended to hero lists shorter than HERO_LIST_MAX_LEN.
# NOTE(review): the embedding layers below hard-code padding_idx=0, so this
# presumably has to stay 0 -- confirm before changing.
PADDING_VALUE = 0 
# The constants below are not referenced by the code visible in this section;
# they are presumably consumed by the training/evaluation cells -- verify there.
TARGET_COLUMN = 'duration'
# NOTE(review): evaluate_model hard-codes n_splits=5 rather than reading this.
N_SPLITS = 5
N_EPOCHS = 10 
BATCH_SIZE = 64
LEARNING_RATE = 0.001
# Use the GPU when CUDA is available, otherwise fall back to the CPU.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")


def evaluate_model(model, features, data, target='duration'):
    """Cross-validate ``model`` and print the per-fold and mean RMSE.

    Rows with a missing value in any feature column or in the target column
    are dropped first. The remaining rows are scored with a shuffled 5-fold
    split (fixed ``random_state=99``).

    Args:
        model: Estimator passed to ``cross_val_score``.
        features: List of feature column names in ``data``.
        data: DataFrame holding the feature and target columns.
        target: Name of the target column.

    Returns:
        None. Results are printed.
    """
    eval_data = data.dropna(subset=features + [target])
    X = eval_data[features]
    y = eval_data[target]
    cv = KFold(n_splits=5, shuffle=True, random_state=99)
    # scikit-learn scorers follow a "greater is better" convention, so the
    # 'neg_root_mean_squared_error' scorer returns -RMSE. Flip the sign so the
    # numbers printed under the "RMSE" labels are actual (non-negative) RMSEs.
    scores = -cross_val_score(estimator=model, X=X, y=y, cv=cv, scoring='neg_root_mean_squared_error')
    print(f"CV Scores (RMSE): {scores}")
    print(f"Mean CV RMSE: {np.mean(scores):.4f}\n")


def train_loop(model, loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``loader`` and return the mean batch loss.

    Args:
        model: Module called with a dict of feature tensors.
        loader: Iterable yielding ``(features, targets)`` pairs, where
            ``features`` is a dict of tensors and ``targets`` is a tensor.
        criterion: Loss callable invoked as ``criterion(outputs, targets)``.
        optimizer: Optimizer updating the parameters of ``model``.
        device: Device every feature tensor and the targets are moved to.

    Returns:
        The unweighted mean of the per-batch loss values as a float, or
        ``nan`` when ``loader`` yields no batches.
    """
    model.train()
    total_loss = 0.0
    # Count batches while iterating instead of calling len(loader): this
    # avoids a ZeroDivisionError on an empty loader and also supports
    # iterables that do not define __len__.
    n_batches = 0
    for features, targets in loader:
        features = {k: v.to(device) for k, v in features.items()}
        targets = targets.to(device)
        outputs = model(features)
        loss = criterion(outputs, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        total_loss += loss.item()
        n_batches += 1
    if n_batches == 0:
        # Nothing was trained on; NaN cannot be mistaken for a real loss.
        return float("nan")
    return total_loss / n_batches


def get_predictions(model, loader, device):
    """Run ``model`` over ``loader`` in eval mode and collect outputs and targets.

    Args:
        model: Module called with a dict of feature tensors.
        loader: Iterable yielding ``(features, targets)`` pairs, where
            ``features`` is a dict of tensors and ``targets`` is a tensor.
        device: Device the feature tensors and targets are moved to.

    Returns:
        Tuple ``(predictions, targets)`` of NumPy arrays, each the
        concatenation of the per-batch arrays in iteration order.
    """
    model.eval()
    pred_chunks, target_chunks = [], []
    # Inference only: gradient tracking is disabled for the whole pass.
    with torch.no_grad():
        for batch_inputs, batch_targets in loader:
            on_device = {name: tensor.to(device) for name, tensor in batch_inputs.items()}
            batch_targets = batch_targets.to(device)
            batch_outputs = model(on_device)
            pred_chunks.append(batch_outputs.cpu().numpy())
            target_chunks.append(batch_targets.cpu().numpy())
    predictions = np.concatenate(pred_chunks)
    truths = np.concatenate(target_chunks)
    return predictions, truths


class MatchDataset(Dataset):
    """Dataset for post-draft predictions with hero picks/bans and sides.

    Reads these columns from the dataframe: ``team1_id``, ``team2_id``,
    ``team1_side_id``, ``team2_side_id``, ``team1_picks_ids``,
    ``team2_picks_ids``, ``team1_bans_ids``, ``team2_bans_ids`` and the
    target column. Each item is ``(features, target)`` where ``features`` is
    a dict of ``torch.long`` tensors and ``target`` is a ``torch.float``
    scalar. Only hero ids are emitted for picks/bans; no position ids.
    """
    def __init__(self, dataframe, target_column):
        """Cache the needed columns of ``dataframe`` as arrays.

        Args:
            dataframe: Source DataFrame with the columns listed on the class.
            target_column: Name of the column holding the regression target.
        """
        self.df = dataframe
        # .values gives positional access, so a non-default dataframe index
        # does not affect __getitem__.
        self.team1_ids = self.df['team1_id'].values
        self.team2_ids = self.df['team2_id'].values
        self.team1_side_ids = self.df['team1_side_id'].values
        self.team2_side_ids = self.df['team2_side_id'].values
        # presumably each cell is a sequence of hero ids -- verify against
        # the preprocessing that builds these columns.
        self.t1_picks = self.df['team1_picks_ids'].values
        self.t2_picks = self.df['team2_picks_ids'].values
        self.t1_bans = self.df['team1_bans_ids'].values
        self.t2_bans = self.df['team2_bans_ids'].values
        self.target = self.df[target_column].values

    def __len__(self):
        """Return the number of rows in the dataframe."""
        return len(self.df)

    def _pad_hero_list(self, hero_list):
        """Return ``hero_list`` as a list of exactly HERO_LIST_MAX_LEN ids.

        Longer inputs are truncated; shorter ones are right-padded with
        PADDING_VALUE.
        """
        # Coerce to a plain list first: with a NumPy array, `array + list`
        # is a broadcast add (not concatenation) and with a tuple it raises,
        # so non-list cells would be padded incorrectly or crash.
        heroes = list(hero_list)[:HERO_LIST_MAX_LEN]
        return heroes + [PADDING_VALUE] * (HERO_LIST_MAX_LEN - len(heroes))

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair for row ``idx``."""
        features = {
            'team1_id': torch.tensor(self.team1_ids[idx], dtype=torch.long),
            'team2_id': torch.tensor(self.team2_ids[idx], dtype=torch.long),
            'team1_side_id': torch.tensor(self.team1_side_ids[idx], dtype=torch.long),
            'team2_side_id': torch.tensor(self.team2_side_ids[idx], dtype=torch.long),
            't1_picks': torch.tensor(self._pad_hero_list(self.t1_picks[idx]), dtype=torch.long),
            't2_picks': torch.tensor(self._pad_hero_list(self.t2_picks[idx]), dtype=torch.long),
            't1_bans': torch.tensor(self._pad_hero_list(self.t1_bans[idx]), dtype=torch.long),
            't2_bans': torch.tensor(self._pad_hero_list(self.t2_bans[idx]), dtype=torch.long)
        }
        target = torch.tensor(self.target[idx], dtype=torch.float)
        return features, target


class SimpleModel(nn.Module):
    """Order-agnostic post-draft regressor built on ``nn.EmbeddingBag``.

    Each team contributes a team embedding, a side embedding, and one summed
    hero embedding each for its picks and its bans. Both teams' vectors are
    concatenated and passed through a three-layer MLP with dropout.
    """
    def __init__(self, n_teams, n_heroes, n_sides, team_embed_dim=16, hero_embed_dim=16, side_embed_dim=2):
        """Create the embedding tables and the MLP head.

        Args:
            n_teams: Number of rows in the team embedding table.
            n_heroes: Number of rows in the hero embedding table.
            n_sides: Number of rows in the side embedding table.
            team_embed_dim: Width of a team embedding.
            hero_embed_dim: Width of a hero embedding.
            side_embed_dim: Width of a side embedding.
        """
        super().__init__()
        # Every table is created with padding_idx=0.
        self.team_embed = nn.Embedding(n_teams, team_embed_dim, padding_idx=0)
        self.side_embed = nn.Embedding(n_sides, side_embed_dim, padding_idx=0)
        self.hero_embed_bag = nn.EmbeddingBag(n_heroes, hero_embed_dim, padding_idx=0, mode='sum')

        # Per team: team vector + side vector + picks bag + bans bag.
        per_team_width = team_embed_dim + side_embed_dim + 2 * hero_embed_dim
        self.fc1 = nn.Linear(2 * per_team_width, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.dropout = nn.Dropout(0.3)

    def _encode_team(self, features, team_key, side_key, picks_key, bans_key):
        """Return [team, side, picks, bans] vectors for one team."""
        return [
            self.team_embed(features[team_key]),
            self.side_embed(features[side_key]),
            self.hero_embed_bag(features[picks_key]),
            self.hero_embed_bag(features[bans_key]),
        ]

    def forward(self, features):
        """Return one prediction per batch row from the ``features`` dict."""
        pieces = (
            self._encode_team(features, 'team1_id', 'team1_side_id', 't1_picks', 't1_bans')
            + self._encode_team(features, 'team2_id', 'team2_side_id', 't2_picks', 't2_bans')
        )
        hidden = torch.cat(pieces, dim=1)

        hidden = self.dropout(F.relu(self.fc1(hidden)))
        hidden = self.dropout(F.relu(self.fc2(hidden)))
        # Drop the trailing size-1 dimension produced by the last layer.
        return self.fc3(hidden).squeeze(dim=1)

class PositionalModel(nn.Module):
    """Post-draft model where pick/ban order matters (uses positional embeddings).

    Every hero slot is encoded as ``hero_embed(id) + position_embed(pos)`` and
    the slots of a list are summed. Position ids are read from the
    ``t1_picks_pos`` / ``t1_bans_pos`` / ``t2_picks_pos`` / ``t2_bans_pos``
    feature keys when present. When a key is absent, positions are derived
    from the hero ids: 1-based slot number for real heroes, 0 for padding.
    """
    def __init__(self, n_teams, n_heroes, n_sides, team_embed_dim=16, hero_embed_dim=16, side_embed_dim=2):
        """Create the embedding tables and the MLP head.

        Args:
            n_teams: Number of rows in the team embedding table.
            n_heroes: Number of rows in the hero embedding table.
            n_sides: Number of rows in the side embedding table.
            team_embed_dim: Width of a team embedding.
            hero_embed_dim: Width of a hero (and position) embedding.
            side_embed_dim: Width of a side embedding.
        """
        super().__init__()
        print(f"  [Model Init] n_teams: {n_teams}")
        print(f"  [Model Init] n_heroes: {n_heroes}")
        print(f"  [Model Init] n_sides: {n_sides}")
        
        self.team_embed = nn.Embedding(n_teams, team_embed_dim, padding_idx=0)
        self.side_embed = nn.Embedding(n_sides, side_embed_dim, padding_idx=0)
        self.hero_embed = nn.Embedding(n_heroes, hero_embed_dim, padding_idx=0)
        
        # One row per slot 1..HERO_LIST_MAX_LEN plus row 0 used as padding.
        n_positions = HERO_LIST_MAX_LEN + 1
        print(f"  [Model Init] n_positions: {n_positions}")
        self.position_embed = nn.Embedding(n_positions, hero_embed_dim, padding_idx=0)
        
        # Per team: team vector + side vector + picks vector + bans vector.
        input_size = 2 * (team_embed_dim + side_embed_dim + hero_embed_dim + hero_embed_dim)
        self.fc1 = nn.Linear(input_size, 64)
        self.fc2 = nn.Linear(64, 32)
        self.fc3 = nn.Linear(32, 1)
        self.dropout = nn.Dropout(0.3)

    def _default_positions(self, hero_ids):
        """Derive position ids from a 2-D batch of padded hero ids.

        Real heroes get their 1-based column number; padding slots get 0 so
        they hit the position table's padding row.
        NOTE(review): assumes padding is appended at the end of each list and
        that the list length does not exceed the position table -- confirm.
        """
        pad_id = self.hero_embed.padding_idx
        slots = torch.arange(1, hero_ids.size(1) + 1, device=hero_ids.device)
        return slots.unsqueeze(0) * (hero_ids != pad_id).long()

    def _get_draft_vector(self, hero_ids, pos_ids=None):
        """Return the summed hero+position embedding for each row.

        Args:
            hero_ids: Long tensor of hero ids, one row per sample.
            pos_ids: Long tensor of position ids shaped like ``hero_ids``, or
                ``None`` to derive them from ``hero_ids``.
        """
        if pos_ids is None:
            pos_ids = self._default_positions(hero_ids)
        hero_vecs = self.hero_embed(hero_ids)
        pos_vecs = self.position_embed(pos_ids)
        combined_vecs = hero_vecs + pos_vecs
        return combined_vecs.sum(dim=1)

    def forward(self, features):
        """Return one prediction per batch row from the ``features`` dict."""
        t1_team_vec = self.team_embed(features['team1_id'])
        t1_side_vec = self.side_embed(features['team1_side_id'])
        t2_team_vec = self.team_embed(features['team2_id'])
        t2_side_vec = self.side_embed(features['team2_side_id'])

        # The *_pos keys are optional: .get() yields None when a dataset only
        # provides hero ids, and positions are then derived from those ids.
        t1_picks_vec = self._get_draft_vector(features['t1_picks'], features.get('t1_picks_pos'))
        t1_bans_vec = self._get_draft_vector(features['t1_bans'], features.get('t1_bans_pos'))
        t2_picks_vec = self._get_draft_vector(features['t2_picks'], features.get('t2_picks_pos'))
        t2_bans_vec = self._get_draft_vector(features['t2_bans'], features.get('t2_bans_pos'))
        
        x = torch.cat([
            t1_team_vec, t1_side_vec, t1_picks_vec, t1_bans_vec,
            t2_team_vec, t2_side_vec, t2_picks_vec, t2_bans_vec
        ], dim=1)
        
        x = F.relu(self.fc1(x))
        x = self.dropout(x)
        x = F.relu(self.fc2(x))
        x = self.dropout(x)
        x = self.fc3(x)
        return x.squeeze(dim=1)

# In [None]:
class PrematchTeamDataset(Dataset):
    """Pre-match dataset exposing only the two team ids and the target.

    Each item is ``(features, target)``: ``features`` maps ``'team1_id'`` and
    ``'team2_id'`` to ``torch.long`` scalars, ``target`` is a ``torch.float``
    scalar.
    """
    def __init__(self, dataframe, target_column):
        """Store an index-reset copy of ``dataframe`` and cache its columns.

        Args:
            dataframe: DataFrame with ``team1_id``, ``team2_id`` and the
                target column.
            target_column: Name of the column holding the regression target.
        """
        frame = dataframe.reset_index(drop=True)
        self.df = frame
        self.team1_ids = frame['team1_id'].values
        self.team2_ids = frame['team2_id'].values
        self.target = frame[target_column].values

    def __len__(self):
        """Return the number of rows."""
        return len(self.df)

    def __getitem__(self, idx):
        """Return the ``(features, target)`` pair for row ``idx``."""
        raw_ids = {
            'team1_id': self.team1_ids[idx],
            'team2_id': self.team2_ids[idx],
        }
        features = {key: torch.tensor(value, dtype=torch.long) for key, value in raw_ids.items()}
        return features, torch.tensor(self.target[idx], dtype=torch.float)


class TeamSpeedEmbedding(nn.Module):
    """Pre-match regressor driven purely by learned team embeddings.

    Both team ids are looked up in one shared embedding table; the two
    vectors are concatenated and fed to a small MLP that emits one value
    per sample.
    """
    def __init__(self, n_teams, embedding_dim=32):
        """Create the shared team table and the MLP head.

        Args:
            n_teams: Number of rows in the team embedding table.
            embedding_dim: Width of each team embedding.
        """
        super().__init__()
        self.team_embed = nn.Embedding(n_teams, embedding_dim, padding_idx=0)
        # The head consumes the two team vectors side by side.
        pair_width = 2 * embedding_dim
        head_layers = [
            nn.Linear(pair_width, 64),
            nn.ReLU(),
            nn.Dropout(0.2),
            nn.Linear(64, 32),
            nn.ReLU(),
            nn.Linear(32, 1),
        ]
        self.fc = nn.Sequential(*head_layers)

    def forward(self, features):
        """Return one prediction per batch row from the ``features`` dict."""
        team_vectors = [self.team_embed(features[key]) for key in ('team1_id', 'team2_id')]
        pair = torch.cat(team_vectors, dim=1)
        scores = self.fc(pair)
        # Drop the trailing size-1 dimension produced by the last layer.
        return scores.squeeze(dim=1)
