----------------
# **Library**

In [1]:
!pip install timm



In [2]:
# Data Processing n' Visualization
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Compute
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# Data
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# Random
import os
import random as rand
import timm

In [3]:
torch.cuda.empty_cache()

In [4]:
def set_seed(seed):
  rand.seed(seed)
  np.random.seed(seed)
  torch.cuda.manual_seed(seed)
  torch.cuda.manual_seed_all(seed)
  torch.backends.cudnn.deterministic = True
  torch.backends.cudnn.benchmark = False

seed = 59
set_seed(59)

-----------------
# **Data Sample**

In [5]:
data_dir = '/kaggle/input/bkb-data/games_2022 (1).xlsx'
infer_dir = '/kaggle/input/infer-wharton/inference.csv'

In [6]:
df = pd.read_excel(data_dir)
inf = pd.read_csv(infer_dir)
print(f"Dataset Type: {type(df)}")

Dataset Type: <class 'pandas.core.frame.DataFrame'>


In [7]:
df_ts = df

In [8]:
inf.head()

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


Unnamed: 0,game_id,description,team_home,team_away,seed_home,seed_away,home_away_NS,rest_days_Home,rest_days_Away,travel_dist_Home,travel_dist_Away,WINNING %
0,G_East_1,play-in-game,rhode_island_rams,north_carolina_tar_heels,16,17,0,6,1,0,770,
1,G_East_2,play-in team A v 1,nc_state_wolfpack,rhode_island_rams,1,16,0,7,1,1440,0,
2,G_East_3,play-in team B v 1,nc_state_wolfpack,north_carolina_tar_heels,1,17,0,7,1,1440,770,
3,G_East_4,2 v 15,liberty_flames,bucknell_bison,2,9,0,7,10,255,250,
4,G_East_5,3 v 14,drexel_dragons,delaware_blue_hens,3,10,0,11,9,0,680,


In [9]:
df_ts.head()

  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_large_values = (abs_vals > 1e6).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()
  has_small_values = ((abs_vals < 10 ** (-self.digits)) & (abs_vals > 0)).any()


Unnamed: 0,game_id,game_date,team,FGA_2,FGM_2,FGA_3,FGM_3,FTA,FTM,AST,...,largest_lead,notD1_incomplete,OT_length_min_tot,rest_days,attendance,tz_dif_H_E,prev_game_dist,home_away,home_away_NS,travel_dist
0,game_2022_2011,2021-12-30,georgia_lady_bulldogs,50,22,11,5,6,3,14,...,1.0,False,,9.0,3241.0,0.0,0.0,home,1,0.0
1,game_2022_2011,2021-12-30,lsu_tigers,50,24,11,4,15,8,15,...,14.0,False,,3.0,3241.0,0.0,824.0,away,-1,824.0
2,game_2022_2012,2021-12-30,missouri_tigers,43,18,15,7,16,13,10,...,8.0,False,5.0,8.0,6139.0,0.0,371.0,home,1,0.0
3,game_2022_2012,2021-12-30,south_carolina_gamecocks,55,23,21,6,9,5,15,...,6.0,False,5.0,9.0,6139.0,0.0,1154.0,away,-1,1154.0
4,game_2022_2013,2021-12-30,tennessee_lady_volunteers,41,20,15,4,15,10,16,...,19.0,False,,3.0,8124.0,0.0,0.0,home,1,0.0


-------------------------
# **Data Preprocessing**

In [10]:
df_ts = df_ts.drop(columns = ['OT_length_min_tot', 'attendance', 'tz_dif_H_E', 'opponent_team_score', 
                        'team_score', 'home_away', 'notD1_incomplete', 'largest_lead'])
df_ts = df_ts.dropna()
df_ts['home_away_NS'] = df_ts['home_away_NS'].replace({
    1: 1, -1: 0, 0: 2
})

inf = inf.drop(columns = ['game_id', 'seed_home', 'seed_away', 'WINNING %', 'description'])

In [11]:
import math
from sklearn.preprocessing import MinMaxScaler

print(df_ts['rest_days'].shape)
print(df_ts['travel_dist'].shape)

rest = df_ts['rest_days'].values
travel_dist = df_ts['travel_dist'].values

rest = rest.reshape(-1, 1)
travel_dist = travel_dist.reshape(-1, 1)

# List of columns to normalize
stats1 = ['rest_days_Home', 'rest_days_Away']
stats2 = ['travel_dist_Home', 'travel_dist_Away']

# Initialize MinMaxScaler
scaler_rest = MinMaxScaler(feature_range=(0,1))
scaler_rest.fit(rest)

scaler_trvdist = MinMaxScaler(feature_range=(0,1))
scaler_trvdist.fit(travel_dist)

inf[stats1] = scaler_trvdist.fit_transform(inf[stats1])
inf[stats2] = scaler_rest.fit_transform(inf[stats2])

display(inf)

(9126,)
(9126,)


Unnamed: 0,team_home,team_away,home_away_NS,rest_days_Home,rest_days_Away,travel_dist_Home,travel_dist_Away
0,rhode_island_rams,north_carolina_tar_heels,0,0.0,0.0,0.0,0.226471
1,nc_state_wolfpack,rhode_island_rams,0,0.2,0.0,1.0,0.0
2,nc_state_wolfpack,north_carolina_tar_heels,0,0.2,0.0,1.0,0.226471
3,liberty_flames,bucknell_bison,0,0.2,1.0,0.177083,0.073529
4,drexel_dragons,delaware_blue_hens,0,1.0,0.888889,0.0,0.2
5,massachusetts_minutewomen,princeton_tigers,0,0.6,0.666667,0.052083,0.01
6,buffalo_bulls,stony_brook_seawolves,0,0.4,0.777778,0.041667,1.0
7,fairfield_stags,towson_tigers,0,0.2,0.888889,0.013889,0.064706
8,uconn_huskies,campbell_fighting_camels,0,0.6,1.0,0.010417,0.578235
9,american_university_eagles,columbia_lions,0,0.2,0.777778,0.243056,0.042647


In [12]:
# List of columns to normalize
stats_to_normalize = ['FGA_2', 'FGM_2', 'FGA_3', 'FGM_3', 
                      'FTA', 'FTM', 'AST', 'BLK', 'STL', 'TOV', 
                      'TOV_team', 'DREB', 'OREB', 'F_tech', 'F_personal', 
                      'rest_days', 'prev_game_dist', 'travel_dist']

# Initialize MinMaxScaler
scaler = MinMaxScaler(feature_range=(0,1))

# Apply MinMaxScaler only to the selected stats
df_ts[stats_to_normalize] = scaler.fit_transform(df_ts[stats_to_normalize])

print(df_ts.head())  # Check normalized values

          game_id  game_date                       team     FGA_2     FGM_2  \
0  game_2022_2011 2021-12-30      georgia_lady_bulldogs  0.661290  0.513514   
1  game_2022_2011 2021-12-30                 lsu_tigers  0.661290  0.567568   
2  game_2022_2012 2021-12-30            missouri_tigers  0.548387  0.405405   
3  game_2022_2012 2021-12-30   south_carolina_gamecocks  0.741935  0.540541   
4  game_2022_2013 2021-12-30  tennessee_lady_volunteers  0.516129  0.459459   

      FGA_3     FGM_3       FTA    FTM       AST  ...       TOV  TOV_team  \
0  0.196078  0.227273  0.125000  0.075  0.361111  ...  0.390244  0.000000   
1  0.196078  0.181818  0.312500  0.200  0.388889  ...  0.292683  0.333333   
2  0.274510  0.318182  0.333333  0.325  0.250000  ...  0.146341  0.166667   
3  0.392157  0.272727  0.187500  0.125  0.388889  ...  0.146341  0.000000   
4  0.274510  0.181818  0.312500  0.250  0.416667  ...  0.317073  0.166667   

       DREB     OREB  F_tech  F_personal  rest_days  prev_game

In [13]:
epsilon = 1e-8
T = 5
inf['travel_dist% (A/B)'] = np.tanh((inf['travel_dist_Home'] - inf['travel_dist_Away'] + epsilon) / T)

In [14]:
inf

Unnamed: 0,team_home,team_away,home_away_NS,rest_days_Home,rest_days_Away,travel_dist_Home,travel_dist_Away,travel_dist% (A/B)
0,rhode_island_rams,north_carolina_tar_heels,0,0.0,0.0,0.0,0.226471,-0.045263
1,nc_state_wolfpack,rhode_island_rams,0,0.2,0.0,1.0,0.0,0.197375
2,nc_state_wolfpack,north_carolina_tar_heels,0,0.2,0.0,1.0,0.226471,0.153483
3,liberty_flames,bucknell_bison,0,0.2,1.0,0.177083,0.073529,0.020708
4,drexel_dragons,delaware_blue_hens,0,1.0,0.888889,0.0,0.2,-0.039979
5,massachusetts_minutewomen,princeton_tigers,0,0.6,0.666667,0.052083,0.01,0.008416
6,buffalo_bulls,stony_brook_seawolves,0,0.4,0.777778,0.041667,1.0,-0.189354
7,fairfield_stags,towson_tigers,0,0.2,0.888889,0.013889,0.064706,-0.010163
8,uconn_huskies,campbell_fighting_camels,0,0.6,1.0,0.010417,0.578235,-0.113078
9,american_university_eagles,columbia_lions,0,0.2,0.777778,0.243056,0.042647,0.04006


In [15]:
past_stats_home = {}

start_col = 'FGA_2'
end_col = 'F_personal'

for idx, team in enumerate(inf['team_home']):
    team_i = df_ts[df_ts['team'] == team]
    team_i = team_i.sort_values(by = 'game_date', ascending = False)
    latest_game = team_i.iloc[0]
    
    st = latest_game.loc[start_col:end_col].values
    st = st.reshape(-1, 1)
    prev_game_dist = latest_game['travel_dist'].reshape(-1, 1)
    
    current_game = inf.loc[idx]
    rest_days = current_game['rest_days_Home'].reshape(-1, 1)
    travel_dist = current_game['travel_dist_Home'].reshape(-1, 1)
    Home_HAN = np.array([1]).reshape(-1, 1)


    rst_prv = np.concatenate((rest_days, prev_game_dist))
    home_dist = np.concatenate((Home_HAN, travel_dist))
    combine = np.concatenate((rst_prv, home_dist))

    latest_stat = np.concatenate((st, combine))
    if team in past_stats_home:
        past_stats_home[f'{team}{idx}'] = latest_stat
    else:
        past_stats_home[team] = latest_stat
        
print(past_stats_home['rhode_island_rams'])
print(len(past_stats_home['rhode_island_rams']))

print("-"*59)

past_stats_away = {}

start_col = 'FGA_2'
end_col = 'F_personal'

for idx, team in enumerate(inf['team_away']):
    team_i = df_ts[df_ts['team'] == team]
    team_i = team_i.sort_values(by = 'game_date', ascending = False)
    latest_game = team_i.iloc[0]
    
    st = latest_game.loc[start_col:end_col].values
    st = st.reshape(-1, 1)
    prev_game_dist = latest_game['travel_dist'].reshape(-1, 1)

    current_game = inf.loc[idx]
    rest_days = current_game['rest_days_Away'].reshape(-1, 1)
    travel_dist = current_game['travel_dist_Away'].reshape(-1, 1)
    Away_HAN = np.array([0]).reshape(-1, 1)
    
    rst_prv = np.concatenate((rest_days, prev_game_dist))
    home_dist = np.concatenate((Home_HAN, travel_dist))
    combine = np.concatenate((rst_prv, home_dist))

    latest_stat = np.concatenate((st, combine))
    
    if team in past_stats_away:
        past_stats_away[f'{team}{idx}'] = latest_stat
    else:
        past_stats_away[team] = latest_stat
        
print(past_stats_away['north_carolina_tar_heels'])
print(len(past_stats_away['north_carolina_tar_heels']))

# [15 Features] + rest_days + prev_game_dist + home_away_NS + travel_dist

[[0.467741935483871]
 [0.16216216216216217]
 [0.21568627450980393]
 [0.2727272727272727]
 [0.25]
 [0.25]
 [0.16666666666666663]
 [0.2777777777777778]
 [0.18518518518518517]
 [0.21951219512195125]
 [0.0]
 [0.3076923076923077]
 [0.3125]
 [0.0]
 [0.4193548387096774]
 [0.0]
 [0.15946502057613168]
 [1.0]
 [0.0]]
19
-----------------------------------------------------------
[[0.5161290322580645]
 [0.5945945945945945]
 [0.25490196078431376]
 [0.045454545454545456]
 [0.4583333333333333]
 [0.375]
 [0.3055555555555555]
 [0.2222222222222222]
 [0.4444444444444444]
 [0.43902439024390244]
 [0.16666666666666666]
 [0.41025641025641024]
 [0.34375]
 [0.0]
 [0.4516129032258064]
 [0.0]
 [0.03542034097589653]
 [1.0]
 [0.2264705882352941]]
19


---------------------
# **Model**

In [16]:
from huggingface_hub import login

from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
HUGGINGFACE_TOKEN = user_secrets.get_secret("HF_TOKEN")

# Login to Hugging Face
login(HUGGINGFACE_TOKEN)

In [17]:
import torch.nn.functional as F

class FeatureGrouping_cls(nn.Module):
    def __init__(self, num_features=20, max_groups=3, embed_dim=4, output_dim=2, resnet_model="resnet50", finetune = True):
        super().__init__()
        self.num_features = num_features
        self.max_groups = max_groups
        self.embed_dim = embed_dim
        self.output_dim = output_dim

        # Embedding for home_away (3 categories: Home, Away, Neutral)
        self.home_away_embed = nn.Embedding(3, embed_dim)

        # Adjust feature count after embedding replacement
        self.adjusted_num_features = num_features - 2 + 2*embed_dim  # 20 - 2 + 2*4 = 26

        # Calculate the maximum features per group to ensure consistent dimensions
        self.max_features_per_group = self.adjusted_num_features

        # Learnable logits for feature assignment
        self.assignment_logits = nn.Parameter(torch.randn(self.adjusted_num_features, max_groups))

        # Self-Attention layers for each possible number of groups
        self.attention_layers = nn.ModuleDict({
            f"attn_{g}": nn.MultiheadAttention(
                embed_dim=self.max_features_per_group,
                num_heads=1,
                batch_first=True
            )
            for g in range(1, max_groups + 1)
        })

        # Reduce channels before ResNet
        self.channel_reducer = nn.Conv2d(in_channels=max_groups, out_channels=3, kernel_size=1)

        # Pretrained ResNet model
        self.resnet = timm.create_model(resnet_model, pretrained=True)
        in_features = self.resnet.get_classifier().in_features
        self.resnet.reset_classifier(0)

        # Final regression head
        self.fc = nn.Linear(in_features, output_dim)
        
        if finetune:
            for param in self.resnet.parameters():
                param.requires_grad = True
        else:
            for param in self.resnet.parameters():
                param.requires_grad = False
                    
    def forward(self, x):
        batch_size = x.shape[0]

        # Extract home_away index and convert to embeddings
        A_home_away_idx = x[:, 17].long().clamp(0, 2)
        B_home_away_idx = x[:, 18].long().clamp(0, 2)
        
        A_home_away_embed = self.home_away_embed(A_home_away_idx)
        B_home_away_embed = self.home_away_embed(B_home_away_idx)
        
        x = torch.cat([x[:, :17], A_home_away_embed, B_home_away_embed, x[:, 19:]], dim=1)

        # Hard feature assignment
        assignment_hard = torch.nn.functional.gumbel_softmax(self.assignment_logits, tau=0.5, hard=True, dim=1)

        all_group_outputs = []

        # Process different group configurations
        for num_groups in range(1, self.max_groups + 1):
            # Split features into groups
            groups = []
            features_per_group = self.adjusted_num_features // num_groups
            
            for g in range(num_groups):
                start_idx = g * features_per_group
                end_idx = min(start_idx + features_per_group, self.adjusted_num_features)
                group_features = x[:, start_idx:end_idx]
                
                # Pad to match max_features_per_group
                if group_features.shape[1] < self.max_features_per_group:
                    pad_size = self.max_features_per_group - group_features.shape[1]
                    padding = torch.zeros(batch_size, pad_size, device=x.device)
                    group_features = torch.cat([group_features, padding], dim=1)
                
                groups.append(group_features)

            # Process each group with attention
            processed_groups = []
            for g in range(num_groups):
                group_features = groups[g].unsqueeze(1)
                attn_output, _ = self.attention_layers[f"attn_{num_groups}"](
                    group_features, group_features, group_features)
                processed_groups.append(attn_output)

            # Combine processed groups
            group_output = torch.cat(processed_groups, dim=1)
            
            # Pad to match max_groups if necessary
            if num_groups < self.max_groups:
                padding = torch.zeros(
                    batch_size,
                    self.max_groups - num_groups,
                    self.max_features_per_group,
                    device=x.device
                )
                group_output = torch.cat([group_output, padding], dim=1)
            
            all_group_outputs.append(group_output)

        # Stack all configurations
        x_final = torch.stack(all_group_outputs, dim=1)  # [B, max_groups, max_groups, Features]
        
        # Reshape for channel reducer
        x_final = x_final.mean(dim=1)  # [B, max_groups, Features]
        x_final = x_final.permute(0, 2, 1)  # [B, Features, max_groups]
        x_final = x_final.mean(dim=1).unsqueeze(-1).unsqueeze(-1)  # [B, max_groups, 1, 1]
        
        # Apply channel reduction
        x_final = self.channel_reducer(x_final)  # [B, 3, 1, 1]
        
        # Prepare for ResNet
        x_final = x_final.expand(-1, -1, 224, 224)  # [B, 3, 224, 224]

        # Process through ResNet and final layer
        x_final = self.resnet(x_final)
        output = self.fc(x_final)

        return output

In [18]:
from transformers import PreTrainedModel, PretrainedConfig
from transformers.modeling_outputs import SequenceClassifierOutput
from sklearn.metrics import roc_auc_score, f1_score

class FeatureGroupingConfig(PretrainedConfig):
    model_type = "feature_grouping"

    def __init__(self, num_features=20, output_dim=2, **kwargs):
        super().__init__(**kwargs)
        self.num_features = num_features
        self.output_dim = output_dim

class FeatureGroupingModel_cls(PreTrainedModel):
    config_class = FeatureGroupingConfig

    def __init__(self, config):
        super().__init__(config)
        self.model = FeatureGrouping_cls(  # Ensure this is defined somewhere
            num_features=config.num_features, 
            output_dim=config.output_dim
        )
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, input_ids, labels=None):
        logits = self.model(input_ids)  # Raw logits

        loss = None
        auc_roc = None
        f1 = None

        if labels is not None:
            loss = self.loss_fn(logits, labels)  

            # Convert logits to probabilities
            probs = F.softmax(logits, dim=-1)[:, 1].detach().cpu().numpy()
            labels_np = labels.detach().cpu().numpy()

            # Compute AUC-ROC only if both classes exist
            if len(set(labels_np)) > 1:
                auc_roc = roc_auc_score(labels_np, probs)
            else:
                auc_roc = 0.0

            # Compute F1-score
            preds = torch.argmax(logits, dim=-1).detach().cpu().numpy()
            f1 = f1_score(labels_np, preds, average="macro")

        return SequenceClassifierOutput(
            loss=loss, 
            logits=logits,
            hidden_states=None, 
            attentions=None
        )

In [19]:
import requests
from safetensors.torch import load_file

# URL to the safetensors model file
cls_url = "https://huggingface.co/KanWasTaken/WhartonDS_ClsModel/resolve/main/CLSmodel.safetensors"

# Download the model file
response = requests.get(cls_url)
if response.status_code == 200:
    with open('CLSmodel.safetensors', 'wb') as f:
        f.write(response.content)
    print("Model downloaded successfully.")

# Initialize your model
cls_model = FeatureGroupingModel_cls(FeatureGroupingConfig())

# Load the model weights from the downloaded file
cls_weights = load_file('CLSmodel.safetensors')

# Load the state_dict (weights) into the model
cls_model.load_state_dict(cls_weights)

# Set to evaluation mode
cls_model.eval()

print("Model loaded and ready to use.")

Model downloaded successfully.


model.safetensors:   0%|          | 0.00/102M [00:00<?, ?B/s]

Model loaded and ready to use.


In [20]:
class FeatureGrouping(nn.Module):
    def __init__(self, num_features=19, max_groups=3, embed_dim=4, output_dim=15, resnet_model="resnet50", finetune=True):
        super().__init__()
        self.num_features = num_features
        self.max_groups = max_groups
        self.embed_dim = embed_dim
        self.output_dim = output_dim

        # Embedding for home_away (3 categories: Home, Away, Neutral)
        self.home_away_embed = nn.Embedding(3, embed_dim)

        # Adjust feature count after embedding replacement
        self.adjusted_num_features = num_features - 1 + embed_dim  # 22 total features

        # Maximum features per group
        self.max_features_per_group = self.adjusted_num_features

        # Learnable logits for feature assignment
        self.assignment_logits = nn.Parameter(torch.randn(self.adjusted_num_features, max_groups))

        # Self-Attention layers for different group numbers
        self.attention_layers = nn.ModuleDict({
            f"attn_{g}": nn.MultiheadAttention(
                embed_dim=self.max_features_per_group,
                num_heads=1,
                batch_first=True
            )
            for g in range(1, max_groups + 1)
        })

        # Reduce channels before ResNet
        self.channel_reducer = nn.Conv2d(in_channels=max_groups, out_channels=3, kernel_size=1)

        # Pretrained ResNet
        self.resnet = timm.create_model(resnet_model, pretrained=True)
        in_features = self.resnet.get_classifier().in_features
        self.resnet.reset_classifier(0)

        # Final regression head
        self.fc = nn.Linear(in_features, output_dim)

        # Fine-tuning control
        if finetune:
            for param in self.resnet.parameters():
                param.requires_grad = True
        else:
            for param in self.resnet.parameters():
                param.requires_grad = False

    def forward(self, x):
        batch_size = x.shape[0]

        # Process home_away embedding
        home_away_idx = x[:, 17].long().clamp(0, 2)
        home_away_embed = self.home_away_embed(home_away_idx)
        x = torch.cat([x[:, :17], home_away_embed, x[:, 18:]], dim=1)

        # Hard feature assignment
        assignment_hard = torch.nn.functional.gumbel_softmax(self.assignment_logits, tau=0.5, hard=True, dim=1)

        all_group_outputs = []

        # Process different group configurations
        for num_groups in range(1, self.max_groups + 1):
            groups = []
            features_per_group = self.adjusted_num_features // num_groups

            for g in range(num_groups):
                start_idx = g * features_per_group
                end_idx = min(start_idx + features_per_group, self.adjusted_num_features)
                group_features = x[:, start_idx:end_idx]

                # Pad to match max_features_per_group
                if group_features.shape[1] < self.max_features_per_group:
                    pad_size = self.max_features_per_group - group_features.shape[1]
                    padding = torch.zeros(batch_size, pad_size, device=x.device)
                    group_features = torch.cat([group_features, padding], dim=1)

                groups.append(group_features)

            # Apply attention to each group
            processed_groups = []
            for g in range(num_groups):
                group_features = groups[g].unsqueeze(1)
                attn_output, _ = self.attention_layers[f"attn_{num_groups}"](group_features, group_features, group_features)
                processed_groups.append(attn_output)

            # Concatenate processed groups
            group_output = torch.cat(processed_groups, dim=1)

            # Pad to match max_groups if necessary
            if num_groups < self.max_groups:
                padding = torch.zeros(batch_size, self.max_groups - num_groups, self.max_features_per_group, device=x.device)
                group_output = torch.cat([group_output, padding], dim=1)

            all_group_outputs.append(group_output)

        # Stack different group configurations
        x_final = torch.stack(all_group_outputs, dim=1)  # [B, max_groups, max_groups, Features]

        # Reduce across different group choices
        x_final = x_final.mean(dim=1)  # [B, max_groups, Features]
        x_final = x_final.permute(0, 2, 1)  # [B, Features, max_groups]
        x_final = x_final.mean(dim=1).unsqueeze(-1).unsqueeze(-1)  # [B, max_groups, 1, 1]

        # Apply channel reduction
        x_final = self.channel_reducer(x_final)  # [B, 3, 1, 1]

        # Resize for ResNet
        x_final = x_final.expand(-1, -1, 224, 224)  # [B, 3, 224, 224]

        # Process through ResNet and final layer
        x_final = self.resnet(x_final)
        output = self.fc(x_final)

        return output

In [21]:
from transformers import PreTrainedModel, PretrainedConfig

class FeatureGroupingConfig(PretrainedConfig):
    model_type = "feature_grouping"

    def __init__(self, num_features=19, output_dim=15, **kwargs):
        super().__init__(**kwargs)
        self.num_features = num_features
        self.output_dim = output_dim

class FeatureGroupingModel(PreTrainedModel):
    config_class = FeatureGroupingConfig

    def __init__(self, config):
        super().__init__(config)
        self.model = FeatureGrouping(
            num_features=config.num_features, 
            output_dim=config.output_dim
        )

    def forward(self, input_ids, labels=None):
        output = self.model(input_ids)

        loss = None
        if labels is not None:
            loss = F.smooth_l1_loss(output, labels)  # Mean Squared Error for regression

        return {"loss": loss, "logits": output} if loss is not None else {"logits": output}

In [22]:
import requests
from safetensors.torch import load_file

# URL to the safetensors model file
rg_url = "https://huggingface.co/KanWasTaken/WhartonDS_RegressionModel/resolve/main/REGmodel.safetensors"

# Download the model file
response = requests.get(rg_url)
if response.status_code == 200:
    with open('REGmodel.safetensors', 'wb') as f:
        f.write(response.content)
    print("Model downloaded successfully.")

# Initialize your model
gen_model = FeatureGroupingModel(FeatureGroupingConfig())

# Load the model weights from the downloaded file
gen_weights = load_file('REGmodel.safetensors')

# Load the state_dict (weights) into the model
gen_model.load_state_dict(gen_weights)

# Set to evaluation mode
gen_model.eval()

print("Model loaded and ready to use.")

Model downloaded successfully.
Model loaded and ready to use.


-------------------
# **Check Weights**

In [23]:
# Get the model parameters (weights)
model_params = {name: param.detach().cpu().numpy() for name, param in cls_model.named_parameters()}

# Compare the weights in the safetensor file and the model
for name, param in cls_model.named_parameters():
    if name in cls_weights:
        param_safetensor = cls_weights[name].cpu().numpy()
        param_model = param.detach().cpu().numpy()

        # Check if the weights are the same
        if not (param_safetensor == param_model).all():
            print(f"Mismatch in weights for layer: {name}")

print(f"Check Done!")

Check Done!


In [24]:
# Get the model parameters (weights)
model_params = {name: param.detach().cpu().numpy() for name, param in gen_model.named_parameters()}

# Compare the weights in the safetensor file and the model
for name, param in gen_model.named_parameters():
    if name in gen_weights:
        param_safetensor = gen_weights[name].cpu().numpy()
        param_model = param.detach().cpu().numpy()

        # Check if the weights are the same
        if not (param_safetensor == param_model).all():
            print(f"Mismatch in weights for layer: {name}")

print(f"Check Done!")

Check Done!


In [25]:
gen_out = gen_model(torch.randn(1, 19))
print(gen_out['logits'])

tensor([[ 38.5485, 194.0829,  68.5496,  29.8556,  98.6914,  69.5574, -14.5581,
         112.4964, -38.2804, 109.9091,  -6.9627, 192.0578, 134.5824, -59.1047,
         154.3507]], grad_fn=<AddmmBackward0>)


In [26]:
cls_out = cls_model(torch.randn(1, 20))
print(cls_out['logits'])

print(F.softmax(cls_out['logits'], dim = -1))

tensor([[0.9278, 0.0216]], grad_fn=<AddmmBackward0>)
tensor([[0.7122, 0.2878]], grad_fn=<SoftmaxBackward0>)


--------------
# **Data Generation**

In [27]:
X_Home = []
for idx, team in enumerate(past_stats_home):
    
    X = past_stats_home[team]
    X = np.array(X, dtype=np.float32)
    X = torch.tensor(X, dtype=torch.float32)
    X = torch.reshape(X, (1, -1))
    
    out = gen_model(X)
    logits = out['logits']
    X_Home.append(logits)

X_Away = []
for idx, team in enumerate(past_stats_away):
    
    X = past_stats_away[team]
    X = np.array(X, dtype=np.float32)
    X = torch.tensor(X, dtype=torch.float32)
    X = torch.reshape(X, (1, -1))
    
    out = gen_model(X)
    logits = out['logits']
    X_Away.append(logits)

Normalized_X = []
for i in range(len(X_Home)):
    Normalized_X.append(F.tanh((X_Home[i] - X_Away[i] + epsilon) / T))

print(Normalized_X[0]) # X -> lst | X[0] -> lst | X[0][0] -> lst
print(len(Normalized_X))

tensor([[-0.0059, -0.0011,  0.0098,  0.0052, -0.0006,  0.0028,  0.0151, -0.0064,
          0.0024,  0.0021,  0.0024, -0.0074,  0.0042, -0.0050,  0.0043]],
       grad_fn=<TanhBackward0>)
10


In [28]:
inf['rest_days% (A/B)'] = np.tanh((inf['rest_days_Home'] - inf['rest_days_Away'] + epsilon) / T)
inf = inf.drop(columns = ['rest_days_Home', 'rest_days_Away', 'travel_dist_Home', 'travel_dist_Away'])

In [29]:
# [15 Features] -> F_Personal + rest_diff + prev game dist_diff + HAN_A + HAN_B + travel_dist_diff
# Total = 20
HAN_Home = torch.tensor(1)
HAN_Away = torch.tensor(0)

prev_trv_dist_diff_list = []
for i in inf.iterrows():
    # i -> Tuple
    # i[0] = Row Index
    # i[1] = Rest
    # i[1] -> List
    team_home = df_ts[df_ts['team'] == i[1][0]]
    team_home = team_home.sort_values(by = 'game_date', ascending = False)
    team_home = team_home.iloc[0]

    team_away = df_ts[df_ts['team'] == i[1][1]]
    team_away = team_away.sort_values(by = 'game_date', ascending = False)
    team_away = team_away.iloc[0]

    prev_travel_dist_home = team_home['travel_dist']
    prev_travel_dist_away = team_away['travel_dist']
    prev_travel_dist_diff = torch.tensor(np.tanh((prev_travel_dist_home - prev_travel_dist_away + epsilon) / T)) # type: np.float64
    prev_trv_dist_diff_list.append(prev_travel_dist_diff)

print(prev_trv_dist_diff_list[0])

tensor(0.0248, dtype=torch.float64)


  team_home = df_ts[df_ts['team'] == i[1][0]]
  team_away = df_ts[df_ts['team'] == i[1][1]]


In [30]:
inf

Unnamed: 0,team_home,team_away,home_away_NS,travel_dist% (A/B),rest_days% (A/B)
0,rhode_island_rams,north_carolina_tar_heels,0,-0.045263,2e-09
1,nc_state_wolfpack,rhode_island_rams,0,0.197375,0.03997868
2,nc_state_wolfpack,north_carolina_tar_heels,0,0.153483,0.03997868
3,liberty_flames,bucknell_bison,0,0.020708,-0.1586485
4,drexel_dragons,delaware_blue_hens,0,-0.039979,0.02221857
5,massachusetts_minutewomen,princeton_tigers,0,0.008416,-0.01333254
6,buffalo_bulls,stony_brook_seawolves,0,-0.189354,-0.07541211
7,fairfield_stags,towson_tigers,0,-0.010163,-0.1369125
8,uconn_huskies,campbell_fighting_camels,0,-0.113078,-0.07982977
9,american_university_eagles,columbia_lions,0,0.04006,-0.1150439


In [31]:
Normalized_X

[tensor([[-0.0059, -0.0011,  0.0098,  0.0052, -0.0006,  0.0028,  0.0151, -0.0064,
           0.0024,  0.0021,  0.0024, -0.0074,  0.0042, -0.0050,  0.0043]],
        grad_fn=<TanhBackward0>),
 tensor([[ 0.0188,  0.0107,  0.0013,  0.0032, -0.0027,  0.0033, -0.0073,  0.0123,
           0.0083, -0.0083, -0.0028,  0.0077, -0.0089, -0.0003, -0.0050]],
        grad_fn=<TanhBackward0>),
 tensor([[ 0.0129,  0.0096,  0.0111,  0.0084, -0.0033,  0.0061,  0.0078,  0.0059,
           0.0106, -0.0063, -0.0003,  0.0003, -0.0047, -0.0053, -0.0007]],
        grad_fn=<TanhBackward0>),
 tensor([[ 0.0007,  0.0010,  0.0115,  0.0228,  0.0050,  0.0138,  0.0051,  0.0003,
          -0.0025,  0.0117, -0.0010,  0.0246,  0.0122, -0.0027, -0.0018]],
        grad_fn=<TanhBackward0>),
 tensor([[-2.4251e-03, -9.5064e-04, -2.6397e-03,  4.0531e-03, -4.1369e-03,
           8.7680e-03,  1.8272e-02, -4.6592e-04, -1.6911e-03, -1.2699e-03,
          -1.9338e-02,  4.9190e-03,  5.1918e-05, -5.4430e-03,  5.9254e-04]],
        g

In [32]:
Ft = []
for i in range(len(Normalized_X)):
    Ft.append(Normalized_X[i][0])

FF = []
for i in range(len(Ft)):
    Stat = Ft[i]

    rest_diff = torch.tensor(inf['rest_days% (A/B)'][i])
    travel_diff = torch.tensor(inf['travel_dist% (A/B)'][i])

    comb1 = torch.stack((rest_diff, prev_trv_dist_diff_list[0]))

    HAN = torch.stack((HAN_Home, HAN_Away))

    comb2 = torch.stack((comb1, HAN)).reshape(1, -1)
    
    comb3 = torch.hstack((comb2, travel_diff.reshape(1, 1)))
    final_feature = torch.hstack((Stat.reshape(1, -1), comb3))
    FF.append(final_feature)

In [33]:
print(FF[0])
print(" ")
print(FF[0][0])

tensor([[-5.9330e-03, -1.1484e-03,  9.7819e-03,  5.1622e-03, -6.0143e-04,
          2.8063e-03,  1.5135e-02, -6.4062e-03,  2.3529e-03,  2.0561e-03,
          2.4422e-03, -7.3865e-03,  4.2125e-03, -4.9841e-03,  4.3132e-03,
          2.0000e-09,  2.4804e-02,  1.0000e+00,  0.0000e+00, -4.5263e-02]],
       dtype=torch.float64, grad_fn=<CatBackward0>)
 
tensor([-5.9330e-03, -1.1484e-03,  9.7819e-03,  5.1622e-03, -6.0143e-04,
         2.8063e-03,  1.5135e-02, -6.4062e-03,  2.3529e-03,  2.0561e-03,
         2.4422e-03, -7.3865e-03,  4.2125e-03, -4.9841e-03,  4.3132e-03,
         2.0000e-09,  2.4804e-02,  1.0000e+00,  0.0000e+00, -4.5263e-02],
       dtype=torch.float64, grad_fn=<SelectBackward0>)


In [34]:
for i in FF[0][0]:
    print(type(i))

<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>
<class 'torch.Tensor'>


In [35]:
Winning_pctg = []

for i in range(len(FF)):
    Features = FF[i][0]
    Features = Features.reshape(1, -1)
    Features = Features.float()
    
    Pred = cls_model(Features)
    prob = F.softmax(Pred['logits'], dim = -1)
    print(prob[0][1])
    Winning_pctg.append(prob[0][1])

tensor(0.3612, grad_fn=<SelectBackward0>)
tensor(0.9245, grad_fn=<SelectBackward0>)
tensor(0.9205, grad_fn=<SelectBackward0>)
tensor(0.8000, grad_fn=<SelectBackward0>)
tensor(0.6403, grad_fn=<SelectBackward0>)
tensor(0.8373, grad_fn=<SelectBackward0>)
tensor(0.0940, grad_fn=<SelectBackward0>)
tensor(0.5616, grad_fn=<SelectBackward0>)
tensor(0.0706, grad_fn=<SelectBackward0>)
tensor(0.5982, grad_fn=<SelectBackward0>)


In [36]:
Winning_pctg

[tensor(0.3612, grad_fn=<SelectBackward0>),
 tensor(0.9245, grad_fn=<SelectBackward0>),
 tensor(0.9205, grad_fn=<SelectBackward0>),
 tensor(0.8000, grad_fn=<SelectBackward0>),
 tensor(0.6403, grad_fn=<SelectBackward0>),
 tensor(0.8373, grad_fn=<SelectBackward0>),
 tensor(0.0940, grad_fn=<SelectBackward0>),
 tensor(0.5616, grad_fn=<SelectBackward0>),
 tensor(0.0706, grad_fn=<SelectBackward0>),
 tensor(0.5982, grad_fn=<SelectBackward0>)]