# **Libraries**

In [1]:
!pip install timm



In [2]:
!pip install evaluate

Collecting evaluate
  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)
Downloading evaluate-0.4.3-py3-none-any.whl (84 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m84.0/84.0 kB[0m [31m2.0 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: evaluate
Successfully installed evaluate-0.4.3


In [3]:
# Data Processing n' Visualization
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

# Compute
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim

# Data
from torch.utils.data import Dataset, DataLoader
from sklearn.model_selection import train_test_split

# Random
import os
import random as rand
import timm

In [4]:
torch.cuda.empty_cache()

In [5]:
def set_seed(seed):
  rand.seed(seed)
  np.random.seed(seed)
  torch.cuda.manual_seed(seed)
  torch.cuda.manual_seed_all(seed)
  torch.backends.cudnn.deterministic = True
  torch.backends.cudnn.benchmark = False

seed = 59
set_seed(59)

-------------------------
# **Data Sample**

In [6]:
data_dir = '/kaggle/input/wharton-bkb-dataset/games_2022 (1).xlsx'

In [7]:
df = pd.read_excel(data_dir)
print(f"Dataset Type: {type(df)}")

Dataset Type: <class 'pandas.core.frame.DataFrame'>


In [8]:
df_cls = df

-----------------------------
# **Data Preprocessing**

In [9]:
df_cls = df_cls.drop(columns = ['OT_length_min_tot', 'attendance', 'tz_dif_H_E',
                        'home_away', 'notD1_incomplete', 'largest_lead'])
df_cls = df_cls.dropna()
df_cls['home_away_NS'] = df_cls['home_away_NS'].replace({
    1: 1, -1: 0, 0: 2
})

from sklearn.preprocessing import MinMaxScaler

# List of columns to normalize
stats_to_normalize = ['FGA_2', 'FGM_2', 'FGA_3', 'FGM_3', 
                      'FTA', 'FTM', 'AST', 'BLK', 'STL', 'TOV', 
                      'TOV_team', 'DREB', 'OREB', 'F_tech', 'F_personal', 
                      'rest_days', 'prev_game_dist', 'travel_dist']

# Initialize MinMaxScaler
scaler = MinMaxScaler(feature_range=(0,1))

# Apply MinMaxScaler only to the selected stats
df_cls[stats_to_normalize] = scaler.fit_transform(df_cls[stats_to_normalize])

print(df_cls.head())  # Check normalized values

          game_id  game_date                       team     FGA_2     FGM_2  \
0  game_2022_2011 2021-12-30      georgia_lady_bulldogs  0.661290  0.513514   
1  game_2022_2011 2021-12-30                 lsu_tigers  0.661290  0.567568   
2  game_2022_2012 2021-12-30            missouri_tigers  0.548387  0.405405   
3  game_2022_2012 2021-12-30   south_carolina_gamecocks  0.741935  0.540541   
4  game_2022_2013 2021-12-30  tennessee_lady_volunteers  0.516129  0.459459   

      FGA_3     FGM_3       FTA    FTM       AST  ...      DREB     OREB  \
0  0.196078  0.227273  0.125000  0.075  0.361111  ...  0.410256  0.34375   
1  0.196078  0.181818  0.312500  0.200  0.388889  ...  0.410256  0.34375   
2  0.274510  0.318182  0.333333  0.325  0.250000  ...  0.564103  0.18750   
3  0.392157  0.272727  0.187500  0.125  0.388889  ...  0.461538  0.62500   
4  0.274510  0.181818  0.312500  0.250  0.416667  ...  0.641026  0.37500   

   F_tech  F_personal  team_score  opponent_team_score  rest_days  \

In [10]:
import math
def preprocess_data_diff(data):
    
    """
    Preprocessed Data (1st Step)
        Input: 
        - data: Dataset File -> csv

        Output:
        - processed_df: Processed Dataset File -> pd
    """

    epsilon = 1e-8
    T = 5 # Temperature
    
    processed_data = []
    
    stats_to_diff = [
        'FGA_2', 'FGM_2', 'FGA_3', 'FGM_3', 'FTA',
        'FTM', 'AST', 'BLK', 'STL', 'TOV', 'TOV_team',
        'DREB', 'OREB', 'F_tech', 'F_personal', 'rest_days',
        'prev_game_dist', 'travel_dist'
    ]
    
    # Process each game
    for game_id in data['game_id'].unique():
        game_data = data[data['game_id'] == game_id]

        # Ensure the game has exactly 2 teams
        if len(game_data) != 2:
            print(f"Skipping game {game_id} due to missing teams.")
            continue

        # Extract teams
        teamA = game_data.iloc[0]
        teamB = game_data.iloc[1]

        entry = {
            'teamA': teamA['team'],
            'teamB': teamB['team'],
            'teamA_score': teamA['team_score'],
            'teamB_score': teamB['team_score'],

            # Embedding for Home/Away/Neutral
            'A_H/W/N': teamA['home_away_NS'], 
            'B_H/W/N': teamB['home_away_NS'],

            # 0: Lost | 1: Won | 2: Draw
            'W/L/D (teamA)': 0 if teamA['team_score'] < teamB['team_score']
                            else 1 if teamA['team_score'] > teamB['team_score']
                            else 2
        }

        # Compute stat differences
        for stat in stats_to_diff:
            # Handle NA values
            if pd.isna(teamB[stat]) and pd.isna(teamA[stat]):
                teamA[stat], teamB[stat] = 0, 0
                
            elif pd.isna(teamA[stat]):
                print(f"Team A ({teamA['team']}) {stat} has NA. Using Team B's value.")
                teamA[stat] = teamB[stat]
                
            elif pd.isna(teamB[stat]):
                print(f"Team B ({teamB['team']}) {stat} has NA. Using Team A's value.")
                teamB[stat] = teamA[stat]
                
            # Compute difference
            entry[f'{stat}% (A/B)'] = math.tanh((teamA[stat] - teamB[stat] + epsilon) / T)
        
        processed_data.append(entry)

    # Convert to DataFrame
    processed_df = pd.DataFrame(processed_data)

    return processed_df

In [11]:
df_cls_diff = preprocess_data_diff(df_cls)

Skipping game game_2022_1320 due to missing teams.
Skipping game game_2022_2198 due to missing teams.
Skipping game game_2022_2621 due to missing teams.
Skipping game game_2022_3347 due to missing teams.
Skipping game game_2022_3744 due to missing teams.
Skipping game game_2022_4049 due to missing teams.
Skipping game game_2022_4745 due to missing teams.
Skipping game game_2022_181 due to missing teams.
Skipping game game_2022_1994 due to missing teams.
Skipping game game_2022_3906 due to missing teams.
Skipping game game_2022_4264 due to missing teams.
Skipping game game_2022_2441 due to missing teams.
Skipping game game_2022_182 due to missing teams.
Skipping game game_2022_183 due to missing teams.
Skipping game game_2022_219 due to missing teams.
Skipping game game_2022_220 due to missing teams.
Skipping game game_2022_221 due to missing teams.
Skipping game game_2022_222 due to missing teams.
Skipping game game_2022_320 due to missing teams.
Skipping game game_2022_468 due to miss

In [12]:
df_cls_diff = df_cls_diff.drop(columns = ['teamA', 'teamB', 'teamA_score', 'teamB_score'])

A_HWN = df_cls_diff['A_H/W/N']
B_HWN = df_cls_diff['B_H/W/N']
df_cls_diff = df_cls_diff.drop(columns = ['A_H/W/N', 'B_H/W/N'])
df_cls_diff['A_H/W/N'] = A_HWN
df_cls_diff['B_H/W/N'] = B_HWN

travel_dist = df_cls_diff['travel_dist% (A/B)']
df_cls_diff = df_cls_diff.drop(columns = ['travel_dist% (A/B)'])
df_cls_diff['travel_dist% (A/B)'] = travel_dist

In [13]:
df_cls_diff.head()

Unnamed: 0,W/L/D (teamA),FGA_2% (A/B),FGM_2% (A/B),FGA_3% (A/B),FGM_3% (A/B),FTA% (A/B),FTM% (A/B),AST% (A/B),BLK% (A/B),STL% (A/B),...,TOV_team% (A/B),DREB% (A/B),OREB% (A/B),F_tech% (A/B),F_personal% (A/B),rest_days% (A/B),prev_game_dist% (A/B),A_H/W/N,B_H/W/N,travel_dist% (A/B)
0,0,2e-09,-0.01081,2e-09,0.009091,-0.037482,-0.024995,-0.005555,0.055498,-0.05918999,...,-0.066568,2e-09,2e-09,2e-09,0.070849,0.031568,-0.021141,1,0,-0.024216
1,1,-0.03869035,-0.02702,-0.02352507,0.009091,0.029158,0.039979,-0.027771,-0.077621,0.007407274,...,0.033321,0.02050995,-0.08727737,2e-09,-0.025801,-0.005263,-0.02009,1,0,-0.033908
2,1,-0.003225793,0.043216,-0.05484686,-0.009091,0.029158,0.024995,0.044415,0.033321,2e-09,...,0.033321,0.0563505,-0.03748243,2e-09,-0.019352,-0.073551,-0.011265,1,0,-0.012903
3,0,0.01612764,0.005405,-0.01176416,-0.009091,-0.037482,-0.01,-0.011111,-0.011111,-0.00740727,...,-0.033321,-0.01025605,-0.006249917,-0.03997868,-0.006452,0.052583,-0.00621,0,1,0.005791
4,1,0.03224688,0.03782,-0.1286941,-0.063551,0.103792,0.069886,0.038869,-0.022219,-0.05180543,...,-0.033321,0.08186764,-0.01249935,2e-09,-0.051567,-0.047333,0.023604,0,1,0.027036


In [14]:
start_col = 'FGA_2% (A/B)'
end_col_test = 'F_personal% (A/B)'
df_test = df_cls_diff.loc[:,start_col : end_col_test]

In [15]:
# Our Regression Model Output 15 Labels.
len(df_test.columns) # => Should be 15

15

In [16]:
X = df_cls_diff.loc[:, start_col : 'travel_dist% (A/B)'].values

y = df_cls_diff.loc[:, 'W/L/D (teamA)'].values

X = torch.tensor(X, dtype=torch.float32)
y = torch.tensor(y, dtype=torch.long)  


print(X.shape, X.dtype)  # Check shape and type
print(y.shape, y.dtype)

torch.Size([4350, 20]) torch.float32
torch.Size([4350]) torch.int64


In [17]:
print(X[0])

print("-"*59)

print(X[0][17])
print(X[0][18])

print(f"Number of Features: {len(X[0])}")
A_HWN_idx = 17
B_HWN_idx = 18

print(f"Label: {y[:3]}")

# [15 Features] -> F_Personal + Rest + Prev game dist + HAN_A + HAN_B + travel dist
# Total = 20

tensor([ 2.0000e-09, -1.0810e-02,  2.0000e-09,  9.0907e-03, -3.7482e-02,
        -2.4995e-02, -5.5555e-03,  5.5498e-02, -5.9190e-02,  1.9510e-02,
        -6.6568e-02,  2.0000e-09,  2.0000e-09,  2.0000e-09,  7.0849e-02,
         3.1568e-02, -2.1141e-02,  1.0000e+00,  0.0000e+00, -2.4216e-02])
-----------------------------------------------------------
tensor(1.)
tensor(0.)
Number of Features: 20
Label: tensor([0, 1, 1])


------------------------
# **Dataset**

In [18]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [19]:
# 7/3 Train/Val
X_train, X_val, y_train, y_val = train_test_split(X, y, test_size=0.3, random_state=42)

print(X_train.shape)  # Should be (num_samples, num_features) → (N, 19)
print(y_train.shape)  # Should be (num_samples, num_outputs) → (N, 15)
print(f"Type of X_train and X_test: {type(X_train)} | {type(X_val)}")

torch.Size([3045, 20])
torch.Size([3045])
Type of X_train and X_test: <class 'torch.Tensor'> | <class 'torch.Tensor'>


In [20]:
from torch.utils.data import Dataset

class bkb_dataset(Dataset):
    def __init__(self, data, label):
        self.data = data
        self.label = label

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        feature = self.data[idx]
        label = self.label[idx]

        return {"input_ids": feature, "labels": label}

In [21]:
train_set = bkb_dataset(
    X_train,
    y_train,
)

val_set = bkb_dataset(
    X_val,
    y_val,
)

print(f"Length of train_set: {len(train_set)}")
print(f"Length of val_set: {len(val_set)}")

Length of train_set: 3045
Length of val_set: 1305


In [22]:
train_batch = 256
test_batch = 32

train_loader = DataLoader(
    train_set,
    batch_size = train_batch,
    shuffle = True
)

val_loader = DataLoader(
    val_set,
    batch_size = test_batch,
    shuffle = False
)

print(f"Length train_loader: {len(train_loader)}")
print(f"Length val_loader: {len(val_loader)}")

Length train_loader: 12
Length val_loader: 41


-------------------
# **Model**

In [23]:
from huggingface_hub import login

from kaggle_secrets import UserSecretsClient
user_secrets = UserSecretsClient()
HUGGINGFACE_TOKEN = user_secrets.get_secret("HF_TOKEN")

# Login to Hugging Face
login(HUGGINGFACE_TOKEN)

In [24]:
import torch.nn.functional as F

class FeatureGrouping_cls(nn.Module):
    def __init__(self, num_features=20, max_groups=3, embed_dim=4, output_dim=2, resnet_model="resnet50", finetune = True):
        super().__init__()
        self.num_features = num_features
        self.max_groups = max_groups
        self.embed_dim = embed_dim
        self.output_dim = output_dim

        # Embedding for home_away (3 categories: Home, Away, Neutral)
        self.home_away_embed = nn.Embedding(3, embed_dim)

        # Adjust feature count after embedding replacement
        self.adjusted_num_features = num_features - 2 + 2*embed_dim  # 20 - 2 + 2*4 = 26

        # Calculate the maximum features per group to ensure consistent dimensions
        self.max_features_per_group = self.adjusted_num_features

        # Learnable logits for feature assignment
        self.assignment_logits = nn.Parameter(torch.randn(self.adjusted_num_features, max_groups))

        # Self-Attention layers for each possible number of groups
        self.attention_layers = nn.ModuleDict({
            f"attn_{g}": nn.MultiheadAttention(
                embed_dim=self.max_features_per_group,
                num_heads=1,
                batch_first=True
            )
            for g in range(1, max_groups + 1)
        })

        # Reduce channels before ResNet
        self.channel_reducer = nn.Conv2d(in_channels=max_groups, out_channels=3, kernel_size=1)

        # Pretrained ResNet model
        self.resnet = timm.create_model(resnet_model, pretrained=True)
        in_features = self.resnet.get_classifier().in_features
        self.resnet.reset_classifier(0)

        # Final regression head
        self.fc = nn.Linear(in_features, output_dim)
        
        if finetune:
            for param in self.resnet.parameters():
                param.requires_grad = True
        else:
            for param in self.resnet.parameters():
                param.requires_grad = False
                    
    def forward(self, x):
        batch_size = x.shape[0]

        # Extract home_away index and convert to embeddings
        A_home_away_idx = x[:, 17].long().clamp(0, 2)
        B_home_away_idx = x[:, 18].long().clamp(0, 2)
        
        A_home_away_embed = self.home_away_embed(A_home_away_idx)
        B_home_away_embed = self.home_away_embed(B_home_away_idx)
        
        x = torch.cat([x[:, :17], A_home_away_embed, B_home_away_embed, x[:, 19:]], dim=1)

        # Hard feature assignment
        assignment_hard = torch.nn.functional.gumbel_softmax(self.assignment_logits, tau=0.5, hard=True, dim=1)

        all_group_outputs = []

        # Process different group configurations
        for num_groups in range(1, self.max_groups + 1):
            # Split features into groups
            groups = []
            features_per_group = self.adjusted_num_features // num_groups
            
            for g in range(num_groups):
                start_idx = g * features_per_group
                end_idx = min(start_idx + features_per_group, self.adjusted_num_features)
                group_features = x[:, start_idx:end_idx]
                
                # Pad to match max_features_per_group
                if group_features.shape[1] < self.max_features_per_group:
                    pad_size = self.max_features_per_group - group_features.shape[1]
                    padding = torch.zeros(batch_size, pad_size, device=x.device)
                    group_features = torch.cat([group_features, padding], dim=1)
                
                groups.append(group_features)

            # Process each group with attention
            processed_groups = []
            for g in range(num_groups):
                group_features = groups[g].unsqueeze(1)
                attn_output, _ = self.attention_layers[f"attn_{num_groups}"](
                    group_features, group_features, group_features)
                processed_groups.append(attn_output)

            # Combine processed groups
            group_output = torch.cat(processed_groups, dim=1)
            
            # Pad to match max_groups if necessary
            if num_groups < self.max_groups:
                padding = torch.zeros(
                    batch_size,
                    self.max_groups - num_groups,
                    self.max_features_per_group,
                    device=x.device
                )
                group_output = torch.cat([group_output, padding], dim=1)
            
            all_group_outputs.append(group_output)

        # Stack all configurations
        x_final = torch.stack(all_group_outputs, dim=1)  # [B, max_groups, max_groups, Features]
        
        # Reshape for channel reducer
        x_final = x_final.mean(dim=1)  # [B, max_groups, Features]
        x_final = x_final.permute(0, 2, 1)  # [B, Features, max_groups]
        x_final = x_final.mean(dim=1).unsqueeze(-1).unsqueeze(-1)  # [B, max_groups, 1, 1]
        
        # Apply channel reduction
        x_final = self.channel_reducer(x_final)  # [B, 3, 1, 1]
        
        # Prepare for ResNet
        x_final = x_final.expand(-1, -1, 224, 224)  # [B, 3, 224, 224]

        # Process through ResNet and final layer
        x_final = self.resnet(x_final)
        output = self.fc(x_final)

        return output

batch_size = 1
x = torch.randn(batch_size, 20)

# Ensure y[0] is properly wrapped in a tensor
test_label = torch.tensor([y[0]], dtype=torch.long)  # Added batch dimension

# Model
model = FeatureGrouping_cls(resnet_model="resnet50")
output = model(x)

print(f"Output Shape: {output.shape}")  # Expected: (1, 2)
print(f"Output: {output}")

# Softmax check
sm = nn.Softmax(dim=-1)
print(f"After Softmax: {sm(output)}")

# Loss computation
loss = nn.CrossEntropyLoss()
pred = loss(output, test_label)
print(f"Loss: {pred} | True Label: {test_label}")

model.safetensors:   0%|          | 0.00/102M [00:00<?, ?B/s]

Output Shape: torch.Size([1, 2])
Output: tensor([[-0.0143,  0.0543]], grad_fn=<AddmmBackward0>)
After Softmax: tensor([[0.4829, 0.5171]], grad_fn=<SoftmaxBackward0>)
Loss: 0.7280246615409851 | True Label: tensor([0])


----------------------
# **Train**

In [25]:
from transformers import PreTrainedModel, PretrainedConfig
from transformers.modeling_outputs import SequenceClassifierOutput
from sklearn.metrics import roc_auc_score, f1_score

class FeatureGroupingConfig(PretrainedConfig):
    model_type = "feature_grouping"

    def __init__(self, num_features=20, output_dim=2, **kwargs):
        super().__init__(**kwargs)
        self.num_features = num_features
        self.output_dim = output_dim

class FeatureGroupingModel(PreTrainedModel):
    config_class = FeatureGroupingConfig

    def __init__(self, config):
        super().__init__(config)
        self.model = FeatureGrouping_cls(  # Ensure this is defined somewhere
            num_features=config.num_features, 
            output_dim=config.output_dim
        )
        self.loss_fn = nn.CrossEntropyLoss()

    def forward(self, input_ids, labels=None):
        logits = self.model(input_ids)  # Raw logits

        loss = None
        auc_roc = None
        f1 = None

        if labels is not None:
            loss = self.loss_fn(logits, labels)  

            # Convert logits to probabilities
            probs = F.softmax(logits, dim=-1)[:, 1].detach().cpu().numpy()
            labels_np = labels.detach().cpu().numpy()

            # Compute AUC-ROC only if both classes exist
            if len(set(labels_np)) > 1:
                auc_roc = roc_auc_score(labels_np, probs)
            else:
                auc_roc = 0.0

            # Compute F1-score
            preds = torch.argmax(logits, dim=-1).detach().cpu().numpy()
            f1 = f1_score(labels_np, preds, average="macro")

        return SequenceClassifierOutput(
            loss=loss, 
            logits=logits,
            hidden_states=None, 
            attentions=None
        )

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

In [26]:
def compute_metrics(eval_pred):
    logits, labels = eval_pred

    # Convert logits to a PyTorch tensor
    logits = torch.tensor(logits)  

    # Apply softmax to get probabilities
    probs = torch.nn.functional.softmax(logits, dim=1)[:, 1].cpu().numpy()  # Convert back to NumPy

    # Ensure labels are in NumPy array format
    labels = np.array(labels)

    # Convert probabilities to binary predictions
    preds = (probs >= 0.5).astype(int)

    # Compute metrics
    auc_roc = roc_auc_score(labels, probs) if len(set(labels)) > 1 else 0.0  # Prevent single-class error
    f1 = f1_score(labels, preds, average="macro")

    return {"auc_roc": auc_roc, "f1": f1}

In [27]:
from transformers import Trainer, TrainingArguments
import warnings

warnings.filterwarnings(
    "ignore",
    message="Was asked to gather along dimension 0, but all input tensors were scalars",
    category=UserWarning
)

# Define Training Arguments
training_args = TrainingArguments(
    output_dir="./WhartonDS_ClsModelTest",
    learning_rate=1e-5,
    eval_strategy="epoch",  # Evaluate at each epoch
    save_strategy="epoch",  # Save at each epoch
    logging_strategy="epoch",  # Log every epoch
    logging_dir="./logs",
    logging_steps=1,
    per_device_train_batch_size=64,
    per_device_eval_batch_size=32,
    num_train_epochs=70,
    warmup_ratio = 0.05,
    weight_decay=0.0005,
    report_to="none",
    push_to_hub=True,
    optim="adamw_torch",
    lr_scheduler_type="cosine_with_restarts",
    hub_model_id="KanWasTaken/WhartonDS_ClsModelTest",
    load_best_model_at_end=True,  # Load the best model at the end
    metric_for_best_model="eval_loss",
    greater_is_better=False,  # Lower eval_loss is better
    save_total_limit=3  # Keep multiple checkpoints to avoid overwriting
)

# Initialize Model
model = FeatureGroupingModel(FeatureGroupingConfig())

# Initialize Trainer
trainer = Trainer(
    model=model.to(device),
    args=training_args,
    train_dataset=train_set,
    eval_dataset=val_set,
    compute_metrics=compute_metrics
)

# Train the Model
trainer.train()
    
# Save the best model locally
trainer.save_model("./WhartonDS_ClsModelTest")

# Push best model to Hugging Face Hub
trainer.push_to_hub("KanWasTaken/WhartonDS_ClsModelTest")

Epoch,Training Loss,Validation Loss,Auc Roc,F1
1,0.6903,0.69258,0.567741,0.335865
2,0.6896,0.694143,0.456386,0.445781
3,0.6869,0.690597,0.573147,0.563122
4,0.6835,0.687658,0.596988,0.571583
5,0.6819,0.685648,0.598348,0.574613
6,0.6797,0.684357,0.593655,0.581235
7,0.6786,0.680196,0.611501,0.581373
8,0.6773,0.683858,0.604051,0.578241
9,0.6754,0.681172,0.600194,0.567102
10,0.6743,0.683329,0.595264,0.572257


No files have been modified since last commit. Skipping to prevent empty commit.
No files have been modified since last commit. Skipping to prevent empty commit.


CommitInfo(commit_url='https://huggingface.co/KanWasTaken/WhartonDS_ClsModelTest/commit/e292b9c7d5c9fc317811d298a5a16ab576998de4', commit_message='KanWasTaken/WhartonDS_ClsModelTest', commit_description='', oid='e292b9c7d5c9fc317811d298a5a16ab576998de4', pr_url=None, repo_url=RepoUrl('https://huggingface.co/KanWasTaken/WhartonDS_ClsModelTest', endpoint='https://huggingface.co', repo_type='model', repo_id='KanWasTaken/WhartonDS_ClsModelTest'), pr_revision=None, pr_num=None)