<a href="https://colab.research.google.com/github/DManiscalco/MMA-Matchups/blob/main/Graph_Model.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# A Graph Neural Network can be used when we have stats for specific fights (fighters are the nodes and fights are the edges)

In [40]:
%%capture
!pip install torch_geometric

In [41]:
import kagglehub
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
import torch
from torch.utils.data import DataLoader, Dataset
from transformers import BertTokenizer, BertForSequenceClassification, Trainer, TrainingArguments

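# Libraries for the graph model
# NOTE: the DataLoader imported on the next line rebinds the name imported from torch.utils.data above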
from torch_geometric.data import Data, DataLoader
from torch_geometric.nn import GCNConv, global_mean_pool
from torch.nn import functional as F
import torch.nn as nn

In [2]:
# Download the dataset from kaggle
path = kagglehub.dataset_download('calmdownkarm/ufcdataset')

# Use $ to keep python variable in the terminal command
!ls $path  # make sure there are files in the path as we expect
!cp -r $path/* /content/  # move to /content folder

Downloading from https://www.kaggle.com/api/v1/datasets/download/calmdownkarm/ufcdataset?dataset_version_number=6...


100%|██████████| 977k/977k [00:00<00:00, 71.2MB/s]

Extracting files...
data.csv





In [211]:
# Load the CSV from the /content folder into a pandas DataFrame
data_csv = pd.read_csv(filepath_or_buffer='/content/data.csv')

In [212]:
# --- Fight information (graph edges) ---
# Identifier / outcome columns come first ('Date' is not included for now)
fight_info_cols = ['Event_ID', 'Fight_ID', 'Last_round', 'Max_round', 'winby', 'winner', 'B_ID', 'R_ID', 'B_Age', 'R_Age']
# Append every other column that mentions "round". Last_round and Max_round are
# skipped because they are already listed above, which keeps their position.
fight_info_cols += [
  col for col in data_csv.columns
  if 'round' in col.lower() and col not in ['Last_round', 'Max_round']
]

# Explicit copy: the assignment below must edit an independent frame, not a
# slice of data_csv (avoids pandas' SettingWithCopyWarning)
fight_info_df = data_csv[fight_info_cols].copy()


def _winner_to_fighter_id(fight):
  """Return the winner's fighter ID for one fight row.

  'blue' maps to the row's B_ID and 'red' to its R_ID; any other value is
  returned unchanged.
  """
  if fight['winner'] == 'blue':
    return fight['B_ID']
  if fight['winner'] == 'red':
    return fight['R_ID']
  return fight['winner']


# Replace the corner colour in 'winner' with the ID of the fighter in that corner
fight_info_df.loc[:, 'winner'] = fight_info_df.apply(_winner_to_fighter_id, axis=1)

# --- Fighter information (graph nodes) ---
fighter_info_cols = ['B_Height', 'B_HomeTown', 'B_ID', 'B_Location', 'B_Name', 'R_Height', 'R_HomeTown', 'R_ID', 'R_Location', 'R_Name']
fighter_info_init = data_csv[fighter_info_cols]

# One frame per corner; copies so that renaming the columns never touches a slice
fighter_info_red = fighter_info_init[['R_Height', 'R_HomeTown', 'R_ID', 'R_Location', 'R_Name']].copy()
fighter_info_blue = fighter_info_init[['B_Height', 'B_HomeTown', 'B_ID', 'B_Location', 'B_Name']].copy()

# Drop the corner prefix so both frames share the same column names
fighter_col_names = ['Height', 'HomeTown', 'ID', 'Location', 'Name']
fighter_info_red.columns = fighter_col_names
fighter_info_blue.columns = fighter_col_names

# Stack BOTH corners so a fighter who only appears in the red corner still gets a
# row (previously the blue frame was concatenated with itself). Duplicate fighters
# are not removed here; the original row index is kept, so labels repeat.
fighter_info_concat = pd.concat([fighter_info_blue, fighter_info_red])

In [213]:
# Look for fighters listed under more than one ID: reduce to one row per ID,
# after which any name that still repeats in dupe_df belongs to several IDs
dupe_df = fighter_info_concat.drop_duplicates(subset=['ID'], keep='first')
# To show the duplicates, run: dupe_df[dupe_df.duplicated(['Name'], keep=False)]

# Dong Hyun Kim shows up with both ID 455 and ID 2709 - fold 2709 into 455
# NOTE(review): two different fighters can share a name; confirm (e.g. via
# Height / HomeTown) that these two IDs really are the same person
has_second_id = fighter_info_concat['ID'] == 2709
fighter_info_concat.loc[has_second_id, 'ID'] = 455

In [214]:
# Keep a single row per fighter ID (the first occurrence is the one retained)
fighter_info_df = fighter_info_concat.drop_duplicates(subset=['ID'], keep='first')

In [215]:
# IDs 2709 and 455 are treated as the same fighter, so use 455 everywhere in the fight data.
# 'winner' holds fighter IDs as well (it was mapped from 'blue'/'red' to B_ID/R_ID
# earlier), so it is remapped too. Otherwise a win by 2709 would point at an ID that
# no longer appears in R_ID / B_ID.
for id_col in ['R_ID', 'B_ID', 'winner']:
  fight_info_df.loc[fight_info_df[id_col] == 2709, id_col] = 455

### Start setting up the model