In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import sklearn
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

In [2]:
# Loads in the fighter-stats df
fighters = pd.read_csv("data/fighter-stats-threading.csv")
fighters

Unnamed: 0,Full Name,Height(inches),Weight(lbs),Reach(inches),Stance,DOB,SLpM.,Str.Acc.,SApM,Str.Def,TD Avg.,TD Acc.,TD Def.,Sub. Avg.,Record
0,Tom Aaron,,155.0,,,"Jul 13, 1978",0.00,0%,0.00,0%,0.00,0%,0%,0.0,Record: 5-3-0
1,Danny Abbadi,5' 11,155.0,,Orthodox,"Jul 03, 1983",3.29,38%,4.41,57%,0.00,0%,77%,0.0,Record: 4-6-0
2,Nariman Abbasov,5' 8,155.0,66.0,Orthodox,"Feb 01, 1994",3.00,20%,5.67,46%,0.00,0%,66%,0.0,Record: 28-4-0
3,David Abbott,6' 0,265.0,,Switch,,1.35,30%,3.55,38%,1.07,33%,66%,0.0,Record: 10-15-0
4,Hamdy Abdelwahab,6' 2,264.0,72.0,Southpaw,"Jan 22, 1993",3.87,52%,3.13,59%,3.00,75%,0%,0.0,Record: 5-0-0 (1 NC)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4217,Dave Zitanick,,170.0,,,"Mar 05, 1980",0.00,0%,0.00,0%,0.00,0%,0%,0.0,Record: 5-7-0 (1 NC)
4218,Alex Zuniga,,145.0,,,,0.00,0%,0.00,0%,0.00,0%,0%,0.0,Record: 6-3-0
4219,George Zuniga,5' 9,185.0,,,,7.64,38%,5.45,37%,0.00,0%,100%,0.0,Record: 3-1-0
4220,Allan Zuniga,5' 7,155.0,70.0,Orthodox,"Apr 04, 1992",3.93,52%,1.80,61%,0.00,0%,57%,1.0,Record: 13-1-0


In [3]:
# Seperate the record into w, l, d and make three new columns
pattern = r"Record:\s(\d+)-(\d+)-(\d+)"
fighters[['win', 'loss', 'draw']] = fighters['Record'].str.extract(pattern).astype(int)
# Drop the record column
fighters = fighters.drop('Record', axis=1)

In [4]:
# Takes in height in ft and convert it to inches
def convert_to_inches(string):
    if pd.isna(string):
         return string
    string_list = string.split("'")
    ft = int(string_list[0].strip())
    inches = int(string_list[1].replace("\"", "").strip())
    return ft * 12 + inches

In [5]:
# Convert height to inches
fighters['Height(inches)'] = fighters['Height(inches)'].apply(convert_to_inches) 
fighters

Unnamed: 0,Full Name,Height(inches),Weight(lbs),Reach(inches),Stance,DOB,SLpM.,Str.Acc.,SApM,Str.Def,TD Avg.,TD Acc.,TD Def.,Sub. Avg.,win,loss,draw
0,Tom Aaron,,155.0,,,"Jul 13, 1978",0.00,0%,0.00,0%,0.00,0%,0%,0.0,5,3,0
1,Danny Abbadi,71.0,155.0,,Orthodox,"Jul 03, 1983",3.29,38%,4.41,57%,0.00,0%,77%,0.0,4,6,0
2,Nariman Abbasov,68.0,155.0,66.0,Orthodox,"Feb 01, 1994",3.00,20%,5.67,46%,0.00,0%,66%,0.0,28,4,0
3,David Abbott,72.0,265.0,,Switch,,1.35,30%,3.55,38%,1.07,33%,66%,0.0,10,15,0
4,Hamdy Abdelwahab,74.0,264.0,72.0,Southpaw,"Jan 22, 1993",3.87,52%,3.13,59%,3.00,75%,0%,0.0,5,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4217,Dave Zitanick,,170.0,,,"Mar 05, 1980",0.00,0%,0.00,0%,0.00,0%,0%,0.0,5,7,0
4218,Alex Zuniga,,145.0,,,,0.00,0%,0.00,0%,0.00,0%,0%,0.0,6,3,0
4219,George Zuniga,69.0,185.0,,,,7.64,38%,5.45,37%,0.00,0%,100%,0.0,3,1,0
4220,Allan Zuniga,67.0,155.0,70.0,Orthodox,"Apr 04, 1992",3.93,52%,1.80,61%,0.00,0%,57%,1.0,13,1,0


In [6]:
# Rename column names
fighters = fighters.rename(columns={'Height(inches)': 'Height', 'Weight(lbs)': 'Weight', 'Reach(inches)': 'Reach'})

In [7]:
fighters

Unnamed: 0,Full Name,Height,Weight,Reach,Stance,DOB,SLpM.,Str.Acc.,SApM,Str.Def,TD Avg.,TD Acc.,TD Def.,Sub. Avg.,win,loss,draw
0,Tom Aaron,,155.0,,,"Jul 13, 1978",0.00,0%,0.00,0%,0.00,0%,0%,0.0,5,3,0
1,Danny Abbadi,71.0,155.0,,Orthodox,"Jul 03, 1983",3.29,38%,4.41,57%,0.00,0%,77%,0.0,4,6,0
2,Nariman Abbasov,68.0,155.0,66.0,Orthodox,"Feb 01, 1994",3.00,20%,5.67,46%,0.00,0%,66%,0.0,28,4,0
3,David Abbott,72.0,265.0,,Switch,,1.35,30%,3.55,38%,1.07,33%,66%,0.0,10,15,0
4,Hamdy Abdelwahab,74.0,264.0,72.0,Southpaw,"Jan 22, 1993",3.87,52%,3.13,59%,3.00,75%,0%,0.0,5,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4217,Dave Zitanick,,170.0,,,"Mar 05, 1980",0.00,0%,0.00,0%,0.00,0%,0%,0.0,5,7,0
4218,Alex Zuniga,,145.0,,,,0.00,0%,0.00,0%,0.00,0%,0%,0.0,6,3,0
4219,George Zuniga,69.0,185.0,,,,7.64,38%,5.45,37%,0.00,0%,100%,0.0,3,1,0
4220,Allan Zuniga,67.0,155.0,70.0,Orthodox,"Apr 04, 1992",3.93,52%,1.80,61%,0.00,0%,57%,1.0,13,1,0


In [8]:
# Convert DOB column to datetime
fighters['DOB'] = pd.to_datetime(fighters['DOB'])

# Calculate age
today = datetime.today()
fighters['DOB'] = fighters['DOB'].apply(lambda x: today.year - x.year - ((today.month, today.day) < (x.month, x.day)))

In [9]:
# Rename DOB column to age
fighters.rename(columns={'DOB': 'Age'}, inplace=True)

In [10]:
# Convert percentages to float numbers
def percentages_to_float(column):
    return column.str.rstrip('%').astype(float) / 100
fighters[['Str.Acc.', 'Str.Def', 'TD Acc.', 'TD Def.']] = fighters[['Str.Acc.', 'Str.Def', 'TD Acc.', 'TD Def.']].apply(percentages_to_float)

In [11]:
# Set fighters' full name as the new index
fighters.set_index('Full Name', inplace=True)

In [12]:
# Impute missing height with weight class average height
fighters['Height'] = fighters.groupby('Weight')['Height'].transform(
    lambda x: x.fillna(x.mean())
)
# Impute missing reach with weight class average reach
fighters['Reach'] = fighters.groupby('Weight')['Reach'].transform(
    lambda x: x.fillna(x.mean())
)
# Drop rows with missing weight because it's a low percentage (2%)
fighters = fighters[~fighters['Weight'].isnull()]
# Impute missing stance value with the mode
fighters['Stance'] = fighters['Stance'].fillna('Orthodox')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fighters['Stance'] = fighters['Stance'].fillna('Orthodox')


In [13]:
columns_to_check = ['SLpM.', 'Str.Acc.', 'SApM', 'Str.Def']
fighters = fighters[(fighters['SLpM.'] > 0) & (fighters['Str.Acc.'] > 0) & (fighters['SApM'] > 0) & (fighters['Str.Def'] > 0)]
fighters.drop('Age', axis=1, inplace=True)

In [14]:
fighters

Unnamed: 0_level_0,Height,Weight,Reach,Stance,SLpM.,Str.Acc.,SApM,Str.Def,TD Avg.,TD Acc.,TD Def.,Sub. Avg.,win,loss,draw
Full Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Danny Abbadi,71.0,155.0,71.576503,Orthodox,3.29,0.38,4.41,0.57,0.00,0.00,0.77,0.0,4,6,0
Nariman Abbasov,68.0,155.0,66.000000,Orthodox,3.00,0.20,5.67,0.46,0.00,0.00,0.66,0.0,28,4,0
David Abbott,72.0,265.0,77.871795,Switch,1.35,0.30,3.55,0.38,1.07,0.33,0.66,0.0,10,15,0
Hamdy Abdelwahab,74.0,264.0,72.000000,Southpaw,3.87,0.52,3.13,0.59,3.00,0.75,0.00,0.0,5,0,0
Mansur Abdul-Malik,74.0,185.0,79.000000,Orthodox,6.61,0.54,4.21,0.53,0.00,0.00,0.75,0.0,7,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
James Zikic,74.0,205.0,75.846154,Orthodox,1.47,0.35,1.60,0.44,0.50,0.25,0.74,0.5,21,10,2
Cat Zingano,66.0,145.0,68.000000,Southpaw,2.57,0.61,1.63,0.47,2.77,0.65,0.42,0.8,10,4,0
George Zuniga,69.0,185.0,75.077821,Orthodox,7.64,0.38,5.45,0.37,0.00,0.00,1.00,0.0,3,1,0
Allan Zuniga,67.0,155.0,70.000000,Orthodox,3.93,0.52,1.80,0.61,0.00,0.00,0.57,1.0,13,1,0


In [15]:
fighters = fighters[~fighters['Reach'].isnull()]

In [16]:
full_name_column = np.array(fighters.index)

In [17]:
encoder = OneHotEncoder(sparse_output=False)

# Fit and transform the data
encoded_array = encoder.fit_transform(fighters[['Stance']])
print(encoded_array)

# Convert to a DataFrame for better readability
encoded_df = pd.DataFrame(encoded_array, columns=encoder.get_feature_names_out(['Stance']))
encoded_df

[[0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1.]
 ...
 [0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0.]]


Unnamed: 0,Stance_Open Stance,Stance_Orthodox,Stance_Sideways,Stance_Southpaw,Stance_Switch
0,0.0,1.0,0.0,0.0,0.0
1,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,0.0,1.0,0.0
4,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...
3274,0.0,1.0,0.0,0.0,0.0
3275,0.0,0.0,0.0,1.0,0.0
3276,0.0,1.0,0.0,0.0,0.0
3277,0.0,1.0,0.0,0.0,0.0


In [18]:
fighters.drop('Stance', axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fighters.drop('Stance', axis=1, inplace=True)


In [19]:
# Initialize the StandardScaler
scaler = StandardScaler()

# Fit and transform the data
fighters_standardized = scaler.fit_transform(fighters)

# Convert back to a DataFrame for better readability
fighters_standardized = pd.DataFrame(fighters_standardized, columns=fighters.columns)
fighters_standardized

Unnamed: 0,Height,Weight,Reach,SLpM.,Str.Acc.,SApM,Str.Def,TD Avg.,TD Acc.,TD Def.,Sub. Avg.,win,loss,draw
0,0.282398,-0.303697,-0.087593,0.132759,-0.488670,0.265847,0.499067,-0.773633,-1.159647,0.889281,-0.483597,-1.209842,0.037276,-0.327225
1,-0.570899,-0.303697,-1.480631,-0.037220,-2.006444,0.801253,-0.495895,-0.773633,-1.159647,0.538818,-0.483597,1.760264,-0.412916,-0.327225
2,0.566831,2.836891,1.485001,-1.004342,-1.163236,-0.099588,-1.219504,-0.230008,0.037894,0.538818,-0.483597,-0.467315,2.063141,-0.327225
3,1.135695,2.808340,0.018198,0.472717,0.691821,-0.278057,0.679970,0.750549,1.562036,-1.563964,-0.483597,-1.086087,-1.313301,-0.327225
4,1.135695,0.552827,1.766832,2.078726,0.860463,0.180862,0.137263,-0.773633,-1.159647,0.825560,-0.483597,-0.838579,-1.313301,-0.327225
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3274,1.135695,1.123843,0.978986,-0.934006,-0.741632,-0.928192,-0.676797,-0.519602,-0.252419,0.793700,-0.114912,0.893983,0.937660,2.402978
3275,-1.139764,-0.589205,-0.981021,-0.289258,1.450709,-0.915445,-0.405444,0.633695,1.199145,-0.225830,0.106299,-0.467315,-0.412916,-0.327225
3276,-0.286467,0.552827,0.787053,2.682444,-0.488670,0.707769,-1.309955,-0.773633,-1.159647,1.622069,-0.483597,-1.333596,-1.088205,-0.327225
3277,-0.855331,-0.303697,-0.481411,0.507885,0.691821,-0.843207,0.860872,-0.773633,-1.159647,0.252075,0.253773,-0.096052,-1.088205,-0.327225


In [20]:
fighters_standardized['Full Name'] = full_name_column
fighters_standardized.drop_duplicates(inplace=True)

In [21]:
fights = pd.read_csv("data/fight-matchups.csv")
fights.head(10)

Unnamed: 0,fighter1,fighter2
0,Petr Yan,Deiveson Figueiredo
1,Yan Xiaonan,Tabatha Ricci
2,Muslim Salikhov,Song Kenan
3,Gabriella Fernandes,Wang Cong
4,Carlos Ulberg,Volkan Oezdemir
5,Zhang Mingyang,Ozzy Diaz
6,SuYoung You,Baergeng Jieleyisi
7,DongHun Choi,Kiru Sahota
8,Shi Ming,Feng Xiaocan
9,Carlos Hernandez,Nyamjargal Tumendemberel


In [22]:
matchups = fights.merge(fighters_standardized, left_on = 'fighter1', right_on = 'Full Name')
fighter2 = matchups['fighter2']
matchups['Full Name'] = fighter2
matchups.drop('fighter2', axis=1, inplace=True)

In [23]:
matchups = matchups.merge(fighters_standardized, left_on = 'Full Name', right_on = 'Full Name')

In [24]:
matchups.rename(columns={'Full Name': 'fighter2'}, inplace=True)

In [25]:
matchups.drop(columns=['fighter1', 'fighter2'], axis=1, inplace=True)

In [26]:
matchups

Unnamed: 0,Height_x,Weight_x,Reach_x,SLpM._x,Str.Acc._x,SApM_x,Str.Def_x,TD Avg._x,TD Acc._x,TD Def._x,...,Str.Acc._y,SApM_y,Str.Def_y,TD Avg._y,TD Acc._y,TD Def._y,Sub. Avg._y,win_y,loss_y,draw_y
0,-0.855331,-0.874713,-1.230826,1.199524,0.860463,0.146868,0.589519,0.044345,0.618519,1.112303,...,0.860463,-0.099588,-0.224542,0.049426,0.074183,0.347656,0.548721,1.265246,-0.412916,1.037877
1,-1.424196,-1.445729,-2.230045,1.129188,0.101576,-0.146330,0.770421,-0.306217,1.489458,0.443237,...,-0.657311,0.639781,0.499067,0.618453,0.183050,1.016722,-0.409860,-0.343561,-0.638013,-0.327225
2,0.282398,0.124565,-0.481411,0.185511,0.438859,-0.375790,0.951323,-0.224927,0.001605,0.729979,...,0.185897,0.864991,-0.043639,-0.560247,-0.070973,0.761840,-0.483597,1.017737,0.712564,-0.327225
3,-1.139764,-1.160221,-1.480631,0.232402,0.185897,0.729015,-0.224542,-0.489119,-0.506443,0.347656,...,0.185897,-0.915445,1.674932,-0.184282,0.654809,1.622069,1.212353,-0.962333,-1.088205,-0.327225
4,1.704560,1.123843,1.267222,2.424545,1.029105,0.206358,-0.134091,-0.407829,1.562036,1.080443,...,0.354538,0.185111,0.318165,-0.519602,-0.107263,0.984862,-0.409860,0.770229,0.487468,-0.327225
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7561,-0.002034,0.124565,0.408918,-0.746443,0.101576,-0.902697,1.041774,0.222166,2.469264,0.156494,...,-0.320028,-1.195895,-0.134091,-0.773633,-1.159647,-0.767456,1.064879,0.893983,1.612948,1.037877
7562,0.851263,1.123843,0.767613,-0.125140,1.113425,-0.957937,0.679970,0.638776,0.509652,0.283935,...,0.185897,-0.405535,-0.134091,-0.255411,1.017700,0.124633,-0.041175,1.512755,1.838045,-0.327225
7563,0.566831,0.552827,0.517808,-0.863670,0.185897,-0.405535,-0.134091,-0.255411,1.017700,0.124633,...,-1.163236,-0.099588,-1.219504,-0.230008,0.037894,0.538818,-0.483597,-0.467315,2.063141,-0.327225
7564,0.851263,1.123843,0.767613,-0.693691,0.691821,-0.494769,-1.038602,1.202723,0.291918,0.029052,...,1.029105,-0.698733,-0.314993,-0.570408,2.469264,0.124633,0.106299,1.141492,3.638813,1.037877


In [27]:
# Split the DataFrame into two halves (vertically)
half = len(matchups.columns) // 2  # Number of columns to split

matchups1 = matchups.iloc[:, :half]  # First half
matchups2 = matchups.iloc[:, half:]  # Second half

matchups_reversed = pd.concat([matchups2, matchups1], axis=1)
matchups_reversed

Unnamed: 0,Height_y,Weight_y,Reach_y,SLpM._y,Str.Acc._y,SApM_y,Str.Def_y,TD Avg._y,TD Acc._y,TD Def._y,...,Str.Acc._x,SApM_x,Str.Def_x,TD Avg._x,TD Acc._x,TD Def._x,Sub. Avg._x,win_x,loss_x,draw_x
0,-1.424196,-0.874713,-0.981021,-0.089972,0.860463,-0.099588,-0.224542,0.049426,0.074183,0.347656,...,0.860463,0.146868,0.589519,0.044345,0.618519,1.112303,-0.409860,0.522720,-0.187820,-0.327225
1,-2.561926,-1.445729,-2.729655,0.654419,-0.657311,0.639781,0.499067,0.618453,0.183050,1.016722,...,0.101576,-0.146330,0.770421,-0.306217,1.489458,0.443237,-0.483597,0.646474,-0.412916,-0.327225
2,0.566831,0.124565,-0.231607,0.836120,0.185897,0.864991,-0.043639,-0.560247,-0.070973,0.761840,...,0.438859,-0.375790,0.951323,-0.224927,0.001605,0.729979,-0.483597,0.893983,-0.187820,-0.327225
3,-1.139764,-1.160221,-1.480631,1.609818,0.185897,-0.915445,1.674932,-0.184282,0.654809,1.622069,...,0.185897,0.729015,-0.224542,-0.489119,-0.506443,0.347656,-0.262386,-0.467315,-0.638013,-0.327225
4,1.135695,1.123843,0.767613,1.105742,0.354538,0.185111,0.318165,-0.519602,-0.107263,0.984862,...,1.029105,0.206358,-0.134091,-0.407829,1.562036,1.080443,-0.336123,-0.219807,-1.088205,-0.327225
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7561,-0.570899,0.124565,0.408918,-1.443943,-0.320028,-1.195895,-0.134091,-0.773633,-1.159647,-0.767456,...,0.101576,-0.902697,1.041774,0.222166,2.469264,0.156494,0.253773,1.884018,0.262372,2.402978
7562,0.566831,0.552827,0.517808,-0.863670,0.185897,-0.405535,-0.134091,-0.255411,1.017700,0.124633,...,1.113425,-0.957937,0.679970,0.638776,0.509652,0.283935,-0.114912,0.646474,1.162756,-0.327225
7563,0.566831,2.836891,1.485001,-1.004342,-1.163236,-0.099588,-1.219504,-0.230008,0.037894,0.538818,...,0.185897,-0.405535,-0.134091,-0.255411,1.017700,0.124633,-0.041175,1.512755,1.838045,-0.327225
7564,1.420128,2.380078,1.142320,-0.717136,1.029105,-0.698733,-0.314993,-0.570408,2.469264,0.124633,...,0.691821,-0.494769,-1.038602,1.202723,0.291918,0.029052,0.106299,0.275211,0.937660,-0.327225


In [28]:
matchups_reversed.columns = matchups.columns

In [29]:
matchups['Result'] = 1
matchups_reversed['Result'] = 0

In [30]:
matchups_reversed

Unnamed: 0,Height_x,Weight_x,Reach_x,SLpM._x,Str.Acc._x,SApM_x,Str.Def_x,TD Avg._x,TD Acc._x,TD Def._x,...,SApM_y,Str.Def_y,TD Avg._y,TD Acc._y,TD Def._y,Sub. Avg._y,win_y,loss_y,draw_y,Result
0,-1.424196,-0.874713,-0.981021,-0.089972,0.860463,-0.099588,-0.224542,0.049426,0.074183,0.347656,...,0.146868,0.589519,0.044345,0.618519,1.112303,-0.409860,0.522720,-0.187820,-0.327225,0
1,-2.561926,-1.445729,-2.729655,0.654419,-0.657311,0.639781,0.499067,0.618453,0.183050,1.016722,...,-0.146330,0.770421,-0.306217,1.489458,0.443237,-0.483597,0.646474,-0.412916,-0.327225,0
2,0.566831,0.124565,-0.231607,0.836120,0.185897,0.864991,-0.043639,-0.560247,-0.070973,0.761840,...,-0.375790,0.951323,-0.224927,0.001605,0.729979,-0.483597,0.893983,-0.187820,-0.327225,0
3,-1.139764,-1.160221,-1.480631,1.609818,0.185897,-0.915445,1.674932,-0.184282,0.654809,1.622069,...,0.729015,-0.224542,-0.489119,-0.506443,0.347656,-0.262386,-0.467315,-0.638013,-0.327225,0
4,1.135695,1.123843,0.767613,1.105742,0.354538,0.185111,0.318165,-0.519602,-0.107263,0.984862,...,0.206358,-0.134091,-0.407829,1.562036,1.080443,-0.336123,-0.219807,-1.088205,-0.327225,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7561,-0.570899,0.124565,0.408918,-1.443943,-0.320028,-1.195895,-0.134091,-0.773633,-1.159647,-0.767456,...,-0.902697,1.041774,0.222166,2.469264,0.156494,0.253773,1.884018,0.262372,2.402978,0
7562,0.566831,0.552827,0.517808,-0.863670,0.185897,-0.405535,-0.134091,-0.255411,1.017700,0.124633,...,-0.957937,0.679970,0.638776,0.509652,0.283935,-0.114912,0.646474,1.162756,-0.327225,0
7563,0.566831,2.836891,1.485001,-1.004342,-1.163236,-0.099588,-1.219504,-0.230008,0.037894,0.538818,...,-0.405535,-0.134091,-0.255411,1.017700,0.124633,-0.041175,1.512755,1.838045,-0.327225,0
7564,1.420128,2.380078,1.142320,-0.717136,1.029105,-0.698733,-0.314993,-0.570408,2.469264,0.124633,...,-0.494769,-1.038602,1.202723,0.291918,0.029052,0.106299,0.275211,0.937660,-0.327225,0


In [31]:
matchups_total = pd.concat([matchups, matchups_reversed], ignore_index=True)

In [32]:
matchups_total

Unnamed: 0,Height_x,Weight_x,Reach_x,SLpM._x,Str.Acc._x,SApM_x,Str.Def_x,TD Avg._x,TD Acc._x,TD Def._x,...,SApM_y,Str.Def_y,TD Avg._y,TD Acc._y,TD Def._y,Sub. Avg._y,win_y,loss_y,draw_y,Result
0,-0.855331,-0.874713,-1.230826,1.199524,0.860463,0.146868,0.589519,0.044345,0.618519,1.112303,...,-0.099588,-0.224542,0.049426,0.074183,0.347656,0.548721,1.265246,-0.412916,1.037877,1
1,-1.424196,-1.445729,-2.230045,1.129188,0.101576,-0.146330,0.770421,-0.306217,1.489458,0.443237,...,0.639781,0.499067,0.618453,0.183050,1.016722,-0.409860,-0.343561,-0.638013,-0.327225,1
2,0.282398,0.124565,-0.481411,0.185511,0.438859,-0.375790,0.951323,-0.224927,0.001605,0.729979,...,0.864991,-0.043639,-0.560247,-0.070973,0.761840,-0.483597,1.017737,0.712564,-0.327225,1
3,-1.139764,-1.160221,-1.480631,0.232402,0.185897,0.729015,-0.224542,-0.489119,-0.506443,0.347656,...,-0.915445,1.674932,-0.184282,0.654809,1.622069,1.212353,-0.962333,-1.088205,-0.327225,1
4,1.704560,1.123843,1.267222,2.424545,1.029105,0.206358,-0.134091,-0.407829,1.562036,1.080443,...,0.185111,0.318165,-0.519602,-0.107263,0.984862,-0.409860,0.770229,0.487468,-0.327225,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15127,-0.570899,0.124565,0.408918,-1.443943,-0.320028,-1.195895,-0.134091,-0.773633,-1.159647,-0.767456,...,-0.902697,1.041774,0.222166,2.469264,0.156494,0.253773,1.884018,0.262372,2.402978,0
15128,0.566831,0.552827,0.517808,-0.863670,0.185897,-0.405535,-0.134091,-0.255411,1.017700,0.124633,...,-0.957937,0.679970,0.638776,0.509652,0.283935,-0.114912,0.646474,1.162756,-0.327225,0
15129,0.566831,2.836891,1.485001,-1.004342,-1.163236,-0.099588,-1.219504,-0.230008,0.037894,0.538818,...,-0.405535,-0.134091,-0.255411,1.017700,0.124633,-0.041175,1.512755,1.838045,-0.327225,0
15130,1.420128,2.380078,1.142320,-0.717136,1.029105,-0.698733,-0.314993,-0.570408,2.469264,0.124633,...,-0.494769,-1.038602,1.202723,0.291918,0.029052,0.106299,0.275211,0.937660,-0.327225,0


In [33]:
X = matchups_total.iloc[:, 0:-1]
Y = matchups_total[['Result']]

In [34]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [35]:
# Train logistic regression model
model = LogisticRegression()
model.fit(X_train, Y_train)

# Predict on test data
Y_pred = model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(Y_test, Y_pred))

Accuracy: 0.7030062768417575


  y = column_or_1d(y, warn=True)


In [36]:
# Train random forest classifier
randomForest = RandomForestClassifier(n_estimators=100, max_depth=10, min_samples_split=20, random_state=42)
randomForest.fit(X_train, Y_train)
Y_pred = randomForest.predict(X_test)
# Evaluate the model
print("Accuracy:", accuracy_score(Y_test, Y_pred))

  return fit_method(estimator, *args, **kwargs)


Accuracy: 0.7066402378592666


In [37]:
# Train XGBoost
xgb_clf = XGBClassifier(n_estimators=100, learning_rate=0.1, max_depth=3, random_state=42)
xgb_clf.fit(X_train, Y_train)
Y_pred = xgb_clf.predict(X_test)
# Evaluate the model
print("Accuracy:", accuracy_score(Y_test, Y_pred))

Accuracy: 0.7129170796167823


In [38]:
from sklearn.metrics import classification_report
print(classification_report(Y_test, Y_pred))

              precision    recall  f1-score   support

           0       0.71      0.72      0.71      1509
           1       0.72      0.71      0.71      1518

    accuracy                           0.71      3027
   macro avg       0.71      0.71      0.71      3027
weighted avg       0.71      0.71      0.71      3027



In [39]:
ian = np.array(fighters_standardized[fighters_standardized['Full Name'] == 'Alexander Volkov']).ravel()[0: -1]
print(ian)

[2.5578572454060273 2.4086285429738052 2.0166365252453966
 1.1174648901682738 1.1134253367661968 -0.3375464967415707
 0.22771398110674343 -0.46371561094181013 1.2354341858163949
 0.7299794988185152 -0.3361226815909607 2.9978079115613103
 1.1627562306297805 -0.3272247016820785]


In [40]:
shavkat = np.array(fighters_standardized[fighters_standardized['Full Name'] == 'Ciryl Gane']).ravel()[0: -1]
print(shavkat)

[1.7045601123223073 2.2658745539704017 2.266441321951022 1.28744390909519
 1.4507085072827441 -0.6604895164556541 0.8608719469871884
 -0.41799016522374094 -0.25241897779380557 -0.19397003427369303
 -0.04117485983762957 -0.09605213565307238 -0.8631086366577064
 -0.3272247016820785]


In [41]:
shavkat_versus_ian = np.append(shavkat, ian)
shavkat_versus_ian = shavkat_versus_ian[np.newaxis, :]
len(shavkat_versus_ian)

1

In [42]:
model.predict(np.array(shavkat_versus_ian))



array([0])

In [43]:
import joblib

In [44]:
joblib.dump(xgb_clf, "model.pickle")

['model.pickle']