In [1]:
import pandas as pd
import numpy as np
from datetime import datetime
import sklearn
from sklearn.preprocessing import OneHotEncoder
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [3]:
# Loads in the fighter-stats df
fighters = pd.read_csv("fighter-stats-threading.csv")
fighters

Unnamed: 0,Full Name,Height(inches),Weight(lbs),Reach(inches),Stance,DOB,SLpM.,Str.Acc.,SApM,Str.Def,TD Avg.,TD Acc.,TD Def.,Sub. Avg.,W,L,D
0,Tom Aaron,,155.0,,,"Jul 13, 1978",0.00,0%,0.00,0%,0.00,0%,0%,0.0,Record: 5-3-0,,
1,Danny Abbadi,5' 11,155.0,,Orthodox,"Jul 03, 1983",3.29,38%,4.41,57%,0.00,0%,77%,0.0,Record: 4-6-0,,
2,Nariman Abbasov,5' 8,155.0,66.0,Orthodox,"Feb 01, 1994",3.00,20%,5.67,46%,0.00,0%,66%,0.0,Record: 28-4-0,,
3,David Abbott,6' 0,265.0,,Switch,,1.35,30%,3.55,38%,1.07,33%,66%,0.0,Record: 10-15-0,,
4,Hamdy Abdelwahab,6' 2,264.0,72.0,Southpaw,"Jan 22, 1993",3.87,52%,3.13,59%,3.00,75%,0%,0.0,Record: 5-0-0 (1 NC),,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4258,Dave Zitanick,,170.0,,,"Mar 05, 1980",0.00,0%,0.00,0%,0.00,0%,0%,0.0,Record: 5-7-0 (1 NC),,
4259,Alex Zuniga,,145.0,,,,0.00,0%,0.00,0%,0.00,0%,0%,0.0,Record: 6-3-0,,
4260,George Zuniga,5' 9,185.0,,,,7.64,38%,5.45,37%,0.00,0%,100%,0.0,Record: 3-1-0,,
4261,Allan Zuniga,5' 7,155.0,70.0,Orthodox,"Apr 04, 1992",3.93,52%,1.80,61%,0.00,0%,57%,1.0,Record: 13-1-0,,


In [3]:
# Drops unwanted columns
fighters = fighters.drop(columns = ['L', 'D'])
# Seperate the record into w, l, d and make three new columns
pattern = r"Record:\s(\d+)-(\d+)-(\d+)"
fighters[['win', 'loss', 'draw']] = fighters['W'].str.extract(pattern).astype(int)
# Drop the record column
fighters = fighters.drop('W', axis=1)

In [4]:
# Takes in height in ft and convert it to inches
def convert_to_inches(string):
    if pd.isna(string):
         return string
    string_list = string.split("'")
    ft = int(string_list[0].strip())
    inches = int(string_list[1].replace("\"", "").strip())
    return ft * 12 + inches

In [5]:
# Convert height to inches
fighters['Height(inches)'] = fighters['Height(inches)'].apply(convert_to_inches) 
fighters

Unnamed: 0,Full Name,Height(inches),Weight(lbs),Reach(inches),Stance,DOB,SLpM.,Str.Acc.,SApM,Str.Def,TD Avg.,TD Acc.,TD Def.,Sub. Avg.,win,loss,draw
0,Tom Aaron,,155.0,,,"Jul 13, 1978",0.00,0%,0.00,0%,0.00,0%,0%,0.0,5,3,0
1,Danny Abbadi,71.0,155.0,,Orthodox,"Jul 03, 1983",3.29,38%,4.41,57%,0.00,0%,77%,0.0,4,6,0
2,Nariman Abbasov,68.0,155.0,66.0,Orthodox,"Feb 01, 1994",3.00,20%,5.67,46%,0.00,0%,66%,0.0,28,4,0
3,David Abbott,72.0,265.0,,Switch,,1.35,30%,3.55,38%,1.07,33%,66%,0.0,10,15,0
4,Hamdy Abdelwahab,74.0,264.0,72.0,Southpaw,"Jan 22, 1993",3.87,52%,3.13,59%,3.00,75%,0%,0.0,5,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4257,Dave Zitanick,,170.0,,,"Mar 05, 1980",0.00,0%,0.00,0%,0.00,0%,0%,0.0,5,7,0
4258,Alex Zuniga,,145.0,,,,0.00,0%,0.00,0%,0.00,0%,0%,0.0,6,3,0
4259,George Zuniga,69.0,185.0,,,,7.64,38%,5.45,37%,0.00,0%,100%,0.0,3,1,0
4260,Allan Zuniga,67.0,155.0,70.0,Orthodox,"Apr 04, 1992",3.93,52%,1.80,61%,0.00,0%,57%,1.0,13,1,0


In [6]:
# Rename column names
fighters = fighters.rename(columns={'Height(inches)': 'Height', 'Weight(lbs)': 'Weight', 'Reach(inches)': 'Reach'})

In [7]:
fighters

Unnamed: 0,Full Name,Height,Weight,Reach,Stance,DOB,SLpM.,Str.Acc.,SApM,Str.Def,TD Avg.,TD Acc.,TD Def.,Sub. Avg.,win,loss,draw
0,Tom Aaron,,155.0,,,"Jul 13, 1978",0.00,0%,0.00,0%,0.00,0%,0%,0.0,5,3,0
1,Danny Abbadi,71.0,155.0,,Orthodox,"Jul 03, 1983",3.29,38%,4.41,57%,0.00,0%,77%,0.0,4,6,0
2,Nariman Abbasov,68.0,155.0,66.0,Orthodox,"Feb 01, 1994",3.00,20%,5.67,46%,0.00,0%,66%,0.0,28,4,0
3,David Abbott,72.0,265.0,,Switch,,1.35,30%,3.55,38%,1.07,33%,66%,0.0,10,15,0
4,Hamdy Abdelwahab,74.0,264.0,72.0,Southpaw,"Jan 22, 1993",3.87,52%,3.13,59%,3.00,75%,0%,0.0,5,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
4257,Dave Zitanick,,170.0,,,"Mar 05, 1980",0.00,0%,0.00,0%,0.00,0%,0%,0.0,5,7,0
4258,Alex Zuniga,,145.0,,,,0.00,0%,0.00,0%,0.00,0%,0%,0.0,6,3,0
4259,George Zuniga,69.0,185.0,,,,7.64,38%,5.45,37%,0.00,0%,100%,0.0,3,1,0
4260,Allan Zuniga,67.0,155.0,70.0,Orthodox,"Apr 04, 1992",3.93,52%,1.80,61%,0.00,0%,57%,1.0,13,1,0


In [8]:
# Convert DOB column to datetime
fighters['DOB'] = pd.to_datetime(fighters['DOB'])

# Calculate age
today = datetime.today()
fighters['DOB'] = fighters['DOB'].apply(lambda x: today.year - x.year - ((today.month, today.day) < (x.month, x.day)))

In [9]:
# Rename DOB column to age
fighters.rename(columns={'DOB': 'Age'}, inplace=True)

In [10]:
# Convert percentages to float numbers
def percentages_to_float(column):
    return column.str.rstrip('%').astype(float) / 100
fighters[['Str.Acc.', 'Str.Def', 'TD Acc.', 'TD Def.']] = fighters[['Str.Acc.', 'Str.Def', 'TD Acc.', 'TD Def.']].apply(percentages_to_float)

In [11]:
# Set fighters' full name as the new index
fighters.set_index('Full Name', inplace=True)

In [12]:
# Impute missing height with weight class average height
fighters['Height'] = fighters.groupby('Weight')['Height'].transform(
    lambda x: x.fillna(x.mean())
)
# Impute missing reach with weight class average reach
fighters['Reach'] = fighters.groupby('Weight')['Reach'].transform(
    lambda x: x.fillna(x.mean())
)
# Drop rows with missing weight because it's a low percentage (2%)
fighters = fighters[~fighters['Weight'].isnull()]
# Impute missing stance value with the mode
fighters['Stance'] = fighters['Stance'].fillna('Orthodox')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fighters['Stance'] = fighters['Stance'].fillna('Orthodox')


In [13]:
columns_to_check = ['SLpM.', 'Str.Acc.', 'SApM', 'Str.Def']
fighters = fighters[(fighters['SLpM.'] > 0) & (fighters['Str.Acc.'] > 0) & (fighters['SApM'] > 0) & (fighters['Str.Def'] > 0)]
fighters.drop('Age', axis=1, inplace=True)

In [14]:
fighters

Unnamed: 0_level_0,Height,Weight,Reach,Stance,SLpM.,Str.Acc.,SApM,Str.Def,TD Avg.,TD Acc.,TD Def.,Sub. Avg.,win,loss,draw
Full Name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
Danny Abbadi,71.0,155.0,71.588076,Orthodox,3.29,0.38,4.41,0.57,0.00,0.00,0.77,0.0,4,6,0
Nariman Abbasov,68.0,155.0,66.000000,Orthodox,3.00,0.20,5.67,0.46,0.00,0.00,0.66,0.0,28,4,0
David Abbott,72.0,265.0,77.871795,Switch,1.35,0.30,3.55,0.38,1.07,0.33,0.66,0.0,10,15,0
Hamdy Abdelwahab,74.0,264.0,72.000000,Southpaw,3.87,0.52,3.13,0.59,3.00,0.75,0.00,0.0,5,0,0
Mansur Abdul-Malik,74.0,185.0,79.000000,Orthodox,6.61,0.54,4.21,0.53,0.00,0.00,0.75,0.0,7,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
James Zikic,74.0,205.0,75.853261,Orthodox,1.47,0.35,1.60,0.44,0.50,0.25,0.74,0.5,21,10,2
Cat Zingano,66.0,145.0,68.000000,Southpaw,2.57,0.61,1.63,0.47,2.77,0.65,0.42,0.8,10,4,0
George Zuniga,69.0,185.0,75.071970,Orthodox,7.64,0.38,5.45,0.37,0.00,0.00,1.00,0.0,3,1,0
Allan Zuniga,67.0,155.0,70.000000,Orthodox,3.93,0.52,1.80,0.61,0.00,0.00,0.57,1.0,13,1,0


In [15]:
fighters = fighters[~fighters['Reach'].isnull()]

In [16]:
full_name_column = np.array(fighters.index)

In [17]:
encoder = OneHotEncoder(sparse_output=False)

# Fit and transform the data
encoded_array = encoder.fit_transform(fighters[['Stance']])
print(encoded_array)

# Convert to a DataFrame for better readability
encoded_df = pd.DataFrame(encoded_array, columns=encoder.get_feature_names_out(['Stance']))
encoded_df

[[0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 0. 0. 0. 1.]
 ...
 [0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0.]
 [0. 1. 0. 0. 0.]]


Unnamed: 0,Stance_Open Stance,Stance_Orthodox,Stance_Sideways,Stance_Southpaw,Stance_Switch
0,0.0,1.0,0.0,0.0,0.0
1,0.0,1.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,1.0
3,0.0,0.0,0.0,1.0,0.0
4,0.0,1.0,0.0,0.0,0.0
...,...,...,...,...,...
3305,0.0,1.0,0.0,0.0,0.0
3306,0.0,0.0,0.0,1.0,0.0
3307,0.0,1.0,0.0,0.0,0.0
3308,0.0,1.0,0.0,0.0,0.0


In [18]:
fighters.drop('Stance', axis=1, inplace=True)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  fighters.drop('Stance', axis=1, inplace=True)


In [19]:
# Initialize the StandardScaler
scaler = StandardScaler()

# Fit and transform the data
fighters_standardized = scaler.fit_transform(fighters)

# Convert back to a DataFrame for better readability
fighters_standardized = pd.DataFrame(fighters_standardized, columns=fighters.columns)
fighters_standardized

Unnamed: 0,Height,Weight,Reach,SLpM.,Str.Acc.,SApM,Str.Def,TD Avg.,TD Acc.,TD Def.,Sub. Avg.,win,loss,draw
0,0.283888,-0.303502,-0.084264,0.128367,-0.491175,0.267512,0.500568,-0.775648,-1.161977,0.890143,-0.484673,-1.209134,0.040432,-0.326988
1,-0.569874,-0.303502,-1.479995,-0.039103,-2.011313,0.804022,-0.495262,-0.775648,-1.161977,0.539555,-0.484673,1.766504,-0.409412,-0.326988
2,0.568475,2.844160,1.485216,-0.991951,-1.166792,-0.098678,-1.219503,-0.231481,0.037422,0.539555,-0.484673,-0.465224,2.064730,-0.326988
3,1.137650,2.815545,0.018622,0.463308,0.691155,-0.277515,0.681629,0.750053,1.563929,-1.563972,-0.484673,-1.085149,-1.309101,-0.326988
4,1.137650,0.554952,1.767008,2.045612,0.860059,0.182351,0.138448,-0.775648,-1.161977,0.826399,-0.484673,-0.837179,-1.309101,-0.326988
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3305,1.137650,1.127254,0.981048,-0.922653,-0.744531,-0.928992,-0.676323,-0.521364,-0.253342,0.794528,-0.114958,0.898610,0.940120,2.413087
3306,-1.139049,-0.589653,-0.980456,-0.287421,1.451224,-0.916218,-0.404732,0.633083,1.200475,-0.225364,0.106871,-0.465224,-0.409412,-0.326988
3307,-0.285287,0.554952,0.785906,2.640421,-0.491175,0.710346,-1.310033,-0.775648,-1.161977,1.623190,-0.484673,-1.333119,-1.084179,-0.326988
3308,-0.854462,-0.303502,-0.480917,0.497957,0.691155,-0.843831,0.862689,-0.775648,-1.161977,0.252710,0.254757,-0.093270,-1.084179,-0.326988


In [20]:
fighters_standardized['Full Name'] = full_name_column
fighters_standardized.drop_duplicates(inplace=True)

In [21]:
fights = pd.read_csv("fight-matchups.csv")
fights.head(10)

Unnamed: 0,fighter1,fighter2
0,Petr Yan,Deiveson Figueiredo
1,Yan Xiaonan,Tabatha Ricci
2,Muslim Salikhov,Song Kenan
3,Gabriella Fernandes,Wang Cong
4,Carlos Ulberg,Volkan Oezdemir
5,Zhang Mingyang,Ozzy Diaz
6,SuYoung You,Baergeng Jieleyisi
7,DongHun Choi,Kiru Sahota
8,Shi Ming,Feng Xiaocan
9,Carlos Hernandez,Nyamjargal Tumendemberel


In [22]:
matchups = fights.merge(fighters_standardized, left_on = 'fighter1', right_on = 'Full Name')
fighter2 = matchups['fighter2']
matchups['Full Name'] = fighter2
matchups.drop('fighter2', axis=1, inplace=True)

In [23]:
matchups = matchups.merge(fighters_standardized, left_on = 'Full Name', right_on = 'Full Name')

In [24]:
matchups.rename(columns={'Full Name': 'fighter2'}, inplace=True)

In [25]:
matchups.drop(columns=['fighter1', 'fighter2'], axis=1, inplace=True)

In [26]:
matchups

Unnamed: 0,Height_x,Weight_x,Reach_x,SLpM._x,Str.Acc._x,SApM_x,Str.Def_x,TD Avg._x,TD Acc._x,TD Def._x,...,Str.Acc._y,SApM_y,Str.Def_y,TD Avg._y,TD Acc._y,TD Def._y,Sub. Avg._y,win_y,loss_y,draw_y
0,-0.854462,-0.875804,-1.230225,1.179387,0.860059,0.148287,0.591098,0.043145,0.618948,1.113244,...,0.860059,-0.098678,-0.223672,0.048231,0.073767,0.348325,0.550529,1.270565,-0.409412,1.043050
1,-1.423636,-1.448106,-2.229303,1.110089,0.099990,-0.145516,0.772159,-0.307766,1.491238,0.443940,...,-0.660079,0.642217,0.500568,0.617826,0.182803,1.017629,-0.410730,-0.341240,-0.634335,-0.326988
2,0.283888,0.125725,-0.480917,0.180341,0.437799,-0.375449,0.953219,-0.226395,0.001076,0.730785,...,0.184442,0.867892,-0.042612,-0.562049,-0.071615,0.762656,-0.484673,1.022595,0.715198,-0.326988
3,-1.139049,-1.161955,-1.479995,0.226539,0.184442,0.731636,-0.223672,-0.490850,-0.507760,0.348325,...,0.184442,-0.916218,1.677459,-0.185710,0.655294,1.623190,1.216017,-0.961164,-1.084179,-0.326988
4,1.706825,1.127254,1.267469,2.386328,1.028964,0.207899,-0.133142,-0.409479,1.563929,1.081372,...,0.353347,0.186609,0.319508,-0.521364,-0.107960,0.985758,-0.410730,0.774625,0.490276,-0.326988
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7689,-0.000699,0.125725,0.406758,-0.737858,0.099990,-0.903444,1.043749,0.221144,2.472565,0.157095,...,-0.322270,-1.197247,-0.133142,-0.775648,-1.161977,-0.767182,1.068131,0.898610,1.614886,1.043050
7690,0.853063,1.127254,0.767930,-0.125726,1.113416,-0.958798,0.681629,0.638169,0.509912,0.284582,...,0.184442,-0.405255,-0.133142,-0.256909,1.018748,0.125224,-0.041015,1.518535,1.839808,-0.326988
7691,0.568475,0.554952,0.518161,-0.853355,0.184442,-0.405255,-0.133142,-0.256909,1.018748,0.125224,...,-1.166792,-0.098678,-1.219503,-0.231481,0.037422,0.539555,-0.484673,-0.465224,2.064730,-0.326988
7692,0.853063,1.127254,0.767930,-0.685885,0.691155,-0.494674,-1.038443,1.202678,0.291839,0.029609,...,1.028964,-0.699059,-0.314202,-0.572221,2.472565,0.125224,0.106871,1.146580,3.639184,1.043050


In [27]:
# Split the DataFrame into two halves (vertically)
half = len(matchups.columns) // 2  # Number of columns to split

matchups1 = matchups.iloc[:, :half]  # First half
matchups2 = matchups.iloc[:, half:]  # Second half

matchups_reversed = pd.concat([matchups2, matchups1], axis=1)
matchups_reversed

Unnamed: 0,Height_y,Weight_y,Reach_y,SLpM._y,Str.Acc._y,SApM_y,Str.Def_y,TD Avg._y,TD Acc._y,TD Def._y,...,Str.Acc._x,SApM_x,Str.Def_x,TD Avg._x,TD Acc._x,TD Def._x,Sub. Avg._x,win_x,loss_x,draw_x
0,-1.423636,-0.875804,-0.980456,-0.091077,0.860059,-0.098678,-0.223672,0.048231,0.073767,0.348325,...,0.860059,0.148287,0.591098,0.043145,0.618948,1.113244,-0.410730,0.526655,-0.184490,-0.326988
1,-2.561986,-1.448106,-2.728842,0.642327,-0.660079,0.642217,0.500568,0.617826,0.182803,1.017629,...,0.099990,-0.145516,0.772159,-0.307766,1.491238,0.443940,-0.484673,0.650640,-0.409412,-0.326988
2,0.568475,0.125725,-0.231148,0.821347,0.184442,0.867892,-0.042612,-0.562049,-0.071615,0.762656,...,0.437799,-0.375449,0.953219,-0.226395,0.001076,0.730785,-0.484673,0.898610,-0.184490,-0.326988
3,-1.139049,-1.161955,-1.479995,1.583626,0.184442,-0.916218,1.677459,-0.185710,0.655294,1.623190,...,0.184442,0.731636,-0.223672,-0.490850,-0.507760,0.348325,-0.262844,-0.465224,-0.634335,-0.326988
4,1.137650,1.127254,0.767930,1.086990,0.353347,0.186609,0.319508,-0.521364,-0.107960,0.985758,...,1.028964,0.207899,-0.133142,-0.409479,1.563929,1.081372,-0.336787,-0.217255,-1.084179,-0.326988
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7689,-0.569874,0.125725,0.406758,-1.425064,-0.322270,-1.197247,-0.133142,-0.775648,-1.161977,-0.767182,...,0.099990,-0.903444,1.043749,0.221144,2.472565,0.157095,0.254757,1.890489,0.265354,2.413087
7690,0.568475,0.554952,0.518161,-0.853355,0.184442,-0.405255,-0.133142,-0.256909,1.018748,0.125224,...,1.113416,-0.958798,0.681629,0.638169,0.509912,0.284582,-0.114958,0.650640,1.165042,-0.326988
7691,0.568475,2.844160,1.485216,-0.991951,-1.166792,-0.098678,-1.219503,-0.231481,0.037422,0.539555,...,0.184442,-0.405255,-0.133142,-0.256909,1.018748,0.125224,-0.041015,1.518535,1.839808,-0.326988
7692,1.422237,2.386319,1.142584,-0.708984,1.028964,-0.699059,-0.314202,-0.572221,2.472565,0.125224,...,0.691155,-0.494674,-1.038443,1.202678,0.291839,0.029609,0.106871,0.278685,0.940120,-0.326988


In [28]:
matchups_reversed.columns = matchups.columns

In [29]:
matchups['Result'] = 1
matchups_reversed['Result'] = 0

In [30]:
matchups_reversed

Unnamed: 0,Height_x,Weight_x,Reach_x,SLpM._x,Str.Acc._x,SApM_x,Str.Def_x,TD Avg._x,TD Acc._x,TD Def._x,...,SApM_y,Str.Def_y,TD Avg._y,TD Acc._y,TD Def._y,Sub. Avg._y,win_y,loss_y,draw_y,Result
0,-1.423636,-0.875804,-0.980456,-0.091077,0.860059,-0.098678,-0.223672,0.048231,0.073767,0.348325,...,0.148287,0.591098,0.043145,0.618948,1.113244,-0.410730,0.526655,-0.184490,-0.326988,0
1,-2.561986,-1.448106,-2.728842,0.642327,-0.660079,0.642217,0.500568,0.617826,0.182803,1.017629,...,-0.145516,0.772159,-0.307766,1.491238,0.443940,-0.484673,0.650640,-0.409412,-0.326988,0
2,0.568475,0.125725,-0.231148,0.821347,0.184442,0.867892,-0.042612,-0.562049,-0.071615,0.762656,...,-0.375449,0.953219,-0.226395,0.001076,0.730785,-0.484673,0.898610,-0.184490,-0.326988,0
3,-1.139049,-1.161955,-1.479995,1.583626,0.184442,-0.916218,1.677459,-0.185710,0.655294,1.623190,...,0.731636,-0.223672,-0.490850,-0.507760,0.348325,-0.262844,-0.465224,-0.634335,-0.326988,0
4,1.137650,1.127254,0.767930,1.086990,0.353347,0.186609,0.319508,-0.521364,-0.107960,0.985758,...,0.207899,-0.133142,-0.409479,1.563929,1.081372,-0.336787,-0.217255,-1.084179,-0.326988,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7689,-0.569874,0.125725,0.406758,-1.425064,-0.322270,-1.197247,-0.133142,-0.775648,-1.161977,-0.767182,...,-0.903444,1.043749,0.221144,2.472565,0.157095,0.254757,1.890489,0.265354,2.413087,0
7690,0.568475,0.554952,0.518161,-0.853355,0.184442,-0.405255,-0.133142,-0.256909,1.018748,0.125224,...,-0.958798,0.681629,0.638169,0.509912,0.284582,-0.114958,0.650640,1.165042,-0.326988,0
7691,0.568475,2.844160,1.485216,-0.991951,-1.166792,-0.098678,-1.219503,-0.231481,0.037422,0.539555,...,-0.405255,-0.133142,-0.256909,1.018748,0.125224,-0.041015,1.518535,1.839808,-0.326988,0
7692,1.422237,2.386319,1.142584,-0.708984,1.028964,-0.699059,-0.314202,-0.572221,2.472565,0.125224,...,-0.494674,-1.038443,1.202678,0.291839,0.029609,0.106871,0.278685,0.940120,-0.326988,0


In [31]:
matchups_total = pd.concat([matchups, matchups_reversed], ignore_index=True)

In [32]:
matchups_total

Unnamed: 0,Height_x,Weight_x,Reach_x,SLpM._x,Str.Acc._x,SApM_x,Str.Def_x,TD Avg._x,TD Acc._x,TD Def._x,...,SApM_y,Str.Def_y,TD Avg._y,TD Acc._y,TD Def._y,Sub. Avg._y,win_y,loss_y,draw_y,Result
0,-0.854462,-0.875804,-1.230225,1.179387,0.860059,0.148287,0.591098,0.043145,0.618948,1.113244,...,-0.098678,-0.223672,0.048231,0.073767,0.348325,0.550529,1.270565,-0.409412,1.043050,1
1,-1.423636,-1.448106,-2.229303,1.110089,0.099990,-0.145516,0.772159,-0.307766,1.491238,0.443940,...,0.642217,0.500568,0.617826,0.182803,1.017629,-0.410730,-0.341240,-0.634335,-0.326988,1
2,0.283888,0.125725,-0.480917,0.180341,0.437799,-0.375449,0.953219,-0.226395,0.001076,0.730785,...,0.867892,-0.042612,-0.562049,-0.071615,0.762656,-0.484673,1.022595,0.715198,-0.326988,1
3,-1.139049,-1.161955,-1.479995,0.226539,0.184442,0.731636,-0.223672,-0.490850,-0.507760,0.348325,...,-0.916218,1.677459,-0.185710,0.655294,1.623190,1.216017,-0.961164,-1.084179,-0.326988,1
4,1.706825,1.127254,1.267469,2.386328,1.028964,0.207899,-0.133142,-0.409479,1.563929,1.081372,...,0.186609,0.319508,-0.521364,-0.107960,0.985758,-0.410730,0.774625,0.490276,-0.326988,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
15383,-0.569874,0.125725,0.406758,-1.425064,-0.322270,-1.197247,-0.133142,-0.775648,-1.161977,-0.767182,...,-0.903444,1.043749,0.221144,2.472565,0.157095,0.254757,1.890489,0.265354,2.413087,0
15384,0.568475,0.554952,0.518161,-0.853355,0.184442,-0.405255,-0.133142,-0.256909,1.018748,0.125224,...,-0.958798,0.681629,0.638169,0.509912,0.284582,-0.114958,0.650640,1.165042,-0.326988,0
15385,0.568475,2.844160,1.485216,-0.991951,-1.166792,-0.098678,-1.219503,-0.231481,0.037422,0.539555,...,-0.405255,-0.133142,-0.256909,1.018748,0.125224,-0.041015,1.518535,1.839808,-0.326988,0
15386,1.422237,2.386319,1.142584,-0.708984,1.028964,-0.699059,-0.314202,-0.572221,2.472565,0.125224,...,-0.494674,-1.038443,1.202678,0.291839,0.029609,0.106871,0.278685,0.940120,-0.326988,0


In [33]:
X = matchups_total.iloc[:, 0:-1]
Y = matchups_total[['Result']]

In [34]:
X_train, X_test, Y_train, Y_test = train_test_split(X, Y, test_size=0.2, random_state=42)

In [35]:
# Train logistic regression model
model = LogisticRegression()
model.fit(X_train, Y_train)

# Predict on test data
Y_pred = model.predict(X_test)

# Evaluate the model
print("Accuracy:", accuracy_score(Y_test, Y_pred))

Accuracy: 0.6890838206627681


  y = column_or_1d(y, warn=True)


In [36]:
from sklearn.metrics import classification_report
print(classification_report(Y_test, Y_pred))

              precision    recall  f1-score   support

           0       0.69      0.68      0.68      1538
           1       0.68      0.70      0.69      1540

    accuracy                           0.69      3078
   macro avg       0.69      0.69      0.69      3078
weighted avg       0.69      0.69      0.69      3078



In [37]:
ian = np.array(fighters_standardized[fighters_standardized['Full Name'] == 'Alexander Volkov']).ravel()[0: -1]
print(ian)

[2.5605867943389655 2.4149337675003517 2.016776945695925
 1.1909368192667684 1.1134157776161828 -0.33286903984959837
 0.31950823368846687 -0.5467924674054148 1.127784159916913
 0.7626561640896571 -0.33678703961352974 3.006353813082722
 0.9401197982201033 -0.32698776331575624]


In [38]:
shavkat = np.array(fighters_standardized[fighters_standardized['Full Name'] == 'Ciryl Gane']).ravel()[0: -1]
print(shavkat)

[1.7068247081499959 2.2718582157516742 2.2665463442536296
 1.398830896595385 1.3667721485625544 -0.6777685918405547
 1.0437487770946121 -0.4806787605768157 -0.3987235504040868
 0.029608834096025372 -0.11495795342605632 -0.21725457408272847
 -0.8592565846399138 -0.32698776331575624]


In [39]:
shavkat_versus_ian = np.append(shavkat, ian)
shavkat_versus_ian = shavkat_versus_ian[np.newaxis, :]
len(shavkat_versus_ian)

1

In [40]:
model.predict(np.array(shavkat_versus_ian))



array([0])