In [1]:
from sys import prefix

import numpy as np
import pandas as pd
import os
import sys
import random

from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report

notebook_dir = os.getcwd()
project_root = os.path.abspath(os.path.join(notebook_dir, '..'))
sys.path.append(project_root)

from Database.database_manager import DatabaseManager
from utils.Fighter_Style import get_all_fighter_styles
from Models.Functional_Classes.Feature_Engineering.My_OneHotEncoder import MyOneHotEncoder
from Models.Functional_Classes.logistic_regression.my_logistic_regressor import MyLogisticRegressor

In [2]:
with DatabaseManager('../../../Database/fighters.db') as db:
    fights = db.get_fights()
    fighter_ids = db.get_fighter_ids()

In [3]:
fights = pd.DataFrame(fights)
fights.head()

Unnamed: 0,fight_id,red_fighter_id,blue_fighter_id,winner_id,event_date,win_method,final_round,red_knockdowns,red_sig_strikes,red_takedowns,...,blue_takedowns,is_completed,event_url,red_sub_attempts,blue_sub_attempts,final_time_seconds,red_fighter_elo_before,blue_fighter_elo_before,red_fighter_elo_after,blue_fighter_elo_after
0,1,2899,3774,,2021-03-13,CNC,2,0,19,0,...,0,1,http://ufcstats.com/event-details/8c90c1563972...,0,0,18,1626.759208,1630.968948,1626.895862,1630.83343
1,2,1483,2126,1483.0,2021-03-13,KO/TKO\n\n \n\n Punches,1,1,16,0,...,0,1,http://ufcstats.com/event-details/8c90c1563972...,0,0,71,1599.310991,1529.172145,1609.069693,1518.465845
2,3,4115,1119,4115.0,2021-03-13,KO/TKO\n\n \n\n Punch,1,1,1,0,...,0,1,http://ufcstats.com/event-details/8c90c1563972...,0,0,22,1627.490218,1406.265316,1632.474205,1397.73995
3,4,607,4274,607.0,2021-03-13,KO/TKO\n\n \n\n Punch,2,1,48,0,...,0,1,http://ufcstats.com/event-details/8c90c1563972...,0,0,183,1559.895877,1584.538271,1568.841704,1576.059124
4,5,2665,3638,2665.0,2021-03-13,S-DEC,3,0,55,2,...,0,1,http://ufcstats.com/event-details/8c90c1563972...,1,0,300,1479.758222,1686.221243,1485.432011,1679.282146


In [4]:
fighter_styles = get_all_fighter_styles(fighter_ids)
styles_df = pd.DataFrame(fighter_styles)

In [5]:
styles_df.head(200)

Unnamed: 0,fighter_id,primary_style,secondary_style,tertiary_attributes
0,2107,Newcomer,Newcomer,Newcomer
1,1709,Newcomer,Newcomer,Newcomer
2,1541,Power Grappler,Grinding Decision Fighter,Conventional Frame (Orthodox)
3,2402,Pure Striker,Counter Decision Fighter,Conventional Frame (Switch)
4,3761,Wrestle-Boxer,Grinding Finisher,Conventional Frame (Orthodox)
...,...,...,...,...
195,4004,Wrestle-Boxer,Grinding Decision Fighter,Conventional Frame (Orthodox)
196,797,Newcomer,Newcomer,Newcomer
197,2692,Pure Striker,Pressure Decision Fighter,Conventional Frame (Orthodox)
198,1245,Power Grappler,Grinding Decision Fighter,Conventional Frame (Orthodox)


In [6]:
styles_lookup = styles_df.set_index('fighter_id')

model_data = []

for _, fight in fights.iterrows():
    red_id = fight['red_fighter_id']
    blue_id = fight['blue_fighter_id']
    winner_id = fight['winner_id']

    try:
        red_style = styles_lookup.loc[red_id]
        blue_style = styles_lookup.loc[blue_id]
    except KeyError:
        continue

    fighters = [
        {'id' :red_id, 'style':red_style},
        {'id' :blue_id, 'style':blue_style}
    ]

    random.shuffle(fighters)

    fighter_1 = fighters[0]
    fighter_2 = fighters[1]

    if fighter_1['id'] == winner_id:
        fighter_1_win = 1
    elif fighter_2['id'] == winner_id:
        fighter_1_win = 0
    else:
        fighter_1_win = 0.5

    new_row = {
        'primary_1': fighter_1['style']['primary_style'],
        'secondary_1': fighter_1['style']['secondary_style'],
        'tertiary_1': fighter_1['style']['tertiary_attributes'],

        'primary_2': fighter_2['style']['primary_style'],
        'secondary_2': fighter_2['style']['secondary_style'],
        'tertiary_2': fighter_2['style']['tertiary_attributes'],

        'fighter_1_win': fighter_1_win
    }
    model_data.append(new_row)

model_data = pd.DataFrame(model_data)
model_data.head(5)

Unnamed: 0,primary_1,secondary_1,tertiary_1,primary_2,secondary_2,tertiary_2,fighter_1_win
0,Power Grappler,Grinding Decision Fighter,Conventional Frame (Orthodox),Wrestle-Boxer,Grinding Decision Fighter,Conventional Frame (Southpaw),0.5
1,Wrestle-Boxer,Grinding Finisher,Conventional Frame (Orthodox),Wrestle-Boxer,Grinding Finisher,Conventional Frame (Orthodox),1.0
2,Power Grappler,Grinding Decision Fighter,Conventional Frame (Southpaw),Striker,Paced Decision Fighter,Conventional Frame (Orthodox),0.0
3,Wrestle-Boxer,Grinding Decision Fighter,Conventional Frame (Orthodox),Striker,Paced Power Puncher,Conventional Frame (Southpaw),1.0
4,Striker,Pressure Power Puncher,Conventional Frame (Southpaw),Wrestle-Boxer,Grinding Power Puncher,Conventional Frame (Orthodox),0.0


In [7]:

X = model_data.iloc[:, :-1]
y = model_data.iloc[:, -1]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

style_groups = {
    'primary' : ['primary_1', 'primary_2'],
    'secondary' : ['secondary_1', 'secondary_2'],
    'tertiary' : ['tertiary_1', 'tertiary_2'],
}

fitted_encoders = {}
X_train_encoded_parts = []
X_test_encoded_parts = []

for group_name, cols in style_groups.items():
    combined_series = pd.concat([X_train[col] for col in cols], ignore_index=True)

    combined_series.name = group_name

    encoder = MyOneHotEncoder()
    encoder.fit(combined_series)
    fitted_encoders[group_name] = encoder

    for col in cols:
        train_parts = encoder.transform(X_train[col], prefix=col)
        test_part = encoder.transform(X_test[col] ,prefix=col)

        X_train_encoded_parts.append(train_parts)
        X_test_encoded_parts.append(test_part)


X_train_final = pd.concat(X_train_encoded_parts, axis=1)
X_test_final = pd.concat(X_test_encoded_parts, axis=1)

In [8]:
X_train_final.head()

Unnamed: 0,primary_1_Newcomer,primary_1_Power Grappler,primary_1_Pure Striker,primary_1_Striker,primary_1_Wrestle-Boxer,primary_2_Newcomer,primary_2_Power Grappler,primary_2_Pure Striker,primary_2_Striker,primary_2_Wrestle-Boxer,...,tertiary_1_Conventional Frame (Sideways),tertiary_1_Conventional Frame (Southpaw),tertiary_1_Conventional Frame (Switch),tertiary_1_Newcomer,tertiary_2_Conventional Frame (Open Stance),tertiary_2_Conventional Frame (Orthodox),tertiary_2_Conventional Frame (Sideways),tertiary_2_Conventional Frame (Southpaw),tertiary_2_Conventional Frame (Switch),tertiary_2_Newcomer
3785,0,0,0,1,0,0,0,0,1,0,...,0,0,0,0,0,0,0,1,0,0
5374,0,0,1,0,0,0,0,0,0,1,...,0,0,0,0,0,0,0,1,0,0
1978,0,0,1,0,0,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0
5700,0,0,0,1,0,0,0,0,0,1,...,0,0,0,0,0,1,0,0,0,0
5469,0,0,0,0,1,0,0,1,0,0,...,0,0,0,0,0,1,0,0,0,0


In [9]:
X_train_np = X_train_final.values
y_train_np = y_train.values

x_test_np = X_test_final.values
y_test_np = y_test.values

In [10]:
style_model = MyLogisticRegressor(learning_rate=0.1, n_iterations=1000)

style_model.fit(X_train_np, y_train_np)

In [11]:
predictions = style_model.predict(x_test_np)
print(predictions)

[1 0 0 ... 1 0 1]


In [12]:
from sklearn.metrics import mean_absolute_error

mae = mean_absolute_error(y_test_np, predictions)

print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"This means the model's probability is off by an average of {mae:.2%} points.")

Mean Absolute Error (MAE): 0.3964
This means the model's probability is off by an average of 39.64% points.


In [13]:
import joblib
import os

MODEL_PATH = "./style_model.pkl"
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
joblib.dump(style_model, MODEL_PATH)

['./style_model.pkl']

In [14]:
ENCODER_PATH = "./style_encoder.pkl"
os.makedirs(os.path.dirname(ENCODER_PATH), exist_ok=True)
joblib.dump(fitted_encoders, ENCODER_PATH)


['./style_encoder.pkl']