In [2]:
import pandas as pd
import numpy as np
import joblib
import os
import sys
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

notebook_dir = os.getcwd() 
project_root = os.path.abspath(os.path.join(notebook_dir, '..'))
sys.path.append(project_root)

from Models.Functional_Classes.My_OneHotEncoder import MyOneHotEncoder
from Database.database_manager import DatabaseManager


In [3]:
def load_fight_data():
    with DatabaseManager('../Database/fighters.db') as db:
        fights = db.get_fights()

    if not fights: 
        print("No fights found in the database. Please run the main.py file first to fetch the data.")
        return pd.DataFrame()
    
    dataframe = pd.DataFrame(fights)
    return dataframe

In [4]:
def create_proxy_scores(df: pd.DataFrame) -> pd.Series:
    df['win_method_clean'] = df['win_method'].str.split('\n').str[0]

    def get_detailed_score(row):
        base_score = 1.0 

        win_method = row['win_method_clean']
        if pd.isna(win_method) or pd.isna(row['winner_id']):
            return 1.0 

        if 'KO' in win_method or 'SUB' in win_method:
            base_score = 1.3 
        elif 'U-DEC' in win_method:
            base_score = 1.0 
        elif 'S-DEC' in win_method or 'M-DEC' in win_method:
            base_score = 0.85 
        elif 'Overturned' in win_method or 'CNC' in win_method:
            return 1.0 

        modifier = 0.0
        final_round = row['final_round']
        
        if base_score > 1.0:
            if final_round == 1:
                modifier += 0.7 
            elif final_round == 2:
                modifier += 0.4 
            
            if row['final_time_seconds'] < 90 and final_round == 1:
                modifier += 0.1

        winner_is_red = row['winner_id'] == row['red_fighter_id']
        
        if winner_is_red:
            w_strikes, l_strikes = row['red_sig_strikes'], row['blue_sig_strikes']
            w_td, l_td = row['red_takedowns'], row['blue_takedowns']
            w_kd, l_kd = row['red_knockdowns'], row['blue_knockdowns']
        else:
            w_strikes, l_strikes = row['blue_sig_strikes'], row['red_sig_strikes']
            w_td, l_td = row['blue_takedowns'], row['red_takedowns']
            w_kd, l_kd = row['blue_knockdowns'], row['red_knockdowns']
            
        strike_diff = w_strikes - l_strikes
        td_diff = w_td - l_td

        modifier += (strike_diff // 25) * 0.1
        modifier += (td_diff // 2) * 0.1
        modifier += w_kd * 0.2
        
        modifier -= l_kd * 0.3

        final_score = base_score + modifier
        return max(0.5, min(final_score, 2.5)) 

    scores = df.apply(get_detailed_score, axis=1)
    print("-> Detailed proxy scores generated successfully.")
    return scores

In [5]:
def create_features(df: pd.DataFrame) -> pd.DataFrame:
    winner_is_red = df['winner_id'] == df['red_fighter_id'] 
    features = pd.DataFrame(index=df.index)

    features['sig_strike_differential'] = np.where(
        winner_is_red,
        df['red_sig_strikes'] - df['blue_sig_strikes'],
        df['blue_sig_strikes'] - df['red_sig_strikes']
    )

    features['takedown_differential'] = np.where(
        winner_is_red,
        df['red_takedowns'] - df['blue_takedowns'],
        df['blue_takedowns'] - df['red_takedowns']
    )
    
    features['knockdown_differential'] = np.where(
        winner_is_red,
        df['red_knockdowns'] - df['blue_knockdowns'],
        df['blue_knockdowns'] - df['red_knockdowns']
    )

    features['sub_differential'] = np.where(
        winner_is_red,
        df['red_sub_attempts'] - df['blue_sub_attempts'],
        df['blue_sub_attempts'] - df['red_sub_attempts']
    )

    features['total_sig_strikes'] = df['red_sig_strikes'] + df['blue_sig_strikes']
    features['total_takedowns'] = df['red_takedowns'] + df['blue_takedowns']
    features['total_knockdowns'] = df['red_knockdowns'] + df['blue_knockdowns']
    features['total_submissions'] = df['red_sub_attempts'] + df['blue_sub_attempts']



    features['final_round'] = df['final_round']
    features['final_time_seconds'] = df['final_time_seconds']

    

    return features


In [8]:
df = load_fight_data()

X_raw = df 
y = create_proxy_scores(df)

X_aligned, y_aligned = X_raw.align(y, join='inner', axis=0)

X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_aligned, y_aligned, test_size=0.2, random_state=42
)

X_train_num = create_features(X_train_raw)
X_test_num = create_features(X_test_raw)

X_train_raw['win_method_clean'] = X_train_raw['win_method'].str.split('\n').str[0]
X_test_raw['win_method_clean'] = X_test_raw['win_method'].str.split('\n').str[0]

encoder = MyOneHotEncoder()

print(X_train_raw['win_method_clean'])
encoder.fit(X_train_raw['win_method_clean'])

encoded_train_df = encoder.transform(X_train_raw['win_method_clean'])
encoded_test_df = encoder.transform(X_test_raw['win_method_clean'])

X_train_final = pd.concat([X_train_num, encoded_train_df], axis=1)
X_test_final = pd.concat([X_test_num, encoded_test_df], axis=1)

X_train_final.fillna(0, inplace=True)
X_test_final.fillna(0, inplace=True)

X_train_final.head()

-> Detailed proxy scores generated successfully.
3785     U-DEC
5374    KO/TKO
1978    KO/TKO
5700     U-DEC
5469     U-DEC
         ...  
5734    KO/TKO
5191     U-DEC
5390     S-DEC
860      U-DEC
7270    KO/TKO
Name: win_method_clean, Length: 6659, dtype: object


Unnamed: 0,sig_strike_differential,takedown_differential,knockdown_differential,sub_differential,total_sig_strikes,total_takedowns,total_knockdowns,total_submissions,final_round,final_time_seconds,win_method_clean_CNC,win_method_clean_DQ,win_method_clean_KO/TKO,win_method_clean_M-DEC,win_method_clean_Other,win_method_clean_Overturned,win_method_clean_S-DEC,win_method_clean_SUB,win_method_clean_U-DEC
3785,-2,0,0,0,130,2,0,0,3,300,0,0,0,0,0,0,0,0,1
5374,14,0,1,0,68,0,1,0,2,60,0,0,1,0,0,0,0,0,0
1978,4,0,1,0,4,0,1,0,1,7,0,0,1,0,0,0,0,0,0
5700,32,-1,0,0,100,1,0,0,3,300,0,0,0,0,0,0,0,0,1
5469,75,2,0,0,165,2,0,0,3,300,0,0,0,0,0,0,0,0,1


In [7]:
from Models.Functional_Classes.My_XGBoost import My_XGBoost_Regressor

Brainfuck_Model = My_XGBoost_Regressor()
Brainfuck_Model.fit(X_train_final.values, y_train.values)

predictions = Brainfuck_Model.predict(X_test_final.values)


KeyboardInterrupt: 

In [22]:
np.set_printoptions(threshold=sys.maxsize)
print(predictions)

[1.77039899 1.28135651 1.09621872 0.66542294 1.09969843 0.84428604
 1.00926178 1.20563559 0.74437655 1.60929139 1.96070431 2.00461283
 2.06456483 0.69281906 1.90200067 1.2632386  1.57443408 2.27672498
 0.90680846 1.49972226 1.55610717 1.19737245 1.39325085 1.90200067
 1.18850924 0.82948498 1.01417988 2.08302812 1.10431927 1.01800604
 1.30500165 1.21381617 2.00461283 2.19290334 2.27672498 2.09305025
 1.678344   1.00926178 2.08310736 1.39217179 1.29400895 2.06456483
 1.44125288 2.00461283 1.85886496 1.28588689 1.18738343 0.9915608
 2.05837941 1.72471497 1.28788639 2.21007262 1.096725   1.90200067
 0.89458236 0.9642152  2.01637575 0.89458236 1.20563559 1.79756923
 0.9638527  2.27672498 2.27672498 1.92727868 0.87292265 2.21007262
 0.94574714 0.90172578 1.9145916  1.19900399 2.00461283 0.77712272
 2.0630623  1.01800604 1.42704223 1.7189051  2.21007262 2.10075215
 1.64020359 2.09764068 2.01637575 0.9934482  0.53063952 1.18738343
 1.00437223 1.59956303 1.11424601 0.90172578 1.69205264 2.39181

In [26]:
import os

MODEL_PATH = "../Elo_system/Performance_Vector_Modal/dominance_modal.pkl"
os.makedirs(os.path.dirname(MODEL_PATH), exist_ok=True)
joblib.dump(Brainfuck_Model, MODEL_PATH)

['../Elo_system/Performance_Vector_Modal/dominance_modal.pkl']