In [10]:
import pandas as pd
from xgboost import XGBClassifier
from sklearn.model_selection import GridSearchCV
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, accuracy_score

In [11]:
# Load the two CSV files this notebook works from. Both paths are relative to
# the notebook's working directory.
fighters_stats, ufc_stats = (
    pd.read_csv(csv_path)
    for csv_path in ('data/fighter_stats_3.csv', 'data/ufc_stats.csv')
)

In [12]:
# Create an empty classifier, then populate it from the saved model file
# (the path is relative to the notebook's working directory).
# NOTE(review): model.json is presumably written by a separate training step,
# and its feature order must match what predict_fight builds - confirm.
xgb = XGBClassifier()
xgb.load_model('model.json')

In [13]:
def get_fights(fighter_name):
    """Return the rows of ``ufc_stats`` whose 'fighter' value equals ``fighter_name``.

    The match is an exact equality test against the module-level ``ufc_stats``
    frame; an unknown name yields an empty DataFrame with the same columns.
    """
    is_requested_fighter = ufc_stats['fighter'] == fighter_name
    return ufc_stats.loc[is_requested_fighter]

def clean_columns(data):
    """Return a copy of ``data`` without the identifier and fight-metadata columns.

    Only the columns listed below are removed; every other column is kept in
    its original order. ``data`` itself is not modified. Because ``drop`` is
    called with its default error handling, a missing listed column raises
    ``KeyError``.
    """
    non_feature_columns = [
        'fighter',
        'Unnamed: 0',
        'event',
        'location',
        'attendance',
        'time',
        'scheduled_rounds',
        'weight_class',
        'round',
        'last_round',
        'id',
        'result',
        'winner',
        'fight_date',
    ]
    return data.drop(columns=non_feature_columns)

def get_last_fights(data, nb_fights=None):
    """Return the rows of the most recent fights, newest fight first.

    Rows are sorted by 'fight_date' in descending order and then regrouped by
    'id', so all rows of one fight stay contiguous and fights appear in order
    of their first (most recent) row.

    Parameters
    ----------
    data : pd.DataFrame
        Frame with at least the 'fight_date' and 'id' columns.
        NOTE(review): assumes 'fight_date' values sort chronologically
        (e.g. ISO-formatted strings or datetimes) - confirm.
    nb_fights : int or None, optional
        Maximum number of distinct fights to keep. ``None`` (the default)
        keeps every fight, which is the behaviour of the original function.

    Returns
    -------
    pd.DataFrame
        The selected rows with a fresh 0..n-1 index. Empty, but with the same
        columns as ``data``, when no fight is selected.

    Raises
    ------
    ValueError
        If ``nb_fights`` is negative.
    """
    if nb_fights is not None and nb_fights < 0:
        raise ValueError('nb_fights must be non-negative or None')

    ordered = data.sort_values(by=['fight_date'], ascending=False)
    # sort=False keeps the groups in order of first appearance, i.e. newest first.
    fight_groups = [rows for _, rows in ordered.groupby('id', sort=False)]
    if nb_fights is not None:
        fight_groups = fight_groups[:nb_fights]

    # pd.concat raises on an empty list, so return an empty frame explicitly.
    if not fight_groups:
        return ordered.iloc[0:0].reset_index(drop=True)

    return pd.concat(fight_groups).reset_index(drop=True)

def get_all_oponents(fighter_name):
    """Return the unique names of everyone who shares a fight id with ``fighter_name``.

    Fight ids come from ``get_fights(fighter_name)``. Any row of ``ufc_stats``
    with one of those ids and a different 'fighter' value contributes its
    fighter name. The result is the array produced by ``Series.unique()``,
    in order of first appearance.
    """
    fight_ids = get_fights(fighter_name)['id'].unique()
    in_same_fight = ufc_stats['id'].isin(fight_ids)
    is_other_fighter = ufc_stats['fighter'] != fighter_name
    opponent_rows = ufc_stats[in_same_fight & is_other_fighter]
    return opponent_rows['fighter'].unique()

def get_winner(fighter_name_1, fighter_name_2):
    """Summarise the recorded outcome of fights between two fighters.

    The function looks at ``fighter_name_2``'s rows in every fight that
    ``fighter_name_1`` took part in, keeps one row per fight id, and takes the
    most frequent value of the 'winner' column on those rows.

    Returns
    -------
    int
        -1 if the two fighters share no fight (or no outcome is recorded),
        0 if the most frequent 'winner' value on ``fighter_name_2``'s rows is
        'L', and 1 for any other value.
        NOTE(review): 'winner' presumably holds the outcome for the fighter of
        that row ('L', 'W', 'D', 'NC'), which would make 0 mean "fighter 1
        won" - confirm against the dataset.
    """
    fight_ids = get_fights(fighter_name_1)['id'].unique()
    opponent_rows = ufc_stats[
        ufc_stats['id'].isin(fight_ids)
        & (ufc_stats['fighter'] != fighter_name_1)
        & (ufc_stats['fighter'] == fighter_name_2)
    ]

    # Check for "no common fight" before touching 'winner'. The original
    # rebuilt a frame from an empty list, which has no columns at all, so the
    # lookup raised KeyError and the -1 branch was never reached.
    if opponent_rows.empty:
        return -1

    # One row per fight: the first row seen for each fight id.
    one_row_per_fight = opponent_rows.drop_duplicates(subset='id')
    outcome_counts = one_row_per_fight['winner'].value_counts()

    # value_counts() skips missing values, so it can still be empty here.
    if outcome_counts.empty:
        return -1

    most_common_outcome = outcome_counts.index[0]
    return 0 if most_common_outcome == 'L' else 1

def get_all_fighters_name():
    """Return the distinct values of the 'fighter' column of ``ufc_stats``.

    The result is the array produced by ``Series.unique()``, in order of
    first appearance.
    """
    fighter_column = ufc_stats['fighter']
    return fighter_column.unique()

def compute_extra_features(data: pd.DataFrame) -> pd.DataFrame:
    """Build a one-row frame with the fight count and the outcome tallies.

    Parameters
    ----------
    data : pd.DataFrame
        Rows of a single fighter, with at least the 'id' and 'winner' columns.
        Several rows may share one fight id; only the first row of each fight
        contributes to the tallies.

    Returns
    -------
    pd.DataFrame
        A single row with the columns 'nb_fights', 'L', 'W', 'D', 'NC', in
        that order. Tallies default to 0, so an empty ``data`` yields a row of
        zeros. Any other label found in 'winner' is appended as an extra
        column, as in the original implementation.
    """
    extra_features = {
        'nb_fights': len(data['id'].unique()),
        'L': 0,
        'W': 0,
        'D': 0,
        'NC': 0,
    }

    # One row per fight. Rows without an id are ignored, matching the
    # groupby('id') the original used. Working on the frame directly also
    # keeps the 'winner' column available when ``data`` is empty, where the
    # original raised KeyError.
    one_row_per_fight = data.dropna(subset=['id']).drop_duplicates(subset='id')
    outcome_counts = one_row_per_fight['winner'].value_counts()
    for outcome, count in outcome_counts.items():
        extra_features[outcome] = int(count)

    return pd.DataFrame([extra_features])

def get_fighter_data(fighter_name):
    """Build the one-row feature frame for ``fighter_name``.

    The row is the output of ``compute_extra_features`` for the fighter's
    rows, followed by the per-column median of the columns that
    ``clean_columns`` keeps.

    Raises
    ------
    KeyError
        If ``get_fights`` returns no rows for ``fighter_name``. The original
        also ended in a KeyError for this case, but an opaque one
        (``'winner'``) raised further down the call chain.
    """
    fights = get_fights(fighter_name)
    if fights.empty:
        raise KeyError(
            f"no fights found for fighter {fighter_name!r}; "
            "the name must match the 'fighter' column exactly"
        )

    extra_features = compute_extra_features(fights)
    # median() returns a Series; turn it into a single row so that it lines up
    # with the single row of extra features.
    median_stats = clean_columns(fights).median().to_frame().transpose()
    return pd.concat([extra_features, median_stats], axis=1)

def predict_fight(fighter_1, fighter_2):
    """Display both fighters' feature rows and print the model's class probabilities.

    The feature rows from ``get_fighter_data`` are shown stacked, then
    flattened and joined (fighter 1 first) into a single sample for
    ``xgb.predict_proba``. The first probability is printed next to
    ``fighter_1`` and the second next to ``fighter_2``. Nothing is returned.

    NOTE(review): this labelling assumes class 0 means "fighter_1 wins" and
    class 1 means "fighter_2 wins" - confirm against the training labels.
    """
    features_1 = get_fighter_data(fighter_1)
    features_2 = get_fighter_data(fighter_2)
    display(pd.concat([features_1, features_2]))

    # A single sample: fighter 1's values followed by fighter 2's.
    row_1 = features_1.values[0].tolist()
    row_2 = features_2.values[0].tolist()
    sample = [row_1 + row_2]

    probabilities = xgb.predict_proba(sample)[0]
    print(fighter_1, probabilities[0])
    print(fighter_2, probabilities[1])

In [14]:
# Search for a fighter's full name from a fragment of it.
# regex=False matches the fragment literally, so characters such as '.' or '('
# in a name are not read as regular-expression syntax. na=False counts rows
# with a missing name as non-matches, so they cannot put NaN into the mask.
fighter_name = 'Aspi'
ufc_stats.loc[
    ufc_stats['fighter'].str.contains(fighter_name, regex=False, na=False),
    'fighter',
].unique()

array(['Tom Aspinall'], dtype=object)

In [15]:
# Show both fighters' feature rows and the model's class probabilities for this matchup.
predict_fight('Conor McGregor', 'Benoit Saint Denis')

Unnamed: 0,nb_fights,L,W,D,NC,knockdowns,significant_strikes_landed,significant_strikes_attempted,significant_strikes_rate,total_strikes_landed,...,body_landed,body_attempted,leg_landed,leg_attempted,distance_landed,distance_attempted,clinch_landed,clinch_attempted,ground_landed,ground_attempted
0,14,4,10,0,0,0.0,20.0,39.5,0.54,30.0,...,3.0,4.0,1.0,2.0,15.0,31.5,1.0,1.0,1.0,1.5
0,5,1,4,0,0,0.0,19.5,40.5,0.56,33.5,...,6.0,7.5,1.0,1.0,15.5,32.0,1.5,2.0,1.0,1.5


Conor McGregor 0.6735128
Benoit Saint Denis 0.3264872


In [16]:
# Show both fighters' feature rows and the model's class probabilities for this matchup.
predict_fight('Benoit Saint Denis', 'Matt Frevola')

Unnamed: 0,nb_fights,L,W,D,NC,knockdowns,significant_strikes_landed,significant_strikes_attempted,significant_strikes_rate,total_strikes_landed,...,body_landed,body_attempted,leg_landed,leg_attempted,distance_landed,distance_attempted,clinch_landed,clinch_attempted,ground_landed,ground_attempted
0,5,1,4,0,0,0.0,19.5,40.5,0.56,33.5,...,6.0,7.5,1.0,1.0,15.5,32.0,1.5,2.0,1.0,1.5
0,9,3,5,1,0,0.0,11.0,30.0,0.38,16.0,...,2.0,6.0,2.0,3.0,9.0,27.0,1.0,2.0,0.0,0.0


Benoit Saint Denis 0.4611029
Matt Frevola 0.5388971


In [17]:
# Show both fighters' feature rows and the model's class probabilities for this matchup.
predict_fight('Sergei Pavlovich', 'Tom Aspinall')

Unnamed: 0,nb_fights,L,W,D,NC,knockdowns,significant_strikes_landed,significant_strikes_attempted,significant_strikes_rate,total_strikes_landed,...,body_landed,body_attempted,leg_landed,leg_attempted,distance_landed,distance_attempted,clinch_landed,clinch_attempted,ground_landed,ground_attempted
0,7,1,6,0,0,1.0,20.0,37.0,0.52,21.0,...,0.0,1.0,0.0,0.0,14.0,22.0,3.0,4.0,4.0,4.0
0,7,1,6,0,0,0.0,14.5,21.5,0.65,14.5,...,1.0,1.0,2.0,2.0,7.0,10.0,0.0,0.0,3.5,4.5


Sergei Pavlovich 0.57310474
Tom Aspinall 0.4268953
