In [1]:
%reload_ext autoreload
%autoreload 2

In [3]:
from catboost import CatBoostClassifier
import pandas as pd
import json
from pandas import json_normalize
from tqdm import tqdm_notebook as tqdm
import numpy as np
import ast
import time
import datetime

import sys
import os
sys.path.append(os.path.join(sys.path[0], '../../core/'))

from train_utils import calculate_roi, get_winner_favorite, combine_df, parse_odds

# Notebook-wide pandas display settings: 6 digits of precision, never truncate columns.
pd.set_option('display.precision',6)
pd.set_option('display.max_columns', None)
# Switch off pandas' chained-assignment warning for the whole session.
pd.options.mode.chained_assignment = None

In [5]:
# Load data
def load_fighters(path="./data_models/fighters_df.csv"):
    """Load the fighters table and build an index -> name lookup.

    Parameters
    ----------
    path : str or path-like, optional
        CSV file to read. Its first column is used as the index. Defaults to
        the location this notebook has always read from.

    Returns
    -------
    fighters_df : pandas.DataFrame
        The table restricted to the columns listed in ``fighters_cols`` below,
        with ``dateOfBirth`` parsed to datetimes.
    f_name_dict : dict
        Mapping from the frame's index values to the ``name`` column.

    Raises
    ------
    KeyError
        If the CSV lacks any of the expected columns.
    """
    fighters_cols = [
        "name",
        "weight",
        "height",
        "armSpan",
        "legSwing",
        "weightCategory.id",
        "weightCategory.name",
        "dateOfBirth",
        "country",
        "city",
        "timezone",
    ]

    fighters_df = pd.read_csv(path, index_col=0)
    fighters_df["dateOfBirth"] = pd.to_datetime(fighters_df["dateOfBirth"])

    # Keep only the columns used downstream; anything else in the file is dropped.
    fighters_df = fighters_df[fighters_cols]
    f_name_dict = fighters_df["name"].to_dict()

    return fighters_df, f_name_dict


# Fighter master data and the index -> name lookup.
fighters_df, f_name_dict = load_fighters()

# Feature-name lists stored as JSON (the file has a .txt extension).
with open('data_models/generated_features_08.04.2021.txt', 'r') as outfile:
    generated_features = json.load(outfile)

# Drop the last eight fighter-1 feature names, then strip the first three characters of each.
# NOTE(review): presumably the three characters are the "f1_" prefix and the eight dropped names
# are the odds/age/static/home features added later in this notebook; confirm against the JSON.
num_cols = [feature_name[3:] for feature_name in generated_features['fighter1_stats'][:-8]]

# Per-fighter static attributes and their prefixed column names for each side.
static_cols = ['country', 'city', 'armSpan', 'height', 'legSwing', 'timezone', 'weight']
f1_static_cols = [f'f1_{col}' for col in static_cols]
f2_static_cols = [f'f2_{col}' for col in static_cols]

# Per-fight cumulative statistics; event dates are parsed so they can be compared with the fight date.
f_stats_events_cumulative = pd.read_csv(
    'data_models/PROD_f_stats_events_cumulative_prod_08.04.2021.csv',
    index_col=0,
)
f_stats_events_cumulative['eventDate.date'] = pd.to_datetime(
    f_stats_events_cumulative['eventDate.date']
)

In [6]:
# Ids of the two fighters to predict for. They are kept as strings and cast with int()
# wherever they are used to index fighters_df or filter the stats table.
# NOTE(review): the name comments below were not checked against f_name_dict; confirm they match.
f1_id = '2597' # 'Marvin Vettori'
f2_id = '3418' # 'Kevin Holland'
# Sanity check: show fighter 1's date of birth.
fighters_df.loc[int(f1_id), ['dateOfBirth']]


dateOfBirth    1995-06-28 00:00:00
Name: 2597, dtype: object

In [9]:
# Example predict_fight request against a locally running service (port 8001). It carries the same
# fighter ids, odds, weight category, location and event date that the cells below enter by hand.
'http://127.0.0.1:8001/predict_fight?f1_id=2597&f2_id=3418&f1_odd=2.95&f2_odd=1.43&weightCategory_id=4&city=Abu%20Dhabi&country=United%20Arab%20Emirates&event_date=2021-01-16&event_name=UFC%20Fight%20Night&time_zone=Asia/Dubai'


'http://127.0.0.1:8001/predict_fight?f1_id=2597&f2_id=3418&f1_odd=2.95&f2_odd=1.43&weightCategory_id=4&city=Abu%20Dhabi&country=United%20Arab%20Emirates&event_date=2021-01-16&event_name=UFC%20Fight%20Night&time_zone=Asia/Dubai'

In [13]:
# Fight parameters entered by hand for this run.
event_date = datetime.date(2021, 1, 16)
f1_odd, f2_odd = [2.95, 1.43]
weightCategory_id, city, country, is_fight_night, timezone = '4', 'Abu Dhabi', 'United Arab Emirates', True, 'Asia/Dubai'

# A stats row is considered usable when its NaN share is strictly below this value.
MAX_NAN_SHARE = 0.2


def _fighter_static_features(fighter_id, event_date):
    """Return ``(birth_date, static_values, age)`` for one fighter.

    ``birth_date`` is the one-element Series read from ``fighters_df``,
    ``static_values`` is the array of ``static_cols`` values for the fighter and
    ``age`` is the number of whole 365-day periods between the birth date and
    ``event_date`` (leap days are not accounted for).

    Raises ``KeyError`` if ``fighter_id`` is not in the index of ``fighters_df``.
    """
    fighter_key = int(fighter_id)
    birth_date = fighters_df.loc[fighter_key, ['dateOfBirth']]
    static_values = fighters_df.loc[fighter_key, static_cols].values
    age = ((pd.to_datetime(event_date) - pd.to_datetime(birth_date)) / 365).dt.days.values[0]
    return birth_date, static_values, age


def _latest_usable_stats(fighter_id, event_date, max_nan_share=MAX_NAN_SHARE):
    """Return ``(latest_row, nan_share)`` for one fighter.

    ``latest_row`` is a one-row DataFrame (index reset to 0) holding the last row of
    ``f_stats_events_cumulative`` that belongs to the fighter, is dated strictly
    before ``event_date`` and whose NaN share is below ``max_nan_share``.
    ``nan_share`` is the per-row share for every row of the fighter before the event.

    Raises ``IndexError`` with an explanatory message when no row qualifies
    (previously the bare positional-indexer error from ``.iloc[-1]``).
    """
    history = f_stats_events_cumulative[
        (f_stats_events_cumulative['fighterId'] == int(fighter_id)) &
        (f_stats_events_cumulative['eventDate.date'] < pd.to_datetime(event_date))]

    # NaNs are counted over num_cols but divided by the total column count of the frame.
    # NOTE(review): kept exactly as before; confirm this matches how the training data was filtered.
    nan_share = history[num_cols].isna().sum(axis=1) / history.shape[1]

    usable = history[nan_share < max_nan_share]
    if usable.empty:
        raise IndexError(
            "No usable stats row for fighter {} before {} ({} earlier row(s), "
            "none with NaN share below {})".format(fighter_id, event_date, len(history), max_nan_share)
        )

    # "Last" means last in file order. NOTE(review): assumes the CSV is in chronological order; confirm.
    latest_row = pd.DataFrame(usable.iloc[-1]).T.reset_index(drop=True)
    return latest_row, nan_share


### Static stats
f1_birthDate, f1_static_stats, f1_age = _fighter_static_features(f1_id, event_date)
f2_birthDate, f2_static_stats, f2_age = _fighter_static_features(f2_id, event_date)


### Dynamic stats
fighter1_stats, fighter1_stats_NaNs = _latest_usable_stats(f1_id, event_date)
fighter2_stats, fighter2_stats_NaNs = _latest_usable_stats(f2_id, event_date)
fighter2_stats

Unnamed: 0,cumsum_duration,cumsum_winner,cumsum_hitsTotal,cumsum_hitsSuccessful,cumsum_takedownTotal,cumsum_takedownSuccessful,cumsum_submissionAttempts,cumsum_takeovers,cumsum_accentedHitsTotal,cumsum_accentedHitsSuccessful,cumsum_knockdowns,cumsum_protectionPassage,cumsum_hitsHeadTotal,cumsum_hitsHeadSuccessful,cumsum_hitsBodyTotal,cumsum_hitsBodySuccessful,cumsum_hitsLegsTotal,cumsum_hitsLegsSuccessful,cumsum_accentedHitsPositionDistanceTotal,cumsum_accentedHitsPositionDistanceSuccessful,cumsum_accentedHitsPositionClinchTotal,cumsum_accentedHitsPositionClinchSuccessful,cumsum_accentedHitsPositionParterTotal,cumsum_accentedHitsPositionParterSuccessful,cumsum_winMethods_[DEC],cumsum_winMethods_[DQ],cumsum_winMethods_[KO],cumsum_winMethods_[SUB],current_duration,winner,current_hitsTotal,current_hitsSuccessful,current_takedownTotal,current_takedownSuccessful,current_submissionAttempts,current_takeovers,current_accentedHitsTotal,current_accentedHitsSuccessful,current_knockdowns,current_protectionPassage,current_hitsHeadTotal,current_hitsHeadSuccessful,current_hitsBodyTotal,current_hitsBodySuccessful,current_hitsLegsTotal,current_hitsLegsSuccessful,current_accentedHitsPositionDistanceTotal,current_accentedHitsPositionDistanceSuccessful,current_accentedHitsPositionClinchTotal,current_accentedHitsPositionClinchSuccessful,current_accentedHitsPositionParterTotal,current_accentedHitsPositionParterSuccessful,current_winMethods_[DEC],current_winMethods_[DQ],current_winMethods_[KO],current_winMethods_[SUB],age,eventDate.date,fighterId,fighter_nbr,odds,winnerId,count_of_fights,hits_accuracy,takedown_accuracy,accentedHits_accuracy,hitsHead_accuracy,hitsBody_accuracy,hitsLegs_accuracy,accentedHitsPositionDistance_accuracy,accentedHitsPositionClinch_accuracy,accentedHitsPositionParter_accuracy,DEC_percent,DQ_percent,KO_percent,SUB_percent,hits_PM,takedown_PM,accentedHits_PM,hitsHead_PM,hitsBody_PM,hitsLegs_PM,accentedHitsPositionDistance_PM,accentedHitsPositionClinch_PM,accentedHi
tsPositionParter_PM,knockdowns_PM,protectionPassage_PM,win_streak,loose_streak,fighterName
0,300.0,0,14.0,2.0,0.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,300.0,False,14.0,2.0,0.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,25.0,2020-02-08,3418,2,1.48,3420.0,0,0.142857,,0.0,0.0,,,0.0,,,inf,,,,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,Austin Lingo


In [14]:
# Step-by-step view of the fighter-1 selection: every stats row of fighter 1 dated before the event.
fighter1_stats = f_stats_events_cumulative.loc[
    f_stats_events_cumulative['fighterId'].eq(int(f1_id))
    & f_stats_events_cumulative['eventDate.date'].lt(pd.to_datetime(event_date))
]

fighter1_stats


Unnamed: 0,cumsum_duration,cumsum_winner,cumsum_hitsTotal,cumsum_hitsSuccessful,cumsum_takedownTotal,cumsum_takedownSuccessful,cumsum_submissionAttempts,cumsum_takeovers,cumsum_accentedHitsTotal,cumsum_accentedHitsSuccessful,cumsum_knockdowns,cumsum_protectionPassage,cumsum_hitsHeadTotal,cumsum_hitsHeadSuccessful,cumsum_hitsBodyTotal,cumsum_hitsBodySuccessful,cumsum_hitsLegsTotal,cumsum_hitsLegsSuccessful,cumsum_accentedHitsPositionDistanceTotal,cumsum_accentedHitsPositionDistanceSuccessful,cumsum_accentedHitsPositionClinchTotal,cumsum_accentedHitsPositionClinchSuccessful,cumsum_accentedHitsPositionParterTotal,cumsum_accentedHitsPositionParterSuccessful,cumsum_winMethods_[DEC],cumsum_winMethods_[DQ],cumsum_winMethods_[KO],cumsum_winMethods_[SUB],current_duration,winner,current_hitsTotal,current_hitsSuccessful,current_takedownTotal,current_takedownSuccessful,current_submissionAttempts,current_takeovers,current_accentedHitsTotal,current_accentedHitsSuccessful,current_knockdowns,current_protectionPassage,current_hitsHeadTotal,current_hitsHeadSuccessful,current_hitsBodyTotal,current_hitsBodySuccessful,current_hitsLegsTotal,current_hitsLegsSuccessful,current_accentedHitsPositionDistanceTotal,current_accentedHitsPositionDistanceSuccessful,current_accentedHitsPositionClinchTotal,current_accentedHitsPositionClinchSuccessful,current_accentedHitsPositionParterTotal,current_accentedHitsPositionParterSuccessful,current_winMethods_[DEC],current_winMethods_[DQ],current_winMethods_[KO],current_winMethods_[SUB],age,eventDate.date,fighterId,fighter_nbr,odds,winnerId,count_of_fights,hits_accuracy,takedown_accuracy,accentedHits_accuracy,hitsHead_accuracy,hitsBody_accuracy,hitsLegs_accuracy,accentedHitsPositionDistance_accuracy,accentedHitsPositionClinch_accuracy,accentedHitsPositionParter_accuracy,DEC_percent,DQ_percent,KO_percent,SUB_percent,hits_PM,takedown_PM,accentedHits_PM,hitsHead_PM,hitsBody_PM,hitsLegs_PM,accentedHitsPositionDistance_PM,accentedHitsPositionClinch_PM,accentedHi
tsPositionParter_PM,knockdowns_PM,protectionPassage_PM,win_streak,loose_streak,fighterName
6324,198.0,0,7.0,5.0,0.0,0.0,0.0,0.0,4.0,2.0,0.0,0.0,1.0,0.0,3.0,2.0,0.0,0.0,3.0,1.0,1.0,1.0,0.0,0.0,0,0,0,1,198.0,False,7.0,5.0,0.0,0.0,0.0,0.0,4.0,2.0,0.0,0.0,1.0,0.0,3.0,2.0,0.0,0.0,3.0,1.0,1.0,1.0,0.0,0.0,0,0,0,1,24.0,2019-12-08 05:00:00,2597,1,3.51,2982.0,0,0.714286,,0.5,0.0,0.666667,,0.333333,1.0,,,,,inf,1.515152,0.0,0.606061,0.0,0.606061,0.0,0.30303,0.30303,0.0,0.0,0.0,0,1,Jacob Kilburn


In [15]:
# Per-row count of missing num_cols values, divided by the frame's total number of columns.
fighter1_stats_NaNs = fighter1_stats[num_cols].isna().sum(axis=1).div(fighter1_stats.shape[1])
fighter1_stats_NaNs

6324    0.066667
dtype: float64

In [16]:
# Keep the last row whose NaN share is below 0.2 and turn it back into a one-row frame with index 0.
fighter1_stats = (
    fighter1_stats.loc[fighter1_stats_NaNs < 0.2]
    .iloc[-1]
    .to_frame()
    .T
    .reset_index(drop=True)
)
fighter1_stats

Unnamed: 0,cumsum_duration,cumsum_winner,cumsum_hitsTotal,cumsum_hitsSuccessful,cumsum_takedownTotal,cumsum_takedownSuccessful,cumsum_submissionAttempts,cumsum_takeovers,cumsum_accentedHitsTotal,cumsum_accentedHitsSuccessful,cumsum_knockdowns,cumsum_protectionPassage,cumsum_hitsHeadTotal,cumsum_hitsHeadSuccessful,cumsum_hitsBodyTotal,cumsum_hitsBodySuccessful,cumsum_hitsLegsTotal,cumsum_hitsLegsSuccessful,cumsum_accentedHitsPositionDistanceTotal,cumsum_accentedHitsPositionDistanceSuccessful,cumsum_accentedHitsPositionClinchTotal,cumsum_accentedHitsPositionClinchSuccessful,cumsum_accentedHitsPositionParterTotal,cumsum_accentedHitsPositionParterSuccessful,cumsum_winMethods_[DEC],cumsum_winMethods_[DQ],cumsum_winMethods_[KO],cumsum_winMethods_[SUB],current_duration,winner,current_hitsTotal,current_hitsSuccessful,current_takedownTotal,current_takedownSuccessful,current_submissionAttempts,current_takeovers,current_accentedHitsTotal,current_accentedHitsSuccessful,current_knockdowns,current_protectionPassage,current_hitsHeadTotal,current_hitsHeadSuccessful,current_hitsBodyTotal,current_hitsBodySuccessful,current_hitsLegsTotal,current_hitsLegsSuccessful,current_accentedHitsPositionDistanceTotal,current_accentedHitsPositionDistanceSuccessful,current_accentedHitsPositionClinchTotal,current_accentedHitsPositionClinchSuccessful,current_accentedHitsPositionParterTotal,current_accentedHitsPositionParterSuccessful,current_winMethods_[DEC],current_winMethods_[DQ],current_winMethods_[KO],current_winMethods_[SUB],age,eventDate.date,fighterId,fighter_nbr,odds,winnerId,count_of_fights,hits_accuracy,takedown_accuracy,accentedHits_accuracy,hitsHead_accuracy,hitsBody_accuracy,hitsLegs_accuracy,accentedHitsPositionDistance_accuracy,accentedHitsPositionClinch_accuracy,accentedHitsPositionParter_accuracy,DEC_percent,DQ_percent,KO_percent,SUB_percent,hits_PM,takedown_PM,accentedHits_PM,hitsHead_PM,hitsBody_PM,hitsLegs_PM,accentedHitsPositionDistance_PM,accentedHitsPositionClinch_PM,accentedHi
tsPositionParter_PM,knockdowns_PM,protectionPassage_PM,win_streak,loose_streak,fighterName
0,198.0,0,7.0,5.0,0.0,0.0,0.0,0.0,4.0,2.0,0.0,0.0,1.0,0.0,3.0,2.0,0.0,0.0,3.0,1.0,1.0,1.0,0.0,0.0,0,0,0,1,198.0,False,7.0,5.0,0.0,0.0,0.0,0.0,4.0,2.0,0.0,0.0,1.0,0.0,3.0,2.0,0.0,0.0,3.0,1.0,1.0,1.0,0.0,0.0,0,0,0,1,24.0,2019-12-08 05:00:00,2597,1,3.51,2982.0,0,0.714286,,0.5,0.0,0.666667,,0.333333,1.0,,,,,inf,1.515152,0.0,0.606061,0.0,0.606061,0.0,0.30303,0.30303,0.0,0.0,0.0,0,1,Jacob Kilburn


In [17]:
# Create prediction vector
# Builds a single-row frame (index 0) that collects every input for this fight.
# fighter1_stats / fighter2_stats must be the one-row DataFrames whose index was reset to 0,
# otherwise the joins below do not line up with row 0.

X_df = pd.DataFrame(index=[0])

# Numeric stats of each fighter, prefixed by side.
X_df = X_df.join(fighter1_stats[num_cols].add_prefix("f1_"))
X_df = X_df.join(fighter2_stats[num_cols].add_prefix("f2_"))

# Ages and odds computed / entered in the earlier cell.
X_df.loc[0, ['f1_age', 'f2_age', 'f1_odds', 'f2_odds']] = f1_age, f2_age, f1_odd, f2_odd

# Event-level attributes, shared by both sides.
X_df[['weightCategory.id', 'city', 'country', 'is_fight_night', 'timezone']] = \
    weightCategory_id, city, country, is_fight_night, timezone 

# Static attributes of each fighter, in static_cols order.
X_df[f1_static_cols] = f1_static_stats
X_df[f2_static_cols] = f2_static_stats

X_df

Unnamed: 0,f1_cumsum_duration,f1_cumsum_winner,f1_cumsum_hitsTotal,f1_cumsum_hitsSuccessful,f1_cumsum_takedownTotal,f1_cumsum_takedownSuccessful,f1_cumsum_submissionAttempts,f1_cumsum_takeovers,f1_cumsum_accentedHitsTotal,f1_cumsum_accentedHitsSuccessful,f1_cumsum_knockdowns,f1_cumsum_protectionPassage,f1_cumsum_hitsHeadTotal,f1_cumsum_hitsHeadSuccessful,f1_cumsum_hitsBodyTotal,f1_cumsum_hitsBodySuccessful,f1_cumsum_hitsLegsTotal,f1_cumsum_hitsLegsSuccessful,f1_cumsum_accentedHitsPositionDistanceTotal,f1_cumsum_accentedHitsPositionDistanceSuccessful,f1_cumsum_accentedHitsPositionClinchTotal,f1_cumsum_accentedHitsPositionClinchSuccessful,f1_cumsum_accentedHitsPositionParterTotal,f1_cumsum_accentedHitsPositionParterSuccessful,f1_cumsum_winMethods_[DEC],f1_cumsum_winMethods_[DQ],f1_cumsum_winMethods_[KO],f1_cumsum_winMethods_[SUB],f1_hits_accuracy,f1_takedown_accuracy,f1_accentedHits_accuracy,f1_hitsHead_accuracy,f1_hitsBody_accuracy,f1_hitsLegs_accuracy,f1_accentedHitsPositionDistance_accuracy,f1_accentedHitsPositionClinch_accuracy,f1_accentedHitsPositionParter_accuracy,f1_DEC_percent,f1_DQ_percent,f1_KO_percent,f1_SUB_percent,f1_hits_PM,f1_takedown_PM,f1_accentedHits_PM,f1_hitsHead_PM,f1_hitsBody_PM,f1_hitsLegs_PM,f1_accentedHitsPositionDistance_PM,f1_accentedHitsPositionClinch_PM,f1_accentedHitsPositionParter_PM,f1_knockdowns_PM,f1_protectionPassage_PM,f1_win_streak,f1_loose_streak,f2_cumsum_duration,f2_cumsum_winner,f2_cumsum_hitsTotal,f2_cumsum_hitsSuccessful,f2_cumsum_takedownTotal,f2_cumsum_takedownSuccessful,f2_cumsum_submissionAttempts,f2_cumsum_takeovers,f2_cumsum_accentedHitsTotal,f2_cumsum_accentedHitsSuccessful,f2_cumsum_knockdowns,f2_cumsum_protectionPassage,f2_cumsum_hitsHeadTotal,f2_cumsum_hitsHeadSuccessful,f2_cumsum_hitsBodyTotal,f2_cumsum_hitsBodySuccessful,f2_cumsum_hitsLegsTotal,f2_cumsum_hitsLegsSuccessful,f2_cumsum_accentedHitsPositionDistanceTotal,f2_cumsum_accentedHitsPositionDistanceSuccessful,f2_cumsum_accentedHitsPositionClinchTotal,f2_cumsu
m_accentedHitsPositionClinchSuccessful,f2_cumsum_accentedHitsPositionParterTotal,f2_cumsum_accentedHitsPositionParterSuccessful,f2_cumsum_winMethods_[DEC],f2_cumsum_winMethods_[DQ],f2_cumsum_winMethods_[KO],f2_cumsum_winMethods_[SUB],f2_hits_accuracy,f2_takedown_accuracy,f2_accentedHits_accuracy,f2_hitsHead_accuracy,f2_hitsBody_accuracy,f2_hitsLegs_accuracy,f2_accentedHitsPositionDistance_accuracy,f2_accentedHitsPositionClinch_accuracy,f2_accentedHitsPositionParter_accuracy,f2_DEC_percent,f2_DQ_percent,f2_KO_percent,f2_SUB_percent,f2_hits_PM,f2_takedown_PM,f2_accentedHits_PM,f2_hitsHead_PM,f2_hitsBody_PM,f2_hitsLegs_PM,f2_accentedHitsPositionDistance_PM,f2_accentedHitsPositionClinch_PM,f2_accentedHitsPositionParter_PM,f2_knockdowns_PM,f2_protectionPassage_PM,f2_win_streak,f2_loose_streak,f1_age,f2_age,f1_odds,f2_odds,weightCategory.id,city,country,is_fight_night,timezone,f1_country,f1_city,f1_armSpan,f1_height,f1_legSwing,f1_timezone,f1_weight,f2_country,f2_city,f2_armSpan,f2_height,f2_legSwing,f2_timezone,f2_weight
0,198.0,0,7.0,5.0,0.0,0.0,0.0,0.0,4.0,2.0,0.0,0.0,1.0,0.0,3.0,2.0,0.0,0.0,3.0,1.0,1.0,1.0,0.0,0.0,0,0,0,1,0.714286,,0.5,0.0,0.666667,,0.333333,1.0,,,,,inf,1.515152,0.0,0.606061,0.0,0.606061,0.0,0.30303,0.30303,0.0,0.0,0.0,0,1,300.0,0,14.0,2.0,0.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,0.0,0.0,11.0,0.0,0.0,0.0,0.0,0.0,1,0,0,0,0.142857,,0.0,0.0,,,0.0,,,inf,,,,0.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0,1,25.0,26.0,2.95,1.43,4,Abu Dhabi,United Arab Emirates,True,Asia/Dubai,USA,,180.34,175.26,101.7,America/New_York,65.77,,,177.8,175.26,101.7,,65.77


In [18]:
# Flag column names: f1 first, then f2, each in city / country / timezone order.
binary_fighter_cols = [
    prefix + key
    for prefix in ["f1_", "f2_"]
    for key in ["isHomeCity", "isHomeCountry", "isHomeTimezone"]
]

# 1 when the fighter's own city / country / timezone equals the event's, else 0.
# The iteration order matches binary_fighter_cols above.
binary_cols = ['city', 'country', 'timezone']
binary_stats = [
    int(X_df.loc[0, prefix + col] == X_df.loc[0, col])
    for prefix in ["f1_", "f2_"]
    for col in binary_cols
]

X_df[binary_fighter_cols] = binary_stats

In [19]:
# Replace missing static attributes of both fighters with the literal 'unknown'.
# NOTE(review): this also applies to the numeric static columns (armSpan, height, legSwing, weight).
all_static_cols = f1_static_cols + f2_static_cols
X_df[all_static_cols] = X_df[all_static_cols].fillna('unknown')

# Difference

In [20]:
# Show which feature-name lists the loaded JSON provides.
generated_features.keys()

dict_keys(['fighter1_stats', 'fighter2_stats', 'difference_cols'])

In [21]:
# Feature-name lists from the JSON. They get their own names so that the one-row DataFrames
# fighter1_stats / fighter2_stats are not overwritten and the vector-building cell stays re-runnable.
fighter1_stat_cols = generated_features['fighter1_stats']
fighter2_stat_cols = generated_features['fighter2_stats']
difference_cols = generated_features['difference_cols']

# For every fighter-1 feature name, strip its first three characters to get the base name
# and store f1_<base> - f2_<base> as <base>_difference.
for col in fighter1_stat_cols:
    new_col_name = col[3:]+'_difference'
    # Static columns may hold the string 'unknown' after the fillna step; coerce such
    # values to NaN instead of letting the float conversion raise.
    f1_values = pd.to_numeric(X_df['f1_'+col[3:]], errors='coerce').astype(float)
    f2_values = pd.to_numeric(X_df['f2_'+col[3:]], errors='coerce').astype(float)
    X_df[new_col_name] = f1_values - f2_values
X_df[difference_cols]

Unnamed: 0,cumsum_duration_difference,cumsum_winner_difference,cumsum_hitsTotal_difference,cumsum_hitsSuccessful_difference,cumsum_takedownTotal_difference,cumsum_takedownSuccessful_difference,cumsum_submissionAttempts_difference,cumsum_takeovers_difference,cumsum_accentedHitsTotal_difference,cumsum_accentedHitsSuccessful_difference,cumsum_knockdowns_difference,cumsum_protectionPassage_difference,cumsum_hitsHeadTotal_difference,cumsum_hitsHeadSuccessful_difference,cumsum_hitsBodyTotal_difference,cumsum_hitsBodySuccessful_difference,cumsum_hitsLegsTotal_difference,cumsum_hitsLegsSuccessful_difference,cumsum_accentedHitsPositionDistanceTotal_difference,cumsum_accentedHitsPositionDistanceSuccessful_difference,cumsum_accentedHitsPositionClinchTotal_difference,cumsum_accentedHitsPositionClinchSuccessful_difference,cumsum_accentedHitsPositionParterTotal_difference,cumsum_accentedHitsPositionParterSuccessful_difference,cumsum_winMethods_[DEC]_difference,cumsum_winMethods_[DQ]_difference,cumsum_winMethods_[KO]_difference,cumsum_winMethods_[SUB]_difference,hits_accuracy_difference,takedown_accuracy_difference,accentedHits_accuracy_difference,hitsHead_accuracy_difference,hitsBody_accuracy_difference,hitsLegs_accuracy_difference,accentedHitsPositionDistance_accuracy_difference,accentedHitsPositionClinch_accuracy_difference,accentedHitsPositionParter_accuracy_difference,DEC_percent_difference,DQ_percent_difference,KO_percent_difference,SUB_percent_difference,hits_PM_difference,takedown_PM_difference,accentedHits_PM_difference,hitsHead_PM_difference,hitsBody_PM_difference,hitsLegs_PM_difference,accentedHitsPositionDistance_PM_difference,accentedHitsPositionClinch_PM_difference,accentedHitsPositionParter_PM_difference,knockdowns_PM_difference,protectionPassage_PM_difference,win_streak_difference,loose_streak_difference,odds_difference,age_difference,height_difference,armSpan_difference,legSwing_difference,isHomeCity_difference,isHomeCountry_difference,isHomeTimezone_difference
0,-102.0,0.0,-7.0,3.0,0.0,0.0,0.0,0.0,-7.0,2.0,0.0,0.0,-10.0,0.0,3.0,2.0,0.0,0.0,-8.0,1.0,1.0,1.0,0.0,0.0,-1.0,0.0,0.0,1.0,0.571429,,0.5,0.0,,,0.333333,,,,,,,1.115152,0.0,0.606061,0.0,0.606061,0.0,0.30303,0.30303,0.0,0.0,0.0,0.0,0.0,1.52,-1.0,0.0,2.54,0.0,0.0,0.0,0.0


In [22]:
# Load the first CatBoost model and predict on the feature vector built above.
clf1 = CatBoostClassifier()
# NOTE(review): the recorded error output of this cell names
# './data_models/catboost_v1_0_13.04.2021_1.cat', not './data_models/cat'; confirm the intended path.
clf1.load_model('./data_models/cat')
# Select the columns the model reports as its features, in the model's own order.
model_cols = clf1.feature_names_
# Keep column 1 of the returned probability matrix.
y_proba1 = clf1.predict_proba(X_df[model_cols])[:,1]
y_proba1

CatBoostError: catboost/libs/model/model_import_interface.h:19: Model file doesn't exist: ./data_models/catboost_v1_0_13.04.2021_1.cat

In [15]:
def _swap_fighter_prefix(column_name):
    """Exchange a leading 'f1_' / 'f2_' prefix; names without such a prefix are returned unchanged."""
    if column_name.startswith('f1_'):
        return 'f2_' + column_name[3:]
    if column_name.startswith('f2_'):
        return 'f1_' + column_name[3:]
    return column_name


# Mirror the fight: the same values, with the two fighters' column labels exchanged.
# Only a leading prefix is swapped, so an 'f1'/'f2' substring elsewhere in a name is left alone.
reversed_cols = [_swap_fighter_prefix(col) for col in X_df.columns]

X_df_reversed = X_df.copy()
X_df_reversed.columns = reversed_cols

# Any *_difference columns copied from X_df still describe the original orientation,
# so they are recomputed from the swapped f1_/f2_ columns.
fighter1_stat_cols = generated_features['fighter1_stats']
for col in fighter1_stat_cols:
    new_col_name = col[3:] + '_difference'
    # 'unknown' placeholders in static columns become NaN rather than raising in the float conversion.
    f1_values = pd.to_numeric(X_df_reversed['f1_' + col[3:]], errors='coerce').astype(float)
    f2_values = pd.to_numeric(X_df_reversed['f2_' + col[3:]], errors='coerce').astype(float)
    X_df_reversed[new_col_name] = f1_values - f2_values

# Second model, applied to the mirrored vector; column 0 of the probability matrix is kept.
clf2 = CatBoostClassifier()
clf2.load_model('./data_models/catboost_v1_0_13.04.2021_2.cat')
y_proba2 = clf2.predict_proba(X_df_reversed[clf2.feature_names_])[:, 0]
y_proba2

array([0.48211823])

In [216]:
# Inspect the mirrored feature frame built in the previous cell.
# It is deliberately not rebuilt from X_df here: a plain copy with renamed columns would bring back
# the *_difference columns in their original (un-mirrored) orientation, and the next cell predicts
# from X_df_reversed.
X_df_reversed

Unnamed: 0,f2_cumsum_duration,f2_cumsum_winner,f2_cumsum_hitsTotal,f2_cumsum_hitsSuccessful,f2_cumsum_takedownTotal,f2_cumsum_takedownSuccessful,f2_cumsum_submissionAttempts,f2_cumsum_takeovers,f2_cumsum_accentedHitsTotal,f2_cumsum_accentedHitsSuccessful,f2_cumsum_knockdowns,f2_cumsum_protectionPassage,f2_cumsum_hitsHeadTotal,f2_cumsum_hitsHeadSuccessful,f2_cumsum_hitsBodyTotal,f2_cumsum_hitsBodySuccessful,f2_cumsum_hitsLegsTotal,f2_cumsum_hitsLegsSuccessful,f2_cumsum_accentedHitsPositionDistanceTotal,f2_cumsum_accentedHitsPositionDistanceSuccessful,f2_cumsum_accentedHitsPositionClinchTotal,f2_cumsum_accentedHitsPositionClinchSuccessful,f2_cumsum_accentedHitsPositionParterTotal,f2_cumsum_accentedHitsPositionParterSuccessful,f2_cumsum_winMethods_[DEC],f2_cumsum_winMethods_[DQ],f2_cumsum_winMethods_[KO],f2_cumsum_winMethods_[SUB],f2_hits_accuracy,f2_takedown_accuracy,f2_accentedHits_accuracy,f2_hitsHead_accuracy,f2_hitsBody_accuracy,f2_hitsLegs_accuracy,f2_accentedHitsPositionDistance_accuracy,f2_accentedHitsPositionClinch_accuracy,f2_accentedHitsPositionParter_accuracy,f2_DEC_percent,f2_DQ_percent,f2_KO_percent,f2_SUB_percent,f2_hits_PM,f2_takedown_PM,f2_accentedHits_PM,f2_hitsHead_PM,f2_hitsBody_PM,f2_hitsLegs_PM,f2_accentedHitsPositionDistance_PM,f2_accentedHitsPositionClinch_PM,f2_accentedHitsPositionParter_PM,f2_knockdowns_PM,f2_protectionPassage_PM,f2_win_streak,f2_loose_streak,f1_cumsum_duration,f1_cumsum_winner,f1_cumsum_hitsTotal,f1_cumsum_hitsSuccessful,f1_cumsum_takedownTotal,f1_cumsum_takedownSuccessful,f1_cumsum_submissionAttempts,f1_cumsum_takeovers,f1_cumsum_accentedHitsTotal,f1_cumsum_accentedHitsSuccessful,f1_cumsum_knockdowns,f1_cumsum_protectionPassage,f1_cumsum_hitsHeadTotal,f1_cumsum_hitsHeadSuccessful,f1_cumsum_hitsBodyTotal,f1_cumsum_hitsBodySuccessful,f1_cumsum_hitsLegsTotal,f1_cumsum_hitsLegsSuccessful,f1_cumsum_accentedHitsPositionDistanceTotal,f1_cumsum_accentedHitsPositionDistanceSuccessful,f1_cumsum_accentedHitsPositionClinchTotal,f1_cumsu
m_accentedHitsPositionClinchSuccessful,f1_cumsum_accentedHitsPositionParterTotal,f1_cumsum_accentedHitsPositionParterSuccessful,f1_cumsum_winMethods_[DEC],f1_cumsum_winMethods_[DQ],f1_cumsum_winMethods_[KO],f1_cumsum_winMethods_[SUB],f1_hits_accuracy,f1_takedown_accuracy,f1_accentedHits_accuracy,f1_hitsHead_accuracy,f1_hitsBody_accuracy,f1_hitsLegs_accuracy,f1_accentedHitsPositionDistance_accuracy,f1_accentedHitsPositionClinch_accuracy,f1_accentedHitsPositionParter_accuracy,f1_DEC_percent,f1_DQ_percent,f1_KO_percent,f1_SUB_percent,f1_hits_PM,f1_takedown_PM,f1_accentedHits_PM,f1_hitsHead_PM,f1_hitsBody_PM,f1_hitsLegs_PM,f1_accentedHitsPositionDistance_PM,f1_accentedHitsPositionClinch_PM,f1_accentedHitsPositionParter_PM,f1_knockdowns_PM,f1_protectionPassage_PM,f1_win_streak,f1_loose_streak,f2_age,f1_age,f2_odds,f1_odds,weightCategory.id,city,country,is_fight_night,timezone,f2_country,f2_city,f2_armSpan,f2_height,f2_legSwing,f2_timezone,f1_country,f1_city,f1_armSpan,f1_height,f1_legSwing,f1_timezone,f2_isHomeCity,f2_isHomeCountry,f2_isHomeTimezone,f1_isHomeCity,f1_isHomeCountry,f1_isHomeTimezone
0,2027.0,5,423.0,254.0,10.0,6.0,3.0,0.0,313.0,160.0,0.0,4.0,289.0,139.0,12.0,10.0,12.0,11.0,230.0,96.0,21.0,13.0,62.0,51.0,5,0,0,2,0.600473,0.6,0.511182,0.480969,0.833333,0.916667,0.417391,0.619048,0.822581,1.0,0.0,0.0,0.4,7.5185,0.177602,4.736063,4.114455,0.296004,0.325604,2.841638,0.384805,1.50962,0.0,0.118402,3,0,2080.0,8,320.0,259.0,11.0,7.0,2.0,2.0,155.0,106.0,0.0,8.0,117.0,71.0,18.0,16.0,20.0,19.0,93.0,53.0,12.0,10.0,50.0,43.0,5,0,3,2,0.809375,0.636364,0.683871,0.606838,0.888889,0.95,0.569892,0.833333,0.86,0.625,0.0,0.375,0.25,7.471154,0.201923,3.057692,2.048077,0.461538,0.548077,1.528846,0.288462,1.240385,0.0,0.230769,4,0,27.0,28.0,1.35,3.0,7,Las Vegas,USA,True,America/Denver,Italy,Mezzocorona,187.96,182.88,104.14,Europe/Rome,USA,unknown,205.74,190.5,101.6,America/New_York,0,0,0,0,1,0


In [217]:
# Predict on the mirrored vector with a different model file (dated 06.04.2021, under ../../models).
# Running this cell overwrites clf2, model_cols and y_proba2 from the earlier cells.
clf2 = CatBoostClassifier()
clf2.load_model('../../models/Catboost_v1_0/catboost_v1_0_06.04.2021_2.cat')
# Feature columns in the order this model reports them.
model_cols = clf2.feature_names_
# Keep column 0 of the returned probability matrix.
y_proba2 = clf2.predict_proba(X_df_reversed[model_cols])[:,0]
y_proba2

array([0.37544987])