In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
from sklearn.preprocessing import LabelEncoder
warnings.filterwarnings("ignore")

pd.set_option('display.max_columns',100)

info_lutador = pd.read_csv('fighter_details.csv')
ufc = pd.read_csv('total_fight_data.csv', sep=';')

In [2]:
columns = ['R_SIG_STR.', 'B_SIG_STR.', 'R_TOTAL_STR.', 'B_TOTAL_STR.',
       'R_TD', 'B_TD', 'R_HEAD', 'B_HEAD', 'R_BODY','B_BODY', 'R_LEG', 'B_LEG', 
        'R_DISTANCE', 'B_DISTANCE', 'R_CLINCH','B_CLINCH', 'R_GROUND', 'B_GROUND']

attemp = '_att'
landed = '_landed'

for column in columns:
    ufc[column+attemp] = ufc[column].apply(lambda X:int(X.split('of')[1]))
    ufc[column+landed] = ufc[column].apply(lambda X:int(X.split('of')[0]))

ufc.drop(columns, axis=1, inplace=True)
ufc['Winner'].fillna('Draw', inplace=True)

In [3]:
pct_columns = ['R_SIG_STR_pct','B_SIG_STR_pct', 'R_TD_pct', 'B_TD_pct']

for column in pct_columns:
    ufc[column] = ufc[column].apply(lambda X:float(X.replace('%', ''))/100)

In [4]:
def Division(X):
    for division in weight_classes:
        if division in X:
            return division
    if X == 'Catch Weight Bout' or 'Catchweight Bout':
        return 'Catch Weight'
    else:
        return 'Open Weight'

In [5]:
weight_classes = ['Women\'s Strawweight', 'Women\'s Bantamweight', 
                  'Women\'s Featherweight', 'Women\'s Flyweight', 'Lightweight', 
                  'Welterweight', 'Middleweight','Light Heavyweight', 
                  'Heavyweight', 'Featherweight','Bantamweight', 'Flyweight', 'Open Weight']

ufc['Weight_class'] = ufc['Fight_type'].apply(Division)

In [6]:
ufc['Winner'].fillna('Draw', inplace=True)

In [7]:
def get_renamed_winner(row):
    if row['R_fighter'] == row['Winner']:
        return 'Red'
    if row['B_fighter'] == row['Winner']:
        return 'Blue'
    elif row['Winner'] == 'Draw':
        return 'Draw'
    
ufc['Winner'] = ufc[['R_fighter', 'B_fighter', 'Winner']].apply(get_renamed_winner, axis=1)

In [8]:
def inch_to_cm(X):
    if X is np.NaN:
        return X
    elif len(X.split("'")) == 2:
        feet = float(X.split("'")[0])
        inches = int(X.split("'")[1].replace(' ','').replace('"', ''))
        return (feet*30.48) + (inches * 2.54)
    else:
        return float(X.replace('"',''))*2.54

In [9]:
info_lutador['Height'] = info_lutador['Height'].apply(inch_to_cm)
info_lutador['Reach'] = info_lutador['Reach'].apply(inch_to_cm)

In [10]:
info_lutador['Weight'] = info_lutador['Weight'].apply(lambda X: float(X.replace(' lbs', '')) if X is not np.NaN else X)

In [11]:
ufc_final = ufc.merge(info_lutador, left_on='R_fighter', right_on='fighter_name', how='left')

In [12]:
ufc_final = ufc_final.drop('fighter_name', axis=1)

In [13]:
ufc_final.rename(columns={'Height':'R_Height',
                          'Weight':'R_Weight',
                          'Reach':'R_Reach',
                          'Stance':'R_Stance',
                          'DOB':'R_DOB'}, inplace=True)

In [14]:
ufc_final = ufc_final.merge(info_lutador, left_on='B_fighter', right_on='fighter_name', how='left')

In [15]:
ufc_final = ufc_final.drop('fighter_name', axis=1)

In [16]:
ufc_final.rename(columns={'Height':'B_Height',
                          'Weight':'B_Weight',
                          'Reach':'B_Reach',
                          'Stance':'B_Stance',
                          'DOB':'B_DOB'}, inplace=True)

In [17]:
ufc_final['R_DOB'] = pd.to_datetime(ufc_final['R_DOB'])
ufc_final['B_DOB'] = pd.to_datetime(ufc_final['B_DOB'])
ufc_final['date'] = pd.to_datetime(ufc_final['date'])

In [18]:
ufc_final['R_year'] = ufc_final['R_DOB'].apply(lambda X: X.year)
ufc_final['B_year'] = ufc_final['B_DOB']. apply(lambda X: X.year)
ufc_final['date_year'] = ufc_final['date'].apply(lambda X: X.year)


def get_age(row):
    B_age = (row['date_year'] - row['B_year'])
    R_age = (row['date_year'] - row['R_year'])
    if np.isnan(B_age) != True:
        B_age = B_age
    if np.isnan(R_age) != True:
        R_age = R_age
    return pd.Series([B_age, R_age], index = ['B_age', 'R_age'])

In [19]:
ufc_final[['B_age', 'R_age']] = ufc_final[['date_year', 'R_year', 'B_year']].apply(get_age, axis=1)

In [20]:
ufc_final.drop(['R_DOB', 'B_DOB', 'B_year', 'R_year'],axis=1, inplace=True)

In [21]:
ufc_final['R_age'] = ufc_final['R_age'].fillna(ufc_final['R_age'].median())
ufc_final['B_age'] = ufc_final['B_age'].fillna(ufc_final['B_age'].median())

In [22]:
ufc_final['R_Height'] = ufc_final['R_Height'].fillna(ufc_final['R_Height'].mean())
ufc_final['B_Height'] = ufc_final['B_Height'].fillna(ufc_final['B_Height'].mean())

In [23]:
Fighter = pd.concat([ufc_final['R_fighter'], ufc_final['B_fighter']], ignore_index = True)

In [24]:
from statistics import mode
ufc_final['B_Stance'] = ufc_final['B_Stance'].fillna(ufc_final['B_Stance'].mode()[0])
ufc_final['R_Stance'] = ufc_final['R_Stance'].fillna(ufc_final['R_Stance'].mode()[0])

In [25]:
enc = LabelEncoder()

In [26]:
data_enc1 = ufc_final['Weight_class']
data_enc1 = enc.fit_transform(data_enc1)

data_enc2 = ufc_final['R_Stance']
data_enc2 = enc.fit_transform(data_enc2)

data_enc3 = ufc_final['B_Stance']
data_enc3 = enc.fit_transform(data_enc3)

data_enc2 = pd.DataFrame(data_enc2, columns=['R_Stance'])
data_enc3 = pd.DataFrame(data_enc3, columns=['B_Stance'])

ufc_final[['R_Stance']] = data_enc2[['R_Stance']]
ufc_final[['B_Stance']] = data_enc3[['B_Stance']]

R_S = pd.get_dummies(ufc_final['R_Stance'])
B_S = pd.get_dummies(ufc_final['B_Stance'])

ufc_final = pd.concat([ufc_final,pd.get_dummies(ufc_final['win_by'], prefix='win_by')], axis=1)
ufc_final.drop(['win_by'], axis=1, inplace=True)

ufc_final['Winner_num'] = ufc_final.Winner.map({'Red':0, 'Blue':1, 'Draw':2})

In [27]:
B_S.drop([0,2], axis=1, inplace=True)
R_S.drop([0,2], axis=1, inplace=True)

In [28]:
B_S.columns = ['B_Southpaw','B_Orthodox','B_Switch']
R_S.columns = ['R_Southpaw','R_Orthodox','R_Switch']

In [29]:
ufc_final['B_Southpaw'] = B_S['B_Southpaw']
ufc_final['B_Orthodox']= B_S['B_Orthodox']
ufc_final['B_Switch'] = B_S['B_Switch']
ufc_final['R_Southpaw'] = R_S['R_Southpaw']
ufc_final['R_Orthodox']= R_S['R_Orthodox']
ufc_final['R_Switch'] = R_S['R_Switch']

ufc_final.drop(['R_Stance','B_Stance'], axis=1, inplace=True)

In [30]:
ufc_final = ufc_final[ufc_final['Winner_num']!=2]

In [31]:
ufc_final.drop(['date', 'location', 'Referee', 'last_round', 'last_round_time', 'Format', 'Winner','date_year','Fight_type','Weight_class'], axis=1, inplace=True)

In [32]:
df = ufc_final.copy()

In [33]:
df['R_SUB._ATT'] = df['R_SUB_ATT']
df['R_SIG_STR._pct'] = df['R_SIG_STR_pct']
df['B_SUB._ATT'] = df['B_SUB_ATT']
df['B_SIG_STR._pct'] = df['B_SIG_STR_pct']
df.drop(['B_SUB_ATT','B_SIG_STR_pct', 'R_SUB_ATT', 'R_SIG_STR_pct'], axis=1, inplace=True)

In [34]:
ufc_final.drop(['R_SIG_STR._att',
                'B_SIG_STR._att',
                'R_SIG_STR._landed',
                'B_SIG_STR._landed',
                'win_by_Could Not Continue',
                'win_by_DQ',
                'win_by_Decision - Majority',
                'win_by_Decision - Split',
                'win_by_Decision - Unanimous',
                'win_by_KO/TKO',
                'win_by_Other',
                'win_by_Overturned',
                'win_by_Submission',
                "win_by_TKO - Doctor's Stoppage"
               ], axis=1, inplace=True)

In [35]:
ufc_final['R_TOTAL_STR._pct'] = ufc_final['R_TOTAL_STR._landed']/ufc_final['R_TOTAL_STR._att']
ufc_final['R_BODY_pct'] = ufc_final['R_BODY_landed']/ufc_final['R_BODY_att']
ufc_final['R_CLINCH_pct'] = ufc_final['R_CLINCH_landed']/ufc_final['R_CLINCH_att']
ufc_final['R_DISTANCE_pct'] = ufc_final['R_DISTANCE_landed']/ufc_final['R_DISTANCE_att']
ufc_final['R_GROUND_pct'] = ufc_final['R_GROUND_landed']/ufc_final['R_GROUND_att']
ufc_final['R_HEAD_pct'] = ufc_final['R_HEAD_landed']/ufc_final['R_HEAD_att']
ufc_final['R_LEG_pct'] = ufc_final['R_LEG_landed']/ufc_final['R_LEG_att']
ufc_final['R_TD_pct'] = ufc_final['R_TD_landed']/ufc_final['R_TD_att']
ufc_final['R_SUB._pct'] = ufc_final['R_SUB_ATT']
ufc_final['R_SIG_STR._pct'] = ufc_final['R_SIG_STR_pct']

In [36]:
ufc_final['B_TOTAL_STR._pct'] = ufc_final['B_TOTAL_STR._landed']/ufc_final['B_TOTAL_STR._att']
ufc_final['B_BODY_pct'] = ufc_final['B_BODY_landed']/ufc_final['B_BODY_att']
ufc_final['B_CLINCH_pct'] = ufc_final['B_CLINCH_landed']/ufc_final['B_CLINCH_att']
ufc_final['B_DISTANCE_pct'] = ufc_final['B_DISTANCE_landed']/ufc_final['B_DISTANCE_att']
ufc_final['B_GROUND_pct'] = ufc_final['B_GROUND_landed']/ufc_final['B_GROUND_att']
ufc_final['B_HEAD_pct'] = ufc_final['B_HEAD_landed']/ufc_final['B_HEAD_att']
ufc_final['B_LEG_pct'] = ufc_final['B_LEG_landed']/ufc_final['B_LEG_att']
ufc_final['B_TD_pct'] = ufc_final['B_TD_landed']/ufc_final['B_TD_att']
ufc_final['B_SUB._pct'] = ufc_final['B_SUB_ATT']
ufc_final['B_SIG_STR._pct'] = ufc_final['B_SIG_STR_pct']

In [37]:
ufc_final.drop(['R_TOTAL_STR._landed',
                'R_TOTAL_STR._att',
                'B_TOTAL_STR._landed',
                'B_TOTAL_STR._att',
                'R_BODY_att',
                'R_BODY_landed',
                'R_CLINCH_att',
               'R_CLINCH_landed',
               'R_DISTANCE_att',
               'R_DISTANCE_landed',
               'R_GROUND_att',
               'R_GROUND_landed',
               'R_HEAD_att',
               'R_HEAD_landed',
               'R_LEG_att',
               'R_LEG_landed',
               'R_BODY_att',
               'R_BODY_landed',
               'R_TD_att',
               'R_TD_landed',
               'B_BODY_att',
                'B_BODY_landed',
                'B_CLINCH_att',
               'B_CLINCH_landed',
               'B_DISTANCE_att',
               'B_DISTANCE_landed',
               'B_GROUND_att',
               'B_GROUND_landed',
               'B_HEAD_att',
               'B_HEAD_landed',
               'B_LEG_att',
               'B_LEG_landed',
               'B_BODY_att',
               'B_BODY_landed',
               'B_TD_att',
               'B_TD_landed',
               'B_SUB_ATT',
               'R_SUB_ATT',
               'R_SIG_STR_pct',
               'B_SIG_STR_pct'], axis=1, inplace=True)

## modelo aqui, treinar em ufc final

In [38]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import minmax_scale
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import roc_auc_score
from xgboost import XGBClassifier


import warnings
warnings.filterwarnings("ignore")

In [39]:
X = ufc_final.drop(['Winner_num'], axis=1)
X.drop(['R_fighter', 'B_fighter'],axis=1, inplace=True)
Y = ufc_final['Winner_num']

In [40]:
cols = X.columns

In [41]:
X.fillna(0, inplace=True)

In [42]:
X_train,X_test,y_train,y_test = train_test_split(X,Y, test_size = 0.3, random_state = 40)

In [43]:
from sklearn.model_selection import cross_val_predict
from sklearn.metrics import make_scorer

In [44]:
randmodel=XGBClassifier()

randparams={'subsample':[0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9],
            'reg_alpha':[10e-5, 10e-3, 0.01, 0.1, 1, 10, 100],
            'min_child_weight':[2,3,4,5,6,7,8,9,10], 
            'max_depth':[6,7,8,9,10,15,20,25,30,35,40,45,50,55,100], 
            'gamma':[0.01,0.1,0.2,0.3,0.4,0.5,0.6,0.7,0.8,0.9,2], 
            'eta':[0.01,0.03,0.05,0.07,0.1,0.3,0.5,0.7], 
            'colsample_bytree':[0.4,0.5,0.6,0.7,0.8,0.9]}

clf_model = RandomizedSearchCV(randmodel, param_distributions=randparams, n_iter=2000, cv=7, scoring='roc_auc', n_jobs=5, verbose=3)

In [45]:
search = clf_model.fit(X_train, y_train)
print(search.best_params_, '\n', '\n', search.best_score_)

In [46]:
model = XGBClassifier(subsample='0.9',reg_alpha='0.1',min_child_weight='5',max_depth='7',gamma='0.2',eta='0.3',colsample_bytree='0.8')
model.fit(X_train, y_train)
pred = model.predict(X_test)
accuracy = accuracy_score(y_test, pred)
precision = precision_score(y_test, pred, average = 'macro')
recall = recall_score(y_test, pred, average = 'macro')
print('accuracy= %.2f%%, precision= %.2f%%, recall= %.2f%%' % (accuracy*100.00, precision*100, recall*100))

accuracy= 86.45%, precision= 84.01%, recall= 83.57%


In [47]:
roc_auc_score(y_test, pred)

0.8356695436833097

In [48]:
model_final = XGBClassifier(subsample='0.9',reg_alpha='0.1',min_child_weight='5',max_depth='7',gamma='0.2',eta='0.3',colsample_bytree='0.8')
model_final.fit(X, Y)

XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree='0.8', eta='0.3',
              gamma='0.2', learning_rate=0.1, max_delta_step=0, max_depth='7',
              min_child_weight='5', missing=None, n_estimators=100, n_jobs=1,
              nthread=None, objective='binary:logistic', random_state=0,
              reg_alpha='0.1', reg_lambda=1, scale_pos_weight=1, seed=None,
              silent=None, subsample='0.9', verbosity=1)

In [49]:
def get_pct(att, landed):
    pct = (landed / att) * 100
    return pct

In [50]:
def average_pct(r_pct, b_pct):
    average_pct = (r_pct.mean() + b_pct.mean())/2
    return average_pct

In [51]:
fighters = []
for i in ufc_final['B_fighter']:
    if i not in fighters:
        fighters.append(i)
for j in ufc_final['R_fighter']:
    if j not in fighters:
        fighters.append(j)

In [52]:
ufc_final.fillna(0, inplace=True)

In [53]:
def get_fighter_info(name):
    
    name_r = df[df['R_fighter']== name]
    name_b = df[df['B_fighter']== name]
    
    name_r = name_r[name_r.columns.drop(list(name_r.filter(regex='B_')))]
    name_b = name_b[name_b.columns.drop(list(name_b.filter(regex='R_')))]
        
    name_r = name_r.rename(columns={'R_fighter':'Fighter'})
    name_b = name_b.rename(columns={'B_fighter':'Fighter'})
    
    if len(df[df['R_fighter']==name])==1 and len(df[df['B_fighter']==name])==0:
        
        SIG_STR_pct = get_pct(name_r['R_SIG_STR._att'], name_r['R_SIG_STR._landed']).iloc[0]
        TOTAL_STR_pct = get_pct(name_r['R_TOTAL_STR._att'], name_r['R_TOTAL_STR._landed']).iloc[0]
        TD_pct = name_r['R_TD_pct'].iloc[0]
        KD_TOTAL = name_r['R_KD'].iloc[0]
        BODY_pct = get_pct(name_r['R_BODY_att'], name_r['R_BODY_landed']).iloc[0]
        CLINCH_pct = get_pct(name_r['R_CLINCH_att'], name_r['R_CLINCH_landed']).iloc[0]
        DISTANCE_pct = get_pct(name_r['R_DISTANCE_att'], name_r['R_DISTANCE_landed']).iloc[0]
        GROUND_pct = get_pct(name_r['R_GROUND_att'], name_r['R_GROUND_landed']).iloc[0]
        HEAD_pct = get_pct(name_r['R_HEAD_att'], name_r['R_HEAD_landed']).iloc[0]
        LEG_pct = get_pct(name_r['R_LEG_att'], name_r['R_LEG_landed']).iloc[0]
        TOTAL_PASS = name_r['R_PASS'].sum()
        TOTAL_REV = name_r['R_REV'].sum()
        TOTAL_SUB = name_r['R_SUB._ATT'].sum()
                
    elif len(df[df['B_fighter']==name])==1 and len(df[df['R_fighter']==name])==0:
        
        SIG_STR_pct = get_pct(name_b['B_SIG_STR._att'], name_b['B_SIG_STR._landed']).iloc[0]
        TOTAL_STR_pct = get_pct(name_b['B_TOTAL_STR._att'],name_b['B_TOTAL_STR._landed']).iloc[0]
        TD_pct = name_b['B_TD_pct'].iloc[0]
        KD_TOTAL = name_b['B_KD'].sum()
        BODY_pct = get_pct(name_b['B_BODY_att'], name_b['B_BODY_landed']).iloc[0]
        CLINCH_pct = get_pct(name_b['B_CLINCH_att'], name_b['B_CLINCH_landed']).iloc[0]
        DISTANCE_pct = get_pct(name_b['B_DISTANCE_att'], name_b['B_DISTANCE_landed']).iloc[0]
        GROUND_pct = get_pct(name_b['B_GROUND_att'], name_b['B_GROUND_landed']).iloc[0]
        HEAD_pct = get_pct(name_b['B_HEAD_att'], name_b['B_HEAD_landed']).iloc[0]
        LEG_pct = get_pct(name_b['B_LEG_att'], name_b['B_LEG_landed']).iloc[0]
        TOTAL_PASS = name_b['B_PASS'].sum()
        TOTAL_REV = name_b['B_REV'].sum()
        TOTAL_SUB = name_b['B_SUB._ATT'].sum()   
        
    else:
        R_SIG_STR_pct = get_pct(name_r['R_SIG_STR._att'], name_r['R_SIG_STR._landed'])
        B_SIG_STR_pct = get_pct(name_b['B_SIG_STR._att'], name_b['B_SIG_STR._landed'])
    
        R_TOTAL_STR_pct = get_pct(name_r['R_TOTAL_STR._att'],name_r['R_TOTAL_STR._landed'])
        B_TOTAL_STR_pct = get_pct(name_b['B_TOTAL_STR._att'],name_b['B_TOTAL_STR._landed'])
    
        R_BODY_pct = get_pct(name_r['R_BODY_att'], name_r['R_BODY_landed'])
        B_BODY_pct = get_pct(name_b['B_BODY_att'], name_b['B_BODY_landed'])
    
        R_CLINCH_pct = get_pct(name_r['R_CLINCH_att'], name_r['R_CLINCH_landed'])
        B_CLINCH_pct = get_pct(name_b['B_CLINCH_att'], name_b['B_CLINCH_landed'])
    
        R_DISTANCE_pct = get_pct(name_r['R_DISTANCE_att'], name_r['R_DISTANCE_landed'])
        B_DISTANCE_pct = get_pct(name_b['B_DISTANCE_att'], name_b['B_DISTANCE_landed'])

        R_GROUND_pct = get_pct(name_r['R_GROUND_att'], name_r['R_GROUND_landed'])
        B_GROUND_pct = get_pct(name_b['B_GROUND_att'], name_b['B_GROUND_landed'])

        R_HEAD_pct = get_pct(name_r['R_HEAD_att'], name_r['R_HEAD_landed'])
        B_HEAD_pct = get_pct(name_b['B_HEAD_att'], name_b['B_HEAD_landed'])
    
        R_LEG_pct = get_pct(name_r['R_LEG_att'], name_r['R_LEG_landed'])
        B_LEG_pct = get_pct(name_b['B_LEG_att'], name_b['B_LEG_landed'])
          
        R_TD_pct = name_r['R_TD_pct']
        B_TD_pct = name_b['B_TD_pct']
    
        R_KD = name_r['R_KD'].sum()
        B_KD = name_b['B_KD'].sum()
    
        R_rev = name_r['R_REV'].sum()
        B_rev = name_b['B_REV'].sum()
    
        R_pass = name_r['R_PASS'].sum()
        B_pass = name_b['B_PASS'].sum()
    
        R_SUB = name_r['R_SUB._ATT'].sum()
        B_SUB = name_b['B_SUB._ATT'].sum()
        
        SIG_STR_pct = average_pct(R_SIG_STR_pct,B_SIG_STR_pct)
        TOTAL_STR_pct = average_pct(R_TOTAL_STR_pct, B_TOTAL_STR_pct)
        TD_pct = average_pct(R_TD_pct,B_TD_pct)
        KD_TOTAL = R_KD + B_KD
        BODY_pct = average_pct(R_BODY_pct, B_BODY_pct)
        CLINCH_pct = average_pct(R_CLINCH_pct, B_CLINCH_pct)
        DISTANCE_pct = average_pct(R_DISTANCE_pct, B_DISTANCE_pct)
        GROUND_pct = average_pct(R_GROUND_pct, B_GROUND_pct)
        HEAD_pct = average_pct(R_HEAD_pct, B_HEAD_pct)
        LEG_pct = average_pct(R_LEG_pct, B_LEG_pct)
        TOTAL_PASS = R_pass + B_pass
        TOTAL_REV = R_rev + B_rev
        TOTAL_SUB = R_SUB + B_SUB
    
    Height = 0
    Weight = 0
    Reach = 0
    Orthodox = 0
    Southpaw = 0
    Switch = 0
    
    Age_r = name_r['R_age'].max()
    Age_b = name_b['B_age'].max()
    Age = max(Age_r,Age_b)

    
    for i in df['R_fighter']:
        if i == name:
            Height = name_r['R_Height'].iloc[0]
            Weight = name_r['R_Weight'].iloc[0]
            Reach = name_r['R_Reach'].iloc[0]
            Orthodox = name_r['R_Orthodox'].iloc[0]
            Southpaw = name_r['R_Southpaw'].iloc[0]
            Switch = name_r['R_Switch'].iloc[0]
            
    for j in df['B_fighter']:
        if j == name:
            Height = name_b['B_Height'].iloc[0]
            Weight = name_b['B_Weight'].iloc[0]
            Reach = name_b['B_Reach'].iloc[0]
            Orthodox = name_b['B_Orthodox'].iloc[0]
            Southpaw = name_b['B_Southpaw'].iloc[0]
            Switch = name_b['B_Switch'].iloc[0]
    
    F_info = {'SIG_STR_pct':SIG_STR_pct,'TOTAL_STR_pct':TOTAL_STR_pct,'TOTAL_SUB':TOTAL_SUB,'TD_pct':TD_pct,'KD_TOTAL':KD_TOTAL,'BODY_pct':BODY_pct,'CLINCH_pct':CLINCH_pct,'DISTANCE_pct':DISTANCE_pct, 'GROUND_pct':GROUND_pct,'HEAD_pct':HEAD_pct,'LEG_pct':LEG_pct,'TOTAL_REV':TOTAL_REV,'TOTAL_PASS':TOTAL_PASS,'Age':Age,'Height':Height,'Weight':Weight,'Reach':Reach, 'Orthodox':Orthodox, 'Southpaw':Southpaw, 'Switch':Switch}
    
    return F_info

In [54]:
lista = []
for i in fighters:
    lista.append(get_fighter_info(i))
    
df_fighters = pd.DataFrame(data=lista, index=fighters)

In [55]:
df_fighters.fillna(0,inplace=True)

In [56]:
R_SIG_STR_pct = []
R_TOTAL_STR_pct = []
R_TD_pct = []
R_KD_TOTAL = []
R_BODY_pct = []
R_CLINCH_pct = []
R_DISTANCE_pct = []
R_GROUND_pct = []
R_HEAD_pct = []
R_LEG_pct = []
R_TOTAL_REV = []
R_TOTAL_PASS = []
R_SUB = []
R_Height = []
R_Weight = []
R_Reach = []
R_Orthodox = []
R_Southpaw = []
R_Switch = []

for i in range(len(df)):
    R_SIG_STR_pct.append(df_fighters['SIG_STR_pct'].loc[df['R_fighter'].iloc[i]])
    R_TOTAL_STR_pct.append(df_fighters['TOTAL_STR_pct'].loc[df['R_fighter'].iloc[i]])
    R_TD_pct.append(df_fighters['TD_pct'].loc[df['R_fighter'].iloc[i]])
    R_KD_TOTAL.append(df_fighters['KD_TOTAL'].loc[df['R_fighter'].iloc[i]])
    R_BODY_pct.append(df_fighters['BODY_pct'].loc[df['R_fighter'].iloc[i]])
    R_CLINCH_pct.append(df_fighters['CLINCH_pct'].loc[df['R_fighter'].iloc[i]])
    R_DISTANCE_pct.append(df_fighters['DISTANCE_pct'].loc[df['R_fighter'].iloc[i]])
    R_GROUND_pct.append(df_fighters['GROUND_pct'].loc[df['R_fighter'].iloc[i]])
    R_HEAD_pct.append(df_fighters['HEAD_pct'].loc[df['R_fighter'].iloc[i]])
    R_LEG_pct.append(df_fighters['LEG_pct'].loc[df['R_fighter'].iloc[i]])
    R_TOTAL_REV.append(df_fighters['TOTAL_REV'].loc[df['R_fighter'].iloc[i]])
    R_TOTAL_PASS.append(df_fighters['TOTAL_PASS'].loc[df['R_fighter'].iloc[i]])
    R_Height.append(df_fighters['Height'].loc[df['R_fighter'].iloc[i]])
    R_Weight.append(df_fighters['Weight'].loc[df['R_fighter'].iloc[i]])
    R_Reach.append(df_fighters['Reach'].loc[df['R_fighter'].iloc[i]])
    R_Orthodox.append(df_fighters['Orthodox'].loc[df['R_fighter'].iloc[i]])
    R_Southpaw.append(df_fighters['Southpaw'].loc[df['R_fighter'].iloc[i]])
    R_Switch.append(df_fighters['Switch'].loc[df['R_fighter'].iloc[i]])
    R_SUB.append(df_fighters['TOTAL_SUB'].loc[df['R_fighter'].iloc[i]])
    
    
df.drop('R_SIG_STR._att', axis = 1, inplace=True)
df.drop('R_TOTAL_STR._att', axis = 1, inplace=True)
df.drop('R_TD_att', axis = 1, inplace=True)
df.drop('R_KD', axis = 1, inplace=True)
df.drop('R_PASS', axis=1, inplace=True)
df.drop('R_REV', axis=1, inplace=True)
df.drop('R_HEAD_att', axis = 1, inplace=True)
df.drop('R_BODY_att', axis = 1, inplace=True)
df.drop('R_LEG_att', axis = 1, inplace=True)
df.drop('R_DISTANCE_att', axis = 1, inplace=True)
df.drop('R_CLINCH_att', axis = 1, inplace=True)
df.drop('R_GROUND_att', axis = 1, inplace=True)
df.drop('R_Height', axis = 1, inplace=True)
df.drop('R_Weight', axis = 1, inplace=True)
df.drop('R_Reach', axis = 1, inplace=True)
df.drop('R_Orthodox', axis = 1, inplace=True)
df.drop('R_Southpaw', axis = 1, inplace=True)
df.drop('R_Switch', axis = 1, inplace=True)
df.drop('R_SUB._ATT', axis=1, inplace=True)




df['R_SIG_STR._pct'] = R_SIG_STR_pct
df['R_TOTAL_STR_pct'] = R_TOTAL_STR_pct
df['R_TD_pct'] = R_TD_pct
df['R_KD_TOTAL'] = R_KD_TOTAL
df['R_TOTAL_REV'] = R_TOTAL_REV
df['R_TOTAL_PASS'] = R_TOTAL_PASS
df['R_CLINCH_pct'] = R_CLINCH_pct
df['R_GROUND_pct'] = R_GROUND_pct
df['R_DISTANCE_pct'] = R_DISTANCE_pct
df['R_BODY_pct'] = R_BODY_pct
df['R_HEAD_pct'] = R_HEAD_pct
df['R_LEG_pct'] = R_LEG_pct
df['R_Height'] = R_Height
df['R_Weight'] = R_Weight
df['R_Reach'] = R_Reach
df['R_SUB'] = R_SUB
df['R_Orthodox'] = R_Orthodox
df['R_Southpaw'] = R_Southpaw
df['R_Switch'] = R_Switch

In [57]:
B_SIG_STR_pct = []
B_TOTAL_STR_pct = []
B_TD_pct = []
B_KD_TOTAL = []
B_BODY_pct = []
B_CLINCH_pct = []
B_DISTANCE_pct = []
B_GROUND_pct = []
B_HEAD_pct = []
B_LEG_pct = []
B_TOTAL_REV = []
B_TOTAL_PASS = []
B_SUB = []
B_Height = []
B_Weight = []
B_Reach = []
B_Orthodox = []
B_Southpaw = []
B_Switch = []

for i in range(len(df)):
    B_SIG_STR_pct.append(df_fighters['SIG_STR_pct'].loc[df['R_fighter'].iloc[i]])
    B_TOTAL_STR_pct.append(df_fighters['TOTAL_STR_pct'].loc[df['R_fighter'].iloc[i]])
    B_TD_pct.append(df_fighters['TD_pct'].loc[df['R_fighter'].iloc[i]])
    B_KD_TOTAL.append(df_fighters['KD_TOTAL'].loc[df['R_fighter'].iloc[i]])
    B_SUB.append(df_fighters['TOTAL_SUB'].loc[df['R_fighter'].iloc[i]])
    B_BODY_pct.append(df_fighters['BODY_pct'].loc[df['R_fighter'].iloc[i]])
    B_CLINCH_pct.append(df_fighters['CLINCH_pct'].loc[df['R_fighter'].iloc[i]])
    B_DISTANCE_pct.append(df_fighters['DISTANCE_pct'].loc[df['R_fighter'].iloc[i]])
    B_GROUND_pct.append(df_fighters['GROUND_pct'].loc[df['R_fighter'].iloc[i]])
    B_HEAD_pct.append(df_fighters['HEAD_pct'].loc[df['R_fighter'].iloc[i]])
    B_LEG_pct.append(df_fighters['LEG_pct'].loc[df['R_fighter'].iloc[i]])
    B_TOTAL_REV.append(df_fighters['TOTAL_REV'].loc[df['R_fighter'].iloc[i]])
    B_TOTAL_PASS.append(df_fighters['TOTAL_PASS'].loc[df['R_fighter'].iloc[i]])
    B_Height.append(df_fighters['Height'].loc[df['R_fighter'].iloc[i]])
    B_Weight.append(df_fighters['Weight'].loc[df['R_fighter'].iloc[i]])
    B_Reach.append(df_fighters['Reach'].loc[df['R_fighter'].iloc[i]])
    B_Orthodox.append(df_fighters['Orthodox'].loc[df['R_fighter'].iloc[i]])
    B_Southpaw.append(df_fighters['Southpaw'].loc[df['R_fighter'].iloc[i]])
    B_Switch.append(df_fighters['Switch'].loc[df['R_fighter'].iloc[i]])
    
    
df.drop('B_SIG_STR._att', axis = 1, inplace=True)
df.drop('B_TOTAL_STR._att', axis = 1, inplace=True)
df.drop('B_TD_att', axis = 1, inplace=True)
df.drop('B_KD', axis = 1, inplace=True)
df.drop('B_PASS', axis=1, inplace=True)
df.drop('B_REV', axis=1, inplace=True)
df.drop('B_HEAD_att', axis = 1, inplace=True)
df.drop('B_BODY_att', axis = 1, inplace=True)
df.drop('B_LEG_att', axis = 1, inplace=True)
df.drop('B_DISTANCE_att', axis = 1, inplace=True)
df.drop('B_CLINCH_att', axis = 1, inplace=True)
df.drop('B_GROUND_att', axis = 1, inplace=True)
df.drop('B_Height', axis = 1, inplace=True)
df.drop('B_Weight', axis = 1, inplace=True)
df.drop('B_Reach', axis = 1, inplace=True)
df.drop('B_Orthodox', axis = 1, inplace=True)
df.drop('B_Southpaw', axis = 1, inplace=True)
df.drop('B_Switch', axis = 1, inplace=True)
df.drop('B_SUB._ATT', axis=1, inplace=True)




df['B_SIG_STR._pct'] = B_SIG_STR_pct
df['B_TOTAL_STR_pct'] = B_TOTAL_STR_pct
df['B_TD_pct'] = B_TD_pct
df['B_SUB'] = B_SUB
df['B_KD_TOTAL'] = B_KD_TOTAL
df['B_TOTAL_REV'] = B_TOTAL_REV
df['B_TOTAL_PASS'] = B_TOTAL_PASS
df['B_CLINCH_pct'] = B_CLINCH_pct
df['B_GROUND_pct'] = B_GROUND_pct
df['B_DISTANCE_pct'] = B_DISTANCE_pct
df['B_BODY_pct'] = B_BODY_pct
df['B_HEAD_pct'] = B_HEAD_pct
df['B_LEG_pct'] = B_LEG_pct
df['B_Height'] = B_Height
df['B_Weight'] = B_Weight
df['B_Reach'] = B_Reach
df['B_Orthodox'] = B_Orthodox
df['B_Southpaw'] = B_Southpaw
df['B_Switch'] = B_Switch

In [58]:
def merge_dict(dict1, dict2):
    dict2.update(dict1)
    return dict2

In [59]:
def get_fighter_df(fighter_R, fighter_B):
    f0=df_fighters.loc[fighter_R]
    f1=df_fighters.loc[fighter_B]
    fighter_0=f0.to_dict()
    fighter_1=f1.to_dict()

    fighter_0['R_age']=fighter_0.pop('Age')
    fighter_0['R_SIG_STR._pct'] = fighter_0.pop('SIG_STR_pct')
    fighter_0['R_TOTAL_STR._pct'] = fighter_0.pop('TOTAL_STR_pct')
    fighter_0['R_TD_pct'] = fighter_0.pop('TD_pct')
    fighter_0['R_KD_TOTAL'] = fighter_0.pop('KD_TOTAL')
    fighter_0['R_TOTAL_REV'] = fighter_0.pop('TOTAL_REV')
    fighter_0['R_TOTAL_PASS'] =fighter_0.pop('TOTAL_PASS')
    fighter_0['R_CLINCH_pct'] = fighter_0.pop('CLINCH_pct')
    fighter_0['R_GROUND_pct'] = fighter_0.pop('GROUND_pct')
    fighter_0['R_DISTANCE_pct'] = fighter_0.pop('DISTANCE_pct')
    fighter_0['R_BODY_pct'] = fighter_0.pop('BODY_pct')
    fighter_0['R_HEAD_pct'] = fighter_0.pop('HEAD_pct')
    fighter_0['R_LEG_pct'] = fighter_0.pop('LEG_pct')
    fighter_0['R_Height'] = fighter_0.pop('Height')
    fighter_0['R_Weight'] = fighter_0.pop('Weight')
    fighter_0['R_Reach'] = fighter_0.pop('Reach')
    fighter_0['R_SUB'] = fighter_0.pop('TOTAL_SUB')
    fighter_0['R_Orthodox'] = fighter_0.pop('Orthodox')
    fighter_0['R_Southpaw'] = fighter_0.pop('Southpaw')
    fighter_0['R_Switch'] = fighter_0.pop('Switch')

    fighter_1['B_age']=fighter_1.pop('Age')
    fighter_1['B_SIG_STR._pct'] = fighter_1.pop('SIG_STR_pct')
    fighter_1['B_TOTAL_STR._pct'] = fighter_1.pop('TOTAL_STR_pct')
    fighter_1['B_TD_pct'] = fighter_1.pop('TD_pct')
    fighter_1['B_KD_TOTAL'] = fighter_1.pop('KD_TOTAL')
    fighter_1['B_TOTAL_REV'] = fighter_1.pop('TOTAL_REV')
    fighter_1['B_TOTAL_PASS'] =fighter_1.pop('TOTAL_PASS')
    fighter_1['B_CLINCH_pct'] = fighter_1.pop('CLINCH_pct')
    fighter_1['B_GROUND_pct'] = fighter_1.pop('GROUND_pct')
    fighter_1['B_DISTANCE_pct'] = fighter_1.pop('DISTANCE_pct')
    fighter_1['B_BODY_pct'] = fighter_1.pop('BODY_pct')
    fighter_1['B_HEAD_pct'] = fighter_1.pop('HEAD_pct')
    fighter_1['B_LEG_pct'] = fighter_1.pop('LEG_pct')
    fighter_1['B_Height'] = fighter_1.pop('Height')
    fighter_1['B_Weight'] = fighter_1.pop('Weight')
    fighter_1['B_Reach'] = fighter_1.pop('Reach')
    fighter_1['B_SUB'] = fighter_1.pop('TOTAL_SUB')
    fighter_1['B_Orthodox'] = fighter_1.pop('Orthodox')
    fighter_1['B_Southpaw'] = fighter_1.pop('Southpaw')
    fighter_1['B_Switch'] = fighter_1.pop('Switch')
    
    merge_dict(fighter_0,fighter_1)
    fighter_test = pd.DataFrame(fighter_1, index=[0])
    fighter_test = fighter_test[['R_KD_TOTAL',
                            'B_KD_TOTAL',
                            'R_TD_pct',
                            'B_TD_pct',
                            'R_TOTAL_PASS',
                            'B_TOTAL_PASS',
                            'R_TOTAL_REV',
                            'B_TOTAL_REV',
                            'R_Height',
                            'R_Weight',
                            'R_Reach',
                            'B_Height',
                            'B_Weight',
                            'B_Reach',
                            'B_age',
                            'R_age',
                            'B_Southpaw',
                            'B_Orthodox',
                            'B_Switch',
                            'R_Southpaw',
                            'R_Orthodox',
                            'R_Switch',
                            'R_TOTAL_STR._pct',
                            'R_BODY_pct',
                            'R_CLINCH_pct',
                            'R_DISTANCE_pct',
                            'R_GROUND_pct',
                            'R_HEAD_pct',
                            'R_LEG_pct',
                            'R_SUB',
                            'R_SIG_STR._pct',
                            'B_TOTAL_STR._pct',
                            'B_BODY_pct',
                            'B_CLINCH_pct',
                            'B_DISTANCE_pct',
                            'B_GROUND_pct',
                            'B_HEAD_pct',
                            'B_LEG_pct',
                            'B_SUB',
                            'B_SIG_STR._pct']]
    
    fighter_test.rename(columns={'B_SUB':'B_SUB._pct',
                             'R_KD_TOTAL':'R_KD',
                             'B_TOTAL_REV':'B_REV',
                             'R_TOTAL_REV':'R_REV',
                             'R_SUB':'R_SUB._pct',
                            'B_TOTAL_PASS':'B_PASS',
                            'B_KD_TOTAL':'B_KD',
                            'R_TOTAL_PASS':'R_PASS'},inplace=True)
    return fighter_test

In [75]:
import sklearn.naive_bayes

bayes = sklearn.naive_bayes.BernoulliNB ()
bayes.fit(X_train, y_train)

pred = bayes.predict(X_test)
accuracy = accuracy_score(y_test, pred)
precision = precision_score(y_test, pred, average = 'macro')
recall = recall_score(y_test, pred, average = 'macro')
print('accuracy= %.2f%%, precision= %.2f%%, recall= %.2f%%' % (accuracy*100.00, precision*100, recall*100))

accuracy= 80.82%, precision= 77.35%, recall= 79.83%


In [76]:
roc_auc_score(y_test, pred)

0.7983357004989257

In [77]:
bayes_final = sklearn.naive_bayes.BernoulliNB()
bayes_final.fit(X, Y)

BernoulliNB(alpha=1.0, binarize=0.0, class_prior=None, fit_prior=True)

In [84]:
f=open('future_fights','r')
lutas = f.read().replace("\"\"", "").split('\n')[:-1]

lutas = [x.replace('[','') for x in lutas]
lutas = [x.replace(']','') for x in lutas]
lutas = [x.replace("'","") for x in lutas]
lutas = [x.split(',') for x in lutas]

In [85]:
pred_test_xgb = []
pred_test_bayes = []

for i in lutas:
    try:
        pred_test_xgb.append(model_final.predict_proba(get_fighter_df(i[0],i[1])))
        pred_test_bayes.append(bayes_final.predict_proba(get_fighter_df(i[0],i[1])))
    except:
        pred_test_xgb.append('NaN')
        pred_test_bayes.append('NaN')
        pass
    
for i in range(len(lutas)):
    print(lutas[i], pred_test_xgb[i], pred_test_bayes[i])

['Charles Oliveira', 'Michael Chandler'] NaN NaN NaN
['Tony Ferguson', 'Beneil Dariush'] [[0.33649588 0.6635041 ]] [[0.49673515 0.50326485]] [1]
['Jack Hermansson', 'Edmen Shahbazyan'] [[0.3709584 0.6290416]] [[0.13159492 0.86840508]] [1]
['Katlyn Chookagian', 'Viviane Araujo'] [[0.89183974 0.10816025]] [[9.99974345e-01 2.56550683e-05]] [0]
['Shane Burgos', 'Edson Barboza'] [[0.17101377 0.8289862 ]] [[0.50726071 0.49273929]] [0]
['Jacare Souza', 'Andre Muniz'] [[0.3780102 0.6219898]] [[0.80973043 0.19026957]] [1]
['Matt Schnell', 'Rogerio Bontorin'] [[0.81337094 0.18662909]] [[0.7351192 0.2648808]] [1]
['Lando Vannata', 'Mike Grundy'] [[0.38021207 0.61978793]] [[0.76594163 0.23405837]] [1]
['Andrea Lee', 'Antonina Shevchenko'] [[0.8152219 0.1847781]] [[0.80373957 0.19626043]] [0]
['Jordan Wright', 'Jamie Pickett'] NaN NaN NaN
['Gina Mazany', 'Priscila Cachoeira'] [[0.98059976 0.01940026]] [[9.99989067e-01 1.09331592e-05]] [0]
['Kevin Aguilar', 'Tucker Lutz'] NaN NaN NaN
['Christos Giag