In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import warnings
from sklearn.preprocessing import LabelEncoder
warnings.filterwarnings("ignore")

pd.set_option('display.max_columns',100)

info_lutador = pd.read_csv('fighter_details.csv')
ufc = pd.read_csv('total_fight_data.csv', sep=';')

In [2]:
columns = ['R_SIG_STR.', 'B_SIG_STR.', 'R_TOTAL_STR.', 'B_TOTAL_STR.',
       'R_TD', 'B_TD', 'R_HEAD', 'B_HEAD', 'R_BODY','B_BODY', 'R_LEG', 'B_LEG', 
        'R_DISTANCE', 'B_DISTANCE', 'R_CLINCH','B_CLINCH', 'R_GROUND', 'B_GROUND']

attemp = '_att'
landed = '_landed'

for column in columns:
    ufc[column+attemp] = ufc[column].apply(lambda X:int(X.split('of')[1]))
    ufc[column+landed] = ufc[column].apply(lambda X:int(X.split('of')[0]))

ufc.drop(columns, axis=1, inplace=True)

In [3]:
pct_columns = ['R_SIG_STR_pct','B_SIG_STR_pct', 'R_TD_pct', 'B_TD_pct']

for column in pct_columns:
    ufc[column] = ufc[column].apply(lambda X:float(X.replace('%', ''))/100)

In [4]:
def Division(X):
    for division in weight_classes:
        if division in X:
            return division
    if X == 'Catch Weight Bout' or 'Catchweight Bout':
        return 'Catch Weight'
    else:
        return 'Open Weight'

In [5]:
weight_classes = ['Women\'s Strawweight', 'Women\'s Bantamweight', 
                  'Women\'s Featherweight', 'Women\'s Flyweight', 'Lightweight', 
                  'Welterweight', 'Middleweight','Light Heavyweight', 
                  'Heavyweight', 'Featherweight','Bantamweight', 'Flyweight', 'Open Weight']

ufc['Weight_class'] = ufc['Fight_type'].apply(Division)

In [6]:
def get_rounds(X):
    if X == "No Time Limit":
        return 1
    else:
        return len(X.split('(')[1].replace(')','').split('-'))

ufc['Number of Rounds'] = ufc['Format'].apply(get_rounds)

In [7]:
ufc['Winner'].fillna('Draw', inplace=True)

In [8]:
def get_renamed_winner(row):
    if row['R_fighter'] == row['Winner']:
        return 'Red'
    if row['B_fighter'] == row['Winner']:
        return 'Blue'
    elif row['Winner'] == 'Draw':
        return 'Draw'
    
ufc['Winner'] = ufc[['R_fighter', 'B_fighter', 'Winner']].apply(get_renamed_winner, axis=1)

In [9]:
def inch_to_cm(X):
    if X is np.NaN:
        return X
    elif len(X.split("'")) == 2:
        feet = float(X.split("'")[0])
        inches = int(X.split("'")[1].replace(' ','').replace('"', ''))
        return (feet*30.48) + (inches * 2.54)
    else:
        return float(X.replace('"',''))*2.54

In [10]:
info_lutador['Height'] = info_lutador['Height'].apply(inch_to_cm)
info_lutador['Reach'] = info_lutador['Reach'].apply(inch_to_cm)

In [11]:
info_lutador['Weight'] = info_lutador['Weight'].apply(lambda X: float(X.replace(' lbs', '')) if X is not np.NaN else X)

In [12]:
ufc_final = ufc.merge(info_lutador, left_on='R_fighter', right_on='fighter_name', how='left')

In [13]:
ufc_final = ufc_final.drop('fighter_name', axis=1)

In [14]:
ufc_final.rename(columns={'Height':'R_Height',
                          'Weight':'R_Weight',
                          'Reach':'R_Reach',
                          'Stance':'R_Stance',
                          'DOB':'R_DOB'}, inplace=True)

In [15]:
ufc_final = ufc_final.merge(info_lutador, left_on='B_fighter', right_on='fighter_name', how='left')

In [16]:
ufc_final = ufc_final.drop('fighter_name', axis=1)

In [17]:
ufc_final.rename(columns={'Height':'B_Height',
                          'Weight':'B_Weight',
                          'Reach':'B_Reach',
                          'Stance':'B_Stance',
                          'DOB':'B_DOB'}, inplace=True)

In [18]:
ufc_final['R_DOB'] = pd.to_datetime(ufc_final['R_DOB'])
ufc_final['B_DOB'] = pd.to_datetime(ufc_final['B_DOB'])
ufc_final['date'] = pd.to_datetime(ufc_final['date'])

In [19]:
ufc_final['R_year'] = ufc_final['R_DOB'].apply(lambda X: X.year)
ufc_final['B_year'] = ufc_final['B_DOB']. apply(lambda X: X.year)
ufc_final['date_year'] = ufc_final['date'].apply(lambda X: X.year)


def get_age(row):
    B_age = (row['date_year'] - row['B_year'])
    R_age = (row['date_year'] - row['R_year'])
    if np.isnan(B_age) != True:
        B_age = B_age
    if np.isnan(R_age) != True:
        R_age = R_age
    return pd.Series([B_age, R_age], index = ['B_age', 'R_age'])

In [20]:
ufc_final[['B_age', 'R_age']] = ufc_final[['date_year', 'R_year', 'B_year']].apply(get_age, axis=1)

In [21]:
ufc_final.drop(['R_DOB', 'B_DOB', 'B_year', 'R_year'],axis=1, inplace=True)

In [22]:
ufc_final['country'] = ufc_final['location'].apply(lambda X: X.split(",")[-1])

In [23]:
women = ufc_final['Weight_class'].str.contains('Women')

In [24]:
ufc_final['R_age'] = ufc_final['R_age'].fillna(ufc_final['R_age'].median())
ufc_final['B_age'] = ufc_final['B_age'].fillna(ufc_final['B_age'].median())

In [25]:
ufc_final['R_Height'] = ufc_final['R_Height'].fillna(ufc_final['R_Height'].mean())
ufc_final['B_Height'] = ufc_final['B_Height'].fillna(ufc_final['B_Height'].mean())

In [26]:
Attempt = pd.concat([ufc_final['R_TOTAL_STR._att'], ufc_final['B_TOTAL_STR._att']], ignore_index = True)
Landed = pd.concat([ufc_final['R_TOTAL_STR._landed'], ufc_final['B_TOTAL_STR._landed']], ignore_index = True)

In [27]:
r_landed = ufc_final['R_TOTAL_STR._landed']
r_index = r_landed.index

b_landed = ufc_final['B_TOTAL_STR._landed']
b_index = b_landed.index

In [28]:
Fighter = pd.concat([ufc_final['R_fighter'], ufc_final['B_fighter']], ignore_index = True)

In [29]:
df = ufc_final.copy()
df = df.fillna(df.mean())

In [30]:
from statistics import mode
df['B_Stance'] = df['B_Stance'].fillna(df['B_Stance'].mode()[0])
df['R_Stance'] = df['R_Stance'].fillna(df['R_Stance'].mode()[0])

In [31]:
enc = LabelEncoder()

In [32]:
data_enc1 = df['Weight_class']
data_enc1 = enc.fit_transform(data_enc1)

data_enc2 = df['R_Stance']
data_enc2 = enc.fit_transform(data_enc2)

data_enc3 = df['B_Stance']
data_enc3 = enc.fit_transform(data_enc3)

data_enc1 = pd.DataFrame(data_enc1, columns=['Weight_class'])
data_enc2 = pd.DataFrame(data_enc2, columns=['R_Stance'])
data_enc3 = pd.DataFrame(data_enc3, columns=['B_Stance'])

df[['Weight_class']] = data_enc1[['Weight_class']]
df[['R_Stance']] = data_enc2[['R_Stance']]
df[['B_Stance']] = data_enc3[['B_Stance']]

df = pd.concat([df,pd.get_dummies(df['win_by'], prefix='win_by')], axis=1)
df.drop(['win_by'], axis=1, inplace=True)

df['Winner_num'] = df.Winner.map({'Red':0, 'Blue':1, 'Draw':2})



In [33]:
df_fix = df.drop(['date', 'location', 'Referee', 'last_round', 'last_round_time', 'Format','Number of Rounds', 'country', 'Winner','date_year','Fight_type'], axis=1)
df_fix.head()
Y = df['Winner_num']
X = df.drop(['Winner_num'], axis =  1)

In [34]:
c_blaydes_r = df_fix[df_fix['R_fighter']== 'Curtis Blaydes']
c_blaydes_b = df_fix[df_fix['B_fighter']== 'Curtis Blaydes']
c_blaydes_r = c_blaydes_r.drop(['B_fighter'], axis=1)
c_blaydes_b = c_blaydes_b.drop(['R_fighter'], axis=1)
total = [c_blaydes_r, c_blaydes_b]
c_blaydes_total = pd.concat(total)

Tirar a média dessas estatisticas (talvez a média das ultimas k lutas seja melhor):

KD
SIG_STR_pct
TD_pct
SUB_ATT
PASS
REV
SIG_STR._att
SIG_STR._landed
TOTAL_STR._att
TOTAL_STR._landed
TD_att
TD_landed
HEAD_att
HEAD_landed
BODY_att
BODY_landed
LEG_att
LEG_landed
DISTANCE_att
DISTANCE_landed
CLINCH_att
CLINCH_landed
GROUND_att
GROUND_landed

Pegar essas informações, a mais recente/atualizada possivel:

Weight_class
Height
Weight
Reach
Stance
age

Verificar oque fazer com esses dados:

win_by_Could Not Continue
win_by_DQ
win_by_Decision - Majority
win_by_Decision - Split
win_by_Decision - Unanimous
win_by_KO/TKO
win_by_Other
win_by_Overturned
win_by_Submission
win_by_TKO - Doctor's Stoppag
Winner_num

In [35]:
r_strike_pct = c_blaydes_r['R_SIG_STR_pct'].mean()
b_strike_pct = c_blaydes_b['B_SIG_STR_pct'].mean()
average_pct = (r_strike_pct + b_strike_pct)/2

r_body_pct = (c_blaydes_r['R_BODY_landed']/c_blaydes_r['R_BODY_att']) * 100
b_body_pct = (c_blaydes_b['B_BODY_landed']/c_blaydes_b['B_BODY_att']) * 100

body_avg_pct = (r_body_pct.mean() + b_body_pct.mean())/2

c_blaydes_r = df_fix[df_fix['R_fighter']== 'Curtis Blaydes']
c_blaydes_b = df_fix[df_fix['B_fighter']== 'Curtis Blaydes']
c_blaydes_r = c_blaydes_r.drop(['B_fighter'], axis=1)
c_blaydes_b = c_blaydes_b.drop(['R_fighter'], axis=1)
total = [c_blaydes_r, c_blaydes_b]
c_blaydes_total = pd.concat(total)

In [36]:
def average_pct(r_pct, b_pct):
    average_pct = (r_pct.mean() + b_pct.mean())/2
    return average_pct

In [37]:
def get_pct(att, landed):
    pct = (landed / att) * 100
    return pct

In [38]:
fighters = []
for i in df_fix['B_fighter']:
    if i not in fighters:
        fighters.append(i)
for j in df_fix['R_fighter']:
    if j not in fighters:
        fighters.append(j)

df_fix['R_SUB._ATT'] = df_fix['R_SUB_ATT']
df_fix['B_SUB._ATT'] = df_fix['B_SUB_ATT']
df_fix.drop(['R_SUB_ATT','B_SUB_ATT'], axis=1, inplace=True)
df_fix['R_SIG_STR._pct'] = df_fix['R_SIG_STR_pct']
df_fix['B_SIG_STR._pct'] = df_fix['B_SIG_STR_pct']
df_fix.drop(['R_SIG_STR_pct','B_SIG_STR_pct', 'Weight_class'], axis=1, inplace=True)
df_fix.rename(columns = {'R_fighter' : 'V_fighter', 'B_fighter' : 'A_fighter', 'R_age': 'V_age', 'B_age':'A_age'}, inplace = True)
df_fix[df_fix['V_fighter']=='Brad Riddell']

Unnamed: 0,V_fighter,A_fighter,R_KD,B_KD,R_TD_pct,B_TD_pct,R_PASS,B_PASS,R_REV,B_REV,R_SIG_STR._att,R_SIG_STR._landed,B_SIG_STR._att,B_SIG_STR._landed,R_TOTAL_STR._att,R_TOTAL_STR._landed,B_TOTAL_STR._att,B_TOTAL_STR._landed,R_TD_att,R_TD_landed,B_TD_att,B_TD_landed,R_HEAD_att,R_HEAD_landed,B_HEAD_att,B_HEAD_landed,R_BODY_att,R_BODY_landed,B_BODY_att,B_BODY_landed,R_LEG_att,R_LEG_landed,B_LEG_att,B_LEG_landed,R_DISTANCE_att,R_DISTANCE_landed,B_DISTANCE_att,B_DISTANCE_landed,R_CLINCH_att,R_CLINCH_landed,B_CLINCH_att,B_CLINCH_landed,R_GROUND_att,R_GROUND_landed,B_GROUND_att,B_GROUND_landed,R_Height,R_Weight,R_Reach,R_Stance,B_Height,B_Weight,B_Reach,B_Stance,A_age,V_age,win_by_Could Not Continue,win_by_DQ,win_by_Decision - Majority,win_by_Decision - Split,win_by_Decision - Unanimous,win_by_KO/TKO,win_by_Other,win_by_Overturned,win_by_Submission,win_by_TKO - Doctor's Stoppage,Winner_num,R_SUB._ATT,B_SUB._ATT,R_SIG_STR._pct,B_SIG_STR._pct
154,Brad Riddell,Jamie Mullarkey,0,0,1.0,0.2,5,2,1,1,153,91,95,36,167,104,100,41,1,1,15,3,129,70,83,25,16,14,4,4,8,7,8,7,121,63,93,35,3,3,0,0,29,25,2,1,170.18,155.0,180.34,1,182.88,155.0,187.96,1,25.0,28.0,0,0,0,0,1,0,0,0,0,0,0,0,1,0.59,0.37


In [39]:
def get_fighter_info(name):
    
    name_r = df_fix[df_fix['V_fighter']== name]
    name_b = df_fix[df_fix['A_fighter']== name]
    
    name_r = name_r[name_r.columns.drop(list(name_r.filter(regex='B_')))]
    name_b = name_b[name_b.columns.drop(list(name_b.filter(regex='R_')))]
        
    name_r = name_r.rename(columns={'V_fighter':'Fighter'})
    name_b = name_b.rename(columns={'A_fighter':'Fighter'})
    
    if len(df_fix[df_fix['V_fighter']==name])==1 and len(df_fix[df_fix['A_fighter']==name])==0:
        
        SIG_STR_pct = get_pct(name_r['R_SIG_STR._att'], name_r['R_SIG_STR._landed']).iloc[0]
        TOTAL_STR_pct = get_pct(name_r['R_TOTAL_STR._att'], name_r['R_TOTAL_STR._landed']).iloc[0]
        TD_pct = name_r['R_TD_pct'].iloc[0]
        KD_TOTAL = name_r['R_KD'].iloc[0]
        BODY_pct = get_pct(name_r['R_BODY_att'], name_r['R_BODY_landed']).iloc[0]
        CLINCH_pct = get_pct(name_r['R_CLINCH_att'], name_r['R_CLINCH_landed']).iloc[0]
        DISTANCE_pct = get_pct(name_r['R_DISTANCE_att'], name_r['R_DISTANCE_landed']).iloc[0]
        GROUND_pct = get_pct(name_r['R_GROUND_att'], name_r['R_GROUND_landed']).iloc[0]
        HEAD_pct = get_pct(name_r['R_HEAD_att'], name_r['R_HEAD_landed']).iloc[0]
        LEG_pct = get_pct(name_r['R_LEG_att'], name_r['R_LEG_landed']).iloc[0]
        TOTAL_PASS = name_r['R_PASS'].sum()
        TOTAL_REV = name_r['R_REV'].sum()
        TOTAL_SUB = name_r['R_SUB._ATT'].sum()
                
    elif len(df_fix[df_fix['A_fighter']==name])==1 and len(df_fix[df_fix['V_fighter']==name])==0:
        
        SIG_STR_pct = get_pct(name_b['B_SIG_STR._att'], name_b['B_SIG_STR._landed']).iloc[0]
        TOTAL_STR_pct = get_pct(name_b['B_TOTAL_STR._att'],name_b['B_TOTAL_STR._landed']).iloc[0]
        TD_pct = name_b['B_TD_pct'].iloc[0]
        KD_TOTAL = name_b['B_KD'].sum()
        BODY_pct = get_pct(name_b['B_BODY_att'], name_b['B_BODY_landed']).iloc[0]
        CLINCH_pct = get_pct(name_b['B_CLINCH_att'], name_b['B_CLINCH_landed']).iloc[0]
        DISTANCE_pct = get_pct(name_b['B_DISTANCE_att'], name_b['B_DISTANCE_landed']).iloc[0]
        GROUND_pct = get_pct(name_b['B_GROUND_att'], name_b['B_GROUND_landed']).iloc[0]
        HEAD_pct = get_pct(name_b['B_HEAD_att'], name_b['B_HEAD_landed']).iloc[0]
        LEG_pct = get_pct(name_b['B_LEG_att'], name_b['B_LEG_landed']).iloc[0]
        TOTAL_PASS = name_b['B_PASS'].sum()
        TOTAL_REV = name_b['B_REV'].sum()
        TOTAL_SUB = name_b['B_SUB._ATT'].sum()   
        
    else:
        R_SIG_STR_pct = get_pct(name_r['R_SIG_STR._att'], name_r['R_SIG_STR._landed'])
        B_SIG_STR_pct = get_pct(name_b['B_SIG_STR._att'], name_b['B_SIG_STR._landed'])
    
        R_TOTAL_STR_pct = get_pct(name_r['R_TOTAL_STR._att'],name_r['R_TOTAL_STR._landed'])
        B_TOTAL_STR_pct = get_pct(name_b['B_TOTAL_STR._att'],name_b['B_TOTAL_STR._landed'])
    
        R_BODY_pct = get_pct(name_r['R_BODY_att'], name_r['R_BODY_landed'])
        B_BODY_pct = get_pct(name_b['B_BODY_att'], name_b['B_BODY_landed'])
    
        R_CLINCH_pct = get_pct(name_r['R_CLINCH_att'], name_r['R_CLINCH_landed'])
        B_CLINCH_pct = get_pct(name_b['B_CLINCH_att'], name_b['B_CLINCH_landed'])
    
        R_DISTANCE_pct = get_pct(name_r['R_DISTANCE_att'], name_r['R_DISTANCE_landed'])
        B_DISTANCE_pct = get_pct(name_b['B_DISTANCE_att'], name_b['B_DISTANCE_landed'])

        R_GROUND_pct = get_pct(name_r['R_GROUND_att'], name_r['R_GROUND_landed'])
        B_GROUND_pct = get_pct(name_b['B_GROUND_att'], name_b['B_GROUND_landed'])

        R_HEAD_pct = get_pct(name_r['R_HEAD_att'], name_r['R_HEAD_landed'])
        B_HEAD_pct = get_pct(name_b['B_HEAD_att'], name_b['B_HEAD_landed'])
    
        R_LEG_pct = get_pct(name_r['R_LEG_att'], name_r['R_LEG_landed'])
        B_LEG_pct = get_pct(name_b['B_LEG_att'], name_b['B_LEG_landed'])
          
        R_TD_pct = name_r['R_TD_pct']
        B_TD_pct = name_b['B_TD_pct']
    
        R_KD = name_r['R_KD'].sum()
        B_KD = name_b['B_KD'].sum()
    
        R_rev = name_r['R_REV'].sum()
        B_rev = name_b['B_REV'].sum()
    
        R_pass = name_r['R_PASS'].sum()
        B_pass = name_b['B_PASS'].sum()
    
        R_SUB = name_r['R_SUB._ATT'].sum()
        B_SUB = name_b['B_SUB._ATT'].sum()
        
        SIG_STR_pct = average_pct(R_SIG_STR_pct,B_SIG_STR_pct)
        TOTAL_STR_pct = average_pct(R_TOTAL_STR_pct, B_TOTAL_STR_pct)
        TD_pct = average_pct(R_TD_pct,B_TD_pct)
        KD_TOTAL = R_KD + B_KD
        BODY_pct = average_pct(R_BODY_pct, B_BODY_pct)
        CLINCH_pct = average_pct(R_CLINCH_pct, B_CLINCH_pct)
        DISTANCE_pct = average_pct(R_DISTANCE_pct, B_DISTANCE_pct)
        GROUND_pct = average_pct(R_GROUND_pct, B_GROUND_pct)
        HEAD_pct = average_pct(R_HEAD_pct, B_HEAD_pct)
        LEG_pct = average_pct(R_LEG_pct, B_LEG_pct)
        TOTAL_PASS = R_pass + B_pass
        TOTAL_REV = R_rev + B_rev
        TOTAL_SUB = R_SUB + B_SUB
    
    Height = 0
    Weight = 0
    Reach = 0
    Stance = 0
    
    Age_r = name_r['V_age'].max()
    Age_b = name_b['A_age'].max()
    Age = max(Age_r,Age_b)

    
    for i in df_fix['V_fighter']:
        if i == name:
            Height = name_r['R_Height'].iloc[0]
            Weight = name_r['R_Weight'].iloc[0]
            Reach = name_r['R_Reach'].iloc[0]
            Stance = name_r['R_Stance'].iloc[0]
            
    for j in df_fix['A_fighter']:
        if j == name:
            Height = name_b['B_Height'].iloc[0]
            Weight = name_b['B_Weight'].iloc[0]
            Reach = name_b['B_Reach'].iloc[0]
            Stance = name_b['B_Stance'].iloc[0]
    
    r_win_by_could_not_continue = 0
    r_win_by_DQ = 0
    r_win_by_decision = 0
    r_win_by_decision_split = 0
    r_win_by_decision_unanimous = 0
    r_win_by_KO = 0
    r_win_by_Other = 0
    r_win_by_overturned = 0
    r_win_by_submission = 0
    r_win_by_TKO_docstop = 0
    b_win_by_could_not_continue = 0
    b_win_by_DQ = 0
    b_win_by_decision = 0
    b_win_by_decision_split = 0
    b_win_by_decision_unanimous = 0
    b_win_by_KO = 0
    b_win_by_Other = 0
    b_win_by_overturned = 0
    b_win_by_submission = 0
    b_win_by_TKO_docstop =0
    
    
    for k in name_r['Winner_num']:
        if k == 0:
            r_win_by_could_not_continue = name_r['win_by_Could Not Continue'].sum()
            r_win_by_DQ = name_r['win_by_DQ'].sum() 
            r_win_by_decision = name_r['win_by_Decision - Majority'].sum()
            r_win_by_decision_split = name_r['win_by_Decision - Split'].sum()
            r_win_by_decision_unanimous = name_r['win_by_Decision - Unanimous'].sum()
            r_win_by_KO = name_r['win_by_KO/TKO'].sum()
            r_win_by_Other = name_r['win_by_Other'].sum()
            r_win_by_overturned = name_r['win_by_Overturned'].sum()
            r_win_by_submission = name_r['win_by_Submission'].sum()
            r_win_by_TKO_docstop = name_r["win_by_TKO - Doctor's Stoppage"].sum()
    
    for n in name_b['Winner_num']:
        if n == 1:
            b_win_by_could_not_continue = name_b['win_by_Could Not Continue'].sum()
            b_win_by_DQ = name_b['win_by_DQ'].sum() 
            b_win_by_decision = name_b['win_by_Decision - Majority'].sum()
            b_win_by_decision_split = name_b['win_by_Decision - Split'].sum()
            b_win_by_decision_unanimous = name_b['win_by_Decision - Unanimous'].sum()
            b_win_by_KO = name_b['win_by_KO/TKO'].sum()
            b_win_by_Other = name_b['win_by_Other'].sum()
            b_win_by_overturned = name_b['win_by_Overturned'].sum()
            b_win_by_submission = name_b['win_by_Submission'].sum()
            b_win_by_TKO_docstop = name_b["win_by_TKO - Doctor's Stoppage"].sum()
        
            

    win_by_could_not_continue = r_win_by_could_not_continue + b_win_by_could_not_continue 
    win_by_DQ = r_win_by_DQ + b_win_by_DQ
    win_by_decision = r_win_by_decision + b_win_by_decision
    win_by_decision_split = r_win_by_decision_split + b_win_by_decision_split
    win_by_decision_unanimous = r_win_by_decision_unanimous + b_win_by_decision_unanimous
    win_by_KO = r_win_by_KO + b_win_by_KO
    win_by_Other = r_win_by_Other + b_win_by_Other
    win_by_overturned = r_win_by_overturned + b_win_by_overturned
    win_by_submission = r_win_by_submission + b_win_by_submission
    win_by_TKO_docstop = r_win_by_TKO_docstop + b_win_by_TKO_docstop
    wins = win_by_could_not_continue + win_by_DQ + win_by_decision + win_by_decision_split + win_by_decision_unanimous + win_by_KO + win_by_Other + win_by_overturned + win_by_submission + win_by_TKO_docstop   
    
    F_info = {'SIG_STR_pct':SIG_STR_pct,'TOTAL_STR_pct':TOTAL_STR_pct,'TOTAL_SUB':TOTAL_SUB,'TD_pct':TD_pct,'KD_TOTAL':KD_TOTAL,'BODY_pct':BODY_pct,'CLINCH_pct':CLINCH_pct,'DISTANCE_pct':DISTANCE_pct, 'GROUND_pct':GROUND_pct,'HEAD_pct':HEAD_pct,'LEG_pct':LEG_pct,'TOTAL_REV':TOTAL_REV,'TOTAL_PASS':TOTAL_PASS,'Age':Age,'Height':Height,'Weight':Weight,'Reach':Reach, 'Stance':Stance,'Wins':wins}
    #'win_by_could_not_continue':win_by_could_not_continue,'win_by_DQ':win_by_DQ,'win_by_decision':win_by_decision,'win_by_decision_split':win_by_decision_split,'win_by_decision_unanimous':win_by_decision_unanimous,'win_by_KO':win_by_KO,'win_by_Other':win_by_Other,'win_by_overturned':win_by_overturned,'win_by_submission':win_by_submission,'win_by_TKO_docstop':win_by_TKO_docstop
    
    return F_info

get_fighter_info('Brad Riddell')

{'SIG_STR_pct': 59.47712418300654,
 'TOTAL_STR_pct': 62.27544910179641,
 'TOTAL_SUB': 0,
 'TD_pct': 1.0,
 'KD_TOTAL': 0,
 'BODY_pct': 87.5,
 'CLINCH_pct': 100.0,
 'DISTANCE_pct': 52.066115702479344,
 'GROUND_pct': 86.20689655172413,
 'HEAD_pct': 54.263565891472865,
 'LEG_pct': 87.5,
 'TOTAL_REV': 1,
 'TOTAL_PASS': 5,
 'Age': 28.0,
 'Height': 170.18,
 'Weight': 155.0,
 'Reach': 180.34,
 'Stance': 1,
 'Wins': 1}

In [40]:
lista = []
for i in fighters:
    lista.append(get_fighter_info(i))
    
df_final = pd.DataFrame(data=lista, index=fighters)
df_final.info()

<class 'pandas.core.frame.DataFrame'>
Index: 1998 entries, Dominick Reyes to Frank Hamaker
Data columns (total 19 columns):
SIG_STR_pct      1607 non-null float64
TOTAL_STR_pct    1617 non-null float64
TOTAL_SUB        1998 non-null int64
TD_pct           1637 non-null float64
KD_TOTAL         1998 non-null int64
BODY_pct         1414 non-null float64
CLINCH_pct       1338 non-null float64
DISTANCE_pct     1589 non-null float64
GROUND_pct       1096 non-null float64
HEAD_pct         1592 non-null float64
LEG_pct          1320 non-null float64
TOTAL_REV        1998 non-null int64
TOTAL_PASS       1998 non-null int64
Age              1408 non-null float64
Height           1998 non-null float64
Weight           1998 non-null float64
Reach            1998 non-null float64
Stance           1998 non-null int64
Wins             1998 non-null int64
dtypes: float64(13), int64(6)
memory usage: 312.2+ KB


In [41]:
df_final.fillna(0,inplace=True)
df_final.loc['Brad Riddell']

SIG_STR_pct       59.477124
TOTAL_STR_pct     62.275449
TOTAL_SUB          0.000000
TD_pct             1.000000
KD_TOTAL           0.000000
BODY_pct          87.500000
CLINCH_pct       100.000000
DISTANCE_pct      52.066116
GROUND_pct        86.206897
HEAD_pct          54.263566
LEG_pct           87.500000
TOTAL_REV          1.000000
TOTAL_PASS         5.000000
Age               28.000000
Height           170.180000
Weight           155.000000
Reach            180.340000
Stance             1.000000
Wins               1.000000
Name: Brad Riddell, dtype: float64

In [42]:
#df_fix['R_SIG_STR._pct'] = df_fix['R_SIG_STR_pct']
#df_fix['B_SIG_STR._pct'] = df_fix['B_SIG_STR_pct']
#df_fix.drop(['R_SIG_STR_pct','B_SIG_STR_pct'], axis=1, inplace=True)
#df_fix.rename(columns = {'R_fighter' : 'V_fighter', 'B_fighter' : 'A_fighter', 'R_age': 'V_age', 'B_age':'A_age'}, inplace = True)

Preciso fazer um novo dataframe em que contenha o nome do lutador vermelho e as informações associadas àquele lutador, o lutador azul e as coluans associadas a ele e quem venceu. 

Não posso usar as informações sobre a luta, já que não vou ter essas informações antes das lutas que quero prever. Talvez exista alguma forma de extrair informação disso, pois essas informações podem ser uteis, imagino que alguem que ganhe mais por knockouts tenha mais chance de vencer que alguém que ganhe mais por decisões por exemplo.

Consegui dar um jeito de contar as vitórias mas os valores são diferentes dos do site do ufc. Talvez falte informação sobre os empates.

In [43]:
df_fix = df_fix.drop(list(df_fix.filter(regex='landed')), axis=1)
df_fix = df_fix.drop(list(df_fix.filter(regex='pct')), axis=1)

In [44]:
R_SIG_STR_pct = []
R_TOTAL_STR_pct = []
R_TD_pct = []
R_KD_TOTAL = []
R_BODY_pct = []
R_CLINCH_pct = []
R_DISTANCE_pct = []
R_GROUND_pct = []
R_HEAD_pct = []
R_LEG_pct = []
R_TOTAL_REV = []
R_TOTAL_PASS = []
R_SUB = []
R_Height = []
R_Weight = []
R_Reach = []
R_Stance = []
R_wins = []
#R_win_by_could_not_continue = []
#R_win_by_DQ = []
#R_win_by_decision = []
#R_win_by_decision_split = []
#R_win_by_decision_unanimous = []
#R_win_by_KO = []
#R_win_by_Other = []
#R_win_by_overturned = []
#R_win_by_submission = []
#R_win_by_TKO_docstop = []

for i in range(len(df_fix)):
    R_SIG_STR_pct.append(df_final['SIG_STR_pct'].loc[df_fix['V_fighter'].iloc[i]])
    R_TOTAL_STR_pct.append(df_final['TOTAL_STR_pct'].loc[df_fix['V_fighter'].iloc[i]])
    R_TD_pct.append(df_final['TD_pct'].loc[df_fix['V_fighter'].iloc[i]])
    R_KD_TOTAL.append(df_final['KD_TOTAL'].loc[df_fix['V_fighter'].iloc[i]])
    R_BODY_pct.append(df_final['BODY_pct'].loc[df_fix['V_fighter'].iloc[i]])
    R_CLINCH_pct.append(df_final['CLINCH_pct'].loc[df_fix['V_fighter'].iloc[i]])
    R_DISTANCE_pct.append(df_final['DISTANCE_pct'].loc[df_fix['V_fighter'].iloc[i]])
    R_GROUND_pct.append(df_final['GROUND_pct'].loc[df_fix['V_fighter'].iloc[i]])
    R_HEAD_pct.append(df_final['HEAD_pct'].loc[df_fix['V_fighter'].iloc[i]])
    R_LEG_pct.append(df_final['LEG_pct'].loc[df_fix['V_fighter'].iloc[i]])
    R_TOTAL_REV.append(df_final['TOTAL_REV'].loc[df_fix['V_fighter'].iloc[i]])
    R_TOTAL_PASS.append(df_final['TOTAL_PASS'].loc[df_fix['V_fighter'].iloc[i]])
    R_Height.append(df_final['Height'].loc[df_fix['V_fighter'].iloc[i]])
    R_Weight.append(df_final['Weight'].loc[df_fix['V_fighter'].iloc[i]])
    R_Reach.append(df_final['Reach'].loc[df_fix['V_fighter'].iloc[i]])
    R_Stance.append(df_final['Stance'].loc[df_fix['V_fighter'].iloc[i]])
    R_SUB.append(df_final['TOTAL_SUB'].loc[df_fix['V_fighter'].iloc[i]])
    R_wins.append(df_final['Wins'].loc[df_fix['V_fighter'].iloc[i]])
    #R_win_by_could_not_continue.append(df_final['win_by_could_not_continue'].loc[df_fix['V_fighter'].iloc[i]])
    #R_win_by_DQ.append(df_final['win_by_could_not_continue'].loc[df_fix['V_fighter'].iloc[i]])
    #R_win_by_decision.append(df_final['win_by_decision'].loc[df_fix['V_fighter'].iloc[i]])
    #R_win_by_decision_split.append(df_final['win_by_decision_split'].loc[df_fix['V_fighter'].iloc[i]])
    #R_win_by_decision_unanimous.append(df_final['win_by_decision_unanimous'].loc[df_fix['V_fighter'].iloc[i]])
    #R_win_by_KO.append(df_final['win_by_KO'].loc[df_fix['V_fighter'].iloc[i]])
    #R_win_by_Other.append(df_final['win_by_Other'].loc[df_fix['V_fighter'].iloc[i]])
    #R_win_by_overturned.append(df_final['win_by_overturned'].loc[df_fix['V_fighter'].iloc[i]])
    #R_win_by_submission.append(df_final['win_by_submission'].loc[df_fix['V_fighter'].iloc[i]])
    #R_win_by_TKO_docstop.append(df_final['win_by_TKO_docstop'].loc[df_fix['V_fighter'].iloc[i]])
    
    
df_fix.drop('R_SIG_STR._att', axis = 1, inplace=True)
df_fix.drop('R_TOTAL_STR._att', axis = 1, inplace=True)
df_fix.drop('R_TD_att', axis = 1, inplace=True)
df_fix.drop('R_KD', axis = 1, inplace=True)
df_fix.drop('R_PASS', axis=1, inplace=True)
df_fix.drop('R_REV', axis=1, inplace=True)
df_fix.drop('R_HEAD_att', axis = 1, inplace=True)
df_fix.drop('R_BODY_att', axis = 1, inplace=True)
df_fix.drop('R_LEG_att', axis = 1, inplace=True)
df_fix.drop('R_DISTANCE_att', axis = 1, inplace=True)
df_fix.drop('R_CLINCH_att', axis = 1, inplace=True)
df_fix.drop('R_GROUND_att', axis = 1, inplace=True)
df_fix.drop('R_Height', axis = 1, inplace=True)
df_fix.drop('R_Weight', axis = 1, inplace=True)
df_fix.drop('R_Reach', axis = 1, inplace=True)
df_fix.drop('R_Stance', axis = 1, inplace=True)
df_fix.drop('R_SUB._ATT', axis=1, inplace=True)
df_fix.drop('win_by_Could Not Continue',axis=1, inplace=True)
df_fix.drop('win_by_Decision - Majority',axis=1, inplace=True)
df_fix.drop('win_by_Decision - Split',axis=1, inplace=True)
df_fix.drop('win_by_Decision - Unanimous',axis=1, inplace=True)
df_fix.drop('win_by_DQ',axis=1, inplace=True)
df_fix.drop('win_by_KO/TKO',axis=1,inplace=True)
df_fix.drop('win_by_Other',axis=1,inplace=True)
df_fix.drop('win_by_Overturned',axis=1, inplace=True)
df_fix.drop('win_by_Submission',axis=1,inplace=True)
df_fix.drop("win_by_TKO - Doctor's Stoppage", axis=1, inplace=True)




df_fix['R_SIG_STR._att'] = R_SIG_STR_pct
df_fix['R_TOTAL_STR_pct'] = R_TOTAL_STR_pct
df_fix['R_TD_pct'] = R_TD_pct
df_fix['R_KD_TOTAL'] = R_KD_TOTAL
df_fix['R_TOTAL_REV'] = R_TOTAL_REV
df_fix['R_TOTAL_PASS'] = R_TOTAL_PASS
df_fix['R_CLINCH_pct'] = R_CLINCH_pct
df_fix['R_GROUND_pct'] = R_GROUND_pct
df_fix['R_DISTANCE_pct'] = R_DISTANCE_pct
df_fix['R_BODY_pct'] = R_BODY_pct
df_fix['R_HEAD_pct'] = R_HEAD_pct
df_fix['R_LEG_pct'] = R_LEG_pct
df_fix['R_Height'] = R_Height
df_fix['R_Weight'] = R_Weight
df_fix['R_Reach'] = R_Reach
df_fix['R_SUB'] = R_SUB
df_fix['R_Stance'] = R_Stance
df_fix['R_Wins'] = R_wins
#df_fix['R_win_by_Could_Not_Countinue'] = R_win_by_could_not_continue
#df_fix['R_win_by_DQ'] = R_win_by_DQ
#df_fix['R_win_by_Decision-Majority'] = R_win_by_decision
#df_fix['R_win_by_Decision-Split'] = R_win_by_decision_split
#df_fix['R_win_by_Decision-Unanimous'] = R_win_by_decision_unanimous
#df_fix['R_win_by_KO/TKO'] = R_win_by_KO
#df_fix['R_win_by_Other'] = R_win_by_Other
#df_fix['R_win_by_Overturned'] = R_win_by_overturned
#df_fix['R_win_by_Submission'] = R_win_by_submission
#df_fix["R_win_by_TKO - Doctor's Stoppage"] = R_win_by_TKO_docstop

df_fix.head()

Unnamed: 0,V_fighter,A_fighter,B_KD,B_PASS,B_REV,B_SIG_STR._att,B_TOTAL_STR._att,B_TD_att,B_HEAD_att,B_BODY_att,B_LEG_att,B_DISTANCE_att,B_CLINCH_att,B_GROUND_att,B_Height,B_Weight,B_Reach,B_Stance,A_age,V_age,Winner_num,B_SUB._ATT,R_SIG_STR._att,R_TOTAL_STR_pct,R_TD_pct,R_KD_TOTAL,R_TOTAL_REV,R_TOTAL_PASS,R_CLINCH_pct,R_GROUND_pct,R_DISTANCE_pct,R_BODY_pct,R_HEAD_pct,R_LEG_pct,R_Height,R_Weight,R_Reach,R_SUB,R_Stance,R_Wins
0,Jon Jones,Dominick Reyes,0,0,0,259,263,0,169,55,35,252,7,0,193.04,205.0,195.58,3,31.0,33.0,0,0,59.365045,64.071417,0.568889,5,0,29,78.546607,72.93772,45.611541,80.052334,49.930378,74.164503,193.04,205.0,213.36,10,1,22
1,Valentina Shevchenko,Katlyn Chookagian,0,0,0,56,68,0,28,9,19,56,0,0,175.26,125.0,172.72,1,32.0,32.0,0,0,56.326153,66.961571,0.678333,1,1,20,75.827206,82.908826,43.270478,62.536778,48.855752,74.297635,165.1,125.0,167.64,4,3,10
2,Juan Adams,Justin Tafa,1,0,0,15,15,0,9,1,5,11,1,3,182.88,264.0,187.96,3,27.0,28.0,1,0,56.21204,58.576692,0.166667,0,0,4,0.0,0.0,52.04265,72.826087,59.964788,78.333333,195.58,265.0,203.2,0,1,3
3,Mirsad Bektic,Dan Ige,0,1,0,88,117,2,70,17,1,68,16,4,170.18,145.0,180.34,1,29.0,29.0,1,0,39.175948,52.131286,0.393333,2,1,34,61.684783,66.277854,30.023139,70.888889,34.876019,74.166667,172.72,145.0,177.8,3,1,9
4,Derrick Lewis,Ilir Latifi,0,2,0,12,77,7,8,1,3,8,4,0,177.8,205.0,185.42,1,37.0,35.0,0,0,50.682259,52.911402,0.130714,5,2,17,73.558223,0.0,38.279494,64.874953,43.56525,81.875,190.5,260.0,200.66,0,1,19


In [45]:
B_SIG_STR_pct = []
B_TOTAL_STR_pct = []
B_TD_pct = []
B_KD_TOTAL = []
B_BODY_pct = []
B_CLINCH_pct = []
B_DISTANCE_pct = []
B_GROUND_pct = []
B_HEAD_pct = []
B_LEG_pct = []
B_TOTAL_REV = []
B_TOTAL_PASS = []
B_SUB = []
B_Height = []
B_Weight = []
B_Reach = []
B_Stance = []
B_wins = []
#B_win_by_could_not_continue = []
#B_win_by_DQ = []
#B_win_by_decision = []
#B_win_by_decision_split = []
#B_win_by_decision_unanimous = []
#B_win_by_KO = []
#B_win_by_Other = []
#B_win_by_overturned = []
#B_win_by_submission = []
#B_win_by_TKO_docstop = []

for i in range(len(df_fix)):
    B_SIG_STR_pct.append(df_final['SIG_STR_pct'].loc[df_fix['A_fighter'].iloc[i]])
    B_TOTAL_STR_pct.append(df_final['TOTAL_STR_pct'].loc[df_fix['A_fighter'].iloc[i]])
    B_TD_pct.append(df_final['TD_pct'].loc[df_fix['A_fighter'].iloc[i]])
    B_KD_TOTAL.append(df_final['KD_TOTAL'].loc[df_fix['A_fighter'].iloc[i]])
    B_SUB.append(df_final['TOTAL_SUB'].loc[df_fix['A_fighter'].iloc[i]])
    B_BODY_pct.append(df_final['BODY_pct'].loc[df_fix['A_fighter'].iloc[i]])
    B_CLINCH_pct.append(df_final['CLINCH_pct'].loc[df_fix['A_fighter'].iloc[i]])
    B_DISTANCE_pct.append(df_final['DISTANCE_pct'].loc[df_fix['A_fighter'].iloc[i]])
    B_GROUND_pct.append(df_final['GROUND_pct'].loc[df_fix['A_fighter'].iloc[i]])
    B_HEAD_pct.append(df_final['HEAD_pct'].loc[df_fix['A_fighter'].iloc[i]])
    B_LEG_pct.append(df_final['LEG_pct'].loc[df_fix['A_fighter'].iloc[i]])
    B_TOTAL_REV.append(df_final['TOTAL_REV'].loc[df_fix['A_fighter'].iloc[i]])
    B_TOTAL_PASS.append(df_final['TOTAL_PASS'].loc[df_fix['A_fighter'].iloc[i]])
    B_Height.append(df_final['Height'].loc[df_fix['A_fighter'].iloc[i]])
    B_Weight.append(df_final['Weight'].loc[df_fix['A_fighter'].iloc[i]])
    B_Reach.append(df_final['Reach'].loc[df_fix['A_fighter'].iloc[i]])
    B_Stance.append(df_final['Stance'].loc[df_fix['A_fighter'].iloc[i]])
    B_wins.append(df_final['Wins'].loc[df_fix['A_fighter'].iloc[i]])
    #B_win_by_could_not_continue.append(df_final['win_by_could_not_continue'].loc[df_fix['A_fighter'].iloc[i]])
    #B_win_by_DQ.append(df_final['win_by_could_not_continue'].loc[df_fix['A_fighter'].iloc[i]])
    #B_win_by_decision.append(df_final['win_by_decision'].loc[df_fix['A_fighter'].iloc[i]])
    #B_win_by_decision_split.append(df_final['win_by_decision_split'].loc[df_fix['A_fighter'].iloc[i]])
    #B_win_by_decision_unanimous.append(df_final['win_by_decision_unanimous'].loc[df_fix['A_fighter'].iloc[i]])
    #B_win_by_KO.append(df_final['win_by_KO'].loc[df_fix['A_fighter'].iloc[i]])
    #B_win_by_Other.append(df_final['win_by_Other'].loc[df_fix['A_fighter'].iloc[i]])
    #B_win_by_overturned.append(df_final['win_by_overturned'].loc[df_fix['A_fighter'].iloc[i]])
    #B_win_by_submission.append(df_final['win_by_submission'].loc[df_fix['A_fighter'].iloc[i]])
    #B_win_by_TKO_docstop.append(df_final['win_by_TKO_docstop'].loc[df_fix['A_fighter'].iloc[i]])
    
    
df_fix.drop('B_SIG_STR._att', axis = 1, inplace=True)
df_fix.drop('B_TOTAL_STR._att', axis = 1, inplace=True)
df_fix.drop('B_TD_att', axis = 1, inplace=True)
df_fix.drop('B_KD', axis = 1, inplace=True)
df_fix.drop('B_PASS', axis=1, inplace=True)
df_fix.drop('B_REV', axis=1, inplace=True)
df_fix.drop('B_HEAD_att', axis = 1, inplace=True)
df_fix.drop('B_BODY_att', axis = 1, inplace=True)
df_fix.drop('B_LEG_att', axis = 1, inplace=True)
df_fix.drop('B_DISTANCE_att', axis = 1, inplace=True)
df_fix.drop('B_CLINCH_att', axis = 1, inplace=True)
df_fix.drop('B_GROUND_att', axis = 1, inplace=True)
df_fix.drop('B_Height', axis = 1, inplace=True)
df_fix.drop('B_Weight', axis = 1, inplace=True)
df_fix.drop('B_Reach', axis = 1, inplace=True)
df_fix.drop('B_Stance', axis = 1, inplace=True)
df_fix.drop('B_SUB._ATT', axis=1, inplace=True)




df_fix['B_SIG_STR._att'] = B_SIG_STR_pct
df_fix['B_TOTAL_STR_pct'] = B_TOTAL_STR_pct
df_fix['B_TD_pct'] = B_TD_pct
df_fix['B_SUB'] = B_SUB
df_fix['B_KD_TOTAL'] = B_KD_TOTAL
df_fix['B_TOTAL_REV'] = B_TOTAL_REV
df_fix['B_TOTAL_PASS'] = B_TOTAL_PASS
df_fix['B_CLINCH_pct'] = B_CLINCH_pct
df_fix['B_GROUND_pct'] = B_GROUND_pct
df_fix['B_DISTANCE_pct'] = B_DISTANCE_pct
df_fix['B_BODY_pct'] = B_BODY_pct
df_fix['B_HEAD_pct'] = B_HEAD_pct
df_fix['B_LEG_pct'] = B_LEG_pct
df_fix['B_Height'] = B_Height
df_fix['B_Weight'] = B_Weight
df_fix['B_Reach'] = B_Reach
df_fix['B_Stance'] = B_Stance
df_fix['B_Wins'] = B_wins

#df_fix['B_win_by_could_not_countinue'] = B_win_by_could_not_continue
#df_fix['B_win_by_DQ'] = B_win_by_DQ
#df_fix['B_win_by_decision'] = B_win_by_decision
#df_fix['B_win_by_decision_split'] = B_win_by_decision_split
#df_fix['B_win_by_decision_unanimous'] = B_win_by_decision_unanimous
#df_fix['B_win_by_KO'] = B_win_by_KO
#df_fix['B_win_by_Other'] = B_win_by_Other
#df_fix['B_win_by_overturned'] = B_win_by_overturned
#df_fix['B_win_by_submission'] = B_win_by_submission
#df_fix['B_win_by_TKO_docstop'] = B_win_by_TKO_docstop


df_fix.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5477 entries, 0 to 5476
Data columns (total 41 columns):
V_fighter          5477 non-null object
A_fighter          5477 non-null object
A_age              5477 non-null float64
V_age              5477 non-null float64
Winner_num         5477 non-null int64
R_SIG_STR._att     5477 non-null float64
R_TOTAL_STR_pct    5477 non-null float64
R_TD_pct           5477 non-null float64
R_KD_TOTAL         5477 non-null int64
R_TOTAL_REV        5477 non-null int64
R_TOTAL_PASS       5477 non-null int64
R_CLINCH_pct       5477 non-null float64
R_GROUND_pct       5477 non-null float64
R_DISTANCE_pct     5477 non-null float64
R_BODY_pct         5477 non-null float64
R_HEAD_pct         5477 non-null float64
R_LEG_pct          5477 non-null float64
R_Height           5477 non-null float64
R_Weight           5477 non-null float64
R_Reach            5477 non-null float64
R_SUB              5477 non-null int64
R_Stance           5477 non-null int64
R_Win

In [46]:
from sklearn.preprocessing import StandardScaler
from sklearn.preprocessing import LabelEncoder
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import precision_score
from sklearn.metrics import recall_score
from sklearn.metrics import roc_auc_score
from sklearn.model_selection import KFold
from sklearn.model_selection import cross_val_score
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import GridSearchCV
from xgboost import XGBClassifier
from sklearn.ensemble import AdaBoostClassifier

import tensorflow as tf 


import warnings
warnings.filterwarnings("ignore")

In [47]:
df = df_fix.copy()
#df.drop(df.filter(regex='win_by'), axis=1, inplace=True)
df.drop(['V_fighter', 'A_fighter'],axis=1, inplace=True)

In [48]:
X = df.drop(['Winner_num'], axis=1)
Y = df['Winner_num']
X.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 5477 entries, 0 to 5476
Data columns (total 38 columns):
A_age              5477 non-null float64
V_age              5477 non-null float64
R_SIG_STR._att     5477 non-null float64
R_TOTAL_STR_pct    5477 non-null float64
R_TD_pct           5477 non-null float64
R_KD_TOTAL         5477 non-null int64
R_TOTAL_REV        5477 non-null int64
R_TOTAL_PASS       5477 non-null int64
R_CLINCH_pct       5477 non-null float64
R_GROUND_pct       5477 non-null float64
R_DISTANCE_pct     5477 non-null float64
R_BODY_pct         5477 non-null float64
R_HEAD_pct         5477 non-null float64
R_LEG_pct          5477 non-null float64
R_Height           5477 non-null float64
R_Weight           5477 non-null float64
R_Reach            5477 non-null float64
R_SUB              5477 non-null int64
R_Stance           5477 non-null int64
R_Wins             5477 non-null int64
B_SIG_STR._att     5477 non-null float64
B_TOTAL_STR_pct    5477 non-null float64
B_T

In [59]:
X_train,X_test,y_train,y_test = train_test_split(X,Y, test_size = 0.3, random_state = 40)

In [60]:
X_train = tf.keras.utils.normalize(X_train,axis=1,order=2)
X_test = tf.keras.utils.normalize(X_test,axis=1,order=2)

In [64]:
model = tf.keras.models.Sequential()
model.add(tf.keras.layers.Flatten())
model.add(tf.keras.layers.Dense(250,activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(300,activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(250,activation=tf.nn.relu))
model.add(tf.keras.layers.Dense(3,activation=tf.nn.softmax))

model.compile(optimizer='adam',
             loss='sparse_categorical_crossentropy',
             metrics=['accuracy'])

model.fit(X_train.values, y_train.values, validation_data=(X_test.values,y_test.values), epochs=400)

Train on 3833 samples, validate on 1644 samples
Epoch 1/400
Epoch 2/400
Epoch 3/400
Epoch 4/400
Epoch 5/400
Epoch 6/400
Epoch 7/400
Epoch 8/400
Epoch 9/400
Epoch 10/400
Epoch 11/400
Epoch 12/400
Epoch 13/400
Epoch 14/400
Epoch 15/400
Epoch 16/400
Epoch 17/400
Epoch 18/400
Epoch 19/400
Epoch 20/400
Epoch 21/400
Epoch 22/400
Epoch 23/400
Epoch 24/400
Epoch 25/400
Epoch 26/400
Epoch 27/400
Epoch 28/400
Epoch 29/400
Epoch 30/400
Epoch 31/400
Epoch 32/400
Epoch 33/400
Epoch 34/400
Epoch 35/400
Epoch 36/400
Epoch 37/400
Epoch 38/400
Epoch 39/400
Epoch 40/400
Epoch 41/400
Epoch 42/400
Epoch 43/400
Epoch 44/400
Epoch 45/400
Epoch 46/400
Epoch 47/400
Epoch 48/400
Epoch 49/400
Epoch 50/400
Epoch 51/400
Epoch 52/400
Epoch 53/400
Epoch 54/400
Epoch 55/400


Epoch 56/400
Epoch 57/400
Epoch 58/400
Epoch 59/400
Epoch 60/400
Epoch 61/400
Epoch 62/400
Epoch 63/400
Epoch 64/400
Epoch 65/400
Epoch 66/400
Epoch 67/400
Epoch 68/400
Epoch 69/400
Epoch 70/400
Epoch 71/400
Epoch 72/400
Epoch 73/400
Epoch 74/400
Epoch 75/400
Epoch 76/400
Epoch 77/400
Epoch 78/400
Epoch 79/400
Epoch 80/400
Epoch 81/400
Epoch 82/400
Epoch 83/400
Epoch 84/400
Epoch 85/400
Epoch 86/400
Epoch 87/400
Epoch 88/400
Epoch 89/400
Epoch 90/400
Epoch 91/400
Epoch 92/400
Epoch 93/400
Epoch 94/400
Epoch 95/400
Epoch 96/400
Epoch 97/400
Epoch 98/400
Epoch 99/400
Epoch 100/400
Epoch 101/400
Epoch 102/400
Epoch 103/400
Epoch 104/400
Epoch 105/400
Epoch 106/400
Epoch 107/400
Epoch 108/400
Epoch 109/400


Epoch 110/400
Epoch 111/400
Epoch 112/400
Epoch 113/400
Epoch 114/400
Epoch 115/400
Epoch 116/400
Epoch 117/400
Epoch 118/400
Epoch 119/400
Epoch 120/400
Epoch 121/400
Epoch 122/400
Epoch 123/400
Epoch 124/400
Epoch 125/400
Epoch 126/400
Epoch 127/400
Epoch 128/400
Epoch 129/400
Epoch 130/400
Epoch 131/400
Epoch 132/400
Epoch 133/400
Epoch 134/400
Epoch 135/400
Epoch 136/400
Epoch 137/400
Epoch 138/400
Epoch 139/400
Epoch 140/400
Epoch 141/400
Epoch 142/400
Epoch 143/400
Epoch 144/400
Epoch 145/400
Epoch 146/400
Epoch 147/400
Epoch 148/400
Epoch 149/400
Epoch 150/400
Epoch 151/400
Epoch 152/400
Epoch 153/400
Epoch 154/400
Epoch 155/400
Epoch 156/400
Epoch 157/400
Epoch 158/400
Epoch 159/400
Epoch 160/400
Epoch 161/400
Epoch 162/400
Epoch 163/400


Epoch 164/400
Epoch 165/400
Epoch 166/400
Epoch 167/400
Epoch 168/400
Epoch 169/400
Epoch 170/400
Epoch 171/400
Epoch 172/400
Epoch 173/400
Epoch 174/400
Epoch 175/400
Epoch 176/400
Epoch 177/400
Epoch 178/400
Epoch 179/400
Epoch 180/400
Epoch 181/400
Epoch 182/400
Epoch 183/400
Epoch 184/400
Epoch 185/400
Epoch 186/400
Epoch 187/400
Epoch 188/400
Epoch 189/400
Epoch 190/400
Epoch 191/400
Epoch 192/400
Epoch 193/400
Epoch 194/400
Epoch 195/400
Epoch 196/400
Epoch 197/400
Epoch 198/400
Epoch 199/400
Epoch 200/400
Epoch 201/400
Epoch 202/400
Epoch 203/400
Epoch 204/400
Epoch 205/400
Epoch 206/400
Epoch 207/400
Epoch 208/400
Epoch 209/400
Epoch 210/400
Epoch 211/400
Epoch 212/400
Epoch 213/400
Epoch 214/400
Epoch 215/400
Epoch 216/400
Epoch 217/400


Epoch 218/400
Epoch 219/400
Epoch 220/400
Epoch 221/400
Epoch 222/400
Epoch 223/400
Epoch 224/400
Epoch 225/400
Epoch 226/400
Epoch 227/400
Epoch 228/400
Epoch 229/400
Epoch 230/400
Epoch 231/400
Epoch 232/400
Epoch 233/400
Epoch 234/400
Epoch 235/400
Epoch 236/400
  32/3833 [..............................] - ETA: 0s - loss: 0.3724 - accuracy: 0.8438

KeyboardInterrupt: 

array([1, 0, 1, ..., 1, 1, 1], dtype=int64)

In [None]:
f0=df_final.loc["Khabib Nurmagomedov"]
f1=df_final.loc['Tony Ferguson']
fighter_0=f0.to_dict()
fighter_1=f1.to_dict()

fighter_0['V_age']=fighter_0.pop('Age')
fighter_0['R_SIG_STR._att'] = fighter_0.pop('SIG_STR_pct')
fighter_0['R_TOTAL_STR_pct'] = fighter_0.pop('TOTAL_STR_pct')
fighter_0['R_TD_pct'] = fighter_0.pop('TD_pct')
fighter_0['R_KD_TOTAL'] = fighter_0.pop('KD_TOTAL')
fighter_0['R_TOTAL_REV'] = fighter_0.pop('TOTAL_REV')
fighter_0['R_TOTAL_PASS'] =fighter_0.pop('TOTAL_PASS')
fighter_0['R_CLINCH_pct'] = fighter_0.pop('CLINCH_pct')
fighter_0['R_GROUND_pct'] = fighter_0.pop('GROUND_pct')
fighter_0['R_DISTANCE_pct'] = fighter_0.pop('DISTANCE_pct')
fighter_0['R_BODY_pct'] = fighter_0.pop('BODY_pct')
fighter_0['R_HEAD_pct'] = fighter_0.pop('HEAD_pct')
fighter_0['R_LEG_pct'] = fighter_0.pop('LEG_pct')
fighter_0['R_Height'] = fighter_0.pop('Height')
fighter_0['R_Weight'] = fighter_0.pop('Weight')
fighter_0['R_Reach'] = fighter_0.pop('Reach')
fighter_0['R_SUB'] = fighter_0.pop('TOTAL_SUB')
fighter_0['R_Stance'] = fighter_0.pop('Stance')
fighter_0['R_Wins'] = fighter_0.pop('Wins')
#fighter_0['R_win_by_Could_Not_Countinue'] = fighter_0.pop('win_by_could_not_continue')
#fighter_0['R_win_by_DQ'] = fighter_0.pop('win_by_DQ')
#fighter_0['R_win_by_Decision-Majority'] = fighter_0.pop("win_by_decision")
#fighter_0['R_win_by_Decision-Split'] = fighter_0.pop("win_by_decision_split")
#fighter_0['R_win_by_Decision-Unanimous'] = fighter_0.pop("win_by_decision_unanimous")
#fighter_0['R_win_by_KO/TKO'] = fighter_0.pop("win_by_KO")
#fighter_0['R_win_by_Other'] = fighter_0.pop("win_by_Other")
#fighter_0['R_win_by_Overturned'] = fighter_0.pop("win_by_overturned")
#fighter_0['R_win_by_Submission'] = fighter_0.pop("win_by_submission")
#fighter_0["R_win_by_TKO - Doctor's Stoppage"] = fighter_0.pop("win_by_TKO_docstop")

fighter_1['A_age']=fighter_1.pop('Age')
fighter_1['B_SIG_STR._att'] = fighter_1.pop('SIG_STR_pct')
fighter_1['B_TOTAL_STR_pct'] = fighter_1.pop('TOTAL_STR_pct')
fighter_1['B_TD_pct'] = fighter_1.pop('TD_pct')
fighter_1['B_KD_TOTAL'] = fighter_1.pop('KD_TOTAL')
fighter_1['B_TOTAL_REV'] = fighter_1.pop('TOTAL_REV')
fighter_1['B_TOTAL_PASS'] =fighter_1.pop('TOTAL_PASS')
fighter_1['B_CLINCH_pct'] = fighter_1.pop('CLINCH_pct')
fighter_1['B_GROUND_pct'] = fighter_1.pop('GROUND_pct')
fighter_1['B_DISTANCE_pct'] = fighter_1.pop('DISTANCE_pct')
fighter_1['B_BODY_pct'] = fighter_1.pop('BODY_pct')
fighter_1['B_HEAD_pct'] = fighter_1.pop('HEAD_pct')
fighter_1['B_LEG_pct'] = fighter_1.pop('LEG_pct')
fighter_1['B_Height'] = fighter_1.pop('Height')
fighter_1['B_Weight'] = fighter_1.pop('Weight')
fighter_1['B_Reach'] = fighter_1.pop('Reach')
fighter_1['B_SUB'] = fighter_1.pop('TOTAL_SUB')
fighter_1['B_Stance'] = fighter_1.pop('Stance')
fighter_1['B_Wins'] = fighter_1.pop('Wins')
#fighter_1['B_win_by_could_not_countinue'] = fighter_1.pop('win_by_could_not_continue')
#fighter_1['B_win_by_DQ'] = fighter_1.pop('win_by_DQ')
#fighter_1['B_win_by_decision'] = fighter_1.pop("win_by_decision")
#fighter_1['B_win_by_decision_split'] = fighter_1.pop("win_by_decision_split")
#fighter_1['B_win_by_decision_unanimous'] = fighter_1.pop("win_by_decision_unanimous")
#fighter_1['B_win_by_KO'] = fighter_1.pop("win_by_KO")
#fighter_1['B_win_by_Other'] = fighter_1.pop("win_by_Other")
#fighter_1['B_win_by_overturned'] = fighter_1.pop("win_by_overturned")
#fighter_1['B_win_by_submission'] = fighter_1.pop("win_by_submission")
#fighter_1["B_win_by_TKO_docstop"] = fighter_1.pop("win_by_TKO_docstop")


In [None]:
def merge_dict(dict1, dict2):
    dict2.update(dict1)
    return dict2
merge_dict(fighter_0,fighter_1)
fighter_test = pd.DataFrame(fighter_1, index=[0])



fighter_test = fighter_test.reindex(columns=['A_age',
'V_age',
'R_SIG_STR._att',
'R_TOTAL_STR_pct',
'R_TD_pct',  
'R_KD_TOTAL',
'R_TOTAL_REV', 
'R_TOTAL_PASS',
'R_CLINCH_pct',
'R_GROUND_pct',
'R_DISTANCE_pct',
'R_BODY_pct',
'R_HEAD_pct',
'R_LEG_pct',
'R_Height',
'R_Weight',
'R_Reach',
'R_SUB',
'R_Stance',
'R_Wins',
'B_SIG_STR._att',
'B_TOTAL_STR_pct',
'B_TD_pct',
'B_SUB',
'B_KD_TOTAL',
'B_TOTAL_REV',
'B_TOTAL_PASS',
'B_CLINCH_pct',
'B_GROUND_pct',
'B_DISTANCE_pct',
'B_BODY_pct',
'B_HEAD_pct',
'B_LEG_pct',
'B_Height',
'B_Weight',
'B_Reach',
'B_Stance',
'B_Wins'])

#'R_win_by_Could_Not_Countinue',
#'R_win_by_DQ',
#'R_win_by_Decision-Majority',
#'R_win_by_Decision-Split',
#'R_win_by_Decision-Unanimous',
#'R_win_by_KO/TKO', 
#'R_win_by_Other',
#'R_win_by_Overturned',
#'R_win_by_Submission', 
#"R_win_by_TKO - Doctor's Stoppage",
#'B_win_by_could_not_countinue',
#'B_win_by_DQ',
#'B_win_by_decision',
#'B_win_by_decision_split',
#'B_win_by_decision_unanimous',
#'B_win_by_KO',
#'B_win_by_Other',
#'B_win_by_overturned',
#'B_win_by_submission',
#'B_win_by_TKO_docstop'

fighter_test

In [None]:
pred_test = model.predict_proba(fighter_test)
pred_test