In [4]:
#Import libraries for loading dataset from csv file
import pandas as pd
import sqlite3

#Import libraries for data analysis and visualisation.
import numpy as np
import matplotlib.pyplot as plt
import scipy.stats as stats
import datetime

# Switch matplotlib to its bundled seaborn-like style sheet so every later plot shares one look.
# NOTE(review): the 'seaborn-v0_8' style name presumably needs matplotlib >= 3.6 - confirm against the runtime.
plt.style.use('seaborn-v0_8')



In [5]:
# Never truncate columns when a DataFrame is displayed.
pd.set_option('display.max_columns', None)

# Read the four CSV tables of the dataset; date columns are parsed while loading.
ufc_events = pd.read_csv(
    '/kaggle/input/mma-dataset-2023-ufc/ufc_event_data.csv',
    parse_dates=['event_date'],
)
ufc_fights = pd.read_csv(
    '/kaggle/input/mma-dataset-2023-ufc/ufc_fight_data.csv',
)
ufc_fighters = pd.read_csv(
    '/kaggle/input/mma-dataset-2023-ufc/ufc_fighter_data.csv',
    parse_dates=['fighter_dob'],
)
ufc_fight_stats = pd.read_csv(
    '/kaggle/input/mma-dataset-2023-ufc/ufc_fight_stat_data.csv',
)

In [None]:
# Preview the first rows of the fighter table.
ufc_fighters.head()

In [6]:
# Event key plus its date: the only event information joined onto the fights.
events_date = ufc_events.loc[:, ['event_id', 'event_date']]

# Per-fighter attributes, with the id column renamed to 'f_1' so the frame can be
# joined directly on the first fighter of each fight.
fighters_data = (
    ufc_fighters
    .loc[:, ['fighter_id', 'fighter_height_cm', 'fighter_weight_lbs', 'fighter_reach_cm', 'fighter_stance']]
    .rename(columns={'fighter_id': 'f_1'})
)

# Work on a copy so the raw fights frame is left untouched.
fights_data = ufc_fights.copy()

In [7]:
# Attach the event date to every fight.
fights_data = fights_data.merge(events_date, on='event_id', how='left')

# Attach the attributes of the first fighter (fighters_data is keyed by 'f_1' at this point).
fights_data = fights_data.merge(fighters_data, on='f_1', how='left')

# Re-key the same attribute table to 'f_2' and attach the second fighter.
# The attribute columns now collide, so pandas suffixes them with '_x' (first
# fighter) and '_y' (second fighter).
# Note: fighters_data stays keyed by 'f_2' after this cell.
fighters_data = fighters_data.rename(columns={'f_1': 'f_2'})
fights_data = fights_data.merge(fighters_data, on='f_2', how='left')

In [8]:
# Replace the merge suffixes with explicit fighter numbers:
# '_x' columns describe fighter 1, '_y' columns describe fighter 2.
fights_data = fights_data.rename(
    columns={
        'fighter_height_cm_x': 'height_1',
        'fighter_weight_lbs_x': 'weight_1',
        'fighter_reach_cm_x': 'reach_1',
        'fighter_stance_x': 'stance_1',
        'fighter_height_cm_y': 'height_2',
        'fighter_weight_lbs_y': 'weight_2',
        'fighter_reach_cm_y': 'reach_2',
        'fighter_stance_y': 'stance_2',
    }
)

In [9]:
# Remove the columns that are not used further; 'event_id' was only the join key
# for the event date.
fights_data = fights_data.drop(columns=['referee', 'fight_url', 'result_details', 'event_id'])

In [None]:
# Preview the first rows of the assembled fights table.
fights_data.head()

In [11]:
# Discard, in place, every fight where either fighter id is missing.
fights_data.dropna(subset=['f_1', 'f_2'], inplace=True)

In [None]:
# Show how many fights fights_data currently holds.
print(len(fights_data))

In [12]:
# Check whether the dataset contains draws: list every fight whose recorded
# winner is neither of its two fighters (an empty result means there are none).
fights_data.loc[~((fights_data['winner'] == fights_data['f_1']) | (fights_data['winner'] == fights_data['f_2']))]

Unnamed: 0,fight_id,f_1,f_2,winner,num_rounds,title_fight,weight_class,gender,result,finish_round,finish_time,event_date,height_1,weight_1,reach_1,stance_1,height_2,weight_2,reach_2,stance_2


In [13]:
from dateutil import relativedelta
import pandas as pd


def fighter_age(f_id, date, fighters=None):
    """Return a fighter's age, in completed years, on a given date.

    Parameters
    ----------
    f_id
        Fighter id, matched against the ``fighter_id`` column.
    date
        Date on which the age is evaluated: a datetime-like value or a
        ``'%Y-%m-%d'`` string.
    fighters : pandas.DataFrame, optional
        Table with ``fighter_id`` and ``fighter_dob`` columns. Defaults to the
        notebook-level ``ufc_fighters`` frame.

    Returns
    -------
    int or float
        Whole years between the date of birth and ``date``. ``NaN`` is returned
        when the fighter id is unknown or when either date is missing, so that
        a gap in the data yields a missing age instead of an error.
    """
    if fighters is None:
        fighters = ufc_fighters

    births = fighters.loc[fighters['fighter_id'] == f_id, 'fighter_dob']
    if births.empty:
        # Unknown fighter id: there is no date of birth to work with.
        return float('nan')

    event_date = pd.to_datetime(date, format='%Y-%m-%d')
    fighter_dob = pd.to_datetime(births.iloc[0], format='%Y-%m-%d')
    if pd.isna(event_date) or pd.isna(fighter_dob):
        # relativedelta cannot produce a meaningful result from NaT.
        return float('nan')

    return relativedelta.relativedelta(event_date, fighter_dob).years

# Smoke test: age of the first fighter of the row labelled 1, on the date of that fight.
test_age = fighter_age(fights_data.loc[1, 'f_1'], fights_data.loc[1, 'event_date'])
print(test_age)

28


In [14]:
# Compute the age of each fighter on the date of the fight.
# Each column is built in a single pass and assigned once, so it gets its dtype
# from the computed ages rather than from a string placeholder. Any error raised
# by fighter_age propagates as a normal traceback instead of ending the session.
fights_data['age_1'] = [
    fighter_age(fighter, fight_date)
    for fighter, fight_date in zip(fights_data['f_1'], fights_data['event_date'])
]
fights_data['age_2'] = [
    fighter_age(fighter, fight_date)
    for fighter, fight_date in zip(fights_data['f_2'], fights_data['event_date'])
]

In [None]:
# Inspect the first-fighter id stored in the row labelled 0.
fights_data.loc[0,'f_1']

In [16]:
def fighter_xp(id, date, fights=None):
    """Summarise a fighter's record over the fights held strictly before a date.

    Parameters
    ----------
    id
        Fighter id, matched against the ``f_1`` and ``f_2`` columns.
    date
        Cut-off date (datetime-like or ``'%Y-%m-%d'`` string). Only fights whose
        ``event_date`` is strictly earlier are counted.
    fights : pandas.DataFrame, optional
        Table with ``f_1``, ``f_2``, ``winner`` and ``event_date`` columns.
        Defaults to the notebook-level ``fights_data`` frame.

    Returns
    -------
    pandas.DataFrame
        A single-row frame with the columns ``wins``, ``loses`` and ``total``.
        Every earlier fight that the fighter did not win is counted as a loss.
    """
    if fights is None:
        fights = fights_data

    cutoff = pd.to_datetime(date, format='%Y-%m-%d')

    # Narrow down to this fighter's bouts first so only those dates are parsed.
    f_fights = fights.loc[(fights['f_1'] == id) | (fights['f_2'] == id)]
    fight_dates = pd.to_datetime(f_fights['event_date'], format='%Y-%m-%d')
    earlier = f_fights.loc[fight_dates < cutoff]

    wins = int((earlier['winner'] == id).sum())
    total_fights = len(earlier)
    loses = total_fights - wins

    return pd.DataFrame(data={'wins': [wins], 'loses': [loses], 'total': [total_fights]})

In [17]:
# Compute the prior record (total fights, wins, losses) of both fighters of every fight.
# fighter_xp is evaluated once per fighter and per fight; the resulting one-row
# summaries are collected first and each column is then assigned in one go.
# Errors raised by fighter_xp propagate as normal tracebacks.
xp_records = {
    corner: [
        fighter_xp(fighter, fight_date).iloc[0]
        for fighter, fight_date in zip(fights_data[f'f_{corner}'], fights_data['event_date'])
    ]
    for corner in (1, 2)
}

# Column order: total_fights_1, total_fights_2, wins_1, wins_2, loses_1, loses_2.
for column, stat in (('total_fights', 'total'), ('wins', 'wins'), ('loses', 'loses')):
    for corner in (1, 2):
        fights_data[f'{column}_{corner}'] = [int(record[stat]) for record in xp_records[corner]]

In [18]:
# Spot check of the computed features: every fight involving fighter 2884 that this fighter won.
fights_data.loc[((fights_data.f_1 == 2884) | (fights_data.f_2 == 2884)) & (fights_data.winner == 2884)]

Unnamed: 0,fight_id,f_1,f_2,winner,num_rounds,title_fight,weight_class,gender,result,finish_round,finish_time,event_date,height_1,weight_1,reach_1,stance_1,height_2,weight_2,reach_2,stance_2,age_1,age_2,total_fights_1,total_fights_2,wins_1,wins_2,loses_1,loses_2
0,7218,2976.0,2884.0,2884.0,5,F,Lightweight,M,KO/TKO,2,2:03,2023-09-23,172.72,155.0,180.34,Switch,177.8,155.0,177.8,Southpaw,30,32,8,7,6,5,2,2
325,6893,2884.0,332.0,2884.0,3,F,Lightweight,M,Decision,3,5:00,2023-03-04,177.8,155.0,177.8,Southpaw,190.5,155.0,195.58,Southpaw,32,27,6,8,4,6,2,2
655,6563,350.0,2884.0,2884.0,5,F,Lightweight,M,Decision,5,5:00,2022-06-25,170.18,155.0,182.88,Orthodox,177.8,155.0,177.8,Southpaw,25,31,6,4,5,3,1,1
915,6303,3007.0,2884.0,2884.0,3,F,Lightweight,M,KO/TKO,2,3:26,2021-12-18,175.26,155.0,187.96,Orthodox,177.8,155.0,177.8,Southpaw,36,31,12,3,8,2,4,1
1138,6080,542.0,2884.0,2884.0,3,F,Lightweight,M,Submission,1,1:05,2021-07-17,175.26,145.0,180.34,Orthodox,177.8,155.0,177.8,Southpaw,35,30,32,2,15,1,17,1
1278,5940,2522.0,2884.0,2884.0,3,F,Lightweight,M,KO/TKO,2,1:22,2021-04-10,175.26,155.0,175.26,Orthodox,177.8,155.0,177.8,Southpaw,37,30,11,1,7,0,4,1


In [None]:
def fighter_name(id):
    """Build the "first-name last-name" label of a fighter.

    The rows of ``ufc_fighters`` whose ``fighter_id`` equals ``id`` are selected
    and their ``fighter_f_name`` and ``fighter_l_name`` values are joined with a
    single space. The result is a pandas Series with one entry per matching row,
    not a plain string.
    """
    matches = ufc_fighters.loc[ufc_fighters.fighter_id == id]
    return matches.fighter_f_name + ' ' + matches.fighter_l_name