## Run get_ufc_data()  <font color=red>ONLY WHEN</font> downloading all fighters' data from UFC
### A single run takes around 10-15 mins

In [171]:
def get_ufc_data():
    '''
    Download every fighter's statistics from the UFC API and write them to fighters.csv.

    1. Fetches all events that have happened since UFC's inception
    2. Iterates through all fights in every event, gets fighter's statistics for every fight
    3. Aggregate and dedup fighter statistics. Duplicates happen because a fighter is present in multiple fights.
       Keep the last occurrence while deduping.
    4. Adds 'total_fights' and 'win %' columns and writes the result to fighters.csv
       in the current directory.

    Issues one HTTP request per event, so a single run takes around 10-15 mins.
    Raises requests.HTTPError if the API answers with an error status.
    '''
    from io import StringIO

    # Statistic names as they follow the 'fighter1'/'fighter2' prefix in the fights JSON.
    # Most are joined to the prefix with an underscore ('fighter1_wins') ...
    underscored_stats = ['first_name', 'last_name', 'nickname',
                         'wins', 'losses', 'draws', 'averagefighttime',
                         'averagefighttime_seconds', 'kdaverage', 'slpm', 'strikingaccuracy',
                         'sapm', 'strikingdefense', 'takedownaverage', 'takedownaccuracy',
                         'takedowndefense', 'submissionsaverage']
    # ... but these three are not ('fighter1height').
    bare_stats = ['height', 'weight', 'reach']

    def fetch_frame(url):
        # Fail loudly on an HTTP error instead of parsing an error page as data.
        response = requests.get(url)
        response.raise_for_status()
        # read_json wants a file-like object; passing a literal JSON string is deprecated.
        return pd.read_json(StringIO(json.dumps(json.loads(response.content))))

    def stats_for(prefix):
        # Select one fighter's columns and rename them to the shared fighter_<stat> names
        # so that both corners can be stacked into the same DataFrame.
        rename_map = {prefix + '_' + stat: 'fighter_' + stat for stat in underscored_stats}
        rename_map.update({prefix + stat: 'fighter_' + stat for stat in bare_stats})
        return df.loc[:, list(rename_map)].rename(columns=rename_map)

    # Get UFC event data
    all_events_df = fetch_frame(UFC_ALL_EVENTS_API)

    # Collect the per-event frames and concatenate once: DataFrame.append was removed
    # in pandas 2.0 and re-copied the accumulated frame on every iteration.
    fight_frames = [
        fetch_frame(UFC_ALL_EVENTS_API + '/' + str(event_id) + FIGHTS_API_SUFFIX)
        for event_id in all_events_df['id']
    ]
    df = pd.concat(fight_frames)

    fighter_stats_df = pd.concat([stats_for('fighter1'), stats_for('fighter2')]).reset_index(drop=True)

    # Remove duplicates, keeping the last row seen for each fighter.
    # NOTE(review): "last" is only the latest statistics if the API returns fights in
    # chronological order, and all fighter2 rows sort after all fighter1 rows - confirm.
    # .copy() gives an independent frame so the column assignments below are safe.
    fighter_stats_df_dedup = fighter_stats_df.drop_duplicates(
        ['fighter_first_name', 'fighter_last_name', 'fighter_nickname'], keep='last').copy()

    # Add win% and total fights to DataFrame
    total_fights = fighter_stats_df_dedup['fighter_wins'] + \
                   fighter_stats_df_dedup['fighter_losses'] + \
                   fighter_stats_df_dedup['fighter_draws']
    fighter_stats_df_dedup['total_fights'] = total_fights
    fighter_stats_df_dedup['win %'] = fighter_stats_df_dedup['fighter_wins'] / total_fights

    # Write to file
    fighter_stats_df_dedup.to_csv('fighters.csv', index=False)



## All methods defined below.

In [1]:
import requests
import pandas as pd
import json
import numpy as np
import requests
import bs4
import matplotlib.pyplot as plt
from IPython.display import display, Image


# UFC data API endpoints
FIGHTS_API_SUFFIX = '/fights'
UFC_ALL_EVENTS_API = 'http://ufc-data-api.ufc.com/api/v3/us/events'
UFC_RECENT_EVENTS_API = 'http://ufc-data-api.ufc.com/api/v1/us/events'

# Lines of the interactive menu
MENU_HEADER = '===============MENU================='
OPTION_1 = '1. Display fighter stats by name'
OPTION_2 = '2. Display top N fighters per weight class'
OPTION_3 = '3. Upcoming fight predictions'
OPTION_QUIT = 'q. Quit'

# ANSI escape sequences for formatting console output
END = '\x1b[0m'
BOLD = '\x1b[1m'
UNDERLINE = '\x1b[4m'

# Weight class labels, lightest first, in the '<limit> lbs.' form
WEIGHT_CLASSES = ['%d lbs.' % limit
                  for limit in (105, 115, 125, 135, 145, 155, 170, 185, 205, 265)]

# minimum number of fights per fighter to be eligible for top n ranking
MINIMUM_FIGHTS_TO_HAVE = 10

In [2]:
def join_ufc_fm_dataframes():
    '''
    Merge the data scraped from FightMetric and UFC into one master DataFrame.
    FightMetric has certain attributes that UFC doesn't e.g. stance
    UFC has more feature rich data e.g. takedowns attempted, takedowns defended, strike landed per minute, strikes absorbed etc.

    Reads ./fighter_stats.xlsx (FightMetric) and ./fighters.csv (UFC).
    Returns the merged DataFrame, or None (after printing a hint) when either
    file is missing from the current directory.
    '''
    # Only the two reads can raise FileNotFoundError, so only they sit in the try.
    try:
        # Load Fightmetric data
        fighter_stats_fm = pd.read_excel('./fighter_stats.xlsx')

        # Load UFC data
        # NOTE(review): get_ufc_data() writes fighters.csv with to_csv's default
        # encoding, while this reads it as ISO-8859-1 - confirm which one the
        # file on disk really uses before changing either side.
        fighter_stats_ufc = pd.read_csv('./fighters.csv', encoding='ISO-8859-1')
    except FileNotFoundError:
        print('Please check if fighter_stats.xlsx and fighters.csv are present in curr dir')
        return None

    # merge dataframes and create one master dataframe;
    # the left join keeps every FightMetric row even without a UFC match
    all_df = pd.merge(fighter_stats_fm, fighter_stats_ufc, how='left',
                      left_on=['First', 'Last'],
                      right_on=['fighter_first_name', 'fighter_last_name'])
    all_df.rename(columns={'Ht.': 'height', 'Wt.': 'weight'}, inplace=True)
    return all_df
        
# Argument fighter_df is the master dataframe created by merging UFC and fightmetric dataframes
def display_menu(fighter_df):
    '''
    Print the menu once, read one choice and run the matching action.

    Returns False when the user chooses to quit and a truthy value otherwise,
    so the caller can keep looping while this returns true.
    '''
    print('\n'.join([MENU_HEADER, OPTION_1, OPTION_2, OPTION_3, OPTION_QUIT]))
    user_input = input('Please select an option:').strip()

    if user_input.lower() == 'q':
        return False
    if user_input == '1':
        return display_fighter_stats_by_name(fighter_df)
    if user_input == '2':
        return display_top_n_fighters_per_weight_class(fighter_df)
    if user_input == '3':
        return display_upcoming_event_predictions()

    # An unknown choice used to fall through and return None, which silently
    # ended the menu loop. Tell the user and show the menu again instead.
    print('Invalid option ' + repr(user_input) + '. Please enter 1, 2, 3 or q.')
    return True
        

        
def get_fighter_stats(all_df, first, last):
    '''
    Displays the stats of every fighter whose first name starts with `first`
    and whose last name starts with `last`. Case insensitive.

    all_df is the master dataframe with 'First' and 'Last' columns.
    Always returns True so the menu loop keeps running.
    '''
    import re

    # Series.str.match anchors at the start of the string only, so this is a
    # prefix match. re.escape makes the typed name literal text rather than a
    # regex, and na=False stops rows with a missing name from breaking the mask.
    first_hit = all_df.First.str.match(re.escape(first), case=False, na=False)
    last_hit = all_df.Last.str.match(re.escape(last), case=False, na=False)
    matches = all_df[first_hit & last_hit]

    if matches.empty:
        print('No fighter found matching "' + first + ' ' + last + '"')
        return True

    # remove duplicate columns. some columns are repeated since they are in both UFC and FM dataframes
    matches = matches.drop(['fighter_first_name', 'fighter_last_name', 'fighter_nickname',
                            'fighter_wins', 'fighter_losses', 'fighter_draws',
                            'fighter_height', 'fighter_weight', 'fighter_reach'], axis=1)

    # transpose so each statistic is shown on its own row
    display(matches.transpose())
    return True

def display_fighter_stats_by_name(fighter_df):
    '''
    Ask for a fighter's name and display the matching stats.

    The first word typed is the first name; everything after it is the last
    name, so multi-word last names (e.g. "De La Rosa") are kept whole.
    Returns True so the menu loop keeps running.
    '''
    fighter_name = input('Please input space separated first and last names of the fighter to display stats\n(e.g. Daniel Cormier, Conor McGregor, Frankie Edgar...)')

    # process user input
    name_parts = fighter_name.lower().split()
    if len(name_parts) < 2:
        # Indexing the split result used to raise IndexError here.
        print('Please enter both a first and a last name separated by a space.')
        return True

    first_name = name_parts[0]
    last_name = ' '.join(name_parts[1:])
    return get_fighter_stats(fighter_df, first_name, last_name)

def display_top_n_fighters_per_weight_class(fighter_df):
    '''
    Ask for N, then display one table per weight class with the N fighters
    having the best win percentage (ties broken by more total fights).

    Fighters with fewer than MINIMUM_FIGHTS_TO_HAVE fights are left out.
    N = 0 displays every eligible fighter, a negative N is treated as its
    absolute value and non-numeric input falls back to 10. Returns True.
    '''
    prompt = 'Please select N and we shall display top N fighters in every weight class(0 for all)'
    try:
        top_n = abs(int(input(prompt)))
    except ValueError:
        # give default value of 10
        top_n = 10

    shown_columns = ['fighter_first_name', 'fighter_last_name', 'total_fights', 'weight', 'win %']

    for weight_class in WEIGHT_CLASSES:
        # fighters of this class that have enough fights to be ranked
        eligible = fighter_df[(fighter_df['weight'] == weight_class) &
                              (fighter_df.total_fights >= MINIMUM_FIGHTS_TO_HAVE)]

        # best win percentage first, more fights first among equals
        ranked = eligible.sort_values(by=['win %', 'total_fights'], ascending=[False, False])
        ranked = ranked[shown_columns]
        if top_n:
            ranked = ranked.head(top_n)

        display(ranked)
        print('==================================================================================================================')
    return True
        

def ask_user_for_event(recent_event_list):
    '''
    List the events and keep asking until the user picks a valid one.

    Entering i shows the event posters and asks again.
    Returns the chosen row number as an int (0 <= n < number of events),
    ready to be used as a positional index.
    '''
    prompt = ('Enter row number of event for which you need to view prediction\n' +
              'Event ID should be a integer index starting from 0\n' +
              BOLD + 'Enter i to display event posters' + END)

    while True:
        print(recent_event_list['base_title'] + ' ' + recent_event_list['title_tag_line'])
        event_input = input(prompt).strip()

        # show event posters to user if he wants to see them
        if event_input.lower() == 'i':
            show_event_images(recent_event_list)
            continue

        # The raw string used to be returned as-is, which cannot be used as a
        # positional index; convert and validate it here instead.
        try:
            event_idx = int(event_input)
        except ValueError:
            print('Invalid input. Please enter a row number or i.')
            continue

        if 0 <= event_idx < len(recent_event_list):
            return event_idx
        print('Row number out of range. Please choose a value from 0 to ' +
              str(len(recent_event_list) - 1) + '.')

def ask_user_for_fight(event_fight_list):
    '''
    List the fights of an event and keep asking until the user picks a valid one.

    Returns the chosen row number as an int (0 <= n < number of fights),
    ready to be used as a positional index.
    '''
    print(event_fight_list['fighter1_first_name'] + " " + event_fight_list['fighter1_last_name'] +
          ' V/S ' + event_fight_list['fighter2_first_name'] + " " + event_fight_list['fighter2_last_name'])

    while True:
        fight_input = input('Enter fight for which you need to view prediction(input should be index id): ').strip()

        # The raw string used to be returned as-is, which cannot be used as a
        # positional index; convert and validate it here instead.
        try:
            fight_id_to_predict = int(fight_input)
        except ValueError:
            print('Invalid input. Please enter an integer index.')
            continue

        if 0 <= fight_id_to_predict < len(event_fight_list):
            return fight_id_to_predict
        print('Index out of range. Please choose a value from 0 to ' +
              str(len(event_fight_list) - 1) + '.')

In [3]:
def show_event_images(event_list):
    '''
    For every event, display its poster image (200x200) followed by its title
    and tag line printed in bold.
    '''
    poster_urls = event_list['feature_image']
    base_titles = event_list['base_title']
    tag_lines = event_list['title_tag_line']

    for poster_url, base_title, tag_line in zip(poster_urls, base_titles, tag_lines):
        display(Image(url=poster_url, height=200, width=200))
        print(BOLD + base_title + " " + tag_line + '\n\n\n' + END)
        

def show_fighters_skill_plots(fight_df):
    '''
    Show matplotlib bar charts comparing the two fighters' skillsets.

    One chart each for wins, knockdown average, shots landed per minute and
    takedown average, read from the fighter1_*/fighter2_* columns of fight_df.
    '''
    comparisons = [
        ('Wins', ['fighter1_wins', 'fighter2_wins']),
        ('KnockDown Avg', ['fighter1_kdaverage', 'fighter2_kdaverage']),
        ('Shots landed per min', ['fighter1_slpm', 'fighter2_slpm']),
        ('Takedowns avg', ['fighter1_takedownaverage', 'fighter2_takedownaverage']),
    ]

    for title, columns in comparisons:
        # Draw on axes we create ourselves. Without ax=, DataFrame.plot opens its
        # own figure, so the old plt.figure()/plt.clf() calls only produced a
        # stray empty figure next to the real chart.
        fig, ax = plt.subplots()
        fight_df.plot(y=columns, kind='bar', title=title, ax=ax)
        plt.show()
    

def predict_winner(fight_df):
    '''
    Rudimentary algo predicting future winner on past performance.

    The fighter with the higher career win percentage
    (wins / (wins + losses + draws)) is predicted to win.

    fight_df is a one-row DataFrame (or a single row as a Series) holding the
    fighter1_*/fighter2_* columns of one fight.
    Returns a bold-formatted string: either the predicted winner's name or an
    "even matchup" message when the percentages are equal or a fighter has
    no recorded fights.
    '''
    # Work on scalars. Doing the arithmetic on one-row Series made the old code
    # return a Series instead of a string and relied on int(Series), which
    # pandas has deprecated.
    fight = fight_df.iloc[0] if isinstance(fight_df, pd.DataFrame) else fight_df

    def record(prefix):
        # (display name, win percentage or None when there is no fight history)
        wins = fight[prefix + '_wins']
        total_fights = int(wins + fight[prefix + '_losses'] + fight[prefix + '_draws'])
        name = '{} {}'.format(fight[prefix + '_first_name'], fight[prefix + '_last_name'])
        if total_fights == 0:
            return name, None
        return name, float(wins) / total_fights

    f1_name, win_percent_f1 = record('fighter1')
    f2_name, win_percent_f2 = record('fighter2')

    even_matchup = BOLD + 'Even matchup. More data needed.' + END
    if win_percent_f1 is None or win_percent_f2 is None:
        return even_matchup

    if win_percent_f1 > win_percent_f2:
        winner = f1_name
    elif win_percent_f2 > win_percent_f1:
        winner = f2_name
    else:
        return even_matchup
    return BOLD + 'Winner is: ' + winner + END


def display_upcoming_event_predictions():
    '''
    1. Ask user for event. Show bitmaps if user wants.
    2. List all fights in that event. Ask user for fight.
    3. Plot fighters' skillset in matplotlib.
    4. Predict winner.

    Returns True so the menu loop keeps running.
    Raises requests.HTTPError if the API answers with an error status.
    '''
    from io import StringIO

    def fetch_frame(url):
        # Fail loudly on an HTTP error instead of parsing an error page as data.
        response = requests.get(url)
        response.raise_for_status()
        # read_json wants a file-like object; passing a literal JSON string is deprecated.
        return pd.read_json(StringIO(json.dumps(json.loads(response.content))))

    # fetch upcoming and recently passed events from UFC event API
    recent_event_list = fetch_frame(UFC_RECENT_EVENTS_API)

    # now ask for user input, which event is he interested in?
    # iloc needs an integer position, so convert whatever the prompt returned.
    try:
        user_event_input_idx = int(ask_user_for_event(recent_event_list))
        event_df = recent_event_list.iloc[[user_event_input_idx]]
    except (ValueError, IndexError):
        print('Invalid event selection.')
        return True

    # display all fights inside one event
    # and ask for user input, which fight is he interested in?
    event_fight_list = fetch_frame(
        UFC_ALL_EVENTS_API + '/' + str(event_df.iloc[0]['id']) + FIGHTS_API_SUFFIX)
    if event_fight_list.empty:
        print('No fights are listed for this event.')
        return True

    try:
        user_fight_input_idx = int(ask_user_for_fight(event_fight_list))
        fight_df = event_fight_list.iloc[[user_fight_input_idx]]
    except (ValueError, IndexError):
        print('Invalid fight selection.')
        return True

    # show fighters' skills comparisons plots
    show_fighters_skill_plots(fight_df)

    # predict winner
    print(predict_winner(fight_df))
    return True

In [4]:
def main():
    '''
    Build the master dataframe and run the interactive menu until the user quits.
    '''
    # create master dataframe from ufc and fightmetric dataframes
    all_df = join_ufc_fm_dataframes()

    # join_ufc_fm_dataframes() returns None when an input file is missing;
    # there is nothing to show then, so stop instead of failing inside the menu.
    if all_df is None:
        return

    # show menu to the user
    while display_menu(all_df):
        pass
        
# NOTE(review): this guard never fires. A module run directly gets
# __name__ == '__main__', not 'main', so main() is only ever started by the
# explicit main() call further down. If the comparison is corrected, that
# explicit call should be removed, otherwise the menu would run twice.
if __name__ == 'main':
    main()



# Run the cell below to execute the code

In [5]:
# Start the interactive menu loop; enter q at the menu prompt to quit.
main()

1. Display fighter stats by name
2. Display top N fighters per weight class
3. Upcoming fight predictions
q. Quit
Please select an option:1
Please input space separated first and last names of the fighter to display stats
(e.g. Daniel Cormier, Conor McGregor, Frankie Edgar...)Brian Ortega


Unnamed: 0,2001
First,Brian
Last,Ortega
Nickname,T-City
height,"5' 8"""
weight,145 lbs.
Reach,"69.0"""
Stance,Switch
W,13
L,0
D,0


1. Display fighter stats by name
2. Display top N fighters per weight class
3. Upcoming fight predictions
q. Quit
Please select an option:2
Please select N and we shall display top N fighters in every weight class(0 for all)


Unnamed: 0,fighter_first_name,fighter_last_name,total_fights,weight,win %




Unnamed: 0,fighter_first_name,fighter_last_name,total_fights,weight,win %
1256,Joanna,Jedrzejczyk,15.0,115 lbs.,0.933333
2088,Viviane,Pereira,14.0,115 lbs.,0.928571
994,Alexa,Grasso,11.0,115 lbs.,0.909091
2830,Polyana,Viana,11.0,115 lbs.,0.909091
1423,Karolina,Kowalkiewicz,13.0,115 lbs.,0.846154
869,Claudia,Gadelha,18.0,115 lbs.,0.833333
2746,Tecia,Torres,12.0,115 lbs.,0.833333
354,Joanne,Calderwood,14.0,115 lbs.,0.785714
757,Carla,Esparza,18.0,115 lbs.,0.777778
569,Aisling,Daly,21.0,115 lbs.,0.761905




Unnamed: 0,fighter_first_name,fighter_last_name,total_fights,weight,win %
802,Deiveson,Figueiredo,14.0,125 lbs.,1.0
1217,Naoki,Inoue,11.0,125 lbs.,1.0
218,Magomed,Bibulatov,15.0,125 lbs.,0.933333
293,Jarred,Brooks,14.0,125 lbs.,0.928571
451,Katlyn,Chookagian,11.0,125 lbs.,0.909091
1280,Demetrious,Johnson,30.0,125 lbs.,0.9
1172,Kyoji,Horiguchi,20.0,125 lbs.,0.9
1837,Joseph,Morales,10.0,125 lbs.,0.9
1639,Jose,Maria,38.0,125 lbs.,0.868421
190,Joseph,Benavidez,29.0,125 lbs.,0.862069




Unnamed: 0,fighter_first_name,fighter_last_name,total_fights,weight,win %
1272,Brett,Johns,15.0,135 lbs.,1.0
207,Manny,Bermudez,12.0,135 lbs.,1.0
2261,Jimmie,Rivera,22.0,135 lbs.,0.954545
2597,Cody,Stamann,17.0,135 lbs.,0.941176
2514,Douglas,Silva de Andrade,27.0,135 lbs.,0.925926
547,Dominick,Cruz,24.0,135 lbs.,0.916667
890,Cody,Garbrandt,12.0,135 lbs.,0.916667
2192,Ricardo,Ramos,12.0,135 lbs.,0.916667
2517,Ricky,Simon,11.0,135 lbs.,0.909091
602,Mark,De La Rosa,10.0,135 lbs.,0.9




Unnamed: 0,fighter_first_name,fighter_last_name,total_fights,weight,win %
2001,Brian,Ortega,13.0,145 lbs.,1.0
1324,Cris,Cyborg,20.0,145 lbs.,0.95
2849,Alexander,Volkanovski,18.0,145 lbs.,0.944444
1612,Zabit,Magomedsharipov,15.0,145 lbs.,0.933333
48,Arnold,Allen,13.0,145 lbs.,0.923077
185,Mirsad,Bektic,13.0,145 lbs.,0.923077
277,Martin,Bravo,13.0,145 lbs.,0.923077
1360,Jeremy,Kennedy,12.0,145 lbs.,0.916667
1015,Wang,Guan,22.0,145 lbs.,0.909091
172,Stephen,Bass,11.0,145 lbs.,0.909091




Unnamed: 0,fighter_first_name,fighter_last_name,total_fights,weight,win %
1958,Khabib,Nurmagomedov,25.0,155 lbs.,1.0
925,Gregor,Gillespie,11.0,155 lbs.,1.0
871,Justin,Gaethje,19.0,155 lbs.,0.947368
1621,Islam,Makhachev,16.0,155 lbs.,0.9375
2058,Alan,Patrick,16.0,155 lbs.,0.9375
2832,James,Vick,14.0,155 lbs.,0.928571
2150,Michel,Prazeres,26.0,155 lbs.,0.923077
2124,Vinc,Pichel,12.0,155 lbs.,0.916667
1610,Rashid,Magomedov,22.0,155 lbs.,0.909091
2512,Joaquim,Silva,11.0,155 lbs.,0.909091




Unnamed: 0,fighter_first_name,fighter_last_name,total_fights,weight,win %
1787,Alberto,Mina,13.0,170 lbs.,1.0
2731,Darren,Till,17.0,170 lbs.,0.941176
531,Colby,Covington,14.0,170 lbs.,0.928571
562,Claudio,Silva,12.0,170 lbs.,0.916667
926,Jason,Gilliam,12.0,170 lbs.,0.916667
2154,Niko,Price,12.0,170 lbs.,0.916667
2788,Kamaru,Usman,12.0,170 lbs.,0.916667
2141,Santiago,Ponzinibbio,30.0,170 lbs.,0.9
416,Gil,Castillo,19.0,170 lbs.,0.894737
1080,John,Hathaway,19.0,170 lbs.,0.894737




Unnamed: 0,fighter_first_name,fighter_last_name,total_fights,weight,win %
1776,Phillip,Miller,15.0,185 lbs.,1.0
10,Israel,Adesanya,12.0,185 lbs.,1.0
922,Trevin,Giles,11.0,185 lbs.,1.0
1109,Paulo,Costa,11.0,185 lbs.,1.0
2593,Georges,St-Pierre,28.0,185 lbs.,0.928571
631,Alessio,Di Chirico,12.0,185 lbs.,0.916667
2127,Oskar,Piechota,12.0,185 lbs.,0.916667
73,Eryk,Anders,11.0,185 lbs.,0.909091
278,Tom,Breese,11.0,185 lbs.,0.909091
1568,Nate,Loughran,10.0,185 lbs.,0.9




Unnamed: 0,fighter_first_name,fighter_last_name,total_fights,weight,win %
1291,Jon,Jones,24.0,205 lbs.,0.958333
514,Daniel,Cormier,21.0,205 lbs.,0.952381
2313,Kristian,Rothaermel,44.0,205 lbs.,0.909091
2870,Cory,Walmsley,11.0,205 lbs.,0.909091
1827,Homer,Moore,19.0,205 lbs.,0.894737
1491,James,Lee,28.0,205 lbs.,0.892857
1972,Volkan,Oezdemir,17.0,205 lbs.,0.882353
2948,Travis,Wiuff,25.0,205 lbs.,0.88
1983,Michal,Oleksiejczuk,15.0,205 lbs.,0.866667
254,Steve,Bosse,14.0,205 lbs.,0.857143




Unnamed: 0,fighter_first_name,fighter_last_name,total_fights,weight,win %
232,Curtis,Blaydes,10.0,265 lbs.,0.9
2663,Tim,Sylvia,30.0,265 lbs.,0.866667
406,Shane,Carwin,14.0,265 lbs.,0.857143
1711,Gan,McGee,14.0,265 lbs.,0.857143
1697,Sean,McCorkle,12.0,265 lbs.,0.833333
2957,Justin,Wren,12.0,265 lbs.,0.833333
2309,Jared,Rosholt,17.0,265 lbs.,0.823529
510,Josh,Copeland,11.0,265 lbs.,0.818182
2035,Soa,Palelei,27.0,265 lbs.,0.814815
2314,Ben,Rothwell,46.0,265 lbs.,0.782609


1. Display fighter stats by name
2. Display top N fighters per weight class
3. Upcoming fight predictions
q. Quit
Please select an option:q
