In [1]:
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from bs4 import BeautifulSoup
import re
import pandas as pd
import os
from datetime import datetime
import numpy as np

driver = webdriver.Firefox(executable_path=r'D:\downloads\geckodriver\geckodriver.exe')
driver.implicitly_wait(10)

def get_fights():
    # Define columns and initialize empty dataframe
    fight_columns = ['Date', 'Fighter1', 'Fighter2','Result', 'Event', 'Location', 'Attendance', 'Weightclass',
                     'Details', 'Judge1', 'Judge2', 'Judge3', 'Judge1_Fighter1_Score', 'Judge1_Fighter2_Score',
                     'Judge2_Fighter1_Score', 'Judge2_Fighter2_Score', 'Judge3_Fighter1_Score', 'Judge3_Fighter2_Score']
    fights = pd.DataFrame(columns=fight_columns)
    
    # Startup Selenium and nagivate to events page
    url = 'http://ufcstats.com/statistics/events/completed?page=all'
    driver.get(url)
    
    # Get a list of links to all events
    events_page = BeautifulSoup(driver.page_source, 'lxml')
    events_table = events_page.find('table', {'class':'b-statistics__table-events'})
    events_links = events_table.find_all('a')
    events_urls = [link.get('href') for link in events_links[1::]]

    # Loop through all events
    for event_url in events_urls[::-1][0::]:
        driver.get(event_url)
        fights_page = BeautifulSoup(driver.page_source, 'lxml')
        
        # Get Basic Event Information
        event_details_html = fights_page.find_all('li', 'b-list__box-list-item')
        event_details = {}
        
        event_title = fights_page.find('span', 'b-content__title-highlight').get_text(strip=True)

        for event_detail in event_details_html:
            text = event_detail.contents[1].get_text(strip=True)
            if text == 'Date:':
                date = datetime.strptime(event_detail.get_text(strip=True).split(':')[1], '%B %d, %Y')
            elif text == 'Location:':
                location = event_detail.get_text(strip=True).split(':')[1]
            elif text == 'Attendance:':
                attendance = event_detail.get_text(strip=True).split(':')[1]
        
        # Get list of links to all fights in event
        fights_table = fights_page.find('table', {'class':'b-fight-details__table'})
        fights_table_rows = fights_table.find_all('tr', 'b-fight-details__table-row')
        fights_urls = [row.get('data-link') for row in fights_table_rows[1::]]
        
        # Loop through all fights in event
        for fight_url in fights_urls[::-1][0::]:
            fight_row = pd.DataFrame(columns=fight_columns)
            
            driver.get(fight_url)
            
            fight_stats_page = BeautifulSoup(driver.page_source, 'lxml')
            
            # Get fighters in fight
            fighters_spans = fight_stats_page.find_all('h3', 'b-fight-details__person-name')
            fighters = [span.get_text(strip=True) for span in fighters_spans]
            
            # Get result of fight (win / draw)
            results_html = fight_stats_page.find_all('i', 'b-fight-details__person-status')
            results = [element.get_text(strip=True) for element in results_html]
            if 'D' in results:
                result = 'Draw'
            else:
                result = 'Win'
                
            # Get weight lcass
            weight_class_details_text = fight_stats_page.find('i', 'b-fight-details__fight-title').get_text(strip=True)
            weight_class_details_text_list = weight_class_details_text.split(' ')
            for i in range(len(weight_class_details_text_list)):
                # For all weight classes except 'Open'    
                if 'weight' in weight_class_details_text_list[i]:
                    weight_class = weight_class_details_text_list[i]
                else:
                    weight_class = 'Open Weight'
                    
            fight_info = {'Date':date, 'Fighter1':fighters[0], 'Fighter2':fighters[1],'Result':result,
                          'Event':event_title, 'Location':location, 'Attendance':attendance, 'Weightclass':weight_class}
            
            # Get Fight Details
            fight_details_html = fight_stats_page.find_all('i', ['b-fight-details__text-item', 'b-fight-details__text-item_first'])
            fight_details_list = [element for element in fight_details_html]
            
            # Get Fight Details_Details and Judges/Scores
            details = fight_details_list[-1].parent.get_text(' ', strip=True).split(':')
            if '.' in details[1]:
                details = details[1::][0].split('.')[0:-1]
                details = [detail[1::].split(' ') for detail in details]
                
                for i in range(0,3):
                    for j in range(1,3): 
                        fight_info[f'Judge{i+1}'] = ' '.join(details[i][0:-3])
                        fight_info[f'Judge{i+1}_Fighter{j}_Score'] = details[i][-3::][j-1]
            else:
                fight_info['Details'] = details[1][1::]
                        
            fight_details_list = [element.get_text(strip=True).split(':') for element in fight_details_html]
            for fight_detail in fight_details_list:
                if fight_detail[0] == 'Round':
                    fight_info['Round_End'] = fight_detail[1]
                elif fight_detail[0] == 'Time':
                    fight_info['Round_Time'] = (int(fight_detail[1]) * 60) + int(fight_detail[2])
                elif fight_detail[0] == 'Time format':
                    fight_info['Time Format'] = fight_detail[1]
                elif fight_detail[0] == 'Referee':
                    fight_info['Referee'] = fight_detail[1]
                elif fight_detail[0] == 'Method':
                    fight_info['Method'] = fight_detail[1]
                    
            fight_stats_row_html_list_all_rows = fight_stats_page.find_all('tr', 'b-fight-details__table-row')
            fight_stats_total_end = int(len(fight_stats_row_html_list_all_rows)-(len(fight_stats_row_html_list_all_rows)/2))
            
            fight_stats_total_row_html_list = fight_stats_row_html_list_all_rows[3:fight_stats_total_end]
            fight_stats_sig_row_html_list = fight_stats_row_html_list_all_rows[fight_stats_total_end+3::]
            
            # Round Total Stats
            total_stats = []
            for i in range(len(fight_stats_total_row_html_list)):
                round_stats_total_text_html = fight_stats_total_row_html_list[i].find_all('p', 'b-fight-details__table-text')
                round_stats_total_list = [element.get_text(strip=True) for element in round_stats_total_text_html[2::]]
                
                round_total_stats = {}
                
                for j in range(1,3):
                    round_total_stats[f'Fighter{j}_Knock_Downs_Round{i+1}'] = round_stats_total_list[j-1]
                    round_total_stats[f'Fighter{j}_Sig_Strikes_Round{i+1}'] = round_stats_total_list[j+1].split(' ')[0]
                    round_total_stats[f'Fighter{j}_Sig_Strikes_Attempted_Round{i+1}'] = round_stats_total_list[j+1].split(' ')[-1]
                    round_total_stats[f'Fighter{j}_Total_Strikes_Round{i+1}'] = round_stats_total_list[j+5].split(' ')[0]
                    round_total_stats[f'Fighter{j}_Total_Strikes_Attempted_Round{i+1}'] = round_stats_total_list[j+5].split(' ')[-1]
                    round_total_stats[f'Fighter{j}_Take_Downs_Round{i+1}'] = round_stats_total_list[j+7].split(' ')[0]
                    round_total_stats[f'Fighter{j}_Take_Downs_Attempted_Round{i+1}'] = round_stats_total_list[j+7].split(' ')[-1]
                    round_total_stats[f'Fighter{j}_Submission_Attempts_Round{i+1}'] = round_stats_total_list[j+11]
                    round_total_stats[f'Fighter{j}_Reversals_Round{i+1}'] = round_stats_total_list[j+13]  
                    
                total_stats.append(round_total_stats)
              
            total_stats_combined = {}
            for round_dict in total_stats:
                total_stats_combined.update(round_dict)
                
            # Round Significant Strikes Stats
            sig_stats = []
            for i in range(len(fight_stats_sig_row_html_list)):
                round_stats_sig_text_html = fight_stats_sig_row_html_list[i].find_all('p', 'b-fight-details__table-text')
                round_stats_sig_list = [element.get_text(strip=True) for element in round_stats_sig_text_html[2::]]
            
                round_sig_stats = {}
                
                for j in range(1,3):
                    round_sig_stats[f'Fighter{j}_Sig_Strikes_Head_Round{i+1}'] = round_stats_sig_list[j+3].split(' ')[0]
                    round_sig_stats[f'Fighter{j}_Sig_Strikes_Head_Attempted_Round{i+1}'] = round_stats_sig_list[j+3].split(' ')[-1]
                    round_sig_stats[f'Fighter{j}_Sig_Strikes_Body_Round{i+1}'] = round_stats_sig_list[j+5].split(' ')[0]
                    round_sig_stats[f'Fighter{j}_Sig_Strikes_Body_Attempted_Round{i+1}'] = round_stats_sig_list[j+5].split(' ')[-1]
                    round_sig_stats[f'Fighter{j}_Sig_Strikes_Leg_Round{i+1}'] = round_stats_sig_list[j+7].split(' ')[0]
                    round_sig_stats[f'Fighter{j}_Sig_Strikes_Leg_Attempted_Round{i+1}'] = round_stats_sig_list[j+7].split(' ')[-1]
                    round_sig_stats[f'Fighter{j}_Sig_Strikes_Distance_Round{i+1}'] = round_stats_sig_list[j+9].split(' ')[0]
                    round_sig_stats[f'Fighter{j}_Sig_Strikes_Distance_Attempted_Round{i+1}'] = round_stats_sig_list[j+9].split(' ')[-1]
                    round_sig_stats[f'Fighter{j}_Sig_Strikes_Clinch_Round{i+1}'] = round_stats_sig_list[j+11].split(' ')[0]
                    round_sig_stats[f'Fighter{j}_Sig_Strikes_Clinch_Attempted_Round{i+1}'] = round_stats_sig_list[j+11].split(' ')[-1]
                    round_sig_stats[f'Fighter{j}_Sig_Strikes_Ground_Round{i+1}'] = round_stats_sig_list[j+13].split(' ')[0]
                    round_sig_stats[f'Fighter{j}_Sig_Strikes_Ground_Attempted_Round{i+1}'] = round_stats_sig_list[j+13].split(' ')[-1]
            
                sig_stats.append(round_sig_stats)
            
            sig_stats_combined = {}
            for round_dict in sig_stats:
                sig_stats_combined.update(round_dict)
            
            
            # Combine all the different dictionaries into one and add is as a row to dataframe
            stats = {}
            for stats_dict in (total_stats_combined, sig_stats_combined):
                stats.update(stats_dict)
                
            fight = {}
            for d in (fight_info, stats):
                fight.update(d)
            
            fights = fights.append(fight, ignore_index=True)
    return fights
        
fights = get_fights()
fights

Unnamed: 0,Date,Fighter1,Fighter2,Result,Event,Location,Attendance,Weightclass,Details,Judge1,...,Fighter2_Sig_Strikes_Head_Attempted_Round5,Fighter2_Sig_Strikes_Head_Round5,Fighter2_Sig_Strikes_Leg_Attempted_Round5,Fighter2_Sig_Strikes_Leg_Round5,Fighter2_Sig_Strikes_Round5,Fighter2_Submission_Attempts_Round5,Fighter2_Take_Downs_Attempted_Round5,Fighter2_Take_Downs_Round5,Fighter2_Total_Strikes_Attempted_Round5,Fighter2_Total_Strikes_Round5
0,1993-11-12,Gerard Gordeau,Teila Tuli,Win,UFC 1: The Beginning,"Denver, Colorado, USA",2800,Open Weight,Kick to Head On Ground,,...,,,,,,,,,,
1,1993-11-12,Kevin Rosier,Zane Frazier,Win,UFC 1: The Beginning,"Denver, Colorado, USA",2800,Open Weight,to \n Corner Stoppage,,...,,,,,,,,,,
2,1993-11-12,Royce Gracie,Art Jimmerson,Win,UFC 1: The Beginning,"Denver, Colorado, USA",2800,Open Weight,Other \n Position - Mount,,...,,,,,,,,,,
3,1993-11-12,Ken Shamrock,Patrick Smith,Win,UFC 1: The Beginning,"Denver, Colorado, USA",2800,Open Weight,Heel Hook From Guard,,...,,,,,,,,,,
4,1993-11-12,Gerard Gordeau,Kevin Rosier,Win,UFC 1: The Beginning,"Denver, Colorado, USA",2800,Open Weight,to \n Corner Stoppage,,...,,,,,,,,,,
5,1993-11-12,Royce Gracie,Ken Shamrock,Win,UFC 1: The Beginning,"Denver, Colorado, USA",2800,Open Weight,Rear Naked Choke,,...,,,,,,,,,,
6,1993-11-12,Jason DeLucia,Trent Jenkins,Win,UFC 1: The Beginning,"Denver, Colorado, USA",2800,Open Weight,Rear Naked Choke,,...,,,,,,,,,,
7,1993-11-12,Royce Gracie,Gerard Gordeau,Win,UFC 1: The Beginning,"Denver, Colorado, USA",2800,Open Weight,Rear Naked Choke,,...,,,,,,,,,,
8,1994-03-11,Scott Morris,Sean Daugherty,Win,UFC 2: No Way Out,"Denver, Colorado, USA",2000,Open Weight,Guillotine Choke From Mount,,...,,,,,,,,,,
9,1994-03-11,Patrick Smith,Ray Wizard,Win,UFC 2: No Way Out,"Denver, Colorado, USA",2000,Open Weight,Guillotine Choke Standing,,...,,,,,,,,,,
