# UFC_Stats Web Scraping

## importing pertinent libraries

In [2]:
#importing pertinent libraries
import pandas as pd
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests, re
import json
import itertools
import dask.dataframe as dd
from datetime import datetime

In [3]:
from datetime import datetime

# `current_date` is today's date as text (e.g. "August 01, 2020");
# `current_datetime` is the same day at midnight, used later to compare
# against event dates.
today = datetime.today()
current_date = today.strftime("%B %d, %Y")
current_datetime = today.replace(hour=0, minute=0, second=0, microsecond=0)

## Extracting UFC urls

In [4]:
# Download the fighter index page and parse it.
# NOTE(review): `char=a` presumably limits the listing to one letter of the
# alphabet (the sample names shown further down are consistent with that) — confirm.
response = requests.get('http://www.ufcstats.com/statistics/fighters?char=a&page=all')
soup = BeautifulSoup(response.content, 'html.parser')

# Every anchor with an absolute (http:) href is a candidate link;
# links that are not fighter profiles are filtered out in a later cell.
fighters = soup.findAll('a', attrs={'href': re.compile('http:')})
fighter_url_list = [fighter.get('href') for fighter in fighters]

# De-duplicate; note that going through set() does not keep the page order.
fighter_url_list = list(set(fighter_url_list))

In [5]:
# Preview the first 15 collected URLs.
fighter_url_list[:15]

['http://www.ufcstats.com/fighter-details/ebc5af72ad5a28cb',
 'http://www.ufcstats.com/fighter-details/79ded75550efc139',
 'http://www.ufcstats.com/fighter-details/1291edf2d566a71a',
 'http://www.ufcstats.com/fighter-details/9bcfb40dbcd50568',
 'http://www.ufcstats.com/fighter-details/2f181c0467965b98',
 'http://www.ufcstats.com/fighter-details/8738eacd62e82a32',
 'http://www.ufcstats.com/fighter-details/d221ee27afc7a60e',
 'http://www.ufcstats.com/fighter-details/1ccff7f0cfdf85eb',
 'http://www.ufcstats.com/fighter-details/46d0a888d87d91ac',
 'http://www.ufcstats.com/fighter-details/79cb2a690b9ba5e8',
 'http://www.ufcstats.com/fighter-details/93fe7332d16c6ad9',
 'http://www.ufcstats.com/fighter-details/b1d19449397541dc',
 'http://www.ufcstats.com/fighter-details/1c5879330d42255f',
 'http://www.ufcstats.com/fighter-details/26387c19f32dda0f',
 'http://www.ufcstats.com/fighter-details/e741536153227386']

Looks like there are some URLs we don't need in our list. Let's weed them out.

In [6]:
# Keep only fighter profile URLs.
# A new list is built instead of calling list.remove() inside a loop over the
# same list: removing while iterating shifts the remaining items left, so the
# element right after each removed one is never examined and can survive.
fighter_url_list = [url for url in fighter_url_list if 'fighter-details' in url]

In [7]:
# Preview the first 15 URLs again after filtering.
fighter_url_list[:15]

['http://www.ufcstats.com/fighter-details/ebc5af72ad5a28cb',
 'http://www.ufcstats.com/fighter-details/79ded75550efc139',
 'http://www.ufcstats.com/fighter-details/1291edf2d566a71a',
 'http://www.ufcstats.com/fighter-details/9bcfb40dbcd50568',
 'http://www.ufcstats.com/fighter-details/2f181c0467965b98',
 'http://www.ufcstats.com/fighter-details/8738eacd62e82a32',
 'http://www.ufcstats.com/fighter-details/d221ee27afc7a60e',
 'http://www.ufcstats.com/fighter-details/1ccff7f0cfdf85eb',
 'http://www.ufcstats.com/fighter-details/46d0a888d87d91ac',
 'http://www.ufcstats.com/fighter-details/79cb2a690b9ba5e8',
 'http://www.ufcstats.com/fighter-details/93fe7332d16c6ad9',
 'http://www.ufcstats.com/fighter-details/b1d19449397541dc',
 'http://www.ufcstats.com/fighter-details/1c5879330d42255f',
 'http://www.ufcstats.com/fighter-details/26387c19f32dda0f',
 'http://www.ufcstats.com/fighter-details/e741536153227386']

## Creating Dataframe Building Blocks

In [8]:
def info_generator(url):
    '''Download a page and parse it with BeautifulSoup.

    Args:
        url(str): address of the page to fetch with `requests.get`

    Returns:
        BeautifulSoup object built from the response body with the
        'html.parser' backend. The HTTP status code is not checked.
    '''
    page = requests.get(url)
    return BeautifulSoup(page.content, 'html.parser')

In [9]:
# Fetch and parse every fighter page once, so later cells can reuse the
# parsed soups instead of requesting the website again.
basic_soup_list = [info_generator(fighter_url) for fighter_url in fighter_url_list]

In [10]:
def fighter_history_fetcher(soup):
    '''Collect the fight-detail links found on a fighter page.

    Args:
        soup(BeautifulSoup object): parsed fighter profile page

    Returns:
        list of single-element lists, one per anchor whose href matches
        'fight-details'; each inner list holds that anchor's href.
    '''
    fight_links = soup.findAll('a', attrs={'href': re.compile('fight-details')})

    # each href is deliberately wrapped in its own list (flattened in a later cell)
    return [[link.get('href')] for link in fight_links]

### Generating basic statistics lists

In [11]:
def base_stats_soup_generator(soup):
    '''Extract the statistics list items from a parsed UFC Stats page.

    Args:
        soup(BeautifulSoup object): must be an unaltered soup

    Returns:
        the result of `findChildren` for every 'li' element whose class
        matches the pattern 'b-list'.
    '''
    return soup.findChildren('li', attrs={'class': re.compile('b-list')})

In [12]:
# one list of statistics <li> items per fighter page
stats_soups = [base_stats_soup_generator(fighter_soup) for fighter_soup in basic_soup_list]

In [13]:
# one entry per fighter: the (still nested) list of that fighter's fight URLs
fighter_history_url_list = [fighter_history_fetcher(fighter_soup) for fighter_soup in basic_soup_list]
fighter_history_url_list[:3]

[[['http://www.ufcstats.com/fight-details/823d7037552a8907']],
 [['http://www.ufcstats.com/fight-details/51c354507aa80512'],
  ['http://www.ufcstats.com/fight-details/6cddc822670517ca'],
  ['http://www.ufcstats.com/fight-details/a0d99312f392f33e'],
  ['http://www.ufcstats.com/fight-details/820d44b5dccba9b6'],
  ['http://www.ufcstats.com/fight-details/3e49c868130978e8'],
  ['http://www.ufcstats.com/fight-details/87a150a16a195e77'],
  ['http://www.ufcstats.com/fight-details/81bd1c1f7dfccbb7'],
  ['http://www.ufcstats.com/fight-details/57cb1a350540c29e'],
  ['http://www.ufcstats.com/fight-details/cfb5a6c998f1954b'],
  ['http://www.ufcstats.com/fight-details/97addff4a2e5efa6'],
  ['http://www.ufcstats.com/fight-details/63c5c2dc21fb4a27'],
  ['http://www.ufcstats.com/fight-details/3a0cb2d68f5c62cf'],
  ['http://www.ufcstats.com/fight-details/695597b78c8f94e3'],
  ['http://www.ufcstats.com/fight-details/3ea14584d775efc9'],
  ['http://www.ufcstats.com/fight-details/037f4c25a87b9234'],
  ['htt

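`fighter_history_url_list` holds one list per fighter, and each fight URL inside it is itself wrapped in a single-element list, so the next step flattens it by one level.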

In [14]:
def compound_flattener(fight_url_list):
    '''Flatten a list of lists by exactly one level.

    Args:
        fight_url_list(list): list whose items are themselves iterables

    Returns:
        list with the items of every inner iterable, in order.
    '''
    return list(itertools.chain.from_iterable(fight_url_list))

In [15]:
# unwrap the single-element lists so each fighter maps to a flat list of URLs
fighter_history_url_list = [compound_flattener(fights) for fights in fighter_history_url_list]

In [16]:
# Preview the flattened fight-URL lists of the first three fighters.
fighter_history_url_list[:3]

[['http://www.ufcstats.com/fight-details/823d7037552a8907'],
 ['http://www.ufcstats.com/fight-details/51c354507aa80512',
  'http://www.ufcstats.com/fight-details/6cddc822670517ca',
  'http://www.ufcstats.com/fight-details/a0d99312f392f33e',
  'http://www.ufcstats.com/fight-details/820d44b5dccba9b6',
  'http://www.ufcstats.com/fight-details/3e49c868130978e8',
  'http://www.ufcstats.com/fight-details/87a150a16a195e77',
  'http://www.ufcstats.com/fight-details/81bd1c1f7dfccbb7',
  'http://www.ufcstats.com/fight-details/57cb1a350540c29e',
  'http://www.ufcstats.com/fight-details/cfb5a6c998f1954b',
  'http://www.ufcstats.com/fight-details/97addff4a2e5efa6',
  'http://www.ufcstats.com/fight-details/63c5c2dc21fb4a27',
  'http://www.ufcstats.com/fight-details/3a0cb2d68f5c62cf',
  'http://www.ufcstats.com/fight-details/695597b78c8f94e3',
  'http://www.ufcstats.com/fight-details/3ea14584d775efc9',
  'http://www.ufcstats.com/fight-details/037f4c25a87b9234',
  'http://www.ufcstats.com/fight-detail

### Basic Statistics DataFrame Construction

In [16]:
def fighter_name(soup):
    '''Read the fighter's name from a profile page.

    Args:
        soup(BeautifulSoup object): BeautifulSoup object must originate from a url hosting a fighter profile

    Returns:
        str: stripped text of the first 'span' whose class matches
        'b-content__title-highlight'.
    '''
    title_span = soup.find('span', attrs={'class': re.compile('b-content__title-highlight')})
    return title_span.text.strip()

In [17]:
def fighter_record(soup):
    '''Read the fighter's record from a profile page.

    Args:
        soup(BeautifulSoup object): parsed fighter profile page

    Returns:
        str: the text after the first ':' in the first 'span' whose class
        matches 'b-content__title-record', stripped of whitespace.
    '''
    record_spans = soup.findChildren('span', attrs={'class': re.compile('b-content__title-record')})
    label_and_value = record_spans[0].text.strip().split(':')
    return label_and_value[1].strip()

In [18]:
def basic_stats_height(soup):
    '''Convert the height entry (first item) of a basic-stats list to inches.

    Args:
        soup(list): items produced by base_stats_soup_generator; item 0 is
            read and its text is expected to look like "Height: 5' 10\"".

    Returns:
        int: total height in inches (feet * 12 + inches), or None when the
        page shows '--' instead of a height.
    '''
    # text after the label, e.g. "5' 10\"" or "--"
    height_text = soup[0].text.strip().split(':')[1].strip()
    parts = height_text.split(' ')

    feet = parts[0].replace("'", "")
    if feet == '--':
        # height is not listed for this fighter
        return None

    # a missing inches part ('--') counts as zero inches
    inches = parts[1].replace("'", "").replace('"', '').replace("--", "0")

    return int(feet) * 12 + int(inches)

In [19]:
def basic_stats_weight(soup):
    '''Read the weight entry (second item) of a basic-stats list.

    Args:
        soup(list): items produced by base_stats_soup_generator; item 1 is read

    Returns:
        int weight with the ' lbs.' suffix removed, or the string '--'
        when the page has no value.
    '''
    raw_value = soup[1].text.strip().split(':')[1].strip()
    cleaned = raw_value.replace(' lbs.', '')
    return cleaned if cleaned == '--' else int(cleaned)

In [20]:
def basic_stats_reach(soup):
    '''Read the reach entry (third item) of a basic-stats list.

    Args:
        soup(list): items produced by base_stats_soup_generator; item 2 is read

    Returns:
        int reach with the '"' mark removed, or the string '--' when the
        page has no value.
    '''
    cleaned = soup[2].text.strip().split(':')[1].strip().replace('"', '')
    if cleaned != '--':
        return int(cleaned)
    return cleaned

In [21]:
def basic_stats_stance(soup):
    '''Read the stance entry (fourth item) of a basic-stats list.

    Args:
        soup(list): items produced by base_stats_soup_generator; item 3 is read

    Returns:
        str: text after the first ':' with surrounding whitespace removed
        (an empty string when nothing follows the label).
    '''
    label_and_value = soup[3].text.strip().split(':')
    return label_and_value[1].strip()

In [22]:
def basic_stats_DOB(soup):
    '''Read the date-of-birth entry (fifth item) of a basic-stats list.

    Args:
        soup(list): items produced by base_stats_soup_generator; item 4 is read

    Returns:
        str: text after the first ':' with surrounding whitespace removed,
        left unparsed (e.g. 'Jun 02, 1992' or '--').
    '''
    label_and_value = soup[4].text.strip().split(':')
    return label_and_value[1].strip()

In [23]:
# name and record come from the full page soup ...
fighter_names = [fighter_name(page) for page in basic_soup_list]
fighter_records = [fighter_record(page) for page in basic_soup_list]

# ... while the physical attributes come from the per-fighter statistics items
fighter_heights = [basic_stats_height(items) for items in stats_soups]
fighter_weights = [basic_stats_weight(items) for items in stats_soups]
fighter_reachs = [basic_stats_reach(items) for items in stats_soups]
fighter_stances = [basic_stats_stance(items) for items in stats_soups]
fighter_DOBs = [basic_stats_DOB(items) for items in stats_soups]

In [24]:
# One row per fighter; columns are kept in the original order.
fighter_basic_stats_df = pd.DataFrame({
    'name': fighter_names,
    'record': fighter_records,
    'height': fighter_heights,
    'weight': fighter_weights,
    'reach': fighter_reachs,
    'stance': fighter_stances,
    'DOB': fighter_DOBs,
})

In [25]:
# Preview the first rows of the basic statistics table.
fighter_basic_stats_df.head()

Unnamed: 0,name,record,height,weight,reach,stance,DOB
0,Rico Altamirano,4-3-0,70.0,170,--,,--
1,Scott Adams,8-1-0,72.0,225,--,Southpaw,--
2,Magno Almeida,10-3-0 (1 NC),72.0,155,--,,"Jun 06, 1985"
3,Magomed Ankalaev,13-1-0,75.0,205,75,Orthodox,"Jun 02, 1992"
4,Estefani Almeida,7-3-0,64.0,125,--,Orthodox,"Oct 17, 1988"


In [26]:
# Write the table as CSV without the index column.
# Note the output file name has no '.csv' extension.
fighter_basic_stats_df.to_csv('fighter_basic_stats_csv', index=False)

### Career Statistics DataFrame Construction

In [27]:
def career_stats_soup(soup):
    '''Extract the statistics list items from a parsed fighter page.

    Args:
        soup(BeautifulSoup object): parsed fighter profile page

    Returns:
        every 'li' element whose class matches 'b-list', unsliced. The
        career_stats_* helpers index positions 5 to 13 of this full list,
        so the leading basic-stats items must stay in place.
    '''
    # The original also computed an unused `[5:]` slice and shadowed the
    # function name with a local; both are dropped, the return is unchanged.
    return soup.findChildren('li', attrs={'class': re.compile('b-list')})

In [28]:
def career_stats_SLpM(soup):
    '''Read the SLpM entry (item 5) of a fighter's statistics list.

    Args:
        soup(list): statistics items of one fighter; item 5 is read

    Returns:
        float value, or the string '--' when the page has no value.
    '''
    value = soup[5].text.split(':')[1].replace('%', '').strip()
    if value == '--':
        return value
    return float(value)

In [29]:
def career_stats_StrAcc(soup):
    '''Read the Str. Acc. entry (item 6) of a fighter's statistics list.

    Args:
        soup(list): statistics items of one fighter; item 6 is read

    Returns:
        float with the '%' sign removed, or the string '--' when the page
        has no value.
    '''
    value = soup[6].text.split(':')[1].replace('%', '').strip()
    return value if value == '--' else float(value)

In [30]:
def career_stats_SApM(soup):
    '''Read the SApM entry (item 7) of a fighter's statistics list.

    Args:
        soup(list): statistics items of one fighter; item 7 is read

    Returns:
        float value, or the string '--' when the page has no value.
    '''
    value = soup[7].text.split(':')[1].strip()
    if value != '--':
        value = float(value)
    return value

In [31]:
def career_stats_StrDef(soup):
    '''Read the Str. Def entry (item 8) of a fighter's statistics list.

    Args:
        soup(list): statistics items of one fighter; item 8 is read

    Returns:
        float with the '%' sign removed, or the string '--' when the page
        has no value.
    '''
    value = soup[8].text.split(':')[1].replace('%', '').strip()
    if value == '--':
        return value
    return float(value)

In [32]:
def career_stats_TDAvg(soup):
    '''Read the TD Avg. entry (item 10) of a fighter's statistics list.

    Item 9 of the list is not read by any of the career_stats_* helpers.

    Args:
        soup(list): statistics items of one fighter; item 10 is read

    Returns:
        float value, or the string '--' when the page has no value.
    '''
    value = soup[10].text.split(':')[1].strip()
    return value if value == '--' else float(value)

In [33]:
def career_stats_TDAcc(soup):
    '''Read the TD Acc. entry (item 11) of a fighter's statistics list.

    Args:
        soup(list): statistics items of one fighter; item 11 is read

    Returns:
        float with the '%' sign removed, or the string '--' when the page
        has no value.
    '''
    value = soup[11].text.split(':')[1].strip().replace('%', '')
    if value != '--':
        value = float(value)
    return value

In [34]:
def career_stats_TDDef(soup):
    '''Read the TD Def. entry (item 12) of a fighter's statistics list.

    Args:
        soup(list): statistics items of one fighter; item 12 is read

    Returns:
        float with the '%' sign removed, or the string '--' when the page
        has no value.
    '''
    value = soup[12].text.split(':')[1].replace('%', '').strip()
    return value if value == '--' else float(value)

In [35]:
def career_stats_SubAvg(soup):
    '''Read the Sub. Avg. entry (item 13) of a fighter's statistics list.

    Args:
        soup(list): statistics items of one fighter; item 13 is read

    Returns:
        float value (any '%' sign removed), or the string '--' when the
        page has no value.
    '''
    value = soup[13].text.split(':')[1].strip().replace('%', '')
    if value == '--':
        return value
    return float(value)

In [36]:
# one value per fighter for each career statistic, all read from stats_soups
fighter_SLpMs = [career_stats_SLpM(items) for items in stats_soups]
fighter_StrAccs = [career_stats_StrAcc(items) for items in stats_soups]
fighter_SApMs = [career_stats_SApM(items) for items in stats_soups]
fighter_StrDefs = [career_stats_StrDef(items) for items in stats_soups]
fighter_TDAvgs = [career_stats_TDAvg(items) for items in stats_soups]
fighter_TDAccs = [career_stats_TDAcc(items) for items in stats_soups]
fighter_TDDefs = [career_stats_TDDef(items) for items in stats_soups]
fighter_SubAvgs = [career_stats_SubAvg(items) for items in stats_soups]

In [37]:
# One row per fighter; columns are kept in the original order.
fighter_career_stats_df = pd.DataFrame({
    'name': fighter_names,
    'SLpMs': fighter_SLpMs,
    'StrAccs': fighter_StrAccs,
    'SApMs': fighter_SApMs,
    'StrDefs': fighter_StrDefs,
    'TDAvgs': fighter_TDAvgs,
    'TDAccs': fighter_TDAccs,
    'TDDefs': fighter_TDDefs,
    'SubAvgs': fighter_SubAvgs,
})

In [38]:
# Preview the first rows of the career statistics table.
fighter_career_stats_df.head()

Unnamed: 0,name,SLpMs,StrAccs,SApMs,StrDefs,TDAvgs,TDAccs,TDDefs,SubAvgs
0,Rico Altamirano,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Scott Adams,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Magno Almeida,1.67,36.0,2.6,41.0,0.0,0.0,40.0,5.0
3,Magomed Ankalaev,3.46,56.0,1.24,67.0,0.68,33.0,85.0,0.0
4,Estefani Almeida,3.13,37.0,6.07,42.0,0.0,0.0,0.0,0.0


In [39]:
# Write the table as CSV without the index column.
# Note the output file name has no '.csv' extension.
fighter_career_stats_df.to_csv('fighter_career_stats_csv', index=False)

### Constructing Fight Event and Fight List Database

In [19]:
# Download the listing of completed events and parse it.
# (`response` and `soup` are reused here and overwrite the fighter-index values.)
response = requests.get('http://www.ufcstats.com/statistics/events/completed?page=all')
soup = BeautifulSoup(response.content, 'html.parser')

# Every anchor with an absolute (http:) href is a candidate event link;
# non-event links are filtered out in the next cell.
events = soup.findAll('a', attrs={'href': re.compile('http:')})
event_url_list = [event.get('href') for event in events]

# De-duplicate; note that going through set() does not keep the page order.
event_url_list = list(set(event_url_list))

In [20]:
#removing invalid URLs
for url in event_url_list:
    if 'event-details' not in url:
        event_url_list.remove(url)
    
for url in event_url_list:  
    if len(url) <  54:
        event_url_list.remove(url)

In [21]:
# Preview the first 15 event URLs.
event_url_list[:15]

['http://www.ufcstats.com/event-details/b1605ea39fba6af6',
 'http://www.ufcstats.com/event-details/29f935654825331b',
 'http://www.ufcstats.com/event-details/d6b68eaf4b68b160',
 'http://www.ufcstats.com/event-details/3fed746acfd026dd',
 'http://www.ufcstats.com/event-details/1652f3213655b935',
 'http://www.ufcstats.com/event-details/319fa1bd3176bded',
 'http://www.ufcstats.com/event-details/3c241737a6069b9f',
 'http://www.ufcstats.com/event-details/32a3025d5db456ae',
 'http://www.ufcstats.com/event-details/02fc8f50f56eb307',
 'http://www.ufcstats.com/event-details/02177caefe7c07d4',
 'http://www.ufcstats.com/event-details/fc31f896cde2bc2e',
 'http://www.ufcstats.com/event-details/cfb65863d5099327',
 'http://www.ufcstats.com/event-details/e8c170a64dc920ac',
 'http://www.ufcstats.com/event-details/4985113c0928aa62',
 'http://www.ufcstats.com/event-details/3795fca327cbcf23']

In [22]:
# Number of event URLs kept after filtering.
len(event_url_list)

523

In [23]:
# fetch and parse every event page once; later cells reuse these soups
event_soups = [info_generator(event_url) for event_url in event_url_list]

In [119]:
# Number of parsed event pages currently held in event_soups.
len(event_soups)

521

In [44]:
def event_date_fetcher(event_soup):
    '''Read the event date from a parsed event page.

    Args:
        event_soup(BeautifulSoup object): parsed event page

    Returns:
        str: text after the first ':' in the first 'li' whose class matches
        'b-list__', stripped of whitespace; None when the page has no such
        item or the item contains no ':'.
    '''
    date_holder = event_soup.findChildren('li', attrs={'class': re.compile('b-list__')})

    try:
        return date_holder[0].text.split(':')[1].strip()
    except IndexError:
        # No matching item, or no ':' in its text. Only this expected failure
        # is handled; a bare `except` would also hide unrelated errors.
        return None

In [45]:
def event_fights_parser(event_soup):
    '''Collect the fight-detail links found on an event page.

    Args:
        event_soup(BeautifulSoup object): parsed event page

    Returns:
        list of href values, one per anchor whose href matches 'fight-details'.
    '''
    fight_links = event_soup.findAll('a', attrs={'href': re.compile('fight-details')})
    return [link.get('href') for link in fight_links]

In [46]:
def event_name_fetcher(event_soup):
    '''Read the event title from a parsed event page.

    Args:
        event_soup(BeautifulSoup object): parsed event page

    Returns:
        str: stripped text of the first 'h2' whose class matches 'b-content__'.
    '''
    heading = event_soup.find('h2', attrs={'class': re.compile('b-content__')})
    return heading.text.strip()

In [240]:
# Gather the per-event values first (both lists follow the order of event_soups)
event_dates_list = [event_date_fetcher(event_page) for event_page in event_soups]
event_names_list = [event_name_fetcher(event_page) for event_page in event_soups]

# The event URL column is currently disabled:
#event_database_df['event'] = event_url_list

# Build the table, then turn the date strings into datetime values
event_database_df = pd.DataFrame({
    'date': event_dates_list,
    'name': event_names_list,
})
event_database_df['date'] = pd.to_datetime(event_database_df['date'])

In [241]:
# Display the event table (pandas truncates the middle rows of long frames).
event_database_df

Unnamed: 0,date,name
0,2005-08-20,UFC 54: Boiling Point
1,2018-04-14,UFC Fight Night: Poirier vs. Gaethje
2,2020-01-25,UFC Fight Night: Blaydes vs. Dos Santos
3,2019-01-19,UFC Fight Night: Cejudo vs. Dillashaw
4,2006-07-08,UFC 61: Bitter Rivals
...,...,...
516,2014-05-31,UFC Fight Night: Munoz vs Mousasi
517,2006-02-04,UFC 57: Liddell vs Couture 3
518,2012-06-23,UFC 147: Silva vs Franklin 2
519,2018-11-10,UFC Fight Night: Korean Zombie vs. Rodriguez


In [48]:
# Events dated after today are listed but have not taken place yet.
is_pending = event_database_df['date'] > current_datetime
event_database_df_dropped = event_database_df[is_pending]

pending_events_indexes = event_database_df_dropped.index.tolist()

# Remove the matching soups. The list is rebuilt in one pass instead of calling
# pop() for each index in ascending order: every pop shifts the later items left,
# so from the second index onwards the wrong soup would be removed.
# This relies on event_database_df having been built from event_soups in the
# same order, so row labels equal list positions.
pending_positions = set(pending_events_indexes)
event_soups[:] = [
    event_page
    for position, event_page in enumerate(event_soups)
    if position not in pending_positions
]

# Drop the same rows from the table and renumber it, so that it stays aligned
# with event_soups; otherwise the per-event lists derived from event_soups
# later would not match the table length. Rows with a missing date are kept,
# exactly like their soups. Re-running this cell is then a no-op.
event_database_df = event_database_df[~is_pending].reset_index(drop=True)

In [49]:
# Summarise column dtypes and non-null counts of the event table.
event_database_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 522 entries, 0 to 521
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   event   522 non-null    object        
 1   date    522 non-null    datetime64[ns]
 2   name    522 non-null    object        
dtypes: datetime64[ns](1), object(2)
memory usage: 12.4+ KB


In [73]:
def event_fight_participants_fetcher(event_soup):
    '''Pair up the fighter names listed on an event page.

    Args:
        event_soup(BeautifulSoup object): parsed event page

    Returns:
        list of (name, name) tuples built from consecutive anchors whose
        class matches 'b-link': items 0 and 1 form the first pair, items 2
        and 3 the second, and so on. A trailing unpaired name is dropped.
    '''
    name_links = event_soup.findChildren('a', attrs={'class': re.compile('b-link')})
    names = [link.text.strip() for link in name_links]

    # even positions against odd positions
    return list(zip(names[0::2], names[1::2]))

In [192]:
def event_fight_strikes(event_soup):
    '''Collect the strike column of every fight row on an event page.

    Args:
        event_soup(BeautifulSoup object): parsed event page

    Returns:
        list with one entry per fight row: the whitespace-split text of the
        row's cell at index 2 (a list of strings).
    '''
    fight_rows = event_soup.findAll('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})

    return [
        row.findAll('td', attrs={'class': re.compile('b-fight-details')})[2].text.split()
        for row in fight_rows
    ]

In [198]:
def event_fight_takedowns(event_soup):
    '''Collect the takedown column of every fight row on an event page.

    Args:
        event_soup(BeautifulSoup object): parsed event page

    Returns:
        list with one entry per fight row: the whitespace-split text of the
        row's cell at index 3 (a list of strings).
    '''
    fight_rows = event_soup.findAll('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})

    return [
        row.findAll('td', attrs={'class': re.compile('b-fight-details')})[3].text.split()
        for row in fight_rows
    ]

In [212]:
def event_fight_submissions(event_soup):
    '''Collect the submission column of every fight row on an event page.

    Args:
        event_soup(BeautifulSoup object): parsed event page

    Returns:
        list with one entry per fight row: the whitespace-split text of the
        row's cell at index 4 (a list of strings).
    '''
    fight_rows = event_soup.findAll('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})

    return [
        row.findAll('td', attrs={'class': re.compile('b-fight-details')})[4].text.split()
        for row in fight_rows
    ]


In [219]:
def event_fight_passes(event_soup):
    '''Collect the pass column of every fight row on an event page.

    Args:
        event_soup(BeautifulSoup object): parsed event page

    Returns:
        list with one entry per fight row: the whitespace-split text of the
        row's cell at index 5 (a list of strings).
    '''
    fight_rows = event_soup.findAll('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})

    return [
        row.findAll('td', attrs={'class': re.compile('b-fight-details')})[5].text.split()
        for row in fight_rows
    ]

In [82]:
def event_fight_weight_classes(event_soup):
    '''Collect the weight class of every fight row on an event page.

    Args:
        event_soup(BeautifulSoup object): parsed event page

    Returns:
        list of str, one per fight row: the stripped text of the row's cell
        at index 6.
    '''
    fight_rows = event_soup.findAll('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})

    return [
        row.findAll('td', attrs={'class': re.compile('b-fight-details')})[6].text.strip()
        for row in fight_rows
    ]

In [68]:
def event_fight_win_method(event_soup):
    '''Collect the win method of every fight row on an event page.

    Args:
        event_soup(BeautifulSoup object): parsed event page

    Returns:
        list of str, one per fight row: the stripped text of the first 'p'
        element inside the row's cell at index 7.
    '''
    fight_rows = event_soup.findAll('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})

    methods = []
    for row in fight_rows:
        method_cell = row.findAll('td', attrs={'class': re.compile('b-fight-details')})[7]
        paragraphs = method_cell.findAll('p', attrs={'class': re.compile('b-fight-details')})
        methods.append(paragraphs[0].text.strip())

    return methods

In [69]:
def event_fight_win_method_details(event_soup):
    '''Collect the win-method detail of every fight row on an event page.

    Args:
        event_soup(BeautifulSoup object): parsed event page

    Returns:
        list of str, one per fight row: the stripped text of the second 'p'
        element inside the row's cell at index 7.
    '''
    fight_rows = event_soup.findAll('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})

    details = []
    for row in fight_rows:
        method_cell = row.findAll('td', attrs={'class': re.compile('b-fight-details')})[7]
        paragraphs = method_cell.findAll('p', attrs={'class': re.compile('b-fight-details')})
        details.append(paragraphs[1].text.strip())

    return details

In [70]:
def event_fight_final_round(event_soup):
    '''Collect the final round number of every fight row on an event page.

    Args:
        event_soup(BeautifulSoup object): parsed event page

    Returns:
        list of int, one per fight row: the row's cell at index 8, stripped
        and converted with int().
    '''
    fight_rows = event_soup.findAll('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})

    return [
        int(row.findAll('td', attrs={'class': re.compile('b-fight-details')})[8].text.strip())
        for row in fight_rows
    ]
    

In [71]:
def event_fight_final_round_time(event_soup):
    '''Collect the finishing time of every fight row on an event page.

    Args:
        event_soup(BeautifulSoup object): parsed event page

    Returns:
        list of str, one per fight row: the stripped text of the row's last
        cell (e.g. '2:46').
    '''
    fight_rows = event_soup.findAll('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})

    return [
        row.findAll('td', attrs={'class': re.compile('b-fight-details')})[-1].text.strip()
        for row in fight_rows
    ]

In [226]:
# One entry per event page for each extracted field; every list follows the
# order of event_soups.
event_fight_list = [event_fights_parser(event_page) for event_page in event_soups]
matchup_list = [event_fight_participants_fetcher(event_page) for event_page in event_soups]
fighter_strike_count_list = [event_fight_strikes(event_page) for event_page in event_soups]
fight_takedown_list = [event_fight_takedowns(event_page) for event_page in event_soups]
fight_submission_list = [event_fight_submissions(event_page) for event_page in event_soups]
fight_pass_list = [event_fight_passes(event_page) for event_page in event_soups]
fight_weight_class_list = [event_fight_weight_classes(event_page) for event_page in event_soups]
fight_win_method_list = [event_fight_win_method(event_page) for event_page in event_soups]
fight_final_round_list = [event_fight_final_round(event_page) for event_page in event_soups]
fight_final_round_time_list = [event_fight_final_round_time(event_page) for event_page in event_soups]

In [242]:
# Attach the per-event lists as columns, in the original order.
# Each cell of these columns holds a Python list (one item per fight).
event_columns = {
    'event_fight_list': event_fight_list,
    'matchups': matchup_list,
    'strike_counts': fighter_strike_count_list,
    'takedown_counts': fight_takedown_list,
    'submission_counts': fight_submission_list,
    'pass_counts': fight_pass_list,
    'weight_class': fight_weight_class_list,
    'win_method': fight_win_method_list,
    'final_round': fight_final_round_list,
    'final_round_time': fight_final_round_time_list,
}
for column_name, column_values in event_columns.items():
    event_database_df[column_name] = column_values

event_database_df

Unnamed: 0,date,name,event_fight_list,matchups,strike_counts,takedown_counts,submission_counts,pass_counts,weight_class,win_method,final_round,final_round_time
0,2005-08-20,UFC 54: Boiling Point,[http://www.ufcstats.com/fight-details/5a3fcd3...,"[(Chuck Liddell, Jeremy Horn), (Tim Sylvia, Tr...","[[64, 31], [31, 13], [27, 8], [35, 3], [25, 1]...","[[0, 0], [0, 0], [4, 1], [3, 0], [1, 0], [4, 2...","[[0, 0], [0, 0], [3, 0], [1, 0], [2, 0], [4, 1...","[[0, 0], [0, 0], [0, 0], [6, 0], [10, 0], [5, ...","[Light Heavyweight, Heavyweight, Light Heavywe...","[KO/TKO, KO/TKO, SUB, KO/TKO, SUB, U-DEC, U-DE...","[4, 1, 3, 2, 1, 3, 3, 2]","[2:46, 4:59, 0:52, 1:56, 4:09, 5:00, 5:00, 0:09]"
1,2018-04-14,UFC Fight Night: Poirier vs. Gaethje,[http://www.ufcstats.com/fight-details/bfae2fb...,"[(Dustin Poirier, Justin Gaethje), (Alex Olive...","[[174, 115], [9, 12], [57, 46], [61, 36], [31,...","[[0, 0], [3, 1], [0, 2], [3, 0], [1, 0], [0, 0...","[[0, 0], [1, 1], [0, 0], [0, 3], [1, 0], [0, 0...","[[0, 0], [4, 2], [0, 1], [0, 1], [1, 0], [0, 0...","[Lightweight, Welterweight, Middleweight, Wome...","[KO/TKO, SUB, S-DEC, S-DEC, SUB, KO/TKO, U-DEC...","[4, 2, 3, 3, 1, 2, 3, 3, 2, 3, 3, 2, 2, 3]","[0:33, 3:17, 5:00, 5:00, 4:28, 4:12, 5:00, 2:1..."
2,2020-01-25,UFC Fight Night: Blaydes vs. Dos Santos,[http://www.ufcstats.com/fight-details/037d2f4...,"[(Curtis Blaydes, Junior Dos Santos), (Michael...","[[28, 7], [9, 14], [2, 2], [70, 46], [101, 31]...","[[0, 0], [6, 0], [1, 0], [1, 0], [0, 6], [0, 0...","[[0, 0], [0, 0], [1, 0], [0, 0], [0, 0], [0, 0...","[[0, 0], [3, 0], [1, 0], [2, 0], [0, 4], [0, 0...","[Heavyweight, Welterweight, Flyweight, Women's...","[KO/TKO, U-DEC, SUB, KO/TKO, U-DEC, U-DEC, U-D...","[2, 3, 1, 2, 3, 3, 3, 3, 3, 3, 3, 1]","[1:06, 5:00, 2:33, 4:26, 5:00, 5:00, 5:00, 5:0..."
3,2019-01-19,UFC Fight Night: Cejudo vs. Dillashaw,[http://www.ufcstats.com/fight-details/85511ab...,"[(Henry Cejudo, TJ Dillashaw), (Allen Crowder,...","[[18, 1], [12, 29], [51, 1], [21, 49], [17, 10...","[[0, 0], [1, 0], [6, 0], [7, 3], [0, 4], [1, 0...","[[0, 0], [1, 0], [0, 0], [0, 0], [2, 0], [1, 0...","[[0, 0], [2, 0], [7, 0], [2, 2], [1, 4], [3, 2...","[Flyweight, Heavyweight, Lightweight, Flyweigh...","[KO/TKO, DQ, KO/TKO, U-DEC, SUB, SUB, KO/TKO, ...","[1, 2, 2, 3, 2, 1, 2, 3, 1, 1, 3, 3, 1]","[0:32, 2:28, 4:59, 5:00, 1:50, 3:21, 3:43, 5:0..."
4,2006-07-08,UFC 61: Bitter Rivals,[http://www.ufcstats.com/fight-details/3c3533b...,"[(Tim Sylvia, Andrei Arlovski), (Joshua Burkma...","[[97, 66], [35, 19], [11, 0], [53, 48], [28, 1...","[[0, 0], [4, 1], [1, 0], [3, 0], [3, 0], [3, 0...","[[0, 0], [0, 1], [0, 0], [0, 1], [1, 2], [1, 0...","[[0, 0], [1, 0], [0, 0], [4, 0], [1, 0], [2, 0...","[Heavyweight, Welterweight, Light Heavyweight,...","[U-DEC, U-DEC, KO/TKO, U-DEC, KO/TKO, SUB, KO/...","[5, 3, 1, 3, 2, 3, 1, 1, 3]","[5:00, 5:00, 1:18, 5:00, 5:00, 0:47, 2:42, 4:1..."
...,...,...,...,...,...,...,...,...,...,...,...,...
516,2014-05-31,UFC Fight Night: Munoz vs Mousasi,[http://www.ufcstats.com/fight-details/5d7c91a...,"[(Gegard Mousasi, Mark Munoz), (CB Dollaway, F...","[[18, 3], [44, 29], [48, 44], [5, 7], [50, 49]...","[[0, 0], [3, 1], [1, 0], [0, 2], [2, 0], [3, 0...","[[2, 0], [0, 1], [0, 0], [2, 0], [0, 0], [1, 0...","[[3, 0], [7, 0], [0, 0], [0, 2], [2, 1], [4, 3...","[Middleweight, Middleweight, Middleweight, Fea...","[SUB, U-DEC, S-DEC, SUB, U-DEC, SUB, KO/TKO, U...","[1, 3, 3, 1, 3, 2, 1, 3, 3, 3]","[3:57, 5:00, 5:00, 4:15, 5:00, 4:59, 0:25, 5:0..."
517,2006-02-04,UFC 57: Liddell vs Couture 3,[http://www.ufcstats.com/fight-details/0e94e0a...,"[(Chuck Liddell, Randy Couture), (Brandon Vera...","[[19, 7], [7, 4], [31, 3], [9, 1], [51, 38], [...","[[0, 1], [0, 0], [1, 1], [0, 0], [3, 0], [9, 0...","[[0, 0], [0, 0], [0, 0], [1, 0], [0, 1], [0, 0...","[[0, 0], [0, 0], [1, 0], [1, 1], [0, 0], [3, 0...","[Light Heavyweight, Heavyweight, Heavyweight, ...","[KO/TKO, KO/TKO, KO/TKO, SUB, U-DEC, U-DEC, KO...","[2, 1, 1, 1, 3, 3, 2, 1, 3]","[1:28, 1:25, 4:10, 2:21, 5:00, 5:00, 2:27, 4:3..."
518,2012-06-23,UFC 147: Silva vs Franklin 2,[http://www.ufcstats.com/fight-details/1b80d56...,"[(Rich Franklin, Wanderlei Silva), (Cezar Ferr...","[[122, 58], [42, 38], [36, 32], [31, 3], [19, ...","[[1, 0], [0, 0], [0, 0], [0, 0], [5, 0], [0, 0...","[[0, 0], [0, 0], [0, 1], [0, 0], [0, 1], [1, 0...","[[1, 0], [0, 0], [0, 0], [0, 0], [4, 1], [1, 0...","[Catch Weight, Middleweight, Featherweight, He...","[U-DEC, U-DEC, U-DEC, KO/TKO, U-DEC, SUB, KO/T...","[5, 3, 3, 1, 3, 1, 1, 3, 3, 3, 3]","[5:00, 5:00, 5:00, 2:28, 5:00, 2:12, 4:21, 5:0..."
519,2018-11-10,UFC Fight Night: Korean Zombie vs. Rodriguez,[http://www.ufcstats.com/fight-details/0ccd162...,"[(Yair Rodriguez, Chan Sung Jung), (Donald Cer...","[[119, 126], [9, 11], [42, 24], [44, 8], [68, ...","[[0, 0], [0, 1], [0, 0], [4, 0], [1, 0], [1, 1...","[[0, 0], [1, 0], [0, 0], [2, 4], [0, 0], [1, 0...","[[0, 0], [2, 1], [0, 0], [5, 0], [1, 0], [0, 1...","[Featherweight, Welterweight, Women's Bantamwe...","[KO/TKO, SUB, U-DEC, U-DEC, KO/TKO, S-DEC, S-D...","[5, 1, 3, 3, 2, 3, 3, 2, 1, 1, 3, 3]","[4:59, 4:46, 5:00, 5:00, 2:01, 5:00, 5:00, 2:4..."


In [243]:
# Write the event table as CSV without the index column.
# Note the output file name has no '.csv' extension.
event_database_df.to_csv('event_database_csv', index=False)

In [18]:
# Reload the saved file with dask (`dd`), replacing the pandas frame held in
# event_database_df.
# NOTE(review): the list-valued columns presumably come back as plain strings
# after the CSV round-trip — confirm before using them.
event_database_df = dd.read_csv('event_database_csv')

In [33]:
# Experiment: let pandas parse the HTML tables of one fighter page directly.
# read_html returns a list of DataFrames; the first one is shown here.
test = pd.read_html('http://www.ufcstats.com/fighter-details/d28dee5c705991df')
test[0]

Unnamed: 0,W/L,Fighter,Str,Td,Sub,Pass,Event,Method,Round,Time
0,,,,,,,,,,
1,next,Stipe Miocic Daniel Cormier,,Matchup Preview,Matchup Preview,,"UFC 252: Miocic vs. Cormier 3 Aug. 15, 2020",,,
2,win,Stipe Miocic Daniel Cormier,123 181,1 1,0 0,0 2,"UFC 241: Cormier vs. Miocic 2 Aug. 17, 2019",KO/TKO Punches,4.0,4:09
3,loss,Stipe Miocic Daniel Cormier,24 25,1 0,0 0,0 0,"UFC 226: Miocic vs. Cormier Jul. 07, 2018",KO/TKO Punches,1.0,4:33
4,win,Stipe Miocic Francis Ngannou,70 21,6 0,0 1,4 0,"UFC 220: Miocic vs. Ngannou Jan. 20, 2018",U-DEC,5.0,5:00
5,win,Stipe Miocic Junior Dos Santos,26 12,0 0,0 0,0 0,"UFC 211: Miocic vs. Dos Santos May. 13, 2017",KO/TKO Punch,1.0,2:22
6,win,Stipe Miocic Alistair Overeem,34 9,1 0,0 1,0 0,"UFC 203: Miocic vs. Overeem Sep. 10, 2016",KO/TKO Punch,1.0,4:27
7,win,Stipe Miocic Fabricio Werdum,20 15,0 0,0 0,0 0,"UFC 198: Werdum vs Miocic May. 14, 2016",KO/TKO Punch,1.0,2:47
8,win,Stipe Miocic Andrei Arlovski,11 7,0 0,0 0,0 0,"UFC 195: Lawler vs Condit Jan. 02, 2016",KO/TKO Punch,1.0,0:54
9,win,Stipe Miocic Mark Hunt,113 33,6 0,0 0,5 0,"UFC Fight Night: Miocic vs Hunt May. 09, 2015",KO/TKO Punches,5.0,2:47


In [34]:
# Raises IndexError (see the output below): there is no second entry in `test`.
test[1]

IndexError: list index out of range

**TODO — FIX IT:** `test[1]` raises `IndexError` because `pd.read_html` returned only one table for this page; this cell still needs to be fixed.