# UFC_Stats Web Scraping

## importing pertinent libraries

In [6]:
#importing pertinent libraries
import pandas as pd
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests, re
import json
import itertools
import dask.dataframe as dd
from datetime import datetime

In [7]:
from datetime import datetime

today = datetime.today()
current_date= today.strftime("%B %d, %Y")
current_datetime = datetime.strptime(current_date, "%B %d, %Y" ) 

## Extracting UFC urls

In [8]:
#creating a list of unique fighter URLs available on website
fighter_url_list = []
response = requests.get('http://www.ufcstats.com/statistics/fighters?char=a&page=all')
soup = BeautifulSoup(response.content, 'html.parser')

#targeting URLs that contain fighter data
fighters = soup.findAll('a', attrs={'href': re.compile('http:')})
for fighter in fighters:
    fighter_url_list.append(fighter.get('href'))

#removing duplicate URLs
fighter_url_list = list(set(fighter_url_list))

In [9]:
fighter_url_list[:15]

['http://www.ufcstats.com/fighter-details/2b074403b7c6cdb4',
 'http://www.ufcstats.com/fighter-details/c136b2a8852da5bd',
 'http://www.ufcstats.com/fighter-details/1e38bba6738b7b10',
 'http://www.ufcstats.com/fighter-details/93fe7332d16c6ad9',
 'http://www.ufcstats.com/fighter-details/210935fd21670f6d',
 'http://www.ufcstats.com/fighter-details/8753e125f4499816',
 'http://www.ufcstats.com/fighter-details/7bd94b60d7521e4a',
 'http://www.ufcstats.com/fighter-details/2dea80c069847321',
 'http://www.ufcstats.com/fighter-details/af997f7611673880',
 'http://www.ufcstats.com/fighter-details/6ebe96a116e79e52',
 'http://www.ufcstats.com/fighter-details/73ef22f25d0f70e2',
 'http://www.ufcstats.com/fighter-details/669a3cb6e394f515',
 'http://www.ufcstats.com/fighter-details/1562b12763cc8d67',
 'http://www.ufcstats.com/fighter-details/87a1dc546b1c5caf',
 'http://www.ufcstats.com/fighter-details/20821819c401ced8']

Looks like there are some URLs we don't need in out list. Let's weed them out.

In [434]:
#removing invalid URLs
for url in fighter_url_list:
    if 'fighter-details' not in url:
        fighter_url_list.remove(url)

## Creating Dataframe Building Blocks

In [11]:
def info_generator(url):
    '''Meant to parse any url via beuatiful soup
    
    Args:
        url(str)
        '''
    
    response = requests.get(url)
    
    soup = BeautifulSoup(response.content, 'html.parser')
    
    return soup

In [12]:
#creating a soup list to reduce number of website requests
basic_soup_list = list(map(info_generator, fighter_url_list))

In [13]:
def fighter_history_fetcher(soup):
    '''This function is meant to extract the html information for each fighter as a list of urls. '''
    
    soup_holder = soup.findAll('a', attrs={'href': re.compile('fight-details')})
    
    list_of_fights = []
    
    for x in soup_holder:
        fight_holder = []
        
        fight_holder.append(x.get('href'))
        
        list_of_fights.append(fight_holder) 
        
    return list_of_fights

### Generating basic statistics lists

In [9]:
def base_stats_soup_generator(soup):
    '''Meant to identiy statistics in the context of UFC Stats urls.
    
    Args:
        soup(BeautifulSoup object) : must be an unaltered soup'''
    
    base_stats = soup.findChildren('li', attrs={'class': re.compile('b-list')})
    
    return base_stats

In [10]:
stats_soups = list(map(base_stats_soup_generator, basic_soup_list))

In [11]:
fighter_history_url_list = list(map(fighter_history_fetcher, basic_soup_list))
fighter_history_url_list[:3]

[[],
 [['http://www.ufcstats.com/fight-details/97125cd6a94b6394'],
  ['http://www.ufcstats.com/fight-details/934b44ddcb3d95e6']],
 [['http://www.ufcstats.com/fight-details/2c7b788effdc1b4c']]]

fighter_history_url_list generated lists 

In [12]:
def compound_flattener(fight_url_list):
    flattened_fight_urls = list(itertools.chain(*fight_url_list))
    return flattened_fight_urls

In [13]:
fighter_history_url_list = list(map(compound_flattener, fighter_history_url_list))

In [14]:
fighter_history_url_list[:3]

[[],
 ['http://www.ufcstats.com/fight-details/97125cd6a94b6394',
  'http://www.ufcstats.com/fight-details/934b44ddcb3d95e6'],
 ['http://www.ufcstats.com/fight-details/2c7b788effdc1b4c']]

### Basic Statistics DataFrame Construction

In [17]:
def fighter_name(soup):
    
    name = soup.find('span', attrs={'class': re.compile('b-content__title-highlight')})
    
    return name.text.strip()#[0].text.strip()

In [18]:
def fighter_record(soup):
    
    record = soup.findChildren('span', attrs={'class': re.compile('b-content__title-record')})
    
    return record[0].text.strip().split(':')[1].strip()

In [19]:
def basic_stats_height(soup):
    '''soups run thorugh this function should be run through stats_soup function beforehand.'''
    
    #feet
    height = soup[0].text.strip().split(':')[1].strip().split(' ')[0].replace("'", "")
    
   #inches
    if height == '--':
        pass
    else:
        
        height = int(height)
        
        inches = int(soup[0].text.strip().split(':')[1].strip().split(' ')[1].replace("'", "").replace('"', '').replace("--", "0"))

        height_to_inches = (height * 12) 
    
        final_height = height_to_inches + inches
    
        return (inches + height_to_inches)

In [20]:
def basic_stats_weight(soup):
    
    weight = soup[1].text.strip().split(':')[1].strip().replace(' lbs.', '')
    
    if weight == '--':
        pass
    else:
        weight = int(weight)
    return weight 

In [21]:
def basic_stats_reach(soup):
    
    reach = soup[2].text.strip().split(':')[1].strip().replace('"', '')
    
    if reach == '--':
        pass
    else:
        reach = int(reach)
    return reach

In [22]:
def basic_stats_stance(soup):
    
    stance = soup[3].text.strip().split(':')[1].strip()
    return stance

In [23]:
def basic_stats_DOB(soup):
    
    DOB = soup[4].text.strip().split(':')[1].strip()
    return DOB

In [24]:
fighter_names = list(map(fighter_name, basic_soup_list))

fighter_records = list(map(fighter_record, basic_soup_list))

fighter_heights =  list(map(basic_stats_height, stats_soups))

fighter_weights = list(map(basic_stats_weight, stats_soups))

fighter_reachs = list(map(basic_stats_reach, stats_soups))

fighter_stances = list(map(basic_stats_stance, stats_soups))

fighter_DOBs = list(map(basic_stats_DOB, stats_soups))

In [25]:
fighter_basic_stats_df = pd.DataFrame()

fighter_basic_stats_df['name'] = fighter_names

fighter_basic_stats_df['record'] = fighter_records

fighter_basic_stats_df['height'] = fighter_heights

fighter_basic_stats_df['weight'] = fighter_weights

fighter_basic_stats_df['reach'] = fighter_reachs

fighter_basic_stats_df['stance'] = fighter_stances

fighter_basic_stats_df['DOB'] = fighter_DOBs

In [26]:
fighter_basic_stats_df.head()

Unnamed: 0,name,record,height,weight,reach,stance,DOB
0,Jim Alers,13-3-0 (1 NC),69.0,145,71,Orthodox,"Oct 14, 1986"
1,Mike Aina,12-6-1 (1 NC),69.0,155,--,Orthodox,--
2,Alex Andrade,10-5-0,71.0,200,--,Orthodox,"May 14, 1974"
3,Juan Alcain,1-2-0,,--,--,,--
4,Jose Aldo,28-6-0,67.0,135,70,Orthodox,"Sep 09, 1986"


In [27]:
fighter_basic_stats_df.to_csv('fighter_basic_stats_csv', index=False)

### Career Statistics DataFrame Construction

In [28]:
def career_stats_soup(soup):
    
    career_stats_soup = soup.findChildren('li', attrs={'class': re.compile('b-list')})
    career_stats = career_stats_soup[5:]
    
    return career_stats_soup

In [29]:
def career_stats_SLpM(soup):

    SLpM = soup[5].text.split(':')[1].replace('%', '').strip()
    
    if SLpM == '--':
        pass
    else:
        SLpM = float(SLpM)
        
    return SLpM

In [30]:
def career_stats_StrAcc(soup):
    StrAcc =  soup[6].text.split(':')[1].replace('%', '').strip()
    
    if StrAcc == '--':
        pass
    else:
        StrAcc = float(StrAcc)
        
    return StrAcc

In [31]:
def career_stats_SApM(soup):
    SApM = soup[7].text.split(':')[1].strip()
    
    if SApM == '--':
        pass
    else:
        SApM = float(SApM)
        
    return SApM

In [32]:
def career_stats_StrDef(soup):
    StrDef = soup[8].text.split(':')[1].replace('%', '').strip()
    
    if StrDef == '--':
        pass
    else:
        StrDef = float(StrDef)
        
    return StrDef

In [33]:
def career_stats_TDAvg(soup):
    TDAvg = soup[10].text.split(':')[1].strip()
    
    if TDAvg == '--':
        pass
    else:
        TDAvg = float(TDAvg)
        
    return TDAvg

In [34]:
def career_stats_TDAcc(soup):
    TDAcc = soup[11].text.split(':')[1].strip().replace('%', '')
    
    if TDAcc == '--':
        pass
    else:
        TDAcc = float(TDAcc)
        
    return TDAcc

In [35]:
def career_stats_TDDef(soup):
    TDDef = soup[12].text.split(':')[1].replace('%', '').strip()
    
    if TDDef == '--':
        pass
    else:
        TDDef = float(TDDef)
        
    return TDDef

In [36]:
def career_stats_SubAvg(soup):
    SubAvg =  soup[13].text.split(':')[1].strip().replace('%', '')  
    
    if SubAvg == '--':
        pass
    else:
        SubAvg = float(SubAvg)
        
    return SubAvg

In [37]:
fighter_SLpMs = list(map(career_stats_SLpM, stats_soups))

fighter_StrAccs = list(map(career_stats_StrAcc, stats_soups))

fighter_SApMs = list(map(career_stats_SApM, stats_soups))

fighter_StrDefs =list(map(career_stats_StrDef, stats_soups))

fighter_TDAvgs = list(map(career_stats_TDAvg, stats_soups))

fighter_TDAccs = list(map(career_stats_TDAcc, stats_soups))

fighter_TDDefs = list(map(career_stats_TDDef, stats_soups))

fighter_SubAvgs = list(map(career_stats_SubAvg, stats_soups))

In [38]:
fighter_career_stats_df = pd.DataFrame()

fighter_career_stats_df['name'] = fighter_names

fighter_career_stats_df['SLpMs'] = fighter_SLpMs

fighter_career_stats_df['StrAccs'] = fighter_StrAccs

fighter_career_stats_df['SApMs'] = fighter_SApMs

fighter_career_stats_df['StrDefs'] = fighter_StrDefs

fighter_career_stats_df['TDAvgs'] = fighter_TDAvgs

fighter_career_stats_df['TDAccs'] = fighter_TDAccs

fighter_career_stats_df['TDDefs'] = fighter_TDDefs

fighter_career_stats_df['SubAvgs'] = fighter_SubAvgs

In [39]:
fighter_career_stats_df.head()

Unnamed: 0,name,SLpMs,StrAccs,SApMs,StrDefs,TDAvgs,TDAccs,TDDefs,SubAvgs
0,Jim Alers,2.9,40.0,4.38,56.0,2.26,41.0,80.0,0.3
1,Mike Aina,2.87,33.0,6.33,59.0,0.0,0.0,71.0,0.0
2,Alex Andrade,0.2,36.0,2.6,53.0,0.0,0.0,25.0,0.8
3,Juan Alcain,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Jose Aldo,3.47,44.0,3.18,65.0,0.64,65.0,91.0,0.1


In [40]:
fighter_career_stats_df.to_csv('fighter_career_stats_csv', index=False)

### Constructing Fight Event and Fight List Database

In [14]:
#creating a list of unique fighter URLs available on website
event_url_list = []
response = requests.get('http://www.ufcstats.com/statistics/events/completed?page=all')
soup = BeautifulSoup(response.content, 'html.parser')

#targeting URLs that contain fighter data
events = soup.findAll('a', attrs={'href': re.compile('http:')})
for event in events:
    event_url_list.append(event.get('href'))

#removing duplicate URLs
event_url_list = list(set(event_url_list))

In [443]:
#removing invalid URLs
for url in event_url_list:
    if 'event-details' not in url:
        event_url_list.remove(url)
    
for url in event_url_list:  
    if len(url) <  54:
        event_url_list.remove(url)

In [None]:
event_soups = list(map(info_generator, event_url_list))

In [17]:
def event_date_fetcher(event_soup):
    date_holder = event_soup.findChildren('li', attrs={'class': re.compile('b-list__')})
    
    try:
        date = date_holder[0].text.split(':')[1].strip()
        
        return date 
        
    except:
        
        pass    

In [18]:
def event_fights_parser(event_soup):
        
    holder = event_soup.findAll('a', attrs={'href': re.compile('fight-details')})
    
    event_fights_holder = []
    
    for x in range(0, len(holder)):
        
        event_fights_holder.append(holder[x].get('href'))
        
    return event_fights_holder

In [19]:
def event_name_fetcher(event_soup):
    event_title = event_soup.find('h2', attrs={'class': re.compile('b-content__')}).text.strip()
    return event_title

In [20]:
event_database_df = pd.DataFrame()

#adding event_urls to dataframe
event_database_df['event'] = event_url_list

#adding event_dates to dataframe and converting to time series objects
event_dates_list = list(map(event_date_fetcher, event_soups))

event_database_df['date'] = event_dates_list

event_database_df['date'] = pd.to_datetime(event_database_df['date'])

#adding event_name to dataframe
event_names_list = list(map(event_name_fetcher,event_soups))

event_database_df['name'] = event_names_list

In [21]:
event_database_df_dropped = event_database_df[event_database_df.date > current_datetime]

pending_events_indexes = event_database_df_dropped.index.tolist()

for index in pending_events_indexes:
    event_soups.pop(index)

In [22]:
event_database_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 521 entries, 0 to 520
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   event   521 non-null    object        
 1   date    521 non-null    datetime64[ns]
 2   name    521 non-null    object        
dtypes: datetime64[ns](1), object(2)
memory usage: 12.3+ KB


In [111]:
def fight_participants_fetcher(event_soup):
    
    fight_pairs = []
    
    name_holder = event_soup.findChildren('a', attrs={'class': re.compile('b-link')})

    fighter1_list_holder, fighter2_list_holder = name_holder[0::2], name_holder[1::2]

    fighter1_list = []
    fighter2_list = []

    for fighter1 in fighter1_list_holder:
        fighter1_list.append(fighter1.text.strip())
        
    for fighter2 in fighter2_list_holder:
        fighter2_list.append(fighter2.text.strip())
      
    fighters_in_ring = zip(fighter1_list, fighter2_list)
    
    for duo in fighters_in_ring:
        fight_pairs.append(duo)
        
    return fight_pairs

In [24]:
event_fighter_pairs_list = list(map(fight_participants_fetcher, event_soups))

In [114]:
def fight_strikes(event_soup):
        
    parser = event_soup.findAll('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})
    
    event_strike_list = []
    
    for x in range(0, len(parser)):

        holder = parser[x].findAll('p', attrs={'class': re.compile('b-fight-details__table-text')})
    
        fighter1_strikes = holder[3].text.strip()
        fighter2_strikes = holder[4].text.strip()
        
        fighters_strikes = (int(fighter1_strikes), int(fighter2_strikes))
        
        event_strike_list.append(fighters_strikes)
    
    return event_strike_list

In [116]:
def fight_takedowns(event_soup):
        
    parser = event_soup.findAll('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})
    
    event_takedowns_list = []
    
    for x in range(0, len(parser)):

        holder = parser[x].findAll('p', attrs={'class': re.compile('b-fight-details__table-text')})
    
        fighter1_takedowns = holder[5].text.strip()
        fighter2_takedowns = holder[6].text.strip()
        
        fighters_takedowns = (int(fighter1_takedowns), int(fighter2_takedowns))
        
        event_takedowns_list.append(fighters_takedowns)
    
    return event_takedowns_list

In [117]:
def fight_submissions(event_soup):    
    
    parser = event_soup.findAll('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})

    event_submissions_list = []

    for x in range(0, len(parser)):

        holder = parser[x].findAll('p', attrs={'class': re.compile('b-fight-details__table-text')})

        fighter1_submissions = holder[7].text.strip()
        fighter2_submissions = holder[8].text.strip()

        fighters_submissions = (int(fighter1_submissions), int(fighter2_submissions))

        event_submissions_list.append(fighters_submissions)

    return event_submissions_list


In [118]:
def fight_passes(event_soup):
    
    parser = event_soups.findAll('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})

    event_passes_list = []

    for x in range(0, len(parser)):

        holder = parser[x].findAll('p', attrs={'class': re.compile('b-fight-details__table-text')})

        fighter1_passes = holder[9].text.strip()
        fighter2_passes = holder[10].text.strip()

        fighters_passes = (int(fighter1_passes), int(fighter2_passes))

        event_passes_list.append(fighters_passes)

    return event_passes_list

In [191]:
def event_fight_weight_classes(event_soup):
    
    event_weight_class_list = []

    parser = event_soup.findAll('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})

    for x in range(0, len(parser)):

        holder = parser[x].findAll('td', attrs={'class': re.compile('b-fight-details')})

        weight_class = holder[6].text.strip()

        event_weight_class_list.append(weight_class)

    return event_weight_class_list
    
    #for x in range(0, len(parser)):
     #   holder = parser[x].findAll('p', attrs={'class': re.compile('b-fight-details__table-text')})
        
      #  print(holder[11].text.strip())

In [306]:
def fight_win_method(event_soup):
        
    win_methods_holder = event_soup.findAll('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})
    
    win_methods_list = []    
    
    for x in range(0, len(win_methods_holder)):
        
        win_method_holder = win_methods_holder[x].findAll('td', attrs={'class': re.compile('b-fight-details')})[7]
        
        win_method = win_method_holder.findAll('p', attrs={'class': re.compile('b-fight-details')})[0].text.strip()
        
        win_methods_list.append(win_method)
    
    return win_methods_list

In [368]:
def fight_win_method_details(event_soup):
        
    win_methods_holder = event_soup.findAll('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})
    
    win_methods_list = []    
    
    for x in range(0, len(win_methods_holder)):
        
        win_method_holder = win_methods_holder[x].findAll('td', attrs={'class': re.compile('b-fight-details')})[7]
        
        win_method = win_method_holder.findAll('p', attrs={'class': re.compile('b-fight-details')})[1].text.strip()
        
        win_methods_list.append(win_method)
    
    return win_methods_list

In [397]:
def fight_final_round(event_soup):
    
    ending_round_holder = event_soup.findAll('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})
    
    ending_round_list = []
    
    for x in range(0, len(ending_round_holder)):
        
        ending_round = int(ending_round_holder[x].findAll('td', attrs={'class': re.compile('b-fight-details')})[8].text.strip())
        
        ending_round_list.append(ending_round)
        
    return ending_round_list
    

In [427]:
def fight_final_round_time(event_soup):
    
    ending_round_time_list = []
    
    ending_round_time_holder = event_soup.findAll('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})
    
    for x in range(0, len(ending_round_time_holder)):
    
        ending_round_time = ending_round_holder[x].findAll('td', attrs={'class': re.compile('b-fight-details')})[-1].text.strip()
        
        ending_round_time_list.append(ending_round_time)

    return ending_round_time_list

In [442]:
test = list(map(fight_final_round_time, event_soups))

IndexError: list index out of range

--------------------------------------FIX IT FIX IT FIX IT FIX IT FIX IT FIX IT--------------------------------------------

In [433]:
event_soups[8]

<!DOCTYPE html>

<!--[if lt IE 7]>      <html class="no-js lt-ie9 lt-ie8 lt-ie7"> <![endif]-->
<!--[if IE 7]>         <html class="no-js lt-ie9 lt-ie8"> <![endif]-->
<!--[if IE 8]>         <html class="no-js ie8 lt-ie9"> <![endif]-->
<!--[if gt IE 8]><!--> <html class="no-js"> <!--<![endif]-->
<head>
<meta charset="utf-8"/>
<meta content="IE=edge" http-equiv="X-UA-Compatible"/>
<title>
    Stats | UFC
  </title>
<meta content="" name="description"/>
<meta content="" name="viewport"/>
<link href="/blocks/main.css?ver=332441" rel="stylesheet"/>
<script src="/js/vendor/modernizr-2.6.2.min.js"></script>
<script>
    (function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
    (i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
    m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
    })(window,document,'script','//www.google-analytics.com/analytics.js','ga');

    ga('create', 'UA-2855164-1', 'auto');
    ga('send

In [402]:
test = list(map(fight_final_round, event_soups))
test

[[2, 5, 1, 1, 1, 2, 1, 2, 3],
 [2, 2, 1, 1, 1, 1, 2, 1, 3, 3, 2, 1],
 [3, 3, 2, 1, 3, 2, 3, 3, 3, 1, 3],
 [1, 3, 3, 1, 3, 1, 1, 2, 3, 3, 2],
 [3, 3, 2, 3, 2, 3, 1, 3, 3, 3, 3],
 [3, 1, 3, 1, 2, 2, 1, 3, 3, 2],
 [2, 3, 2, 2, 2, 1, 1],
 [5, 1, 3, 3, 3, 1, 1, 3, 3, 3, 1, 3],
 [2, 3, 1, 1, 3, 3, 3, 2, 3, 3, 1, 2],
 [5, 3, 1, 1, 3, 3, 3, 3, 3, 3, 2],
 [1, 3, 2, 3, 3, 3, 3, 3, 2, 3],
 [4, 1, 2, 3, 3, 3, 3, 1, 2, 2, 3, 2],
 [4, 3, 3, 1, 3, 2, 3, 1, 3, 1, 3, 1],
 [5, 1, 3, 3, 3, 3, 3, 1, 3, 1],
 [3, 3, 3, 2, 3, 3, 3, 3, 1, 3, 1, 3, 3],
 [3, 1, 1, 1, 3, 1, 1, 3, 2, 1],
 [5, 2, 2, 3, 3, 1, 1, 1, 3, 3, 2, 1],
 [2, 2, 2, 1, 1, 1, 2, 2, 1],
 [1, 3, 3, 3, 2, 3, 1, 3, 3],
 [1, 1, 3, 1, 1, 2, 2, 2, 1],
 [5, 1, 1, 1, 2, 3, 1, 1, 2, 3, 3, 1],
 [1, 3, 1, 1, 3, 3, 3, 3, 3, 3, 2, 3],
 [2, 1, 2, 2, 3, 3, 3, 1, 3, 1, 3],
 [2, 3, 2, 3, 1, 1, 3, 3, 3, 3, 3, 1, 3],
 [5, 3, 1, 1, 3, 3, 1, 1, 1, 3, 1, 1],
 [1, 3, 3, 1, 3, 2, 1, 3, 1, 3],
 [4, 1, 3, 3, 3, 1, 1, 1, 3, 3],
 [1, 5, 3, 2, 3, 2, 3, 2, 1, 3, 3, 1],
 [1,

In [388]:
ending_round_list = []
    
ending_round_holder = event_soups[2].findAll('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})   
        
for x in range(0, len(ending_round_holder)):
    
    ending_round = int(ending_round_holder[x].findAll('td', attrs={'class': re.compile('b-fight-details')})[8].text.strip())
    
    ending_round_list.append(ending_round)
    
ending_round_list

[3, 3, 2, 1, 3, 2, 3, 3, 3, 1, 3]

In [342]:
len(ending_round_holder)

0

In [300]:
win_methods_holder = event_soups[2].findAll('tr', attrs={'class': 'b-fight-details__table-row b-fight-details__table-row__hover js-fight-details-click'})

for x in range(0, len(win_methods_holder)):
    win_method_holder = win_methods_holder[x].findAll('td', attrs={'class': re.compile('b-fight-details')})[7]
    
    win_method = win_method_holder.findAll('p', attrs={'class': re.compile('b-fight-details')})[0].text.strip()
    win_method_details = win_method_holder.findAll('p', attrs={'class': re.compile('b-fight-details')})[1].text.strip()
    
    print(win_method, win_method_details)

SUB Rear Naked Choke
U-DEC 
CNC 
SUB Guillotine Choke
S-DEC 
SUB Armbar
U-DEC 
U-DEC 
U-DEC 
SUB Armbar
U-DEC 


--------------------------------------FIX IT FIX IT FIX IT FIX IT FIX IT FIX IT--------------------------------------------

In [133]:
event_fight_list = list(map(event_fights_parser, event_soups))

matchup_list = list(map(fight_participants_fetcher, event_soups))

fighter_strike_count_list = list(map(fight_strikes,event_soups))
    
fight_takedown_list = list(map(fight_takedowns,event_soups))

#fight_submission_list = list(map(fight_submissions,event_soups))
    
#fight_pass_list = list(map(fight_passes,event_soups))

#fight_weigth_class_list = list(map(fight_weight_class,event_soups))
    
fight_win_method_list = list(map(fight_win_method,event_soups))

#fight_final_round_list = list(map(fight_final_round,event_soups))
    
fight_final_round_time_list = list(map(fight_final_round_time,event_soups))

In [137]:
len(fighter_strike_count_list)

520

In [20]:
event_database_df['matchups'] = matchup

In [17]:
event_database_df['fight_list'] = event_fight_list

In [135]:
event_database_df.to_csv('event_database_csv', index=False)

PROTOCODE ==================================

In [None]:
except:
        fighter1_strikes = event_soups.findAll('p', attrs={'class': re.compile('b-fight-details__table')})[4].text.strip()
        fighter2_strikes = event_soups.findAll('p', attrs={'class': re.compile('b-fight-details__table')})[5].text.strip()

        fighter1_strikes = 0 if ''== fighter1_strikes else int(fighter1_strikes)
        fighter2_strikes = 0 if ''== fighter1_strikes else int(fighter2_strikes)

        print(number, fighter1_strikes, fighter2_strikes) '''try: 
        fighter1_strikes = 0 if ''== fighter1_strikes else int(fighter1_strikes)
    except:
        print(f'Edge Case: {number}, {event_url_list[number]}')
        edge_case_number.append(number)
        
    try: 
        fighter2_strikes = 0 if ''==fighter1_strikes else int(fighter2_strikes)
    except:
        print(f'Edge Case: {number}, {event_url_list[number]}')
        edge_case_number.append(number)'''

In [137]:
edge_case_number = []
for number, event_soup in enumerate(event_soups):
    fighter1_strikes = event_soup.findAll('p', attrs={'class': re.compile('b-fight-details__table')})[3].text.strip()
    fighter2_strikes = event_soup.findAll('p', attrs={'class': re.compile('b-fight-details__table')})[4].text.strip()
    
    try: 
        fighter1_strikes = 0 if ''== fighter1_strikes else int(fighter1_strikes)
        fighter2_strikes = 0 if ''== fighter1_strikes else int(fighter2_strikes)
    except:
        fighter1_strikes = event_soup.findAll('p', attrs={'class': re.compile('b-fight-details__table')})[4].text.strip()
        fighter2_strikes = event_soup.findAll('p', attrs={'class': re.compile('b-fight-details__table')})[5].text.strip()
        
        fighter1_strikes = 0 if ''== fighter1_strikes else int(fighter1_strikes)
        fighter2_strikes = 0 if ''== fighter1_strikes else int(fighter2_strikes)
    
    print(number, fighter1_strikes, fighter2_strikes)

0 88 58
1 59 27
2 90 13
3 124 42
4 168 53
5 20 14
6 86 72
7 6 13
8 104 55
9 43 20
10 25 21
11 48 5
12 55 32
13 2 0
14 6 2
15 96 80
16 54 55
17 23 23
18 19 7
19 28 4
20 22 12
21 209 123
22 7 0
23 13 11
24 12 4
25 138 90
26 97 42
27 112 109
28 2 0
29 74 12
30 54 53
31 1 1
32 65 58
33 28 5
34 18 20
35 26 2
36 53 38
37 83 48
38 40 32
39 53 34
40 41 13
41 84 14
42 62 51
43 21 23
44 112 43
45 290 110
46 68 29
47 14 6
48 59 43
49 85 78
50 36 39
51 59 16
52 27 19
53 158 150
54 88 16
55 6 4
56 25 46
57 102 27
58 134 41
59 29 16
60 42 3
61 16 0
62 59 22
63 37 30
64 46 22
65 127 61
66 76 45
67 13 27
68 61 42
69 13 13
70 24 7
71 22 19
72 86 77
73 4 1
74 8 40
75 138 53
76 94 10
77 31 11
78 0 0
79 29 7
80 41 13
81 29 38
82 9 4
83 130 62
84 21 10
85 68 67
86 32 9
87 19 9
88 17 9
89 23 6
90 13 10
91 16 2
92 57 0
93 79 72
94 18 9
95 39 21
96 10 26
97 68 43
98 23 4
99 42 18
100 67 32
101 63 11
102 31 47
103 17 8
104 71 36
105 23 13
106 31 23
107 24 60
108 90 57
109 122 110
110 27 20
111 134 110
112 110 

In [76]:
edge_case_list = sorted(set(edge_case_number))
edge_case_list

[96, 222, 228, 281, 324, 354, 476]

In [None]:
Kelvin Gastelum