# UFC_Stats Web Scraping

## importing pertinent libraries

In [1]:
#importing pertinent libraries
import pandas as pd
from urllib.request import urlopen
from bs4 import BeautifulSoup
import requests, re
import json
import itertools
import dask.dataframe as dd
from datetime import datetime

In [2]:
from datetime import datetime

# Reference "now" for the notebook: a printable date string plus a datetime
# pinned to midnight at the start of today.
today = datetime.today()
current_date = today.strftime("%B %d, %Y")
current_datetime = datetime(today.year, today.month, today.day)

## Extracting UFC urls

In [3]:
# Download the index page of fighters filed under "a" and parse it.
response = requests.get('http://www.ufcstats.com/statistics/fighters?char=a&page=all')
soup = BeautifulSoup(response.content, 'html.parser')

# Every anchor whose href contains "http:" (fighter pages plus other site links).
fighters = soup.findAll('a', attrs={'href': re.compile('http:')})

# A set comprehension drops duplicate hrefs; the result goes back to a list.
fighter_url_list = list({fighter.get('href') for fighter in fighters})

In [4]:
# Preview the first 15 collected URLs.
fighter_url_list[:15]

['http://www.ufcstats.com/fighter-details/c487223b0289bda9',
 'http://www.ufcstats.com/fighter-details/da603332ad41f165',
 'http://www.ufcstats.com/fighter-details/ae071698e1a3ccd4',
 'http://www.ufcstats.com/fighter-details/2b074403b7c6cdb4',
 'http://www.ufcstats.com/fighter-details/a77633a989013265',
 'http://www.ufcstats.com/fighter-details/d317a5e2b3f88c5f',
 'http://www.ufcstats.com/fighter-details/70fa1c64a2c439ef',
 'http://www.ufcstats.com/statistics/fighters?char=a&page=4',
 'http://www.ufcstats.com/fighter-details/e70de1859b7ee78e',
 'http://www.ufcstats.com/fighter-details/36541f1e6c5d4955',
 'http://www.ufcstats.com/fighter-details/26387c19f32dda0f',
 'http://www.ufcstats.com/fighter-details/fd7acf42bd6e7e95',
 'http://www.ufcstats.com/fighter-details/4b37a0bc2ab4cae1',
 'http://www.ufcstats.com/fighter-details/dbd198f780286aca',
 'http://www.ufcstats.com/fighter-details/e741536153227386']

Looks like there are some URLs we don't need in our list. Let's weed them out.

In [5]:
# Keep only fighter-detail pages.
# The list is rebuilt instead of calling .remove() inside a loop over the same
# list: removing during iteration skips the element that follows each removal,
# so invalid URLs could be left behind.
fighter_url_list = [url for url in fighter_url_list if 'fighter-details' in url]

## Creating Dataframe Building Blocks

In [6]:
def info_generator(url):
    '''Download a page and parse it with BeautifulSoup.

    Args:
        url(str): address of the page to request.

    Returns:
        BeautifulSoup object built from the response body using 'html.parser'.
    '''
    page = requests.get(url)
    return BeautifulSoup(page.content, 'html.parser')

In [7]:
# Request and parse every fighter page once; later cells reuse these soups
# rather than hitting the website again.
basic_soup_list = [info_generator(fighter_url) for fighter_url in fighter_url_list]

In [8]:
def fighter_history_fetcher(soup):
    '''Collect the fight-detail links found on a parsed fighter page.

    Args:
        soup: parsed page exposing findAll().

    Returns:
        list of one-element lists, each holding the href of an anchor whose
        href matches 'fight-details'.
    '''
    fight_links = soup.findAll('a', attrs={'href': re.compile('fight-details')})
    return [[link.get('href')] for link in fight_links]

### Generating basic statistics lists

In [9]:
def base_stats_soup_generator(soup):
    '''Pull the statistics list items out of a parsed UFC Stats page.

    Args:
        soup(BeautifulSoup object): must be an unaltered soup.

    Returns:
        The <li> elements whose class matches 'b-list', as returned by
        findChildren().
    '''
    return soup.findChildren('li', attrs={'class': re.compile('b-list')})

In [10]:
# One list of statistic <li> elements per fighter page.
stats_soups = [base_stats_soup_generator(fighter_soup) for fighter_soup in basic_soup_list]

In [11]:
# Fight-detail links for every fighter; each link is still wrapped in its own list.
fighter_history_url_list = [fighter_history_fetcher(fighter_soup) for fighter_soup in basic_soup_list]
fighter_history_url_list[:3]

[[['http://www.ufcstats.com/fight-details/1d26ceb1c995655e'],
  ['http://www.ufcstats.com/fight-details/629c0eb319e01374'],
  ['http://www.ufcstats.com/fight-details/b125025990755616']],
 [['http://www.ufcstats.com/fight-details/704cf5fe7c155e66']],
 [['http://www.ufcstats.com/fight-details/58e72b1a38880ba9']]]

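`fighter_history_url_list` holds, for each fighter, a list of single-element lists, so each fighter's entry needs to be flattened into a plain list of URLs.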

In [12]:
def compound_flattener(fight_url_list):
    '''Flatten one level of nesting.

    Args:
        fight_url_list: iterable of iterables (e.g. a list of one-element lists).

    Returns:
        list containing the items of every inner iterable, in order.
    '''
    return list(itertools.chain.from_iterable(fight_url_list))

In [13]:
# Flatten each fighter's entry in place of the nested version.
# Running this cell a second time would flatten the URL strings themselves
# into single characters, so run it once per fresh fetch.
fighter_history_url_list = [compound_flattener(fight_urls) for fight_urls in fighter_history_url_list]

In [14]:
# Preview the flattened fight URLs of the first three fighters.
fighter_history_url_list[:3]

[['http://www.ufcstats.com/fight-details/1d26ceb1c995655e',
  'http://www.ufcstats.com/fight-details/629c0eb319e01374',
  'http://www.ufcstats.com/fight-details/b125025990755616'],
 ['http://www.ufcstats.com/fight-details/704cf5fe7c155e66'],
 ['http://www.ufcstats.com/fight-details/58e72b1a38880ba9']]

### Basic Statistics DataFrame Construction

In [17]:
def fighter_name(soup):
    '''Return the stripped text of the first <span> whose class matches
    'b-content__title-highlight' on a parsed fighter page.'''
    title_span = soup.find('span', attrs={'class': re.compile('b-content__title-highlight')})
    stripped_name = title_span.text.strip()
    return stripped_name

In [18]:
def fighter_record(soup):
    '''Return the text after the first colon in the first <span> whose class
    matches 'b-content__title-record', stripped of surrounding whitespace.'''
    record_spans = soup.findChildren('span', attrs={'class': re.compile('b-content__title-record')})
    record_text = record_spans[0].text.strip()
    after_label = record_text.split(':')[1]
    return after_label.strip()

In [19]:
def basic_stats_height(soup):
    '''Convert the height entry (item 0 of a stats list) to total inches.

    Soups passed in should already have gone through base_stats_soup_generator.

    Returns:
        int total inches (feet * 12 + inches), or None when the feet value
        is the '--' placeholder.
    '''
    # Text after the label, split on spaces: first token feet, second inches.
    tokens = soup[0].text.strip().split(':')[1].strip().split(' ')

    feet_text = tokens[0].replace("'", "")
    if feet_text == '--':
        return None

    feet = int(feet_text)
    inches = int(tokens[1].replace("'", "").replace('"', '').replace("--", "0"))
    return inches + feet * 12

In [20]:
def basic_stats_weight(soup):
    '''Return the weight entry (item 1 of a stats list) as an int, with the
    ' lbs.' suffix removed; the '--' placeholder is returned unchanged.'''
    raw_weight = soup[1].text.strip().split(':')[1].strip().replace(' lbs.', '')
    return raw_weight if raw_weight == '--' else int(raw_weight)

In [21]:
def basic_stats_reach(soup):
    '''Return the reach entry (item 2 of a stats list) as an int, with the
    double-quote mark removed; the '--' placeholder is returned unchanged.'''
    raw_reach = soup[2].text.strip().split(':')[1].strip().replace('"', '')
    return raw_reach if raw_reach == '--' else int(raw_reach)

In [22]:
def basic_stats_stance(soup):
    '''Return the stripped text after the first colon of item 3 of a stats list.'''
    stance_text = soup[3].text.strip()
    return stance_text.split(':')[1].strip()

In [23]:
def basic_stats_DOB(soup):
    '''Return the stripped text after the first colon of item 4 of a stats list.'''
    dob_text = soup[4].text.strip()
    return dob_text.split(':')[1].strip()

In [24]:
# Name and record come from the full page soups ...
fighter_names = [fighter_name(page) for page in basic_soup_list]
fighter_records = [fighter_record(page) for page in basic_soup_list]

# ... while the physical attributes come from the per-fighter stats lists.
fighter_heights = [basic_stats_height(stats) for stats in stats_soups]
fighter_weights = [basic_stats_weight(stats) for stats in stats_soups]
fighter_reachs = [basic_stats_reach(stats) for stats in stats_soups]
fighter_stances = [basic_stats_stance(stats) for stats in stats_soups]
fighter_DOBs = [basic_stats_DOB(stats) for stats in stats_soups]

In [25]:
# Assemble the basic-statistics frame in one step; the dict order fixes the
# column order.
fighter_basic_stats_df = pd.DataFrame({
    'name': fighter_names,
    'record': fighter_records,
    'height': fighter_heights,
    'weight': fighter_weights,
    'reach': fighter_reachs,
    'stance': fighter_stances,
    'DOB': fighter_DOBs,
})

In [26]:
# Show the first rows of the basic-statistics frame.
fighter_basic_stats_df.head()

Unnamed: 0,name,record,height,weight,reach,stance,DOB
0,Jim Alers,13-3-0 (1 NC),69.0,145,71,Orthodox,"Oct 14, 1986"
1,Mike Aina,12-6-1 (1 NC),69.0,155,--,Orthodox,--
2,Alex Andrade,10-5-0,71.0,200,--,Orthodox,"May 14, 1974"
3,Juan Alcain,1-2-0,,--,--,,--
4,Jose Aldo,28-6-0,67.0,135,70,Orthodox,"Sep 09, 1986"


In [27]:
# Save the frame without its index. The target file is named
# 'fighter_basic_stats_csv' (the name carries no .csv extension).
fighter_basic_stats_df.to_csv('fighter_basic_stats_csv', index=False)

### Career Statistics DataFrame Construction

In [28]:
def career_stats_soup(soup):
    '''Return every <li> whose class matches 'b-list' on a parsed fighter page.

    The complete list is returned, with no leading items dropped. The
    career_stats_* helpers in this notebook index such a list from position 5
    upward.
    '''
    return soup.findChildren('li', attrs={'class': re.compile('b-list')})

In [29]:
def career_stats_SLpM(soup):
    '''Return item 5 of a stats list as a float ('%' removed); the '--'
    placeholder is returned unchanged.'''
    raw_value = soup[5].text.split(':')[1].replace('%', '').strip()
    return raw_value if raw_value == '--' else float(raw_value)

In [30]:
def career_stats_StrAcc(soup):
    '''Return item 6 of a stats list as a float ('%' removed); the '--'
    placeholder is returned unchanged.'''
    raw_value = soup[6].text.split(':')[1].replace('%', '').strip()
    return raw_value if raw_value == '--' else float(raw_value)

In [31]:
def career_stats_SApM(soup):
    '''Return item 7 of a stats list as a float; the '--' placeholder is
    returned unchanged.'''
    raw_value = soup[7].text.split(':')[1].strip()
    return raw_value if raw_value == '--' else float(raw_value)

In [32]:
def career_stats_StrDef(soup):
    '''Return item 8 of a stats list as a float ('%' removed); the '--'
    placeholder is returned unchanged.'''
    raw_value = soup[8].text.split(':')[1].replace('%', '').strip()
    return raw_value if raw_value == '--' else float(raw_value)

In [33]:
def career_stats_TDAvg(soup):
    '''Return item 10 of a stats list as a float; the '--' placeholder is
    returned unchanged.'''
    raw_value = soup[10].text.split(':')[1].strip()
    return raw_value if raw_value == '--' else float(raw_value)

In [34]:
def career_stats_TDAcc(soup):
    '''Return item 11 of a stats list as a float ('%' removed after
    stripping); the '--' placeholder is returned unchanged.'''
    raw_value = soup[11].text.split(':')[1].strip().replace('%', '')
    return raw_value if raw_value == '--' else float(raw_value)

In [35]:
def career_stats_TDDef(soup):
    '''Return item 12 of a stats list as a float ('%' removed); the '--'
    placeholder is returned unchanged.'''
    raw_value = soup[12].text.split(':')[1].replace('%', '').strip()
    return raw_value if raw_value == '--' else float(raw_value)

In [36]:
def career_stats_SubAvg(soup):
    '''Return item 13 of a stats list as a float ('%' removed after
    stripping); the '--' placeholder is returned unchanged.'''
    raw_value = soup[13].text.split(':')[1].strip().replace('%', '')
    return raw_value if raw_value == '--' else float(raw_value)

In [37]:
# Extract each career statistic from every fighter's stats list.
fighter_SLpMs = [career_stats_SLpM(stats) for stats in stats_soups]
fighter_StrAccs = [career_stats_StrAcc(stats) for stats in stats_soups]
fighter_SApMs = [career_stats_SApM(stats) for stats in stats_soups]
fighter_StrDefs = [career_stats_StrDef(stats) for stats in stats_soups]
fighter_TDAvgs = [career_stats_TDAvg(stats) for stats in stats_soups]
fighter_TDAccs = [career_stats_TDAcc(stats) for stats in stats_soups]
fighter_TDDefs = [career_stats_TDDef(stats) for stats in stats_soups]
fighter_SubAvgs = [career_stats_SubAvg(stats) for stats in stats_soups]

In [38]:
# Assemble the career-statistics frame in one step; the dict order fixes the
# column order.
fighter_career_stats_df = pd.DataFrame({
    'name': fighter_names,
    'SLpMs': fighter_SLpMs,
    'StrAccs': fighter_StrAccs,
    'SApMs': fighter_SApMs,
    'StrDefs': fighter_StrDefs,
    'TDAvgs': fighter_TDAvgs,
    'TDAccs': fighter_TDAccs,
    'TDDefs': fighter_TDDefs,
    'SubAvgs': fighter_SubAvgs,
})

In [39]:
# Show the first rows of the career-statistics frame.
fighter_career_stats_df.head()

Unnamed: 0,name,SLpMs,StrAccs,SApMs,StrDefs,TDAvgs,TDAccs,TDDefs,SubAvgs
0,Jim Alers,2.9,40.0,4.38,56.0,2.26,41.0,80.0,0.3
1,Mike Aina,2.87,33.0,6.33,59.0,0.0,0.0,71.0,0.0
2,Alex Andrade,0.2,36.0,2.6,53.0,0.0,0.0,25.0,0.8
3,Juan Alcain,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Jose Aldo,3.47,44.0,3.18,65.0,0.64,65.0,91.0,0.1


In [40]:
# Save the frame without its index. The target file is named
# 'fighter_career_stats_csv' (the name carries no .csv extension).
fighter_career_stats_df.to_csv('fighter_career_stats_csv', index=False)

### Constructing Fight Event and Fight List Database

In [15]:
# Download the page listing all completed events and parse it.
response = requests.get('http://www.ufcstats.com/statistics/events/completed?page=all')
soup = BeautifulSoup(response.content, 'html.parser')

# Every anchor whose href contains "http:"; non-event links are filtered later.
events = soup.findAll('a', attrs={'href': re.compile('http:')})

# A set comprehension drops duplicate hrefs; the result goes back to a list.
event_url_list = list({event.get('href') for event in events})

In [16]:
# Keep only event-detail URLs that are at least 54 characters long.
# 'http://www.ufcstats.com/event-details/' is 38 characters and the ids seen
# in this notebook are 16 characters, so anything shorter lacks a full id.
# The list is rebuilt in one pass instead of calling .remove() inside loops
# over the same list: removing during iteration skips the element that
# follows each removal, so invalid URLs could be left behind.
event_url_list = [
    url for url in event_url_list
    if 'event-details' in url and len(url) >= 54
]

In [276]:
# Request and parse every event page once; position i matches event_url_list[i].
event_soups = [info_generator(event_url) for event_url in event_url_list]

In [20]:
def event_date_fetcher(event_soup):
    '''Return the date text of an event page, or None when it cannot be read.

    Args:
        event_soup: parsed event page exposing findChildren().

    Returns:
        str: text after the first colon of the first <li> whose class matches
        'b-list__', stripped of surrounding whitespace; None when no such <li>
        exists or its text has no colon.
    '''
    date_holder = event_soup.findChildren('li', attrs={'class': re.compile('b-list__')})

    try:
        return date_holder[0].text.split(':')[1].strip()
    except (IndexError, AttributeError):
        # Missing element or no "label: value" text. Only these lookup
        # failures are treated as "no date"; anything else (including
        # KeyboardInterrupt) is no longer silently swallowed.
        return None

In [21]:
def event_fights_parser(event_soup):
    '''Return the href of every anchor on an event page whose href matches
    'fight-details', in page order.'''
    fight_anchors = event_soup.findAll('a', attrs={'href': re.compile('fight-details')})
    return [anchor.get('href') for anchor in fight_anchors]

In [22]:
def event_name_fetcher(event_soup):
    '''Return the stripped text of the first <h2> whose class matches
    'b-content__' on an event page.'''
    title_tag = event_soup.find('h2', attrs={'class': re.compile('b-content__')})
    return title_tag.text.strip()

In [110]:
# Scrape the date and the name out of every event soup.
event_dates_list = [event_date_fetcher(event_soup) for event_soup in event_soups]
event_names_list = [event_name_fetcher(event_soup) for event_soup in event_soups]

# One row per event URL, with the scraped date and name alongside.
event_database_df = pd.DataFrame({
    'event': event_url_list,
    'date': event_dates_list,
    'name': event_names_list,
})

# Turn the date strings into datetime values.
event_database_df['date'] = pd.to_datetime(event_database_df['date'])

In [111]:
# Rows whose date lies after today's midnight, i.e. events that have not
# happened yet (despite the name, this frame holds the rows to be dropped).
event_database_df_dropped = event_database_df[event_database_df.date > current_datetime]

pending_events_indexes = event_database_df_dropped.index.tolist()

# Remove the matching soups from the highest position downwards. Popping in
# ascending order shifts every later element one slot to the left, so the
# second and following pops would remove the wrong soups.
# NOTE(review): only event_soups is shortened here; event_url_list and
# event_database_df keep the pending events, and re-running this cell pops
# again.
for index in sorted(pending_events_indexes, reverse=True):
    event_soups.pop(index)

In [115]:
# Summarise the columns, non-null counts and dtypes of the event frame.
event_database_df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 521 entries, 0 to 520
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype         
---  ------  --------------  -----         
 0   event   521 non-null    object        
 1   date    521 non-null    datetime64[ns]
 2   name    521 non-null    object        
dtypes: datetime64[ns](1), object(2)
memory usage: 12.3+ KB


In [164]:
def fight_participants_fetcher(event_soup):
    '''Pair up the fighter names found on an event page.

    Anchors whose class matches 'b-link' are read in page order; the ones at
    even positions are paired with the ones that immediately follow them.

    Returns:
        list of (name, name) tuples; an unpaired trailing name is dropped.
    '''
    name_anchors = event_soup.findChildren('a', attrs={'class': re.compile('b-link')})
    names = [anchor.text.strip() for anchor in name_anchors]
    return list(zip(names[0::2], names[1::2]))

--------------------------------------FIX IT FIX IT FIX IT FIX IT FIX IT FIX IT--------------------------------------------

In [179]:
# Spot check: print the first event URL and show the pairings parsed from the
# first event soup.
print(event_url_list[0])
fight_participants_fetcher(event_soups[0])

http://www.ufcstats.com/event-details/82f5c81f4e3c3eb5


[('Demetrious Johnson', 'John Dodson'),
 ('Andrei Arlovski', 'Frank Mir'),
 ('Anthony Johnson', 'Jimi Manuwa'),
 ('Corey Anderson', 'Jan Blachowicz'),
 ('Paige VanZant', 'Alex Chambers'),
 ('Ross Pearson', 'Paul Felder'),
 ('John Lineker', 'Francisco Rivera'),
 ('Raquel Pennington', 'Jessica Andrade'),
 ('Tiago Trator', 'Clay Collard'),
 ('Joe Riggs', 'Ron Stallings'),
 ('Joaquim Silva', 'Nazareno Malegarie')]

In [286]:
# Print every 8th entry of fighter1_strikes_holder.
# NOTE(review): fighter1_strikes_holder is only assigned in the test cell
# further down the notebook, so this cell fails when the notebook is run top
# to bottom on a fresh kernel.
for thing in fighter1_strikes_holder[0::8]:
    print(thing.text.strip())

88
39
25
84
73
62
28
41
52
35
49


In [291]:
# Prototype on the first event: pair up the strike counts of both fighters
# for every fight on the card.
test_event_soup = event_soups[0]

# All <p> cells of the fight table, fetched once and sliced twice.
table_cells = test_event_soup.findAll('p', attrs={'class': re.compile('b-fight-details__table')})
fighter1_strikes_holder = table_cells[3::2]
fighter2_strikes_holder = table_cells[4::2]

# Every 8th entry of each slice is read as one fight's strike cell.
fighter1_strikes_list = [cell.text.strip() for cell in fighter1_strikes_holder[0::8]]
fighter2_strikes_list = [cell.text.strip() for cell in fighter2_strikes_holder[0::8]]

fight_strike_pairs = list(zip(fighter1_strikes_list, fighter2_strikes_list))
print(fight_strike_pairs)

[('88', '58'), ('39', '24'), ('25', '8'), ('84', '20'), ('73', '30'), ('62', '60'), ('28', '16'), ('41', '41'), ('52', '84'), ('35', '24'), ('49', '58')]


In [None]:
def fight_strikes(event_soup):
    '''Return the strike counts of both fighters from an event page.

    Reads the <p> cells whose class matches 'b-fight-details__table' and
    converts cells 3 and 4 to ints. If either holds non-numeric text, cells 4
    and 5 are used instead. An empty cell counts as 0.

    Args:
        event_soup: parsed event page exposing findAll().

    Returns:
        tuple (fighter1_strikes, fighter2_strikes) of ints.

    Raises:
        ValueError: if the fallback cells are non-numeric as well.
        IndexError: if the page has fewer cells than the positions read.
    '''
    cells = event_soup.findAll('p', attrs={'class': re.compile('b-fight-details__table')})

    def _count(cell):
        # Each fighter's own text decides whether it is empty; the previous
        # code tested fighter 1's value when converting fighter 2's.
        text = cell.text.strip()
        return 0 if text == '' else int(text)

    try:
        return _count(cells[3]), _count(cells[4])
    except ValueError:
        # Non-numeric text at the usual positions: read one cell further on.
        return _count(cells[4]), _count(cells[5])

In [163]:
# Spot check: strike counts parsed from the first event soup, shown next to
# the first event URL.
fight_strikes(event_soups[0]), event_url_list[0]

((88, 58), 'http://www.ufcstats.com/event-details/82f5c81f4e3c3eb5')

In [153]:
# Strike-count pair for every event soup.
fight_strikes_list = [fight_strikes(event_soup) for event_soup in event_soups]

In [154]:
def fight_takedowns(event_soup):
    '''Return cells 5 and 6 of the 'b-fight-details__table' <p> elements as a
    pair of ints, treating an empty cell as 0.'''
    cells = event_soup.findAll('p', attrs={'class': re.compile('b-fight-details__table')})
    first_text, second_text = cells[5].text.strip(), cells[6].text.strip()

    fighter1_takedowns = int(first_text) if first_text != '' else 0
    fighter2_takedowns = int(second_text) if second_text != '' else 0
    return fighter1_takedowns, fighter2_takedowns

In [156]:
# Takedown pair for every event soup.
fight_takedowns_list = [fight_takedowns(event_soup) for event_soup in event_soups]

In [159]:
# Print each takedown pair next to the event URL at the same position.
# NOTE(review): fight_takedowns_list follows event_soups, which has had pending
# events popped, while event_url_list has not; positions after a popped event
# may not line up. Confirm before trusting the pairing.
for number, fight_takedown in enumerate(fight_takedowns_list):
    print(number, fight_takedown, event_url_list[number])

0 (4, 1) http://www.ufcstats.com/event-details/82f5c81f4e3c3eb5
1 (3, 0) http://www.ufcstats.com/event-details/a25b71fe5e31fa97
2 (3, 0) http://www.ufcstats.com/event-details/1c5879330d42255f
3 (6, 0) http://www.ufcstats.com/event-details/4c12aa7ca246e7a4
4 (1, 1) http://www.ufcstats.com/event-details/8dc4f34c1f50d00d
5 (0, 5) http://www.ufcstats.com/event-details/4a01dc8376736ef5
6 (2, 0) http://www.ufcstats.com/event-details/d86e913c548c07c2
7 (0, 0) http://www.ufcstats.com/event-details/9b5b5a75523728f3
8 (0, 0) http://www.ufcstats.com/event-details/1507214bbc7a79e2
9 (0, 3) http://www.ufcstats.com/event-details/9ccdd2ce45903f34
10 (1, 3) http://www.ufcstats.com/event-details/0ff11cc094e887bc
11 (1, 1) http://www.ufcstats.com/event-details/f1f9e48a0d150757
12 (0, 0) http://www.ufcstats.com/event-details/b732b326c362fb62
13 (0, 0) http://www.ufcstats.com/event-details/31652c9267606d54
14 (1, 0) http://www.ufcstats.com/event-details/1a49e0670dfaca31
15 (1, 0) http://www.ufcstats.com/e

--------------------------------------FIX IT FIX IT FIX IT FIX IT FIX IT FIX IT--------------------------------------------

In [71]:
def fight_submissions(event_soup):
    '''Return cells 7 and 8 of the 'b-fight-details__table' <p> elements as a
    pair of ints, treating an empty cell as 0.'''
    cells = event_soup.findAll('p', attrs={'class': re.compile('b-fight-details__table')})
    first_text, second_text = cells[7].text.strip(), cells[8].text.strip()

    fighter1_submissions = int(first_text) if first_text != '' else 0
    fighter2_submissions = int(second_text) if second_text != '' else 0
    return fighter1_submissions, fighter2_submissions

In [72]:
def fight_passes(event_soup):
    '''Return cells 9 and 10 of the 'b-fight-details__table' <p> elements as a
    pair of ints, treating an empty cell as 0.'''
    cells = event_soup.findAll('p', attrs={'class': re.compile('b-fight-details__table')})
    first_text, second_text = cells[9].text.strip(), cells[10].text.strip()

    fighter1_passes = int(first_text) if first_text != '' else 0
    fighter2_passes = int(second_text) if second_text != '' else 0
    return fighter1_passes, fighter2_passes

In [73]:
def fight_weight_class(event_soup):
    '''Return the stripped text of cell 11 of the 'b-fight-details__table'
    <p> elements.'''
    cells = event_soup.findAll('p', attrs={'class': re.compile('b-fight-details__table')})
    return cells[11].text.strip()

In [74]:
def fight_win_method(event_soup):
    '''Return the stripped texts of cells 12 and 13 of the
    'b-fight-details__table' <p> elements as a (method, detail) tuple.'''
    cells = event_soup.findAll('p', attrs={'class': re.compile('b-fight-details__table')})
    method_text = cells[12].text.strip()
    detail_text = cells[13].text.strip()
    return method_text, detail_text

In [75]:
def fight_final_round(event_soup):
    '''Return cell 14 of the 'b-fight-details__table' <p> elements as an int,
    treating an empty cell as 0.'''
    cells = event_soup.findAll('p', attrs={'class': re.compile('b-fight-details__table')})
    round_text = cells[14].text.strip()

    if round_text == '':
        return 0
    return int(round_text)

In [76]:
def fight_final_round_time(event_soup):
    '''Return the stripped text of cell 15 of the 'b-fight-details__table'
    <p> elements.'''
    cells = event_soup.findAll('p', attrs={'class': re.compile('b-fight-details__table')})
    return cells[15].text.strip()

In [133]:
# Run each per-event extractor over every event soup.
event_fight_list = [event_fights_parser(event_soup) for event_soup in event_soups]

matchup_list = [fight_participants_fetcher(event_soup) for event_soup in event_soups]

fighter_strike_count_list = [fight_strikes(event_soup) for event_soup in event_soups]

fight_takedown_list = [fight_takedowns(event_soup) for event_soup in event_soups]

# Extractors currently switched off:
#fight_submission_list = list(map(fight_submissions,event_soups))

#fight_pass_list = list(map(fight_passes,event_soups))

#fight_weigth_class_list = list(map(fight_weight_class,event_soups))

fight_win_method_list = [fight_win_method(event_soup) for event_soup in event_soups]

#fight_final_round_list = list(map(fight_final_round,event_soups))

fight_final_round_time_list = [fight_final_round_time(event_soup) for event_soup in event_soups]

In [137]:
# Number of events for which a strike-count pair was extracted.
len(fighter_strike_count_list)

520

In [20]:
# Attach the per-event pairings. The name `matchup` is never defined in this
# notebook; the pairings are stored in matchup_list.
# NOTE(review): matchup_list is built from event_soups after pending events
# were popped, so it can be shorter than the frame, in which case this
# assignment raises a length-mismatch error. Confirm the two are aligned.
event_database_df['matchups'] = matchup_list

In [17]:
# Attach each event's list of fight-detail URLs.
# NOTE(review): event_fight_list is built from event_soups after pending events
# were popped; confirm its length and order still match the frame's rows.
event_database_df['fight_list'] = event_fight_list

In [135]:
# Save the frame without its index. The target file is named
# 'event_database_csv' (the name carries no .csv extension).
event_database_df.to_csv('event_database_csv', index=False)

PROTOCODE ==================================

In [None]:
# NOTE(review): scratch fragment kept for reference only; it is not runnable.
# It opens with an `except:` that has no matching `try`, and the print call is
# followed on the same line by a triple-quoted string holding older code.
except:
        fighter1_strikes = event_soups.findAll('p', attrs={'class': re.compile('b-fight-details__table')})[4].text.strip()
        fighter2_strikes = event_soups.findAll('p', attrs={'class': re.compile('b-fight-details__table')})[5].text.strip()

        fighter1_strikes = 0 if ''== fighter1_strikes else int(fighter1_strikes)
        fighter2_strikes = 0 if ''== fighter1_strikes else int(fighter2_strikes)

        print(number, fighter1_strikes, fighter2_strikes) '''try: 
        fighter1_strikes = 0 if ''== fighter1_strikes else int(fighter1_strikes)
    except:
        print(f'Edge Case: {number}, {event_url_list[number]}')
        edge_case_number.append(number)
        
    try: 
        fighter2_strikes = 0 if ''==fighter1_strikes else int(fighter2_strikes)
    except:
        print(f'Edge Case: {number}, {event_url_list[number]}')
        edge_case_number.append(number)'''

In [137]:
def _strike_count(cell):
    '''Convert a table cell's text to an int, treating an empty cell as 0.'''
    text = cell.text.strip()
    return 0 if text == '' else int(text)


# Positions (within event_soups) where the usual cells were not numeric.
edge_case_number = []

for number, event_soup in enumerate(event_soups):
    # Fetch the table cells once per event instead of once per lookup.
    cells = event_soup.findAll('p', attrs={'class': re.compile('b-fight-details__table')})

    try:
        # Each fighter's own text decides whether it is empty; the previous
        # code tested fighter 1's value when converting fighter 2's.
        fighter1_strikes = _strike_count(cells[3])
        fighter2_strikes = _strike_count(cells[4])
    except ValueError:
        # Non-numeric text at the usual positions: record the event so that
        # edge_case_number is actually populated, then read one cell further on.
        edge_case_number.append(number)
        fighter1_strikes = _strike_count(cells[4])
        fighter2_strikes = _strike_count(cells[5])

    print(number, fighter1_strikes, fighter2_strikes)

0 88 58
1 59 27
2 90 13
3 124 42
4 168 53
5 20 14
6 86 72
7 6 13
8 104 55
9 43 20
10 25 21
11 48 5
12 55 32
13 2 0
14 6 2
15 96 80
16 54 55
17 23 23
18 19 7
19 28 4
20 22 12
21 209 123
22 7 0
23 13 11
24 12 4
25 138 90
26 97 42
27 112 109
28 2 0
29 74 12
30 54 53
31 1 1
32 65 58
33 28 5
34 18 20
35 26 2
36 53 38
37 83 48
38 40 32
39 53 34
40 41 13
41 84 14
42 62 51
43 21 23
44 112 43
45 290 110
46 68 29
47 14 6
48 59 43
49 85 78
50 36 39
51 59 16
52 27 19
53 158 150
54 88 16
55 6 4
56 25 46
57 102 27
58 134 41
59 29 16
60 42 3
61 16 0
62 59 22
63 37 30
64 46 22
65 127 61
66 76 45
67 13 27
68 61 42
69 13 13
70 24 7
71 22 19
72 86 77
73 4 1
74 8 40
75 138 53
76 94 10
77 31 11
78 0 0
79 29 7
80 41 13
81 29 38
82 9 4
83 130 62
84 21 10
85 68 67
86 32 9
87 19 9
88 17 9
89 23 6
90 13 10
91 16 2
92 57 0
93 79 72
94 18 9
95 39 21
96 10 26
97 68 43
98 23 4
99 42 18
100 67 32
101 63 11
102 31 47
103 17 8
104 71 36
105 23 13
106 31 23
107 24 60
108 90 57
109 122 110
110 27 20
111 134 110
112 110 

In [76]:
# De-duplicate and sort the recorded edge-case positions, then display them.
edge_case_list = sorted(set(edge_case_number))
edge_case_list

[96, 222, 228, 281, 324, 354, 476]

In [None]:
Kelvin Gastelum