In [None]:
'''
Overview
When run, this notebook parses the stats of all past UFC fights; it does not include upcoming fights.


Steps to scrape UFC fight stats:
get the event details (name, URL, date, location) for all UFC events
for each event, get the fight details for all fights on the card
parse each fight to get the fight stats of both fighters
'''

In [28]:
# imports
import pandas as pd
from tqdm.notebook import tqdm_notebook

# import library
import scrape_ufc_stats_library as LIB

# import config
import yaml

# Load the notebook settings from the YAML file next to this notebook.
# Opening the file in a `with` block closes the handle as soon as parsing is
# done instead of leaving it open until garbage collection.
with open('scrape_ufc_stats_config.yaml') as config_file:
    config = yaml.safe_load(config_file)

# Parse Event Details
Includes:
<br>
Event
<br>
URL
<br>
Date
<br>
Location
<br>

In [14]:
# URL to parse for event details, read from the 'completed_events_all_url'
# entry of the loaded config
events_url = config['completed_events_all_url']

In [22]:
# Get the soup for the events URL and parse it into the event details table
# (columns shown in the output: EVENT, URL, DATE, LOCATION).
soup = LIB.get_soup(events_url)
all_event_details_df = LIB.parse_event_details(soup)

# Render the table inline for a quick visual check.
display(all_event_details_df)

# Save the table to the configured file; the row index is not written.
event_details_path = config['event_details_file_name']
all_event_details_df.to_csv(event_details_path, index=False)

Unnamed: 0,EVENT,URL,DATE,LOCATION
0,UFC Fight Night: Allen vs. Curtis 2,http://ufcstats.com/event-details/899eaba48fa8...,"April 06, 2024","Las Vegas, Nevada, USA"
1,UFC Fight Night: Blanchfield vs. Fiorot,http://ufcstats.com/event-details/dba230fe3301...,"March 30, 2024","Atlantic City, New Jersey, USA"
2,UFC Fight Night: Ribas vs. Namajunas,http://ufcstats.com/event-details/79ff6545b0ab...,"March 23, 2024","Las Vegas, Nevada, USA"
3,UFC Fight Night: Tuivasa vs. Tybura,http://ufcstats.com/event-details/c398235fcaf8...,"March 16, 2024","Las Vegas, Nevada, USA"
4,UFC 299: O'Malley vs. Vera 2,http://ufcstats.com/event-details/a9df5ae20a97...,"March 09, 2024","Miami, Florida, USA"
...,...,...,...,...
680,UFC 6: Clash of the Titans,http://ufcstats.com/event-details/1c3f5e85b59e...,"July 14, 1995","Casper, Wyoming, USA"
681,UFC 5: The Return of the Beast,http://ufcstats.com/event-details/dedc3bb440d0...,"April 07, 1995","Charlotte, North Carolina, USA"
682,UFC 4: Revenge of the Warriors,http://ufcstats.com/event-details/b60391da771d...,"December 16, 1994","Tulsa, Oklahoma, USA"
683,UFC 3: The American Dream,http://ufcstats.com/event-details/1a49e0670dfa...,"September 09, 1994","Charlotte, North Carolina, USA"


# Parse Fight Details
Includes:
<br>
Event
<br>
Bout
<br>
URL

In [23]:
# Maximum number of events (taken from the top of the event table) that the
# following cells will process. The value 2 keeps runs short while developing;
# set it to None to process every event in the table.
# NOTE: the events CSV has already been written in full by the previous cell,
# so this cap only affects the fight-level scraping below.
MAX_EVENTS = 2

# `iloc[:None]` selects every row, so None disables the cap without a branch.
all_event_details_df = all_event_details_df.iloc[:MAX_EVENTS]
all_event_details_df

Unnamed: 0,EVENT,URL,DATE,LOCATION
0,UFC Fight Night: Allen vs. Curtis 2,http://ufcstats.com/event-details/899eaba48fa8...,"April 06, 2024","Las Vegas, Nevada, USA"
1,UFC Fight Night: Blanchfield vs. Fiorot,http://ufcstats.com/event-details/dba230fe3301...,"March 30, 2024","Atlantic City, New Jersey, USA"


In [24]:
# Columns (and their order) of the combined fight details table / CSV.
fight_details_output_columns = ['EVENT', 'BOUT', 'DATE', 'LOCATION', 'URL']

# Per-event frames are collected here and concatenated once after the loop;
# concatenating inside the loop copies the accumulated frame on every iteration.
event_fight_frames = []

# loop through each event and parse fight details
for index, row in all_event_details_df.iterrows():

    # get soup for the event page
    soup = LIB.get_soup(row['URL'])

    # parse the fights listed for this event
    fight_details_df = LIB.parse_fight_details(soup)

    # carry the event's date and location onto each of its fights
    fight_details_df['DATE'] = row['DATE']
    fight_details_df['LOCATION'] = row['LOCATION']

    event_fight_frames.append(fight_details_df)

if event_fight_frames:
    # Single concat, then the column selection that used to be repeated on
    # every iteration. Row labels are left as produced by the parser, exactly
    # as before (the CSV is written without the index).
    all_fight_details_df = pd.concat(event_fight_frames)[fight_details_output_columns]
else:
    # No events to process: keep an empty table with the expected columns so
    # the cells below and the CSV header still work.
    all_fight_details_df = pd.DataFrame(columns=fight_details_output_columns)

# write fight details to file
all_fight_details_df.to_csv(config['fight_details_file_name'], index=False)

# Parse Fight Results and Fight Stats

Fight Results Includes:
<br>
Event
<br>
Bout
<br>
Weightclass
<br>
Method
<br>
Round
<br>
Time
<br>
Time Format
<br>
Referee
<br>
Details
<br>

Fight Stats Includes:
<br>
Event
<br>
Bout
<br>
Round
<br>
Fighter
<br>
Kd
<br>
Sig.Str.
<br>
Sig.Str. %
<br>
Total Str.
<br>
Td
<br>
Td %
<br>
Sub.Att
<br>
Rev.
<br>
Ctrl
<br>
Head
<br>
Body
<br>
Leg
<br>
Distance
<br>
Clinch
<br>
Ground
<br>

In [33]:
def _stack_frames(frames, leading_columns):
    """Concatenate per-fight frames once, keeping ``leading_columns`` first.

    Parameters
    ----------
    frames : list of pandas.DataFrame
        Frames collected in the scraping loop (may be empty).
    leading_columns : list of str
        Columns that must appear first, in this order. Any of them missing
        from the data is added and left empty; any other column found in the
        data follows afterwards in its order of appearance.

    Returns
    -------
    pandas.DataFrame
        The combined frame, or an empty frame with ``leading_columns`` when
        ``frames`` is empty. Row labels of the inputs are kept as they are.
    """
    if not frames:
        return pd.DataFrame(columns=leading_columns)
    stacked = pd.concat(frames)
    trailing = [name for name in stacked.columns if name not in leading_columns]
    return stacked.reindex(columns=list(leading_columns) + trailing)


# Per-fight frames are collected in lists and concatenated once after the loop;
# concatenating inside the loop copies the accumulated frame on every iteration.
fight_results_frames = []
fight_stats_frames = []

# loop through each fight and parse fight results and stats
# (`tqdm_notebook` is the progress bar imported at the top of the notebook; the
# bare name `tqdm` is never imported and raises NameError on a fresh kernel)
for index, row in tqdm_notebook(all_fight_details_df.iterrows(), total=all_fight_details_df.shape[0]):

    # get soup for the fight page
    soup = LIB.get_soup(row['URL'])

    # parse fight results and fight stats
    fight_results_df, fight_stats_df = LIB.parse_organise_fight_results_and_stats(
        soup,
        row['URL'],
        config['fight_results_column_names'],
        config['totals_column_names'],
        config['significant_strikes_column_names']
        )

    # carry the event date and location onto both per-fight frames
    for fight_frame in (fight_results_df, fight_stats_df):
        fight_frame['DATE'] = row['DATE']
        fight_frame['LOCATION'] = row['LOCATION']

    fight_results_frames.append(fight_results_df)
    fight_stats_frames.append(fight_stats_df)

# combine everything, with the configured columns first and DATE/LOCATION after them
all_fight_results_df = _stack_frames(
    fight_results_frames, config['fight_results_column_names'] + ['DATE', 'LOCATION']
)
all_fight_stats_df = _stack_frames(
    fight_stats_frames, config['fight_stats_column_names'] + ['DATE', 'LOCATION']
)

# convert DATE to datetime; values that cannot be parsed become NaT
all_fight_results_df['DATE'] = pd.to_datetime(all_fight_results_df['DATE'], errors='coerce')
all_fight_stats_df['DATE'] = pd.to_datetime(all_fight_stats_df['DATE'], errors='coerce')

# show all fight results
display(all_fight_results_df)
# show all fight stats
display(all_fight_stats_df)

# write fight results to file
all_fight_results_df.to_csv(config['fight_results_file_name'], index=False)
# write fight stats to file
all_fight_stats_df.to_csv(config['fight_stats_file_name'], index=False)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 25/25 [00:11<00:00,  2.14it/s]


Unnamed: 0,EVENT,BOUT,OUTCOME,WEIGHTCLASS,METHOD,ROUND,TIME,TIME FORMAT,REFEREE,DETAILS,URL,DATE,LOCATION
0,UFC Fight Night: Allen vs. Curtis 2,Brendan Allen vs. Chris Curtis,W/L,Middleweight Bout,Decision - Split,5,5:00,5 Rnd (5-5-5-5-5),Mark Smith,Derek Cleary 47 - 48.Mike Bell 48 - 47.Eric Co...,http://ufcstats.com/fight-details/3ce96bb0c69b...,2024-04-06,"Las Vegas, Nevada, USA"
0,UFC Fight Night: Allen vs. Curtis 2,Alexander Hernandez vs. Damon Jackson,L/W,Featherweight Bout,Decision - Split,3,5:00,3 Rnd (5-5-5),Jason Herzog,Bryan Miner 30 - 27.Junichiro Kamijo 28 - 29.D...,http://ufcstats.com/fight-details/9c22ddb8d8fc...,2024-04-06,"Las Vegas, Nevada, USA"
0,UFC Fight Night: Allen vs. Curtis 2,Morgan Charriere vs. Chepe Mariscal,L/W,Featherweight Bout,Decision - Split,3,5:00,3 Rnd (5-5-5),Keith Peterson,Mike Bell 28 - 29.Adalaide Byrd 30 - 27.Eric C...,http://ufcstats.com/fight-details/32375b4435fe...,2024-04-06,"Las Vegas, Nevada, USA"
0,UFC Fight Night: Allen vs. Curtis 2,Ignacio Bahamondes vs. Christos Giagos,W/L,Lightweight Bout,KO/TKO,1,3:34,3 Rnd (5-5-5),Chris Tognoni,Kick to Head At Distance,http://ufcstats.com/fight-details/6ff21ea2c75c...,2024-04-06,"Las Vegas, Nevada, USA"
0,UFC Fight Night: Allen vs. Curtis 2,Trevor Peek vs. Charlie Campbell,L/W,Lightweight Bout,Decision - Unanimous,3,5:00,3 Rnd (5-5-5),Keith Peterson,Chris Flores 27 - 30.Chris Leben 27 - 30.Bryan...,http://ufcstats.com/fight-details/47f8812a335b...,2024-04-06,"Las Vegas, Nevada, USA"
0,UFC Fight Night: Allen vs. Curtis 2,Court McGee vs. Alex Morono,L/W,Welterweight Bout,Decision - Unanimous,3,5:00,3 Rnd (5-5-5),Jason Herzog,Mike Bell 28 - 29.Adalaide Byrd 28 - 29.Derek ...,http://ufcstats.com/fight-details/0b9cd718d7e3...,2024-04-06,"Las Vegas, Nevada, USA"
0,UFC Fight Night: Allen vs. Curtis 2,Valter Walker vs. Lukasz Brzeski,L/W,Heavyweight Bout,Decision - Unanimous,3,5:00,3 Rnd (5-5-5),Mark Smith,Eric Colon 28 - 29.Junichiro Kamijo 28 - 29.Ri...,http://ufcstats.com/fight-details/ff5a8078ea85...,2024-04-06,"Las Vegas, Nevada, USA"
0,UFC Fight Night: Allen vs. Curtis 2,Norma Dumont vs. Germaine de Randamie,W/L,Women's Bantamweight Bout,Decision - Unanimous,3,5:00,3 Rnd (5-5-5),Keith Peterson,Mike Bell 28 - 29.Derek Cleary 28 - 29.Chris F...,http://ufcstats.com/fight-details/188e6416157e...,2024-04-06,"Las Vegas, Nevada, USA"
0,UFC Fight Night: Allen vs. Curtis 2,Pedro Falcao vs. Victor Hugo,L/W,Bantamweight Bout,Decision - Unanimous,3,5:00,3 Rnd (5-5-5),Chris Tognoni,Chris Leben 27 - 30.Adalaide Byrd 28 - 29.Brya...,http://ufcstats.com/fight-details/5c3b8819dbe9...,2024-04-06,"Las Vegas, Nevada, USA"
0,UFC Fight Night: Allen vs. Curtis 2,Dan Argueta vs. Jean Matsumoto,L/W,Bantamweight Bout,Submission,2,4:59,3 Rnd (5-5-5),Jason Herzog,Guillotine Choke After Drop to Guard,http://ufcstats.com/fight-details/1f7566c90343...,2024-04-06,"Las Vegas, Nevada, USA"


Unnamed: 0,EVENT,BOUT,ROUND,FIGHTER,KD,SIG.STR.,SIG.STR. %,TOTAL STR.,TD,TD %,...,REV.,CTRL,HEAD,BODY,LEG,DISTANCE,CLINCH,GROUND,DATE,LOCATION
0,UFC Fight Night: Allen vs. Curtis 2,Brendan Allen vs. Chris Curtis,Round 1,Brendan Allen,0,22 of 40,55%,24 of 42,1 of 1,100%,...,0,1:17,12 of 26,7 of 10,3 of 4,22 of 40,0 of 0,0 of 0,2024-04-06,"Las Vegas, Nevada, USA"
1,UFC Fight Night: Allen vs. Curtis 2,Brendan Allen vs. Chris Curtis,Round 2,Brendan Allen,0,29 of 66,43%,29 of 66,0 of 1,0%,...,0,0:07,20 of 55,6 of 8,3 of 3,28 of 64,1 of 2,0 of 0,2024-04-06,"Las Vegas, Nevada, USA"
2,UFC Fight Night: Allen vs. Curtis 2,Brendan Allen vs. Chris Curtis,Round 3,Brendan Allen,0,23 of 42,54%,31 of 52,1 of 1,100%,...,0,2:03,14 of 31,4 of 6,5 of 5,22 of 40,1 of 2,0 of 0,2024-04-06,"Las Vegas, Nevada, USA"
3,UFC Fight Night: Allen vs. Curtis 2,Brendan Allen vs. Chris Curtis,Round 4,Brendan Allen,0,25 of 39,64%,28 of 44,2 of 3,66%,...,0,1:24,16 of 30,6 of 6,3 of 3,25 of 39,0 of 0,0 of 0,2024-04-06,"Las Vegas, Nevada, USA"
4,UFC Fight Night: Allen vs. Curtis 2,Brendan Allen vs. Chris Curtis,Round 5,Brendan Allen,0,13 of 35,37%,16 of 38,2 of 7,28%,...,0,1:14,10 of 30,2 of 4,1 of 1,11 of 31,2 of 4,0 of 0,2024-04-06,"Las Vegas, Nevada, USA"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1,UFC Fight Night: Blanchfield vs. Fiorot,Angel Pacheco vs. Caolan Loughran,Round 2,Angel Pacheco,0,17 of 38,44%,21 of 42,0 of 0,---,...,0,0:09,9 of 27,7 of 10,1 of 1,15 of 36,2 of 2,0 of 0,2024-03-30,"Atlantic City, New Jersey, USA"
2,UFC Fight Night: Blanchfield vs. Fiorot,Angel Pacheco vs. Caolan Loughran,Round 3,Angel Pacheco,0,25 of 67,37%,28 of 70,0 of 0,---,...,0,0:00,17 of 56,6 of 8,2 of 3,19 of 61,6 of 6,0 of 0,2024-03-30,"Atlantic City, New Jersey, USA"
0,UFC Fight Night: Blanchfield vs. Fiorot,Angel Pacheco vs. Caolan Loughran,Round 1,Caolan Loughran,0,35 of 89,39%,35 of 89,0 of 0,---,...,0,0:00,29 of 76,5 of 11,1 of 2,35 of 89,0 of 0,0 of 0,2024-03-30,"Atlantic City, New Jersey, USA"
1,UFC Fight Night: Blanchfield vs. Fiorot,Angel Pacheco vs. Caolan Loughran,Round 2,Caolan Loughran,0,51 of 89,57%,59 of 107,1 of 1,100%,...,0,3:19,47 of 83,2 of 4,2 of 2,16 of 37,1 of 2,34 of 50,2024-03-30,"Atlantic City, New Jersey, USA"


In [9]:
# Convert the DATE column of both tables to datetime (values that cannot be
# parsed become NaT) and write each table to its configured CSV file.
for frame, file_key in (
    (all_fight_results_df, 'fight_results_file_name'),
    (all_fight_stats_df, 'fight_stats_file_name'),
):
    frame['DATE'] = pd.to_datetime(frame['DATE'], errors='coerce')
    frame.to_csv(config[file_key], index=False)

In [8]:
# Parse the DATE column as datetime (unparseable values become NaT), store it
# back on the stats table, and display the table.
parsed_dates = pd.to_datetime(all_fight_stats_df['DATE'], errors='coerce')
all_fight_stats_df['DATE'] = parsed_dates
all_fight_stats_df

Unnamed: 0,EVENT,BOUT,ROUND,FIGHTER,KD,SIG.STR.,SIG.STR. %,TOTAL STR.,TD,TD %,...,REV.,CTRL,HEAD,BODY,LEG,DISTANCE,CLINCH,GROUND,DATE,LOCATION
0,UFC Fight Night: Tuivasa vs. Tybura,Tai Tuivasa vs. Marcin Tybura,Round 1,Tai Tuivasa,0,15 of 24,62%,18 of 27,0 of 0,---,...,0,0:00,9 of 17,1 of 2,5 of 5,8 of 17,7 of 7,0 of 0,2024-03-16,"Las Vegas, Nevada, USA"
0,UFC Fight Night: Tuivasa vs. Tybura,Tai Tuivasa vs. Marcin Tybura,Round 1,Marcin Tybura,0,27 of 41,65%,73 of 96,1 of 4,25%,...,0,2:48,23 of 34,4 of 7,0 of 0,8 of 20,0 of 0,19 of 21,2024-03-16,"Las Vegas, Nevada, USA"
0,UFC Fight Night: Tuivasa vs. Tybura,Bryan Battle vs. Ange Loosa,Round 1,Bryan Battle,0,23 of 63,36%,24 of 66,1 of 2,50%,...,0,1:33,12 of 45,10 of 17,1 of 1,20 of 57,3 of 5,0 of 1,2024-03-16,"Las Vegas, Nevada, USA"
1,UFC Fight Night: Tuivasa vs. Tybura,Bryan Battle vs. Ange Loosa,Round 2,Bryan Battle,0,4 of 12,33%,4 of 12,0 of 0,---,...,0,0:00,4 of 11,0 of 1,0 of 0,4 of 12,0 of 0,0 of 0,2024-03-16,"Las Vegas, Nevada, USA"
0,UFC Fight Night: Tuivasa vs. Tybura,Bryan Battle vs. Ange Loosa,Round 1,Ange Loosa,0,17 of 44,38%,20 of 48,0 of 1,0%,...,0,0:00,4 of 29,6 of 6,7 of 9,16 of 43,1 of 1,0 of 0,2024-03-16,"Las Vegas, Nevada, USA"
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,UFC 2: No Way Out,Johnny Rhodes vs. David Levicki,Round 1,David Levicki,0,4 of 5,80%,95 of 102,0 of 0,---,...,0,--,4 of 5,0 of 0,0 of 0,1 of 2,2 of 2,1 of 1,1994-03-11,"Denver, Colorado, USA"
0,UFC 2: No Way Out,Patrick Smith vs. Ray Wizard,Round 1,Patrick Smith,0,1 of 1,100%,1 of 1,0 of 1,0%,...,0,--,0 of 0,1 of 1,0 of 0,0 of 0,1 of 1,0 of 0,1994-03-11,"Denver, Colorado, USA"
0,UFC 2: No Way Out,Patrick Smith vs. Ray Wizard,Round 1,Ray Wizard,0,1 of 1,100%,2 of 2,0 of 0,---,...,0,--,0 of 0,0 of 0,1 of 1,1 of 1,0 of 0,0 of 0,1994-03-11,"Denver, Colorado, USA"
0,UFC 2: No Way Out,Scott Morris vs. Sean Daugherty,Round 1,Scott Morris,0,1 of 1,100%,2 of 2,1 of 1,100%,...,0,--,1 of 1,0 of 0,0 of 0,0 of 0,1 of 1,0 of 0,1994-03-11,"Denver, Colorado, USA"
