In [186]:
# note: run this script when you already have the "Complete Stats.csv" dataframe saved. This script finds the latest fights by referencing that table and updates both the "Complete Stats.csv" and the "Normalized Stats Table.csv"

import pandas as pd
import requests
import time
from bs4 import BeautifulSoup

In [187]:
# Getting latest date from our current dataset
current_df = pd.read_csv("Complete Stats.csv")
current_df["dates"] = pd.to_datetime(current_df["dates"])

latest_date = current_df["dates"].max()
print(latest_date)

2025-04-26 00:00:00


In [188]:
r = requests.get("http://www.ufcstats.com/statistics/events/completed?page=all")
soup = BeautifulSoup(r.content, 'html.parser')
s = soup.find('section', class_ = 'b-statistics__section' )
pages = s.find_all('a', class_ = 'b-link b-link_style_black')
hrefs_start = [a['href'] for a in pages]
# Pulling in the 500 latest events.
hrefs_start = hrefs_start[0:499]

In [189]:
# Initializing variables
stats_url = []
fighter1=[]
fighter2=[]
weight = []
method =[]
rounds =[]
times = []
dates = []
locations = []
event = []
index_value = 0

# Getting first date to start the loop
time.sleep(1)
url = hrefs_start[index_value]
r = requests.get(url)
soup = BeautifulSoup(r.text, 'html.parser')
table = soup.find('table')
rows = table.find_all('tr')
rows = rows[1:]
for row in rows:
    date = soup.find('li', class_ = 'b-list__box-list-item')
    date = date.get_text(strip=True)[5:]
    date = pd.to_datetime(date)

# Running while loop to find URLS before our latest date to avoid reading in unneccessary data
while date > latest_date:
    time.sleep(1)
    url = hrefs_start[index_value]
    r = requests.get(url)
    soup = BeautifulSoup(r.text, 'html.parser')
    table = soup.find('table')
    rows = table.find_all('tr')
    rows = rows[1:]
    for row in rows:
        cols = row.find_all('td')
        data = [col.get_text(strip=True) for col in cols][6:]
        weight.append(data[0])
        method.append(data[1])
        rounds.append(data[2])
        times.append(data[3])
        fighters = row.find_all('a', class_ ="b-link b-link_style_black")
        names = [a.get_text(strip = True) for a in fighters]
        fighter1.append(names[0])
        fighter2.append(names[1])
        stat_link = row.get('data-link')
        
        if stat_link != '' or stat_link is not None:
            stats_url.append(row.get('data-link'))
        else: stats_url.append("No Link")

        location = soup.find_all('li', class_ = 'b-list__box-list-item')
        locations.append(location[1].get_text(strip=True)[9:])
        date = soup.find('li', class_ = 'b-list__box-list-item')
        date = date.get_text(strip=True)[5:]
        date = pd.to_datetime(date)
        dates.append(date)
        event.append(soup.find('h2').get_text(strip=True))
    index_value += 1

In [190]:
new_df = pd.DataFrame({
"stats_url" : stats_url,
"fighter1" : fighter1,
"fighter2" : fighter2,
"weight" : weight,
"method" : method,
"rounds" : rounds,
"times" : times,
"dates" : dates,
"locations" : locations,
"event" : event
})

len(new_df)

49

In [191]:
new_df.head(23)

Unnamed: 0,stats_url,fighter1,fighter2,weight,method,rounds,times,dates,locations,event
0,http://www.ufcstats.com/fight-details/a81ad236...,Michael Morales,Gilbert Burns,Welterweight,KO/TKOPunches,1,3:39,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales
1,http://www.ufcstats.com/fight-details/dcb27eef...,Mairon Santos,Sodiq Yusuff,Lightweight,U-DEC,3,5:00,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales
2,http://www.ufcstats.com/fight-details/b3044bf6...,Nursulton Ruziboev,Dustin Stoltzfus,Middleweight,U-DEC,3,5:00,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales
3,http://www.ufcstats.com/fight-details/fe2acfda...,Melquizael Costa,Julian Erosa,Featherweight,U-DEC,3,5:00,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales
4,http://www.ufcstats.com/fight-details/a9a6cca4...,Gabe Green,Matheus Camilo,Lightweight,SUBRear Naked Choke,2,3:43,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales
5,http://www.ufcstats.com/fight-details/8ba31cf0...,Jared Gordon,Thiago Moises,Lightweight,KO/TKOPunch,1,3:37,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales
6,http://www.ufcstats.com/fight-details/d6e2bc6e...,Yadier del Valle,Connor Matthews,Featherweight,SUBRear Naked Choke,1,2:54,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales
7,http://www.ufcstats.com/fight-details/f75fe8b3...,Luana Santos,Tainara Lisboa,Women's Bantamweight,SUBKeylock,2,4:59,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales
8,http://www.ufcstats.com/fight-details/66887532...,Denise Gomes,Elise Reed,Women's Strawweight,KO/TKOPunches,2,0:30,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales
9,http://www.ufcstats.com/fight-details/18a7dc86...,HyunSung Park,Carlos Hernandez,Flyweight,SUBRear Naked Choke,1,2:26,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales


In [192]:
# Using URL's from new_df to scrape the rest of the data
fight_details_links = new_df['stats_url']

f1 = []
f2 = []
f1_kd = []
f1_sigstr = []
f1_sigstr_pct = []
f1_totstr = []
f1_td = []
f1_td_pct = []
f1_subatt = []
f1_rev = []
f1_ctrl = []
row2 = []
f2_kd = []
f2_sigstr = []
f2_sigstr_pct = []
f2_totstr = []
f2_td = []
f2_td_pct = []
f2_subatt = []
f2_rev = []
f2_ctrl = []
stats_url = []


connect_timeout = 6
read_timeout = 60
count = 0


for url in fight_details_links:
    time.sleep(1)
    r = requests.get(url, timeout=(connect_timeout, read_timeout))
    soup = BeautifulSoup(r.content, 'html.parser')
    fight_details = soup.find_all('section', class_ = 'b-fight-details__section js-fight-section')
    fight_details = fight_details[1]
    tds = fight_details.find_all('td')
    row1 = []
    for td in tds:
        p_tags = td.find_all('p')
        row1.append(p_tags[0].get_text(strip=True))
    f1.append(row1[0])
    f1_kd.append(row1[1])
    f1_sigstr.append(row1[2])
    f1_sigstr_pct.append(row1[3])
    f1_totstr.append(row1[4])
    f1_td.append(row1[5])
    f1_td_pct.append(row1[6])
    f1_subatt.append(row1[7])
    f1_rev.append(row1[8])
    f1_ctrl.append(row1[9])
    row2 = []
    for td in tds:
        p_tags = td.find_all('p')
        row2.append(p_tags[1].get_text(strip=True))
    f2.append(row2[0])
    f2_kd.append(row2[1])
    f2_sigstr.append(row2[2])
    f2_sigstr_pct.append(row2[3])
    f2_totstr.append(row2[4])
    f2_td.append(row2[5])
    f2_td_pct.append(row2[6])
    f2_subatt.append(row2[7])
    f2_rev.append(row2[8])
    f2_ctrl.append(row2[9])
    stats_url.append(url)

    count += 1
    print(f"Succcess count: {count}")

Succcess count: 1
Succcess count: 2
Succcess count: 3
Succcess count: 4
Succcess count: 5
Succcess count: 6
Succcess count: 7
Succcess count: 8
Succcess count: 9
Succcess count: 10
Succcess count: 11
Succcess count: 12
Succcess count: 13
Succcess count: 14
Succcess count: 15
Succcess count: 16
Succcess count: 17
Succcess count: 18
Succcess count: 19
Succcess count: 20
Succcess count: 21
Succcess count: 22
Succcess count: 23
Succcess count: 24
Succcess count: 25
Succcess count: 26
Succcess count: 27
Succcess count: 28
Succcess count: 29
Succcess count: 30
Succcess count: 31
Succcess count: 32
Succcess count: 33
Succcess count: 34
Succcess count: 35
Succcess count: 36
Succcess count: 37
Succcess count: 38
Succcess count: 39
Succcess count: 40
Succcess count: 41
Succcess count: 42
Succcess count: 43
Succcess count: 44
Succcess count: 45
Succcess count: 46
Succcess count: 47
Succcess count: 48
Succcess count: 49


In [193]:
new_stats_df = pd.DataFrame({
'fighter1' : f1,
'fighter2': f2,
'f1_kd' : f1_kd,
'f1_sigstr' : f1_sigstr,
'f1_sigstr_pct' : f1_sigstr_pct,
'f1_totstr' : f1_totstr,
'f1_td' : f1_td,
'f1_td_pct' : f1_td_pct,
'f1_subatt' : f1_subatt,
'f1_rev' : f1_rev,
'f1_ctrl' : f1_ctrl,
'f2_kd' : f2_kd,
'f2_sigstr' : f2_sigstr,
'f2_sigstr_pct' : f2_sigstr_pct,
'f2_totstr' : f2_totstr,
'f2_td' : f2_td,
'f2_td_pct' : f2_td_pct,
'f2_subatt' : f2_subatt,
'f2_rev' : f2_rev,
'f2_ctrl' : f2_ctrl,
# 'event' : event,
'stats_url' : stats_url
})

In [194]:
new_stats_df.head(23)

Unnamed: 0,fighter1,fighter2,f1_kd,f1_sigstr,f1_sigstr_pct,f1_totstr,f1_td,f1_td_pct,f1_subatt,f1_rev,...,f2_kd,f2_sigstr,f2_sigstr_pct,f2_totstr,f2_td,f2_td_pct,f2_subatt,f2_rev,f2_ctrl,stats_url
0,Gilbert Burns,Michael Morales,0,5 of 18,27%,5 of 18,1 of 2,50%,0,0,...,2,33 of 56,58%,35 of 58,0 of 0,---,0,0,0:10,http://www.ufcstats.com/fight-details/a81ad236...
1,Sodiq Yusuff,Mairon Santos,0,36 of 86,41%,60 of 115,1 of 3,33%,0,0,...,0,40 of 83,48%,71 of 114,0 of 0,---,0,0,1:57,http://www.ufcstats.com/fight-details/dcb27eef...
2,Dustin Stoltzfus,Nursulton Ruziboev,0,21 of 37,56%,41 of 62,2 of 5,40%,2,0,...,0,35 of 68,51%,53 of 94,1 of 2,50%,0,2,3:21,http://www.ufcstats.com/fight-details/b3044bf6...
3,Julian Erosa,Melquizael Costa,0,69 of 173,39%,85 of 202,3 of 10,30%,0,0,...,0,102 of 204,50%,128 of 236,2 of 3,66%,1,0,0:23,http://www.ufcstats.com/fight-details/fe2acfda...
4,Gabe Green,Matheus Camilo,0,12 of 27,44%,36 of 57,0 of 0,---,1,0,...,0,13 of 27,48%,33 of 48,2 of 3,66%,1,0,2:30,http://www.ufcstats.com/fight-details/a9a6cca4...
5,Jared Gordon,Thiago Moises,1,7 of 13,53%,21 of 36,1 of 1,100%,0,0,...,0,5 of 9,55%,11 of 16,0 of 0,---,0,0,0:00,http://www.ufcstats.com/fight-details/8ba31cf0...
6,Yadier del Valle,Connor Matthews,0,9 of 12,75%,28 of 34,1 of 1,100%,1,0,...,0,3 of 10,30%,3 of 10,0 of 0,---,0,0,0:00,http://www.ufcstats.com/fight-details/d6e2bc6e...
7,Luana Santos,Tainara Lisboa,0,15 of 19,78%,50 of 57,2 of 2,100%,1,0,...,0,6 of 34,17%,7 of 35,0 of 1,0%,0,0,0:00,http://www.ufcstats.com/fight-details/f75fe8b3...
8,Elise Reed,Denise Gomes,0,11 of 21,52%,35 of 47,0 of 0,---,0,0,...,1,28 of 42,66%,69 of 86,3 of 3,100%,0,0,4:14,http://www.ufcstats.com/fight-details/66887532...
9,HyunSung Park,Carlos Hernandez,0,14 of 22,63%,17 of 27,1 of 1,100%,1,0,...,0,4 of 17,23%,4 of 17,0 of 0,---,0,0,0:00,http://www.ufcstats.com/fight-details/18a7dc86...


In [195]:
# Filtering out unusual fight results
unusal_meths = ['CNC', 'M-DEC', 'DQ',  'OverturnedKick', 'DQRear Naked Choke', 'OverturnedTriangle Choke','OverturnedElbows', 'Other']
new_df1 = new_df[~new_df['method'].isin(unusal_meths)]

complete_df = new_df1.merge(new_stats_df, on = ['stats_url'], how="right")

In [196]:
complete_df.head(23)
# complete_df.columns

Unnamed: 0,stats_url,fighter1_x,fighter2_x,weight,method,rounds,times,dates,locations,event,...,f1_ctrl,f2_kd,f2_sigstr,f2_sigstr_pct,f2_totstr,f2_td,f2_td_pct,f2_subatt,f2_rev,f2_ctrl
0,http://www.ufcstats.com/fight-details/a81ad236...,Michael Morales,Gilbert Burns,Welterweight,KO/TKOPunches,1,3:39,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales,...,0:30,2,33 of 56,58%,35 of 58,0 of 0,---,0,0,0:10
1,http://www.ufcstats.com/fight-details/dcb27eef...,Mairon Santos,Sodiq Yusuff,Lightweight,U-DEC,3,5:00,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales,...,2:02,0,40 of 83,48%,71 of 114,0 of 0,---,0,0,1:57
2,http://www.ufcstats.com/fight-details/b3044bf6...,Nursulton Ruziboev,Dustin Stoltzfus,Middleweight,U-DEC,3,5:00,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales,...,5:11,0,35 of 68,51%,53 of 94,1 of 2,50%,0,2,3:21
3,http://www.ufcstats.com/fight-details/fe2acfda...,Melquizael Costa,Julian Erosa,Featherweight,U-DEC,3,5:00,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales,...,3:34,0,102 of 204,50%,128 of 236,2 of 3,66%,1,0,0:23
4,http://www.ufcstats.com/fight-details/a9a6cca4...,Gabe Green,Matheus Camilo,Lightweight,SUBRear Naked Choke,2,3:43,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales,...,2:12,0,13 of 27,48%,33 of 48,2 of 3,66%,1,0,2:30
5,http://www.ufcstats.com/fight-details/8ba31cf0...,Jared Gordon,Thiago Moises,Lightweight,KO/TKOPunch,1,3:37,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales,...,2:36,0,5 of 9,55%,11 of 16,0 of 0,---,0,0,0:00
6,http://www.ufcstats.com/fight-details/d6e2bc6e...,Yadier del Valle,Connor Matthews,Featherweight,SUBRear Naked Choke,1,2:54,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales,...,1:21,0,3 of 10,30%,3 of 10,0 of 0,---,0,0,0:00
7,http://www.ufcstats.com/fight-details/f75fe8b3...,Luana Santos,Tainara Lisboa,Women's Bantamweight,SUBKeylock,2,4:59,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales,...,7:57,0,6 of 34,17%,7 of 35,0 of 1,0%,0,0,0:00
8,http://www.ufcstats.com/fight-details/66887532...,Denise Gomes,Elise Reed,Women's Strawweight,KO/TKOPunches,2,0:30,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales,...,0:00,1,28 of 42,66%,69 of 86,3 of 3,100%,0,0,4:14
9,http://www.ufcstats.com/fight-details/18a7dc86...,HyunSung Park,Carlos Hernandez,Flyweight,SUBRear Naked Choke,1,2:26,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales,...,0:33,0,4 of 17,23%,4 of 17,0 of 0,---,0,0,0:00


In [197]:
complete_df = complete_df.rename(columns={
'fighter1_x' : 'fighter1',
'fighter2_x': 'fighter2'
})

complete_df = complete_df.drop(columns= ["fighter1_y", "fighter2_y"])

In [198]:
complete_df.head(23)

Unnamed: 0,stats_url,fighter1,fighter2,weight,method,rounds,times,dates,locations,event,...,f1_ctrl,f2_kd,f2_sigstr,f2_sigstr_pct,f2_totstr,f2_td,f2_td_pct,f2_subatt,f2_rev,f2_ctrl
0,http://www.ufcstats.com/fight-details/a81ad236...,Michael Morales,Gilbert Burns,Welterweight,KO/TKOPunches,1,3:39,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales,...,0:30,2,33 of 56,58%,35 of 58,0 of 0,---,0,0,0:10
1,http://www.ufcstats.com/fight-details/dcb27eef...,Mairon Santos,Sodiq Yusuff,Lightweight,U-DEC,3,5:00,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales,...,2:02,0,40 of 83,48%,71 of 114,0 of 0,---,0,0,1:57
2,http://www.ufcstats.com/fight-details/b3044bf6...,Nursulton Ruziboev,Dustin Stoltzfus,Middleweight,U-DEC,3,5:00,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales,...,5:11,0,35 of 68,51%,53 of 94,1 of 2,50%,0,2,3:21
3,http://www.ufcstats.com/fight-details/fe2acfda...,Melquizael Costa,Julian Erosa,Featherweight,U-DEC,3,5:00,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales,...,3:34,0,102 of 204,50%,128 of 236,2 of 3,66%,1,0,0:23
4,http://www.ufcstats.com/fight-details/a9a6cca4...,Gabe Green,Matheus Camilo,Lightweight,SUBRear Naked Choke,2,3:43,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales,...,2:12,0,13 of 27,48%,33 of 48,2 of 3,66%,1,0,2:30
5,http://www.ufcstats.com/fight-details/8ba31cf0...,Jared Gordon,Thiago Moises,Lightweight,KO/TKOPunch,1,3:37,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales,...,2:36,0,5 of 9,55%,11 of 16,0 of 0,---,0,0,0:00
6,http://www.ufcstats.com/fight-details/d6e2bc6e...,Yadier del Valle,Connor Matthews,Featherweight,SUBRear Naked Choke,1,2:54,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales,...,1:21,0,3 of 10,30%,3 of 10,0 of 0,---,0,0,0:00
7,http://www.ufcstats.com/fight-details/f75fe8b3...,Luana Santos,Tainara Lisboa,Women's Bantamweight,SUBKeylock,2,4:59,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales,...,7:57,0,6 of 34,17%,7 of 35,0 of 1,0%,0,0,0:00
8,http://www.ufcstats.com/fight-details/66887532...,Denise Gomes,Elise Reed,Women's Strawweight,KO/TKOPunches,2,0:30,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales,...,0:00,1,28 of 42,66%,69 of 86,3 of 3,100%,0,0,4:14
9,http://www.ufcstats.com/fight-details/18a7dc86...,HyunSung Park,Carlos Hernandez,Flyweight,SUBRear Naked Choke,1,2:26,2025-05-17,"Las Vegas, Nevada, USA",UFC Fight Night: Burns vs. Morales,...,0:33,0,4 of 17,23%,4 of 17,0 of 0,---,0,0,0:00


In [199]:
# complete_df = complete_df.drop(columns=[
# 'weight_y',	'rounds_y',	'times_y',	'method_y',	'locations_y',	'dates_y', 'event_y', 'key'

# ], axis=1)


# complete_df = complete_df.rename(columns={
# 'event_x' : 'event',
# 'weight_x': 'weight',
# 'rounds_x' : 'rounds',
# 'times_x' : 'times',
# 'method_x' : 'method',
# 'locations_x' : 'location',
# 'dates_x' : 'dates'
# })

In [200]:
complete_df[['f1_sigstr_landed', 'f1_sigstr_attempt']] = complete_df['f1_sigstr'].str.split(' of ', expand=True).astype(int)
complete_df[['f2_sigstr_landed', 'f2_sigstr_attempt']] = complete_df['f2_sigstr'].str.split(' of ', expand=True).astype(int)

complete_df[['f1_totstr_landed', 'f1_totstr_attempt']] = complete_df['f1_totstr'].str.split(' of ', expand=True).astype(int)
complete_df[['f2_totstr_landed', 'f2_totstr_attempt']] = complete_df['f2_totstr'].str.split(' of ', expand=True).astype(int)

complete_df[['f1_td_landed', 'f1_td_attempt']] = complete_df['f1_td'].str.split(' of ', expand=True).astype(int)
complete_df[['f2_td_landed', 'f2_td_attempt']] = complete_df['f2_td'].str.split(' of ', expand=True).astype(int)


complete_df = complete_df.drop(columns=[
'f1_sigstr', 'f2_sigstr', 'f1_totstr', 'f2_totstr', 'f1_td', 'f2_td'
], axis=1)


complete_df['f1_sigstr_pct'] = complete_df.apply(lambda row: row['f1_sigstr_landed']/row['f1_sigstr_attempt'] * 100 if row['f1_sigstr_attempt'] != 0 else 0, axis=1)
complete_df['f2_sigstr_pct'] = complete_df.apply(lambda row: row['f2_sigstr_landed']/row['f2_sigstr_attempt'] * 100 if row['f2_sigstr_attempt'] != 0 else 0, axis=1)

complete_df['f2_totstr_pct'] = complete_df.apply(lambda row: row['f2_totstr_landed']/row['f2_totstr_attempt'] * 100 if row['f2_totstr_attempt'] != 0 else 0, axis=1)
complete_df['f1_totstr_pct'] = complete_df.apply(lambda row: row['f1_totstr_landed']/row['f1_totstr_attempt'] * 100 if row['f1_totstr_attempt'] != 0 else 0, axis=1)

complete_df['f1_td_pct'] = complete_df.apply(lambda row: row['f1_td_landed']/row['f1_td_attempt'] * 100 if row['f1_td_attempt'] != 0 else 0, axis=1)
complete_df['f2_td_pct'] = complete_df.apply(lambda row: row['f2_td_landed']/row['f2_td_attempt'] * 100 if row['f2_td_attempt'] != 0 else 0, axis=1)

In [201]:
# Get MM:SS time format into just seconds to help aggregate / total time in the fight
def standard_time_format(mmss):
    if mmss == '--':
        return 0
    else: minutes, seconds = map(int, mmss.split(":"))
    return (minutes*60) + seconds

def total_fight_time(row):
    remainder = standard_time_format(row['times'])
    x = row['rounds'] - 1
    return (x*5*60) + remainder

In [None]:
complete_df = complete_df[~complete_df['rounds']]
complete_df['rounds'] = complete_df['rounds'].astype(int)
complete_df['rounds'].dtypes

dtype('int64')

In [203]:
complete_df['f1_ctrl_sec'] = complete_df['f1_ctrl'].apply(standard_time_format)
complete_df['f2_ctrl_sec'] = complete_df['f2_ctrl'].apply(standard_time_format)


complete_df['tot_fight_secs'] = complete_df.apply(total_fight_time, axis=1)

In [204]:
complete_df['more_totstr_landed'] = complete_df.apply(lambda row: 'fighter1' if row['f1_totstr_landed'] > row['f2_totstr_landed'] else ('fighter2' if row['f1_totstr_landed'] < row['f2_totstr_landed'] else "equal"), axis=1)
complete_df['more_totstr_attempt'] = complete_df.apply(lambda row: 'fighter1' if row['f1_totstr_attempt'] > row['f2_totstr_attempt'] else ('fighter2' if row['f1_totstr_attempt'] < row['f2_totstr_attempt'] else "equal"), axis=1)

complete_df['more_sigstr_attempt'] = complete_df.apply(lambda row: 'fighter1' if row['f1_sigstr_attempt'] > row['f2_sigstr_attempt'] else ('fighter2' if row['f1_sigstr_attempt'] < row['f2_sigstr_attempt'] else "equal"),axis=1)
complete_df['more_sigstr_landed'] = complete_df.apply(lambda row: 'fighter1' if row['f1_sigstr_landed'] > row['f2_sigstr_landed'] else ('fighter2' if row['f1_sigstr_landed'] < row['f2_sigstr_landed'] else "equal"),axis=1)

In [205]:
# Add to Complete Stats Table
new_total_stats = pd.concat([current_df, complete_df], ignore_index=True)

new_total_stats.drop_duplicates(inplace=True)

new_total_stats.to_csv("Complete Stats.csv", index=False)

In [206]:
# current_df['rounds'] = current_df['rounds'].astype(int)
# current_df['tot_fight_secs'] = current_df.apply(total_fight_time, axis=1)
# current_df.to_csv("Complete Stats.csv")

In [207]:
new_total_stats['dates'] = pd.to_datetime(new_total_stats['dates'])


new_total_stats.sort_values(by='dates', ascending=False).head(23)

Unnamed: 0.1,Unnamed: 0,event,fighter1,fighter2,weight,rounds,times,method,locations,dates,...,f2_td_attempt,f2_totstr_pct,f1_totstr_pct,f1_ctrl_sec,f2_ctrl_sec,more_totstr_landed,more_totstr_attempt,more_sigstr_attempt,more_sigstr_landed,tot_fight_secs
7287,,UFC Fight Night: Burns vs. Morales,Michael Morales,Gilbert Burns,Welterweight,1,3:39,KO/TKOPunches,"Las Vegas, Nevada, USA",2025-05-17,...,0,60.344828,27.777778,30,10,fighter2,fighter2,fighter2,fighter2,219
7292,,UFC Fight Night: Burns vs. Morales,Jared Gordon,Thiago Moises,Lightweight,1,3:37,KO/TKOPunch,"Las Vegas, Nevada, USA",2025-05-17,...,0,68.75,58.333333,156,0,fighter1,fighter1,fighter1,fighter1,217
7297,,UFC Fight Night: Burns vs. Morales,Tecia Pennington,Luana Pinheiro,Women's Strawweight,3,5:00,U-DEC,"Las Vegas, Nevada, USA",2025-05-17,...,10,38.983051,52.348993,44,102,fighter1,fighter1,fighter1,fighter1,900
7296,,UFC Fight Night: Burns vs. Morales,HyunSung Park,Carlos Hernandez,Flyweight,1,2:26,SUBRear Naked Choke,"Las Vegas, Nevada, USA",2025-05-17,...,0,23.529412,62.962963,33,0,fighter1,fighter1,fighter1,fighter1,146
7295,,UFC Fight Night: Burns vs. Morales,Denise Gomes,Elise Reed,Women's Strawweight,2,0:30,KO/TKOPunches,"Las Vegas, Nevada, USA",2025-05-17,...,3,80.232558,74.468085,0,254,fighter2,fighter2,fighter2,fighter2,330
7293,,UFC Fight Night: Burns vs. Morales,Yadier del Valle,Connor Matthews,Featherweight,1,2:54,SUBRear Naked Choke,"Las Vegas, Nevada, USA",2025-05-17,...,0,30.0,82.352941,81,0,fighter1,fighter1,fighter1,fighter1,174
7294,,UFC Fight Night: Burns vs. Morales,Luana Santos,Tainara Lisboa,Women's Bantamweight,2,4:59,SUBKeylock,"Las Vegas, Nevada, USA",2025-05-17,...,1,20.0,87.719298,477,0,fighter1,fighter1,fighter2,fighter1,599
7291,,UFC Fight Night: Burns vs. Morales,Gabe Green,Matheus Camilo,Lightweight,2,3:43,SUBRear Naked Choke,"Las Vegas, Nevada, USA",2025-05-17,...,3,68.75,63.157895,132,150,fighter1,fighter1,equal,fighter2,523
7290,,UFC Fight Night: Burns vs. Morales,Melquizael Costa,Julian Erosa,Featherweight,3,5:00,U-DEC,"Las Vegas, Nevada, USA",2025-05-17,...,3,54.237288,42.079208,214,23,fighter2,fighter2,fighter2,fighter2,900
7289,,UFC Fight Night: Burns vs. Morales,Nursulton Ruziboev,Dustin Stoltzfus,Middleweight,3,5:00,U-DEC,"Las Vegas, Nevada, USA",2025-05-17,...,2,56.382979,66.129032,311,201,fighter2,fighter2,fighter2,fighter2,900


In [208]:
stats_df = pd.read_csv("Complete Stats.csv")

# separate into two tables then append each other to normalize the data
# stats_df.columns
f1_df = stats_df[['event', 'fighter1', 'weight', 'rounds',
       'times', 'method', 'locations', 'dates', 'stats_url', 'f1_kd',
       'f1_sigstr_pct', 'f1_td_pct', 'f1_subatt', 'f1_rev', 'f1_ctrl',
       'f1_sigstr_landed', 'f1_sigstr_attempt', 'f1_totstr_landed', 'f1_totstr_attempt', 'f1_td_landed',
       'f1_td_attempt',
       'f1_totstr_pct', 'f1_ctrl_sec', 'more_totstr_landed',
       'more_totstr_attempt', 'more_sigstr_attempt', 'more_sigstr_landed', 'tot_fight_secs']].copy()

f1_df["is_winner"] = True
f1_df["fighter_num"] = "fighter1"
f1_df.rename(columns={"fighter1" : "fighter" , 'f1_kd' : 'kd',
       'f1_sigstr_pct' : 'sigstr_pct', 'f1_td_pct' : 'td_pct', 'f1_subatt' : 'subatt', 'f1_rev' : 'rev', 'f1_ctrl' : 'ctrl', 'f1_sigstr_landed' : 'sigstr_landed',
       'f1_sigstr_attempt' : 'sigstr_attempt',
       'f1_totstr_landed' : 'totstr_landed', 'f1_totstr_attempt' : 'totstr_attempt', 'f1_td_landed' : 'td_landed', 'f1_td_attempt' : 'td_attempt', 'f1_totstr_pct' : 'totstr_pct', 'f1_ctrl_sec' : 'ctrl_sec'}, inplace=True)

f2_df = stats_df[['event', 'fighter2', 'weight', 'rounds',
       'times', 'method', 'locations', 'dates', 'stats_url', 'f2_kd',
       'f2_sigstr_pct', 'f2_td_pct', 'f2_subatt', 'f2_rev', 'f2_ctrl', 'f2_sigstr_landed',
       'f2_sigstr_attempt',
       'f2_totstr_landed', 'f2_totstr_attempt', 'f2_td_landed', 'f2_td_attempt', 'f2_totstr_pct', 'f2_ctrl_sec', 'more_totstr_landed',
       'more_totstr_attempt', 'more_sigstr_attempt', 'more_sigstr_landed','tot_fight_secs']].copy()

f2_df["is_winner"] = False
f2_df["fighter_num"] = "fighter2"
f2_df.rename(columns={"fighter2" : "fighter", "f2_kd": "kd",
       'f2_sigstr_pct': 'sigstr_pct', 'f2_td_pct': 'td_pct', 'f2_subatt' : 'subatt', 'f2_rev': 'rev', 'f2_ctrl': 'ctrl', 'f2_sigstr_landed': 'sigstr_landed',
       'f2_sigstr_attempt' : 'sigstr_attempt',
       'f2_totstr_landed' : 'totstr_landed', 'f2_totstr_attempt' : 'totstr_attempt', 'f2_td_landed' : 'td_landed', 'f2_td_attempt' : 'td_attempt', 'f2_totstr_pct' : 'totstr_pct', 'f2_ctrl_sec' : 'ctrl_sec'}, inplace=True)

In [209]:
norm_df = pd.concat([f1_df, f2_df], ignore_index = True)
norm_df.to_csv("Normalized Stats Table.csv", index=False)