In [11]:
from unittest.mock import inplace

from IPython.display import display, HTML
from docutils.nodes import abbreviation
from werkzeug.routing.rules import Weighting

display(HTML("<style>.container { width:100% !important; }</style>"))

In [12]:
import statsmodels.formula.api as smf
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, confusion_matrix
from sklearn import neighbors
from sklearn.neighbors import KNeighborsClassifier
import seaborn as sns
from sklearn.metrics import mean_squared_error
from sklearn.metrics import r2_score
from sklearn import metrics
from sklearn.neighbors import KNeighborsRegressor
from sklearn.model_selection import cross_val_score

%matplotlib inline

In [13]:
import requests                 # How Python gets the webpages
from bs4 import BeautifulSoup   # Creates structured, searchable object
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import json
from datetime import datetime

In [14]:
from pylab import rcParams

rcParams['figure.dpi'] = 150
rcParams['lines.linewidth'] = 1.2
rcParams['axes.facecolor'] = 'white'
rcParams['patch.edgecolor'] = 'white'
rcParams['font.family'] = 'DejaVu Sans'
rcParams['figure.figsize'] = 5,3
rcParams['font.size'] = 10

In [15]:
rcParams['axes.labelsize'] = 'medium'
rcParams['xtick.labelsize'] = 8
rcParams['ytick.labelsize'] = 8

In [16]:
# I don't think we will need this table, but we can keep it to show the process of getting the data.
url_ranking = "http://ufcstats.com/statistics/fighters?page=all"
headers = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
}
response_ranking = requests.get(url_ranking, headers=headers)

# Always check if the request was successful
if response_ranking.status_code != 200:
    print(f"Error: Received status code {response_ranking.status_code}")
    exit()

print(f"Response status: {response_ranking.status_code}")

Response status: 200


In [17]:
# I don't think we will need this table, but we can keep it to show the process of getting the data.
fighters_page = response_ranking.content
scraping = BeautifulSoup(fighters_page, "lxml")
tables_fighters = scraping.find_all('table')
tables_fighters_df = pd.read_html(str(tables_fighters))
fighters_table = tables_fighters_df[0]
fighters_table

Unnamed: 0,First,Last,Nickname,Ht.,Wt.,Reach,Stance,W,L,D,Belt
0,,,,,,,,,,,
1,Tom,Aaron,,--,155 lbs.,--,,5.0,3.0,0.0,
2,Danny,Abbadi,The Assassin,"5' 11""",155 lbs.,--,Orthodox,4.0,6.0,0.0,
3,Nariman,Abbasov,Bayraktar,"5' 8""",155 lbs.,"66.0""",Orthodox,28.0,4.0,0.0,
4,David,Abbott,Tank,"6' 0""",265 lbs.,--,Switch,10.0,15.0,0.0,
...,...,...,...,...,...,...,...,...,...,...,...
222,Abu,Azaitar,Captain Morocco,"5' 9""",185 lbs.,"76.0""",Orthodox,14.0,4.0,1.0,
223,Ottman,Azaitar,Bulldozer,"5' 8""",155 lbs.,"71.0""",Switch,13.0,3.0,0.0,
224,Luiz,Azeredo,,"5' 9""",154 lbs.,--,Orthodox,15.0,10.0,0.0,
225,Luciano,Azevedo,,"6' 3""",161 lbs.,--,Orthodox,17.0,9.0,1.0,


In [18]:
fighters_table.to_csv('./csv/fighters_table.csv', index=False)

In [8]:
base_api_url = 'https://api.sportradar.com/mma/trial/v2/en'
headers = {
    "accept": "application/json",
    "x-api-key": '#use your API Key'
}

In [12]:
url = base_api_url + "/rankings.json"

response = requests.get(url, headers=headers)

In [13]:
def get_rankings():

    res = json.loads(response.content)
    rankings = res.get('rankings',[])

    records = []
    for cat_ranking in rankings:
        category = cat_ranking.get('name')
        year = cat_ranking.get('year')
        week = cat_ranking.get('week')
        competitor_rankings = cat_ranking.get('competitor_rankings', [])
        for competitor in competitor_rankings:
            rank = competitor.get('rank')
            comp = competitor.get('competitor')
            comp_id = comp.get('id')
            name = comp.get('name')
            gender = comp.get('gender')
            records.append({
                'Name': name,
                'Id': comp_id,
                'Ranking': rank,
                "Gender": gender,
                'Category': category,
                'Year': year,
                "Week": week
            })
    return records

Unnamed: 0,Name,Id,Ranking,Gender,Category,Year,Week
0,"Makhachev, Islam",sr:competitor:251835,1,male,pound_for_pound,2025,19
1,"Jones, Jon",sr:competitor:253371,2,male,pound_for_pound,2025,19
2,"Topuria, Ilia",sr:competitor:750503,3,male,pound_for_pound,2025,19
3,"Dvalishvili, Merab",sr:competitor:399183,4,male,pound_for_pound,2025,19
4,"Du Plessis, Dricus",sr:competitor:400461,5,male,pound_for_pound,2025,19
...,...,...,...,...,...,...,...
186,"Cavalcanti, Jacqueline",sr:competitor:1049265,11,female,womens_bantamweight,2025,19
187,"Cornolle, Nora",sr:competitor:1027333,12,female,womens_bantamweight,2025,19
188,"Tate, Miesha",sr:competitor:246049,13,female,womens_bantamweight,2025,19
189,"Edwards, Joselyne",sr:competitor:768194,14,female,womens_bantamweight,2025,19


In [None]:
df_rankings_fighters = pd.DataFrame(get_rankings())
df_rankings_fighters.sort_values(by='Id', ascending=False)
df_rankings_fighters.to_csv('./csv/df_ranking_fighters.csv', index=False)
df_rankings_fighters

In [41]:
def get_profiles():
    profiles = []
    for index, fighter in df_rankings_fighters.iterrows():
        fighterId = fighter['Id'].replace(":", "%3A")
        profile_url = "https://api.sportradar.com/mma/trial/v2/en/competitors/" + fighterId + "/profile.json"
        response_fighter = requests.get(profile_url, headers=headers)
        res_fighter = json.loads(response_fighter.content)
        competitor_profile = res_fighter.get('competitor')
        if competitor_profile is not None:
            profile_id = competitor_profile.get('id')
            info = res_fighter.get('info')
            country = info.get('birth_country')
            Birth_country_code = info.get('birth_country_code')
            DOB = info.get('birth_date')
            reach = info.get('reach')
            Height = info.get('height')
            Weight = info.get('weight')
            Nickname = info.get('nickname')
            record = res_fighter.get('record')
            Wins = record.get('wins')
            Draws = record.get('draws')
            Losses = record.get('losses')
            profiles.append({
                'Id': profile_id,
                'Country': country,
                'Brith_Code': Birth_country_code,
                'DOB': DOB,
                'Reach': reach,
                'Height': Height,
                'Weight': Weight,
                'Nickname': Nickname,
                'Wins': Wins,
                'Draws': Draws,
                'Losses': Losses
            })
    return profiles

Unnamed: 0,Id,Country,Brith_Code,DOB,Reach,Height,Weight,Nickname,Wins,Draws,Losses
0,sr:competitor:251835,RUSSIAN FEDERATION,RUS,1991-09-27,179,178,70.3,,27,0,2
1,sr:competitor:750503,GERMANY,DEU,1997-01-21,175,170,65.5,El Matador,16,0,0
2,sr:competitor:399183,GEORGIA,GEO,1991-01-10,173,168,60.8,The Machine,18,0,4
3,sr:competitor:400461,,,1994-01-14,193,185,83.9,Stillknocks,23,0,2
4,sr:competitor:419867,RUSSIAN FEDERATION,RUS,1992-06-02,191,191,93.0,,21,1,1
...,...,...,...,...,...,...,...,...,...,...,...
184,sr:competitor:1049265,BRAZIL,BRA,1997-08-29,178,175,61.2,,9,0,1
185,sr:competitor:1027333,,,1989-06-12,170,169,62.6,,9,0,2
186,sr:competitor:246049,UNITED STATES,USA,1986-08-18,165,168,61.5,Cupcake,20,0,10
187,sr:competitor:768194,PANAMA,PAN,1995-09-29,178,173,61.7,La Pantera,15,0,6


In [15]:
df_profiles_fighters = pd.DataFrame(get_profiles())
df_profiles_fighters.to_csv('./csv/df_profiles_fighters.csv', index=False)
df_profiles_fighters

NameError: name 'df_profiles_fighters' is not defined

In [66]:
# In the Dataframe fighter_df_info we have the best 15 rankings per category
fighters_df = pd.merge(df_rankings_fighters, df_profiles_fighters, on='Id', how='left')
fighters_df_info = fighters_df.drop_duplicates(subset=['Id', 'Category'])
fighters_df_info = fighters_df_info.rename(columns={'Year': 'Year_Ranking', "Week": 'Week_ranking'})
fighters_df_info

Unnamed: 0,Name,Id,Ranking,Gender,Category,Year_Ranking,Week_ranking,Country,Brith_Code,DOB,Reach,Height,Weight,Nickname,Wins,Draws,Losses
0,"Makhachev, Islam",sr:competitor:251835,1,male,pound_for_pound,2025,19,RUSSIAN FEDERATION,RUS,1991-09-27,179.0,178.0,70.3,,27.0,0.0,2.0
2,"Jones, Jon",sr:competitor:253371,2,male,pound_for_pound,2025,19,UNITED STATES,USA,1987-07-19,215.0,194.0,112.5,Bones,28.0,0.0,1.0
3,"Topuria, Ilia",sr:competitor:750503,3,male,pound_for_pound,2025,19,GERMANY,DEU,1997-01-21,175.0,170.0,65.5,El Matador,16.0,0.0,0.0
5,"Dvalishvili, Merab",sr:competitor:399183,4,male,pound_for_pound,2025,19,GEORGIA,GEO,1991-01-10,173.0,168.0,60.8,The Machine,18.0,0.0,4.0
7,"Du Plessis, Dricus",sr:competitor:400461,5,male,pound_for_pound,2025,19,,,1994-01-14,193.0,185.0,83.9,Stillknocks,23.0,0.0,2.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
220,"Cavalcanti, Jacqueline",sr:competitor:1049265,11,female,womens_bantamweight,2025,19,BRAZIL,BRA,1997-08-29,178.0,175.0,61.2,,9.0,0.0,1.0
221,"Cornolle, Nora",sr:competitor:1027333,12,female,womens_bantamweight,2025,19,,,1989-06-12,170.0,169.0,62.6,,9.0,0.0,2.0
222,"Tate, Miesha",sr:competitor:246049,13,female,womens_bantamweight,2025,19,UNITED STATES,USA,1986-08-18,165.0,168.0,61.5,Cupcake,20.0,0.0,10.0
223,"Edwards, Joselyne",sr:competitor:768194,14,female,womens_bantamweight,2025,19,PANAMA,PAN,1995-09-29,178.0,173.0,61.7,La Pantera,15.0,0.0,6.0


In [None]:
fighters_df_info.to_csv('./csv/Fighters_info.csv', index=False)

In [42]:
def get_all_seasons():
    url_seasons = base_api_url + "/seasons.json"
    response_seasons = requests.get(url_seasons, headers=headers)
    seasons_content = json.loads(response_seasons.content)
    seasons_values = seasons_content.get('seasons')
    seasons_result = []
    for season in seasons_values:
        start_date = season.get("start_date")
        season_name = season.get('name')
        seasons_result.append({
            'Date': start_date,
            'Name' : season_name
        })
    return seasons_result


In [41]:
def get_all_fights_by_date(date):
    url_fights = base_api_url + '/schedules/' + date + '/summaries.json'
    response_fights = requests.get(url_fights, headers=headers)
    fights_content = json.loads(response_fights.content)
    fights_values = fights_content.get('summaries')
    fights_results = []
    if fights_values is not None:
        for fight in fights_values:
            sport_event = fight.get('sport_event')
            sport_event_context = sport_event.get('sport_event_context')
            competition = sport_event_context.get('competition')
            competition_name = competition.get('name')
            competitors = sport_event.get('competitors')
            sport_event_status = fight.get('sport_event_status')
            event_status = sport_event_status.get('status')
            if event_status == 'closed':
                winner_id = sport_event_status.get('winner_id')
                final_round = sport_event_status.get('final_round')
                final_round_length = sport_event_status.get('final_round_length')
                method = sport_event_status.get('method')
                scheduled_length = sport_event_status.get('scheduled_length')
                weight_class = sport_event_status.get('weight_class')
                winner = None
                loser = None
                for compet in competitors:
                    if compet.get('id') == winner_id:
                        winner = compet
                    else:
                        loser = compet
                if winner is not None and loser is not None:
                    winner_name = winner.get('name')
                    winner_abbrev = winner.get('abbreviation')
                    winner_gender = winner.get('gender')
                    winner_qualifier = winner.get('qualifier')
                    loser_name = loser.get('name')
                    loser_abbrev = loser.get('abbreviation')
                    loser_gender = loser.get('gender')
                    loser_qualifier = loser.get('qualifier')
                    loser_id = loser.get('id')
                    fights_results.append({
                        "Date": date,
                        'Competition' : competition_name,
                        "Winner": winner_name,
                        'Winner_abbreviation': winner_abbrev,
                        'Winner_gender': winner_gender,
                        "Winner_qualifier": winner_qualifier,
                        'Winner_id': winner_id,
                        "Loser": loser_name,
                        'Loser_abbreviation': loser_abbrev,
                        'Loser_gender': loser_gender,
                        "Loser_qualifier": loser_qualifier,
                        'Loser_id': loser_id,
                        'Final_round': final_round,
                        'Final_round_length': final_round_length,
                        'Method': method,
                        'Scheduled_length': scheduled_length,
                        "Weight_class": weight_class
                    })
    return fights_results


In [43]:
def get_all_fights_data():
    seasons = get_all_seasons()
    all_fights = []
    for season in seasons:
        season_fights = get_all_fights_by_date(season['Date'])
        all_fights.extend(season_fights)
    return all_fights


In [44]:
all_fights_data = get_all_fights_data()
df_all_fights = pd.DataFrame(all_fights_data)
df_all_fights.to_csv('./csv/df_all_fights.csv', index= False)
df_all_fights

Unnamed: 0,Date,Competition,Winner,Winner_abbreviation,Winner_gender,Winner_qualifier,Winner_id,Loser,Loser_abbreviation,Loser_gender,Loser_qualifier,Loser_id,Final_round,Final_round_length,Method,Scheduled_length,Weight_class
0,2018-07-14,"UFC Fight Night - Boise, Idaho","Carmouche, Liz",CAR,female,home,sr:competitor:399181,"Maia, Jennifer",MAI,female,away,sr:competitor:542105,3,5:00,decision_unanimous,3,flyweight(116-125)
1,2018-07-14,"UFC Fight Night - Boise, Idaho","de la Rosa, Mark",DE,male,home,sr:competitor:401833,"Garcia, Elias",GAR,male,away,sr:competitor:456579,2,2:00,submission,3,flyweight(116-125)
2,2018-07-14,"UFC Fight Night - Boise, Idaho","Aguilar, Jessica",AGU,female,home,sr:competitor:333645,"Esquibel, Jodie",ESQ,female,away,sr:competitor:542045,3,5:00,decision_unanimous,3,strawweight(106-115)
3,2018-07-07,"The Ultimate Fighter Finale - Las Vegas, week 27","de la Rosa, Montana",DE,female,home,sr:competitor:457887,"Ostovich, Rachael",OST,female,away,sr:competitor:542123,3,4:21,submission,3,flyweight(116-125)
4,2018-07-07,"The Ultimate Fighter Finale - Las Vegas, week 27","Pena, Luis Antonio",PEN,male,home,sr:competitor:457889,"Smullen, Richie",SMU,male,away,sr:competitor:542149,1,3:32,submission,3,lightweight(146-155)
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1614,2025-05-17,UFC Fight Night: Burns vs. Morales,"Delvalle, Yadier",DEL,male,home,sr:competitor:1191035,"Matthews, Connor",MAT,male,away,sr:competitor:909485,1,2:54,submission,3,featherweight(136-145)
1615,2025-05-17,UFC Fight Night: Burns vs. Morales,"Gordon, Jared",GOR,male,home,sr:competitor:333581,"Moises, Thiago",MOI,male,away,sr:competitor:515356,1,3:37,ko_tko,3,lightweight(146-155)
1616,2025-05-17,UFC Fight Night: Burns vs. Morales,"Green, Gabriel",GRE,male,home,sr:competitor:658401,"Camilo, Matheus",CAM,male,away,sr:competitor:1247773,2,3:43,submission,3,lightweight(146-155)
1617,2025-05-17,UFC Fight Night: Burns vs. Morales,"Costa, Melquizael",COS,male,away,sr:competitor:980481,"Erosa, Julian",ERO,male,home,sr:competitor:246021,3,5:00,decision_unanimous,3,featherweight(136-145)
