In [3]:
from clickhouse_driver import Client
import pandas as pd
from datetime import datetime
from dotenv import load_dotenv
import os

# Load variables from a local .env file, then read the ClickHouse password.
load_dotenv()
password = os.environ.get('CLICKHOUSE_PASSWORD')

# Connect to the `mma_stats_bronze` database on localhost:9000 as `default`.
client = Client(host='localhost', port=9000, database='mma_stats_bronze',
                user='default', password=password)

# Column names are applied positionally to the rows returned by `SELECT *`,
# so each list is assumed to mirror the table's column order -- TODO confirm
# against the ClickHouse schema.
fights_columns = [
    "Athlete_ID", "Opponent_ID", "Fight_Date", "Last_Round",
    "End_Time", "Victory_Method", "Fight_Result",
]
athletes_columns = [
    'Athlete_ID', 'Name', 'Nickname',
    'Wins', 'Losses', 'Draws',
    'Weight_Class',
    'Significant_Strikes_Landed', 'Significant_Strikes_Attempted',
    'Takedowns_Landed', 'Takedowns_Attempted',
    'Age', 'Height', 'Weight', 'UFC_Debut', 'Reach', 'Leg_Reach',
    'Significant_Strikes_Landed_Per_Min', 'Significant_Strikes_Absorbed_Per_Min',
    'Takedowns_Average_Per_15min', 'Submissions_Average_Per_15min',
    'Significant_Strike_Defense_Percentage', 'Takedown_Defense',
    'Knockdowns_Average', 'Average_Fight_Time',
    'Head_Strikes_Percent', 'Head_Strikes_Count',
    'Body_Strikes_Percent', 'Body_Strikes_Count',
    'Leg_Strikes_Percent', 'Leg_Strikes_Count',
    'Win_by_KO/TKO_Count', 'Win_by_KO/TKO_Percentage',
    'Win_by_DEC_Count', 'Win_by_DEC_Percentage',
    'Win_by_FIN_Count', 'Win_by_FIN_Percentage',
    'Fighting_Style', 'Hometown',
]

# Fight records first, then athlete profiles (same order as the queries run).
fights_query = "SELECT * FROM ufc_athletes_records"
fights = pd.DataFrame(data=client.execute(fights_query), columns=fights_columns)

athletes_query = "SELECT * FROM ufc_athletes"
athletes = pd.DataFrame(data=client.execute(athletes_query), columns=athletes_columns)

In [2]:
import pandas as pd
import numpy as np

def time_to_seconds(time_str):
    """Convert a clock string ("MM:SS" or "HH:MM:SS") into total seconds.

    Parameters
    ----------
    time_str : str, number or None
        Text such as "4:35" or "0:04:35". Surrounding whitespace is ignored.
        Missing values (None, NaN) yield 0. A real number is treated as an
        already-converted amount of seconds and returned as an int, so the
        conversion can be re-applied to a column without raising.

    Returns
    -------
    int
        hours * 3600 + minutes * 60 + seconds. Returns 0 for empty text and
        for text that does not split into two or three ':'-separated parts.
        A part that is not an integer (e.g. "N/A") counts as 0.
    """

    def convert_to_int(part):
        # Non-numeric fragments are counted as zero rather than raising.
        try:
            return int(part)
        except ValueError:
            return 0

    if not isinstance(time_str, str):
        # None -> TypeError, NaN -> ValueError, inf -> OverflowError: all of
        # these are "no usable value". Genuine numbers pass through unchanged.
        try:
            return int(time_str)
        except (TypeError, ValueError, OverflowError):
            return 0

    # Strip surrounding whitespace and split into clock components.
    parts = time_str.strip().split(':')

    if len(parts) == 3:  # HH:MM:SS
        hours, minutes, seconds = (convert_to_int(part) for part in parts)
    elif len(parts) == 2:  # MM:SS
        hours = 0
        minutes, seconds = (convert_to_int(part) for part in parts)
    else:  # empty string or unrecognised layout
        return 0

    return hours * 3600 + minutes * 60 + seconds

In [3]:
# Replace the clock-style averages with whole seconds (see time_to_seconds).
average_fight_seconds = athletes['Average_Fight_Time'].apply(time_to_seconds)
athletes['Average_Fight_Time'] = average_fight_seconds

In [4]:
# Per-column replacements for missing cells: text columns receive a "NO ..."
# label, every numeric statistic falls back to 0.
fill_values = {
    "Name": "NO NAME",
    "Nickname": "NO NICKNAME",
    "Weight_Class": "NO WEIGHT CLASS",
    "UFC_Debut": "NO UFC DEBUT",
    "Fighting_Style": "NO FIGHTING STYLE",
    "Hometown": "NO HOMETOWN",
    **dict.fromkeys(
        [
            "Wins", "Losses", "Draws",
            "Significant_Strikes_Landed", "Significant_Strikes_Attempted",
            "Takedowns_Landed", "Takedowns_Attempted",
            "Age", "Height", "Weight", "Reach", "Leg_Reach",
            "Significant_Strikes_Landed_Per_Min",
            "Significant_Strikes_Absorbed_Per_Min",
            "Takedowns_Average_Per_15min",
            "Submissions_Average_Per_15min",
            "Significant_Strike_Defense_Percentage",
            "Takedown_Defense",
            "Knockdowns_Average",
            "Average_Fight_Time",
            "Head_Strikes_Percent", "Head_Strikes_Count",
            "Body_Strikes_Percent", "Body_Strikes_Count",
            "Leg_Strikes_Percent", "Leg_Strikes_Count",
            "Win_by_KO/TKO_Count", "Win_by_KO/TKO_Percentage",
            "Win_by_DEC_Count", "Win_by_DEC_Percentage",
            "Win_by_FIN_Count", "Win_by_FIN_Percentage",
        ],
        0,
    ),
}

# fillna returns a new frame; columns not named above are left as they are.
athletes = athletes.fillna(fill_values)


In [5]:
# Raw weight-class labels (English and Portuguese, men's and women's
# divisions) grouped under the English name they are normalised to.
_weight_class_aliases = {
    "Strawweight": ["Women's Strawweight", "Peso-palha feminino"],
    "Flyweight": ["Flyweight", "Peso-mosca", "Peso-mosca feminino", "Women's Flyweight"],
    "Bantamweight": ["Bantamweight", "Peso-galo", "Peso-galo feminino", "Women's Bantamweight"],
    "Featherweight": ["Featherweight", "Peso-pena", "Women's Featherweight", "Peso-Pena Feminino"],
    "Lightweight": ["Lightweight", "Peso-leve"],
    "Welterweight": ["Welterweight", "Peso Meio-Médio"],
    "Middleweight": ["Middleweight", "Peso-médio"],
    "Light Heavyweight": ["Light Heavyweight", "Peso meio-pesado"],
    "Heavyweight": ["Heavyweight", "Peso-pesado"],
    "N/A": ["N/A"],
}
mapping_weight_class = {
    raw_label: canonical
    for canonical, raw_labels in _weight_class_aliases.items()
    for raw_label in raw_labels
}

athletes['Weight_Class'] = athletes['Weight_Class'].replace(mapping_weight_class)
# Empty nickname strings get the same placeholder used for missing ones.
athletes['Nickname'] = athletes['Nickname'].replace({"": "NO NICKNAME"})

# Count-like columns, cast to int below.
int_cols_to_fill = [
    "Wins", "Losses", "Draws",
    "Significant_Strikes_Landed", "Significant_Strikes_Attempted",
    "Takedowns_Landed", "Takedowns_Attempted",
    "Age",
    "Head_Strikes_Count", "Body_Strikes_Count", "Leg_Strikes_Count",
    "Win_by_KO/TKO_Count", "Win_by_DEC_Count", "Win_by_FIN_Count",
]

# Measurement, rate and percentage columns, cast to float below.
float_cols_to_fill = [
    "Height", "Weight", "Reach", "Leg_Reach",
    "Significant_Strikes_Landed_Per_Min",
    "Significant_Strikes_Absorbed_Per_Min",
    "Takedowns_Average_Per_15min",
    "Submissions_Average_Per_15min",
    "Significant_Strike_Defense_Percentage",
    "Takedown_Defense",
    "Knockdowns_Average",
    "Average_Fight_Time",
    "Head_Strikes_Percent", "Body_Strikes_Percent", "Leg_Strikes_Percent",
    "Win_by_KO/TKO_Percentage", "Win_by_DEC_Percentage", "Win_by_FIN_Percentage",
]

# "N/A" and "" become "0" before the cast; int columns first, then float.
for target_columns, target_dtype in ((int_cols_to_fill, int), (float_cols_to_fill, float)):
    for column in target_columns:
        athletes[column] = (
            athletes[column]
            .replace("N/A", "0")
            .replace("", "0")
            .astype(target_dtype)
        )


In [6]:
# Accuracy ratios (landed / attempted). Division by zero produces inf (x/0)
# or NaN (0/0); both are stored as 0.
for ratio_column, landed_column, attempted_column in (
    ("Significant_Strikes_Ratio", "Significant_Strikes_Landed", "Significant_Strikes_Attempted"),
    ("Takedowns_Ratio", "Takedowns_Landed", "Takedowns_Attempted"),
):
    raw_ratio = athletes[landed_column] / athletes[attempted_column]
    athletes[ratio_column] = raw_ratio.replace(np.inf, 0).replace(np.nan, 0)

In [7]:
import pandas as pd

# Initial mapping from the raw style label to a broad category.
style_mapping = {
    "Jiu-Jitsu": "Grappling",
    "Brazilian Jiu-Jitsu": "Grappling",
    "Wrestling": "Grappling",
    "Wrestler": "Grappling",
    "Judo": "Grappling",
    "Sambo": "Grappling",
    "Grappler": "Grappling",
    "Striker": "Striking",
    "Muay Thai": "Striking",
    "Kickboxer": "Striking",
    "Boxing": "Striking",
    "Boxer": "Striking",
    "Karate": "Striking",
    "Taekwondo": "Striking",
    "Kung Fu": "Striking",
    "Kung-Fu": "Striking",
    "Brawler": "Striking",
    "MMA": "Mixed",
    "Freestyle": "Mixed",
    "N/A": "Mixed"
}

# Apply the initial mapping. `.map` yields NaN for any style without an entry
# (blank strings, the "NO FIGHTING STYLE" placeholder, unlisted styles); those
# are treated as "Mixed" so the statistics decide their category below.
# Previously `.replace` let such raw labels leak into Fighting_Category.
athletes["Fighting_Category"] = (
    athletes["Fighting_Style"].map(style_mapping).fillna("Mixed")
)


def define_category(row):
    """Resolve a "Mixed" row to "Striking" or "Grappling" from its ratios.

    Rows whose Fighting_Category is not "Mixed" keep their category. For
    "Mixed" rows the larger of Significant_Strikes_Ratio and Takedowns_Ratio
    wins; a tie (including 0 vs 0) resolves to "Grappling".
    """
    if row["Fighting_Category"] != "Mixed":
        return row["Fighting_Category"]

    strike_ratio = row["Significant_Strikes_Ratio"]
    takedown_ratio = row["Takedowns_Ratio"]
    return "Striking" if strike_ratio > takedown_ratio else "Grappling"


# Apply the row-wise rule to obtain the final category.
athletes["Fighting_Category"] = athletes.apply(define_category, axis=1)

In [8]:
# Display the athletes frame after the cleaning cells above.
athletes

Unnamed: 0,Athlete_ID,Name,Nickname,Wins,Losses,Draws,Weight_Class,Significant_Strikes_Landed,Significant_Strikes_Attempted,Takedowns_Landed,...,Win_by_KO/TKO_Percentage,Win_by_DEC_Count,Win_by_DEC_Percentage,Win_by_FIN_Count,Win_by_FIN_Percentage,Fighting_Style,Hometown,Significant_Strikes_Ratio,Takedowns_Ratio,Fighting_Category
0,Muhammad-Naimov,Muhammad Naimov,"""Hillman""",11,3,0,Featherweight,159,364,0,...,55.0,3,27.0,2,18.0,MMA,"Dushanbe, Tajikistan",0.436813,0.000000,Striking
1,aalon-cruz,Aalon Cruz,NO NICKNAME,8,4,0,Lightweight,134,343,0,...,25.0,4,50.0,2,25.0,MMA,"Parrish, United States",0.390671,0.000000,Striking
2,aaron-brink,Aaron Brink,NO NICKNAME,8,3,0,Heavyweight,0,0,0,...,0.0,0,0.0,0,0.0,,Estados Unidos,0.000000,0.000000,Grappling
3,aaron-jeffery,Aaron Jeffery,NO NICKNAME,6,2,0,Middleweight,57,125,0,...,0.0,0,0.0,0,0.0,MMA,"Tillsonburg, Canada",0.456000,0.000000,Striking
4,aaron-lott,Aaron Lott,NO NICKNAME,0,0,0,Welterweight,0,0,0,...,0.0,0,0.0,0,0.0,,,0.000000,0.000000,Grappling
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2980,zhang-lipeng,Zhang Lipeng,"""The Warrior""",9,9,1,Lightweight,77,158,0,...,22.0,4,44.0,3,33.0,,United States,0.487342,0.000000,Striking
2981,zu-anyanwu,Zu Anyanwu,"""8th Wonder""",14,5,0,Heavyweight,57,133,0,...,57.0,4,29.0,2,14.0,,"Trenton, United States",0.428571,0.000000,Striking
2982,zubaira-tukhugov,Zubaira Tukhugov,"""Warrior""",20,6,1,Lightweight,357,894,7,...,35.0,12,60.0,1,5.0,Striker,"USSR, Rússia",0.399329,0.170732,Striking
2983,zviad-lazishvili,Zviad Lazishvili,NO NICKNAME,13,1,0,Bantamweight,63,160,0,...,0.0,4,31.0,9,69.0,Freestyle,"Kobuleti, Geórgia",0.393750,0.000000,Striking


In [11]:
from pandas import DataFrame
import pandas as pd
import numpy as np


import pandas as pd

def time_to_seconds(time_str):
    """Convert a clock string ("MM:SS" or "HH:MM:SS") into total seconds.

    Parameters
    ----------
    time_str : str, number or None
        Text such as "4:35" or "0:04:35". Surrounding whitespace is ignored.
        Missing values (None, NaN) yield 0. A real number is treated as an
        already-converted amount of seconds and returned as an int, so the
        conversion can be re-applied to a column without raising.

    Returns
    -------
    int
        hours * 3600 + minutes * 60 + seconds. Returns 0 for empty text and
        for text that does not split into two or three ':'-separated parts.
        A part that is not an integer (e.g. "N/A") counts as 0.
    """

    def convert_to_int(part):
        # Non-numeric fragments are counted as zero rather than raising.
        try:
            return int(part)
        except ValueError:
            return 0

    if not isinstance(time_str, str):
        # None -> TypeError, NaN -> ValueError, inf -> OverflowError: all of
        # these are "no usable value". Genuine numbers pass through unchanged
        # (previously they raised AttributeError on `.strip()`).
        try:
            return int(time_str)
        except (TypeError, ValueError, OverflowError):
            return 0

    # Strip surrounding whitespace and split into clock components.
    parts = time_str.strip().split(':')

    if len(parts) == 3:  # HH:MM:SS
        hours, minutes, seconds = (convert_to_int(part) for part in parts)
    elif len(parts) == 2:  # MM:SS
        hours = 0
        minutes, seconds = (convert_to_int(part) for part in parts)
    else:  # empty string or unrecognised layout
        return 0

    return hours * 3600 + minutes * 60 + seconds

def ufc_athletes_handler(athletes: DataFrame) -> DataFrame:
    """Clean the raw athletes frame and derive ratio/category columns.

    Steps, in order:

    1. ``Average_Fight_Time`` is converted to seconds via ``time_to_seconds``.
    2. Missing cells are filled: "NO ..." labels for text columns, 0 for
       every numeric statistic.
    3. ``Weight_Class`` labels are normalised to English division names and
       empty nicknames become "NO NICKNAME".
    4. "N/A" and "" are replaced by "0" in the numeric columns, which are
       then cast to int (counts) or float (measurements, rates, percentages).
    5. ``Significant_Strikes_Ratio`` and ``Takedowns_Ratio`` are computed as
       landed / attempted, with inf and NaN stored as 0.
    6. ``Fighting_Category`` is derived from ``Fighting_Style``; "Mixed" and
       unlisted styles are resolved to "Striking" or "Grappling" by comparing
       the two ratios.

    Parameters
    ----------
    athletes : DataFrame
        Raw athletes frame containing the columns referenced below. It is
        not modified.

    Returns
    -------
    DataFrame
        A new frame with the cleaned columns plus ``Significant_Strikes_Ratio``,
        ``Takedowns_Ratio`` and ``Fighting_Category``.
    """
    # Work on a copy: the first assignment below used to overwrite the
    # caller's Average_Fight_Time column before any copy had been made.
    athletes = athletes.copy()

    athletes['Average_Fight_Time'] = athletes['Average_Fight_Time'].apply(time_to_seconds)

    # Count-like columns, cast to int.
    int_cols_to_fill = [
        "Wins",
        "Losses",
        "Draws",
        "Significant_Strikes_Landed",
        "Significant_Strikes_Attempted",
        "Takedowns_Landed",
        "Takedowns_Attempted",
        "Age",
        "Head_Strikes_Count",
        "Body_Strikes_Count",
        "Leg_Strikes_Count",
        "Win_by_KO/TKO_Count",
        "Win_by_DEC_Count",
        "Win_by_FIN_Count",
    ]

    # Measurement, rate and percentage columns, cast to float.
    float_cols_to_fill = [
        "Height",
        "Weight",
        "Reach",
        "Leg_Reach",
        "Significant_Strikes_Landed_Per_Min",
        "Significant_Strikes_Absorbed_Per_Min",
        "Takedowns_Average_Per_15min",
        "Submissions_Average_Per_15min",
        "Significant_Strike_Defense_Percentage",
        "Takedown_Defense",
        "Knockdowns_Average",
        "Average_Fight_Time",
        "Head_Strikes_Percent",
        "Body_Strikes_Percent",
        "Leg_Strikes_Percent",
        "Win_by_KO/TKO_Percentage",
        "Win_by_DEC_Percentage",
        "Win_by_FIN_Percentage"
    ]

    # Text columns get a label; every numeric column listed above gets 0.
    fill_values = {
        "Name": "NO NAME",
        "Nickname": "NO NICKNAME",
        "Weight_Class": "NO WEIGHT CLASS",
        "UFC_Debut": "NO UFC DEBUT",
        "Fighting_Style": "NO FIGHTING STYLE",
        "Hometown": "NO HOMETOWN",
        **dict.fromkeys(int_cols_to_fill + float_cols_to_fill, 0),
    }

    athletes = athletes.fillna(value=fill_values)

    # Raw labels (English/Portuguese, men's/women's) -> English division name.
    mapping_weight_class = {
        "Featherweight": "Featherweight",
        "Lightweight": "Lightweight",
        "Heavyweight": "Heavyweight",
        "Middleweight": "Middleweight",
        "Welterweight": "Welterweight",
        "Peso-galo": "Bantamweight",
        "Light Heavyweight": "Light Heavyweight",
        "Peso-médio": "Middleweight",
        "Peso Meio-Médio": "Welterweight",
        "Flyweight": "Flyweight",
        "Peso-galo feminino": "Bantamweight",
        "Women's Strawweight": "Strawweight",
        "Peso-leve": "Lightweight",
        "Peso-pesado": "Heavyweight",
        "Bantamweight": "Bantamweight",
        "N/A": "N/A",
        "Peso-pena": "Featherweight",
        "Peso meio-pesado": "Light Heavyweight",
        "Peso-mosca": "Flyweight",
        "Peso-mosca feminino": "Flyweight",
        "Women's Bantamweight": "Bantamweight",
        "Peso-palha feminino": "Strawweight",
        "Women's Flyweight": "Flyweight",
        "Women's Featherweight": "Featherweight",
        "Peso-Pena Feminino": "Featherweight",
    }

    athletes['Weight_Class'] = athletes['Weight_Class'].replace(mapping_weight_class)
    athletes['Nickname'] = athletes['Nickname'].replace({"": "NO NICKNAME"})

    # "N/A" and "" become "0" so the casts below cannot fail on them.
    for column in int_cols_to_fill:
        athletes[column] = athletes[column].replace("N/A", "0")
        athletes[column] = athletes[column].replace("", "0")
        athletes[column] = athletes[column].astype(int)

    for column in float_cols_to_fill:
        athletes[column] = athletes[column].replace("N/A", "0")
        athletes[column] = athletes[column].replace("", "0")
        athletes[column] = athletes[column].astype(float)

    # landed / attempted; x/0 gives inf and 0/0 gives NaN, both stored as 0.
    athletes["Significant_Strikes_Ratio"] = athletes["Significant_Strikes_Landed"] / athletes["Significant_Strikes_Attempted"]
    athletes["Takedowns_Ratio"] = athletes["Takedowns_Landed"] / athletes["Takedowns_Attempted"]

    athletes["Significant_Strikes_Ratio"] = athletes["Significant_Strikes_Ratio"].replace(np.inf, 0)
    athletes["Takedowns_Ratio"] = athletes["Takedowns_Ratio"].replace(np.inf, 0)

    athletes["Significant_Strikes_Ratio"] = athletes["Significant_Strikes_Ratio"].replace(np.nan, 0)
    athletes["Takedowns_Ratio"] = athletes["Takedowns_Ratio"].replace(np.nan, 0)

    # Initial mapping from the raw style label to a broad category.
    style_mapping = {
        "Jiu-Jitsu": "Grappling",
        "Brazilian Jiu-Jitsu": "Grappling",
        "Wrestling": "Grappling",
        "Wrestler": "Grappling",
        "Judo": "Grappling",
        "Sambo": "Grappling",
        "Grappler": "Grappling",
        "Striker": "Striking",
        "Muay Thai": "Striking",
        "Kickboxer": "Striking",
        "Boxing": "Striking",
        "Boxer": "Striking",
        "Karate": "Striking",
        "Taekwondo": "Striking",
        "Kung Fu": "Striking",
        "Kung-Fu": "Striking",
        "Brawler": "Striking",
        "MMA": "Mixed",
        "Freestyle": "Mixed",
        "N/A": "Mixed"
    }

    # `.map` yields NaN for styles without an entry (blank strings, the
    # "NO FIGHTING STYLE" placeholder, unlisted styles); treat them as "Mixed"
    # so the statistics decide. `.replace` used to leak the raw label through.
    athletes["Fighting_Category"] = (
        athletes["Fighting_Style"].map(style_mapping).fillna("Mixed")
    )

    def define_category(row):
        # Non-"Mixed" rows keep their category; "Mixed" rows take the larger
        # ratio, with ties (including 0 vs 0) resolving to "Grappling".
        if row["Fighting_Category"] != "Mixed":
            return row["Fighting_Category"]
        if row["Significant_Strikes_Ratio"] > row["Takedowns_Ratio"]:
            return "Striking"
        return "Grappling"

    # Apply the row-wise rule to obtain the final category.
    athletes["Fighting_Category"] = athletes.apply(define_category, axis=1)

    return athletes

In [12]:
# Run the full cleaning pipeline and rebind `athletes` to its result.
# NOTE(review): if `athletes` was already processed by the step-by-step cells
# above, this applies the pipeline a second time -- confirm it is meant to run
# on the freshly loaded frame.
athletes = ufc_athletes_handler(athletes)

In [13]:
# Display the frame returned by ufc_athletes_handler.
athletes

Unnamed: 0,Athlete_ID,Name,Nickname,Wins,Losses,Draws,Weight_Class,Significant_Strikes_Landed,Significant_Strikes_Attempted,Takedowns_Landed,...,Win_by_KO/TKO_Percentage,Win_by_DEC_Count,Win_by_DEC_Percentage,Win_by_FIN_Count,Win_by_FIN_Percentage,Fighting_Style,Hometown,Significant_Strikes_Ratio,Takedowns_Ratio,Fighting_Category
0,Muhammad-Naimov,Muhammad Naimov,"""Hillman""",11,3,0,Featherweight,159,364,0,...,55.0,3,27.0,2,18.0,MMA,"Dushanbe, Tajikistan",0.436813,0.000000,Striking
1,aalon-cruz,Aalon Cruz,NO NICKNAME,8,4,0,Lightweight,134,343,0,...,25.0,4,50.0,2,25.0,MMA,"Parrish, United States",0.390671,0.000000,Striking
2,aaron-brink,Aaron Brink,NO NICKNAME,8,3,0,Heavyweight,0,0,0,...,0.0,0,0.0,0,0.0,,Estados Unidos,0.000000,0.000000,Grappling
3,aaron-jeffery,Aaron Jeffery,NO NICKNAME,6,2,0,Middleweight,57,125,0,...,0.0,0,0.0,0,0.0,MMA,"Tillsonburg, Canada",0.456000,0.000000,Striking
4,aaron-lott,Aaron Lott,NO NICKNAME,0,0,0,Welterweight,0,0,0,...,0.0,0,0.0,0,0.0,,,0.000000,0.000000,Grappling
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2980,zhang-lipeng,Zhang Lipeng,"""The Warrior""",9,9,1,Lightweight,77,158,0,...,22.0,4,44.0,3,33.0,,United States,0.487342,0.000000,Striking
2981,zu-anyanwu,Zu Anyanwu,"""8th Wonder""",14,5,0,Heavyweight,57,133,0,...,57.0,4,29.0,2,14.0,,"Trenton, United States",0.428571,0.000000,Striking
2982,zubaira-tukhugov,Zubaira Tukhugov,"""Warrior""",20,6,1,Lightweight,357,894,7,...,35.0,12,60.0,1,5.0,Striker,"USSR, Rússia",0.399329,0.170732,Striking
2983,zviad-lazishvili,Zviad Lazishvili,NO NICKNAME,13,1,0,Bantamweight,63,160,0,...,0.0,4,31.0,9,69.0,Freestyle,"Kobuleti, Geórgia",0.393750,0.000000,Striking


In [18]:
# Column names applied positionally to the rows returned by `SELECT *`;
# assumed to mirror the table's column order -- TODO confirm against the schema.
fights_columns = [
    "Athlete_ID", "Opponent_ID", "Fight_Date", "Last_Round",
    "End_Time", "Victory_Method", "Fight_Result",
]

# Reload the raw fight records, discarding any earlier in-memory changes.
fights_query = "SELECT * FROM ufc_athletes_records"
fights = pd.DataFrame(data=client.execute(fights_query), columns=fights_columns)


In [5]:
# Display the raw fight records.
fights

Unnamed: 0,Athlete_ID,Opponent_ID,Fight_Date,Last_Round,End_Time,Victory_Method,Fight_Result
0,Muhammad-Naimov,erik-silva,25.02.24,1,0:44,KO/TKO,Victory
1,Muhammad-Naimov,felipe-lima,22.06.24,3,1:15,Submissão,Loss
2,Muhammad-Naimov,jamie-mullarkey,03.06.23,2,2:59,KO/TKO,Victory
3,Muhammad-Naimov,nathaniel-wood,21.10.23,3,5:00,Decision - Unanimous,Victory
4,aalon-cruz,spike-carlyle,29.02.20,1,1:25,TKO,
...,...,...,...,...,...,...,...
15603,zubaira-tukhugov,phillipe-nover,10.12.15,3,05:00,DEC,Victory
15604,zubaira-tukhugov,renato-moicano,14.05.16,3,05:00,DEC,Loss
15605,zubaira-tukhugov,ricardo-ramos,30.10.21,3,5:00,Decision - Unanimous,Victory
15606,zviad-lazishvili,jonathan-martinez,23.10.21,3,5:00,Decision - Unanimous,Loss


In [6]:
# List the distinct raw Victory_Method labels that need normalising.
fights["Victory_Method"].unique()

array(['KO/TKO', 'Submissão', 'Decision - Unanimous', 'TKO', 'FIN',
       'Decision - Split', "TKO - Doctor's Stoppage",
       'Decision - Majority', 'Could Not Continue', 'DEC', 'N/A', 'DQ',
       'ko/tko', 'Overturned', 'Dec', 'sub', 'dec', 'TBD', 'DRAW',
       'No Contest', 'Majority Decision', 'TKO - Doctor Stoppage',
       'submission', 'KO', 'DECISION - UNANIMOUS', 'DECISION-UNANIMOUS',
       'Fin', 'DEc', 'Unanimous Decision', 'DECISION - MAJORITY',
       'Split Decision', 'Outros', 'Decision- Unanimous', 'UD', '2'],
      dtype=object)

In [7]:
# Maps every raw Victory_Method label to one canonical category.
victory_method_mapping = {
    # Category: KO/TKO
    'KO/TKO': 'KO/TKO',
    'TKO': 'KO/TKO',
    'ko/tko': 'KO/TKO',
    'KO': 'KO/TKO',
    "TKO - Doctor's Stoppage": 'KO/TKO',
    'TKO - Doctor Stoppage': 'KO/TKO',
    # NOTE(review): 'FIN'/'Fin' are grouped with KO/TKO here -- confirm they
    # do not denote submissions or finishes in general.
    'FIN': 'KO/TKO',
    'Fin': 'KO/TKO',

    # Category: Submission
    'Submissão': 'Submission',
    'sub': 'Submission',
    'submission': 'Submission',

    # Category: Unanimous Decision
    # NOTE(review): the bare "DEC" spellings carry no decision type; they are
    # all treated as unanimous -- confirm.
    'Decision - Unanimous': 'Unanimous Decision',
    'DECISION - UNANIMOUS': 'Unanimous Decision',
    'DECISION-UNANIMOUS': 'Unanimous Decision',
    'Unanimous Decision': 'Unanimous Decision',
    'Decision- Unanimous': 'Unanimous Decision',
    'UD': 'Unanimous Decision',
    'DEC': 'Unanimous Decision',
    'Dec': 'Unanimous Decision',
    'DEc': 'Unanimous Decision',
    # Lower-case spelling occurs in the data but previously had no entry,
    # so it was left unnormalised.
    'dec': 'Unanimous Decision',

    # Category: Split Decision
    'Decision - Split': 'Split Decision',
    'Split Decision': 'Split Decision',

    # Category: Majority Decision
    'Decision - Majority': 'Majority Decision',
    'DECISION - MAJORITY': 'Majority Decision',
    'Majority Decision': 'Majority Decision',

    # Category: Draw
    'DRAW': 'Draw',
    'Draw': 'Draw',

    # Category: No Contest
    'No Contest': 'No Contest',
    'Could Not Continue': 'No Contest',
    'Overturned': 'No Contest',

    # Category: Disqualification
    'DQ': 'Disqualification',

    # Category: Other (kept under the label "Outros")
    'TBD': 'Outros',
    '2': 'Outros',
    'Outros': 'Outros',
    'N/A': 'Outros',
}

In [8]:
# Collapse the raw labels into the canonical categories; labels without an
# entry in the mapping are left as they are.
normalized_methods = fights["Victory_Method"].replace(victory_method_mapping)
fights["Victory_Method"] = normalized_methods

In [9]:
# Display the fights frame after Victory_Method normalisation.
fights

Unnamed: 0,Athlete_ID,Opponent_ID,Fight_Date,Last_Round,End_Time,Victory_Method,Fight_Result
0,Muhammad-Naimov,erik-silva,25.02.24,1,0:44,KO/TKO,Victory
1,Muhammad-Naimov,felipe-lima,22.06.24,3,1:15,Submission,Loss
2,Muhammad-Naimov,jamie-mullarkey,03.06.23,2,2:59,KO/TKO,Victory
3,Muhammad-Naimov,nathaniel-wood,21.10.23,3,5:00,Unanimous Decision,Victory
4,aalon-cruz,spike-carlyle,29.02.20,1,1:25,KO/TKO,
...,...,...,...,...,...,...,...
15603,zubaira-tukhugov,phillipe-nover,10.12.15,3,05:00,Unanimous Decision,Victory
15604,zubaira-tukhugov,renato-moicano,14.05.16,3,05:00,Unanimous Decision,Loss
15605,zubaira-tukhugov,ricardo-ramos,30.10.21,3,5:00,Unanimous Decision,Victory
15606,zviad-lazishvili,jonathan-martinez,23.10.21,3,5:00,Unanimous Decision,Loss


In [10]:
def time_to_seconds(time_str):
    """Convert a clock string ("MM:SS" or "HH:MM:SS") into total seconds.

    Parameters
    ----------
    time_str : str, number or None
        Text such as "4:35" or "0:04:35". Surrounding whitespace is ignored.
        Missing values (None, NaN) yield 0. A real number is treated as an
        already-converted amount of seconds and returned as an int, so the
        conversion can be re-applied to a column without raising.

    Returns
    -------
    int
        hours * 3600 + minutes * 60 + seconds. Returns 0 for empty text and
        for text that does not split into two or three ':'-separated parts.
        A part that is not an integer (e.g. "N/A") counts as 0.
    """

    def convert_to_int(part):
        # Non-numeric fragments are counted as zero rather than raising.
        try:
            return int(part)
        except ValueError:
            return 0

    if not isinstance(time_str, str):
        # None -> TypeError, NaN -> ValueError, inf -> OverflowError: all of
        # these are "no usable value". Genuine numbers pass through unchanged
        # (previously they raised AttributeError on `.strip()`).
        try:
            return int(time_str)
        except (TypeError, ValueError, OverflowError):
            return 0

    # Strip surrounding whitespace and split into clock components.
    parts = time_str.strip().split(':')

    if len(parts) == 3:  # HH:MM:SS
        hours, minutes, seconds = (convert_to_int(part) for part in parts)
    elif len(parts) == 2:  # MM:SS
        hours = 0
        minutes, seconds = (convert_to_int(part) for part in parts)
    else:  # empty string or unrecognised layout
        return 0

    return hours * 3600 + minutes * 60 + seconds

In [11]:
# Replace the clock-style End_Time values with whole seconds.
end_time_seconds = fights["End_Time"].apply(time_to_seconds)
fights["End_Time"] = end_time_seconds

In [12]:
# Display the fights frame after End_Time was converted to seconds.
fights

Unnamed: 0,Athlete_ID,Opponent_ID,Fight_Date,Last_Round,End_Time,Victory_Method,Fight_Result
0,Muhammad-Naimov,erik-silva,25.02.24,1,44,KO/TKO,Victory
1,Muhammad-Naimov,felipe-lima,22.06.24,3,75,Submission,Loss
2,Muhammad-Naimov,jamie-mullarkey,03.06.23,2,179,KO/TKO,Victory
3,Muhammad-Naimov,nathaniel-wood,21.10.23,3,300,Unanimous Decision,Victory
4,aalon-cruz,spike-carlyle,29.02.20,1,85,KO/TKO,
...,...,...,...,...,...,...,...
15603,zubaira-tukhugov,phillipe-nover,10.12.15,3,300,Unanimous Decision,Victory
15604,zubaira-tukhugov,renato-moicano,14.05.16,3,300,Unanimous Decision,Loss
15605,zubaira-tukhugov,ricardo-ramos,30.10.21,3,300,Unanimous Decision,Victory
15606,zviad-lazishvili,jonathan-martinez,23.10.21,3,300,Unanimous Decision,Loss


In [13]:
# Inspect column dtypes to see which columns are still stored as text.
fights.dtypes

Athlete_ID        object
Opponent_ID       object
Fight_Date        object
Last_Round        object
End_Time           int64
Victory_Method    object
Fight_Result      object
dtype: object

In [14]:
# List the distinct Last_Round labels before casting the column to int.
fights["Last_Round"].unique()

array(['1', '3', '2', 'N/A', '5', '4'], dtype=object)

In [15]:
# "N/A" rounds are stored as 0 so the whole column can be cast to int.
last_round_labels = fights["Last_Round"].replace({"N/A": "0"})
fights["Last_Round"] = last_round_labels.astype(int)

In [16]:
# Total fight time in seconds: time elapsed in the final round plus 360 s for
# every round completed before it.
# NOTE(review): End_Time shows rounds ending at 5:00 (300 s); 360 s per earlier
# round presumably includes a one-minute break -- confirm this is intended.
# Rounds recorded as 0 (originally "N/A") have no earlier rounds; without the
# clip they would produce a negative Fight_Time (End_Time - 360).
completed_rounds = (fights["Last_Round"] - 1).clip(lower=0)
fights["Fight_Time"] = fights["End_Time"] + completed_rounds * 360


In [17]:
# Display the fights frame including the derived Fight_Time column.
fights

Unnamed: 0,Athlete_ID,Opponent_ID,Fight_Date,Last_Round,End_Time,Victory_Method,Fight_Result,Fight_Time
0,Muhammad-Naimov,erik-silva,25.02.24,1,44,KO/TKO,Victory,44
1,Muhammad-Naimov,felipe-lima,22.06.24,3,75,Submission,Loss,795
2,Muhammad-Naimov,jamie-mullarkey,03.06.23,2,179,KO/TKO,Victory,539
3,Muhammad-Naimov,nathaniel-wood,21.10.23,3,300,Unanimous Decision,Victory,1020
4,aalon-cruz,spike-carlyle,29.02.20,1,85,KO/TKO,,85
...,...,...,...,...,...,...,...,...
15603,zubaira-tukhugov,phillipe-nover,10.12.15,3,300,Unanimous Decision,Victory,1020
15604,zubaira-tukhugov,renato-moicano,14.05.16,3,300,Unanimous Decision,Loss,1020
15605,zubaira-tukhugov,ricardo-ramos,30.10.21,3,300,Unanimous Decision,Victory,1020
15606,zviad-lazishvili,jonathan-martinez,23.10.21,3,300,Unanimous Decision,Loss,1020


In [19]:
# Replace the clock-style End_Time values with whole seconds.
end_time_seconds = fights["End_Time"].apply(time_to_seconds)
fights["End_Time"] = end_time_seconds

# Maps every raw Victory_Method label to one canonical category.
victory_method_mapping = {
    # Category: KO/TKO
    'KO/TKO': 'KO/TKO',
    'TKO': 'KO/TKO',
    'ko/tko': 'KO/TKO',
    'KO': 'KO/TKO',
    "TKO - Doctor's Stoppage": 'KO/TKO',
    'TKO - Doctor Stoppage': 'KO/TKO',
    # NOTE(review): 'FIN'/'Fin' are grouped with KO/TKO here -- confirm they
    # do not denote submissions or finishes in general.
    'FIN': 'KO/TKO',
    'Fin': 'KO/TKO',

    # Category: Submission
    'Submissão': 'Submission',
    'sub': 'Submission',
    'submission': 'Submission',

    # Category: Unanimous Decision
    # NOTE(review): the bare "DEC" spellings carry no decision type; they are
    # all treated as unanimous -- confirm.
    'Decision - Unanimous': 'Unanimous Decision',
    'DECISION - UNANIMOUS': 'Unanimous Decision',
    'DECISION-UNANIMOUS': 'Unanimous Decision',
    'Unanimous Decision': 'Unanimous Decision',
    'Decision- Unanimous': 'Unanimous Decision',
    'UD': 'Unanimous Decision',
    'DEC': 'Unanimous Decision',
    'Dec': 'Unanimous Decision',
    'DEc': 'Unanimous Decision',
    # Lower-case spelling occurs in the data but previously had no entry,
    # so it was left unnormalised.
    'dec': 'Unanimous Decision',

    # Category: Split Decision
    'Decision - Split': 'Split Decision',
    'Split Decision': 'Split Decision',

    # Category: Majority Decision
    'Decision - Majority': 'Majority Decision',
    'DECISION - MAJORITY': 'Majority Decision',
    'Majority Decision': 'Majority Decision',

    # Category: Draw
    'DRAW': 'Draw',
    'Draw': 'Draw',

    # Category: No Contest
    'No Contest': 'No Contest',
    'Could Not Continue': 'No Contest',
    'Overturned': 'No Contest',

    # Category: Disqualification
    'DQ': 'Disqualification',

    # Category: Other (kept under the label "Outros")
    'TBD': 'Outros',
    '2': 'Outros',
    'Outros': 'Outros',
    'N/A': 'Outros',
}

# Collapse the raw labels into the canonical categories; labels without an
# entry in the mapping are left as they are.
fights["Victory_Method"] = fights["Victory_Method"].replace(victory_method_mapping)

# "N/A" rounds are stored as 0 so the whole column can be cast to int.
fights["Last_Round"] = fights["Last_Round"].replace({"N/A": "0"}).astype(int)

# Total fight time in seconds: time elapsed in the final round plus 360 s for
# every round completed before it.
# NOTE(review): End_Time shows rounds ending at 5:00 (300 s); 360 s per earlier
# round presumably includes a one-minute break -- confirm this is intended.
# Rounds recorded as 0 have no earlier rounds; without the clip they would
# produce a negative Fight_Time (End_Time - 360).
completed_rounds = (fights["Last_Round"] - 1).clip(lower=0)
fights["Fight_Time"] = fights["End_Time"] + completed_rounds * 360

In [20]:
# Display the fully processed fights frame.
fights

Unnamed: 0,Athlete_ID,Opponent_ID,Fight_Date,Last_Round,End_Time,Victory_Method,Fight_Result,Fight_Time
0,Muhammad-Naimov,erik-silva,25.02.24,1,44,KO/TKO,Victory,44
1,Muhammad-Naimov,felipe-lima,22.06.24,3,75,Submission,Loss,795
2,Muhammad-Naimov,jamie-mullarkey,03.06.23,2,179,KO/TKO,Victory,539
3,Muhammad-Naimov,nathaniel-wood,21.10.23,3,300,Unanimous Decision,Victory,1020
4,aalon-cruz,spike-carlyle,29.02.20,1,85,KO/TKO,,85
...,...,...,...,...,...,...,...,...
15603,zubaira-tukhugov,phillipe-nover,10.12.15,3,300,Unanimous Decision,Victory,1020
15604,zubaira-tukhugov,renato-moicano,14.05.16,3,300,Unanimous Decision,Loss,1020
15605,zubaira-tukhugov,ricardo-ramos,30.10.21,3,300,Unanimous Decision,Victory,1020
15606,zviad-lazishvili,jonathan-martinez,23.10.21,3,300,Unanimous Decision,Loss,1020


In [21]:
# Check the column dtypes after the conversions above.
fights.dtypes

Athlete_ID        object
Opponent_ID       object
Fight_Date        object
Last_Round         int64
End_Time           int64
Victory_Method    object
Fight_Result      object
Fight_Time         int64
dtype: object