In [1]:
import pandas as pd
import numpy as np
import matplotlib as pt
import ast


In [2]:
# Load the three CSV inputs that sit one directory above the notebook.
# The files are read in the order listed and bound to `fighters`, `fights`
# and `rank` respectively.
fighters, fights, rank = (
    pd.read_csv(csv_path)
    for csv_path in ('../fightersData.csv', '../UFC.csv', '../rankings.csv')
)


In [3]:
# Per-corner columns taken from each fight row; "division" is fight-level and
# shared by both corners.
fighter_cols_red = [
    "r_name", "r_height", "r_weight", "r_reach", "r_wins", "r_losses", "r_draws",
    "r_str_acc", "r_sapm", "r_str_def", "r_td_avg", "r_td_def", "r_sub_avg",
    "r_stance", "r_dob", "r_nick_name", "r_splm", "r_td_avg_acc", "division"
]

fighter_cols_blue = [
    "b_name", "b_height", "b_weight", "b_reach", "b_wins", "b_losses", "b_draws",
    "b_str_acc", "b_sapm", "b_str_def", "b_td_avg", "b_td_def", "b_sub_avg",
    "b_stance", "b_dob", "b_nick_name", "b_splm", "b_td_avg_acc", "division"
]

# Create red corner dataframe.
# Only the *leading* corner prefix is stripped. str.replace("r_", "") removes
# every occurrence, which turned "r_str_acc" into "stacc" (and "b_sub_avg"
# into "suavg"), so red and blue columns no longer lined up after concat and
# the affected averages were computed from one corner only.
red_df = fights[fighter_cols_red].rename(
    columns=lambda x: x[len("r_"):] if x.startswith("r_") else x
)

# Create blue corner dataframe (same prefix handling as the red corner).
blue_df = fights[fighter_cols_blue].rename(
    columns=lambda x: x[len("b_"):] if x.startswith("b_") else x
)

# Combine both dataframes: one row per (fight, fighter).
fighters_df = pd.concat([red_df, blue_df], ignore_index=True)

# Create comprehensive fighter averages (one row per fighter name).
fighter_avg = fighters_df.groupby("name").agg({
    "height": "mean",
    "weight": "mean",
    "reach": "mean",
    "wins": "max",  # Take max as it's cumulative
    "losses": "max",  # Take max as it's cumulative
    "draws": "max",  # Take max as it's cumulative
    "str_acc": "mean",  # Strike accuracy average
    "sapm": "mean",  # Significant strikes absorbed per minute
    "str_def": "mean",  # Strike defense percentage
    "td_avg": "mean",  # Takedown average per 15 minutes
    "td_def": "mean",  # Takedown defense percentage
    "sub_avg": "mean",  # Submission attempts per 15 minutes
    "td_avg_acc": "mean",  # Takedown accuracy average
    "splm": "mean",  # Significant strikes landed per minute
    "stance": lambda x: x.mode()[0] if not x.mode().empty else None,  # Most common stance
    "nick_name": "first",  # Take first occurrence
    "dob": "first",  # Date of birth (should be consistent)
    "division": lambda x: list(x.unique())  # List of all divisions fought in
}).reset_index()

In [4]:
# Rebuild the per-fighter table with per-fight stats, fight outcomes and a
# single main weight class (`main_div`) per fighter.
fighter_cols_red = [
    "r_name", "r_height", "r_weight", "r_reach", "r_wins", "r_losses", "r_draws",
    "r_str_acc", "r_sapm", "r_str_def", "r_td_avg", "r_td_def", "r_sub_avg",
    "r_stance", "r_dob", "r_nick_name", "r_splm", "r_td_avg_acc",
    "r_kd", "r_sub_att", "r_ctrl", "r_sig_str_landed", "r_sig_str_atmpted",
    "r_td_landed", "r_td_atmpted", "division", "winner", "method", "finish_round", "total_rounds", "title_fight"
]

fighter_cols_blue = [
    "b_name", "b_height", "b_weight", "b_reach", "b_wins", "b_losses", "b_draws",
    "b_str_acc", "b_sapm", "b_str_def", "b_td_avg", "b_td_def", "b_sub_avg",
    "b_stance", "b_dob", "b_nick_name", "b_splm", "b_td_avg_acc",
    "b_kd", "b_sub_att", "b_ctrl", "b_sig_str_landed", "b_sig_str_atmpted",
    "b_td_landed", "b_td_atmpted", "division", "winner", "method", "finish_round", "total_rounds", "title_fight"
]


def _corner_frame(fights_df, columns, prefix, corner):
    """Return one corner's view of the fights table with outcome columns added.

    Parameters
    ----------
    fights_df : DataFrame holding one row per fight.
    columns   : columns to select; corner-specific ones start with `prefix`.
    prefix    : corner prefix to strip from column names ("r_" or "b_").
    corner    : label written to the new `corner` column.

    Returns
    -------
    A new DataFrame with the prefix removed from column names and the extra
    columns `corner`, `won_fight`, `method_losses` and `won_title_fight`.
    `method` and `finish_round` are blanked on rows the fighter did not win.
    """
    # Strip only the *leading* prefix. str.replace(prefix, "") removes every
    # occurrence, which mangled e.g. "r_str_acc" -> "stacc",
    # "r_sig_str_landed" -> "sig_stlanded" and "b_sub_att" -> "suatt", so the
    # two corners ended up with different column names and the aggregation
    # below only saw one corner for those stats.
    frame = fights_df[columns].rename(
        columns=lambda col: col[len(prefix):] if col.startswith(prefix) else col
    )
    frame['corner'] = corner

    # A fighter won when the fight's `winner` equals their own name.
    frame['won_fight'] = frame['winner'] == frame['name']

    # Capture the method on non-winning rows *before* it is blanked below.
    frame['method_losses'] = frame['method'].where(~frame['won_fight'])

    # Only keep method/finish data for winners
    frame.loc[~frame['won_fight'], ['method', 'finish_round']] = None

    frame['won_title_fight'] = frame['won_fight'] & (frame['title_fight'] == 1)
    return frame


# Create red and blue corner dataframes
red_df = _corner_frame(fights, fighter_cols_red, "r_", "red")
blue_df = _corner_frame(fights, fighter_cols_blue, "b_", "blue")

# Combine both dataframes: one row per (fight, fighter)
fighters_df = pd.concat([red_df, blue_df], ignore_index=True)
# Duplicate `division` so it can be aggregated twice: as the list of all
# divisions and as the single most frequent one.
fighters_df['main_div'] = fighters_df['division']

# Enhanced fighter averages with additional stats
fighter_avg = fighters_df.groupby("name").agg({
    # Physical attributes
    "height": "mean",
    "weight": "mean",
    "reach": "mean",

    # Record (cumulative stats)
    "wins": "max",
    "losses": "max",
    "draws": "max",

    # Core striking stats
    "str_acc": "mean",          # Strike accuracy average
    "sapm": "mean",             # Significant strikes absorbed per minute
    "str_def": "mean",          # Strike defense percentage
    "splm": "mean",             # Significant strikes landed per minute

    # Grappling stats
    "td_avg": "mean",           # Takedown average per 15 minutes
    "td_def": "mean",           # Takedown defense percentage
    "td_avg_acc": "mean",       # Takedown accuracy average
    "sub_avg": "mean",          # Submission attempts per 15 minutes

    # Additional performance metrics
    "kd": "sum",                # Total knockdowns across all fights
    "sub_att": "sum",           # Total submission attempts
    "ctrl": "sum",              # Total control time (seconds)
    "sig_str_landed": "sum",    # Total significant strikes landed
    "sig_str_atmpted": "sum",   # Total significant strikes attempted
    "td_landed": "sum",         # Total takedowns landed
    "td_atmpted": "sum",        # Total takedowns attempted

    # Fighter info
    "stance": lambda x: x.mode()[0] if not x.mode().empty else "Unknown",
    "nick_name": "first",
    "dob": "first",
    "division": lambda x: list(x.unique()),  # All divisions fought in
    "main_div": lambda x: x.mode()[0] if not x.mode().empty else x.iloc[0],

    # Fight outcomes for finish rate calculation - ONLY for wins now
    "method": lambda x: [m for m in x if pd.notna(m)],  # Only methods where they won
    "method_losses": lambda x: [m for m in x if pd.notna(m)],  # Methods of fights they did not win
    "finish_round": lambda x: [r for r in x if pd.notna(r)],  # Only finish rounds where they won
    "total_rounds": "sum",  # Summed `total_rounds` over all of the fighter's rows

    "title_fight": "sum",
    "won_title_fight": "sum",

    # Add win count for finish rate calculations
    "corner": "count",  # Number of rows per fighter; renamed to total_ufc_fights below
    "won_fight": "sum"  # Total number of wins
}).reset_index()

fighter_avg = fighter_avg.rename(columns={"corner": "total_ufc_fights"})

In [6]:
def count_win_methods(method_list):
    """Tally winning methods by finish type.

    Each entry is lower-cased and matched by substring against 'ko', 'sub'
    and 'decision', in that order; the first keyword found decides the
    bucket and entries matching none of them are ignored.

    Args:
        method_list: list of method labels; anything that is not a
            non-empty list yields all zeros.

    Returns:
        Tuple ``(ko_count, sub_count, dec_count)``.
    """
    if not (isinstance(method_list, list) and method_list):
        return 0, 0, 0

    # 'tko' always contains 'ko' and 'submission' always contains 'sub', so
    # one keyword per bucket is sufficient.
    tally = {'ko': 0, 'sub': 0, 'decision': 0}
    for entry in method_list:
        label = str(entry).lower()
        for keyword in ('ko', 'sub', 'decision'):
            if keyword in label:
                tally[keyword] += 1
                break

    return tally['ko'], tally['sub'], tally['decision']

def count_loss_methods(method_list):
    """Tally the methods of fights a fighter did not win.

    Each entry is lower-cased and stripped. Labels that are not real losses
    (empty, 'nan', 'draw', 'no contest', 'nc', 'overturned') are counted as
    no-contests; every other entry is bucketed by substring as KO/TKO,
    submission or decision, in that order. Entries matching nothing are
    ignored.

    Args:
        method_list: list of method labels; anything that is not a
            non-empty list yields all zeros.

    Returns:
        Tuple ``(ko_count, sub_count, dec_count, nocon)``. The tuple always
        has four elements so callers can expand it into four columns; the
        empty case used to return three, which left the fourth column NaN.
    """
    if not isinstance(method_list, list) or len(method_list) == 0:
        return 0, 0, 0, 0

    # Compared against the lower-cased label, so only lower-case spellings
    # are needed here.
    non_loss_labels = ('', 'nan', 'draw', 'no contest', 'nc', 'overturned')

    ko_count = 0
    sub_count = 0
    dec_count = 0
    nocon = 0
    for method in method_list:
        method_str = str(method).lower().strip()

        if method_str in non_loss_labels:
            nocon += 1
        elif 'ko' in method_str:          # also matches 'tko'
            ko_count += 1
        elif 'sub' in method_str:         # also matches 'submission'
            sub_count += 1
        elif 'decision' in method_str:
            dec_count += 1

    return ko_count, sub_count, dec_count, nocon
def _as_method_list(value):
    """Return `value` as a list of method labels.

    Lists pass through unchanged, non-blank strings (e.g. a list column that
    was written to CSV and read back) are parsed with ast.literal_eval, and
    anything else (NaN, blank string) becomes an empty list.
    """
    if isinstance(value, list):
        return value
    if isinstance(value, str) and value.strip():
        return ast.literal_eval(value)
    return []


# Normalise both method columns to real lists BEFORE counting. Previously the
# loss counts were computed first, so a string-encoded `method_losses` column
# would have been counted as all zeros.
fighter_avg['method'] = fighter_avg['method'].apply(_as_method_list)
fighter_avg['method_losses'] = fighter_avg['method_losses'].apply(_as_method_list)

# Expand the per-fighter loss tally into one column per bucket. Any bucket
# missing from a tally defaults to 0 so `loss_fights` below never becomes NaN.
loss_counts = fighter_avg['method_losses'].apply(
    lambda x: pd.Series(count_loss_methods(x))
)
fighter_avg[['ko_losses', 'totsub_losses', 'totdec_losses', 'nocon_losses']] = (
    loss_counts.fillna(0).astype(int)
)

# Fights that were neither won nor counted as a no-contest/draw/overturned.
fighter_avg['loss_fights'] = fighter_avg['total_ufc_fights'] - fighter_avg['won_fight'] - fighter_avg['nocon_losses']

# Expand the win tally the same way: KO/TKO, submission and decision wins.
fighter_avg[['ko_t', 'totsub', 'totdec']] = fighter_avg['method'].apply(
    lambda x: pd.Series(count_win_methods(x))
)



In [7]:
# Weight-class labels treated as pound-for-pound lists in the rankings table.
pfp_variants = ["Pound-for-Pound", "Men's Pound-for-Pound", "Women's Pound-for-Pound"]

# Lowest `rank` value each fighter has on any pound-for-pound list,
# as a two-column frame: fighter, pfp_rank.
pfp_ranks = (
    rank.loc[rank['weightclass'].isin(pfp_variants), ['fighter', 'rank']]
    .groupby('fighter')['rank']
    .min()
    .rename('pfp_rank')
    .reset_index()
)


In [8]:
# Lowest `rank` value each fighter has in any non-pound-for-pound weight class,
# as a two-column frame: fighter, div_rank.
# Every label in `pfp_variants` is excluded. Comparing against the single
# string "Pound-for-Pound" let "Men's Pound-for-Pound" and
# "Women's Pound-for-Pound" rows through, so a pound-for-pound position
# could be reported as a fighter's divisional rank.
div_ranks = (
    rank[~rank['weightclass'].isin(pfp_variants)]
    .groupby('fighter')['rank']
    .min()
    .reset_index()
    .rename(columns={'rank': 'div_rank'})
)

In [9]:
# Attach both rank columns with left joins on the fighter's name, so fighters
# without a ranking row are kept. Each join also brings in the right-hand
# `fighter` key column; the second join suffixes the clash as
# fighter_x / fighter_y.
fighter_avg = (
    fighter_avg
    .merge(pfp_ranks, how='left', left_on='name', right_on='fighter')
    .merge(div_ranks, how='left', left_on='name', right_on='fighter')
)

In [10]:
# Remove the join-key columns left behind by the rank merges (ignored when
# absent) and label fighters that have no rank as 'Unranked'.
fighter_avg = (
    fighter_avg
    .drop(columns=['fighter_x', 'fighter_y'], errors='ignore')
    .assign(
        pfp_rank=lambda df: df['pfp_rank'].fillna('Unranked'),
        div_rank=lambda df: df['div_rank'].fillna('Unranked'),
    )
)


In [11]:
# Persist the full per-fighter table, without the index column.
fighter_avg.to_csv(path_or_buf='../fightersDatab.csv', index=False)

In [12]:
# Divisions that get their own export file; values are compared verbatim
# against `main_div`.
divisions = [
    "women's strawweight",
    "women's flyweight",
    "women's bantamweight",
    "women's featherweight",
    "flyweight",
    "bantamweight",
    "featherweight",
    "lightweight",
    "welterweight",
    "middleweight",
    "light heavyweight",
    "heavyweight"
]

# Write one CSV per division. The file name is the division label with spaces
# and apostrophes removed; a division with no matching fighters still
# produces a header-only file.
for div in divisions:
    filename = "".join(ch for ch in div if ch not in (" ", "'"))
    division_rows = fighter_avg.loc[fighter_avg["main_div"].eq(div)]
    division_rows.reset_index(drop=True).to_csv(f"../Weightclass/{filename}.csv", index=False)


In [1]:
# for division_name, division_data in divisionsdf.items():
#     division_data['rank'] = division_data['overall'].rank(method='min', ascending=False).astype(int)

#     rank_values = division_data['rank']
#     division_data = division_data.drop('rank', axis=1)
#     division_data.insert(division_data.columns.get_loc('overall') + 1, 'rank', rank_values)

#     fighters = division_data.sort_values('rank')

#     # display(fighters)
#     fighters.to_csv(f"../SkillLevel/{division_name}.csv", index = False)