In [1]:
import pandas as pd
import numpy as np
import os

# Config
INPUT_PATH = '../data/processed/ufc_fight_stats_cleaned.csv'
OUTPUT_PATH = '../data/processed/ufc_fighter_style_features.csv'

if not os.path.exists(INPUT_PATH):
    print(f"Error: Could not find {INPUT_PATH}. Please run 01 first.")
else:
    print("Loading cleaned fight data...")
    df = pd.read_csv(INPUT_PATH)

    # Styles take time to manifest. Short knockouts should be considered noise
    df = df[df['total_fight_seconds'] > 60].copy()

    # Calculate "Per Minute" Metrics (Activity)
    # Using 60.0 to ensure float division
    duration_min = df['total_fight_seconds'] / 60.0
    
    df['Sig_Str_PM'] = df['Sig_Str_Landed'] / duration_min
    df['Takedown_Att_PM'] = df['Takedowns_Att'] / duration_min
    df['Sub_Att_PM'] = df['Sub_Attempts'] / duration_min

    # Calculate "Style Ratios" (Distance vs Clinch vs Ground)
    total_landed = (df['Distance_Strikes_Landed'] + 
                    df['Clinch_Strikes_Landed'] + 
                    df['Ground_Strikes_Landed'])
    
    # Avoid division by zero
    df['Distance_Ratio'] = np.where(total_landed > 0, df['Distance_Strikes_Landed'] / total_landed, 0)
    df['Clinch_Ratio'] = np.where(total_landed > 0, df['Clinch_Strikes_Landed'] / total_landed, 0)
    df['Ground_Ratio'] = np.where(total_landed > 0, df['Ground_Strikes_Landed'] / total_landed, 0)

    # Control Ratio (How much of the fight a fighter dictated)
    df['Control_Ratio'] = df['Control_Seconds'] / df['total_fight_seconds']

    # Aggregate to Fighter Level by taking the mean of these metrics across entire career
    style_metrics = [
        'Sig_Str_PM', 'Takedown_Att_PM', 'Sub_Att_PM', 
        'Distance_Ratio', 'Clinch_Ratio', 'Ground_Ratio', 'Control_Ratio'
    ]

    print("Aggregating into fighter profiles...")
    fighter_profiles = df.groupby('Fighter')[style_metrics].mean().reset_index()

    # Add Fight Count (for filtering later)
    fight_counts = df.groupby('Fighter')['Fight_Id'].count().reset_index().rename(columns={'Fight_Id': 'Total_Fights'})
    fighter_profiles = fighter_profiles.merge(fight_counts, on='Fighter')

    # Save
    fighter_profiles.to_csv(OUTPUT_PATH, index=False)
    print(f"Success. Saved style profiles for {len(fighter_profiles)} fighters to {OUTPUT_PATH}")
    print(fighter_profiles.head())

Loading cleaned fight data...
Aggregating into fighter profiles...
Success! Saved style profiles for 2601 fighters to ../data/processed/ufc_fighter_style_features.csv
          Fighter  Sig_Str_PM  Takedown_Att_PM  Sub_Att_PM  Distance_Ratio  \
0   AJ Cunningham    4.446377         0.000000    0.033333        0.972222   
1       AJ Dobson    3.774176         0.100000    0.000000        0.854668   
2     AJ Fletcher    2.936745         0.219114    0.071117        0.686188   
3      Aalon Cruz    0.705882         0.952941    0.000000        0.500000   
4  Aaron Phillips    1.417112         0.100000    0.033333        0.660733   

   Clinch_Ratio  Ground_Ratio  Control_Ratio  Total_Fights  
0      0.027778      0.000000       0.047101             2  
1      0.115984      0.029348       0.165626             4  
2      0.102956      0.210857       0.245997             4  
3      0.000000      0.000000       0.084412             2  
4      0.209881      0.129386       0.121951             4 