In [1]:
# notebook globals
import pandas as pd
from dateutil.relativedelta import relativedelta

# load data
def _strip_text_columns(frame):
    """Return `frame` with surrounding whitespace stripped from every text column.

    Text columns are detected with `is_string_dtype` on the column dtype, which is
    True for both the legacy object dtype and the dedicated string dtype. A plain
    `dtype == 'object'` comparison is False for the dedicated string dtype, so the
    stripping would be skipped without any error. Non-text columns are returned
    unchanged.
    """
    return frame.apply(
        lambda col: col.str.strip() if pd.api.types.is_string_dtype(col.dtype) else col
    )


# fighter names are stripped here because they are later used as a merge key
fighters = _strip_text_columns(pd.read_csv('../tuf/fighters.csv', parse_dates=['dob']))
fights = pd.read_csv('../tuf/fights.csv', parse_dates=['event_date'])
fights[['f1_name', 'f2_name']] = fights[['f1_name', 'f2_name']].apply(lambda x: x.str.strip())

# calculate fighter age at the time of the fight
def calculate_age(dob, event_date):
    """Return the number of whole years between `dob` and `event_date`.

    Parameters
    ----------
    dob : date of birth of the fighter.
    event_date : date of the fight.

    Returns
    -------
    The `years` component of `relativedelta(event_date, dob)`, or `None` when
    either argument is missing according to `pd.isna`.
    """
    both_known = not pd.isna(dob) and not pd.isna(event_date)
    if both_known:
        elapsed = relativedelta(event_date, dob)
        return elapsed.years
    return None

In [2]:
# add fighter metrics
metric_columns = ['height', 'reach', 'stance', 'dob']

# Build a lookup holding exactly one row per fighter name. A left merge against a
# non-unique key emits one copy of the fight per matching fighter row, silently
# inflating the fight table. Rows without a name are discarded, exact duplicate
# rows are collapsed, and names that still map to conflicting metrics are left out
# of the lookup, so those fighters get missing metrics rather than arbitrary ones.
# `ambiguous_names` keeps the excluded names so they can be inspected.
fighter_lookup = (
    fighters[['name'] + metric_columns]
    .dropna(subset=['name'])
    .drop_duplicates()
)
ambiguous_names = sorted(
    fighter_lookup.loc[fighter_lookup['name'].duplicated(keep=False), 'name'].unique()
)
fighter_lookup = fighter_lookup.drop_duplicates(subset='name', keep=False)

for fighter_num in ['f1', 'f2']:
    name_column = f'{fighter_num}_name'
    age_column = f'{fighter_num}_age'
    # e.g. 'height' -> 'f1_height'
    renamed_metrics = {column: f'{fighter_num}_{column}' for column in metric_columns}

    fights = (
        fights
        # discard columns left over from a previous run so the cell can be re-executed
        .drop(columns=[*renamed_metrics.values(), age_column], errors='ignore')
        .merge(
            fighter_lookup.rename(columns={'name': name_column, **renamed_metrics}),
            on=name_column,
            how='left',
            validate='many_to_one',  # guard: the lookup must stay unique per name
        )
    )

    # calculate age at time of fight
    ages = [
        calculate_age(dob, event_date)
        for dob, event_date in zip(fights[renamed_metrics['dob']], fights['event_date'])
    ]
    # to_numeric turns the None placeholders into NaN so the column is numeric
    # even when every age is missing
    fights[age_column] = pd.to_numeric(pd.Series(ages, index=fights.index))

# fighter differentials (absolute, so they do not depend on which fighter is f1)
fights['reach_diff'] = (fights['f1_reach'] - fights['f2_reach']).abs()
fights['height_diff'] = (fights['f1_height'] - fights['f2_height']).abs()
fights['age_diff'] = (fights['f1_age'] - fights['f2_age']).abs()

In [3]:
# columns carried over unchanged from the fights table
original_labels = [
    'event_date', 'event_name', 'weight_class',
    'f1_name', 'f2_name', 'winner', 'loser',
    'method', 'method_details', 'end_round',
    'time', 'total_time', 'outcome',
]

# per-fighter metrics
metric_labels = [
    'f1_height', 'f1_reach', 'f1_stance', 'f1_age',
    'f2_height', 'f2_reach', 'f2_stance', 'f2_age',
]

# differentials between the two fighters
differential_labels = ['reach_diff', 'height_diff', 'age_diff']

# final column order of the exported file
column_labels = original_labels + metric_labels + differential_labels

# keep only the listed columns, in that order (a missing label raises KeyError)
fights = fights.loc[:, column_labels]

# write the enhanced table to a new csv file, without the index column
fights.to_csv('enhanced_fights.csv', index=False)