## In this notebook we convert all of the offensive and defensive statistics into ratios. For example, avg_BODY_att and avg_BODY_landed are compressed into a single success-rate ratio: body_off_success = (avg_BODY_landed / avg_BODY_att).

## Imports

In [10]:
import pandas as pd

## Read in the data

In [11]:
# Location of the preprocessed fight data, relative to this notebook.
ufc_data_location = '../raw_data/preprocessed_data.csv'

# Load the CSV into a DataFrame using pandas' default parsing options.
# NOTE(review): the displayed frame contains an 'Unnamed: 0' column, which
# looks like a previously saved row index being read back as data -- confirm
# whether it should be kept as a column.
ufc_data = pd.read_csv(filepath_or_buffer=ufc_data_location)

In [12]:
# Bare expression: as the last statement of the cell, the notebook renders
# the loaded frame as the cell output (quick visual check of the data).
ufc_data

Unnamed: 0,Winner,title_bout,no_of_rounds,B_current_lose_streak,B_current_win_streak,B_draw,B_avg_BODY_att,B_avg_BODY_landed,B_avg_CLINCH_att,B_avg_CLINCH_landed,...,weight_class_Women's Strawweight,B_Stance_Open Stance,B_Stance_Orthodox,B_Stance_Sideways,B_Stance_Southpaw,B_Stance_Switch,R_Stance_Open Stance,R_Stance_Orthodox,R_Stance_Southpaw,R_Stance_Switch
0,Red,True,5,0.0,4.0,0.0,9.200000,6.000000,0.200000,0.000000,...,0,0,1,0,0,0,0,1,0,0
1,Red,True,5,0.0,3.0,0.0,14.600000,9.100000,11.800000,7.300000,...,0,0,1,0,0,0,0,0,1,0
2,Red,False,3,0.0,3.0,0.0,15.354839,11.322581,6.741935,4.387097,...,0,0,1,0,0,0,0,1,0,0
3,Blue,False,3,0.0,4.0,0.0,17.000000,14.000000,13.750000,11.000000,...,0,0,0,0,0,1,0,1,0,0
4,Blue,False,3,0.0,1.0,0.0,17.000000,14.500000,2.500000,2.000000,...,0,0,0,0,1,0,0,0,1,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3587,Red,False,1,0.0,1.0,0.0,1.000000,1.000000,0.000000,0.000000,...,0,0,0,0,1,0,0,0,1,0
3588,Red,False,1,0.0,1.0,0.0,0.000000,0.000000,1.000000,1.000000,...,0,0,1,0,0,0,0,1,0,0
3589,Red,True,1,0.0,2.0,0.0,0.500000,0.500000,0.000000,0.000000,...,0,0,1,0,0,0,0,0,1,0
3590,Red,False,1,0.0,1.0,0.0,0.000000,0.000000,0.000000,0.000000,...,0,0,1,0,0,0,0,0,1,0


## Generate ratios

### Convert offensive statistics

In [13]:
# Maps each offensive ratio name (without the 'B_'/'R_' corner prefix) to the
# pair [numerator column, denominator column], i.e. [landed, attempted].
# Note the last entry: the 'total_landed' ratio is built from the TOTAL_STR
# columns, so the label and the column token differ.
off_ratios_dict = {
    label + '_off_success': ['avg_' + stat + '_landed', 'avg_' + stat + '_att']
    for label, stat in (
        ('body', 'BODY'),
        ('clinch', 'CLINCH'),
        ('distance', 'DISTANCE'),
        ('ground', 'GROUND'),
        ('head', 'HEAD'),
        ('leg', 'LEG'),
        ('sig_str', 'SIG_STR'),
        ('td', 'TD'),
        ('total_landed', 'TOTAL_STR'),
    )
}

In [14]:
# Collapse every offensive landed/attempted column pair into one success-rate
# column, once for the blue corner ('B_') and once for the red corner ('R_').
for suffix, (numerator, denominator) in off_ratios_dict.items():
    for prefix in ('B_', 'R_'):
        landed_col = prefix + numerator
        attempted_col = prefix + denominator
        success_col = prefix + suffix

        # Element-wise landed / attempted. Rows where both values are 0 give
        # NaN here; the post-processing cell later replaces NaN with 0.
        ufc_data[success_col] = ufc_data[landed_col] / ufc_data[attempted_col]

        # The two source columns are replaced by the ratio, so remove them.
        ufc_data = ufc_data.drop(columns=[landed_col, attempted_col])

        # Log which columns produced which ratio.
        print('{} = {}/{}'.format(success_col, landed_col, attempted_col))

B_body_off_success = B_avg_BODY_landed/B_avg_BODY_att
R_body_off_success = R_avg_BODY_landed/R_avg_BODY_att
B_clinch_off_success = B_avg_CLINCH_landed/B_avg_CLINCH_att
R_clinch_off_success = R_avg_CLINCH_landed/R_avg_CLINCH_att
B_distance_off_success = B_avg_DISTANCE_landed/B_avg_DISTANCE_att
R_distance_off_success = R_avg_DISTANCE_landed/R_avg_DISTANCE_att
B_ground_off_success = B_avg_GROUND_landed/B_avg_GROUND_att
R_ground_off_success = R_avg_GROUND_landed/R_avg_GROUND_att
B_head_off_success = B_avg_HEAD_landed/B_avg_HEAD_att
R_head_off_success = R_avg_HEAD_landed/R_avg_HEAD_att
B_leg_off_success = B_avg_LEG_landed/B_avg_LEG_att
R_leg_off_success = R_avg_LEG_landed/R_avg_LEG_att
B_sig_str_off_success = B_avg_SIG_STR_landed/B_avg_SIG_STR_att
R_sig_str_off_success = R_avg_SIG_STR_landed/R_avg_SIG_STR_att
B_td_off_success = B_avg_TD_landed/B_avg_TD_att
R_td_off_success = R_avg_TD_landed/R_avg_TD_att
B_total_landed_off_success = B_avg_TOTAL_STR_landed/B_avg_TOTAL_STR_att
R_total_landed_off_success = R_avg_TOTAL_STR_landed/R_avg_TOTAL_STR_att

### Convert Defensive statistics

In [15]:
# Maps each defensive ratio name (without the 'B_'/'R_' corner prefix) to the
# pair [numerator column, denominator column], built from the 'avg_opp_*'
# landed/attempted columns.
# Note the last entry: the 'total_landed' ratio is built from the TOTAL_STR
# columns, so the label and the column token differ.
def_ratios_dict = {
    label + '_def_success': [
        'avg_opp_' + stat + '_landed',
        'avg_opp_' + stat + '_att',
    ]
    for label, stat in (
        ('body', 'BODY'),
        ('clinch', 'CLINCH'),
        ('distance', 'DISTANCE'),
        ('ground', 'GROUND'),
        ('head', 'HEAD'),
        ('leg', 'LEG'),
        ('sig_str', 'SIG_STR'),
        ('td', 'TD'),
        ('total_landed', 'TOTAL_STR'),
    )
}

In [16]:
# Collapse every 'avg_opp_*' landed/attempted column pair into one ratio
# column, once for the blue corner ('B_') and once for the red corner ('R_').
# NOTE(review): the ratio is opp_landed / opp_att, so if 'opp' means the
# opponent's strikes, a higher '*_def_success' value means the opponent landed
# a larger share -- confirm the intended direction of this feature.
for suffix, (numerator, denominator) in def_ratios_dict.items():
    for prefix in ('B_', 'R_'):
        landed_col = prefix + numerator
        attempted_col = prefix + denominator
        success_col = prefix + suffix

        # Element-wise landed / attempted. Rows where both values are 0 give
        # NaN here; the post-processing cell later replaces NaN with 0.
        ufc_data[success_col] = ufc_data[landed_col] / ufc_data[attempted_col]

        # The two source columns are replaced by the ratio, so remove them.
        ufc_data = ufc_data.drop(columns=[landed_col, attempted_col])

        # Log which columns produced which ratio.
        print('{} = {}/{}'.format(success_col, landed_col, attempted_col))

B_body_def_success = B_avg_opp_BODY_landed/B_avg_opp_BODY_att
R_body_def_success = R_avg_opp_BODY_landed/R_avg_opp_BODY_att
B_clinch_def_success = B_avg_opp_CLINCH_landed/B_avg_opp_CLINCH_att
R_clinch_def_success = R_avg_opp_CLINCH_landed/R_avg_opp_CLINCH_att
B_distance_def_success = B_avg_opp_DISTANCE_landed/B_avg_opp_DISTANCE_att
R_distance_def_success = R_avg_opp_DISTANCE_landed/R_avg_opp_DISTANCE_att
B_ground_def_success = B_avg_opp_GROUND_landed/B_avg_opp_GROUND_att
R_ground_def_success = R_avg_opp_GROUND_landed/R_avg_opp_GROUND_att
B_head_def_success = B_avg_opp_HEAD_landed/B_avg_opp_HEAD_att
R_head_def_success = R_avg_opp_HEAD_landed/R_avg_opp_HEAD_att
B_leg_def_success = B_avg_opp_LEG_landed/B_avg_opp_LEG_att
R_leg_def_success = R_avg_opp_LEG_landed/R_avg_opp_LEG_att
B_sig_str_def_success = B_avg_opp_SIG_STR_landed/B_avg_opp_SIG_STR_att
R_sig_str_def_success = R_avg_opp_SIG_STR_landed/R_avg_opp_SIG_STR_att
B_td_def_success = B_avg_opp_TD_landed/B_avg_opp_TD_att
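R_td_def_success = R_avg_opp_TD_landed/R_avg_opp_TD_att
B_total_landed_def_success = B_avg_opp_TOTAL_STR_landed/B_avg_opp_TOTAL_STR_att
R_total_landed_def_success = R_avg_opp_TOTAL_STR_landed/R_avg_opp_TOTAL_STR_att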

### Postprocess data by converting NaNs to 0

In [17]:
# Replace every NaN in the frame with 0. This applies to all columns, not only
# the ratio columns created above.
ufc_data = ufc_data.fillna(value=0)

## Save new csv into the generated_data directory

In [18]:
# Write the ratio-based frame to disk; index=False keeps the DataFrame's row
# index out of the output file.
ufc_data.to_csv(
    path_or_buf='../generated_data/preprocessed_ratio_data.csv',
    index=False,
)