# 04 Cleanse Fighter Page Info

## Cleansing TODOs:
- Convert reach string -> inches
- Convert weight string -> lbs
- Convert height string -> inches
- Convert fighter slug -> all lower case, no spaces or dashes

## Main Goal:
- Be able to join this data to the training stub

## Imports

In [58]:
import pandas as pd
import re

## Bring in raw data frame

In [59]:
# Read the scraped fighter-page CSV; its first column is used as the index.
Fighter_Info2181 = pd.read_csv(
    filepath_or_buffer='../../02_Data/01_Raw_Scraped_Data/UFC_Fighter_Page_Info2181.csv',
    index_col=0,
)

In [60]:
# Overwrite the column labels positionally with short snake_case names.
# The assignment is by position, so this order has to match the column
# order of the frame loaded from the CSV.
Fighter_Info2181.columns = [
    'age',
    'college',
    'degree',
    'fights_out',
    'from',
    'height',
    'leg_reach',
    'nickname',
    'past',
    'reach',
    'weight',
    'fighter_slug',
]

In [61]:
# Work on an independent copy that holds only the slug and the body
# measurements; the raw frame is left as loaded.
df = Fighter_Info2181.loc[:, ['fighter_slug', 'reach', 'weight', 'height', 'leg_reach']].copy()

## Column cleansing

In [62]:
# Trim the final character from every non-null reach value
# (presumably the inch mark -- confirm against the raw data).
# Missing values are left untouched; the trimmed values are still strings.
df.loc[df['reach'].notna(), 'reach'] = df.loc[df['reach'].notna(), 'reach'].map(lambda raw: raw[:-1])

In [63]:
# Trim the final character from every non-null leg_reach value
# (presumably the inch mark -- confirm against the raw data).
# Missing values are left untouched; the trimmed values are still strings.
df.loc[df['leg_reach'].notna(), 'leg_reach'] = df.loc[df['leg_reach'].notna(), 'leg_reach'].map(lambda raw: raw[:-1])

In [64]:
# Convert weight to lbs.
# Keep only the leading run of digits of each weight string instead of a
# fixed three-character slice, so two- or four-digit weights come out intact.
# The result is still a digit string (no numeric cast here); missing values
# stay NaN, and a string that does not start with digits also becomes NaN.
df['weight'] = df['weight'].str.extract(r'^\s*(\d+)', expand=False)

In [65]:
# Normalise the slug so it can be used as a join key: remove dashes and any
# whitespace, then lower-case. Missing slugs are passed through as NaN
# instead of raising inside re.sub.
df['fighter_slug'] = df['fighter_slug'].map(
    lambda slug: re.sub(r'[-\s]', '', slug).lower(),
    na_action='ignore',
)

In [66]:
# Convert height to inches
def convert_height(height_string):
    """Convert a feet-and-inches height string to total inches.

    The first two runs of digits in ``height_string`` are read as feet and
    inches; any further numbers in the string are ignored.

    Parameters
    ----------
    height_string : str
        Text containing at least two integers, feet first and inches second.

    Returns
    -------
    int
        ``feet * 12 + inches``.

    Raises
    ------
    ValueError
        If fewer than two numbers are found in ``height_string``.
    """
    # Raw string: "\d" in a normal string literal is an invalid escape sequence.
    numbers = re.findall(r"\d+", height_string)
    if len(numbers) < 2:
        raise ValueError(
            "expected feet and inches in height string, got %r" % (height_string,)
        )
    feet = int(numbers[0])
    inches = int(numbers[1])
    return feet * 12 + inches

# Replace every non-null height string with its total in inches;
# rows without a height are left as they are.
df.loc[df['height'].notna(), 'height'] = df.loc[df['height'].notna(), 'height'].map(convert_height)

In [67]:
# leg_reach is missing for too many fighters to be useful, so remove it.
df = df.drop('leg_reach', axis=1)

In [68]:
# Preview the first two cleansed rows.
df.head(n=2)

Unnamed: 0,fighter_slug,reach,weight,height
0,dannyabbadi,,185,71
1,tankabbott,,250,72


## Bring Fighter ID into this dataframe

In [69]:
# Load the training stub; its first column is used as the index.
stub = pd.read_csv(
    filepath_or_buffer='../../02_Data/02_Processed_Data/train_stub.csv',
    index_col=0,
)


In [70]:
# Build a lookup of distinct (fighterid, fighter_slug) pairs from the stub.
# Rows with a missing id or slug are dropped and the pairs are sorted by id
# then slug, which is the same table the earlier groupby-and-sum produced,
# but without summing eventid just to deduplicate.
# NOTE(review): only the f1_* columns are read; if the stub also carries
# f2_* columns, fighters that appear only there get no id -- confirm.
fightidmap = (
    stub[['f1_fighterid', 'f1_slug']]
    .rename(columns={'f1_fighterid': 'fighterid', 'f1_slug': 'fighter_slug'})
    .dropna()
    .drop_duplicates()
    .sort_values(['fighterid', 'fighter_slug'])
    .reset_index(drop=True)
)

In [71]:
# Left join on the normalised slug: every fighter row is kept, and
# fighterid is NaN where the slug has no match in the lookup.
df = pd.merge(df, fightidmap, how='left', on='fighter_slug')

In [73]:
# Preview the first five rows after the fighterid join.
df.head(n=5)

Unnamed: 0,fighter_slug,reach,weight,height,fighterid
0,dannyabbadi,,185,71,
1,tankabbott,,250,72,
2,shamilabdurakhimov,76.0,235,75,2424.0
3,daichiabe,71.0,170,71,2964.0
4,papyabedi,74.0,185,71,1878.0


## Export Cleansed Fighter Page Info

In [74]:
# Write the cleansed frame (index included) to the processed-data folder.
df.to_csv(path_or_buf='../../02_Data/02_Processed_Data/fighter_page_info.csv')