# 07 Create Training Stub

 - The training stub is the initial part of the dataframe that I will use to join all the other training data. Each row represents a specific fight, and the columns are the keys used to join in additional data: EventID (to grab additional event information), FightID (a unique key for each fight), and F1_FighterID and F2_FighterID (to grab fighter-specific information)
 - In this notebook, I also create the fighter "slug". This acts as a key to join data scraped from the UFC webpages, as opposed to the data from the FightMetrics JSONs
 - I will use the data from the V1 JSONs as the "base", as it contains the EventID, FightID, IDs for both fighters, as well as the outcome of the fight


## Imports

In [14]:
import pandas as pd
import numpy as np
import re

## Import V1_Fight_Fighters_FighterInfo and convert columns to lowercase

In [15]:
# Load the processed V1 fight table; the CSV's first column is read as the index.
V1_df = pd.read_csv('../../02_Data/02_Processed_Data/V1_DF_w_flipped.csv', index_col=0)
# Renumber the rows 0..n-1 and discard the old index in a single step.
# reset_index().drop(columns='index') only works when the old index is unnamed
# (otherwise the inserted column is not called 'index' and drop() raises).
V1_df = V1_df.reset_index(drop=True)
# Lowercase every column name so later cells can use one consistent spelling.
V1_df.columns = [col.lower() for col in V1_df.columns]

## Create train dataframe from V1_df

In [16]:
# Columns that make up the training stub: the event and fight identifiers,
# both fighters' names and IDs, and fighter 1's outcome.
train_columns = ['eventid','fightid','f1_fullname','f2_fullname','f1_fighterid','f2_fighterid','f1_outcome']
# reset_index(drop=True) renumbers the rows and discards the old index directly;
# reset_index().drop(columns='index') breaks whenever the index carries a name.
train = V1_df[train_columns].reset_index(drop=True)


## Create f1 and f2 slug columns to act as keys for joining the fighter info scraped from UFC website

#### I was not able to find a direct link between the FighterIDs and the fighter slugs, so I converted the names to slugs and then created a map to fix the issues

In [17]:
# Manual corrections for fighter slugs.
# Key:   the slug obtained by converting a fighter's full name.
# Value: the slug to use instead when joining the fighter info scraped from
#        the UFC website (presumably the slug the website itself uses).
slug_map = {
    'kamaruusman': 'kamarudeenusman',
    'marcopoloreyes': 'poloreyes',
    'marcioalexandrejr': 'marcioalexandre',
    'claudiosilva': 'claudiohenriquedasilva',
    'philhawes': 'philliphawes',
    'rampagejackson': 'quintonjackson',
    'nicomusoke': 'nicholasmusoke',
    'paddyholohan': 'patrickholohan',
    'migueltorres': 'miguelangeltorres',
    'songkenan': 'kenansong',
    'wuyanan': 'yananwu',
    'williampatolino': 'williammacario',
    'zhanglipeng': 'lipengzhang',
    'jacaresouza': 'ronaldosouza',
    'antoniorodrigonogueira': 'minotauronogueira',
    'josemaria': 'josemariatome',
    'alexeyoleinik': 'oleksiyoliynyk',
    'timjohnson': 'timothyjohnson',
    'rogerionogueira': 'antoniorogerionogueira',
    'dmitriismoliakov': 'dmitrysmolyakov',
    'humbertobrownmorrison': 'humbertobrown',
    'joshuasampo': 'joshsampo',
    'alekseioleinik': 'oleksiyoliynyk',
    'bradleyscott': 'bradscott',
    'juanmanuelpuig': 'juanmanuelpuigcarreon',
    'ruonpotts': 'ruanpotts',
    'wendelloliveiramarques': 'wendelloliveira',
    'rodolforubioperez': 'rodolforubio',
    'michaloleksiejczuk': 'michaeloleksiejczuk',
    'anthonywaldburger': 'tjwaldburgerfighter',
    'antoniocarlosjr': 'antoniocarlos',
    'wagnersilva': 'wagnersilvagomes',
    'richardsonmorreira': 'richardsonmoreira',
    'joegigliotti': 'josephgigliotti',
    'deivesonfigueiredo': 'deivesonalcantara',
    'carlsjohndethomas': 'carlsjohndetomas',
    'alexandervolkanovski': 'alexvolkanovski',
    'ningguangyou': 'guangyouning',
    'joshuaburkman': 'joshburkman',
    'pauloborrachinha': 'paulohenriquecosta',
    'renatocarneiro': 'renatomoicano',
    'daveygrant': 'davidgrant',
    'peggymorgan': 'margaretmorgan',
    'mikerodriguez': 'michaelrodriguez',
    'henriquedasilva': 'luizhenriquedasilva',
    'rafaelfeijao': 'rafaelcavalcante',
    'landovannata': 'landonvannata',
    'freddyserrano': 'fredyserrano',
    'davidmitchell': 'davidmitchellfighter',
    'marcioalexandrejunior': 'marcioalexandre',
    'garrethmclellen': 'garrethmclellan',
    'manvelgamburyan': 'mannygamburyan',
    'mairbecktaisumov': 'mairbektaisumov',
    'jussierdasilva': 'jussierformiga',
    'ramirohernandez': 'ramirohernandezjr',
    # NOTE(review): this entry maps to an empty slug - confirm it is intentional.
    'tiagodossantosesilva': '',
    'ovincesaintpreux': 'ovincestpreux',
    'davidbranch': 'davebranch',
    'heatherjoclark': 'heatherclark',
    'marcosrogeriodelima': 'marcusrogeriodelima',
    'danielhooker': 'danhooker',
    'alextorres': 'alexandertorres',
    'luisgomez': 'luisraulgomez',
    'antoniocarlosjunior': 'antoniocarlos',
    'leonardoaugustoleleco': 'leonardoaugustoguimaraes',
    'yanxiaonan': 'xiaonanyan',
    'ulkasasaki': 'yutasasaki',
    'huyaozong': 'yaozonghu',
    'jimyhettes': 'jimhettes',
    'robertwhiteford': 'robwhiteford',
    'costasphilippou': 'constantinosphilippou',
    'timothyelliott': 'timelliott',
    'tatianasuarez': 'tatianasaurez',
    'jingliangli': 'lijingliang',
    'jimmywallhead': 'jimwallhead',
    'emilmeek': 'emilweber',
    'khalilrountreejr': 'khalilrountree',
    'jackmarshman': 'jackharshman',
    'paulocosta': 'paulohenriquecosta',
    'joeduffy': 'josephduffy',
    'michelquinones': 'michaelquinones',
    'alessandroricci': 'alesandroricci',
    'joshstansbury': 'joshuastansbury',
    'robertmcdaniel': 'bubbamcdaniel',
    'saparbegsafarov': 'saparbeksafarov',
    'danielkelly': 'dankelly',
    'diegoferreira': 'carlosdiegoferreira',
    'godofredopepey': 'godofredocastro',
    'songyadong': 'yadongsong',
    'edimilsonsouza': 'kevinsouza',
}

In [18]:
def convert_name_to_slug(name):
    """Convert a fighter's full name into a lowercase slug.

    Every space, period, apostrophe and hyphen is removed from ``name`` and
    the remainder is lowercased, e.g. ``"Ovince Saint-Preux"`` becomes
    ``"ovincesaintpreux"``.

    Parameters
    ----------
    name : str
        The full name to convert.

    Returns
    -------
    str
        ``name`` without those four characters, in lowercase.
    """
    # A single raw-string character class replaces the four separate
    # substitutions. The raw string also removes the old '\.' pattern, which is
    # an invalid escape sequence in a normal string literal and makes current
    # Python versions emit a warning. The hyphen sits last in the class so it
    # is treated as a literal character rather than a range.
    return re.sub(r"[ .'-]", '', name).lower()

# Derive each fighter's slug from the full name, then substitute the corrected
# slug wherever the converted name is a key of slug_map.
# The result of replace() is assigned back to the column. Calling
# train['f1_slug'].replace(slug_map, inplace=True) acts on a column selected
# from the frame; pandas warns about that pattern, and with Copy-on-Write
# enabled it leaves `train` unchanged.
train['f1_slug'] = train['f1_fullname'].map(convert_name_to_slug).replace(slug_map)
train['f2_slug'] = train['f2_fullname'].map(convert_name_to_slug).replace(slug_map)

## There are 106 missing outcomes

In [19]:
# Select the fights with no recorded outcome for fighter 1 once, then report
# how many there are and preview the first three.
missing_outcome_rows = train[train['f1_outcome'].isnull()]
print(missing_outcome_rows.shape)
missing_outcome_rows.head(3)

(106, 9)


Unnamed: 0,eventid,fightid,f1_fullname,f2_fullname,f1_fighterid,f2_fighterid,f1_outcome,f1_slug,f2_slug
0,644,4512,Antonio Silva,Mark Hunt,1431,735,,antoniosilva,markhunt
17,701,5158,Levan Makashvili,Nik Lentz,2430,1137,,levanmakashvili,niklentz
51,740,5759,Teruto Ishihara,Mizuto Hirota,2655,1177,,terutoishihara,mizutohirota


## Fill in the missing outcomes after aggregating data

In [20]:
def fill_in_outcome(df, fight_id, newval):
    """Set ``f1_outcome`` on the first row whose ``fightid`` equals ``fight_id``.

    ``df`` is modified in place and is also returned, so the call can be
    written as ``train = fill_in_outcome(train, ...)``.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame with ``fightid`` and ``f1_outcome`` columns.
    fight_id
        Value to look up in the ``fightid`` column.
    newval
        Value to store in ``f1_outcome`` for the matching row.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object that was passed in.

    Raises
    ------
    IndexError
        If no row of ``df`` has the given ``fight_id``.
    """
    matching_labels = df[df['fightid'] == fight_id].index
    if len(matching_labels) == 0:
        # Same exception type as the bare ``.index[0]`` lookup raised before,
        # but the message now names the fight id that was not found.
        raise IndexError('no row with fightid == {!r}'.format(fight_id))
    # Only the first matching row is updated, even if several rows match.
    df.at[matching_labels[0], 'f1_outcome'] = newval
    return df

In [21]:
# Example call, left disabled: it would set f1_outcome to "Win" for fightid 4512.
#train = fill_in_outcome(train, 4512, "Win")

## Export Train_stub for use later

In [22]:
# Write the stub to disk for later use. to_csv keeps its default index=True
# here, so the row index is written as the first column of the file.
train.to_csv('../../02_Data/02_Processed_Data/train_stub.csv')