# Web Scraping peleadores y estadisticas

## Import libraries

In [1]:
from selenium import webdriver
from bs4 import BeautifulSoup as BS
import requests
import json
import time
import unicodedata
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn
import string

pd.options.display.max_rows = None

### pruebas

In [2]:
df = pd.read_html('https://es.wikipedia.org/wiki/Anexo:Eventos_de_UFC')

In [3]:
eventos = df[0].copy()

In [4]:
eventos.shape

(599, 6)

In [5]:
eventos.tail(20)

Unnamed: 0,#,Evento,Fecha,Sede,Localización,Fuente
579,571,UFC on ESPN: Sandhagen vs. Dillashaw,24 de julio de 2021,UFC Apex,"Las Vegas, Nevada",
580,572,UFC on ESPN: Hall vs. Strickland,31 de julio de 2021,UFC Apex,"Las Vegas, Nevada",
581,573,UFC 265: Lewis vs. Gane,7 de agosto de 2021,Toyota Center,"Houston, Texas",
582,574,UFC on ESPN: Cannonier vs. Gastelum,21 de agosto de 2021,UFC Apex,"Las Vegas, Nevada",
583,575,UFC on ESPN: Barboza vs. Chikadze,28 de agosto de 2021,UFC Apex,"Las Vegas, Nevada",
584,576,UFC Fight Night: Brunson vs. Till,4 de septiembre de 2021,UFC Apex,"Las Vegas, Nevada",
585,577,UFC Fight Night: Smith vs. Spann,18 de septiembre de 2021,UFC Apex,"Las Vegas, Nevada",
586,578,UFC 266: Volkanovski vs. Ortega,25 de septiembre de 2021,T-Mobile Arena,"Paradise, Nevada",
587,579,UFC Fight Night: Santos vs. Walker,2 de octubre de 2021,UFC Apex,"Las Vegas, Nevada",
588,580,UFC Fight Night: Dern vs. Rodriguez,9 de octubre de 2021,UFC Apex,"Las Vegas, Nevada",


In [6]:
eventos.tail()

Unnamed: 0,#,Evento,Fecha,Sede,Localización,Fuente
594,586,UFC Fight Night: Vieira vs. Tate,20 de noviembre de 2021,UFC Apex,"Las Vegas, Nevada",
595,587,UFC on ESPN: Font vs. Aldo,4 de diciembre de 2021,UFC Apex,"Las Vegas, Nevada",
596,588,UFC 269: Oliveira vs. Poirier,11 de diciembre de 2021,T-Mobile Arena,"Las Vegas, Nevada",
597,589,UFC Fight Night: Lewis vs. Daukaus,18 de diciembre de 2021,UFC Apex,"Las Vegas, Nevada",
598,590,UFC on ESPN: Kattar vs. Chikadze,15 de enero de 2022,UFC Apex,"Las Vegas, Nevada",


In [7]:
wiki_baselink = 'https://es.wikipedia.org'
response = requests.get('https://es.wikipedia.org/wiki/Anexo:Eventos_de_UFC')
soup = BS(response.text)
scrap_table = soup.find('tbody')
rows = scrap_table.find_all('tr')[1:]
wikilink_event = []
for row in rows:
    event_link = row.find_all('a')[0]['href']
    complete_link = wiki_baselink + event_link
    wikilink_event.append(complete_link)

In [8]:
eventos['link_wiki'] = pd.Series(wikilink_event)

In [9]:
eventos['Fuente'] = eventos['Fuente'].fillna('-')
NO_eventos = eventos[eventos['Fuente'].str.contains('Cancelado')].index
eventos.drop(NO_eventos, inplace = True)
eventos.shape

(590, 7)

In [10]:
eventos.drop(['#', 'Fuente'], axis = 1, inplace = True)
eventos.reset_index(drop=True, inplace = True)

In [11]:
eventos.tail()

Unnamed: 0,Evento,Fecha,Sede,Localización,link_wiki
585,UFC Fight Night: Vieira vs. Tate,20 de noviembre de 2021,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_Fight_Night:...
586,UFC on ESPN: Font vs. Aldo,4 de diciembre de 2021,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_on_ESPN:_Fon...
587,UFC 269: Oliveira vs. Poirier,11 de diciembre de 2021,T-Mobile Arena,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_269
588,UFC Fight Night: Lewis vs. Daukaus,18 de diciembre de 2021,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_Fight_Night:...
589,UFC on ESPN: Kattar vs. Chikadze,15 de enero de 2022,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_on_ESPN:_Kat...


In [12]:
eventos.shape

(590, 5)

## Recaudacion eventos scraping

In [13]:
ingresos = []
for link in eventos['link_wiki']:
    response = requests.get(link)
    soup = BS(response.text)
    scrap_table = soup.find('tbody')
    rows = scrap_table.find_all('tr')
    elementos = [row.text.split('\n') for row in rows]
    for elemento in elementos:
        if 'Recaudación' in elemento:
            ingresos.append(unicodedata.normalize("NFKD", elemento[1]))
            break
        if elemento == elementos[-1]:
            ingresos.append('Not Found')
            
len(ingresos) 

590

In [14]:
eventos['ingresos_totales'] = pd.Series(ingresos)
eventos.tail()

Unnamed: 0,Evento,Fecha,Sede,Localización,link_wiki,ingresos_totales
585,UFC Fight Night: Vieira vs. Tate,20 de noviembre de 2021,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_Fight_Night:...,Not Found
586,UFC on ESPN: Font vs. Aldo,4 de diciembre de 2021,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_on_ESPN:_Fon...,Not Found
587,UFC 269: Oliveira vs. Poirier,11 de diciembre de 2021,T-Mobile Arena,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_269,"$8,000,000[2]​"
588,UFC Fight Night: Lewis vs. Daukaus,18 de diciembre de 2021,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_Fight_Night:...,Not Found
589,UFC on ESPN: Kattar vs. Chikadze,15 de enero de 2022,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_on_ESPN:_Kat...,Not Found


### Formatear ingresos

In [15]:
def format_ingresos(ingreso_str):
    try:
        formated_ingreso = ingreso_str.split('[')[0].replace('$', '').replace(',', '')
    except:
        formated_ingreso = ingreso_str
    return formated_ingreso

In [16]:
eventos['ingresos_dollar'] = eventos['ingresos_totales'].apply(format_ingresos)
eventos.tail()

Unnamed: 0,Evento,Fecha,Sede,Localización,link_wiki,ingresos_totales,ingresos_dollar
585,UFC Fight Night: Vieira vs. Tate,20 de noviembre de 2021,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_Fight_Night:...,Not Found,Not Found
586,UFC on ESPN: Font vs. Aldo,4 de diciembre de 2021,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_on_ESPN:_Fon...,Not Found,Not Found
587,UFC 269: Oliveira vs. Poirier,11 de diciembre de 2021,T-Mobile Arena,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_269,"$8,000,000[2]​",8000000
588,UFC Fight Night: Lewis vs. Daukaus,18 de diciembre de 2021,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_Fight_Night:...,Not Found,Not Found
589,UFC on ESPN: Kattar vs. Chikadze,15 de enero de 2022,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_on_ESPN:_Kat...,Not Found,Not Found


In [17]:
notfound_eventos = eventos[eventos['ingresos_totales'] == 'Not Found']
notfound_eventos


Unnamed: 0,Evento,Fecha,Sede,Localización,link_wiki,ingresos_totales,ingresos_dollar
14,UFC 13: The Ultimate Force,30 de mayo de 1997,Augusta Civic Center,"Augusta, Georgia, USA",https://es.wikipedia.org/wiki/UFC_13,Not Found,Not Found
15,UFC 14: Showdown,27 de julio de 1997,Boutwell Auditorium,"Birmingham, Alabama, USA",https://es.wikipedia.org/wiki/UFC_14,Not Found,Not Found
16,UFC 15: Collision Course,17 de octubre de 1997,Casino Magic Bay St. Louis,"Bay St. Louis, Mississippi, USA",https://es.wikipedia.org/wiki/UFC_15,Not Found,Not Found
17,UFC Ultimate Japan,21 de diciembre de 1997,Yokohama Arena,"Yokohama, Japón",https://es.wikipedia.org/wiki/UFC_Japan,Not Found,Not Found
18,UFC 16: Battle in the Bayou,13 de marzo de 1998,Pontchartrain Center,"Nueva Orleans, Lousiana, USA",https://es.wikipedia.org/wiki/UFC_16,Not Found,Not Found
19,UFC 17: Redemption,15 de mayo de 1998,Mobile Civic Center,"Mobile, Alabama, USA",https://es.wikipedia.org/wiki/UFC_17,Not Found,Not Found
20,UFC Ultimate Brazil,16 de octubre de 1998,Ginásio da Portuguesa,"São Paulo, Brasil",https://es.wikipedia.org/wiki/UFC_Brazil,Not Found,Not Found
21,UFC 18: The Road to the Heavyweight Title,8 de enero de 1999,Pontchartrain Center,"Nueva Orleans, Lousiana, USA",https://es.wikipedia.org/wiki/UFC_18,Not Found,Not Found
22,UFC 19: Ultimate Young Guns,5 de marzo de 1999,Casino Magic Bay St. Louis,"Bay St. Louis, Mississippi, USA",https://es.wikipedia.org/wiki/UFC_19,Not Found,Not Found
23,UFC 20: Battle for the Gold,7 de mayo de 1999,Boutwell Auditorium,"Birmingham, Alabama, USA",https://es.wikipedia.org/wiki/UFC_20,Not Found,Not Found


### Scraping Fighters


In [18]:
abecedario = string.ascii_lowercase
fighters_df_list = []
for letra in abecedario:
    fighter_letra_link = f'http://ufcstats.com/statistics/fighters?char={letra}&page=all'
    df_fighters = pd.read_html(fighter_letra_link)[0]
    df_fighters.dropna(how = 'all', axis = 0, inplace = True)
    df_fighters.reset_index(drop=True, inplace = True)

    response = requests.get(fighter_letra_link)
    soup = BS(response.text)
    enlaces = []
    for tr in soup.find_all('tr')[2:]:
        enlace = tr.find('a')['href']
        enlaces.append(enlace)
    df_fighters['enlaces'] = pd.Series(enlaces)

    fighters_df_list.append(df_fighters)
    

In [19]:
peleadores = pd.concat(fighters_df_list)
peleadores.shape

(3875, 12)

In [20]:
peleadores.head()

Unnamed: 0,First,Last,Nickname,Ht.,Wt.,Reach,Stance,W,L,D,Belt,enlaces
0,Tom,Aaron,,--,155 lbs.,--,,5.0,3.0,0.0,,http://ufcstats.com/fighter-details/93fe7332d1...
1,Danny,Abbadi,The Assassin,"5' 11""",155 lbs.,--,Orthodox,4.0,6.0,0.0,,http://ufcstats.com/fighter-details/15df64c02b...
2,Nariman,Abbasov,,"5' 8""",155 lbs.,--,,27.0,3.0,0.0,,http://ufcstats.com/fighter-details/59a9d6dac6...
3,David,Abbott,Tank,"6' 0""",265 lbs.,--,Switch,10.0,15.0,0.0,,http://ufcstats.com/fighter-details/b361180739...
4,Hamdy,Abdelwahab,The Hammer,"6' 2""",264 lbs.,"72.0""",Southpaw,6.0,0.0,0.0,,http://ufcstats.com/fighter-details/3329d692ae...


In [57]:
stats = []
for link in peleadores['enlaces']:
    response = requests.get(link)
    soup = BS(response.text)
    stats_peleador = {}
    for li in soup.find_all('li')[3:17]:
        lista = li.text.replace('\n', ' ').strip().split(':')
        lista = [elemento.strip() for elemento in lista]
        if len(lista[0]) != 0:
            stats_peleador[lista[0]] = lista[1]
            
    stats.append(stats_peleador)        
df_stats = pd.DataFrame(stats)

In [64]:
df_stats.tail()

Unnamed: 0,Height,Weight,Reach,STANCE,DOB,SLpM,Str. Acc.,SApM,Str. Def,TD Avg.,TD Acc.,TD Def.,Sub. Avg.
3870,--,170 lbs.,--,,"Mar 05, 1980",0.0,0%,0.0,0%,0.0,0%,0%,0.0
3871,--,145 lbs.,--,,--,0.0,0%,0.0,0%,0.0,0%,0%,0.0
3872,"5' 9""",185 lbs.,--,,--,7.64,38%,5.45,37%,0.0,0%,100%,0.0
3873,"5' 7""",155 lbs.,"70""",Orthodox,"Apr 04, 1992",3.93,52%,1.8,61%,0.0,0%,57%,1.0
3874,"6' 2""",205 lbs.,"74""",,"Jun 26, 1982",3.34,48%,4.87,39%,1.31,30%,50%,0.0


In [62]:
df_stats.shape

(3875, 13)

In [68]:
peleadores.tail(100)

Unnamed: 0,First,Last,Nickname,Ht.,Wt.,Reach,Stance,W,L,D,Belt,enlaces
107,Karolina,Wojcik,Polish Assassin,--,115 lbs.,--,,8.0,2.0,0.0,,http://ufcstats.com/fighter-details/b99b362083...
108,Danyelle,Wolf,,"5' 11""",145 lbs.,"70.0""",Orthodox,1.0,0.0,0.0,,http://ufcstats.com/fighter-details/a7e3f02fe2...
109,Brandon,Wolff,,"5' 9""",170 lbs.,--,Orthodox,7.0,5.0,0.0,,http://ufcstats.com/fighter-details/aa79d53995...
110,Xue Do,Won,,"5' 7""",165 lbs.,--,,0.0,1.0,0.0,,http://ufcstats.com/fighter-details/daa89f01e1...
111,Joanne,Wood,JoJo,"5' 6""",125 lbs.,"65.0""",Orthodox,15.0,8.0,0.0,,http://ufcstats.com/fighter-details/12f91bfa8f...
112,Nathaniel,Wood,The Prospect,"5' 6""",145 lbs.,"69.0""",Orthodox,18.0,5.0,0.0,,http://ufcstats.com/fighter-details/329e403448...
113,Tyron,Woodley,The Chosen One,"5' 9""",170 lbs.,"74.0""",Orthodox,19.0,7.0,1.0,,http://ufcstats.com/fighter-details/effd9de993...
114,Salvador,Woods,,"5' 11""",170 lbs.,--,,8.0,17.0,0.0,,http://ufcstats.com/fighter-details/8de471543d...
115,Sean,Woodson,The Sniper,"6' 2""",145 lbs.,"78.0""",Orthodox,9.0,1.0,0.0,,http://ufcstats.com/fighter-details/4682bc59d5...
116,Cal,Worsham,,"5' 11""",230 lbs.,--,Orthodox,13.0,10.0,0.0,,http://ufcstats.com/fighter-details/de3ed2e152...


In [69]:
peleadores.reset_index(drop=True, inplace=True)

In [70]:
peleadores.tail()

Unnamed: 0,First,Last,Nickname,Ht.,Wt.,Reach,Stance,W,L,D,Belt,enlaces
3870,Dave,Zitanick,,--,170 lbs.,--,,5.0,7.0,0.0,,http://ufcstats.com/fighter-details/be124bdd60...
3871,Alex,Zuniga,,--,145 lbs.,--,,6.0,3.0,0.0,,http://ufcstats.com/fighter-details/02d808afb9...
3872,George,Zuniga,,"5' 9""",185 lbs.,--,,3.0,1.0,0.0,,http://ufcstats.com/fighter-details/1291dd6b8a...
3873,Allan,Zuniga,Tigre,"5' 7""",155 lbs.,"70.0""",Orthodox,13.0,1.0,0.0,,http://ufcstats.com/fighter-details/523af801b3...
3874,Virgil,Zwicker,RezDog,"6' 2""",205 lbs.,"74.0""",,15.0,6.0,1.0,,http://ufcstats.com/fighter-details/0c277f3ff6...


In [71]:
fighter_stats_df = pd.concat([peleadores, df_stats], axis = 1)

In [72]:
fighter_stats_df.to_csv('fighter_stats.csv')
eventos.to_csv('eventos_UFC.csv')

Unnamed: 0,First,Last,Nickname,Ht.,Wt.,Reach,Stance,W,L,D,...,STANCE,DOB,SLpM,Str. Acc.,SApM,Str. Def,TD Avg.,TD Acc.,TD Def.,Sub. Avg.
0,Tom,Aaron,,--,155 lbs.,--,,5.0,3.0,0.0,...,,"Jul 13, 1978",0.0,0%,0.0,0%,0.0,0%,0%,0.0
1,Danny,Abbadi,The Assassin,"5' 11""",155 lbs.,--,Orthodox,4.0,6.0,0.0,...,Orthodox,"Jul 03, 1983",3.29,38%,4.41,57%,0.0,0%,77%,0.0
2,Nariman,Abbasov,,"5' 8""",155 lbs.,--,,27.0,3.0,0.0,...,,"Feb 01, 1994",0.0,0%,0.0,0%,0.0,0%,0%,0.0
3,David,Abbott,Tank,"6' 0""",265 lbs.,--,Switch,10.0,15.0,0.0,...,Switch,--,1.35,30%,3.55,38%,1.07,33%,66%,0.0
4,Hamdy,Abdelwahab,The Hammer,"6' 2""",264 lbs.,"72.0""",Southpaw,6.0,0.0,0.0,...,Southpaw,"Jan 22, 1993",3.87,52%,3.13,59%,3.0,75%,0%,0.0


### Scraping bouts


In [77]:
bouts_link = 'http://ufcstats.com/statistics/events/completed?page=all'
events_v2 = pd.read_html(bouts_link)[0]
events_v2.head()

Unnamed: 0,Name/date,Location
0,,
1,"UFC Fight Night: Vera vs. Cruz August 13, 2022","San Diego, California, USA"
2,"UFC Fight Night: Santos vs. Hill August 06, 2022","Las Vegas, Nevada, USA"
3,"UFC 277: Pena vs. Nunes 2 July 30, 2022","Dallas, Texas, USA"
4,UFC Fight Night: Blaydes vs. Aspinall July 23...,"London, England, United Kingdom"


In [78]:
events_v2.drop([0,1], inplace=True)
events_v2.reset_index(drop=True, inplace=True)

In [79]:
events_v2.shape

(615, 2)

In [80]:
events_v2.head()

Unnamed: 0,Name/date,Location
0,"UFC Fight Night: Santos vs. Hill August 06, 2022","Las Vegas, Nevada, USA"
1,"UFC 277: Pena vs. Nunes 2 July 30, 2022","Dallas, Texas, USA"
2,UFC Fight Night: Blaydes vs. Aspinall July 23...,"London, England, United Kingdom"
3,UFC Fight Night: Ortega vs. Rodriguez July 16...,"Elmont, New York, USA"
4,UFC Fight Night: Dos Anjos vs. Fiziev July 09...,"Las Vegas, Nevada, USA"


In [81]:
response = requests.get(bouts_link)
soup = BS(response.text)
scrap_table = soup.find('tbody')
rows = scrap_table.find_all('a')
event_link = []
for row in rows[1:]:
    event = row['href']
    event_link.append(event)

In [82]:
events_v2['link']= pd.Series(event_link)

In [83]:
events_v2.tail()

Unnamed: 0,Name/date,Location,link
610,"UFC 6: Clash of the Titans July 14, 1995","Casper, Wyoming, USA",http://ufcstats.com/event-details/1c3f5e85b59e...
611,"UFC 5: The Return of the Beast April 07, 1995","Charlotte, North Carolina, USA",http://ufcstats.com/event-details/dedc3bb440d0...
612,"UFC 4: Revenge of the Warriors December 16, 1994","Tulsa, Oklahoma, USA",http://ufcstats.com/event-details/b60391da771d...
613,"UFC 3: The American Dream September 09, 1994","Charlotte, North Carolina, USA",http://ufcstats.com/event-details/1a49e0670dfa...
614,"UFC 2: No Way Out March 11, 1994","Denver, Colorado, USA",http://ufcstats.com/event-details/a6a9ab5a824e...


In [84]:
peleas_por_evento = []
for link in events_v2['link']:
    response = requests.get(link)
    soup = BS(response.text)
    links_peleas = pd.Series([tr['data-link'] for tr in soup.find_all('tr')[1:]], name = 'link')
    fights = pd.read_html(link)[0]
    fights_with_link = pd.concat([fights, links_peleas], axis = 1)
    peleas_por_evento.append(fights_with_link)
bouts = pd.concat(peleas_por_evento, axis=0)

In [85]:
bouts.reset_index(drop=True, inplace=True)

In [86]:
bouts.shape

(6754, 11)

In [87]:
bouts.head()

Unnamed: 0,W/L,Fighter,Kd,Str,Td,Sub,Weight class,Method,Round,Time,link
0,win,Jamahal Hill Thiago Santos,0 0,89 53,0 6,0 0,Light Heavyweight,KO/TKO Elbows,4,2:31,http://ufcstats.com/fight-details/a8bc6e005077...
1,win,Geoff Neal Vicente Luque,2 0,121 97,1 0,0 0,Welterweight,KO/TKO Punches,3,2:01,http://ufcstats.com/fight-details/a6b328733d61...
2,win,Mohammed Usman Zac Pauga,1 0,12 32,0 0,0 0,Heavyweight,KO/TKO Punch,2,0:36,http://ufcstats.com/fight-details/36f38c2534ef...
3,win,Juliana Miller Brogan Walker,0 0,54 30,4 0,1 0,Women's Flyweight,KO/TKO Elbows,3,3:57,http://ufcstats.com/fight-details/2b11aa4b90b1...
4,win,Serghei Spivac Augusto Sakai,0 0,33 8,6 0,1 0,Heavyweight,KO/TKO Punches,2,3:42,http://ufcstats.com/fight-details/044d210d7b1e...


In [88]:
bouts.to_csv('bouts_UFC.csv', index=False)

In [92]:
bouts.head()

Unnamed: 0,W/L,Fighter,Kd,Str,Td,Sub,Weight class,Method,Round,Time,link
0,win,Jamahal Hill Thiago Santos,0 0,89 53,0 6,0 0,Light Heavyweight,KO/TKO Elbows,4,2:31,http://ufcstats.com/fight-details/a8bc6e005077...
1,win,Geoff Neal Vicente Luque,2 0,121 97,1 0,0 0,Welterweight,KO/TKO Punches,3,2:01,http://ufcstats.com/fight-details/a6b328733d61...
2,win,Mohammed Usman Zac Pauga,1 0,12 32,0 0,0 0,Heavyweight,KO/TKO Punch,2,0:36,http://ufcstats.com/fight-details/36f38c2534ef...
3,win,Juliana Miller Brogan Walker,0 0,54 30,4 0,1 0,Women's Flyweight,KO/TKO Elbows,3,3:57,http://ufcstats.com/fight-details/2b11aa4b90b1...
4,win,Serghei Spivac Augusto Sakai,0 0,33 8,6 0,1 0,Heavyweight,KO/TKO Punches,2,3:42,http://ufcstats.com/fight-details/044d210d7b1e...


## Dividir celdas peleadores

In [89]:
cols_dividir = ['Fighter', 'Kd', 'Str', 'Td', 'Sub']

In [90]:
def winner(fighters):
    separed = fighters.split('  ')
    winner = separed[0]
    return winner

In [94]:
chunk.head()

Unnamed: 0_level_0,Fighter,Kd,Str,Td,Sub,Weight class,Method,Round,Time,link
W/L,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
win,Jamahal Hill Thiago Santos,0 0,89 53,0 6,0 0,Light Heavyweight,KO/TKO Elbows,4,2:31,http://ufcstats.com/fight-details/a8bc6e005077...
win,Geoff Neal Vicente Luque,2 0,121 97,1 0,0 0,Welterweight,KO/TKO Punches,3,2:01,http://ufcstats.com/fight-details/a6b328733d61...
win,Mohammed Usman Zac Pauga,1 0,12 32,0 0,0 0,Heavyweight,KO/TKO Punch,2,0:36,http://ufcstats.com/fight-details/36f38c2534ef...
win,Juliana Miller Brogan Walker,0 0,54 30,4 0,1 0,Women's Flyweight,KO/TKO Elbows,3,3:57,http://ufcstats.com/fight-details/2b11aa4b90b1...
win,Serghei Spivac Augusto Sakai,0 0,33 8,6 0,1 0,Heavyweight,KO/TKO Punches,2,3:42,http://ufcstats.com/fight-details/044d210d7b1e...


In [95]:
bouts_chunks = pd.read_csv('./bouts_UFC.csv', chunksize = 20)
bouts_dfs = []
for chunk in bouts_chunks:
    chunk.drop(['W/L'], axis = 1, inplace = True)
    chunk['winner'] = chunk['Fighter'].apply(winner)
    rand0 = np.random.choice([0,1])
    rand1 = 0 if rand0 == 1 else 1
    for col in cols_dividir:
        new = chunk[col].str.split('  ', expand = True)
        col0 = str(col)+ '_0'
        col1 = str(col)+ '_1'
        chunk[col0] = new[rand0].copy()
        chunk[col1] = new[rand1].copy()
        chunk.drop(col, axis = 1, inplace = True)
    bouts_dfs.append(chunk)

bouts = pd.concat(bouts_dfs, ignore_index = True)

In [99]:
bouts.head()

Unnamed: 0,Weight class,Method,Round,Time,link,winner,Fighter_0,Fighter_1,Kd_0,Kd_1,Str_0,Str_1,Td_0,Td_1,Sub_0,Sub_1
0,Light Heavyweight,KO/TKO Elbows,4,2:31,http://ufcstats.com/fight-details/a8bc6e005077...,Jamahal Hill,Thiago Santos,Jamahal Hill,0,0,53,89,6,0,0,0
1,Welterweight,KO/TKO Punches,3,2:01,http://ufcstats.com/fight-details/a6b328733d61...,Geoff Neal,Vicente Luque,Geoff Neal,0,2,97,121,0,1,0,0
2,Heavyweight,KO/TKO Punch,2,0:36,http://ufcstats.com/fight-details/36f38c2534ef...,Mohammed Usman,Zac Pauga,Mohammed Usman,0,1,32,12,0,0,0,0
3,Women's Flyweight,KO/TKO Elbows,3,3:57,http://ufcstats.com/fight-details/2b11aa4b90b1...,Juliana Miller,Brogan Walker,Juliana Miller,0,0,30,54,0,4,0,1
4,Heavyweight,KO/TKO Punches,2,3:42,http://ufcstats.com/fight-details/044d210d7b1e...,Serghei Spivac,Augusto Sakai,Serghei Spivac,0,0,8,33,0,6,0,1


In [100]:
bouts.to_csv('bouts_UFC.csv', index=False)