# Web Scraping peleadores y estadisticas

## Import libraries

In [3]:
!pip install streamlit

Collecting streamlit
  Downloading streamlit-1.12.2-py2.py3-none-any.whl (9.1 MB)
[K     |████████████████████████████████| 9.1 MB 184 kB/s eta 0:00:01
Collecting pydeck>=0.1.dev5
  Downloading pydeck-0.8.0b3-py2.py3-none-any.whl (4.7 MB)
[K     |████████████████████████████████| 4.7 MB 518 kB/s eta 0:00:01
Collecting rich>=10.11.0
  Downloading rich-12.5.1-py3-none-any.whl (235 kB)
[K     |████████████████████████████████| 235 kB 324 kB/s eta 0:00:01
Collecting pympler>=0.9
  Using cached Pympler-1.0.1-py3-none-any.whl (164 kB)
Collecting tzlocal>=1.1
  Using cached tzlocal-4.2-py3-none-any.whl (19 kB)
Collecting validators>=0.2
  Using cached validators-0.20.0.tar.gz (30 kB)
Collecting pyarrow>=4.0
  Downloading pyarrow-9.0.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (35.3 MB)
[K     |████████████████████████████████| 35.3 MB 101 kB/s eta 0:00:01     |█████████▋                      | 10.6 MB 549 kB/s eta 0:00:45     |███████████████▍                | 17.0 MB 476 k

In [1]:
from selenium import webdriver
from bs4 import BeautifulSoup as BS
import requests
import json
import time
import unicodedata
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import string
import re
from datetime import datetime
from sklearn.preprocessing import OneHotEncoder
import streamlit as st

pd.options.display.max_rows = None
pd.options.display.max_columns = None

### pruebas

In [None]:
df = pd.read_html('https://es.wikipedia.org/wiki/Anexo:Eventos_de_UFC')

In [None]:
eventos = df[0].copy()

In [None]:
eventos.shape

(599, 6)

In [None]:
eventos.tail(20)

Unnamed: 0,#,Evento,Fecha,Sede,Localización,Fuente
579,571,UFC on ESPN: Sandhagen vs. Dillashaw,24 de julio de 2021,UFC Apex,"Las Vegas, Nevada",
580,572,UFC on ESPN: Hall vs. Strickland,31 de julio de 2021,UFC Apex,"Las Vegas, Nevada",
581,573,UFC 265: Lewis vs. Gane,7 de agosto de 2021,Toyota Center,"Houston, Texas",
582,574,UFC on ESPN: Cannonier vs. Gastelum,21 de agosto de 2021,UFC Apex,"Las Vegas, Nevada",
583,575,UFC on ESPN: Barboza vs. Chikadze,28 de agosto de 2021,UFC Apex,"Las Vegas, Nevada",
584,576,UFC Fight Night: Brunson vs. Till,4 de septiembre de 2021,UFC Apex,"Las Vegas, Nevada",
585,577,UFC Fight Night: Smith vs. Spann,18 de septiembre de 2021,UFC Apex,"Las Vegas, Nevada",
586,578,UFC 266: Volkanovski vs. Ortega,25 de septiembre de 2021,T-Mobile Arena,"Paradise, Nevada",
587,579,UFC Fight Night: Santos vs. Walker,2 de octubre de 2021,UFC Apex,"Las Vegas, Nevada",
588,580,UFC Fight Night: Dern vs. Rodriguez,9 de octubre de 2021,UFC Apex,"Las Vegas, Nevada",


In [None]:
eventos.tail()

Unnamed: 0,#,Evento,Fecha,Sede,Localización,Fuente
594,586,UFC Fight Night: Vieira vs. Tate,20 de noviembre de 2021,UFC Apex,"Las Vegas, Nevada",
595,587,UFC on ESPN: Font vs. Aldo,4 de diciembre de 2021,UFC Apex,"Las Vegas, Nevada",
596,588,UFC 269: Oliveira vs. Poirier,11 de diciembre de 2021,T-Mobile Arena,"Las Vegas, Nevada",
597,589,UFC Fight Night: Lewis vs. Daukaus,18 de diciembre de 2021,UFC Apex,"Las Vegas, Nevada",
598,590,UFC on ESPN: Kattar vs. Chikadze,15 de enero de 2022,UFC Apex,"Las Vegas, Nevada",


In [None]:
wiki_baselink = 'https://es.wikipedia.org'
response = requests.get('https://es.wikipedia.org/wiki/Anexo:Eventos_de_UFC')
soup = BS(response.text)
scrap_table = soup.find('tbody')
rows = scrap_table.find_all('tr')[1:]
wikilink_event = []
for row in rows:
    event_link = row.find_all('a')[0]['href']
    complete_link = wiki_baselink + event_link
    wikilink_event.append(complete_link)

In [None]:
eventos['link_wiki'] = pd.Series(wikilink_event)

In [None]:
eventos['Fuente'] = eventos['Fuente'].fillna('-')
NO_eventos = eventos[eventos['Fuente'].str.contains('Cancelado')].index
eventos.drop(NO_eventos, inplace = True)
eventos.shape

(590, 7)

In [None]:
eventos.drop(['#', 'Fuente'], axis = 1, inplace = True)
eventos.reset_index(drop=True, inplace = True)

In [None]:
eventos.tail()

Unnamed: 0,Evento,Fecha,Sede,Localización,link_wiki
585,UFC Fight Night: Vieira vs. Tate,20 de noviembre de 2021,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_Fight_Night:...
586,UFC on ESPN: Font vs. Aldo,4 de diciembre de 2021,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_on_ESPN:_Fon...
587,UFC 269: Oliveira vs. Poirier,11 de diciembre de 2021,T-Mobile Arena,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_269
588,UFC Fight Night: Lewis vs. Daukaus,18 de diciembre de 2021,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_Fight_Night:...
589,UFC on ESPN: Kattar vs. Chikadze,15 de enero de 2022,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_on_ESPN:_Kat...


In [None]:
eventos.shape

(590, 5)

## Recaudacion eventos scraping

In [None]:
ingresos = []
for link in eventos['link_wiki']:
    response = requests.get(link)
    soup = BS(response.text)
    scrap_table = soup.find('tbody')
    rows = scrap_table.find_all('tr')
    elementos = [row.text.split('\n') for row in rows]
    for elemento in elementos:
        if 'Recaudación' in elemento:
            ingresos.append(unicodedata.normalize("NFKD", elemento[1]))
            break
        if elemento == elementos[-1]:
            ingresos.append('Not Found')
            
len(ingresos) 

590

In [None]:
eventos['ingresos_totales'] = pd.Series(ingresos)
eventos.tail()

Unnamed: 0,Evento,Fecha,Sede,Localización,link_wiki,ingresos_totales
585,UFC Fight Night: Vieira vs. Tate,20 de noviembre de 2021,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_Fight_Night:...,Not Found
586,UFC on ESPN: Font vs. Aldo,4 de diciembre de 2021,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_on_ESPN:_Fon...,Not Found
587,UFC 269: Oliveira vs. Poirier,11 de diciembre de 2021,T-Mobile Arena,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_269,"$8,000,000[2]​"
588,UFC Fight Night: Lewis vs. Daukaus,18 de diciembre de 2021,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_Fight_Night:...,Not Found
589,UFC on ESPN: Kattar vs. Chikadze,15 de enero de 2022,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_on_ESPN:_Kat...,Not Found


### Formatear ingresos

In [None]:
def format_ingresos(ingreso_str):
    try:
        formated_ingreso = ingreso_str.split('[')[0].replace('$', '').replace(',', '')
    except:
        formated_ingreso = ingreso_str
    return formated_ingreso

In [None]:
eventos['ingresos_dollar'] = eventos['ingresos_totales'].apply(format_ingresos)
eventos.tail()

Unnamed: 0,Evento,Fecha,Sede,Localización,link_wiki,ingresos_totales,ingresos_dollar
585,UFC Fight Night: Vieira vs. Tate,20 de noviembre de 2021,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_Fight_Night:...,Not Found,Not Found
586,UFC on ESPN: Font vs. Aldo,4 de diciembre de 2021,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_on_ESPN:_Fon...,Not Found,Not Found
587,UFC 269: Oliveira vs. Poirier,11 de diciembre de 2021,T-Mobile Arena,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_269,"$8,000,000[2]​",8000000
588,UFC Fight Night: Lewis vs. Daukaus,18 de diciembre de 2021,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_Fight_Night:...,Not Found,Not Found
589,UFC on ESPN: Kattar vs. Chikadze,15 de enero de 2022,UFC Apex,"Las Vegas, Nevada",https://es.wikipedia.org/wiki/UFC_on_ESPN:_Kat...,Not Found,Not Found


In [None]:
notfound_eventos = eventos[eventos['ingresos_totales'] == 'Not Found']
notfound_eventos


Unnamed: 0,Evento,Fecha,Sede,Localización,link_wiki,ingresos_totales,ingresos_dollar
14,UFC 13: The Ultimate Force,30 de mayo de 1997,Augusta Civic Center,"Augusta, Georgia, USA",https://es.wikipedia.org/wiki/UFC_13,Not Found,Not Found
15,UFC 14: Showdown,27 de julio de 1997,Boutwell Auditorium,"Birmingham, Alabama, USA",https://es.wikipedia.org/wiki/UFC_14,Not Found,Not Found
16,UFC 15: Collision Course,17 de octubre de 1997,Casino Magic Bay St. Louis,"Bay St. Louis, Mississippi, USA",https://es.wikipedia.org/wiki/UFC_15,Not Found,Not Found
17,UFC Ultimate Japan,21 de diciembre de 1997,Yokohama Arena,"Yokohama, Japón",https://es.wikipedia.org/wiki/UFC_Japan,Not Found,Not Found
18,UFC 16: Battle in the Bayou,13 de marzo de 1998,Pontchartrain Center,"Nueva Orleans, Lousiana, USA",https://es.wikipedia.org/wiki/UFC_16,Not Found,Not Found
19,UFC 17: Redemption,15 de mayo de 1998,Mobile Civic Center,"Mobile, Alabama, USA",https://es.wikipedia.org/wiki/UFC_17,Not Found,Not Found
20,UFC Ultimate Brazil,16 de octubre de 1998,Ginásio da Portuguesa,"São Paulo, Brasil",https://es.wikipedia.org/wiki/UFC_Brazil,Not Found,Not Found
21,UFC 18: The Road to the Heavyweight Title,8 de enero de 1999,Pontchartrain Center,"Nueva Orleans, Lousiana, USA",https://es.wikipedia.org/wiki/UFC_18,Not Found,Not Found
22,UFC 19: Ultimate Young Guns,5 de marzo de 1999,Casino Magic Bay St. Louis,"Bay St. Louis, Mississippi, USA",https://es.wikipedia.org/wiki/UFC_19,Not Found,Not Found
23,UFC 20: Battle for the Gold,7 de mayo de 1999,Boutwell Auditorium,"Birmingham, Alabama, USA",https://es.wikipedia.org/wiki/UFC_20,Not Found,Not Found


### Scraping Fighters


In [None]:
abecedario = string.ascii_lowercase
fighters_df_list = []
for letra in abecedario:
    fighter_letra_link = f'http://ufcstats.com/statistics/fighters?char={letra}&page=all'
    df_fighters = pd.read_html(fighter_letra_link)[0]
    df_fighters.dropna(how = 'all', axis = 0, inplace = True)
    df_fighters.reset_index(drop=True, inplace = True)

    response = requests.get(fighter_letra_link)
    soup = BS(response.text)
    enlaces = []
    for tr in soup.find_all('tr')[2:]:
        enlace = tr.find('a')['href']
        enlaces.append(enlace)
    df_fighters['enlaces'] = pd.Series(enlaces)

    fighters_df_list.append(df_fighters)
    

In [None]:
peleadores = pd.concat(fighters_df_list)
peleadores.shape

(3876, 12)

In [None]:
peleadores.head()

Unnamed: 0,First,Last,Nickname,Ht.,Wt.,Reach,Stance,W,L,D,Belt,enlaces
0,Tom,Aaron,,--,155 lbs.,--,,5.0,3.0,0.0,,http://ufcstats.com/fighter-details/93fe7332d1...
1,Danny,Abbadi,The Assassin,"5' 11""",155 lbs.,--,Orthodox,4.0,6.0,0.0,,http://ufcstats.com/fighter-details/15df64c02b...
2,Nariman,Abbasov,,"5' 8""",155 lbs.,--,,27.0,3.0,0.0,,http://ufcstats.com/fighter-details/59a9d6dac6...
3,David,Abbott,Tank,"6' 0""",265 lbs.,--,Switch,10.0,15.0,0.0,,http://ufcstats.com/fighter-details/b361180739...
4,Hamdy,Abdelwahab,The Hammer,"6' 2""",264 lbs.,"72.0""",Southpaw,6.0,0.0,0.0,,http://ufcstats.com/fighter-details/3329d692ae...


In [None]:
stats = []
for link in peleadores['enlaces']:
    response = requests.get(link)
    soup = BS(response.text)
    stats_peleador = {}
    for li in soup.find_all('li')[3:17]:
        lista = li.text.replace('\n', ' ').strip().split(':')
        lista = [elemento.strip() for elemento in lista]
        if len(lista[0]) != 0:
            stats_peleador[lista[0]] = lista[1]
            
    stats.append(stats_peleador)        
df_stats = pd.DataFrame(stats)

In [None]:
df_stats.tail()

Unnamed: 0,Height,Weight,Reach,STANCE,DOB,SLpM,Str. Acc.,SApM,Str. Def,TD Avg.,TD Acc.,TD Def.,Sub. Avg.
3871,--,170 lbs.,--,,"Mar 05, 1980",0.0,0%,0.0,0%,0.0,0%,0%,0.0
3872,--,145 lbs.,--,,--,0.0,0%,0.0,0%,0.0,0%,0%,0.0
3873,"5' 9""",185 lbs.,--,,--,7.64,38%,5.45,37%,0.0,0%,100%,0.0
3874,"5' 7""",155 lbs.,"70""",Orthodox,"Apr 04, 1992",3.93,52%,1.8,61%,0.0,0%,57%,1.0
3875,"6' 2""",205 lbs.,"74""",,"Jun 26, 1982",3.34,48%,4.87,39%,1.31,30%,50%,0.0


In [None]:
df_stats.shape

(3876, 13)

In [None]:
peleadores.tail(100)

Unnamed: 0,First,Last,Nickname,Ht.,Wt.,Reach,Stance,W,L,D,Belt,enlaces
107,Karolina,Wojcik,Polish Assassin,"5' 2""",115 lbs.,--,Orthodox,9.0,2.0,0.0,,http://ufcstats.com/fighter-details/b99b362083...
108,Danyelle,Wolf,,"5' 11""",145 lbs.,"70.0""",Orthodox,1.0,0.0,0.0,,http://ufcstats.com/fighter-details/a7e3f02fe2...
109,Brandon,Wolff,,"5' 9""",170 lbs.,--,Orthodox,7.0,5.0,0.0,,http://ufcstats.com/fighter-details/aa79d53995...
110,Xue Do,Won,,"5' 7""",165 lbs.,--,,0.0,1.0,0.0,,http://ufcstats.com/fighter-details/daa89f01e1...
111,Joanne,Wood,JoJo,"5' 6""",125 lbs.,"65.0""",Orthodox,15.0,8.0,0.0,,http://ufcstats.com/fighter-details/12f91bfa8f...
112,Nathaniel,Wood,The Prospect,"5' 6""",145 lbs.,"69.0""",Orthodox,18.0,5.0,0.0,,http://ufcstats.com/fighter-details/329e403448...
113,Tyron,Woodley,The Chosen One,"5' 9""",170 lbs.,"74.0""",Orthodox,19.0,7.0,1.0,,http://ufcstats.com/fighter-details/effd9de993...
114,Salvador,Woods,,"5' 11""",170 lbs.,--,,8.0,17.0,0.0,,http://ufcstats.com/fighter-details/8de471543d...
115,Sean,Woodson,The Sniper,"6' 2""",145 lbs.,"78.0""",Orthodox,9.0,1.0,0.0,,http://ufcstats.com/fighter-details/4682bc59d5...
116,Cal,Worsham,,"5' 11""",230 lbs.,--,Orthodox,13.0,10.0,0.0,,http://ufcstats.com/fighter-details/de3ed2e152...


In [None]:
peleadores.reset_index(drop=True, inplace=True)

In [None]:
peleadores.tail()

Unnamed: 0,First,Last,Nickname,Ht.,Wt.,Reach,Stance,W,L,D,Belt,enlaces
3871,Dave,Zitanick,,--,170 lbs.,--,,5.0,7.0,0.0,,http://ufcstats.com/fighter-details/be124bdd60...
3872,Alex,Zuniga,,--,145 lbs.,--,,6.0,3.0,0.0,,http://ufcstats.com/fighter-details/02d808afb9...
3873,George,Zuniga,,"5' 9""",185 lbs.,--,,3.0,1.0,0.0,,http://ufcstats.com/fighter-details/1291dd6b8a...
3874,Allan,Zuniga,Tigre,"5' 7""",155 lbs.,"70.0""",Orthodox,13.0,1.0,0.0,,http://ufcstats.com/fighter-details/523af801b3...
3875,Virgil,Zwicker,RezDog,"6' 2""",205 lbs.,"74.0""",,15.0,6.0,1.0,,http://ufcstats.com/fighter-details/0c277f3ff6...


In [None]:
fighter_stats_df = pd.concat([peleadores, df_stats], axis = 1)

In [None]:
fighter_stats_df.to_csv('fighter_stats.csv', index=False)
#eventos.to_csv('eventos_UFC.csv', index=False)

### Scraping bouts


In [None]:
bouts_link = 'http://ufcstats.com/statistics/events/completed?page=all'
events_v2 = pd.read_html(bouts_link)[0]
events_v2.head()

Unnamed: 0,Name/date,Location
0,,
1,"UFC Fight Night: Vera vs. Cruz August 13, 2022","San Diego, California, USA"
2,"UFC Fight Night: Santos vs. Hill August 06, 2022","Las Vegas, Nevada, USA"
3,"UFC 277: Pena vs. Nunes 2 July 30, 2022","Dallas, Texas, USA"
4,UFC Fight Night: Blaydes vs. Aspinall July 23...,"London, England, United Kingdom"


In [None]:
events_v2.drop([0,1], inplace=True)
events_v2.reset_index(drop=True, inplace=True)

In [None]:
events_v2.shape

(615, 2)

In [None]:
events_v2.head()

Unnamed: 0,Name/date,Location
0,"UFC Fight Night: Santos vs. Hill August 06, 2022","Las Vegas, Nevada, USA"
1,"UFC 277: Pena vs. Nunes 2 July 30, 2022","Dallas, Texas, USA"
2,UFC Fight Night: Blaydes vs. Aspinall July 23...,"London, England, United Kingdom"
3,UFC Fight Night: Ortega vs. Rodriguez July 16...,"Elmont, New York, USA"
4,UFC Fight Night: Dos Anjos vs. Fiziev July 09...,"Las Vegas, Nevada, USA"


In [None]:
response = requests.get(bouts_link)
soup = BS(response.text)
scrap_table = soup.find('tbody')
rows = scrap_table.find_all('a')
event_link = []
for row in rows[1:]:
    event = row['href']
    event_link.append(event)

In [None]:
events_v2['link']= pd.Series(event_link)

In [None]:
events_v2.tail()

Unnamed: 0,Name/date,Location,link
610,"UFC 6: Clash of the Titans July 14, 1995","Casper, Wyoming, USA",http://ufcstats.com/event-details/1c3f5e85b59e...
611,"UFC 5: The Return of the Beast April 07, 1995","Charlotte, North Carolina, USA",http://ufcstats.com/event-details/dedc3bb440d0...
612,"UFC 4: Revenge of the Warriors December 16, 1994","Tulsa, Oklahoma, USA",http://ufcstats.com/event-details/b60391da771d...
613,"UFC 3: The American Dream September 09, 1994","Charlotte, North Carolina, USA",http://ufcstats.com/event-details/1a49e0670dfa...
614,"UFC 2: No Way Out March 11, 1994","Denver, Colorado, USA",http://ufcstats.com/event-details/a6a9ab5a824e...


In [None]:
peleas_por_evento = []
for i in range(events_v2.shape[0]):
    response = requests.get(events_v2['link'][i])
    soup = BS(response.text)
    links_peleas = pd.Series([tr['data-link'] for tr in soup.find_all('tr')[1:]], name = 'link')
    fights = pd.read_html(events_v2['link'][i])[0]
    fecha = pd.Series(np.full(len(links_peleas), events_v2['Name/date'][i]), name = 'date_fight')
    fights_with_link = pd.concat([fights,fecha, links_peleas], axis = 1)
    peleas_por_evento.append(fights_with_link)
bouts = pd.concat(peleas_por_evento, axis=0)


In [None]:
bouts.head()

Unnamed: 0,W/L,Fighter,Kd,Str,Td,Sub,Weight class,Method,Round,Time,date_fight,link
0,win,Jamahal Hill Thiago Santos,0 0,89 53,0 6,0 0,Light Heavyweight,KO/TKO Elbows,4,2:31,"UFC Fight Night: Santos vs. Hill August 06, 2022",http://ufcstats.com/fight-details/a8bc6e005077...
1,win,Geoff Neal Vicente Luque,2 0,121 97,1 0,0 0,Welterweight,KO/TKO Punches,3,2:01,"UFC Fight Night: Santos vs. Hill August 06, 2022",http://ufcstats.com/fight-details/a6b328733d61...
2,win,Mohammed Usman Zac Pauga,1 0,12 32,0 0,0 0,Heavyweight,KO/TKO Punch,2,0:36,"UFC Fight Night: Santos vs. Hill August 06, 2022",http://ufcstats.com/fight-details/36f38c2534ef...
3,win,Juliana Miller Brogan Walker,0 0,54 30,4 0,1 0,Women's Flyweight,KO/TKO Elbows,3,3:57,"UFC Fight Night: Santos vs. Hill August 06, 2022",http://ufcstats.com/fight-details/2b11aa4b90b1...
4,win,Serghei Spivac Augusto Sakai,0 0,33 8,6 0,1 0,Heavyweight,KO/TKO Punches,2,3:42,"UFC Fight Night: Santos vs. Hill August 06, 2022",http://ufcstats.com/fight-details/044d210d7b1e...


In [None]:
bouts.reset_index(drop=True, inplace=True)

In [None]:
bouts.tail()

Unnamed: 0,W/L,Fighter,Kd,Str,Td,Sub,Weight class,Method,Round,Time,date_fight,link
6749,win,Orlando Wiet Robert Lucarelli,0 0,8 2,0 1,0 1,Open Weight,KO/TKO,1,2:50,"UFC 2: No Way Out March 11, 1994",http://ufcstats.com/fight-details/3b020d4914b4...
6750,win,Frank Hamaker Thaddeus Luster,0 0,2 0,1 0,3 0,Open Weight,SUB Keylock,1,4:52,"UFC 2: No Way Out March 11, 1994",http://ufcstats.com/fight-details/d917c8c7461b...
6751,win,Johnny Rhodes David Levicki,0 0,11 4,1 0,0 0,Open Weight,KO/TKO Punches,1,12:13,"UFC 2: No Way Out March 11, 1994",http://ufcstats.com/fight-details/ccee020be2e8...
6752,win,Patrick Smith Ray Wizard,0 0,1 1,0 0,1 0,Open Weight,SUB Guillotine Choke,1,0:58,"UFC 2: No Way Out March 11, 1994",http://ufcstats.com/fight-details/4b9ae533ccb3...
6753,win,Scott Morris Sean Daugherty,0 0,1 0,1 0,1 0,Open Weight,SUB Guillotine Choke,1,0:20,"UFC 2: No Way Out March 11, 1994",http://ufcstats.com/fight-details/4acab67848e7...


In [None]:
bouts.to_csv('bouts_UFC.csv', index=False)

In [None]:
bouts.head()

Unnamed: 0,W/L,Fighter,Kd,Str,Td,Sub,Weight class,Method,Round,Time,date_fight,link
0,win,Jamahal Hill Thiago Santos,0 0,89 53,0 6,0 0,Light Heavyweight,KO/TKO Elbows,4,2:31,"UFC Fight Night: Santos vs. Hill August 06, 2022",http://ufcstats.com/fight-details/a8bc6e005077...
1,win,Geoff Neal Vicente Luque,2 0,121 97,1 0,0 0,Welterweight,KO/TKO Punches,3,2:01,"UFC Fight Night: Santos vs. Hill August 06, 2022",http://ufcstats.com/fight-details/a6b328733d61...
2,win,Mohammed Usman Zac Pauga,1 0,12 32,0 0,0 0,Heavyweight,KO/TKO Punch,2,0:36,"UFC Fight Night: Santos vs. Hill August 06, 2022",http://ufcstats.com/fight-details/36f38c2534ef...
3,win,Juliana Miller Brogan Walker,0 0,54 30,4 0,1 0,Women's Flyweight,KO/TKO Elbows,3,3:57,"UFC Fight Night: Santos vs. Hill August 06, 2022",http://ufcstats.com/fight-details/2b11aa4b90b1...
4,win,Serghei Spivac Augusto Sakai,0 0,33 8,6 0,1 0,Heavyweight,KO/TKO Punches,2,3:42,"UFC Fight Night: Santos vs. Hill August 06, 2022",http://ufcstats.com/fight-details/044d210d7b1e...


## Dividir celdas peleadores

In [None]:
cols_dividir = ['Fighter', 'Kd', 'Str', 'Td', 'Sub']

In [None]:
def winner(fighters):
    separed = fighters.split('  ')
    winner = separed[0]
    return winner

In [None]:
bouts_chunks = pd.read_csv('./bouts_UFC.csv', chunksize = 20)
bouts_dfs = []
for chunk in bouts_chunks:
    chunk.drop(['W/L'], axis = 1, inplace = True)
    chunk['winner'] = chunk['Fighter'].apply(winner)
    rand0 = np.random.choice([0,1])
    rand1 = 0 if rand0 == 1 else 1
    for col in cols_dividir:
        new = chunk[col].str.split('  ', expand = True)
        col0 = str(col)+ '_0'
        col1 = str(col)+ '_1'
        chunk[col0] = new[rand0].copy()
        chunk[col1] = new[rand1].copy()
        chunk.drop(col, axis = 1, inplace = True)
    bouts_dfs.append(chunk)

bouts = pd.concat(bouts_dfs, ignore_index = True)

In [None]:
bouts.head()

Unnamed: 0,Weight class,Method,Round,Time,date_fight,link,winner,Fighter_0,Fighter_1,Kd_0,Kd_1,Str_0,Str_1,Td_0,Td_1,Sub_0,Sub_1
0,Light Heavyweight,KO/TKO Elbows,4,2:31,"UFC Fight Night: Santos vs. Hill August 06, 2022",http://ufcstats.com/fight-details/a8bc6e005077...,Jamahal Hill,Jamahal Hill,Thiago Santos,0,0,89,53,0,6,0,0
1,Welterweight,KO/TKO Punches,3,2:01,"UFC Fight Night: Santos vs. Hill August 06, 2022",http://ufcstats.com/fight-details/a6b328733d61...,Geoff Neal,Geoff Neal,Vicente Luque,2,0,121,97,1,0,0,0
2,Heavyweight,KO/TKO Punch,2,0:36,"UFC Fight Night: Santos vs. Hill August 06, 2022",http://ufcstats.com/fight-details/36f38c2534ef...,Mohammed Usman,Mohammed Usman,Zac Pauga,1,0,12,32,0,0,0,0
3,Women's Flyweight,KO/TKO Elbows,3,3:57,"UFC Fight Night: Santos vs. Hill August 06, 2022",http://ufcstats.com/fight-details/2b11aa4b90b1...,Juliana Miller,Juliana Miller,Brogan Walker,0,0,54,30,4,0,1,0
4,Heavyweight,KO/TKO Punches,2,3:42,"UFC Fight Night: Santos vs. Hill August 06, 2022",http://ufcstats.com/fight-details/044d210d7b1e...,Serghei Spivac,Serghei Spivac,Augusto Sakai,0,0,33,8,6,0,1,0


In [None]:
bouts.to_csv('bouts_UFC.csv', index=False)

## Fighters csv

SLpM - Significant Strikes Landed per Minute

Str. Acc. - Significant Striking Accuracy

SApM - Significant Strikes Absorbed per Minute

Str. Def. - Significant Strike Defence (the % of opponents strikes that did not land)

TD Avg. - Average Takedowns Landed per 15 minutes

TD Acc. - Takedown Accuracy

TD Def. - Takedown Defense (the % of opponents TD attempts that did not land)

Sub. Avg. - Average Submissions Attempted per 15 minutes 

In [None]:
fighters = pd.read_csv('fighter_stats.csv')

In [None]:
fighters.head()

Unnamed: 0,First,Last,Nickname,Ht.,Wt.,Reach,Stance,W,L,D,Belt,enlaces,Height,Weight,Reach.1,STANCE,DOB,SLpM,Str. Acc.,SApM,Str. Def,TD Avg.,TD Acc.,TD Def.,Sub. Avg.
0,Tom,Aaron,,--,155 lbs.,--,,5.0,3.0,0.0,,http://ufcstats.com/fighter-details/93fe7332d1...,--,155 lbs.,--,,"Jul 13, 1978",0.0,0%,0.0,0%,0.0,0%,0%,0.0
1,Danny,Abbadi,The Assassin,"5' 11""",155 lbs.,--,Orthodox,4.0,6.0,0.0,,http://ufcstats.com/fighter-details/15df64c02b...,"5' 11""",155 lbs.,--,Orthodox,"Jul 03, 1983",3.29,38%,4.41,57%,0.0,0%,77%,0.0
2,Nariman,Abbasov,,"5' 8""",155 lbs.,--,,27.0,3.0,0.0,,http://ufcstats.com/fighter-details/59a9d6dac6...,"5' 8""",155 lbs.,--,,"Feb 01, 1994",0.0,0%,0.0,0%,0.0,0%,0%,0.0
3,David,Abbott,Tank,"6' 0""",265 lbs.,--,Switch,10.0,15.0,0.0,,http://ufcstats.com/fighter-details/b361180739...,"6' 0""",265 lbs.,--,Switch,--,1.35,30%,3.55,38%,1.07,33%,66%,0.0
4,Hamdy,Abdelwahab,The Hammer,"6' 2""",264 lbs.,"72.0""",Southpaw,6.0,0.0,0.0,,http://ufcstats.com/fighter-details/3329d692ae...,"6' 2""",264 lbs.,"72""",Southpaw,"Jan 22, 1993",3.87,52%,3.13,59%,3.0,75%,0%,0.0


In [None]:
fighters["Full Name"] = fighters["First"] + " " + fighters["Last"]

In [None]:
fighters.head()

Unnamed: 0,First,Last,Nickname,Ht.,Wt.,Reach,Stance,W,L,D,Belt,enlaces,Height,Weight,Reach.1,STANCE,DOB,SLpM,Str. Acc.,SApM,Str. Def,TD Avg.,TD Acc.,TD Def.,Sub. Avg.,Full Name
0,Tom,Aaron,,--,155 lbs.,--,,5.0,3.0,0.0,,http://ufcstats.com/fighter-details/93fe7332d1...,--,155 lbs.,--,,"Jul 13, 1978",0.0,0%,0.0,0%,0.0,0%,0%,0.0,Tom Aaron
1,Danny,Abbadi,The Assassin,"5' 11""",155 lbs.,--,Orthodox,4.0,6.0,0.0,,http://ufcstats.com/fighter-details/15df64c02b...,"5' 11""",155 lbs.,--,Orthodox,"Jul 03, 1983",3.29,38%,4.41,57%,0.0,0%,77%,0.0,Danny Abbadi
2,Nariman,Abbasov,,"5' 8""",155 lbs.,--,,27.0,3.0,0.0,,http://ufcstats.com/fighter-details/59a9d6dac6...,"5' 8""",155 lbs.,--,,"Feb 01, 1994",0.0,0%,0.0,0%,0.0,0%,0%,0.0,Nariman Abbasov
3,David,Abbott,Tank,"6' 0""",265 lbs.,--,Switch,10.0,15.0,0.0,,http://ufcstats.com/fighter-details/b361180739...,"6' 0""",265 lbs.,--,Switch,--,1.35,30%,3.55,38%,1.07,33%,66%,0.0,David Abbott
4,Hamdy,Abdelwahab,The Hammer,"6' 2""",264 lbs.,"72.0""",Southpaw,6.0,0.0,0.0,,http://ufcstats.com/fighter-details/3329d692ae...,"6' 2""",264 lbs.,"72""",Southpaw,"Jan 22, 1993",3.87,52%,3.13,59%,3.0,75%,0%,0.0,Hamdy Abdelwahab


In [None]:
def pies_a_cm(distancia_pies):
    if len(distancia_pies)>3:
        digits = re.findall('[0-9]+', distancia_pies)
        pies, pulgadas = re.findall('[0-9]+', distancia_pies)
        cm = round(float(pies) * 30.48 + float(pulgadas) * 2.54)
    elif len(distancia_pies) == 3:
        pulgadas = re.findall('[0-9]+', distancia_pies)[0]
        cm = round(float(pulgadas) * 2.54)
    else:
        cm = np.nan
    return cm

In [None]:
fighters['Reach'] = fighters['Reach.1'].apply(pies_a_cm)

In [None]:
fighters.drop(['Reach.1', 'Ht.', 'Wt.', 'STANCE', 'Nickname'], axis = 1, inplace = True)

In [None]:
fighters['Height'] = fighters['Height'].apply(pies_a_cm)

In [None]:
fighters.head()

Unnamed: 0,Full Name,Height,Weight,Reach,DOB,Stance,W,L,D,Belt,SLpM,Str. Acc.,SApM,Str. Def,TD Avg.,TD Acc.,TD Def.,Sub. Avg.,enlaces
0,Tom Aaron,,155.0,,1978-07-13,,5.0,3.0,0.0,,0.0,0,0.0,0,0.0,0,0,0.0,http://ufcstats.com/fighter-details/93fe7332d1...
1,Danny Abbadi,180.0,155.0,,1983-07-03,Orthodox,4.0,6.0,0.0,,3.29,38,4.41,57,0.0,0,77,0.0,http://ufcstats.com/fighter-details/15df64c02b...
2,Nariman Abbasov,173.0,155.0,,1994-02-01,,27.0,3.0,0.0,,0.0,0,0.0,0,0.0,0,0,0.0,http://ufcstats.com/fighter-details/59a9d6dac6...
3,David Abbott,183.0,265.0,,,Switch,10.0,15.0,0.0,,1.35,30,3.55,38,1.07,33,66,0.0,http://ufcstats.com/fighter-details/b361180739...
4,Hamdy Abdelwahab,188.0,264.0,183.0,1993-01-22,Southpaw,6.0,0.0,0.0,,3.87,52,3.13,59,3.0,75,0,0.0,http://ufcstats.com/fighter-details/3329d692ae...


In [None]:
fighters['Weight'] = fighters['Weight'].str[:3]

In [None]:
fighters = fighters[['Full Name','Height', 'Weight','Reach', 'DOB', 
         'Stance', 'W', 'L', 'D', 'Belt', 'SLpM',
         'Str. Acc.', 'SApM', 'Str. Def', 'TD Avg.',
         'TD Acc.', 'TD Def.', 'Sub. Avg.', 
         'enlaces']]

In [None]:
def delete_percentage(serie):
    return serie.str[:-1]

In [None]:
cols_per = ['Str. Acc.', 'Str. Def', 'TD Acc.', 'TD Def.']
fighters[cols_per] = fighters[cols_per].apply(delete_percentage)

In [None]:
date_parse_v1 = lambda x: np.nan if x == '--' else datetime.strptime(x, '%b %d, %Y')

In [None]:
fighters['DOB'] = fighters['DOB'].apply(date_parse_v1)

In [None]:
fighters.head()

Unnamed: 0,Full Name,Height,Weight,Reach,DOB,Stance,W,L,D,Belt,SLpM,Str. Acc.,SApM,Str. Def,TD Avg.,TD Acc.,TD Def.,Sub. Avg.,enlaces
0,Tom Aaron,,155.0,,1978-07-13,,5.0,3.0,0.0,,0.0,0,0.0,0,0.0,0,0,0.0,http://ufcstats.com/fighter-details/93fe7332d1...
1,Danny Abbadi,180.0,155.0,,1983-07-03,Orthodox,4.0,6.0,0.0,,3.29,38,4.41,57,0.0,0,77,0.0,http://ufcstats.com/fighter-details/15df64c02b...
2,Nariman Abbasov,173.0,155.0,,1994-02-01,,27.0,3.0,0.0,,0.0,0,0.0,0,0.0,0,0,0.0,http://ufcstats.com/fighter-details/59a9d6dac6...
3,David Abbott,183.0,265.0,,,Switch,10.0,15.0,0.0,,1.35,30,3.55,38,1.07,33,66,0.0,http://ufcstats.com/fighter-details/b361180739...
4,Hamdy Abdelwahab,188.0,264.0,183.0,1993-01-22,Southpaw,6.0,0.0,0.0,,3.87,52,3.13,59,3.0,75,0,0.0,http://ufcstats.com/fighter-details/3329d692ae...


In [None]:
delete_guion = lambda x: np.nan if x == '--' else x

In [None]:
fighters['Weight'] = fighters['Weight'].apply(delete_guion)

In [None]:
fighters.to_csv('fighter_stats.csv', index = False)

In [None]:
fighters = pd.read_csv('fighter_stats.csv')

In [None]:
fighters.dtypes

Full Name     object
Height       float64
Weight       float64
Reach        float64
DOB           object
Stance        object
W            float64
L            float64
D            float64
Belt         float64
SLpM         float64
Str. Acc.      int64
SApM         float64
Str. Def       int64
TD Avg.      float64
TD Acc.        int64
TD Def.        int64
Sub. Avg.    float64
enlaces       object
dtype: object

## merge fights and bouts

In [None]:
bouts = pd.read_csv('./bouts_UFC.csv')

In [None]:
bouts['date_fight'] = bouts['date_fight'].str[-8:]

In [None]:
bouts.head()

Unnamed: 0,Weight class,Method,Round,Time,date_fight,link,winner,Fighter_0,Fighter_1,Kd_0,Kd_1,Str_0,Str_1,Td_0,Td_1,Sub_0,Sub_1
0,Light Heavyweight,KO/TKO Elbows,4,2:31,"06, 2022",http://ufcstats.com/fight-details/a8bc6e005077...,Jamahal Hill,Jamahal Hill,Thiago Santos,0,0,89,53,0,6,0,0
1,Welterweight,KO/TKO Punches,3,2:01,"06, 2022",http://ufcstats.com/fight-details/a6b328733d61...,Geoff Neal,Geoff Neal,Vicente Luque,2,0,121,97,1,0,0,0
2,Heavyweight,KO/TKO Punch,2,0:36,"06, 2022",http://ufcstats.com/fight-details/36f38c2534ef...,Mohammed Usman,Mohammed Usman,Zac Pauga,1,0,12,32,0,0,0,0
3,Women's Flyweight,KO/TKO Elbows,3,3:57,"06, 2022",http://ufcstats.com/fight-details/2b11aa4b90b1...,Juliana Miller,Juliana Miller,Brogan Walker,0,0,54,30,4,0,1,0
4,Heavyweight,KO/TKO Punches,2,3:42,"06, 2022",http://ufcstats.com/fight-details/044d210d7b1e...,Serghei Spivac,Serghei Spivac,Augusto Sakai,0,0,33,8,6,0,1,0


In [None]:
bouts_min = bouts[['Fighter_0', 'Fighter_1', 'date_fight', 'winner']].copy()

In [None]:
bouts_min.head()

Unnamed: 0,Fighter_0,Fighter_1,date_fight,winner
0,Jamahal Hill,Thiago Santos,"06, 2022",Jamahal Hill
1,Geoff Neal,Vicente Luque,"06, 2022",Geoff Neal
2,Mohammed Usman,Zac Pauga,"06, 2022",Mohammed Usman
3,Juliana Miller,Brogan Walker,"06, 2022",Juliana Miller
4,Serghei Spivac,Augusto Sakai,"06, 2022",Serghei Spivac


In [None]:
bouts_min['winner'] = np.where(bouts_min['Fighter_0'] == bouts_min['winner'], 0, 1)

In [None]:
bouts_min.head(30)

Unnamed: 0,Fighter_0,Fighter_1,date_fight,winner
0,Jamahal Hill,Thiago Santos,"06, 2022",0
1,Geoff Neal,Vicente Luque,"06, 2022",0
2,Mohammed Usman,Zac Pauga,"06, 2022",0
3,Juliana Miller,Brogan Walker,"06, 2022",0
4,Serghei Spivac,Augusto Sakai,"06, 2022",0
5,Terrance McKinney,Erick Gonzalez,"06, 2022",0
6,Michal Oleksiejczuk,Sam Alvey,"06, 2022",0
7,Bryan Battle,Takashi Sato,"06, 2022",0
8,Cory McKenna,Miranda Granger,"06, 2022",0
9,Mayra Bueno Silva,Stephanie Egger,"06, 2022",0


In [None]:
bouts_fighter_stats = bouts_min.merge(fighters, how = 'inner', left_on = 'Fighter_0', right_on = 'Full Name')

In [None]:
bouts_fighter_stats = bouts_fighter_stats.merge(fighters, how = 'inner', left_on = 'Fighter_1', right_on = 'Full Name', suffixes = ['_0', '_1'])

In [None]:
bouts_fighter_stats.to_csv('./dataset_v1.csv', index = False)

In [None]:
dataset = pd.read_csv('dataset_v1.csv', parse_dates=True)

## Final dataset

In [None]:
dataset.head()

Unnamed: 0,Fighter_0,Fighter_1,date_fight,winner,Full Name_0,Height_0,Weight_0,Reach_0,DOB_0,Stance_0,W_0,L_0,D_0,Belt_0,SLpM_0,Str. Acc._0,SApM_0,Str. Def_0,TD Avg._0,TD Acc._0,TD Def._0,Sub. Avg._0,enlaces_0,Full Name_1,Height_1,Weight_1,Reach_1,DOB_1,Stance_1,W_1,L_1,D_1,Belt_1,SLpM_1,Str. Acc._1,SApM_1,Str. Def_1,TD Avg._1,TD Acc._1,TD Def._1,Sub. Avg._1,enlaces_1
0,Jamahal Hill,Thiago Santos,2022-01-06,0,Jamahal Hill,193.0,205.0,201.0,1991-05-19,Southpaw,11.0,1.0,0.0,,6.46,52,3.51,45,0.0,0,65,0.0,http://ufcstats.com/fighter-details/5444c5a201...,Thiago Santos,188.0,205.0,193.0,1984-01-07,Orthodox,22.0,11.0,0.0,,3.72,48,2.57,53,0.88,33,65,0.1,http://ufcstats.com/fighter-details/dea070ed4a...
1,Kevin Holland,Thiago Santos,2018-01-04,1,Kevin Holland,190.0,170.0,206.0,1992-11-05,Orthodox,23.0,7.0,0.0,,3.9,54,2.41,55,0.84,43,50,0.6,http://ufcstats.com/fighter-details/3a46b26801...,Thiago Santos,188.0,205.0,193.0,1984-01-07,Orthodox,22.0,11.0,0.0,,3.72,48,2.57,53,0.88,33,65,0.1,http://ufcstats.com/fighter-details/dea070ed4a...
2,Aleksandar Rakic,Thiago Santos,2021-01-06,0,Aleksandar Rakic,193.0,205.0,198.0,1992-02-06,Orthodox,14.0,3.0,0.0,,4.01,50,2.3,53,0.82,25,90,0.2,http://ufcstats.com/fighter-details/333b9e5c72...,Thiago Santos,188.0,205.0,193.0,1984-01-07,Orthodox,22.0,11.0,0.0,,3.72,48,2.57,53,0.88,33,65,0.1,http://ufcstats.com/fighter-details/dea070ed4a...
3,Johnny Walker,Thiago Santos,2021-01-02,1,Johnny Walker,198.0,205.0,208.0,1992-03-30,Orthodox,18.0,7.0,0.0,,3.45,58,2.78,42,0.23,100,62,0.7,http://ufcstats.com/fighter-details/c21f26bbde...,Thiago Santos,188.0,205.0,193.0,1984-01-07,Orthodox,22.0,11.0,0.0,,3.72,48,2.57,53,0.88,33,65,0.1,http://ufcstats.com/fighter-details/dea070ed4a...
4,Jan Blachowicz,Thiago Santos,2019-01-23,1,Jan Blachowicz,188.0,205.0,198.0,1983-02-24,Orthodox,29.0,9.0,0.0,,3.55,49,2.77,54,1.08,53,66,0.3,http://ufcstats.com/fighter-details/99df7d0a2a...,Thiago Santos,188.0,205.0,193.0,1984-01-07,Orthodox,22.0,11.0,0.0,,3.72,48,2.57,53,0.88,33,65,0.1,http://ufcstats.com/fighter-details/dea070ed4a...


In [None]:
date_parse_v2 = lambda x: np.nan if x == '--' else datetime.strptime(x, '%d, %Y')

In [None]:
dataset['date_fight'] = dataset['date_fight'].apply(date_parse_v2)

ValueError: time data '2022-01-06' does not match format '%d, %Y'

In [None]:
dataset['date_fight'] = dataset['date_fight'].astype('datetime64[ns]')

In [None]:
dataset['DOB_0'] = dataset['DOB_0'].astype('datetime64[ns]')

In [None]:
dataset['DOB_1'] = dataset['DOB_1'].astype('datetime64[ns]')

In [None]:
dataset.dtypes

Fighter_0              object
Fighter_1              object
date_fight             object
winner                  int64
Full Name_0            object
Height_0              float64
Weight_0              float64
Reach_0               float64
DOB_0          datetime64[ns]
Stance_0               object
W_0                   float64
L_0                   float64
D_0                   float64
Belt_0                float64
SLpM_0                float64
Str. Acc._0             int64
SApM_0                float64
Str. Def_0              int64
TD Avg._0             float64
TD Acc._0               int64
TD Def._0               int64
Sub. Avg._0           float64
enlaces_0              object
Full Name_1            object
Height_1              float64
Weight_1              float64
Reach_1               float64
DOB_1          datetime64[ns]
Stance_1               object
W_1                   float64
L_1                   float64
D_1                   float64
Belt_1                float64
SLpM_1    

In [None]:
dataset['edad_pelea_0'] = dataset['date_fight'].dt.year - dataset['DOB_0'].dt.year

In [None]:
dataset['edad_pelea_1'] = dataset['date_fight'].dt.year - dataset['DOB_1'].dt.year 

In [None]:
dataset.to_csv('./dataset_v1.csv', index=False)

### dataset v2

In [None]:
dataset = pd.read_csv('dataset_v1.csv')

In [None]:
dataset.shape

(6760, 44)

In [None]:
colums_delete = ['date_fight', 'Full Name_0', 'Full Name_1',
                 'DOB_0', 'DOB_1', 'Belt_0', 'Belt_1',
                 'enlaces_0', 'enlaces_1', 'Fighter_0',
                 'Fighter_1']

In [None]:
dataset.drop(colums_delete, axis = 1, inplace = True)

In [None]:
dataset.head()

Unnamed: 0,winner,Height_0,Weight_0,Reach_0,Stance_0,W_0,L_0,D_0,SLpM_0,Str. Acc._0,SApM_0,Str. Def_0,TD Avg._0,TD Acc._0,TD Def._0,Sub. Avg._0,Height_1,Weight_1,Reach_1,Stance_1,W_1,L_1,D_1,SLpM_1,Str. Acc._1,SApM_1,Str. Def_1,TD Avg._1,TD Acc._1,TD Def._1,Sub. Avg._1,edad_pelea_0,edad_pelea_1
0,0,193.0,205.0,201.0,Southpaw,11.0,1.0,0.0,6.46,52,3.51,45,0.0,0,65,0.0,188.0,205.0,193.0,Orthodox,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,31.0,38.0
1,1,190.0,170.0,206.0,Orthodox,23.0,7.0,0.0,3.9,54,2.41,55,0.84,43,50,0.6,188.0,205.0,193.0,Orthodox,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,26.0,34.0
2,0,193.0,205.0,198.0,Orthodox,14.0,3.0,0.0,4.01,50,2.3,53,0.82,25,90,0.2,188.0,205.0,193.0,Orthodox,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,29.0,37.0
3,1,198.0,205.0,208.0,Orthodox,18.0,7.0,0.0,3.45,58,2.78,42,0.23,100,62,0.7,188.0,205.0,193.0,Orthodox,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,29.0,37.0
4,1,188.0,205.0,198.0,Orthodox,29.0,9.0,0.0,3.55,49,2.77,54,1.08,53,66,0.3,188.0,205.0,193.0,Orthodox,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,36.0,35.0


In [None]:
dataset.to_csv('./dataset_v2.csv', index = False)

## Missing values

In [None]:
df = pd.read_csv('./dataset_v2.csv')

In [None]:
df.head()

Unnamed: 0,winner,Height_0,Weight_0,Reach_0,Stance_0,W_0,L_0,D_0,SLpM_0,Str. Acc._0,SApM_0,Str. Def_0,TD Avg._0,TD Acc._0,TD Def._0,Sub. Avg._0,Height_1,Weight_1,Reach_1,Stance_1,W_1,L_1,D_1,SLpM_1,Str. Acc._1,SApM_1,Str. Def_1,TD Avg._1,TD Acc._1,TD Def._1,Sub. Avg._1,edad_pelea_0,edad_pelea_1
0,0,193.0,205.0,201.0,Southpaw,11.0,1.0,0.0,6.46,52,3.51,45,0.0,0,65,0.0,188.0,205.0,193.0,Orthodox,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,31.0,38.0
1,1,190.0,170.0,206.0,Orthodox,23.0,7.0,0.0,3.9,54,2.41,55,0.84,43,50,0.6,188.0,205.0,193.0,Orthodox,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,26.0,34.0
2,0,193.0,205.0,198.0,Orthodox,14.0,3.0,0.0,4.01,50,2.3,53,0.82,25,90,0.2,188.0,205.0,193.0,Orthodox,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,29.0,37.0
3,1,198.0,205.0,208.0,Orthodox,18.0,7.0,0.0,3.45,58,2.78,42,0.23,100,62,0.7,188.0,205.0,193.0,Orthodox,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,29.0,37.0
4,1,188.0,205.0,198.0,Orthodox,29.0,9.0,0.0,3.55,49,2.77,54,1.08,53,66,0.3,188.0,205.0,193.0,Orthodox,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,36.0,35.0


In [None]:
df.describe()

Unnamed: 0,winner,Height_0,Weight_0,Reach_0,W_0,L_0,D_0,SLpM_0,Str. Acc._0,SApM_0,Str. Def_0,TD Avg._0,TD Acc._0,TD Def._0,Sub. Avg._0,Height_1,Weight_1,Reach_1,W_1,L_1,D_1,SLpM_1,Str. Acc._1,SApM_1,Str. Def_1,TD Avg._1,TD Acc._1,TD Def._1,Sub. Avg._1,edad_pelea_0,edad_pelea_1
count,6760.0,6748.0,6751.0,6120.0,6760.0,6760.0,6760.0,6760.0,6760.0,6760.0,6760.0,6760.0,6760.0,6760.0,6760.0,6743.0,6744.0,6062.0,6760.0,6760.0,6760.0,6760.0,6760.0,6760.0,6760.0,6760.0,6760.0,6760.0,6760.0,6635.0,6613.0
mean,0.512278,178.645525,169.666272,183.12598,18.148225,7.578402,0.264941,3.242876,43.110651,3.278654,53.395858,1.561817,37.497337,57.910207,0.645902,178.822186,169.710261,183.246948,18.132544,7.583136,0.252219,3.239787,42.973964,3.262891,53.475,1.504741,37.198964,58.102367,0.633817,30.127355,30.08816
std,0.499886,8.731542,36.012173,10.566654,9.783993,4.5345,0.694758,1.371457,10.472851,1.333893,11.202257,1.303093,20.57344,22.85386,0.850719,8.763275,37.03319,10.631545,9.452893,4.64753,0.691393,1.383094,10.560245,1.377135,11.433746,1.2903,20.589735,22.825282,0.751336,4.113028,4.206979
min,0.0,152.0,115.0,147.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,152.0,115.0,147.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,20.0,18.0
25%,0.0,173.0,145.0,178.0,12.0,4.0,0.0,2.38,39.0,2.51,50.0,0.58,26.0,47.0,0.1,173.0,145.0,178.0,12.0,4.0,0.0,2.39,39.0,2.45,50.0,0.54,26.0,48.0,0.1,27.0,27.0
50%,1.0,180.0,170.0,183.0,17.0,7.0,0.0,3.18,44.0,3.1,55.0,1.3,38.0,61.0,0.4,180.0,170.0,183.0,17.0,7.0,0.0,3.2,44.0,3.1,55.0,1.22,38.0,62.0,0.4,30.0,30.0
75%,1.0,185.0,185.0,190.0,23.0,10.0,0.0,3.98,49.0,3.96,60.0,2.31,50.0,73.0,0.9,185.0,185.0,190.0,23.0,10.0,0.0,4.01,49.0,3.94,60.0,2.16,50.0,73.0,0.9,33.0,33.0
max,1.0,211.0,345.0,213.0,253.0,53.0,10.0,9.31,100.0,17.86,85.0,11.11,100.0,100.0,21.9,211.0,770.0,213.0,253.0,53.0,11.0,12.07,100.0,22.5,100.0,13.95,100.0,100.0,12.1,48.0,51.0


# Eliminar NANs

In [None]:
chunks_df = pd.read_csv('dataset_v2.csv', chunksize = 500)

In [None]:
df_list = []
for chunk in chunks_df:
    chunk.dropna(inplace = True)
    df_list.append(chunk)

df = pd.concat(df_list, ignore_index = True)

In [None]:
df.describe()

Unnamed: 0,winner,Height_0,Weight_0,Reach_0,W_0,L_0,D_0,SLpM_0,Str. Acc._0,SApM_0,Str. Def_0,TD Avg._0,TD Acc._0,TD Def._0,Sub. Avg._0,Height_1,Weight_1,Reach_1,W_1,L_1,D_1,SLpM_1,Str. Acc._1,SApM_1,Str. Def_1,TD Avg._1,TD Acc._1,TD Def._1,Sub. Avg._1,edad_pelea_0,edad_pelea_1
count,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0,5668.0
mean,0.525582,178.339097,166.908433,183.075159,18.453952,7.54199,0.230593,3.455517,44.366972,3.3787,55.120854,1.604388,38.569337,60.929958,0.627611,178.497883,166.773994,183.170254,18.532639,7.514291,0.214538,3.465129,44.428017,3.347068,55.253529,1.570371,38.563867,61.395201,0.627541,30.321983,30.265879
std,0.499389,8.807254,34.581257,10.659247,8.412149,4.306892,0.604322,1.25178,7.523889,1.19303,7.16675,1.260875,18.660999,19.757481,0.690254,8.812989,34.645879,10.666733,8.245066,4.356527,0.557782,1.230212,7.347024,1.17002,7.241708,1.255592,18.336722,19.347084,0.674679,4.061104,4.137532
min,0.0,152.0,115.0,147.0,0.0,0.0,0.0,0.18,8.0,0.37,21.0,0.0,0.0,0.0,0.0,152.0,115.0,147.0,0.0,0.0,0.0,0.18,12.0,0.37,12.0,0.0,0.0,0.0,0.0,20.0,19.0
25%,0.0,173.0,145.0,178.0,13.0,4.0,0.0,2.59,40.0,2.59,51.0,0.65,28.0,50.0,0.1,173.0,145.0,178.0,13.0,4.0,0.0,2.62,40.0,2.57,51.0,0.63,28.0,52.0,0.1,27.0,27.0
50%,1.0,178.0,155.0,183.0,17.0,7.0,0.0,3.33,44.0,3.19,56.0,1.35,38.0,63.0,0.5,178.0,155.0,183.0,17.0,7.0,0.0,3.35,44.0,3.18,56.0,1.32,38.0,63.0,0.5,30.0,30.0
75%,1.0,185.0,185.0,190.0,23.0,10.0,0.0,4.15,49.0,3.99,60.0,2.32,50.0,74.0,0.9,185.0,185.0,190.0,23.0,10.0,0.0,4.15,49.0,3.97,60.0,2.22,50.0,75.0,0.9,33.0,33.0
max,1.0,211.0,265.0,213.0,91.0,26.0,8.0,9.31,80.0,14.41,85.0,10.98,100.0,100.0,7.4,211.0,265.0,213.0,91.0,26.0,8.0,12.07,79.0,15.48,85.0,11.27,100.0,100.0,7.4,47.0,48.0


In [None]:
df.to_csv('./df_del_nan.csv', index = False)

### Variables categoricas

In [2]:
df = pd.read_csv('df_del_nan.csv')

In [3]:
df['Stance_0'].unique()

array(['Southpaw', 'Orthodox', 'Switch', 'Open Stance'], dtype=object)

In [4]:
dummies_stance = pd.get_dummies(df[['Stance_0', 'Stance_1']], prefix = ['St_0', 'St_1'])

In [5]:
df = pd.concat([df, dummies_stance], axis=1)

In [6]:
df.head()

Unnamed: 0,winner,Height_0,Weight_0,Reach_0,Stance_0,W_0,L_0,D_0,SLpM_0,Str. Acc._0,SApM_0,Str. Def_0,TD Avg._0,TD Acc._0,TD Def._0,Sub. Avg._0,Height_1,Weight_1,Reach_1,Stance_1,W_1,L_1,D_1,SLpM_1,Str. Acc._1,SApM_1,Str. Def_1,TD Avg._1,TD Acc._1,TD Def._1,Sub. Avg._1,edad_pelea_0,edad_pelea_1,St_0_Open Stance,St_0_Orthodox,St_0_Southpaw,St_0_Switch,St_1_Open Stance,St_1_Orthodox,St_1_Southpaw,St_1_Switch
0,0,193.0,205.0,201.0,Southpaw,11.0,1.0,0.0,6.46,52,3.51,45,0.0,0,65,0.0,188.0,205.0,193.0,Orthodox,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,31.0,38.0,0,0,1,0,0,1,0,0
1,1,190.0,170.0,206.0,Orthodox,23.0,7.0,0.0,3.9,54,2.41,55,0.84,43,50,0.6,188.0,205.0,193.0,Orthodox,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,26.0,34.0,0,1,0,0,0,1,0,0
2,0,193.0,205.0,198.0,Orthodox,14.0,3.0,0.0,4.01,50,2.3,53,0.82,25,90,0.2,188.0,205.0,193.0,Orthodox,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,29.0,37.0,0,1,0,0,0,1,0,0
3,1,198.0,205.0,208.0,Orthodox,18.0,7.0,0.0,3.45,58,2.78,42,0.23,100,62,0.7,188.0,205.0,193.0,Orthodox,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,29.0,37.0,0,1,0,0,0,1,0,0
4,1,188.0,205.0,198.0,Orthodox,29.0,9.0,0.0,3.55,49,2.77,54,1.08,53,66,0.3,188.0,205.0,193.0,Orthodox,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,36.0,35.0,0,1,0,0,0,1,0,0


In [8]:
df.drop(['Stance_0', 'Stance_1'], axis = 1, inplace = True)

In [9]:
df.head()

Unnamed: 0,winner,Height_0,Weight_0,Reach_0,W_0,L_0,D_0,SLpM_0,Str. Acc._0,SApM_0,Str. Def_0,TD Avg._0,TD Acc._0,TD Def._0,Sub. Avg._0,Height_1,Weight_1,Reach_1,W_1,L_1,D_1,SLpM_1,Str. Acc._1,SApM_1,Str. Def_1,TD Avg._1,TD Acc._1,TD Def._1,Sub. Avg._1,edad_pelea_0,edad_pelea_1,St_0_Open Stance,St_0_Orthodox,St_0_Southpaw,St_0_Switch,St_1_Open Stance,St_1_Orthodox,St_1_Southpaw,St_1_Switch
0,0,193.0,205.0,201.0,11.0,1.0,0.0,6.46,52,3.51,45,0.0,0,65,0.0,188.0,205.0,193.0,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,31.0,38.0,0,0,1,0,0,1,0,0
1,1,190.0,170.0,206.0,23.0,7.0,0.0,3.9,54,2.41,55,0.84,43,50,0.6,188.0,205.0,193.0,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,26.0,34.0,0,1,0,0,0,1,0,0
2,0,193.0,205.0,198.0,14.0,3.0,0.0,4.01,50,2.3,53,0.82,25,90,0.2,188.0,205.0,193.0,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,29.0,37.0,0,1,0,0,0,1,0,0
3,1,198.0,205.0,208.0,18.0,7.0,0.0,3.45,58,2.78,42,0.23,100,62,0.7,188.0,205.0,193.0,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,29.0,37.0,0,1,0,0,0,1,0,0
4,1,188.0,205.0,198.0,29.0,9.0,0.0,3.55,49,2.77,54,1.08,53,66,0.3,188.0,205.0,193.0,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,36.0,35.0,0,1,0,0,0,1,0,0


In [10]:
df.to_csv('./df_del_nan_v2.csv', index = False)

# Normalizacion datos para DNN

In [4]:
df = pd.read_csv('df_del_nan_v2.csv')

In [5]:
df.head()

Unnamed: 0,winner,Height_0,Weight_0,Reach_0,W_0,L_0,D_0,SLpM_0,Str. Acc._0,SApM_0,Str. Def_0,TD Avg._0,TD Acc._0,TD Def._0,Sub. Avg._0,Height_1,Weight_1,Reach_1,W_1,L_1,D_1,SLpM_1,Str. Acc._1,SApM_1,Str. Def_1,TD Avg._1,TD Acc._1,TD Def._1,Sub. Avg._1,edad_pelea_0,edad_pelea_1,St_0_Open Stance,St_0_Orthodox,St_0_Southpaw,St_0_Switch,St_1_Open Stance,St_1_Orthodox,St_1_Southpaw,St_1_Switch
0,0,193.0,205.0,201.0,11.0,1.0,0.0,6.46,52,3.51,45,0.0,0,65,0.0,188.0,205.0,193.0,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,31.0,38.0,0,0,1,0,0,1,0,0
1,1,190.0,170.0,206.0,23.0,7.0,0.0,3.9,54,2.41,55,0.84,43,50,0.6,188.0,205.0,193.0,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,26.0,34.0,0,1,0,0,0,1,0,0
2,0,193.0,205.0,198.0,14.0,3.0,0.0,4.01,50,2.3,53,0.82,25,90,0.2,188.0,205.0,193.0,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,29.0,37.0,0,1,0,0,0,1,0,0
3,1,198.0,205.0,208.0,18.0,7.0,0.0,3.45,58,2.78,42,0.23,100,62,0.7,188.0,205.0,193.0,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,29.0,37.0,0,1,0,0,0,1,0,0
4,1,188.0,205.0,198.0,29.0,9.0,0.0,3.55,49,2.77,54,1.08,53,66,0.3,188.0,205.0,193.0,22.0,11.0,0.0,3.72,48,2.57,53,0.88,33,65,0.1,36.0,35.0,0,1,0,0,0,1,0,0


In [8]:
columns = df.columns

In [7]:
columns

Index(['winner', 'Height_0', 'Weight_0', 'Reach_0', 'W_0', 'L_0', 'D_0',
       'SLpM_0', 'Str. Acc._0', 'SApM_0', 'Str. Def_0', 'TD Avg._0',
       'TD Acc._0', 'TD Def._0', 'Sub. Avg._0', 'Height_1', 'Weight_1',
       'Reach_1', 'W_1', 'L_1', 'D_1', 'SLpM_1', 'Str. Acc._1', 'SApM_1',
       'Str. Def_1', 'TD Avg._1', 'TD Acc._1', 'TD Def._1', 'Sub. Avg._1',
       'edad_pelea_0', 'edad_pelea_1', 'St_0_Open Stance', 'St_0_Orthodox',
       'St_0_Southpaw', 'St_0_Switch', 'St_1_Open Stance', 'St_1_Orthodox',
       'St_1_Southpaw', 'St_1_Switch'],
      dtype='object')

In [8]:
from sklearn.preprocessing import MinMaxScaler

In [9]:
scaler = MinMaxScaler()
df_scaled = scaler.fit_transform(df)

In [10]:
df_scaled = pd.DataFrame(df_scaled, columns=columns)

In [11]:
df_scaled.to_csv('./df_del_nan_v3.csv', index = False)

## DNN

In [1]:
df = pd.read_csv('df_del_nan_v3.csv')

NameError: name 'pd' is not defined

In [30]:
!pip install tensorflow



In [9]:
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Flatten, Dense
from tensorflow import convert_to_tensor
#from tensorflow.keras.losses import 

In [10]:
X = df[columns[1:]]
Y = df[columns[0]]

In [11]:
X.head()

Unnamed: 0,Height_0,Weight_0,Reach_0,W_0,L_0,D_0,SLpM_0,Str. Acc._0,SApM_0,Str. Def_0,TD Avg._0,TD Acc._0,TD Def._0,Sub. Avg._0,Height_1,Weight_1,Reach_1,W_1,L_1,D_1,SLpM_1,Str. Acc._1,SApM_1,Str. Def_1,TD Avg._1,TD Acc._1,TD Def._1,Sub. Avg._1,edad_pelea_0,edad_pelea_1,St_0_Open Stance,St_0_Orthodox,St_0_Southpaw,St_0_Switch,St_1_Open Stance,St_1_Orthodox,St_1_Southpaw,St_1_Switch
0,0.694915,0.6,0.818182,0.120879,0.038462,0.0,0.687842,0.611111,0.223647,0.375,0.0,0.0,0.65,0.0,0.610169,0.6,0.69697,0.241758,0.423077,0.0,0.297729,0.537313,0.145599,0.561644,0.078083,0.33,0.65,0.013514,0.407407,0.655172,0.0,0.0,1.0,0.0,0.0,1.0,0.0,0.0
1,0.644068,0.366667,0.893939,0.252747,0.269231,0.0,0.407448,0.638889,0.145299,0.53125,0.076503,0.43,0.5,0.081081,0.610169,0.6,0.69697,0.241758,0.423077,0.0,0.297729,0.537313,0.145599,0.561644,0.078083,0.33,0.65,0.013514,0.222222,0.517241,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
2,0.694915,0.6,0.772727,0.153846,0.115385,0.0,0.419496,0.583333,0.137464,0.5,0.074681,0.25,0.9,0.027027,0.610169,0.6,0.69697,0.241758,0.423077,0.0,0.297729,0.537313,0.145599,0.561644,0.078083,0.33,0.65,0.013514,0.333333,0.62069,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
3,0.779661,0.6,0.924242,0.197802,0.269231,0.0,0.35816,0.694444,0.171652,0.328125,0.020947,1.0,0.62,0.094595,0.610169,0.6,0.69697,0.241758,0.423077,0.0,0.297729,0.537313,0.145599,0.561644,0.078083,0.33,0.65,0.013514,0.333333,0.62069,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0
4,0.610169,0.6,0.772727,0.318681,0.346154,0.0,0.369113,0.569444,0.17094,0.515625,0.098361,0.53,0.66,0.040541,0.610169,0.6,0.69697,0.241758,0.423077,0.0,0.297729,0.537313,0.145599,0.561644,0.078083,0.33,0.65,0.013514,0.592593,0.551724,0.0,1.0,0.0,0.0,0.0,1.0,0.0,0.0


In [12]:
Y.head()

0    0.0
1    1.0
2    0.0
3    1.0
4    1.0
Name: winner, dtype: float64

In [13]:
X = convert_to_tensor(X)
Y = convert_to_tensor(Y)

2022-09-13 11:50:12.183721: W tensorflow/stream_executor/platform/default/dso_loader.cc:64] Could not load dynamic library 'libcuda.so.1'; dlerror: libcuda.so.1: cannot open shared object file: No such file or directory
2022-09-13 11:50:12.183803: W tensorflow/stream_executor/cuda/cuda_driver.cc:269] failed call to cuInit: UNKNOWN ERROR (303)
2022-09-13 11:50:12.183848: I tensorflow/stream_executor/cuda/cuda_diagnostics.cc:156] kernel driver does not appear to be running on this host (kschool-vm): /proc/driver/nvidia/version does not exist
2022-09-13 11:50:12.188298: I tensorflow/core/platform/cpu_feature_guard.cc:193] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  AVX2
To enable them in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [14]:
X.shape

TensorShape([5668, 38])

In [15]:
Y.shape

TensorShape([5668])

In [16]:
Y_t = to_categorical(Y, num_classes = 2)

Y_t.shape

(5668, 2)

In [24]:
model = Sequential()

l1 = Dense(units = 30, input_shape=(38, ))
model.add(l1)

l2 = Dense(units = 30, activation='relu')
model.add(l2)

l2 = Dense(units = 30, activation='relu')
model.add(l2)

l2 = Dense(units = 30, activation='relu')
model.add(l2)

l2 = Dense(units = 30, activation='relu')
model.add(l2)

out = Dense(units = 2, activation='sigmoid')
model.add(out)

model.summary()


model.compile(optimizer = 'adam', loss='binary_crossentropy', metrics = ["accuracy"])


Model: "sequential_2"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_12 (Dense)            (None, 30)                1170      
                                                                 
 dense_13 (Dense)            (None, 30)                930       
                                                                 
 dense_14 (Dense)            (None, 30)                930       
                                                                 
 dense_15 (Dense)            (None, 30)                930       
                                                                 
 dense_16 (Dense)            (None, 30)                930       
                                                                 
 dense_17 (Dense)            (None, 2)                 62        
                                                                 
Total params: 4,952
Trainable params: 4,952
Non-traina

In [25]:
model.fit(x = X, y = Y_t, epochs = 40)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


<keras.callbacks.History at 0x7f2bcd45d310>

# Streamlit

In [5]:
from PIL import Image 

In [13]:
%%writefile app.py

import streamlit as st
import pandas as pd
from PIL import Image 
  
st.set_page_config(page_title = 'Money Fighter', page_icon = ':large_yellow_square:') 


st.title('Money Fighter')
menu = ['Home', 'Stats', 'About']

choice = st.sidebar.selectbox('Menu', menu)
df = pd.read_csv('df_del_nan_v3.csv')

image = Image.open('./pak.jpg') 

if choice == 'Home':
    st.subheader('Home')
    st.table(df.head())
elif choice == 'Stats':
    st.subheader('Stats')
else:
    st.subheader('About')
    descripcion = ('Money Fighter  is a website that predicts the result of the upcoming MMA fights.\
         We take into account thousands of past bouts  and use a complex model to predict the \
         outcome of each fight.\
         Our predictions give you an edge over the bookmakers and other gamblers, \
         as well as insightful data about fighters’ tendencies in different situations. \
         Our predictions are 100% free and we do not accept any payments from the fighters, \
         their managers or promoters. Our goal is to help MMA fans understand the sport better\
         by providing them with valuable information about the upcoming fights.')
   
    st.image(image = image, width=200)
    st.markdown(f'<div style="text-align: justify;">{descripcion}</div>', unsafe_allow_html=True)                          

Overwriting app.py
