# Limpieza dataset de profesionales
  
En el presente archivo se realiza la limpieza de los datos que no terminaron de salir limpios en el scrapeo.

In [None]:
# Importación de librerías y recursos

In [1]:
import numpy as np
import pandas as pd
import time

pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Importación de la librería pymongo para recuperar los datos ubicados en MongoDB

In [2]:
from pymongo import MongoClient
cursor=MongoClient()
final_proj = cursor.lol_scouting                                                   
colec = final_proj.player_prof 

In [3]:
data = [e for e in colec.find()]

In [4]:
df = pd.DataFrame(data)

df.head()

Unnamed: 0,_id,nombre_invocador,region,liga,lps,cola,ranking_mundial,ranking_porcentaje,wins,loses,n_games,winrate_total
0,640d83d1b40cd7e943808789,100T Bjergsen,,Diamante,,Soloqueue,67342,Top 0.71%,61,50,111,55.0
1,640d83d27177991764ba4b71,Hide on bush,KR,GrandMaster,542.0,Soloqueue,4736,Top 0.049%,149,135,284,52.5
2,640d83d2c0978a14433e3289,Jinno Kingdom,EUW,Challenger,801.0,Soloqueue,1183,Top 0.012%,295,273,568,51.9
3,640d83d52e0f5469d90e15b9,Satoru Gojo03,EUW,Challenger,984.0,Soloqueue,535,Top 0.0056%,77,47,124,62.1
4,640d83e9c0978a14433e328a,LFT ADC,EUW,Diamante,,Soloqueue,211267,Top 2.2%,7,13,20,35.0


In [None]:
# Eliminamos columnas de datos que no nos interesan, así como registros de los que no se han conseguido scrapear
# datos muy relevantes

In [5]:
df.drop('_id', axis = 1, inplace = True)

In [6]:
df.ranking_porcentaje = df.ranking_porcentaje.apply(lambda x: x.split(' ')[1][:-1])

In [7]:
df.ranking_mundial = df.ranking_mundial.apply(lambda x: x.replace(',', '') if ',' in x else x)

In [8]:
ind = df.loc[df['lps'] == ''].index

In [10]:
df.drop(index = ind, inplace = True)

In [None]:
# Transformación del tipo de dato

In [11]:
df.lps = df.lps.astype(int)
df.ranking_mundial = df.ranking_mundial.astype(int)
df.ranking_porcentaje = df.ranking_porcentaje.astype(float)
df.wins = df.wins.astype(int)
df.loses = df.loses.astype(int)
df.n_games = df.n_games.astype(int)
df.winrate_total = df.winrate_total.astype(float)

In [None]:
# Etiquetamos columna categórica para poder operar con ella en el futuro

In [12]:
df.liga.unique()

array(['GrandMaster', 'Challenger', 'Master'], dtype=object)

In [14]:
df['liga_label'] = df.liga.apply(lambda x: 2 if x == 'Challenger' else 1 if x == 'GrandMaster' else 0)

In [15]:
df.drop_duplicates(inplace = True)

In [16]:
df.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 51 entries, 1 to 55
Data columns (total 12 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   nombre_invocador    51 non-null     object 
 1   region              51 non-null     object 
 2   liga                51 non-null     object 
 3   lps                 51 non-null     int64  
 4   cola                51 non-null     object 
 5   ranking_mundial     51 non-null     int64  
 6   ranking_porcentaje  51 non-null     float64
 7   wins                51 non-null     int64  
 8   loses               51 non-null     int64  
 9   n_games             51 non-null     int64  
 10  winrate_total       51 non-null     float64
 11  liga_label          51 non-null     int64  
dtypes: float64(2), int64(6), object(4)
memory usage: 5.2+ KB


In [None]:
# Exportamos df limpio a .csv

In [17]:
df.to_csv('../data/info_general_prof.csv', index = False)

**DF_2**

In [None]:
# Seguiremos un proceso similar al anterior. 
# Recuperamos los datos de MongoDB, los limpiamos, transformamos el dato si se requiere y los exportamos a un .csv

In [18]:
colec = final_proj.rol_kda_profesional
data_2 = [e for e in colec.find()]

In [19]:
df_2 = pd.DataFrame(data_2)

df_2.head()

Unnamed: 0,_id,nombre invocador,kills_player_kda,deaths_player_kda,assists_player_kda,rol_1,games_per_rol_1,winrate_per_rol_1,rol_2,games_per_rol_2,winrate_per_rol_2,nombre_invocador,rol_3,games_per_rol_3,winrate_per_rol_3
0,640dab79768ab9d1e7077311,100T Bjergsen,5.2,3.7,6.1,Mid,89,57.3,Top,22.0,45.5,,,,
1,640dab802531c56df77b483a,,6.6,4.9,7.5,AD Carry,447,53.7,Jungler,59.0,45.8,Jinno Kingdom,Support,37.0,51.4
2,640dab80c39dccc9f705d3b5,,5.4,4.2,6.4,Mid,246,54.1,Jungler,25.0,44.0,Hide on bush,Top,8.0,37.5
3,640dab806f2eedc2432669b0,,7.2,5.2,7.2,AD Carry,120,61.7,Mid,3.0,100.0,Satoru Gojo03,Jungler,1.0,0.0
4,640dab902531c56df77b483b,,7.3,5.8,5.5,AD Carry,20,35.0,,,,LFT ADC,,,


In [20]:
df_2.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 56 entries, 0 to 55
Data columns (total 15 columns):
 #   Column              Non-Null Count  Dtype 
---  ------              --------------  ----- 
 0   _id                 56 non-null     object
 1   nombre invocador    3 non-null      object
 2   kills_player_kda    56 non-null     object
 3   deaths_player_kda   56 non-null     object
 4   assists_player_kda  56 non-null     object
 5   rol_1               56 non-null     object
 6   games_per_rol_1     56 non-null     object
 7   winrate_per_rol_1   56 non-null     object
 8   rol_2               55 non-null     object
 9   games_per_rol_2     55 non-null     object
 10  winrate_per_rol_2   55 non-null     object
 11  nombre_invocador    53 non-null     object
 12  rol_3               52 non-null     object
 13  games_per_rol_3     52 non-null     object
 14  winrate_per_rol_3   52 non-null     object
dtypes: object(15)
memory usage: 6.7+ KB


In [21]:
df_2.drop(['_id', 'nombre invocador'], axis = 1, inplace = True)

In [23]:
ind = df_2.loc[df_2['nombre_invocador'].isna()].index

In [24]:
df_2.drop(index = ind, inplace = True)

In [26]:
df_2 = df_2.fillna(0)

In [27]:
df_2.head()

Unnamed: 0,kills_player_kda,deaths_player_kda,assists_player_kda,rol_1,games_per_rol_1,winrate_per_rol_1,rol_2,games_per_rol_2,winrate_per_rol_2,nombre_invocador,rol_3,games_per_rol_3,winrate_per_rol_3
1,6.6,4.9,7.5,AD Carry,447,53.7,Jungler,59,45.8,Jinno Kingdom,Support,37,51.4
2,5.4,4.2,6.4,Mid,246,54.1,Jungler,25,44.0,Hide on bush,Top,8,37.5
3,7.2,5.2,7.2,AD Carry,120,61.7,Mid,3,100.0,Satoru Gojo03,Jungler,1,0.0
4,7.3,5.8,5.5,AD Carry,20,35.0,0,0,0.0,LFT ADC,0,0,0.0
5,7.1,5.9,5.7,Top,189,60.8,Jungler,7,42.9,Kongenvenderhjem,Mid,7,57.1


In [28]:
df_2.kills_player_kda = df_2.kills_player_kda.astype(float)
df_2.deaths_player_kda = df_2.deaths_player_kda.astype(float)
df_2.assists_player_kda = df_2.assists_player_kda.astype(float)
df_2.games_per_rol_1 = df_2.games_per_rol_1.astype(int)
df_2.winrate_per_rol_1 = df_2.winrate_per_rol_1.astype(float)

In [29]:
df_2.games_per_rol_2 = df_2.games_per_rol_2.astype(float)
df_2.winrate_per_rol_2 = df_2.winrate_per_rol_2.astype(float)
df_2.games_per_rol_3 = df_2.games_per_rol_3.astype(float)
df_2.winrate_per_rol_3 = df_2.winrate_per_rol_3.astype(float)

In [30]:
df_2.drop_duplicates(inplace = True)

In [31]:
df_2.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 53 entries, 1 to 55
Data columns (total 13 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   kills_player_kda    53 non-null     float64
 1   deaths_player_kda   53 non-null     float64
 2   assists_player_kda  53 non-null     float64
 3   rol_1               53 non-null     object 
 4   games_per_rol_1     53 non-null     int64  
 5   winrate_per_rol_1   53 non-null     float64
 6   rol_2               53 non-null     object 
 7   games_per_rol_2     53 non-null     float64
 8   winrate_per_rol_2   53 non-null     float64
 9   nombre_invocador    53 non-null     object 
 10  rol_3               53 non-null     object 
 11  games_per_rol_3     53 non-null     float64
 12  winrate_per_rol_3   53 non-null     float64
dtypes: float64(8), int64(1), object(4)
memory usage: 5.8+ KB


In [32]:
df_2.to_csv('../data/rol_kda_prof.csv', index = False)

**DF_3**

In [None]:
# Mismo proceso con el tercer y último df

In [33]:
colec = final_proj.champs_prof
data_3 = [e for e in colec.find()]

In [34]:
df_3 = pd.DataFrame(data_3)

df_3.head()

Unnamed: 0,_id,nombre,champ_1,regional_rank_1,kills_champ_1,deaths_champ_1,assists_champ_1,games_champ_1,winrate_champ_1,champ_2,regional_rank_2,kills_champ_2,deaths_champ_2,assists_champ_2,games_champ_2,winrate_champ_2,champ_3,regional_rank_3,kills_champ_3,deaths_champ_3,assists_champ_3,games_champ_3,winrate_champ_3,champ_4,regional_rank_4,kills_champ_4,deaths_champ_4,assists_champ_4,games_champ_4,winrate_champ_4,champ_5,regional_rank_5,kills_champ_5,deaths_champ_5,assists_champ_5,games_champ_5,winrate_champ_5,champ_6,regional_rank_6,kills_champ_6,deaths_champ_6,assists_champ_6,games_champ_6,winrate_champ_6,champ_7,regional_rank_7,kills_champ_7,deaths_champ_7,assists_champ_7,games_champ_7,winrate_champ_7,champ_8,regional_rank_8,kills_champ_8,deaths_champ_8,assists_champ_8,games_champ_8,winrate_champ_8
0,640db1e814ffaaa53732dc8b,Satoru Gojo03,Zeri,(EUW: 122),7.3,4.6,5.9,23,60.9,Draven,880,8.0,5.9,7.7,21,57.1,Varus,1558,6.8,5.8,8.2,17,58.8,Aphelios,1905,6.0,5.4,7.4,17,52.9,Caitlyn,666,5.8,4.5,7.7,13,61.5,Lucian,4463,9.5,6.4,7.7,11,81.8,Xayah,210.0,8.4,2.8,6.4,9,88.9,Sivir,,6.3,9.0,11.3,3,66.7
1,640db206a39d37fbfeb367a8,Hide on bush,Annie,(KR: 514),5.0,2.8,6.7,28,67.9,Aurelion Sol,245,6.9,3.4,6.6,27,55.6,Yone,850,5.1,4.5,5.4,16,62.5,Sylas,1640,4.4,3.8,5.4,16,50.0,Akali,19442,7.9,3.9,4.3,15,60.0,Ryze,1667,4.9,3.2,6.4,15,53.3,Tristana,5271.0,5.9,6.7,4.5,13,30.8,Jayce,18688.0,5.8,6.3,6.6,12,25.0
2,640db2084b1fd25546f0d2db,Jinno Kingdom,Varus,(EUW: 299),7.8,4.6,8.0,79,53.2,Zeri,72,6.8,3.4,6.1,64,64.1,Caitlyn,18,6.9,4.8,5.8,51,56.9,Ezreal,97,7.3,5.6,6.6,36,52.8,Sivir,678,6.3,4.4,8.8,32,62.5,Draven,106,7.6,5.4,5.6,29,48.3,Maokai,1391.0,3.6,4.3,10.4,25,56.0,Xayah,1137.0,6.9,5.5,5.7,24,37.5
3,640db214d33eb5cd2a1bf858,Kongenvenderhjem,Jayce,(EUW: 36),6.8,6.1,5.6,38,73.7,Gangplank,"(EUW: 10,569)",7.0,5.7,6.3,28,71.4,Fiora,53,5.0,5.6,4.9,14,50.0,Vladimir,305,4.3,5.0,5.4,11,54.5,Gnar,2672,8.1,4.2,5.3,10,70.0,Quinn,2429,8.8,7.1,6.0,10,60.0,Jax,485.0,4.2,5.9,4.3,9,33.3,Lee Sin,540.0,9.3,6.1,4.3,8,62.5
4,640db320a39d37fbfeb367a9,pleroma chronou,Aurelion Sol,(EUW: 839),7.5,6.7,7.9,24,54.2,Tristana,1216,8.8,5.0,4.9,22,63.6,Akali,756,9.8,4.0,5.4,17,64.7,Sylas,831,7.5,4.8,4.9,15,53.3,Ryze,3807,7.7,3.8,5.8,12,75.0,Lee Sin,1546,8.4,5.6,4.5,12,66.7,Syndra,,6.4,5.2,8.8,9,55.6,Gragas,,2.3,5.9,8.0,7,42.9


In [35]:
df_3.info(memory_usage = 'deep')

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 40 entries, 0 to 39
Data columns (total 58 columns):
 #   Column           Non-Null Count  Dtype 
---  ------           --------------  ----- 
 0   _id              40 non-null     object
 1   nombre           40 non-null     object
 2   champ_1          40 non-null     object
 3   regional_rank_1  40 non-null     object
 4   kills_champ_1    40 non-null     object
 5   deaths_champ_1   40 non-null     object
 6   assists_champ_1  40 non-null     object
 7   games_champ_1    40 non-null     object
 8   winrate_champ_1  40 non-null     object
 9   champ_2          40 non-null     object
 10  regional_rank_2  40 non-null     object
 11  kills_champ_2    40 non-null     object
 12  deaths_champ_2   40 non-null     object
 13  assists_champ_2  40 non-null     object
 14  games_champ_2    40 non-null     object
 15  winrate_champ_2  40 non-null     object
 16  champ_3          40 non-null     object
 17  regional_rank_3  40 non-null     obje

In [36]:
df_3.drop('_id', axis = 1, inplace = True)

In [37]:
df_3.regional_rank_1 = df_3.regional_rank_1.apply(lambda x: x.split(' ')[1][:-1] if len(x.split(' ')) > 1 else x)
df_3.regional_rank_2 = df_3.regional_rank_2.apply(lambda x: x.split(' ')[1][:-1] if len(x.split(' ')) > 1 else x)
df_3.regional_rank_3 = df_3.regional_rank_3.apply(lambda x: x.split(' ')[1][:-1] if len(x.split(' ')) > 1 else x)
df_3.regional_rank_4 = df_3.regional_rank_4.apply(lambda x: x.split(' ')[1][:-1] if len(x.split(' ')) > 1 else x)
df_3.regional_rank_5 = df_3.regional_rank_5.apply(lambda x: x.split(' ')[1][:-1] if len(x.split(' ')) > 1 else x)
df_3.regional_rank_6 = df_3.regional_rank_6.apply(lambda x: x.split(' ')[1][:-1] if len(x.split(' ')) > 1 else x)
df_3.regional_rank_7 = df_3.regional_rank_7.apply(lambda x: x.split(' ')[1][:-1] if len(x.split(' ')) > 1 else x)
df_3.regional_rank_8 = df_3.regional_rank_8.apply(lambda x: x.split(' ')[1][:-1] if len(x.split(' ')) > 1 else x)

In [38]:
df_3.regional_rank_1 = df_3.regional_rank_1.apply(lambda x: x.replace(',', ''))
df_3.regional_rank_2 = df_3.regional_rank_2.apply(lambda x: x.replace(',', ''))
df_3.regional_rank_3 = df_3.regional_rank_3.apply(lambda x: x.replace(',', ''))
df_3.regional_rank_4 = df_3.regional_rank_4.apply(lambda x: x.replace(',', ''))
df_3.regional_rank_5 = df_3.regional_rank_5.apply(lambda x: x.replace(',', ''))
df_3.regional_rank_6 = df_3.regional_rank_6.apply(lambda x: x.replace(',', ''))
df_3.regional_rank_7 = df_3.regional_rank_7.apply(lambda x: x.replace(',', ''))
df_3.regional_rank_8 = df_3.regional_rank_8.apply(lambda x: x.replace(',', ''))

In [39]:
df_3.regional_rank_4 = df_3.regional_rank_4.apply(lambda x: x.replace('', '0') if x == '' else x)
df_3.regional_rank_5 = df_3.regional_rank_5.apply(lambda x: x.replace('', '0') if x == '' else x)
df_3.regional_rank_6 = df_3.regional_rank_6.apply(lambda x: x.replace('', '0') if x == '' else x)
df_3.regional_rank_7 = df_3.regional_rank_7.apply(lambda x: x.replace('', '0') if x == '' else x)
df_3.regional_rank_8 = df_3.regional_rank_8.apply(lambda x: x.replace('', '0') if x == '' else x)

In [40]:
df_3.regional_rank_1 = df_3.regional_rank_1.astype(int)
df_3.regional_rank_2 = df_3.regional_rank_2.astype(int)
df_3.regional_rank_3 = df_3.regional_rank_3.astype(int)
df_3.regional_rank_4 = df_3.regional_rank_4.astype(int)
df_3.regional_rank_5 = df_3.regional_rank_5.astype(int)
df_3.regional_rank_6 = df_3.regional_rank_6.astype(int)
df_3.regional_rank_7 = df_3.regional_rank_7.astype(int)
df_3.regional_rank_8 = df_3.regional_rank_8.astype(int)

In [41]:
df_3.kills_champ_1 = df_3.kills_champ_1.astype(float)
df_3.kills_champ_2 = df_3.kills_champ_2.astype(float)
df_3.kills_champ_3 = df_3.kills_champ_3.astype(float)
df_3.kills_champ_4 = df_3.kills_champ_4.astype(float)
df_3.kills_champ_5 = df_3.kills_champ_5.astype(float)
df_3.kills_champ_6 = df_3.kills_champ_6.astype(float)
df_3.kills_champ_7 = df_3.kills_champ_7.astype(float)
df_3.kills_champ_8 = df_3.kills_champ_8.astype(float)

In [42]:
df_3.deaths_champ_1 = df_3.deaths_champ_1.astype(float)
df_3.deaths_champ_2 = df_3.deaths_champ_2.astype(float)
df_3.deaths_champ_3 = df_3.deaths_champ_3.astype(float)
df_3.deaths_champ_4 = df_3.deaths_champ_4.astype(float)
df_3.deaths_champ_5 = df_3.deaths_champ_5.astype(float)
df_3.deaths_champ_6 = df_3.deaths_champ_6.astype(float)
df_3.deaths_champ_7 = df_3.deaths_champ_7.astype(float)
df_3.deaths_champ_8 = df_3.deaths_champ_8.astype(float)

In [43]:
df_3.assists_champ_1 = df_3.assists_champ_1.astype(float)
df_3.assists_champ_2 = df_3.assists_champ_2.astype(float)
df_3.assists_champ_3 = df_3.assists_champ_3.astype(float)
df_3.assists_champ_4 = df_3.assists_champ_4.astype(float)
df_3.assists_champ_5 = df_3.assists_champ_5.astype(float)
df_3.assists_champ_6 = df_3.assists_champ_6.astype(float)
df_3.assists_champ_7 = df_3.assists_champ_7.astype(float)
df_3.assists_champ_8 = df_3.assists_champ_8.astype(float)

In [44]:
df_3.games_champ_3 = df_3.games_champ_3.apply(lambda x: x.split('.')[0][1:] if '%' in x else x)
df_3.games_champ_4 = df_3.games_champ_4.apply(lambda x: x.split('.')[0][1:] if '%' in x else x)
df_3.games_champ_5 = df_3.games_champ_5.apply(lambda x: x.split('.')[0][1:] if '%' in x else x)
df_3.games_champ_6 = df_3.games_champ_6.apply(lambda x: x.split('.')[0][1:] if '%' in x else x)
df_3.games_champ_7 = df_3.games_champ_7.apply(lambda x: x.split('.')[0][1:] if '%' in x else x)
df_3.games_champ_8 = df_3.games_champ_8.apply(lambda x: x.split('.')[0][1:] if '%' in x else x)

In [45]:
df_3.games_champ_1 = df_3.games_champ_1.astype(int)
df_3.games_champ_2 = df_3.games_champ_2.astype(int)
df_3.games_champ_3 = df_3.games_champ_3.astype(int)
df_3.games_champ_4 = df_3.games_champ_4.astype(int)
df_3.games_champ_5 = df_3.games_champ_5.astype(int)
df_3.games_champ_6 = df_3.games_champ_6.astype(int)
df_3.games_champ_7 = df_3.games_champ_7.astype(int)
df_3.games_champ_8 = df_3.games_champ_8.astype(int)

In [46]:
df_3.winrate_champ_1 = df_3.winrate_champ_1.astype(float)
df_3.winrate_champ_2 = df_3.winrate_champ_2.astype(float)
df_3.winrate_champ_3 = df_3.winrate_champ_3.astype(float)
df_3.winrate_champ_4 = df_3.winrate_champ_4.astype(float)
df_3.winrate_champ_5 = df_3.winrate_champ_5.astype(float)
df_3.winrate_champ_6 = df_3.winrate_champ_6.astype(float)
df_3.winrate_champ_7 = df_3.winrate_champ_7.astype(float)
df_3.winrate_champ_8 = df_3.winrate_champ_8.astype(float)

In [49]:
df_3.drop_duplicates(inplace = True)

In [50]:
df_3.head()

Unnamed: 0,nombre,champ_1,regional_rank_1,kills_champ_1,deaths_champ_1,assists_champ_1,games_champ_1,winrate_champ_1,champ_2,regional_rank_2,kills_champ_2,deaths_champ_2,assists_champ_2,games_champ_2,winrate_champ_2,champ_3,regional_rank_3,kills_champ_3,deaths_champ_3,assists_champ_3,games_champ_3,winrate_champ_3,champ_4,regional_rank_4,kills_champ_4,deaths_champ_4,assists_champ_4,games_champ_4,winrate_champ_4,champ_5,regional_rank_5,kills_champ_5,deaths_champ_5,assists_champ_5,games_champ_5,winrate_champ_5,champ_6,regional_rank_6,kills_champ_6,deaths_champ_6,assists_champ_6,games_champ_6,winrate_champ_6,champ_7,regional_rank_7,kills_champ_7,deaths_champ_7,assists_champ_7,games_champ_7,winrate_champ_7,champ_8,regional_rank_8,kills_champ_8,deaths_champ_8,assists_champ_8,games_champ_8,winrate_champ_8
0,Satoru Gojo03,Zeri,122,7.3,4.6,5.9,23,60.9,Draven,880,8.0,5.9,7.7,21,57.1,Varus,1558,6.8,5.8,8.2,17,58.8,Aphelios,1905,6.0,5.4,7.4,17,52.9,Caitlyn,666,5.8,4.5,7.7,13,61.5,Lucian,4463,9.5,6.4,7.7,11,81.8,Xayah,210,8.4,2.8,6.4,9,88.9,Sivir,0,6.3,9.0,11.3,3,66.7
1,Hide on bush,Annie,514,5.0,2.8,6.7,28,67.9,Aurelion Sol,245,6.9,3.4,6.6,27,55.6,Yone,850,5.1,4.5,5.4,16,62.5,Sylas,1640,4.4,3.8,5.4,16,50.0,Akali,19442,7.9,3.9,4.3,15,60.0,Ryze,1667,4.9,3.2,6.4,15,53.3,Tristana,5271,5.9,6.7,4.5,13,30.8,Jayce,18688,5.8,6.3,6.6,12,25.0
2,Jinno Kingdom,Varus,299,7.8,4.6,8.0,79,53.2,Zeri,72,6.8,3.4,6.1,64,64.1,Caitlyn,18,6.9,4.8,5.8,51,56.9,Ezreal,97,7.3,5.6,6.6,36,52.8,Sivir,678,6.3,4.4,8.8,32,62.5,Draven,106,7.6,5.4,5.6,29,48.3,Maokai,1391,3.6,4.3,10.4,25,56.0,Xayah,1137,6.9,5.5,5.7,24,37.5
3,Kongenvenderhjem,Jayce,36,6.8,6.1,5.6,38,73.7,Gangplank,10569,7.0,5.7,6.3,28,71.4,Fiora,53,5.0,5.6,4.9,14,50.0,Vladimir,305,4.3,5.0,5.4,11,54.5,Gnar,2672,8.1,4.2,5.3,10,70.0,Quinn,2429,8.8,7.1,6.0,10,60.0,Jax,485,4.2,5.9,4.3,9,33.3,Lee Sin,540,9.3,6.1,4.3,8,62.5
4,pleroma chronou,Aurelion Sol,839,7.5,6.7,7.9,24,54.2,Tristana,1216,8.8,5.0,4.9,22,63.6,Akali,756,9.8,4.0,5.4,17,64.7,Sylas,831,7.5,4.8,4.9,15,53.3,Ryze,3807,7.7,3.8,5.8,12,75.0,Lee Sin,1546,8.4,5.6,4.5,12,66.7,Syndra,0,6.4,5.2,8.8,9,55.6,Gragas,0,2.3,5.9,8.0,7,42.9


In [51]:
df_3.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 40 entries, 0 to 39
Data columns (total 57 columns):
 #   Column           Non-Null Count  Dtype  
---  ------           --------------  -----  
 0   nombre           40 non-null     object 
 1   champ_1          40 non-null     object 
 2   regional_rank_1  40 non-null     int64  
 3   kills_champ_1    40 non-null     float64
 4   deaths_champ_1   40 non-null     float64
 5   assists_champ_1  40 non-null     float64
 6   games_champ_1    40 non-null     int64  
 7   winrate_champ_1  40 non-null     float64
 8   champ_2          40 non-null     object 
 9   regional_rank_2  40 non-null     int64  
 10  kills_champ_2    40 non-null     float64
 11  deaths_champ_2   40 non-null     float64
 12  assists_champ_2  40 non-null     float64
 13  games_champ_2    40 non-null     int64  
 14  winrate_champ_2  40 non-null     float64
 15  champ_3          40 non-null     object 
 16  regional_rank_3  40 non-null     int64  
 17  kills_champ_3    4

In [52]:
df_3.rename(columns={'nombre': 'nombre_invocador'}, inplace = True)

In [53]:
df_3.to_csv('../data/champs_stats_prof.csv', index = False)

**MERGEANDO DFs**

In [None]:
# Una vez limpios nuestros 3 dfs, hacemos merge para tener centralizada toda la info en un solo df

In [54]:
merg = pd.merge(df, df_2, on = 'nombre_invocador')

In [55]:
merg.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 49 entries, 0 to 48
Data columns (total 24 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   nombre_invocador    49 non-null     object 
 1   region              49 non-null     object 
 2   liga                49 non-null     object 
 3   lps                 49 non-null     int64  
 4   cola                49 non-null     object 
 5   ranking_mundial     49 non-null     int64  
 6   ranking_porcentaje  49 non-null     float64
 7   wins                49 non-null     int64  
 8   loses               49 non-null     int64  
 9   n_games             49 non-null     int64  
 10  winrate_total       49 non-null     float64
 11  liga_label          49 non-null     int64  
 12  kills_player_kda    49 non-null     float64
 13  deaths_player_kda   49 non-null     float64
 14  assists_player_kda  49 non-null     float64
 15  rol_1               49 non-null     object 
 16  games_per_

In [56]:
bd = pd.merge(merg, df_3, on = 'nombre_invocador')

In [57]:
bd.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 38 entries, 0 to 37
Data columns (total 80 columns):
 #   Column              Non-Null Count  Dtype  
---  ------              --------------  -----  
 0   nombre_invocador    38 non-null     object 
 1   region              38 non-null     object 
 2   liga                38 non-null     object 
 3   lps                 38 non-null     int64  
 4   cola                38 non-null     object 
 5   ranking_mundial     38 non-null     int64  
 6   ranking_porcentaje  38 non-null     float64
 7   wins                38 non-null     int64  
 8   loses               38 non-null     int64  
 9   n_games             38 non-null     int64  
 10  winrate_total       38 non-null     float64
 11  liga_label          38 non-null     int64  
 12  kills_player_kda    38 non-null     float64
 13  deaths_player_kda   38 non-null     float64
 14  assists_player_kda  38 non-null     float64
 15  rol_1               38 non-null     object 
 16  games_per_

In [58]:
bd.rol_1.value_counts()

Mid         10
Support      9
Jungler      8
AD Carry     6
Top          5
Name: rol_1, dtype: int64

In [60]:
bd['nivel'] = 'profesional'

In [62]:
bd.to_csv('../data/total_prof.csv', index = False)