In [1]:
import pandas as pd
import numpy as np
from sklearn.naive_bayes import BernoulliNB
from sklearn.naive_bayes import GaussianNB
from sklearn.svm import SVC

df_combats = pd.read_csv("Datos/combats.csv")
df_combats = df_combats.sample(frac = 1)
df_pokemon = pd.read_csv("Datos/pokemon2.csv")
df_pokemon = df_pokemon.sample(frac = 1)

In [2]:
# Limpieza y configuración de dataframes
# Se cambia el nombre de ambos IDs y cambiamos el id del ganador por un booleano que indica si gana el primero
df_combats.rename(columns = {'First_pokemon':'ID1', 'Second_pokemon':'ID2', 'Winner': 'isFirstWinner'}, inplace = True)
df_combats.loc[df_combats['isFirstWinner'] == df_combats['ID1'], 'isFirstWinner'] = True
df_combats.loc[df_combats['isFirstWinner'] == df_combats['ID2'], 'isFirstWinner'] = False
df_combats.dropna(inplace=True)

In [3]:
# Unificamos las dos columnas de tipos y creamos una columna por cada tipo que hay
types1 = pd.get_dummies(df_pokemon["Type 1"])
types2 = pd.get_dummies(df_pokemon["Type 2"])
pd.set_option('max_rows', None)
pd.reset_option('max_rows')
types = types1 | types2

df_clean = pd.merge(df_pokemon, types, left_index=True, right_index=True)

df_clean.dropna(thresh=20,inplace=True)
df_clean.drop(["Type 1", 'Type 2'], axis = 1,inplace= True)
df_clean

Unnamed: 0,#,Name,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,...,Ghost,Grass,Ground,Ice,Normal,Poison,Psychic,Rock,Steel,Water
413,414,Mega Metagross,80,145,150,105,110,110,3,False,...,0,0,0,0,0,0,1,0,1,0
600,601,Sewaddle,45,53,70,40,60,42,5,False,...,0,1,0,0,0,0,0,0,0,0
450,451,Luxray,80,120,79,95,79,70,4,False,...,0,0,0,0,0,0,0,0,0,0
448,449,Shinx,45,65,34,40,34,45,4,False,...,0,0,0,0,0,0,0,0,0,0
263,264,Entei,115,115,85,90,75,100,2,True,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
721,722,Fennekin,40,45,40,62,60,60,6,False,...,0,0,0,0,0,0,0,0,0,0
120,121,Rhydon,105,130,120,45,45,40,1,False,...,0,0,1,0,0,0,0,1,0,0
362,363,Cacnea,50,85,40,85,40,35,3,False,...,0,1,0,0,0,0,0,0,0,0
227,228,Qwilfish,65,95,75,55,55,85,2,False,...,0,0,0,0,0,1,0,0,0,1


In [4]:
# Mapa de correlaciones
corr = round (df_clean.corr(), 3)
corr.style.background_gradient()

Unnamed: 0,#,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,Bug,Dark,Dragon,Electric,Fairy,Fighting,Fire,Flying,Ghost,Grass,Ground,Ice,Normal,Poison,Psychic,Rock,Steel,Water
#,1.0,0.098,0.103,0.095,0.089,0.086,0.012,0.983,0.154,-0.04,0.099,0.141,0.016,0.05,0.048,-0.009,-0.033,0.169,0.044,-0.047,0.071,-0.053,-0.215,0.003,0.025,0.093,-0.102
HP,0.098,1.0,0.422,0.24,0.362,0.379,0.176,0.059,0.274,-0.154,0.01,0.138,-0.061,-0.007,0.055,0.01,0.031,-0.062,-0.046,0.075,0.082,0.112,-0.076,0.027,-0.036,-0.043,0.03
Attack,0.103,0.422,1.0,0.439,0.396,0.264,0.381,0.051,0.345,-0.07,0.143,0.213,-0.075,-0.123,0.211,0.046,0.014,-0.016,-0.063,0.127,0.021,-0.075,-0.073,-0.07,0.101,0.108,-0.071
Defense,0.095,0.24,0.439,1.0,0.224,0.511,0.015,0.042,0.246,-0.028,-0.024,0.106,-0.061,-0.025,0.002,-0.035,-0.068,0.061,-0.007,0.134,0.02,-0.175,-0.094,0.011,0.298,0.351,0.006
Sp. Atk,0.089,0.362,0.396,0.224,1.0,0.506,0.473,0.036,0.449,-0.185,0.037,0.194,0.127,0.026,-0.048,0.182,0.041,0.035,0.001,-0.107,0.07,-0.184,-0.044,0.237,-0.106,-0.006,0.026
Sp. Def,0.086,0.379,0.264,0.511,0.506,1.0,0.259,0.028,0.364,-0.082,-0.021,0.139,0.017,0.093,0.014,0.026,-0.009,0.045,-0.005,-0.077,0.06,-0.112,-0.047,0.19,0.019,0.103,-0.025
Speed,0.012,0.176,0.381,0.015,0.473,0.259,1.0,-0.023,0.327,-0.071,0.068,0.123,0.13,-0.098,0.076,0.073,0.237,-0.061,-0.096,-0.096,-0.005,0.052,-0.03,0.108,-0.165,-0.098,-0.049
Generation,0.983,0.059,0.051,0.042,0.036,0.028,-0.023,1.0,0.08,-0.019,0.094,0.102,0.006,0.066,0.051,0.006,-0.036,0.156,0.073,-0.051,0.038,-0.036,-0.192,-0.022,0.009,0.079,-0.105
Legendary,0.154,0.274,0.345,0.246,0.449,0.364,0.327,0.08,1.0,-0.094,-0.021,0.226,0.018,-0.005,-0.006,0.047,0.094,-0.014,-0.067,-0.007,0.041,-0.086,-0.086,0.169,-0.013,0.019,-0.066
Bug,-0.04,-0.154,-0.07,-0.028,-0.185,-0.082,-0.071,-0.019,-0.094,1.0,-0.082,-0.081,-0.045,-0.072,-0.049,-0.061,0.065,-0.059,-0.034,-0.064,-0.07,-0.12,0.121,-0.112,-0.004,0.047,-0.124


In [5]:
# Igualamos el nombre de la columna del id a ID1
df_clean.rename(columns = {'#':'ID1'}, inplace= True)
#Añadimos todos los campos del 1er pokemon a la tabla
df_combats = df_combats.merge(df_clean, on='ID1')

# Igualamos el nombre de la columna del id a ID2
df_clean.rename(columns = {'ID1':'ID2'}, inplace= True)
#Añadimos todos los campos del 1er pokemon a la tabla
df_combats = df_combats.merge(df_clean, on='ID2')

In [6]:
# Escalamos los datos
from sklearn import preprocessing

# Cambiamos true y false por ints 1 y 0 
df_combats["isFirstWinner"] = df_combats["isFirstWinner"].astype(int)

X_scaled = preprocessing.scale(df_combats.drop(["ID1","ID2","Name_x","Name_y"], axis=1))
df_scaled = pd.DataFrame(X_scaled)
df_scaled

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,43,44,45,46,47,48,49,50,51,52
0,-0.944081,0.828149,1.250482,1.446229,-0.393764,-0.432618,-0.621907,-0.800051,-0.298316,-0.317853,...,-0.240924,-0.360406,3.291127,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,2.306061
1,-0.944081,-0.596074,-1.043759,-0.861341,0.064318,0.928603,-0.553620,1.620898,-0.298316,-0.317853,...,-0.240924,-0.360406,3.291127,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,2.306061
2,-0.944081,0.036914,0.179837,-0.134299,-0.393764,-0.074402,-0.621907,-0.194814,-0.298316,-0.317853,...,-0.240924,-0.360406,3.291127,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,2.306061
3,1.059231,1.025958,0.638685,0.339860,1.072096,-0.074402,0.060966,1.015661,-0.298316,-0.317853,...,-0.240924,-0.360406,3.291127,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,2.306061
4,1.059231,0.036914,-0.126062,0.181807,1.896643,0.283814,2.280305,-0.194814,-0.298316,-0.317853,...,-0.240924,-0.360406,3.291127,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,2.306061
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49995,-0.944081,0.234723,0.485735,-0.229130,1.591255,0.821138,0.504834,0.410423,-0.298316,-0.317853,...,-0.240924,-0.360406,-0.303847,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,-0.433640
49996,1.059231,1.223766,0.638685,0.814018,0.827786,1.000246,1.085276,0.410423,-0.298316,-0.317853,...,-0.240924,-0.360406,-0.303847,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,-0.433640
49997,1.059231,-0.160894,0.332786,-0.292351,-0.363225,-0.396797,1.085276,-1.405288,-0.298316,-0.317853,...,-0.240924,-0.360406,-0.303847,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,-0.433640
49998,-0.944081,-0.358703,-0.890809,0.814018,0.369705,-0.074402,-0.109752,-0.194814,-0.298316,-0.317853,...,-0.240924,-0.360406,-0.303847,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,-0.433640


In [7]:
# Variables para entrenar el modelo 
Y = df_scaled[0] > 0 # Combates en los que el primer pokemon gana
X = pd.get_dummies(df_scaled.drop([0], axis=1)) # Resto de columnas del dataframe menos la columna 'isFirstWinner'

In [8]:
# Ajustar el modelo Bermoulli
clf = BernoulliNB()
clf.fit(X, Y)
Y_pred = clf.predict(X)
np.mean(Y == Y_pred)

0.75402

In [9]:
# Ajustar el modelo Gausiiano
clf = GaussianNB()
clf.fit(X, Y)
Y_pred = clf.predict(X)
np.mean(Y == Y_pred)

0.74094