In [9]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import confusion_matrix
import matplotlib.pyplot as plt

df_combats = pd.read_csv("Datos/combats.csv")
df_combats = df_combats.sample(frac = 1)
df_pokemon = pd.read_csv("Datos/pokemon2.csv")
df_pokemon = df_pokemon.sample(frac = 1)

In [10]:
# Limpieza y configuración de dataframes
# Se cambia el nombre de ambos IDs y cambiamos el id del ganador por un booleano que indica si gana el primero
df_combats.rename(columns = {'First_pokemon':'ID1', 'Second_pokemon':'ID2', 'Winner': 'isFirstWinner'}, inplace = True)
df_combats.loc[df_combats['isFirstWinner'] == df_combats['ID1'], 'isFirstWinner'] = True
df_combats.loc[df_combats['isFirstWinner'] == df_combats['ID2'], 'isFirstWinner'] = False
df_combats.dropna(inplace=True)

In [11]:
# Unificamos las dos columnas de tipos y creamos una columna por cada tipo que hay
types1 = pd.get_dummies(df_pokemon["Type 1"])
types2 = pd.get_dummies(df_pokemon["Type 2"])
pd.set_option('max_rows', None)
pd.reset_option('max_rows')
types = types1 | types2

df_clean = pd.merge(df_pokemon, types, left_index=True, right_index=True)

df_clean.dropna(thresh=20,inplace=True)
df_clean.drop(["Type 1", 'Type 2'], axis = 1,inplace= True)
df_clean

Unnamed: 0,#,Name,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,...,Ghost,Grass,Ground,Ice,Normal,Poison,Psychic,Rock,Steel,Water
713,714,Keldeo Ordinary Forme,91,72,90,129,90,108,5,False,...,0,0,0,0,0,0,0,0,0,1
431,432,Deoxys Speed Forme,50,95,90,95,90,180,3,True,...,0,0,0,0,0,0,1,0,0,0
356,357,Spoink,60,25,35,70,80,60,3,False,...,0,0,0,0,0,0,1,0,0,0
569,570,Liepard,64,88,50,88,50,106,5,False,...,0,0,0,0,0,0,0,0,0,0
634,635,Cinccino,75,95,60,65,60,115,5,False,...,0,0,0,0,1,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
671,672,Axew,46,87,60,30,40,57,5,False,...,0,0,0,0,0,0,0,0,0,0
598,599,Throh,120,100,85,30,85,45,5,False,...,0,0,0,0,0,0,0,0,0,0
11,12,Blastoise,79,83,100,85,105,78,1,False,...,0,0,0,0,0,0,0,0,0,1
235,236,Ursaring,90,130,75,75,75,55,2,False,...,0,0,0,0,1,0,0,0,0,0


In [12]:
# Igualamos el nombre de la columna del id a ID1
df_clean.rename(columns = {'#':'ID1'}, inplace= True)
#Añadimos todos los campos del 1er pokemon a la tabla
df_combats = df_combats.merge(df_clean, on='ID1')

# Igualamos el nombre de la columna del id a ID2
df_clean.rename(columns = {'ID1':'ID2'}, inplace= True)
#Añadimos todos los campos del 1er pokemon a la tabla
df_combats = df_combats.merge(df_clean, on='ID2')

In [13]:
# Escalamos los datos
from sklearn import preprocessing

# Cambiamos true y false por ints 1 y 0 
df_combats["isFirstWinner"] = df_combats["isFirstWinner"].astype(int)

X_scaled = preprocessing.scale(df_combats.drop(["ID1","ID2","Name_x","Name_y"], axis=1))
df_scaled = pd.DataFrame(X_scaled)
df_scaled

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,43,44,45,46,47,48,49,50,51,52
0,1.059231,-0.160894,0.332786,1.288176,-0.851845,1.537570,-0.348758,1.015661,-0.298316,-0.317853,...,-0.240924,-0.360406,-0.303847,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,2.306061
1,1.059231,0.234723,1.862280,-0.292351,1.194251,-0.253510,1.426713,1.015661,-0.298316,-0.317853,...,-0.240924,-0.360406,-0.303847,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,2.306061
2,1.059231,-0.675197,-0.431961,-0.608457,-0.454841,-0.360975,-0.280470,-1.405288,-0.298316,-0.317853,...,-0.240924,-0.360406,-0.303847,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,2.306061
3,1.059231,0.788587,1.403432,0.497912,1.285868,0.283814,1.119420,1.015661,3.352150,-0.317853,...,-0.240924,-0.360406,-0.303847,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,2.306061
4,1.059231,-0.279580,-0.064882,-0.387183,-0.699151,-1.077407,-0.109752,1.015661,-0.298316,-0.317853,...,-0.240924,-0.360406,-0.303847,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,2.306061
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49995,1.059231,-0.358703,0.332786,-0.608457,0.522399,0.283814,1.426713,-1.405288,-0.298316,-0.317853,...,-0.240924,-0.360406,-0.303847,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,-0.433640
49996,-0.944081,-1.743364,-1.349657,-0.134299,0.675093,-0.611726,-0.792626,-1.405288,-0.298316,-0.317853,...,-0.240924,-0.360406,-0.303847,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,-0.433640
49997,-0.944081,-1.229061,-1.349657,-1.082615,-1.401542,-1.686374,-1.134062,1.620898,-0.298316,3.146108,...,-0.240924,-0.360406,-0.303847,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,-0.433640
49998,-0.944081,-0.952129,0.179837,-0.766510,-0.546457,-0.790834,-0.109752,1.015661,-0.298316,-0.317853,...,-0.240924,-0.360406,-0.303847,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,-0.433640


In [14]:
# Variables para entrenar el modelo 
Y = df_scaled[0] > 0 # Combates en los que el primer pokemon gana
X = pd.get_dummies(df_scaled.drop([0], axis=1)) # Resto de columnas del dataframe menos la columna 'isFirstWinner'

In [15]:
# Creamos el modelo con LogisticRegression()
logreg = LogisticRegression()
# Ajustamos el modelo con fit
logreg.fit(X, Y)
# Obtenemos las predicciones
Y_pred = logreg.predict(X)
np.mean(Y_pred == Y)

0.88698