In [1]:
import pandas as pd
from sklearn.model_selection import LeaveOneOut
from sklearn.linear_model import LogisticRegression
import numpy as np

df_combats = pd.read_csv("Datos/combats.csv")
df_combats = df_combats.sample(frac = 1)
df_pokemon = pd.read_csv("Datos/pokemon2.csv")
df_pokemon = df_pokemon.sample(frac = 1)

In [2]:
# Limpieza y configuración de dataframes
# Se cambia el nombre de ambos IDs y cambiamos el id del ganador por un booleano que indica si gana el primero
df_combats.rename(columns = {'First_pokemon':'ID1', 'Second_pokemon':'ID2', 'Winner': 'isFirstWinner'}, inplace = True)
df_combats.loc[df_combats['isFirstWinner'] == df_combats['ID1'], 'isFirstWinner'] = True
df_combats.loc[df_combats['isFirstWinner'] == df_combats['ID2'], 'isFirstWinner'] = False
df_combats.dropna(inplace=True)

In [3]:
# Unificamos las dos columnas de tipos y creamos una columna por cada tipo que hay
types1 = pd.get_dummies(df_pokemon["Type 1"])
types2 = pd.get_dummies(df_pokemon["Type 2"])
pd.set_option('max_rows', None)
pd.reset_option('max_rows')
types = types1 | types2

df_clean = pd.merge(df_pokemon, types, left_index=True, right_index=True)

df_clean.dropna(thresh=20,inplace=True)
df_clean.drop(["Type 1", 'Type 2'], axis = 1,inplace= True)
df_clean

Unnamed: 0,#,Name,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary,...,Ghost,Grass,Ground,Ice,Normal,Poison,Psychic,Rock,Steel,Water
462,463,Combee,30,30,42,30,42,70,4,False,...,0,0,0,0,0,0,0,0,0,0
281,282,Marshtomp,70,85,70,60,70,50,3,False,...,0,0,1,0,0,0,0,0,0,1
395,396,Snorunt,50,50,50,50,50,50,3,False,...,0,0,0,1,0,0,0,0,0,0
620,621,Scraggy,50,75,70,35,70,48,5,False,...,0,0,0,0,0,0,0,0,0,0
276,277,Torchic,45,60,40,70,50,45,3,False,...,0,0,0,0,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
396,397,Glalie,80,80,80,80,80,80,3,False,...,0,0,0,1,0,0,0,0,0,0
788,789,Bergmite,55,69,85,32,35,28,6,False,...,0,0,0,1,0,0,0,0,0,0
156,157,Articuno,90,85,100,95,125,85,1,True,...,0,0,0,1,0,0,0,0,0,0
583,584,Roggenrola,55,75,85,25,25,15,5,False,...,0,0,0,0,0,0,0,1,0,0


In [4]:
# Igualamos el nombre de la columna del id a ID1
df_clean.rename(columns = {'#':'ID1'}, inplace= True)
#Añadimos todos los campos del 1er pokemon a la tabla
df_combats = df_combats.merge(df_clean, on='ID1')

# Igualamos el nombre de la columna del id a ID2
df_clean.rename(columns = {'ID1':'ID2'}, inplace= True)
#Añadimos todos los campos del 1er pokemon a la tabla
df_combats = df_combats.merge(df_clean, on='ID2')

In [5]:
# Escalamos los datos
from sklearn import preprocessing

# Cambiamos true y false por ints 1 y 0 
df_combats["isFirstWinner"] = df_combats["isFirstWinner"].astype(int)

X_scaled = preprocessing.scale(df_combats.drop(["ID1","ID2","Name_x","Name_y"], axis=1))
df_scaled = pd.DataFrame(X_scaled)
df_scaled

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,43,44,45,46,47,48,49,50,51,52
0,-0.944081,1.579822,-0.401371,0.308249,0.247550,0.964424,-1.236493,1.015661,-0.298316,-0.317853,...,-0.240924,-0.360406,-0.303847,-0.227526,2.633486,-0.286794,-0.356676,-0.281905,-0.257228,-0.43364
1,-0.944081,-1.149938,-1.196708,-0.608457,-1.004539,-0.074402,-0.451189,-0.194814,-0.298316,-0.317853,...,-0.240924,-0.360406,-0.303847,-0.227526,2.633486,-0.286794,-0.356676,-0.281905,-0.257228,-0.43364
2,-0.944081,1.025958,0.913994,0.972071,0.064318,0.462922,-0.417045,0.410423,-0.298316,-0.317853,...,-0.240924,-0.360406,-0.303847,-0.227526,2.633486,-0.286794,-0.356676,-0.281905,-0.257228,-0.43364
3,-0.944081,-0.754321,-0.799039,-0.829731,-0.607535,-0.862477,-0.143896,1.015661,-0.298316,-0.317853,...,-0.240924,-0.360406,-0.303847,-0.227526,2.633486,-0.286794,-0.356676,-0.281905,-0.257228,-0.43364
4,1.059231,-0.556512,-0.890809,-0.924562,1.896643,0.821138,1.768150,-1.405288,-0.298316,-0.317853,...,-0.240924,-0.360406,-0.303847,-0.227526,2.633486,-0.286794,-0.356676,-0.281905,-0.257228,-0.43364
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
49995,-0.944081,0.788587,1.372842,0.181807,-0.546457,0.283814,-0.451189,1.015661,-0.298316,-0.317853,...,-0.240924,2.774649,-0.303847,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,-0.43364
49996,-0.944081,-0.754321,0.179837,0.339860,-0.546457,-0.611726,-0.621907,-0.194814,-0.298316,-0.317853,...,-0.240924,2.774649,-0.303847,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,-0.43364
49997,1.059231,-0.042209,0.179837,-0.292351,2.812805,2.254002,1.085276,-0.194814,-0.298316,-0.317853,...,-0.240924,2.774649,-0.303847,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,-0.43364
49998,-0.944081,-0.952129,-1.043759,-1.240668,-1.615314,-1.507266,-1.646217,-0.194814,-0.298316,3.146108,...,-0.240924,2.774649,-0.303847,-0.227526,-0.379725,-0.286794,-0.356676,-0.281905,-0.257228,-0.43364


In [6]:
df_scaled = df_scaled.reset_index()
df_scaled = df_scaled.head(1000)

# Variables para entrenar el modelo 
Y = df_scaled[0] > 0 # Combates en los que el primer pokemon gana
X = pd.get_dummies(df_scaled.drop([0], axis=1)) # Resto de columnas del dataframe menos la columna 'isFirstWinner'

In [7]:
loo = LeaveOneOut()

In [8]:
regr = LogisticRegression(max_iter=1000)
sumaError = 0
for train_index, test_index in loo.split(X):
    X_train, X_test = X.loc[train_index,], X.loc[test_index,]
    y_train, y_test = Y[train_index], Y[test_index]
    regr.fit(X_train, y_train)
    predicciones = regr.predict(X_test)
    sumaError += (y_test.values.astype(int) - predicciones[0])**2

print(sumaError/1000)

[0.134]
