# Predict Pokemon battles using Machine Learning

Import libraries

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.ensemble import RandomForestClassifier

Import Datasets

In [None]:
# Read the three CSV inputs in one pass: the Pokemon table, the combat log
# and the battles to predict.
pokemon_df, combats_df, tests_df = (
    pd.read_csv(csv_name)
    for csv_name in ("pokemon.csv", "combats.csv", "tests.csv")
)
# combined_df starts out as a second name for the same DataFrame object as
# pokemon_df (no copy is made here).
combined_df = pokemon_df

# __`pokemon_df`__

In [None]:
# Show the first five rows of the Pokemon table.
pokemon_df.head(n=5)

#### Print shape of pokemon_df

In [None]:
# (number of rows, number of columns) of the frame loaded from pokemon.csv.
pokemon_df.shape

# __`combats_df`__

In [None]:
# Show the first five rows of the combat log.
combats_df.head(n=5)

#### Print shape of combats_df

In [None]:
# (number of rows, number of columns) of the frame loaded from combats.csv.
combats_df.shape

In [7]:
# First five rows of combined_df (at this point still the same object as pokemon_df).
combined_df.head(n=5)

In [8]:
# First five battles: first fighter, second fighter and winner ids.
combats_df.head(n=5)

Unnamed: 0,First_pokemon,Second_pokemon,Winner
0,266,298,298
1,702,701,701
2,191,668,668
3,237,683,683
4,151,231,151


In [9]:
# First five ids from the 'First_pokemon' column of the combat log.
combats_df.loc[:, 'First_pokemon'].head(n=5)

0    266
1    702
2    191
3    237
4    151
Name: First_pokemon, dtype: int64

In [10]:
# Id of the first fighter in the battle labelled 0.
combats_df.loc[0, 'First_pokemon']

266

In [11]:
# Name of the Pokemon whose '#' id equals 1.
combined_df.loc[combined_df['#'] == 1, 'Name']


0    Bulbasaur
Name: Name, dtype: object

In [12]:
# Name of the Pokemon that fought first in the battle labelled 0: look its id
# up in the combat log, then match that id against the '#' column.
combined_df.loc[combined_df['#'] == combats_df.loc[0, 'First_pokemon'], 'Name']


265    Larvitar
Name: Name, dtype: object

In [13]:
# Preview the three id columns of the combat log.
print(combats_df['First_pokemon'].head())
print(combats_df['Second_pokemon'].head())
print(combats_df['Winner'].head())

# Helpers that, given a Pokemon id `x`, return the battles in which `x` was the
# first fighter and lost (lost_to_df) or won (win_to_df).
# They read `combats_df`; the previous spelling `combats` is not defined
# anywhere in this notebook, so every call raised NameError.
# NOTE: the frame is looked up when the helper is called, so call these while
# combats_df still has its 'First_pokemon' column (a later cell renames it).
lost_to_df = lambda x: combats_df[(combats_df["First_pokemon"] == x) & (combats_df["Winner"] != x)]
win_to_df = lambda x: combats_df[(combats_df["First_pokemon"] == x) & (combats_df["Winner"] == x)]
# These two prints show the function objects themselves, not battle rows;
# call e.g. lost_to_df(266) to get the matching rows.
print(lost_to_df)
print(win_to_df)

0    266
1    702
2    191
3    237
4    151
Name: First_pokemon, dtype: int64
0    298
1    701
2    668
3    683
4    231
Name: Second_pokemon, dtype: int64
0    298
1    701
2    668
3    683
4    151
Name: Winner, dtype: int64
<function <lambda> at 0x10f0de8c8>
<function <lambda> at 0x10f0de730>


In [14]:
# Columns kept for modelling: the Pokemon id, its two types and its six stats.
# Every other column of the Pokemon table (e.g. 'Name') is dropped here.
features = [
    '#', 'Type 1', 'Type 2',
    'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed',
]
combined_df = combined_df.loc[:, features]
combined_df.head(n=5)



Unnamed: 0,#,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed
0,1,Grass,Poison,45,49,49,65,65,45
1,2,Grass,Poison,60,62,63,80,80,60
2,3,Grass,Poison,80,82,83,100,100,80
3,4,Grass,Poison,80,100,123,122,120,80
4,5,Fire,,39,52,43,60,50,65


In [15]:
# Rename the fighter columns so that '#' (the first fighter) can be used as the
# merge key against the Pokemon table and 'Fought' names the opponent.
# The row index is deliberately left alone: converting it to strings
# (index=str) served no purpose and made the integer lookup below depend on
# pandas' deprecated positional fallback for non-integer indexes.
combats_df = combats_df.rename(columns={'First_pokemon': '#', 'Second_pokemon': 'Fought'})
# Sanity check of the id dtype; .iloc makes the "first row" access explicit.
print(type(combats_df['#'].iloc[0]))

<class 'numpy.int64'>


In [16]:
# Attach to each Pokemon's stat row every battle in which it was the first
# fighter. A left join keeps Pokemon that have no such battle; their 'Fought'
# and 'Winner' cells come back missing.
combined_df = pd.merge(combined_df[features], combats_df, how='left', on='#')
print(combined_df.shape)

# Replace every missing cell with the sentinel -1 ...
combined_df = combined_df.where(combined_df.notnull(), -1)

# ... so the two id columns can be turned back into int64 ...
fought_to_int = combined_df['Fought'].map(np.int64)
combined_df['Fought'] = fought_to_int
winner_to_int = combined_df['Winner'].map(np.int64)
combined_df['Winner'] = winner_to_int

# ... while a missing second type is stored as None rather than -1.
type_to_none = combined_df['Type 2'].map(lambda cell: None if cell == -1 else cell)
combined_df['Type 2'] = type_to_none

print(combined_df.shape)
combined_df.head(n=5)

(50016, 11)
(50016, 11)


Unnamed: 0,#,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Fought,Winner
0,1,Grass,Poison,45,49,49,65,65,45,679,679
1,1,Grass,Poison,45,49,49,65,65,45,687,687
2,1,Grass,Poison,45,49,49,65,65,45,557,557
3,1,Grass,Poison,45,49,49,65,65,45,766,766
4,1,Grass,Poison,45,49,49,65,65,45,153,153


In [17]:
# Model inputs: the first fighter's id, types and stats plus the opponent id.
final_features = [
    '#', 'Type 1', 'Type 2',
    'HP', 'Attack', 'Defense', 'Sp. Atk', 'Sp. Def', 'Speed',
    'Fought',
]
x = combined_df.loc[:, final_features]
# Target: the id of the Pokemon that won the battle.
y = combined_df.loc[:, 'Winner']


print(x.head(n=5))
print(y.head(n=5))


   # Type 1  Type 2  HP  Attack  Defense  Sp. Atk  Sp. Def  Speed  Fought
0  1  Grass  Poison  45      49       49       65       65     45     679
1  1  Grass  Poison  45      49       49       65       65     45     687
2  1  Grass  Poison  45      49       49       65       65     45     557
3  1  Grass  Poison  45      49       49       65       65     45     766
4  1  Grass  Poison  45      49       49       65       65     45     153
0    679
1    687
2    557
3    766
4    153
Name: Winner, dtype: int64


In [18]:
# One-hot encode the id and type columns; the six numeric stat columns are
# passed through unchanged.
x_ohe = pd.get_dummies(
    data=x,
    columns=['#', 'Type 1', 'Type 2', 'Fought'],
)
print(x_ohe.head(n=5))
x_ohe.shape

   HP  Attack  Defense  Sp. Atk  Sp. Def  Speed  #_1  #_2  #_3  #_4  \
0  45      49       49       65       65     45    1    0    0    0   
1  45      49       49       65       65     45    1    0    0    0   
2  45      49       49       65       65     45    1    0    0    0   
3  45      49       49       65       65     45    1    0    0    0   
4  45      49       49       65       65     45    1    0    0    0   

      ...      Fought_791  Fought_792  Fought_793  Fought_794  Fought_795  \
0     ...               0           0           0           0           0   
1     ...               0           0           0           0           0   
2     ...               0           0           0           0           0   
3     ...               0           0           0           0           0   
4     ...               0           0           0           0           0   

   Fought_796  Fought_797  Fought_798  Fought_799  Fought_800  
0           0           0           0         

(50016, 1627)

In [19]:
# Hold out 30% of the battles for evaluation. The split is seeded so that the
# accuracies printed by the model cells below can be reproduced between runs.
x_ohe_train, x_ohe_test, y_train, y_test = train_test_split(
    x_ohe, y, test_size=0.3, random_state=42
)


In [None]:
# k-nearest-neighbours baseline with k = 5; fit() returns the estimator, so
# construction and training are chained.
knn = KNeighborsClassifier(n_jobs=-1, n_neighbors=5).fit(x_ohe_train, y_train)

# Score on the held-out 30% of the battles.
y_predict = knn.predict(x_ohe_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_predict)
print("Knn: ", accuracy)


In [None]:
# Decision tree with default hyper-parameters, trained on the one-hot matrix.
decisiontree = DecisionTreeClassifier().fit(x_ohe_train, y_train)

# Score on the held-out battles.
y_predict = decisiontree.predict(x_ohe_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_predict)
print("Decision Tree: ", accuracy)

In [None]:
# Random forest with default hyper-parameters apart from n_jobs=-1.
random_forest = RandomForestClassifier(n_jobs=-1).fit(x_ohe_train, y_train)

# Score on the held-out battles.
y_predict = random_forest.predict(x_ohe_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_predict)
print("Random Forest: ", accuracy)

In [None]:
# Logistic regression with default hyper-parameters.
logreg = LogisticRegression().fit(x_ohe_train, y_train)

# Score on the held-out battles.
y_predict = logreg.predict(x_ohe_test)
accuracy = accuracy_score(y_true=y_test, y_pred=y_predict)
print("Log Reg: ", accuracy)


### Using tests.csv

In [20]:
# Give the battles to predict the same column names as the training battles:
# '#' for the first fighter and 'Fought' for its opponent. index=str also
# converts the row labels to strings.
tests_df = tests_df.rename(
    columns={'First_pokemon': '#', 'Second_pokemon': 'Fought'},
    index=str,
)
tests_df.head(n=5)


Unnamed: 0,#,Fought
0,129,117
1,660,211
2,706,115
3,195,618
4,27,656


In [21]:
# Attach the battles from tests.csv to each Pokemon's stat row.
# The left side must be the one-row-per-Pokemon table (pokemon_df): by this
# point combined_df holds one row per *training battle*, so merging it with
# tests_df repeated every test battle once per training battle of the same
# first fighter.
# With how='left', Pokemon that never appear as first fighter in tests_df keep
# a row whose 'Fought' value is missing.
combined_test_df = pd.merge(pokemon_df[features], tests_df, on='#', how='left')
combined_test_df.head()

Unnamed: 0,#,Type 1,Type 2,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Fought
0,1,Grass,Poison,45,49,49,65,65,45,343
1,1,Grass,Poison,45,49,49,65,65,45,133
2,1,Grass,Poison,45,49,49,65,65,45,646
3,1,Grass,Poison,45,49,49,65,65,45,696
4,1,Grass,Poison,45,49,49,65,65,45,480


In [22]:
# Build the one-hot matrix for the tests.csv battles. The previous version
# encoded `x`, i.e. the training rows, so the test data was never used.
combined_test_x = combined_test_df[final_features].copy()

# Same clean-up as for the training frame: a missing opponent becomes -1 and
# the column is cast to int64, so the dummy columns are named 'Fought_343'
# rather than 'Fought_343.0'.
combined_test_x['Fought'] = combined_test_x['Fought'].fillna(-1).astype(np.int64)

combined_test_x = pd.get_dummies(combined_test_x, columns=['#', 'Type 1', 'Type 2', 'Fought'])

# Align with the training matrix: categories unseen in the test data get an
# all-zero column, categories unseen in training are dropped, and the column
# order matches what the models were fitted on.
combined_test_x = combined_test_x.reindex(columns=x_ohe.columns, fill_value=0)
combined_test_x.head()


Unnamed: 0,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,#_1,#_2,#_3,#_4,...,Fought_791,Fought_792,Fought_793,Fought_794,Fought_795,Fought_796,Fought_797,Fought_798,Fought_799,Fought_800
0,45,49,49,65,65,45,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
1,45,49,49,65,65,45,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,45,49,49,65,65,45,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
3,45,49,49,65,65,45,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,45,49,49,65,65,45,1,0,0,0,...,0,0,0,0,0,0,0,0,0,0


In [None]:
# Fit KNN on the training split and predict the winner id for every row of
# the test matrix.
knn = KNeighborsClassifier(n_neighbors=5, n_jobs=-1)
knn.fit(x_ohe_train, y_train)
y_predict = knn.predict(combined_test_x)
# No accuracy is computed here: tests_df has no 'Winner' column, and y_test
# holds the labels of the training hold-out split, which are unrelated to
# these rows, so scoring against it was meaningless.
print("Knn predicted winners: ", y_predict)