# Pokemon Type Prediction
Given the base stats of a Pokemon, train a model using supervised learning that is able to classify a Pokemon's type by its arbitrary stats.

In [1]:
!pip install -r requirements.txt



You should consider upgrading via the 'C:\ProgramData\Anaconda3\python.exe -m pip install --upgrade pip' command.


In [2]:
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report
import pandas as pd

## Data Preprocessing
For data preprocessing, we:
1. Remove rows with invalid primary types
2. Drop unneeded columns
3. Extract the features and labels

In [3]:
pokedex = pd.read_csv('data/Pokedex_Cleaned.csv')
pokedex

Unnamed: 0,#,Name,Primary Type,Secondary Type,Total,HP,Attack,Defense,Sp.Atk,Sp.Def,Speed,Variant
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,
3,3,Venusaur,Grass,Poison,625,80,100,123,122,120,80,
4,4,Charmander,Fire,,309,39,52,43,60,50,65,
...,...,...,...,...,...,...,...,...,...,...,...,...
1168,1004,Chi-Yu,Dark,Fire,570,55,80,80,135,120,100,
1169,1005,Roaring Moon,Dragon,Dark,590,105,139,71,55,101,119,
1170,1006,Iron Valiant,Fairy,Fighting,590,74,130,90,120,60,116,
1171,1007,Koraidon,Fighting,Dragon,670,100,135,115,85,100,135,


In [4]:
primary_types = pokedex['Primary Type']
primary_types.value_counts()  # need to remove rows that aren't types

Water       147
Normal      130
Grass       102
Bug          99
Psychic      76
Electric     73
Fire         73
Rock         67
Dark         56
Ground       47
Dragon       47
Ghost        46
Fighting     46
Poison       46
Steel        42
Fairy        32
Ice          27
Flying       12
Null          2
Rockruff      1
Male          1
Female        1
Name: Primary Type, dtype: int64

In [5]:
not_types = {"Null", "Male", "Female", "Rockruff"}
pokedex = pokedex.loc[~pokedex['Primary Type'].isin(not_types)]  # remove rows with invalid types
primary_types = pokedex['Primary Type']
primary_types.value_counts()  # bad rows are gone

Water       147
Normal      130
Grass       102
Bug          99
Psychic      76
Electric     73
Fire         73
Rock         67
Dark         56
Dragon       47
Ground       47
Fighting     46
Ghost        46
Poison       46
Steel        42
Fairy        32
Ice          27
Flying       12
Name: Primary Type, dtype: int64

In [6]:
# Drop irrelevant columns
pokedex_relevant = pokedex.drop(['#', 'Name', 'Secondary Type', 'Total', 'Variant'], axis=1)
pokedex_relevant

Unnamed: 0,Primary Type,HP,Attack,Defense,Sp.Atk,Sp.Def,Speed
0,Grass,45,49,49,65,65,45
1,Grass,60,62,63,80,80,60
2,Grass,80,82,83,100,100,80
3,Grass,80,100,123,122,120,80
4,Fire,39,52,43,60,50,65
...,...,...,...,...,...,...,...
1168,Dark,55,80,80,135,120,100
1169,Dragon,105,139,71,55,101,119
1170,Fairy,74,130,90,120,60,116
1171,Fighting,100,135,115,85,100,135


In [7]:
# extract features and labels
features = pokedex_relevant.iloc[:, 1:].values
labels = pokedex_relevant.iloc[:, 0].values
features, labels

(array([[ 45,  49,  49,  65,  65,  45],
        [ 60,  62,  63,  80,  80,  60],
        [ 80,  82,  83, 100, 100,  80],
        ...,
        [ 74, 130,  90, 120,  60, 116],
        [100, 135, 115,  85, 100, 135],
        [100,  85, 100, 135, 115, 135]], dtype=int64),
 array(['Grass', 'Grass', 'Grass', ..., 'Fairy', 'Fighting', 'Electric'],
       dtype=object))

## Decision Tree Approach using Entropy

In [22]:
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size = 0.2, random_state = 42)
# Decision tree with entropy
clf_entropy = DecisionTreeClassifier(
        criterion = "entropy", random_state = 42)
# Performing training
entropy_model = clf_entropy.fit(X_train, y_train)

# Prediction using entropy
y_pred = entropy_model.predict(X_test)
print("Confusion Matrix: ",
confusion_matrix(y_test, y_pred))

print ("Accuracy : ",
accuracy_score(y_test,y_pred)*100)

print("Report : ",
classification_report(y_test, y_pred))

Confusion Matrix:  [[7 2 1 1 0 0 0 0 2 1 2 0 0 1 0 0 0 1]
 [1 2 0 1 0 1 0 1 0 0 1 0 3 0 1 1 0 2]
 [1 0 2 0 0 0 0 0 0 0 0 0 0 0 2 1 0 2]
 [0 1 0 2 0 0 1 1 1 1 0 0 0 0 1 0 1 0]
 [1 1 1 1 0 0 0 0 0 0 0 1 0 1 1 1 0 1]
 [1 0 0 0 1 4 0 0 0 0 1 0 2 0 0 1 1 1]
 [2 0 0 1 0 0 2 1 0 1 1 2 0 0 2 0 0 2]
 [0 2 0 0 0 0 1 0 0 0 0 0 2 0 0 0 0 1]
 [0 0 1 1 0 0 1 0 2 2 0 0 0 0 2 1 2 0]
 [0 0 1 1 0 0 1 0 2 8 1 0 0 1 0 1 0 1]
 [0 0 2 0 0 0 2 0 0 0 0 0 2 0 0 1 0 1]
 [0 0 0 0 0 1 1 0 0 0 1 1 0 0 1 0 0 1]
 [3 0 1 1 1 1 1 0 0 3 2 1 5 2 1 2 0 3]
 [0 0 0 1 1 1 1 0 0 1 1 0 3 2 1 0 0 0]
 [0 0 0 1 1 0 0 0 0 1 0 1 0 0 4 0 0 1]
 [1 0 0 0 0 1 1 0 1 0 1 0 1 0 0 3 0 0]
 [1 0 0 0 0 0 0 0 0 0 0 0 0 0 2 0 0 6]
 [1 1 0 2 2 3 1 0 1 3 0 2 2 3 5 1 2 6]]
Accuracy :  21.367521367521366
Report :                precision    recall  f1-score   support

         Bug       0.37      0.39      0.38        18
        Dark       0.22      0.14      0.17        14
      Dragon       0.22      0.25      0.24         8
    Electric       0