# Pokemon Type Prediction
Given the base stats, names, and images of a Pokemon, train a model using supervised learning that is able to classify a Pokemon's type.

In [1]:
!pip install -r requirements.txt



You should consider upgrading via the 'C:\ProgramData\Anaconda3\python.exe -m pip install --upgrade pip' command.


In [2]:
# Model helpers and scorers
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# Actual models -- add to these
from sklearn.tree import DecisionTreeClassifier

import pandas as pd

## Data Preprocessing
For data preprocessing, we:
1. Remove rows with invalid primary types
2. Drop unneeded columns
3. Extract the features and labels
4. Make train-test splits

In [3]:
pokedex = pd.read_csv('data/Pokedex_Cleaned.csv')
pokedex

Unnamed: 0,#,Name,Primary Type,Secondary Type,Total,HP,Attack,Defense,Sp.Atk,Sp.Def,Speed,Variant
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,
3,3,Venusaur,Grass,Poison,625,80,100,123,122,120,80,
4,4,Charmander,Fire,,309,39,52,43,60,50,65,
...,...,...,...,...,...,...,...,...,...,...,...,...
1168,1004,Chi-Yu,Dark,Fire,570,55,80,80,135,120,100,
1169,1005,Roaring Moon,Dragon,Dark,590,105,139,71,55,101,119,
1170,1006,Iron Valiant,Fairy,Fighting,590,74,130,90,120,60,116,
1171,1007,Koraidon,Fighting,Dragon,670,100,135,115,85,100,135,


In [4]:
primary_types = pokedex['Primary Type']
primary_types.value_counts()  # need to remove rows that aren't types

Water       147
Normal      130
Grass       102
Bug          99
Psychic      76
Electric     73
Fire         73
Rock         67
Dark         56
Ground       47
Dragon       47
Poison       46
Ghost        46
Fighting     46
Steel        42
Fairy        32
Ice          27
Flying       12
Null          2
Rockruff      1
Male          1
Female        1
Name: Primary Type, dtype: int64

In [5]:
not_types = {"Null", "Male", "Female", "Rockruff"}
pokedex = pokedex.loc[~pokedex['Primary Type'].isin(not_types)]  # remove rows with invalid types
primary_types = pokedex['Primary Type']
primary_types.value_counts()  # bad rows are gone

Water       147
Normal      130
Grass       102
Bug          99
Psychic      76
Electric     73
Fire         73
Rock         67
Dark         56
Dragon       47
Ground       47
Ghost        46
Poison       46
Fighting     46
Steel        42
Fairy        32
Ice          27
Flying       12
Name: Primary Type, dtype: int64

In [6]:
# Drop irrelevant rows, columns, reorder name
pokemon_names = pokedex['Name'].copy()
pokedex_relevant = pokedex.drop(['#', 'Name', 'Secondary Type', 'Total', 'Variant'], axis=1)
pokedex_relevant['Name'] = pokemon_names
pokedex_relevant

Unnamed: 0,Primary Type,HP,Attack,Defense,Sp.Atk,Sp.Def,Speed,Name
0,Grass,45,49,49,65,65,45,Bulbasaur
1,Grass,60,62,63,80,80,60,Ivysaur
2,Grass,80,82,83,100,100,80,Venusaur
3,Grass,80,100,123,122,120,80,Venusaur
4,Fire,39,52,43,60,50,65,Charmander
...,...,...,...,...,...,...,...,...
1168,Dark,55,80,80,135,120,100,Chi-Yu
1169,Dragon,105,139,71,55,101,119,Roaring Moon
1170,Fairy,74,130,90,120,60,116,Iron Valiant
1171,Fighting,100,135,115,85,100,135,Koraidon


In [7]:
pokedex_relevant.Name.tolist()

['Bulbasaur',
 'Ivysaur',
 'Venusaur',
 'Venusaur',
 'Charmander',
 'Charmeleon',
 'Charizard',
 'Charizard',
 'Charizard',
 'Squirtle',
 'Wartortle',
 'Blastoise',
 'Blastoise',
 'Caterpie',
 'Metapod',
 'Butterfree',
 'Weedle',
 'Kakuna',
 'Beedrill',
 'Beedrill',
 'Pidgey',
 'Pidgeotto',
 'Pidgeot',
 'Pidgeot',
 'Rattata',
 'Rattata',
 'Raticate',
 'Raticate',
 'Spearow',
 'Fearow',
 'Ekans',
 'Arbok',
 'Pikachu',
 'Pikachu',
 'Raichu',
 'Raichu',
 'Sandshrew',
 'Sandshrew',
 'Sandslash',
 'Sandslash',
 'Nidoran?',
 'Nidorina',
 'Nidoqueen',
 'Nidoran?',
 'Nidorino',
 'Nidoking',
 'Clefairy',
 'Clefable',
 'Vulpix',
 'Vulpix',
 'Ninetales',
 'Ninetales',
 'Jigglypuff',
 'Wigglytuff',
 'Zubat',
 'Golbat',
 'Oddish',
 'Gloom',
 'Vileplume',
 'Paras',
 'Parasect',
 'Venonat',
 'Venomoth',
 'Diglett',
 'Dugtrio',
 'Meowth',
 'Persian',
 'Persian',
 'Psyduck',
 'Golduck',
 'Mankey',
 'Primeape',
 'Growlithe',
 'Growlithe',
 'Arcanine',
 'Arcanine',
 'Poliwag',
 'Poliwhirl',
 'Poliwrath',

In [8]:
# extract features and labels
features = pokedex_relevant.iloc[:, 1:-1].values.astype(float)
# normalize numbers
for feature in features:
    s = sum(feature)
    feature /= s
labels = pokedex_relevant.iloc[:, 0].values
features, labels

(array([[0.14150943, 0.15408805, 0.15408805, 0.20440252, 0.20440252,
         0.14150943],
        [0.14814815, 0.15308642, 0.15555556, 0.19753086, 0.19753086,
         0.14814815],
        [0.15238095, 0.15619048, 0.15809524, 0.19047619, 0.19047619,
         0.15238095],
        ...,
        [0.12542373, 0.22033898, 0.15254237, 0.20338983, 0.10169492,
         0.19661017],
        [0.14925373, 0.20149254, 0.17164179, 0.12686567, 0.14925373,
         0.20149254],
        [0.14925373, 0.12686567, 0.14925373, 0.20149254, 0.17164179,
         0.20149254]]),
 array(['Grass', 'Grass', 'Grass', ..., 'Fairy', 'Fighting', 'Electric'],
       dtype=object))

In [9]:
# train test splits -- stats
# Stratify on labels to ensure each class has equal proportions
X_train, X_test, y_train, y_test = train_test_split(features, labels, test_size = 0.2, stratify=labels)  # random_state=42

In [10]:
# train test splits -- names
# Stratify on labels to ensure each class has equal proportions
features = pokedex_relevant.iloc[:, -1].values
X_train_name, X_test_name, y_train_name, y_test_name = train_test_split(features, labels, test_size = 0.2, stratify=labels)  # random_state=42

## Decision Tree Approach using Entropy

In [11]:
# Decision tree with entropy
clf_entropy = DecisionTreeClassifier(
        criterion = "entropy")
# Performing training
entropy_model = clf_entropy.fit(X_train, y_train)

# Prediction using entropy
y_pred = entropy_model.predict(X_test)
print("Confusion Matrix: ",
confusion_matrix(y_test, y_pred))

print ("Accuracy : ",
accuracy_score(y_test,y_pred)*100)

print("Report : ",
classification_report(y_test, y_pred))

Confusion Matrix:  [[ 6  0  0  3  0  0  0  0  0  1  0  0  2  1  0  1  3  3]
 [ 2  1  0  2  0  1  1  1  0  0  1  0  0  0  0  1  0  1]
 [ 0  1  0  0  0  0  0  0  0  1  1  0  2  1  1  0  2  0]
 [ 3  1  1  5  0  0  1  1  0  2  0  0  0  1  0  0  0  0]
 [ 0  0  0  0  2  0  0  0  0  1  0  0  1  0  1  0  0  1]
 [ 1  1  0  0  0  0  1  0  0  0  1  1  2  0  1  0  1  0]
 [ 0  2  1  0  0  0  3  0  0  3  1  0  1  0  1  0  0  3]
 [ 0  0  0  1  0  1  0  0  0  0  0  0  0  0  0  0  0  0]
 [ 0  0  0  2  0  0  0  0  4  0  0  0  1  0  0  0  0  2]
 [ 4  1  1  0  0  1  3  0  0  6  1  0  0  0  1  0  1  2]
 [ 1  1  0  0  0  1  0  0  0  1  0  0  2  0  0  2  1  1]
 [ 0  0  0  1  0  0  3  0  0  0  0  0  0  0  0  1  0  0]
 [ 2  1  1  0  0  1  1  1  2  0  0  1 11  1  1  2  1  0]
 [ 0  0  0  3  0  0  0  0  0  0  2  0  0  3  0  0  0  1]
 [ 1  1  0  1  1  0  1  0  1  1  0  0  3  0  2  1  0  2]
 [ 1  0  0  0  0  0  1  1  0  1  3  0  0  0  0  4  2  1]
 [ 0  0  0  0  0  0  0  0  0  0  0  0  0  0  0  2  5  1]
 [ 0  2  1  