# Determining if Pokemon Types can be found using Multilabel Classification Based on Stats

Models used include: Binary Relevance, Classifier Chains, Label Powerset, Logistic Regression, Decision Trees, Random Forests

In [1]:
pip install scikit-multilearn

You should consider upgrading via the '/home/kevinkurianai/.pyenv/versions/3.9.10/envs/venv/bin/python3.9 -m pip install --upgrade pip' command.[0m[33m
[0mNote: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd

pok = pd.read_csv("Pokemon.csv")

In [3]:
pok

Unnamed: 0,#,Name,Type 1,Type 2,Total,HP,Attack,Defense,Sp. Atk,Sp. Def,Speed,Generation,Legendary
0,1,Bulbasaur,Grass,Poison,318,45,49,49,65,65,45,1,False
1,2,Ivysaur,Grass,Poison,405,60,62,63,80,80,60,1,False
2,3,Venusaur,Grass,Poison,525,80,82,83,100,100,80,1,False
3,3,VenusaurMega Venusaur,Grass,Poison,625,80,100,123,122,120,80,1,False
4,4,Charmander,Fire,,309,39,52,43,60,50,65,1,False
...,...,...,...,...,...,...,...,...,...,...,...,...,...
795,719,Diancie,Rock,Fairy,600,50,100,150,100,150,50,6,True
796,719,DiancieMega Diancie,Rock,Fairy,700,50,160,110,160,110,110,6,True
797,720,HoopaHoopa Confined,Psychic,Ghost,600,80,110,60,150,130,70,6,True
798,720,HoopaHoopa Unbound,Psychic,Dark,680,80,160,60,170,130,80,6,True


In [4]:
#Group the Pokemon types in each row into a list
pok_types = []

for _, row in pok.iterrows():
    if pd.isna(row["Type 2"]):
        pok_types.append([row["Type 1"]])
    else:
        pok_types.append(row[["Type 1", "Type 2"]].to_list())

In [5]:
(pok_types)

[['Grass', 'Poison'],
 ['Grass', 'Poison'],
 ['Grass', 'Poison'],
 ['Grass', 'Poison'],
 ['Fire'],
 ['Fire'],
 ['Fire', 'Flying'],
 ['Fire', 'Dragon'],
 ['Fire', 'Flying'],
 ['Water'],
 ['Water'],
 ['Water'],
 ['Water'],
 ['Bug'],
 ['Bug'],
 ['Bug', 'Flying'],
 ['Bug', 'Poison'],
 ['Bug', 'Poison'],
 ['Bug', 'Poison'],
 ['Bug', 'Poison'],
 ['Normal', 'Flying'],
 ['Normal', 'Flying'],
 ['Normal', 'Flying'],
 ['Normal', 'Flying'],
 ['Normal'],
 ['Normal'],
 ['Normal', 'Flying'],
 ['Normal', 'Flying'],
 ['Poison'],
 ['Poison'],
 ['Electric'],
 ['Electric'],
 ['Ground'],
 ['Ground'],
 ['Poison'],
 ['Poison'],
 ['Poison', 'Ground'],
 ['Poison'],
 ['Poison'],
 ['Poison', 'Ground'],
 ['Fairy'],
 ['Fairy'],
 ['Fire'],
 ['Fire'],
 ['Normal', 'Fairy'],
 ['Normal', 'Fairy'],
 ['Poison', 'Flying'],
 ['Poison', 'Flying'],
 ['Grass', 'Poison'],
 ['Grass', 'Poison'],
 ['Grass', 'Poison'],
 ['Bug', 'Grass'],
 ['Bug', 'Grass'],
 ['Bug', 'Poison'],
 ['Bug', 'Poison'],
 ['Ground'],
 ['Ground'],
 ['Normal

In [6]:
len(pok_types)

800

In [7]:
#Convert the lists into numeric values to compute
from sklearn.preprocessing import MultiLabelBinarizer

mlb = MultiLabelBinarizer()
m = mlb.fit_transform(pok_types)
mlb.classes_

array(['Bug', 'Dark', 'Dragon', 'Electric', 'Fairy', 'Fighting', 'Fire',
       'Flying', 'Ghost', 'Grass', 'Ground', 'Ice', 'Normal', 'Poison',
       'Psychic', 'Rock', 'Steel', 'Water'], dtype=object)

In [8]:
m[25]

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0])

In [9]:
stats = ["HP", "Defense", "Sp. Atk", "Sp. Def", "Speed"]

In [10]:
#Input features as X and labels (y) as the elements
import numpy as np

X = pok[stats].to_numpy().astype(int)

y = m

X, y

(array([[ 45,  49,  65,  65,  45],
        [ 60,  63,  80,  80,  60],
        [ 80,  83, 100, 100,  80],
        ...,
        [ 80,  60, 150, 130,  70],
        [ 80,  60, 170, 130,  80],
        [ 80, 120, 130,  90,  70]]),
 array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 0],
        ...,
        [0, 0, 0, ..., 0, 0, 0],
        [0, 1, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 0, 1]]))

#  Method used to Split into Train/Test Sets

This dataset is imbalanced in terms of the variation of types, so to account
for that we use iterative stratification. This splts each input into subsets
(where each label is considered individually) and then it distributes the samples starting 
with the fewest and working up to the inputs with the most labels.

In [11]:
#Split into train and test set

from skmultilearn.model_selection import iterative_train_test_split

X_train, y_train, X_test, y_test = iterative_train_test_split(X, y, test_size=0.3)

# Metrics for Multilabel Classification

Various metrics are used to see how the classifiers will perform:

**Accuracy** - very strict measure, has to get all the elements correct in order to be considered correct. If one is incorrect, it is considered an incorrect prediction.

**Precision** - ratio of how much is predicted is correct.

**Recall** - ratio of the correct predictions and the total number of correct items in the set
 
**F1 Score** - 2 * Precision * Recall/(Precision + Recall). Harmonic mean of precision and recall and used to compare the 
performance of two classifiers.



In [12]:
# Establisin a Baseline, equivalent to just guessing, no training involved
# Predicting stricly Normal type
from sklearn.metrics import (
    accuracy_score,
    f1_score,
    precision_score,
    recall_score,
)


normal_type = m[25]
pred_singletype = [normal_type for i in range(len(y_test))]


print("Accuracy(%):", accuracy_score(y_test, pred_singletype) * 100)
print("F1 Score(%):", f1_score(y_test, pred_singletype, average="macro") * 100)
print(
    "Precision Score (%):",
    precision_score(y_test, pred_singletype, average="samples") * 100,
)

Accuracy(%): 7.659574468085106
F1 Score(%): 1.2949039264828737
Precision Score (%): 13.191489361702127


# Multilabel Classification Methods
 **1)** Binary Relevance - Treats each label as a separate single class classification. It is the simpelest method and is very similar to the One vs All classification. For example, it would ask yes or no questions for each element (Is it a water type: yes or no?)
 
 **2)** Classifier Chain - This approach involves linking together binary classifiers in a chain strucutre, 
such that class label predictions become features for other classifiers

**3)** Label Powerset - Transforms this multi-label problem into a multi-class problem. Creates a new label based on the 
combinations that are present in the training dataset. For example [Grass, Poison] -> GrassPoison. If there is a combination that doesn't exist in the training exist but exists in the test set, the model won't be able to predict (e.g - Volcanion).If the number of classes increases, the number of distinct combinations can grow exponentially which can 
lead to be computationally impractical. Previous methods would often predict 0 or 3 elements or more often than 1 of 2 elements. However, the label powerset method only uses existing combinations and no combination has 0 or 2+ elements. Leading this to likely being the best claassifer.


In [13]:

from skmultilearn.problem_transform import BinaryRelevance
from sklearn.linear_model import LogisticRegression


classifier = BinaryRelevance(classifier=LogisticRegression(random_state=69))

classifier.fit(X_train, y_train)

predictions = classifier.predict(X_test)


# The higher the better
print("Accuracy(%):", accuracy_score(y_test, predictions) * 100)
print("F1 Score(%):", f1_score(y_test, predictions, average="micro") * 100)

Accuracy(%): 1.276595744680851
F1 Score(%): 3.5805626598465476


In [14]:

from skmultilearn.problem_transform import ClassifierChain
from sklearn.linear_model import LogisticRegression

classifier = ClassifierChain(LogisticRegression(random_state=69, max_iter=400))
classifier.fit(X_train, y_train)

predictions = classifier.predict(X_test)

print("Accuracy(%):", accuracy_score(y_test, predictions) * 100)
print("F1 Score(%):", f1_score(y_test, predictions, average="micro") * 100)

Accuracy(%): 8.936170212765958
F1 Score(%): 16.97792869269949


In [15]:

from skmultilearn.problem_transform import ClassifierChain
from sklearn.tree import DecisionTreeClassifier

classifier = ClassifierChain(DecisionTreeClassifier(max_depth=8))
classifier.fit(X_train, y_train)

predictions = classifier.predict(X_test)

print("Accuracy(%):", accuracy_score(y_test, predictions) * 100)
print("F1 Score(%):", f1_score(y_test, predictions, average="micro") * 100)

Accuracy(%): 5.106382978723404
F1 Score(%): 20.632279534109816


In [16]:

from skmultilearn.problem_transform import ClassifierChain
from sklearn.ensemble import RandomForestClassifier

classifier = ClassifierChain(RandomForestClassifier(max_depth=28))
classifier.fit(X_train, y_train)

predictions = classifier.predict(X_test)

print("Accuracy(%):", accuracy_score(y_test, predictions) * 100)
print("F1 Score(%):", f1_score(y_test, predictions, average="micro") * 100)

Accuracy(%): 5.957446808510639
F1 Score(%): 16.595744680851066


In [17]:


from skmultilearn.problem_transform import LabelPowerset

classifier = LabelPowerset(
    LogisticRegression(solver="saga", random_state=69, max_iter=4000)
)

classifier.fit(X_train, y_train)

predictions = classifier.predict(X_test)


print("Accuracy(%):", accuracy_score(y_test, predictions) * 100)

print(
    "Precision Score (%):",
    precision_score(y_test, predictions, average="samples") * 100,
)
print("Recall Score (%): ", recall_score(y_test, predictions, average="samples") * 100)

print("F1 Score(%):", f1_score(y_test, predictions, average="micro") * 100)


Accuracy(%): 13.191489361702127
Precision Score (%): 24.46808510638298
Recall Score (%):  22.340425531914892
F1 Score(%): 22.4887556221889


In [18]:
from skmultilearn.problem_transform import LabelPowerset

classifier = LabelPowerset(DecisionTreeClassifier())

classifier.fit(X_train, y_train)

predictions = classifier.predict(X_test)


print("Accuracy(%):", accuracy_score(y_test, predictions) * 100)

print(
    "Precision Score (%):",
    precision_score(y_test, predictions, average="samples") * 100,
)
print("Recall Score (%): ", recall_score(y_test, predictions, average="samples") * 100)

print("F1 Score(%):", f1_score(y_test, predictions, average="micro") * 100)


Accuracy(%): 8.936170212765958
Precision Score (%): 19.574468085106382
Recall Score (%):  17.659574468085108
F1 Score(%): 18.95332390381895


In [19]:
from skmultilearn.problem_transform import LabelPowerset

classifier = LabelPowerset(RandomForestClassifier(max_depth=10))

classifier.fit(X_train, y_train)

predictions = classifier.predict(X_test)


print("Accuracy(%):", accuracy_score(y_test, predictions) * 100)


print(
    "Precision Score (%):",
    precision_score(y_test, predictions, average="samples") * 100,
)

print("Recall Score (%): ", recall_score(y_test, predictions, average="samples") * 100)
print("F1 Score(%):", f1_score(y_test, predictions, average="micro") * 100)

Accuracy(%): 16.170212765957448
Precision Score (%): 29.574468085106382
Recall Score (%):  25.53191489361702
F1 Score(%): 26.76691729323309


# Conclusion

Label Powerset did end up being the best classifier with an F1 Score around 25% and an accuracy of around 15%, however, this is not nearly enough to show any evidence that a pokemon's elements can be determined strictly from its stats.