<a href="https://colab.research.google.com/github/Sabarinathan-369/Predicting-Pokemon-Legendary-Status/blob/main/TensforFlow.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**Import Libraries**

In [15]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder
from sklearn.metrics import *

**Load & Preprocess Data**

In [3]:
df=pd.read_csv("/content/all_pokemon_data.csv")
df.head()

Unnamed: 0,Name,National Dex #,Primary Typing,Secondary Typing,Secondary Typing Flag,Generation,Legendary Status,Form,Alt Form Flag,Evolution Stage,Number of Evolution,Color ID,Catch Rate,Height (dm),Weight (hg),Height (in),Weight (lbs),Base Stat Total,Health,Attack,Defense,Special Attack,Special Defense,Speed
0,bulbasaur,1,grass,poison,True,generation-i,False,Base,False,1,3,green,45,7,69,28,15,318,45,49,49,65,65,45
1,ivysaur,2,grass,poison,True,generation-i,False,Base,False,2,3,green,45,10,130,39,29,405,60,62,63,80,80,60
2,venusaur,3,grass,poison,True,generation-i,False,Base,False,3,3,green,45,20,1000,79,220,525,80,82,83,100,100,80
3,venusaur-mega,3,grass,poison,True,generation-i,True,Mega,True,3,3,green,45,24,1555,94,343,625,80,100,123,122,120,80
4,charmander,4,fire,,False,generation-i,False,Base,False,1,3,red,45,6,85,24,19,309,39,52,43,60,50,65


In [5]:
df.dtypes

Unnamed: 0,0
Name,object
National Dex #,int64
Primary Typing,object
Secondary Typing,object
Secondary Typing Flag,bool
Generation,object
Legendary Status,bool
Form,object
Alt Form Flag,bool
Evolution Stage,int64


In [6]:
df.isnull().sum()

Unnamed: 0,0
Name,0
National Dex #,0
Primary Typing,0
Secondary Typing,530
Secondary Typing Flag,0
Generation,0
Legendary Status,0
Form,0
Alt Form Flag,0
Evolution Stage,0


seems like `secondary typing` has 530 Nan

In [8]:
df['Secondary Typing']=df['Secondary Typing'].fillna('None')
df['Legendary Status']=df['Legendary Status'].map({False:0,True:1})
df = pd.get_dummies(df, columns=['Primary Typing', 'Secondary Typing', 'Generation', 'Color ID'])

In [10]:
num=['Health','Attack','Defense','Special Attack','Special Defense','Speed']
scaler=StandardScaler()
df[num]=scaler.fit_transform(df[num])

# drop irrelevant columns
df=df.drop(columns=['Name','National Dex #','Form','Alt Form Flag','Evolution Stage'])

**Split the data into train/test**

In [11]:
x=df.drop(columns=['Legendary Status'])
y=df['Legendary Status']
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,stratify=y,random_state=42)


**Build Tensorflow model**

In [12]:
model=tf.keras.Sequential([
    tf.keras.layers.Dense(128,activation='relu',input_shape=(x_train.shape[1],)),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64,activation='relu'),
    tf.keras.layers.Dense(1,activation='sigmoid')
])
model.compile(optimizer='adam',loss='binary_crossentropy',metrics=['accuracy'])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


**Train the model**

In [13]:
class_weights={0:1.,1:len(y_train[y_train==0])/len(y_train[y_train==1])}
history=model.fit(
    x_train,y_train,epochs=20,batch_size=32,validation_split=0.2,class_weight=class_weights
)

Epoch 1/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 15ms/step - accuracy: 0.3787 - loss: 41.7874 - val_accuracy: 0.6421 - val_loss: 6.9677
Epoch 2/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5752 - loss: 16.3107 - val_accuracy: 0.6158 - val_loss: 10.9879
Epoch 3/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.5808 - loss: 16.8620 - val_accuracy: 0.7053 - val_loss: 3.7684
Epoch 4/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6789 - loss: 10.3741 - val_accuracy: 0.8579 - val_loss: 2.0613
Epoch 5/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 5ms/step - accuracy: 0.6409 - loss: 9.7403 - val_accuracy: 0.8684 - val_loss: 1.7446
Epoch 6/20
[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - accuracy: 0.6786 - loss: 7.5458 - val_accuracy: 0.8211 - val_loss: 4.7269
Epoch 7/20
[1m24/24[0m [32m━━━━

**Evaluate performace**

In [16]:
y_pred=(model.predict(x_test)>0.5).astype('int32')

print(classification_report(y_test,y_pred))
print(confusion_matrix(y_test,y_pred))

[1m8/8[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 6ms/step 
              precision    recall  f1-score   support

           0       0.95      0.79      0.86       197
           1       0.42      0.78      0.55        40

    accuracy                           0.78       237
   macro avg       0.68      0.78      0.70       237
weighted avg       0.86      0.78      0.81       237

[[155  42]
 [  9  31]]


**Model Improvement**

In [18]:
# add more layers/neurons
model=tf.keras.Sequential([
    tf.keras.layers.Dense(256,activation='relu',input_shape=(x_train.shape[1],)),
    tf.keras.layers.Dropout(0.5),
    tf.keras.layers.Dense(128,activation='relu'),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(64,activation='relu'),
    tf.keras.layers.Dense(1,activation='sigmoid')
])
model.compile(
    optimizer=tf.keras.optimizers.Adam(learning_rate=0.001),
    loss='binary_crossentropy',
    metrics=['accuracy',tf.keras.metrics.Precision(),tf.keras.metrics.Recall()]
)

In [20]:
# handle class imbalance
from imblearn.over_sampling import SMOTE
smote=SMOTE(random_state=42)
x_train_resampled,y_train_resampled=smote.fit_resample(x_train,y_train)
history=model.fit(
    x_train_resampled,y_train_resampled,epochs=30,batch_size=32,
    validation_split=0.2)

Epoch 1/30
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 26ms/step - accuracy: 0.6200 - loss: 7.2628 - precision: 0.4823 - recall: 0.6030 - val_accuracy: 0.2911 - val_loss: 3.0721 - val_precision: 1.0000 - val_recall: 0.2911
Epoch 2/30
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 15ms/step - accuracy: 0.6262 - loss: 5.0459 - precision: 0.4886 - recall: 0.5557 - val_accuracy: 0.0000e+00 - val_loss: 6.5288 - val_precision: 0.0000e+00 - val_recall: 0.0000e+00
Epoch 3/30
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 12ms/step - accuracy: 0.6774 - loss: 3.5848 - precision: 0.5674 - recall: 0.6227 - val_accuracy: 0.4462 - val_loss: 1.4470 - val_precision: 1.0000 - val_recall: 0.4462
Epoch 4/30
[1m40/40[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.6687 - loss: 3.3653 - precision: 0.5590 - recall: 0.5532 - val_accuracy: 0.2373 - val_loss: 1.9945 - val_precision: 1.0000 - val_recall: 0.2373
Epoch 5/30
[1m4

In [26]:
# HyperParameter tuning

import keras_tuner as kt

def build_model(hp):
    model = tf.keras.Sequential()
    model.add(tf.keras.layers.Dense(
        units=hp.Int('units1', min_value=64, max_value=256, step=64),
        activation='relu',
        input_shape=(x_train.shape[1],)
    ))
    model.add(tf.keras.layers.Dropout(
        rate=hp.Float('dropout1', min_value=0.2, max_value=0.5, step=0.1)
    ))
    model.add(tf.keras.layers.Dense(
        units=hp.Int('units2', min_value=32, max_value=128, step=32),
        activation='relu'
    ))
    model.add(tf.keras.layers.Dense(1, activation='sigmoid'))

    model.compile(
        optimizer=tf.keras.optimizers.Adam(
            hp.Choice('learning_rate', [1e-2, 1e-3, 1e-4])
        ),
        loss='binary_crossentropy',
        metrics=['accuracy']
    )
    return model

tuner = kt.RandomSearch(
    build_model,
    objective='val_accuracy',
    max_trials=10,
    executions_per_trial=2,
    directory='keras_tuner',
    project_name='pokemon_legendary'
)

tuner.search(x_train, y_train, epochs=20, validation_split=0.2)
best_model = tuner.get_best_models(num_models=1)[0]

Trial 10 Complete [00h 00m 16s]
val_accuracy: 0.9105263352394104

Best val_accuracy So Far: 0.9105263352394104
Total elapsed time: 00h 02m 45s


  super().__init__(activity_regularizer=activity_regularizer, **kwargs)
  saveable.load_own_variables(weights_store.get(inner_path))
