# **Pokemon Classifier Using ResNet50**

---



In [4]:
# the images are in my google drive account, so we need to import drive

from google.colab import drive

In [5]:
# Mount Google Drive so the image folders become reachable through the
# Colab file system.
mount_point = '/content/gdrive'
drive.mount(mount_point)

Mounted at /content/gdrive


## ***Importing Libraries***

---



In [6]:
# Import the operating system module from python as we will work with folders and files
import os

# import the image module to make loading and converting images easier
from keras.preprocessing import image
from keras.utils import np_utils

# the ResNet model has been trained with 1000 different labels
# import the ResNet model from Keras applications
# import the preprocess_input to transform our test images to the input that the ResNet50 model wants
# import decode_predictions to predict an actual output(label) from the model values


# we also import the Adam optimizer from the Keras optimizers
# and all layer classes from the Keras layers module

from tensorflow.keras.applications.resnet50 import ResNet50
from keras.applications.resnet50 import preprocess_input
from tensorflow.keras.optimizers import Adam
from tensorflow.python.keras.layers import *
from tensorflow.python.keras.models import Model
import numpy as np
print("Imported Successfully!")


Imported Successfully!


In [7]:
# The training set lives on the mounted Drive: there are 10 sub-folders,
# each one holding the images of a different type of Pokemon.
# NOTE: os.listdir returns the entries in arbitrary order, and the class
# indices assigned while loading the images follow this order.
train_root = '/content/gdrive/My Drive/Train'
folders = os.listdir(train_root)

print(folders)

['Meowth', 'Pikachu', 'Charmander', 'Psyduck', 'Fearow', 'Bulbasaur', 'Aerodactyl', 'Spearow', 'Squirtle', 'Dratini']


## ***Preprocessing the data***

---



In [8]:
# Build two parallel lists:
#   image_data[i] - pixel array of one image
#   labels[i]     - integer class index of that image
# The class index is the position of the image's folder in `folders`.
# Every image is resized to 224 x 224 on load and converted to an array of pixels.

image_data = []
labels = []

for class_index, ix in enumerate(folders):
    path = os.path.join("/content/gdrive/My Drive/Train", ix)

    print(path, class_index)
    for im in os.listdir(path):
        file_path = os.path.join(path, im)
        try:
            img = image.load_img(file_path, target_size = (224,224))
            img_array = image.img_to_array(img)
        except Exception as err:
            # Files that cannot be loaded as images are skipped on purpose
            # (best effort), but they are reported instead of being dropped
            # silently. Catching Exception rather than using a bare except
            # keeps KeyboardInterrupt / SystemExit working.
            print("Skipping", file_path, "->", err)
            continue
        image_data.append(img_array)
        labels.append(class_index)

/content/gdrive/My Drive/Train/Meowth 0
/content/gdrive/My Drive/Train/Pikachu 1


  "Palette images with Transparency expressed in bytes should be "


/content/gdrive/My Drive/Train/Charmander 2
/content/gdrive/My Drive/Train/Psyduck 3
/content/gdrive/My Drive/Train/Fearow 4
/content/gdrive/My Drive/Train/Bulbasaur 5
/content/gdrive/My Drive/Train/Aerodactyl 6
/content/gdrive/My Drive/Train/Spearow 7
/content/gdrive/My Drive/Train/Squirtle 8
/content/gdrive/My Drive/Train/Dratini 9


In [9]:
# Shuffle images and labels together so that the classes are interleaved.
# The folders were read one class after another, so without shuffling the
# samples would stay grouped by class.
# zip pairs image_data[i] with labels[i]; after shuffling, the pairs are
# unzipped back into the two original lists (updated in place).
# A dedicated, seeded generator makes the shuffled order reproducible for a
# given input order without touching the global random state.

import random

SHUFFLE_SEED = 42

combined_dataset = list(zip(image_data, labels))
random.Random(SHUFFLE_SEED).shuffle(combined_dataset)

# zip(*[]) yields nothing to unpack, so leave the (empty) lists as they are
if combined_dataset:
    image_data[:], labels[:] = zip(*combined_dataset)

***One hot encoding***

---



In [10]:
# Turn the Python lists into numpy arrays ready for training.
#
# X_train: the images are passed through the ResNet50 preprocess_input, the
# same transformation that is applied to the image at prediction time, so
# training and inference see identically preprocessed inputs.
# np.array(...) builds a fresh array, so image_data itself is left untouched
# and re-running this cell does not preprocess the data twice.
#
# Y_train: the integer labels are transformed to a matrix using one-hot encoding
# eg. 5 => 0 0 0 0 0 1 0 0 0 0
# num_classes is passed explicitly so the matrix always has one column per
# class folder, even if the highest class index has no loadable image.

X_train = preprocess_input(np.array(image_data, dtype = 'float32'))
Y_train = np.array(labels)

Y_train = np_utils.to_categorical(Y_train, num_classes = len(folders))

## ***Model***

---



In [11]:
# Load ResNet50 as a feature extractor only:
#   - include_top = False leaves out the classification layer
#   - weights = 'imagenet' loads the weights trained on the ImageNet dataset
#   - input_shape is given explicitly because include_top is False;
#     the trailing 3 says that the images are coloured (3 channels)
model = ResNet50(
    include_top = False,
    weights = 'imagenet',
    input_shape = (224, 224, 3),
)

Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [12]:
# After the convolution layers the output has the shape 7 x 7 x 2048
# (approx. 100,000 values per image), so it is first reduced with a global
# average pooling to get far fewer inputs for the dense layers.
#
# Classification head, stacked on the pre-trained model's output:
#   av1 - global average pooling
#   fc1 - dense layer with 256 units and ReLU activation
#   d1  - dropout layer with rate 0.5
#   fc2 - dense softmax layer with 10 outputs, one per class
#
# Each layer is called on the previous tensor; that call is what connects
# the new classifier to the pre-trained model.

av1 = GlobalAveragePooling2D()(model.output)
fc1 = Dense(units = 256, activation = 'relu')(av1)
d1 = Dropout(rate = 0.5)(fc1)
fc2 = Dense(units = 10, activation = 'softmax')(d1)


In [13]:
# Assemble the final network, model_new: its input is the input of the
# pre-trained ResNet50 and its output is the last layer defined for the
# new classifier, fc2.
model_new = Model(
    inputs = model.input,
    outputs = fc2,
)
model_new.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1_conv[0][0]                 
______________________________________________________________________________________________

## ***Predict an image class***

---



In [14]:
# Predict the class of a single image, before the model has been trained.

image_path = '/content/gdrive/My Drive/pikachu.jpg'

# load the image at the network's input size and convert it to a pixel array
img = image.load_img(image_path, target_size = (224,224))
x = image.img_to_array(img)

# add a leading batch axis, then preprocess the input to be compatible
# with the ResNet50 model
x = preprocess_input(np.expand_dims(x, axis=0))

# run the model and print the index of the class with the highest probability
pred = model_new.predict(x)
print(np.argmax(pred))

5


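We see that the predicted class is incorrect, because the model has not been trained yet: the classification layers we added on top of ResNet50 still have their random initial weights.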

## ***Compile***

---



In [15]:
# Compile the model using the Adam optimizer with a very small learning rate.
# `learning_rate` is the supported argument name; `lr` is only a deprecated
# alias that newer Keras versions no longer accept.

adam = Adam(learning_rate = 0.00003)

model_new.compile(loss = 'categorical_crossentropy', optimizer = adam, metrics = ['accuracy'])

**Set some layers to be trainable**

In [16]:
# Print every layer of the model next to its index, to help choose which
# layers to freeze.
for ix, layer in enumerate(model_new.layers):
    print(ix, layer)

0 <tensorflow.python.keras.engine.input_layer.InputLayer object at 0x7f116e29a8d0>
1 <tensorflow.python.keras.layers.convolutional.ZeroPadding2D object at 0x7f116e3b4690>
2 <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x7f116e27db90>
3 <tensorflow.python.keras.layers.normalization_v2.BatchNormalization object at 0x7f116ec1bdd0>
4 <tensorflow.python.keras.layers.core.Activation object at 0x7f116ec1bbd0>
5 <tensorflow.python.keras.layers.convolutional.ZeroPadding2D object at 0x7f1175416750>
6 <tensorflow.python.keras.layers.pooling.MaxPooling2D object at 0x7f116e2ea410>
7 <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x7f11aa1104d0>
8 <tensorflow.python.keras.layers.normalization_v2.BatchNormalization object at 0x7f116e28ec50>
9 <tensorflow.python.keras.layers.core.Activation object at 0x7f116ec56110>
10 <tensorflow.python.keras.layers.convolutional.Conv2D object at 0x7f116a144e50>
11 <tensorflow.python.keras.layers.normalization_v2.BatchNormalization o

In [17]:
# Freeze the weights of the first 169 layers, so that only the remaining
# layers are updated during training.

for layer in model_new.layers[:169]:
    layer.trainable = False

# A change to `trainable` is only guaranteed to be taken into account once
# the model is compiled again, so recompile with the same loss, optimizer
# and metrics as before.
model_new.compile(loss = 'categorical_crossentropy', optimizer = adam, metrics = ['accuracy'])

# summary() prints the table itself and returns None, so it is not wrapped
# in print() (that would add a stray "None" line to the output)
model_new.summary()

Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_1 (InputLayer)            [(None, 224, 224, 3) 0                                            
__________________________________________________________________________________________________
conv1_pad (ZeroPadding2D)       (None, 230, 230, 3)  0           input_1[0][0]                    
__________________________________________________________________________________________________
conv1_conv (Conv2D)             (None, 112, 112, 64) 9472        conv1_pad[0][0]                  
__________________________________________________________________________________________________
conv1_bn (BatchNormalization)   (None, 112, 112, 64) 256         conv1_conv[0][0]                 
______________________________________________________________________________________________

## ***Train the model***

---



In [18]:
# Train the model: 5 epochs, batches of 16 images, with 20% of the samples
# held out for validation. The object returned by fit is kept in `hist`.

hist = model_new.fit(
    X_train,
    Y_train,
    batch_size = 16,
    epochs = 5,
    validation_split = 0.20,
    shuffle = True,
)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


## ***Predict***

---



In [19]:
# Predict the class of the same image again, this time after training.

image_path = '/content/gdrive/My Drive/pikachu.jpg'
img = image.load_img(image_path, target_size = (224,224))

# pixel array -> batch holding one image -> ResNet50 input preprocessing
x = image.img_to_array(img)
x = x[np.newaxis, ...]
x = preprocess_input(x)

# the predicted class is the one with the highest probability
pred = model_new.predict(x)
print(np.argmax(pred))

1


Now we see that the model predicted the correct class: 1 is the index of the `Pikachu` folder in the folder listing printed earlier, and the image was of a Pikachu.