# Title: Pokemon classification on iOS using Convolutional Neural Network based on Keras

---




***First you need to download dataset and Swift code to deploy image classification on iphone from Google Drive, since the files are too large to upload on github.***
Please follow the instructions:
1. Click the links to download [dataset]() and [pokemon_iOS]() from google drive.
2. Click `dataset` / `pokemon_iOS` icon on the top with a down-arrow to download the whole folder. 
3. Click `Download` and wait for downloading.
4. (If needed) Unzipping the file.
4. Move the `dataset` and `pokemon_iOS` folder to the folder containing this `jupyter notebook`. In this study, the folder is `Project`.

In [1]:
# set the matplotlib backend so figures can be saved in the background
import matplotlib
matplotlib.use("Agg")

# import the necessary packages
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Conv2D, MaxPooling2D
from tensorflow.keras.layers import Activation, Dropout, Flatten, Dense, BatchNormalization
from tensorflow.keras import backend as K
from tensorflow.keras.preprocessing.image import ImageDataGenerator, img_to_array
from tensorflow.keras.optimizers import RMSprop, SGD, Adam, Nadam
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.applications.resnet import ResNet50
from keras.applications.inception_v3 import InceptionV3
from keras.models import load_model
import coremltools
from sklearn.preprocessing import LabelBinarizer
from sklearn.model_selection import train_test_split



import matplotlib.pyplot as plt
from imutils import paths
import numpy as np
import argparse
import random
import pickle
import cv2
import os


def build_SmallerVGGNet(width, height, depth, classes):
    # initialize the model along with the input shape to be
    # "channels last" and the channels dimension itself
    model = Sequential()
    inputShape = (height, width, depth)
    chanDim = -1

    # if we are using "channels first", update the input shape
    # and channels dimension
    if K.image_data_format() == "channels_first":
        inputShape = (depth, height, width)
        chanDim = 1

    # CONV => RELU => POOL
    model.add(Conv2D(32, (3, 3), padding="same",
        input_shape=inputShape))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(MaxPooling2D(pool_size=(3, 3)))
    model.add(Dropout(0.25))

    # (CONV => RELU) * 2 => POOL
    model.add(Conv2D(64, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(Conv2D(64, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # (CONV => RELU) * 2 => POOL
    model.add(Conv2D(128, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(Conv2D(128, (3, 3), padding="same"))
    model.add(Activation("relu"))
    model.add(BatchNormalization(axis=chanDim))
    model.add(MaxPooling2D(pool_size=(2, 2)))
    model.add(Dropout(0.25))

    # first (and only) set of FC => RELU layers
    model.add(Flatten())
    model.add(Dense(1024))
    model.add(Activation("relu"))
    model.add(BatchNormalization())
    model.add(Dropout(0.5))

    # softmax classifier
    model.add(Dense(classes))
    model.add(Activation("softmax"))

    # return the constructed network architecture
    return model

Duplicate key in file '/Users/raymond/.matplotlib/matplotlibrc', line 2 ('backend: TkAgg')
Duplicate key in file '/Users/raymond/.matplotlib/matplotlibrc', line 3 ('backend: TkAgg')


In [14]:


# initialize the number of epochs to train for, initial learning rate,
# batch size, and image dimensions
EPOCHS = 50
INIT_LR = 1e-3
BS = 64
IMAGE_DIMS = (96, 96, 3)

plotting = 'plot.png'
save_path = 'Pokemon.model'

# initialize the data and labels
data = []
labels = []

# grab the image paths and randomly shuffle them
print("[INFO] loading images...")
imagePaths = sorted(list(paths.list_images("./dataset")))
random.seed(42)
random.shuffle(imagePaths)

# loop over the input images
for imagePath in imagePaths:
    # load the image, pre-process it, and store it in the data list
    image = cv2.imread(imagePath)
    image = cv2.resize(image, (IMAGE_DIMS[1], IMAGE_DIMS[0]))
    image = img_to_array(image)
    data.append(image)

    # extract the class label from the image path and update the
    # labels list
    label = imagePath.split(os.path.sep)[-2]
    labels.append(label)

# scale the raw pixel intensities to the range [0, 1]
data = np.array(data, dtype="float") / 255.0
labels = np.array(labels)
print("[INFO] data matrix: {:.2f}MB".format(
    data.nbytes / (1024 * 1000.0)))

# binarize the labels
lb = LabelBinarizer()
labels = lb.fit_transform(labels)

# partition the data into training and testing splits using 80% of
# the data for training and the remaining 20% for validation and testing
(trainX, testX, trainY, testY) = train_test_split(data,
    labels, test_size=0.2, random_state=42)
(valX, testX, valY, testY) = train_test_split(testX,
    testY, test_size=0.2, random_state=42)

# construct the image generator for data augmentation
aug = ImageDataGenerator(rotation_range=25, width_shift_range=0.1,
    height_shift_range=0.1, shear_range=0.2, zoom_range=0.2,
    horizontal_flip=True, fill_mode="nearest")

# initialize the model
print("[INFO] compiling model...")
model = build_SmallerVGGNet(width=IMAGE_DIMS[1], height=IMAGE_DIMS[0],
    depth=IMAGE_DIMS[2], classes=len(lb.classes_))
opt = Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS)
model.compile(loss="categorical_crossentropy", optimizer=opt, metrics=["accuracy"])

# train the network
print("[INFO] training network...")
stopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
checkpoint = ModelCheckpoint(save_path, monitor='val_acc', mode='max', verbose=1, save_best_only=True)

H = model.fit(
    x=aug.flow(trainX, trainY, batch_size=BS),
    validation_data=(testX, testY),
    steps_per_epoch=len(trainX) // BS,
    epochs=EPOCHS, 
    verbose=1,
    callbacks = [checkpoint]
)

# save the model to disk
print("[INFO] serializing network...")
#model.save(save_path, save_format="h5")

# save the label binarizer to disk
print("[INFO] serializing label binarizer...")
f = open('label.pickle', "wb")
f.write(pickle.dumps(lb))
f.close()

# plot the training loss and accuracy
plt.style.use("ggplot")
plt.figure()
N = EPOCHS
plt.plot(np.arange(0, N), H.history["loss"], label="train_loss")
plt.plot(np.arange(0, N), H.history["val_loss"], label="val_loss")
plt.plot(np.arange(0, N), H.history["accuracy"], label="train_acc")
plt.plot(np.arange(0, N), H.history["val_accuracy"], label="val_acc")
plt.title("Training Loss and Accuracy")
plt.xlabel("Epoch #")
plt.ylabel("Loss/Accuracy")
plt.legend(loc="upper left")
plt.savefig(plotting)

[INFO] loading images...
[INFO] data matrix: 252.07MB
[INFO] compiling model...
[INFO] training network...
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50


Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
[INFO] serializing network...
[INFO] serializing label binarizer...


In [12]:
pretrain = ResNet50(weights = 'imagenet',
              include_top = False,
              input_shape = IMAGE_DIMS)

 
# Model definition
# Create the model
model = Sequential()

# Freeze the convolutional base of VGG16 to prevent the pre-trained weights being updated 
# during training inorder to extract features
pretrain.trainable=False
 
# Add the vgg convolutional base model
model.add(pretrain)
 
# Add new layers
model.add(Flatten())

model.add(Dense(1024, activation='relu'))
model.add(BatchNormalization())                 # normalize and scale inputs or activations
model.add(Dropout(0.2))                         # applies dopout to the input which will randomly disable 20% of hidden units

# add densely-connected NN layer with 128 hidden units
model.add(Dense(units=128, activation='relu')) # use ReLU activation function
model.add(BatchNormalization())                # normalize and scale inputs or activations
model.add(Dropout(0.2))     

model.add(Dense(5, activation='softmax'))

model.summary()


model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS),
              metrics=['acc'])

# Train the model
stopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
checkpoint = ModelCheckpoint('Res50.hdf5', monitor='val_acc', mode='max', verbose=1, save_best_only=True)

historyres = model.fit(
    x=aug.flow(trainX, trainY, batch_size=BS),
    validation_data=(testX, testY),
    steps_per_epoch=len(trainX) // BS,
    epochs=EPOCHS, 
    verbose=1,
    callbacks = [checkpoint]
)


Model: "sequential_4"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Functional)        (None, 3, 3, 2048)        23587712  
_________________________________________________________________
flatten_4 (Flatten)          (None, 18432)             0         
_________________________________________________________________
dense_9 (Dense)              (None, 1024)              18875392  
_________________________________________________________________
batch_normalization_20 (Batc (None, 1024)              4096      
_________________________________________________________________
dropout_14 (Dropout)         (None, 1024)              0         
_________________________________________________________________
dense_10 (Dense)             (None, 128)               131200    
_________________________________________________________________
batch_normalization_21 (Batc (None, 128)              

In [13]:
pretrain = InceptionV3(weights = 'imagenet',
              include_top = False,
              input_shape = IMAGE_DIMS)

 
# Model definition
# Create the model
model = Sequential()

# Freeze the convolutional base of VGG16 to prevent the pre-trained weights being updated 
# during training inorder to extract features
pretrain.trainable=False
 
# Add the vgg convolutional base model
model.add(pretrain)
 
# Add new layers
model.add(Flatten())

model.add(Dense(1024, activation='relu'))
model.add(BatchNormalization())                 # normalize and scale inputs or activations
model.add(Dropout(0.2))                         # applies dopout to the input which will randomly disable 20% of hidden units

# add densely-connected NN layer with 128 hidden units
model.add(Dense(units=128, activation='relu')) # use ReLU activation function
model.add(BatchNormalization())                # normalize and scale inputs or activations
model.add(Dropout(0.2))     

model.add(Dense(5, activation='softmax'))

model.summary()


model.compile(loss='categorical_crossentropy',
              optimizer=Adam(lr=INIT_LR, decay=INIT_LR / EPOCHS),
              metrics=['acc'])

# Train the model
stopping = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=10)
checkpoint = ModelCheckpoint('Incep.model', monitor='val_acc', mode='max', verbose=1, save_best_only=True)

historyin = model.fit(
    x=aug.flow(trainX, trainY, batch_size=BS),
    validation_data=(testX, testY),
    steps_per_epoch=len(trainX) // BS,
    epochs=EPOCHS, 
    verbose=1,
    callbacks = [checkpoint]
)



Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/inception_v3/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5
Model: "sequential_5"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
inception_v3 (Functional)    (None, 1, 1, 2048)        21802784  
_________________________________________________________________
flatten_5 (Flatten)          (None, 2048)              0         
_________________________________________________________________
dense_12 (Dense)             (None, 1024)              2098176   
_________________________________________________________________
batch_normalization_116 (Bat (None, 1024)              4096      
_________________________________________________________________
dropout_16 (Dropout)         (None, 1024)              0         
_________________________________________________________________
dense_13 (Dense)             (Non


Epoch 00033: val_acc did not improve from 0.78723
Epoch 34/50

Epoch 00034: val_acc did not improve from 0.78723
Epoch 35/50

Epoch 00035: val_acc did not improve from 0.78723
Epoch 36/50

Epoch 00036: val_acc did not improve from 0.78723
Epoch 37/50

Epoch 00037: val_acc did not improve from 0.78723
Epoch 38/50

Epoch 00038: val_acc improved from 0.78723 to 0.80851, saving model to Incep.model
INFO:tensorflow:Assets written to: Incep.model/assets
Epoch 39/50

Epoch 00039: val_acc did not improve from 0.80851
Epoch 40/50

Epoch 00040: val_acc did not improve from 0.80851
Epoch 41/50

Epoch 00041: val_acc did not improve from 0.80851
Epoch 42/50

Epoch 00042: val_acc did not improve from 0.80851
Epoch 43/50

Epoch 00043: val_acc did not improve from 0.80851
Epoch 44/50

Epoch 00044: val_acc did not improve from 0.80851
Epoch 45/50

Epoch 00045: val_acc did not improve from 0.80851
Epoch 46/50

Epoch 00046: val_acc did not improve from 0.80851
Epoch 47/50

Epoch 00047: val_acc did not i

In [2]:


# converting to CoreML

# load the class labels
print("[INFO] loading class labels from label binarizer")
lb = pickle.loads(open('label.pickle', "rb").read())
class_labels = lb.classes_.tolist()
print("[INFO] class labels: {}".format(class_labels))

# load the trained convolutional neural network
print("[INFO] loading model...")
model = load_model('Pokemon.model')

# convert the model to coreml format
print("[INFO] converting model")
coreml_model = coremltools.converters.convert(model,
    input_names="image",
    image_input_names="image",
    image_scale=1/255.0,
    class_labels=class_labels,
    is_bgr=True)

# save the model to disk
output = "pokemon.mlmodel"
print("[INFO] saving model as {}".format(output))
coreml_model.save(output)


[INFO] loading class labels from label binarizer
[INFO] class labels: ['bulbasaur', 'charmander', 'mewtwo', 'pikachu', 'squirtle']
[INFO] loading model...




[INFO] converting model


Running TensorFlow Graph Passes: 100%|██████████| 5/5 [00:01<00:00,  3.31 passes/s]
Converting Frontend ==> MIL Ops: 100%|██████████| 71/71 [00:00<00:00, 337.94 ops/s]
Running MIL optimization passes: 100%|██████████| 16/16 [00:00<00:00, 117.19 passes/s]
Translating MIL ==> MLModel Ops: 100%|██████████| 101/101 [00:01<00:00, 77.93 ops/s]


[INFO] saving model as pokemon.mlmodel
