In [None]:
import os, csv
import numpy as np
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_val_score
from sklearn.metrics import accuracy_score
from keras.models import Sequential
from keras.layers import Dense, Activation, Conv2D, MaxPooling2D, Flatten, BatchNormalization, Dropout
from keras.utils import np_utils
from PIL import Image
import tensorflow as tf
from tqdm import tqdm
import pandas as pd

In [None]:
# from google.colab import drive
# drive.mount('/content/Mydrive')


In [None]:
#!unzip "/content/Mydrive/MyDrive/Scraped-Image-Dataset.zip" -d "/content"

In [None]:
type_mapping = {} # pokemon type (string) => pokemon type (integer)
type_mapping_reversed = {} # pokemon type (integer) => pokemon type (string)
pokemon_type_mapping = {} # pokemon name => pokemon type (integer); type1 is preferred, type2 is the fallback

types = ['Grass', 'Poison', 'Fire', 'Flying', 'Water', 'Bug', 'Normal', 'Electric', 'Ground', 'Fairy', 'Fighting', 'Psychic', 'Rock', 'Steel', 'Ice', 'Ghost', 'Dragon', 'Dark']
#'Grass', 'Poison', 'Fire', 'Flying', 'Water', 'Bug', 'Normal', 'Electric', 'Ground', 'Fairy', 'Fighting', 'Psychic', 'Rock', 'Steel', 'Ice', 'Ghost', 'Dragon', 'Dark'

# Build the label encoding from the CSV. Columns are read by position:
# row[0] = name, row[1] = type1, row[2] = type2 (optional).
# NOTE(review): column meaning is taken from the comments above -- confirm against the CSV header.
#
# Each pokemon gets exactly one label: type1 when it is listed in `types`,
# otherwise type2 when that is listed. Previously type2 silently replaced type1
# for every dual-type pokemon. Integer codes are handed out in order of first use.
with open('/content/Mydrive/MyDrive/pokemon.csv', newline='') as csvfile:
    reader = csv.reader(csvfile)
    next(reader, None) # skip header
    for row in reader:
        if len(row) < 2:
            continue  # blank or truncated line: nothing to label

        name, type1 = row[0], row[1]
        type2 = row[2] if len(row) > 2 else None

        if type1 in types:
            label = type1
        elif type2 in types:
            label = type2
        else:
            continue  # neither type is one we classify

        if label not in type_mapping:
            next_index = len(type_mapping)
            type_mapping[label] = next_index
            type_mapping_reversed[next_index] = label
        pokemon_type_mapping[name] = type_mapping[label]


In [None]:
# Disabled one-off preprocessing step (PNG -> JPG conversion).
# The code below is wrapped in a string literal, so nothing in it is executed;
# running this cell only echoes the string back as the cell output.
'''
# original jpgs are (120 x 120 x 3) while original pngs are (120 x 120 x 4)
# change all pngs so that they are (120 x 120 x 3) and convert to jpg
# probably better to convert png -> jpg rather than jpg -> png
# because removing the alpha channel doesn't remove info (png -> jpg)
# but adding the alpha channel adds unnecessary info (jpg -> png)

i = 0
for filename in os.listdir('Downloads/archive1/images/images/'):
    if filename.endswith(".png"):
        im = Image.open(f'Downloads/archive1/images/images/{filename}').convert('RGBA')
        pokemon_name = filename[:-4]
        background = Image.new("RGB", im.size, (255, 255, 255))
        background.paste(im, mask=im.split()[-1])
        name=f'Downloads/archive1/images/images/{pokemon_name}.jpg'
        background.save(name, 'JPEG')
        os.remove(f'Downloads/archive1/images/images/{filename}') # delete original png image
        i += 1

print(f'{i} images converted')
'''

'\n# original jpgs are (120 x 120 x 3) while original pngs are (120 x 120 x 4)\n# change all pngs so that they are (120 x 120 x 3) and convert to jpg\n# probably better to convert png -> jpg rather than jpg -> png\n# because removing the alpha channel doesn\'t remove info (png -> jpg)\n# but adding the alpha channel adds unnecessary info (jpg -> png)\n\ni = 0\nfor filename in os.listdir(\'Downloads/archive1/images/images/\'):\n    if filename.endswith(".png"):\n        im = Image.open(f\'Downloads/archive1/images/images/{filename}\').convert(\'RGBA\')\n        pokemon_name = filename[:-4]\n        background = Image.new("RGB", im.size, (255, 255, 255))\n        background.paste(im, mask=im.split()[-1])\n        name=f\'Downloads/archive1/images/images/{pokemon_name}.jpg\'\n        background.save(name, \'JPEG\')\n        os.remove(f\'Downloads/archive1/images/images/{filename}\') # delete original png image\n        i += 1\n\nprint(f\'{i} images converted\')\n'

In [None]:
# Disabled one-off preprocessing step (resize every JPG to 227 x 227 in place).
# The code below is wrapped in a string literal, so nothing in it is executed;
# running this cell only echoes the string back as the cell output.
'''
i = 0
for filename in os.listdir('Downloads/archive1/images/images/'):
    if filename.endswith(".jpg"):
        im = Image.open(f'Downloads/archive1/images/images/{filename}')
        pokemon_name = filename[:-4]
        new_image = im.resize((227, 227))
        new_image.save(f'Downloads/archive1/images/images/{pokemon_name}.jpg', 'JPEG')
        i += 1
'''        

'\ni = 0\nfor filename in os.listdir(\'Downloads/archive1/images/images/\'):\n    if filename.endswith(".jpg"):\n        im = Image.open(f\'Downloads/archive1/images/images/{filename}\')\n        pokemon_name = filename[:-4]\n        new_image = im.resize((227, 227))\n        new_image.save(f\'Downloads/archive1/images/images/{pokemon_name}.jpg\', \'JPEG\')\n        i += 1\n'

In [None]:
# Load every image of every labelled pokemon into memory.
# Each image ends up as a (227, 227, 3) array, i.e. 227 * 227 * 3 = 154587 values.
directory = '/content/Scraped-Image-Dataset'
X = []  # one image array per sample
y = []  # integer type label per sample (from pokemon_type_mapping)
z = []  # image file name per sample, used later when reporting predictions

for pokemon_name in tqdm(pokemon_type_mapping.keys()):
    pokemon_dir = os.path.join(directory, pokemon_name)
    # os.listdir returns entries in arbitrary order; sorting keeps the sample order
    # (and therefore the seeded train/test split) stable from run to run.
    for image_path in sorted(os.listdir(pokemon_dir)):
        # The context manager closes the file as soon as the pixels are copied out,
        # instead of leaving thousands of handles open until garbage collection.
        with Image.open(os.path.join(pokemon_dir, image_path)) as source:
            # Always convert: this covers grayscale, palette and RGBA inputs as well as
            # other three-channel modes that a shape check alone would let through.
            rgb_image = source.convert('RGB')

        if rgb_image.size != (227, 227):
            rgb_image = rgb_image.resize((227, 227))

        X.append(np.array(rgb_image))
        y.append(pokemon_type_mapping[pokemon_name])
        z.append(image_path)

100%|██████████| 808/808 [02:27<00:00,  5.46it/s]


In [None]:
# Hold out 20% of the samples as the test set. The file names (z) are split in
# lockstep so each test image can still be identified when predictions are reported.
X_train, X_test, y_train, y_test, z_train, z_test = train_test_split(X, y, z, test_size=0.20, random_state=5)

# One-hot encode the integer labels with one column per entry in `types`.
# to_categorical already returns NumPy arrays, so no further conversion is needed.
y_train = np_utils.to_categorical(y_train, len(types))
y_test = np_utils.to_categorical(y_test, len(types))

# Stack the images into arrays (so keras won't complain) and scale by 1/255.
# float32 is requested explicitly: a plain `np.array(...) / 255` promotes the whole
# image tensor to float64 and doubles its memory footprint. Dividing in place
# avoids a second full-size temporary.
X_train = np.asarray(X_train, dtype=np.float32)
X_train /= 255
X_test = np.asarray(X_test, dtype=np.float32)
X_test /= 255

In [None]:
# 1 Hidden Layer with 30 neurons:
# my_ANN = MLPClassifier(hidden_layer_sizes=(30,), activation='logistic', 
                     #  solver='adam', alpha=1, random_state=1, 
                     #  learning_rate_init=0.02, verbose=True, tol=0.0001)

In [None]:
# CV:
# accuracy_list = cross_val_score(my_ANN, X_train, y_train, cv=10, scoring='accuracy')

# print('\n\n','accuracy: ',accuracy_list)

In [None]:
# AlexNet-style model
# Had to resize images to at least 227x227
model = Sequential([
    # Convolutional feature extractor
    Conv2D(96, kernel_size=(11,11), strides=(4,4), padding="valid", activation="relu", input_shape=(227, 227, 3)),
    MaxPooling2D(pool_size=(3,3), strides=(2,2)),
    BatchNormalization(),
    Conv2D(256, kernel_size=(5,5), strides=(1,1), padding="valid", activation="relu"),
    MaxPooling2D(pool_size=(3,3), strides=(2,2)),
    BatchNormalization(),
    Conv2D(384, kernel_size=(3,3), strides=(1,1), padding="valid", activation="relu"),
    Conv2D(384, kernel_size=(3,3), strides=(1,1), padding="valid", activation="relu"),
    Conv2D(256, kernel_size=(3,3), strides=(1,1), padding="valid", activation="relu"),
    MaxPooling2D(pool_size=(3,3), strides=(2,2)),
    BatchNormalization(),
    # Fully connected classifier head
    Flatten(),
    Dense(4096, activation="relu"),
    Dropout(0.4),
    BatchNormalization(),
    Dense(4096, activation="relu"),
    Dropout(0.4),
    BatchNormalization(),
    Dense(1000, activation="relu"),
    Dropout(0.4),
    BatchNormalization(),
    # The output width follows len(types) rather than a literal 18, so it always matches
    # the one-hot labels, which are encoded with len(types) columns.
    Dense(len(types), activation="softmax"),
])
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
# Author's note: this model outputs .22% accuracy
# (presumably an accuracy of 0.22, i.e. about 22% -- unverified)

In [None]:
# Disabled alternative model, kept for reference only.
# The code below is wrapped in a string literal, so nothing in it is executed.
# Note that it is written for input_shape=(120,120,3) and a 4-unit output layer, which
# differs from the 227x227 inputs and len(types)-column labels used by the active cells.
'''
# Modified CNN Model / Outputs .59% accuracy
model = Sequential()
# First Layer on CNN
model.add(Conv2D(48, (3, 3), activation='relu', input_shape=(120,120,3), padding="same"))
# Pooling Layer:
model.add(MaxPooling2D(pool_size=(3,3)))
# Batch Normalization
model.add(BatchNormalization())
# Second Layer of CNN
model.add(Conv2D(128, (3, 3), activation='relu', padding="same"))
# Pooling Layer:
model.add(MaxPooling2D(pool_size=(3,3)))
model.add(Flatten())
model.add(Dense(512, activation='relu'))
model.add(Dense(4, activation='softmax'))
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
'''

In [None]:
# Train for 20 epochs, with 25% of the training arrays set aside for validation.
# The returned object exposes `.history`, which the plotting cell reads.
fitted_model = model.fit(
    X_train,
    y_train,
    validation_split=0.25,
    epochs=20,
    verbose=1,
)

In [None]:
# Summarize the training history: one figure per metric, each comparing the
# training curve with the validation curve across epochs.
for metric in ('accuracy', 'loss'):
    plt.plot(fitted_model.history[metric])
    plt.plot(fitted_model.history['val_' + metric])
    plt.title('model ' + metric)
    plt.ylabel(metric)
    plt.xlabel('epoch')
    plt.legend(['train', 'val'], loc='upper left')
    plt.show()

In [None]:
# Evaluate on the held-out test set.
# `score` holds the loss followed by the compiled metrics; index 1 is accuracy,
# the only metric passed to model.compile.
score = model.evaluate(X_test, y_test, verbose=1)
test_accuracy = score[1]
print('The accuracy is: ', test_accuracy)

In [None]:
# Run the trained model on the test images. Each row of y_predict is the softmax
# output for one test image; the display cell takes its argmax as the predicted type.
y_predict = model.predict(X_test)

In [None]:
import re
for y, z in zip(y_predict, z_test):
    pokemon_name = re.search(r'pokemon(\D*)', z).group(1)
#     print(pokemon_name)
    actual_type = type_mapping_reversed[pokemon_type_mapping[pokemon_name]]
    predicted_type = type_mapping_reversed[np.argmax(y)]

    %matplotlib inline
#     i = mpimg.imread(f'images/{z}.jpg')
    i = mpimg.imread(f'Scraped-Image-Dataset/{pokemon_name}/{z}')
    plt.imshow(i)
    plt.show()

    print(f'{z} is {actual_type}. model predicted {predicted_type}')