In [1]:
import os, csv, random, time
import numpy as np
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import tensorflow as tf
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score
from keras.models import Sequential
from keras.layers import Dense, Activation, Conv2D, MaxPooling2D, Flatten
from keras.utils import np_utils
from skimage.transform import resize
from tqdm import tqdm

from PIL import Image
tf.get_logger().setLevel('ERROR')

In [2]:
# # original jpgs are (120 x 120 x 3) while original pngs are (120 x 120 x 4)
# # change all pngs so that they are (120 x 120 x 3) and convert to jpg
# # probably better to convert png -> jpg rather than jpg -> png
# # because removing the alpha channel doesn't remove info (png -> jpg)
# # but adding the alpha channel adds unnecessary info (jpg -> png)

# i = 0
# for filename in os.listdir('images'):
#     if filename.endswith(".png"):
#         im = Image.open(f'images/{filename}').convert('RGBA')
#         pokemon_name = filename[:-4]
#         background = Image.new("RGB", im.size, (255, 255, 255))
#         background.paste(im, mask=im.split()[-1])
#         name=f'images/{pokemon_name}.jpg'
#         background.save(name, 'JPEG')
#         os.remove(f'images/{filename}') # delete original png image
#         i += 1

# print(f'{i} images converted')

In [3]:
def define_my_model():
    model = Sequential()
    model.add(Conv2D(filters=16, kernel_size=(5,5), activation='relu', input_shape=(120,120,3)))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Conv2D(filters=32, kernel_size=(5,5), activation='relu'))
    model.add(MaxPooling2D(pool_size=(2,2)))
    model.add(Flatten())
    model.add(Dense(len(types)))
    model.add(Activation('sigmoid'))
    model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer='adam')
    return model

In [None]:
t0 = time.perf_counter()

all_types = ['Grass', 'Poison', 'Fire', 'Flying', 'Water', 'Bug', 'Normal', 'Electric', 'Ground', 'Fairy', 'Fighting', 'Psychic', 'Rock', 'Steel', 'Ice', 'Ghost', 'Dragon', 'Dark']

results_per_type = {} # k => {'accuracy' => average accuracy, 'best_types' => [], 'worst_types' => [], ...}

# k will be the number of types to use for classification (min 2, max 18)
for k in range(2, 19):
    # keep track of best, worst types to classify
    accuracy_list = []
    best_types = []
    worst_types = []
    highest_accuracy = 0
    lowest_accuracy = 1

    # keep track of predictions for worst types to classify
    worst_y_predict = []
    worst_y_test = []
    worst_z = []
    worst_type_mapping_reversed = {}
    worst_pokemon_type_mapping = {}
    
    results_per_type[k] = {}

    # choose k random types and perform training 10 times
    # this is to see which combinations of types are better/worse for classification
    # e.g. one iteration can be for fire vs water, which is expected to have a high accuracy
    # and another iteration can be for ground vs rock, which is probably harder to distinguish
    for _ in range(10):

        # choose k out of the 18 total types to perform training
        num_types = k
        types = random.sample(all_types, k=num_types)

        # convert pokemon types to integers and map pokemon to their types
        type_mapping = {} # pokemon type (string) => pokemon type (integer)
        type_mapping_reversed = {} # pokemon type (integer) => pokemon type (string)
        pokemon_type_mapping = {} # pokemon name => pokemon type1 (integer)
        i = 0
        with open('pokemon.csv', newline='') as csvfile:
            reader = csv.reader(csvfile)
            next(reader, None) # skip header
            for row in reader:
                if row[1] in types or (len(row) > 2 and row[2] in types):
                    if row[1] in types:
                        if row[1] not in type_mapping:
                            type_mapping[row[1]] = i
                            type_mapping_reversed[i] = row[1]
                            i += 1
                        pokemon_type_mapping[row[0]] = type_mapping[row[1]]
                    if len(row) > 2 and row[2] in types:
                        if row[2] not in type_mapping:
                            type_mapping[row[2]] = i
                            type_mapping_reversed[i] = row[2]
                            i += 1
                        pokemon_type_mapping[row[0]] = type_mapping[row[2]]
        directory = r'C:\Users\hookg\Desktop\Spring2023\CS 466-ADV-MACHINE-&-DEEP-LEARNING\Pokemon-Notebooks\photos'
        # prepare X and y data
        X = []
        y = []
        z = [] # keep track of pokemon names
        
        for pokemon_name in tqdm(pokemon_type_mapping.keys()):
#             print('Loading images for ', pokemon_name)
            for image_path in os.listdir(os.path.join(directory,pokemon_name)):
#                 i = mpimg.imread(f'images/{pokemon_name}.jpg
                i = Image.open(os.path.join(directory,pokemon_name,image_path))  
#                 i = mpimg.imread(os.path.join(directory,pokemon_name,image_path))
                if np.array(i).shape != (120,120, 3):
#                     print('resizing and converting image', image_path, np.array(i).shape)
                    i = i.convert('RGB')
                    i = i.resize((120,120))
                
                i = np.array(i)
                
                X.append(i)
                y.append(pokemon_type_mapping[pokemon_name])
                z.append(pokemon_name)

        # split into training and testing
        X_train, X_test, y_train, y_test, z_train, z_test = train_test_split(X, y, z, test_size=0.25)

        y_train = np_utils.to_categorical(y_train, len(types))
        y_test = np_utils.to_categorical(y_test, len(types))

        # image augmentation
        X_train_augmented = []
        y_train_augmented = []

        for img, y in zip(X_train, y_train):
            X_train_augmented.append(img)
            y_train_augmented.append(y)
            X_train_augmented.append(tf.image.flip_left_right(img))
            y_train_augmented.append(y)
            X_train_augmented.append(tf.image.rot90(img))
            y_train_augmented.append(y)

        X_train = np.array(X_train_augmented)
        y_train = np.array(y_train_augmented)
        X_test = np.array(X_test)
        y_test = np.array(y_test)

        # define model
        model = define_my_model()

        # train model
        fitted_model = model.fit(X_train, y_train, validation_split=0.33, epochs=20, verbose=1)

        # evaluate model
        score = model.evaluate(X_test, y_test, verbose=0)
        accuracy = score[1]
        accuracy_list.append(accuracy)

        if accuracy > highest_accuracy:
            highest_accuracy = accuracy
            best_types = types

        if accuracy < lowest_accuracy:
            lowest_accuracy = accuracy
            worst_types = types
            worst_y_predict = model.predict(X_test, verbose=0)
            worst_y_test = y_test
            worst_z = z_test
            worst_type_mapping_reversed = type_mapping_reversed
            worst_pokemon_type_mapping = pokemon_type_mapping
    
    results_per_type[k]['accuracy'] = np.mean(accuracy_list)
    results_per_type[k]['best_types'] = best_types
    results_per_type[k]['best_accuracy'] = highest_accuracy
    results_per_type[k]['worst_types'] = worst_types
    results_per_type[k]['worst_accuracy'] = lowest_accuracy
    results_per_type[k]['worst_y_predict'] = worst_y_predict
    results_per_type[k]['worst_y_test'] = worst_y_test
    results_per_type[k]['worst_z'] = worst_z
    results_per_type[k]['worst_type_mapping_reversed'] = worst_type_mapping_reversed
    results_per_type[k]['worst_pokemon_type_mapping'] = worst_pokemon_type_mapping
    
    print("---------------------------")
    print(f"Average accuracy: {results_per_type[k]['accuracy']}")
    print("best types:", best_types)
    print(f"Best accuracy: {highest_accuracy}")
    print("worst types:", worst_types)
    print(f"Worst accuracy: {lowest_accuracy}")

100%|████████████████████████████████████████████████████████████████████████████████| 156/156 [00:39<00:00,  3.99it/s]


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


100%|████████████████████████████████████████████████████████████████████████████████| 103/103 [00:20<00:00,  4.96it/s]


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


100%|████████████████████████████████████████████████████████████████████████████████| 139/139 [00:27<00:00,  5.14it/s]


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


100%|████████████████████████████████████████████████████████████████████████████████| 133/133 [00:44<00:00,  2.97it/s]


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


100%|████████████████████████████████████████████████████████████████████████████████| 173/173 [00:29<00:00,  5.88it/s]


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


100%|████████████████████████████████████████████████████████████████████████████████| 154/154 [00:21<00:00,  7.04it/s]


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


100%|██████████████████████████████████████████████████████████████████████████████████| 75/75 [00:12<00:00,  5.86it/s]


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


100%|████████████████████████████████████████████████████████████████████████████████| 110/110 [00:24<00:00,  4.45it/s]


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


100%|████████████████████████████████████████████████████████████████████████████████| 144/144 [00:20<00:00,  7.06it/s]


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


100%|████████████████████████████████████████████████████████████████████████████████| 167/167 [00:18<00:00,  8.85it/s]


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20
---------------------------
Average accuracy: 0.6681592524051666
best types: ['Grass', 'Ghost']
Best accuracy: 0.7849892377853394
worst types: ['Dragon', 'Rock']
Worst accuracy: 0.5600000023841858


100%|████████████████████████████████████████████████████████████████████████████████| 237/237 [00:36<00:00,  6.49it/s]


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


100%|████████████████████████████████████████████████████████████████████████████████| 174/174 [00:30<00:00,  5.76it/s]


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


100%|████████████████████████████████████████████████████████████████████████████████| 206/206 [00:47<00:00,  4.35it/s]


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


100%|████████████████████████████████████████████████████████████████████████████████| 170/170 [00:30<00:00,  5.50it/s]


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


100%|████████████████████████████████████████████████████████████████████████████████| 202/202 [00:51<00:00,  3.90it/s]


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


100%|████████████████████████████████████████████████████████████████████████████████| 175/175 [00:38<00:00,  4.54it/s]


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


100%|████████████████████████████████████████████████████████████████████████████████| 229/229 [00:58<00:00,  3.89it/s]


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


100%|████████████████████████████████████████████████████████████████████████████████| 219/219 [00:59<00:00,  3.68it/s]


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


100%|████████████████████████████████████████████████████████████████████████████████| 263/263 [00:58<00:00,  4.47it/s]


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


100%|████████████████████████████████████████████████████████████████████████████████| 182/182 [00:44<00:00,  4.13it/s]


Epoch 1/20
154/838 [====>.........................] - ETA: 4:31 - loss: 7.2902 - accuracy: 0.4038

In [None]:
t1 = time.perf_counter()
print(f"In minutes: {(t1-t0)//60}")

In [None]:
def see_the_results(y_predict, z_test, type_mapping_reversed, pokemon_type_mapping, photos_folder):
    for y, z in zip(y_predict, z_test):
        actual_type = type_mapping_reversed[pokemon_type_mapping[z]]
        predicted_type = type_mapping_reversed[np.argmax(y)]

        # find image path
        image_path = None
        for root, dirs, files in os.walk(photos_folder):
            if z in files:
                image_path = os.path.join(root, z)
                break

        # open and display image
        if image_path:
            with Image.open(image_path) as img:
                plt.imshow(img)
                plt.show()

        print(f'{z} is {actual_type}. model predicted {predicted_type}')

see_the_results(results_per_type[2]['worst_y_predict'], results_per_type[2]['worst_z'], results_per_type[2]['worst_type_mapping_reversed'], results_per_type[2]['worst_pokemon_type_mapping'], 'photos')


In [None]:
plt.plot(results_per_type.keys(), [results_per_type[k]['accuracy'] for k in results_per_type.keys()])
plt.plot(results_per_type.keys(), [results_per_type[k]['best_accuracy'] for k in results_per_type.keys()])
plt.plot(results_per_type.keys(), [results_per_type[k]['worst_accuracy'] for k in results_per_type.keys()])
plt.xlabel('Number of Types')
plt.ylabel('Accuracy')
plt.legend(['average accuracy', 'best accuracy', 'worst accuracy'])
plt.show()