Here we are going to create a folder with the same structure as the Pokemon dataset folder. It will look like this:

dataset_embedding

>>> Pokemon 1

>>>>>> Embedding Pokemon_1_image_1

>>>>>> Embedding Pokemon_1_image_2

>>>>>> Embedding Pokemon_1_image_n

>>> Pokemon 2 

>>>>>> Embedding Pokemon_2_image_1

>>>>>> Embedding Pokemon_2_image_2

>>>>>> Embedding Pokemon_2_image_n

>>> Pokemon n ...

This folder will contain one embedding for each of the roughly 20,000 images in the base Pokemon image dataset.

We will use the EfficientNetB0 model we just finetuned to create the embeddings.





Imports

In [1]:
import tensorflow as tf
from tensorflow.keras.layers import Dense, GlobalAveragePooling2D
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.applications.efficientnet import preprocess_input
from tensorflow.keras.preprocessing.image import img_to_array, load_img
from tensorflow.keras.preprocessing import image_dataset_from_directory
import numpy as np
import os
import numpy as np
from tqdm import tqdm


In [2]:
# --- Paths -------------------------------------------------------------
# Source folder holding every Pokemon image (one sub-folder per species).
DATASET_IMAGE_POKEMON_DIRECTORY = "../data/pokemon-dataset-1000"
# Destination folder that will receive the embeddings.
OUTPUT_IMAGE_POKEMON_EMBEDDE_DIRECTORY = "../data/dataset_embedding"

# --- Image loading -----------------------------------------------------
# (height, width) every image is resized to when it is loaded.
IMAGE_SIZE = (224, 224)
# How many images are grouped into one batch.
BATCH_SIZE = 32

# --- Embedding ---------------------------------------------------------
# Length of the vector that represents one image.
# NOTE(review): presumably has to match the output size of the model's
# "embedding" layer - confirm against the fine-tuning notebook.
DIMENSION_EMBEDDING = 256



Loading our EfficientNetB0 fine-tuned model

In [3]:
def loader_dataset_finetuner(
    model_path="../models/finetuned_efficientnetb0_pour_pokemon.h5",
    embedding_layer_name="embedding",
):
    """Load the fine-tuned model and return it truncated at its embedding layer.

    Args:
        model_path: Path of the saved Keras model to load. Defaults to the
            fine-tuned EfficientNetB0 file used by this notebook.
        embedding_layer_name: Name of the layer whose output is used as the
            image embedding.

    Returns:
        A Keras ``Model`` sharing the loaded model's input and whose output is
        the output of the ``embedding_layer_name`` layer.

    Raises:
        ValueError: Raised by ``get_layer`` when no layer has the requested
            name (per the Keras documentation).
    """
    # compile=False: the model is only used for inference here, so the
    # training configuration (optimizer, loss, metrics) is not restored.
    full_model = load_model(model_path, compile=False)

    # Cut the network at the embedding layer so predict() returns embeddings.
    return Model(
        inputs=full_model.input,
        outputs=full_model.get_layer(embedding_layer_name).output,
    )

In [4]:
# Build the embedding extractor from the fine-tuned model.
model_finetune = loader_dataset_finetuner()

# Read every image of the base dataset, resized and grouped into batches.
# Shuffling is turned off so the images keep the order of their filenames.
dataset = image_dataset_from_directory(
    directory=DATASET_IMAGE_POKEMON_DIRECTORY,
    shuffle=False,
    batch_size=BATCH_SIZE,
    image_size=IMAGE_SIZE,
)



Found 20921 files belonging to 1000 classes.


In [None]:
# Names of each Pokemon species, as exposed by the loaded dataset.
# NOTE(review): `dataset` is rebound by a later `dataset.map(...)` call; the
# mapped dataset presumably no longer carries this attribute, so it is read
# here first - confirm before reordering cells.
class_names = dataset.class_names

In [6]:
# Display the full list of species names held in class_names.
print(class_names)

['abomasnow', 'abra', 'absol', 'accelgor', 'aegislash-shield', 'aerodactyl', 'aggron', 'aipom', 'alakazam', 'alcremie', 'alomomola', 'altaria', 'amaura', 'ambipom', 'amoonguss', 'ampharos', 'annihilape', 'anorith', 'appletun', 'applin', 'araquanid', 'arbok', 'arboliva', 'arcanine', 'arceus', 'archen', 'archeops', 'arctibax', 'arctovish', 'arctozolt', 'ariados', 'armaldo', 'armarouge', 'aromatisse', 'aron', 'arrokuda', 'articuno', 'audino', 'aurorus', 'avalugg', 'axew', 'azelf', 'azumarill', 'azurill', 'bagon', 'baltoy', 'banette', 'barbaracle', 'barboach', 'barraskewda', 'basculegion-male', 'basculin-red-striped', 'bastiodon', 'baxcalibur', 'bayleef', 'beartic', 'beautifly', 'beedrill', 'beheeyem', 'beldum', 'bellibolt', 'bellossom', 'bellsprout', 'bergmite', 'bewear', 'bibarel', 'bidoof', 'binacle', 'bisharp', 'blacephalon', 'blastoise', 'blaziken', 'blipbug', 'blissey', 'blitzle', 'boldore', 'boltund', 'bombirdier', 'bonsly', 'bouffalant', 'bounsweet', 'braixen', 'brambleghast', 'bra

We apply the preprocess_input function to every batch and use AUTOTUNE so that TensorFlow dynamically manages CPU resources to speed up the process.

In [None]:
# Preprocess each batch of images; the labels are dropped because only the
# images are fed to the model. AUTOTUNE lets tf.data choose the amount of
# parallelism on its own.
dataset = dataset.map(
    lambda images, _labels: preprocess_input(images),
    num_parallel_calls=tf.data.AUTOTUNE,
)

# Prepare the next batches in the background while the current one is used.
dataset = dataset.prefetch(tf.data.AUTOTUNE)

In [None]:
# Extensions loaded by image_dataset_from_directory (per the Keras docs).
# The filter below must accept exactly the same files: a file that the dataset
# loads but that is skipped here would shift every following
# (class name, file name) pair relative to the embeddings.
SUPPORTED_IMAGE_EXTENSIONS = ('.bmp', '.gif', '.jpeg', '.jpg', '.png')

# One (class name, file name) pair per image. It tells, for each embedding,
# which species folder it belongs to and what its output file is called.
# NOTE(review): the order (classes in class_names order, files sorted by name)
# is assumed to match the order in which the unshuffled dataset yields images.
image_infos = []

for class_name in class_names:
    classe_directory = os.path.join(DATASET_IMAGE_POKEMON_DIRECTORY, class_name)
    image_infos.extend(
        (class_name, filename)
        for filename in sorted(os.listdir(classe_directory))
        if filename.lower().endswith(SUPPORTED_IMAGE_EXTENSIONS)
    )

Create the embedding for each Pokemon image and store it in the folder of the corresponding species.

The embeddings will be stored as .npy files like this: 


>>> Pokemon 1

>>>>>> Embedding_Pokemon_1_image_1.npy

>>>>>> Embedding_Pokemon_1_image_2.npy

>>>>>> Embedding_Pokemon_1_image_n.npy

>>> Pokemon 2 

>>>>>> Embedding_Pokemon_2_image_1.npy

>>>>>> Embedding_Pokemon_2_image_2.npy

>>>>>> Embedding_Pokemon_2_image_n.npy

>>> Pokemon n ...

In [None]:
# Position in image_infos of the next image to be saved.
index = 0
# Species folders already created, so that each one is created only once.
created_directories = set()

for batch in tqdm(dataset):
    # Generate the embeddings of the whole batch.
    embeddings = model_finetune.predict(batch, verbose=0)

    # L2 normalization. The norm is clamped to a tiny positive value so that an
    # all-zero embedding stays zero instead of becoming NaN (division by zero).
    norms = np.linalg.norm(embeddings, axis=1, keepdims=True)
    embeddings = embeddings / np.maximum(norms, 1e-12)

    # (class name, file name) pairs matching this batch, in the same order.
    batch_infos = image_infos[index:index + len(embeddings)]
    if len(batch_infos) != len(embeddings):
        raise RuntimeError(
            "The dataset yielded more images than image_infos lists; "
            "embeddings cannot be matched to file names."
        )

    # Save the embeddings as numpy binary files, one file per image.
    for (pokemon, filename), embedding in zip(batch_infos, embeddings):
        pokemon_directory = os.path.join(OUTPUT_IMAGE_POKEMON_EMBEDDE_DIRECTORY, pokemon)
        if pokemon_directory not in created_directories:
            # makedirs also creates the parent output folder when it is missing.
            os.makedirs(pokemon_directory, exist_ok=True)
            created_directories.add(pokemon_directory)

        # Same base name as the image, with the extension replaced by .npy.
        output_path = os.path.join(pokemon_directory, filename.rsplit('.', 1)[0] + '.npy')
        np.save(output_path, embedding)
        index += 1

# Leftover entries mean image_infos and the dataset do not describe the same
# files, so the saved embeddings may be stored under the wrong names.
if index != len(image_infos):
    raise RuntimeError(
        f"Saved {index} embeddings but image_infos lists {len(image_infos)} images; "
        "the file names may not match the embeddings."
    )

100%|██████████| 654/654 [04:08<00:00,  2.63it/s]
