In [2]:
import os
import requests
import zipfile

# Retrieve the data
if not os.path.exists(os.path.join('data','tiny-imagenet-200.zip')):
    url = "http://cs231n.stanford.edu/tiny-imagenet-200.zip"
    # Get the file from web
    r = requests.get(url)

    if not os.path.exists('data'):
        os.mkdir('data')
    
    # Write to a file
    with open(os.path.join('data','tiny-imagenet-200.zip'), 'wb') as f:
        f.write(r.content)
else:
    print("The zip file already exists.")
    
if not os.path.exists(os.path.join('data', 'tiny-imagenet-200')):
    with zipfile.ZipFile(os.path.join('data','tiny-imagenet-200.zip'), 'r') as zip_ref:
        zip_ref.extractall('data')
else:
    print("The extracted data already exists")

In [30]:
from functools import partial
import tensorflow as tf
#import tensorflow_hub as hub
import requests
import zipfile
import requests
import os
import time
import pandas as pd
import random
import shutil
from tensorflow.keras.preprocessing.image import ImageDataGenerator
import os
from tensorflow.keras.layers import Input, Conv2D, MaxPool2D, AvgPool2D, Dense, Concatenate, Flatten, Lambda, Dropout
from tensorflow.keras.models import Model
from tensorflow.keras.losses import CategoricalCrossentropy
import tensorflow.keras.backend as K
from tensorflow.keras.callbacks import EarlyStopping, CSVLogger
import numpy as np
from PIL import Image
import tensorflow.keras.backend as K
import pickle
from tensorflow.keras.models import load_model, Model
gpus = tf.config.experimental.list_physical_devices('GPU')
if gpus:
    try:
        # Currently, memory growth needs to be the same across GPUs
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except:
        print("Couldn't set memory_growth")
        pass
    
    
def fix_random_seed(seed):
    """ Setting the random seed of various libraries """
    try:
        np.random.seed(seed)
    except NameError:
        print("Warning: Numpy is not imported. Setting the seed for Numpy failed.")
    try:
        tf.random.set_seed(seed)
    except NameError:
        print("Warning: TensorFlow is not imported. Setting the seed for TensorFlow failed.")
    try:
        random.seed(seed)
    except NameError:
        print("Warning: random module is not imported. Setting the seed for random failed.")

# Fixing the random seed
random_seed = 1997
fix_random_seed(random_seed)

print("TensorFlow version: {}".format(tf.__version__))

TensorFlow version: 2.16.1


In [31]:
def get_test_labels_df(test_labels_path):
    """ Reading the test data labels for all files in the test set as a data frame """
    test_df = pd.read_csv(test_labels_path, sep='\t', index_col=None, header=None)
    test_df = test_df.iloc[:,[0,1]].rename({0:"filename", 1:"class"}, axis=1)
    return test_df

def get_train_valid_test_data_generators(batch_size, target_size):
    """ Get the training/validation/testing data generators """
    
    # Defining a data-augmenting image data generator and a standard image data generator
    image_gen_aug = ImageDataGenerator(
        samplewise_center=False, rotation_range=30, width_shift_range=0.2,
        height_shift_range=0.2, brightness_range=(0.5,1.5), shear_range=5, 
        zoom_range=0.2, horizontal_flip=True, fill_mode='constant', cval=127.5, 
        validation_split=0.1
    )
    image_gen = ImageDataGenerator(samplewise_center=False)
    
    # Define a training data generator
    partial_flow_func = partial(
        image_gen_aug.flow_from_directory, 
        directory=os.path.join('data','tiny-imagenet-200', 'train'), 
        target_size=target_size, classes=None,
        class_mode='categorical', batch_size=batch_size, 
        shuffle=True, seed=random_seed)
    
    # Get the training data subset
    train_gen = partial_flow_func(subset='training')
    # Get the validation data subset
    valid_gen = partial_flow_func(subset='validation')    

    # Defining the test data generator
    test_df = get_test_labels_df(os.path.join('data','tiny-imagenet-200',  'val', 'val_annotations.txt'))
    test_gen = image_gen.flow_from_dataframe(
        test_df, directory=os.path.join('data','tiny-imagenet-200',  'val', 'images'), target_size=target_size, classes=None,
        class_mode='categorical', batch_size=batch_size, shuffle=False
    )
    return train_gen, valid_gen, test_gen


batch_size = 48
target_size = (224,224)
# Getting the train,valid, test data generators
train_gen, valid_gen, test_gen = get_train_valid_test_data_generators(batch_size, target_size)
# Modifying the data generators to fit the model targets
train_gen_inceptionV3, valid_gen_inceptionV3, test_gen_inceptionV3 = get_train_valid_test_data_generators(batch_size,(299,299))

with open(os.path.join('data','class_indices'), 'wb') as f:
    pickle.dump(train_gen.class_indices, f)

Found 90000 images belonging to 200 classes.
Found 10000 images belonging to 200 classes.
Found 10000 validated image filenames belonging to 200 classes.
Found 90000 images belonging to 200 classes.
Found 10000 images belonging to 200 classes.
Found 10000 validated image filenames belonging to 200 classes.


In [36]:
from tensorflow.keras.applications import VGG19,ResNet50V2,InceptionV3
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Input, Dense, Dropout
from tensorflow.keras.layers import GlobalAveragePooling2D
def create_inceptionv3_model(input_shape=(299, 299, 3), num_classes=200):
    base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=input_shape)
    x = GlobalAveragePooling2D()(base_model.output)
    predictions = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    for layer in base_model.layers:
        layer.trainable = False
    return model

inceptionv3_model = create_inceptionv3_model()
inceptionv3_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

def create_resnet50v2_model(input_shape=(224, 224, 3), num_classes=200):
    base_model = ResNet50V2(weights='imagenet', include_top=False, input_shape=input_shape)
    x = GlobalAveragePooling2D()(base_model.output)
    predictions = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    for layer in base_model.layers:
        layer.trainable = False
    return model

resnet50v2_model = create_resnet50v2_model()
resnet50v2_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
def create_vgg19_model(input_shape=(224, 224, 3), num_classes=200):
    base_model = VGG19(weights='imagenet', include_top=False, input_shape=input_shape)
    x = Flatten()(base_model.output)
    predictions = Dense(num_classes, activation='softmax')(x)
    model = Model(inputs=base_model.input, outputs=predictions)
    for layer in base_model.layers:
        layer.trainable = False
    return model

vgg19_model = create_vgg19_model()
vgg19_model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
vgg19_model.summary()
resnet50v2_model.summary()
#inceptionv3_model.summary()

In [38]:
from tensorflow.keras.callbacks import EarlyStopping, CSVLogger, ReduceLROnPlateau
es_callback = EarlyStopping(monitor='val_loss', patience=25)
lr_callback = ReduceLROnPlateau(
    monitor='val_loss', factor=0.1, patience=5, verbose=1, mode='auto'
)
def get_steps_per_epoch(n_data, batch_size):
    """ Given the data size and batch size, gives the number of steps to travers the full dataset """
    if n_data%batch_size==0:
        return int(n_data/batch_size)
    else:
        return int(n_data*1.0/batch_size)+1

n_epochs=10
history = vgg19_model.fit(
    train_gen, validation_data=valid_gen, 
    steps_per_epoch=get_steps_per_epoch(int(0.9*(500*200)), batch_size), 
    validation_steps=get_steps_per_epoch(int(0.1*(500*200)), batch_size),
    epochs=n_epochs, callbacks=[es_callback, lr_callback]
)
if not os.path.exists('models'):
    os.mkdir("models")
vgg19_model.save(os.path.join('models', 'VGG19.h5'))

# Evaluate the model
test_res = vgg19_model.evaluate(test_gen, steps=get_steps_per_epoch(500*50, batch_size))

# Print the results as a dictionary {<metric name>: <value>}
test_res_dict = dict(zip(model.metrics_names, test_res))
print(test_res_dict)

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2398s[0m 1s/step - accuracy: 0.1344 - loss: 71.9380 - val_accuracy: 0.1792 - val_loss: 87.6753 - learning_rate: 0.0010
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 23us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00 - learning_rate: 0.0010
Epoch 3/10


  self.gen.throw(typ, value, traceback)


[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2401s[0m 1s/step - accuracy: 0.2048 - loss: 84.7710 - val_accuracy: 0.1995 - val_loss: 98.3656 - learning_rate: 0.0010
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00 - learning_rate: 0.0010
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2327s[0m 1s/step - accuracy: 0.2261 - loss: 91.3884 - val_accuracy: 0.2150 - val_loss: 105.6772 - learning_rate: 0.0010
Epoch 6/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 14us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00 - learning_rate: 0.0010
Epoch 7/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 1s/step - accuracy: 0.2447 - loss: 95.4093
Epoch 7: ReduceLROnPlateau reducing learning rate to 0.00010000000474974513.
[1m1875/1875[0m



[1m521/521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m226s[0m 433ms/step - accuracy: 0.2683 - loss: 104.4380
{'loss': 103.57467651367188, 'compile_metrics': 0.26840001344680786}


In [41]:
n_epochs=10
history = resnet50v2_model.fit(
    train_gen, validation_data=valid_gen, 
    steps_per_epoch=get_steps_per_epoch(int(0.9*(500*200)), batch_size), 
    validation_steps=get_steps_per_epoch(int(0.1*(500*200)), batch_size),
    epochs=n_epochs, callbacks=[es_callback, lr_callback]
)
if not os.path.exists('models'):
    os.mkdir("models")
resnet50v2_model.save(os.path.join('models', 'RESNET50.h5'))


Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1153s[0m 614ms/step - accuracy: 0.0844 - loss: 11.0608 - val_accuracy: 0.0760 - val_loss: 11.3525 - learning_rate: 1.0000e-04
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00 - learning_rate: 1.0000e-04
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1151s[0m 613ms/step - accuracy: 0.0822 - loss: 10.7372 - val_accuracy: 0.0695 - val_loss: 10.8958 - learning_rate: 1.0000e-04
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 28us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00 - learning_rate: 1.0000e-04
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1153s[0m 614ms/step - accuracy: 0.0786 - loss: 10.4283 - val_accuracy: 0.0716 - val_loss: 10.6036 - learning_ra



In [42]:
# Evaluate the model
test_res = resnet50v2_model.evaluate(test_gen, steps=get_steps_per_epoch(500*50, batch_size))

# Print the results as a dictionary {<metric name>: <value>}
test_res_dict = dict(zip(model.metrics_names, test_res))
print(test_res_dict)

[1m521/521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m99s[0m 190ms/step - accuracy: 0.1102 - loss: 9.2185 
{'loss': 9.21091079711914, 'compile_metrics': 0.11180000007152557}


In [43]:
n_epochs=10
history = inceptionv3_model.fit(
    train_gen_inceptionV3, validation_data=valid_gen_inceptionV3, 
    steps_per_epoch=get_steps_per_epoch(int(0.9*(500*200)), batch_size), 
    validation_steps=get_steps_per_epoch(int(0.1*(500*200)), batch_size),
    epochs=n_epochs, callbacks=[es_callback, lr_callback])
if not os.path.exists('models'):
    os.mkdir("models")
inceptionv3_model.save(os.path.join('models', 'InceptionV3.h5'))

Epoch 1/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1568s[0m 835ms/step - accuracy: 0.0569 - loss: 6.3051 - val_accuracy: 0.0479 - val_loss: 6.4472 - learning_rate: 1.0000e-04
Epoch 2/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 35us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00 - learning_rate: 1.0000e-04
Epoch 3/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1572s[0m 837ms/step - accuracy: 0.0568 - loss: 6.1339 - val_accuracy: 0.0524 - val_loss: 6.2429 - learning_rate: 1.0000e-04
Epoch 4/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33us/step - accuracy: 0.0000e+00 - loss: 0.0000e+00 - val_accuracy: 0.0000e+00 - val_loss: 0.0000e+00 - learning_rate: 1.0000e-04
Epoch 5/10
[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1569s[0m 835ms/step - accuracy: 0.0554 - loss: 6.0417 - val_accuracy: 0.0510 - val_loss: 6.1951 - learning_rate: 1.



In [44]:
# Evaluate the model
test_res = inceptionv3_model.evaluate(test_gen_inceptionV3, steps=get_steps_per_epoch(500*50, batch_size))

# Print the results as a dictionary {<metric name>: <value>}
test_res_dict = dict(zip(model.metrics_names, test_res))
print(test_res_dict)

[1m521/521[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m112s[0m 214ms/step - accuracy: 0.0800 - loss: 5.3361
{'loss': 5.319585800170898, 'compile_metrics': 0.08060000091791153}
