# 1. Imports

**1. Basic**

In [86]:
import tensorflow
import numpy as np
from numpy.linalg import norm
import os
from tqdm import tqdm, tqdm_notebook
import time
import math
import sys


**2. Models**

In [70]:
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input


**3. Data Generator**

In [56]:
from tensorflow.keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator

# 2. Pipeline

In [57]:
root_dir = 'input/caltech101/101_ObjectCategories'
extensions = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']
filenames = sorted(get_file_list(root_dir))
batch_size = 64

# 3. Functions

In [58]:
def get_file_list(root_dir):
    file_list = []
    for root, directories, filenames in os.walk(root_dir):
        for filename in filenames:
            if any(ext in filename for ext in extensions):
                file_list.append(os.path.join(root, filename))
    return file_list

In [74]:
def extract_features_resnet(img_path, model):
    input_shape = (224, 224, 3)
    img = image.load_img(img_path,
                         target_size=(input_shape[0], input_shape[1]))
    
    img_array = image.img_to_array(img)
    
    expanded_img_array = np.expand_dims(img_array, axis=0)
    
    preprocessed_img = preprocess_input(expanded_img_array)
    
    features = model.predict(preprocessed_img)
    
    flattened_features = features.flatten()
    normalized_features = flattened_features / norm(flattened_features)
    return normalized_features

In [60]:
def convert_size(size_bytes):
    if size_bytes == 0:
        return "0B"
    size_name = ("B", "KB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB")
    i = int(math.floor(math.log(size_bytes, 1024)))
    p = math.pow(1024, i)
    s = round(size_bytes / p, 2)
    return "%s %s" % (s, size_name[i])

# 4. Model Benchmarking

**1. RESNET50**

**1.1 - Custom Function**

*Create Feature List from input data*

In [71]:
model =ResNet50(weights='imagenet',
                         include_top=False,
                         input_shape=(224, 224, 3),
                        pooling='max')

In [72]:
feature_list_resnet_custom = []

In [76]:
start_time = time.time()

for i in tqdm_notebook(range(len(filenames))):
    feature_list_resnet_custom.append(extract_features_resnet(filenames[i], model))
    
end_time = time.time()

HBox(children=(IntProgress(value=0, max=8677), HTML(value='')))




*Analysis*

a. Number of Images

In [77]:
number_of_images = len(feature_list_resnet_custom)

In [81]:
print("The number of images in the dataset:",number_of_images)

The number of images in the dataset: 8677


b. Number of features per image

In [82]:
number_of_features = []    

In [83]:
for i in feature_list_resnet_custom:
    number_of_features.append(i.size)
    

In [85]:
print("The number of features per image:", number_of_features[0])

The number of features per image: 2048


c. Size of the feature list

In [87]:
size_of_feature_list = sys.getsizeof(feature_list_resnet_custom)

In [90]:
print("The size of the feature list is:", convert_size(size_of_feature_list))

The size of the feature list is: 76.03 KB


d. Size of each image in the feature list

In [106]:
single_array = feature_list_resnet_custom[0]


In [118]:
print("Size of feature list of each image:",convert_size(sys.getsizeof(single_array)))

Size of feature list of each image: 8.09 KB


e. Size of each feature in feature list

In [111]:
feature = single_array[0]

In [112]:
feature

0.015260109

In [120]:
print("Size of each feature: ",convert_size(feature.itemsize))

Size of each feature:  4.0 B
