# Extracting and saving features.

In [None]:
import tensorflow

In [None]:
import numpy as np
from numpy.linalg import norm
import tqdm
import os
import time
from tensorflow.keras.preprocessing import image
from tensorflow.keras.applications.resnet50 import ResNet50, preprocess_input

In [None]:
import pickle

In [None]:
# ResNet50 with ImageNet weights and without the fully-connected top layer
# (include_top=False), built for 224x224 RGB inputs; used as feature extractor.
model = ResNet50(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)

In [None]:
def extract_features(img_path, model):
    """Return the L2-normalized, flattened model output for one image.

    Args:
        img_path: Path of the image file to load.
        model: Object exposing ``predict``; called on a batch of one image.

    Returns:
        1-D array: the flattened prediction divided by its Euclidean norm.
    """
    target_height, target_width = 224, 224
    pil_image = image.load_img(img_path, target_size=(target_height, target_width))
    # Add a leading batch axis so the model receives a batch of size one.
    batch = image.img_to_array(pil_image)[np.newaxis, ...]
    batch = preprocess_input(batch)
    flat = model.predict(batch).flatten()
    return flat / norm(flat)
    

In [None]:
# Sanity check: extract features for a single image and print the length
# of the resulting feature vector.
features = extract_features('datasets/test_cat.jpg', model)
print(len(features))

In [None]:
# File-name suffixes (case-sensitive) that get_file_list treats as images.
extensions = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']


def get_file_list(root_dir):
    """Recursively collect the paths of image files under ``root_dir``.

    A file is included only when its name *ends with* one of ``extensions``.
    A substring test would also accept names such as ``photo.jpg.txt``.

    Args:
        root_dir: Directory to walk. A missing directory yields an empty list
            because ``os.walk`` produces nothing for it.

    Returns:
        List of paths (``root`` joined with the file name) in ``os.walk`` order.
    """
    # str.endswith accepts a tuple of candidate suffixes.
    suffixes = tuple(extensions)
    file_list = []
    for root, _directories, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename.endswith(suffixes):
                file_list.append(os.path.join(root, filename))
    return file_list

In [None]:
# Gather every image path under root_dir; sorting gives a stable order so
# list positions can be used as image indices.
root_dir = 'datasets/caltech101'
filenames = get_file_list(root_dir)
filenames.sort()

In [None]:
# Print the full list of collected image paths to verify the directory scan.
print(filenames)

In [None]:
# Extract one feature vector per image, in the same order as filenames,
# with a notebook progress bar over the paths.
feature_list = []
for img_path in tqdm.tqdm_notebook(filenames):
    feature_list.append(extract_features(img_path, model))

In [None]:
# Save the extracted features. The with-block closes the file handle so the
# data is flushed to disk instead of relying on garbage collection.
with open('datasets/features-caltech101-resnet.pickle', 'wb') as features_file:
    pickle.dump(feature_list, features_file)

In [None]:
# Save the image paths. The with-block closes the file handle so the data
# is flushed to disk instead of relying on garbage collection.
with open('datasets/filenames-caltech101.pickle', 'wb') as filenames_file:
    pickle.dump(filenames, filenames_file)

# Reloading the serialized objects.

In [None]:
# Reload the saved image paths; the with-block closes the file handle.
# NOTE: pickle.load can execute arbitrary code, so only load trusted files.
with open('datasets/filenames-caltech101.pickle', 'rb') as filenames_file:
    filenames = pickle.load(filenames_file)

In [None]:
# Reload the saved feature vectors; the with-block closes the file handle.
# NOTE: pickle.load can execute arbitrary code, so only load trusted files.
with open('datasets/features-caltech101-resnet.pickle', 'rb') as features_file:
    feature_list = pickle.load(features_file)

# Querying the engine with images

In [None]:
from sklearn.neighbors import NearestNeighbors
# Brute-force nearest-neighbour index over all feature vectors, returning
# the 5 closest entries by Euclidean distance.
neighbors = NearestNeighbors(
    n_neighbors=5,
    algorithm='brute',
    metric='euclidean',
)
neighbors.fit(feature_list)

# Query with the image at index 1; kneighbors takes a collection of samples,
# hence the single-element list.
distances, indices = neighbors.kneighbors([feature_list[1]])

In [None]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline

In [None]:
# Display the query image (the file at index 1).
query_image = mpimg.imread(filenames[1])
plt.imshow(query_image)

In [None]:
# Display the second-ranked result: the top-ranked result is expected to be
# the query image itself.
closest_match_path = filenames[indices[0][1]]
plt.imshow(mpimg.imread(closest_match_path))

In [None]:
# Distances between the query image and each returned image. Iterating over
# the result itself avoids hard-coding the neighbour count.
for distance in distances[0]:
    print(distance)

In [None]:
import random

In [None]:
def plot_images(similar_image_paths, distances):
    """Show the query image and its neighbours, one figure per image.

    Args:
        similar_image_paths: Image paths; entry 0 is the query image and the
            remaining entries are its neighbours.
        distances: Distances indexed in parallel with ``similar_image_paths``;
            entry 0 (the query's own slot) is not displayed.
    """
    for position, img_path in enumerate(similar_image_paths):
        plt.figure()
        plt.imshow(mpimg.imread(img_path))
        if position == 0:
            plt.title('Query image')
        else:
            # The value is a distance (smaller means closer), so it is
            # labelled as such rather than as a similarity.
            plt.title(f"distance to query image: {distances[position]}")

In [None]:
# Plot the nearest neighbours of six randomly chosen query images.
for _ in range(6):
    # randrange excludes the upper bound; randint(0, len(...)) could return
    # len(feature_list) and raise IndexError.
    random_image_index = random.randrange(len(feature_list))
    distances, indices = neighbors.kneighbors([feature_list[random_image_index]])
    # Query image first, then every result after rank 0 (rank 0 is expected
    # to be the query itself).
    similar_image_paths = [filenames[random_image_index]] + [
        filenames[neighbor_index] for neighbor_index in indices[0][1:]
    ]
    plot_images(similar_image_paths, distances[0])