# **All Imports**

In [1]:
from keras.src.applications.resnet import ResNet50
from keras.src.applications.resnet import preprocess_input
from sklearn.neighbors import NearestNeighbors

from PIL import Image
from tqdm import tqdm
from os import listdir
from os.path import splitext

import matplotlib.pyplot as plt
import numpy as np
import pickle as pk
import os

## **All Paths**

In [6]:
# Base folder that all Drive-hosted paths below are built from
# (presumably the Colab Google Drive mount point -- confirm).
PATH_TO_DRIVE = '/content/drive/MyDrive'

# Image folders located under PATH_TO_DRIVE.
DATASET_PATH = f'{PATH_TO_DRIVE}/dataset'
UNLABELED_DATASET_PATH = f'{PATH_TO_DRIVE}/unlabeled_dataset'
LABELED_TRAINING_PATH = f'{PATH_TO_DRIVE}/labeled_training'
TRAINING_PATH = f'{PATH_TO_DRIVE}/training'

# Currently the Drive folder itself; the disabled alternative was
# "/content/drive/MyDrive/reverse_search_result".
PATH_TO_FOUND_IMAGES = PATH_TO_DRIVE

# File names of the query images; TEST_IMAGES[0] is loaded from TRAINING_PATH
# in the "PREPARE DATA" cell.
TEST_IMAGES = ['59109174.jpg.jpg', '59964405.jpg.jpg']

# Relative path to the training images used for local runs.
LOCAL_PATH = '../../Resources/training'


### *Function to Show All Images*

In [3]:
def show_images(images, figsize=(20,10), columns=5):
    """Plot a collection of images on a grid, ``columns`` images per row.

    Args:
        images: Sized sequence of objects that ``plt.imshow`` can draw.
        figsize: Figure size forwarded to ``plt.figure``.
        columns: Number of images placed in each row of the grid.
    """
    plt.figure(figsize=figsize)
    # Ceiling division: exactly as many rows as needed, so an image count that
    # is a multiple of `columns` does not get an extra, empty row.
    rows = (len(images) + columns - 1) // columns
    for i, image in enumerate(images):
        plt.subplot(rows, columns, i + 1)
        plt.imshow(image)

In [7]:
# List the files in the local training folder and report how many there are.
# The two commented-out lines are the disabled equivalent for the unlabeled dataset.
#dataset_files_names = os.listdir(UNLABELED_DATASET_PATH)
training_files_names = os.listdir(LOCAL_PATH)
print(f"number of train images: {len(training_files_names)}")
#print(f"number of all images: {len(dataset_files_names)}")

number of train images: 1500


## Helpful Functions

In [12]:
def read_img_file(f):
    """Open an image and return it as a fully loaded RGB PIL image.

    Args:
        f: Anything ``Image.open`` accepts (the notebook passes file paths).

    Returns:
        A PIL image in mode ``'RGB'`` that no longer depends on the source file.
    """
    # Image.open is lazy and keeps the file open until the pixels are read;
    # the context manager releases the handle deterministically.
    with Image.open(f) as img:
        # convert() reads the pixel data and returns a new image, so the
        # result remains usable after the source file has been closed.
        return img.convert('RGB')

def resize_img_to_array(img, img_shape):
    """Resize ``img`` to ``img_shape`` and return the result as a NumPy array.

    The resize uses nearest-neighbour resampling (``Image.Resampling.NEAREST``).
    ``img_shape`` is passed straight to ``img.resize``.
    """
    resized = img.resize(img_shape, Image.Resampling.NEAREST)
    return np.array(resized)

def get_features(img):
    """Return the L2-normalised feature vector the global ``model`` produces for ``img``.

    The image is resized to 224x224, given a leading batch axis, run through
    ``preprocess_input`` and then through ``model.predict``. ``model`` is a
    notebook-level global and must be defined before this function is called.

    Args:
        img: Image object accepted by ``resize_img_to_array``.

    Returns:
        The first (only) row of the model output divided by its L2 norm. If the
        norm is zero the vector is returned unscaled instead of turning into NaNs.
    """
    img_width, img_height = 224, 224
    np_img = resize_img_to_array(img, img_shape=(img_width, img_height))
    # Add the leading batch axis the model expects.
    batch = np.expand_dims(np_img, axis=0)
    preprocessed_img = preprocess_input(batch)
    # verbose=0: this runs once per image, so the per-call Keras progress bar
    # would otherwise flood the cell output (and fight with tqdm).
    X_conv = model.predict(preprocessed_img, verbose=0)
    image_features = X_conv[0]
    norm = np.linalg.norm(image_features)
    # Guard against an all-zero vector: dividing by a zero norm yields NaNs,
    # which the nearest-neighbour search downstream cannot handle.
    if norm > 0:
        image_features = image_features / norm
    return image_features

def generate_resnet_features(path_to_files_folder, cache_path="resnet_image_features.pkl"):
    """Compute features for every file in a folder and cache them in a pickle.

    The cache is a list of ``{'image_id': ..., 'features': ...}`` dicts, where
    ``image_id`` is the file name without its last extension and ``features``
    is whatever ``get_features`` returns. On each call:

    1. the existing cache (if any) is loaded,
    2. cached entries whose image is no longer in the folder are dropped,
    3. features are computed only for files that are not cached yet,
    4. the updated list is written back to ``cache_path``.

    Args:
        path_to_files_folder: Folder whose files are treated as images.
        cache_path: Pickle file used as the feature cache. Defaults to
            ``"resnet_image_features.pkl"`` in the current working directory.
            Use a separate file per image folder: step 2 removes every entry
            that does not belong to ``path_to_files_folder``.

    Returns:
        None. The result is only persisted to ``cache_path``.
    """
    image_filenames = listdir(path_to_files_folder)
    image_ids = {splitext(name)[0] for name in image_filenames}

    # NOTE: pickle can execute arbitrary code while loading -- only point
    # `cache_path` at files produced by this notebook.
    try:
        with open(cache_path, "rb") as cache_file:
            all_image_features = pk.load(cache_file)
    except OSError:
        # No readable cache yet: start from scratch.
        print("file_not_found")
        all_image_features = []

    # Drop cached entries whose image has disappeared from the folder.
    kept_features = []
    for entry in all_image_features:
        if entry['image_id'] in image_ids:
            kept_features.append(entry)
        else:
            print("deleting "+ str(entry['image_id']))
    all_image_features = kept_features

    # Set of ids already cached, so each membership test is O(1) instead of a
    # scan over the whole cache for every file.
    cached_ids = {entry['image_id'] for entry in all_image_features}

    for image_filename in tqdm(image_filenames):
        image_id = splitext(image_filename)[0]
        if image_id in cached_ids:
            continue
        img = read_img_file(os.path.join(path_to_files_folder, image_filename))
        image_features = get_features(img)
        all_image_features.append({'image_id': image_id, 'features': image_features})
        cached_ids.add(image_id)

    with open(cache_path, "wb") as cache_file:
        pk.dump(all_image_features, cache_file)

# **CREATING MODELS**
## *TRAINING DATASET*

In [None]:
# ResNet50 with ImageNet weights used as a feature extractor: include_top=False
# leaves out the classification layer and pooling='max' applies global max
# pooling to the last convolutional output. `model` is the notebook-level
# global that get_features() reads.
model = ResNet50(weights='imagenet', include_top=False,
                 input_shape=(224, 224, 3), pooling='max')
# Compute (or incrementally update) the cached features for the images in
# LOCAL_PATH; the result is written to resnet_image_features.pkl.
generate_resnet_features(LOCAL_PATH)

## *WHOLE DATASET*


In [None]:
# Same feature extractor as in the training cell (rebuilt here so this cell
# can be run on its own).
model = ResNet50(weights='imagenet', include_top=False,
                 input_shape=(224, 224, 3), pooling='max')
# NOTE(review): this writes to the same resnet_image_features.pkl as the
# training cell, and generate_resnet_features drops cached entries whose image
# is not in the given folder -- so running this cell replaces the training
# features with those of UNLABELED_DATASET_PATH.
generate_resnet_features(UNLABELED_DATASET_PATH)

## *PREPARE DATA*

In [None]:
# Load the first test image from TRAINING_PATH as RGB, extract its feature
# vector with get_features() (requires the global `model` from the cells
# above), display the image and print the feature vector's shape.
query_image_pillow = Image.open(f'{TRAINING_PATH}/{TEST_IMAGES[0]}').convert('RGB')
query_image_features = get_features(query_image_pillow)
show_images([np.array(query_image_pillow)])
print(query_image_features.shape)

# **SKLEARN**
## *Find nearest neighbors in the training set*

In [None]:
# Load the cached training features.
# NOTE(review): generate_resnet_features() writes resnet_image_features.pkl;
# this differently named file is presumably a saved copy of the 1500-image
# training cache -- confirm it exists before running.
# NOTE: pickle can execute arbitrary code while loading -- only load trusted files.
with open("resnet_image_features_for_1500.pkl", "rb") as cache_file:
    image_features = pk.load(cache_file)

# One row per cached image; reshape flattens any singleton axes the stored
# vectors may carry while keeping the array 2-D even for a single image.
features = np.array([np.array(entry['features']) for entry in image_features])
features = features.reshape(len(image_features), -1)

# Never ask for more neighbours than there are samples.
knn = NearestNeighbors(n_neighbors=min(20, len(features)), algorithm='kd_tree', metric='l2')
knn.fit(features)

# Neighbour indices refer to positions in `image_features`, not to positions in
# a fresh directory listing, so resolve each hit through its image_id.
file_names = listdir(TRAINING_PATH)
file_name_by_id = {splitext(name)[0]: name for name in file_names}

indices = knn.kneighbors([query_image_features], return_distance=False)
found_images = []
for x in indices[0]:
    image_id = image_features[x]['image_id']
    # read_img_file() guarantees RGB, so every array has the same shape and
    # the list stacks into a regular (n, 224, 224, 3) array.
    image = read_img_file(TRAINING_PATH + "/" + file_name_by_id[image_id])
    image = image.resize((224, 224))
    found_images.append(np.array(image))

found_images = np.array(found_images)
show_images(found_images)

## *Find nearest neighbors in the whole dataset*

In [None]:
# Load the features cached by generate_resnet_features(); after the
# "WHOLE DATASET" cell this file holds the images of UNLABELED_DATASET_PATH.
# NOTE: pickle can execute arbitrary code while loading -- only load trusted files.
with open("resnet_image_features.pkl", "rb") as cache_file:
    image_features = pk.load(cache_file)

# One row per cached image; reshape flattens any singleton axes the stored
# vectors may carry while keeping the array 2-D even for a single image.
features = np.array([np.array(entry['features']) for entry in image_features])
features = features.reshape(len(image_features), -1)

# Never ask for more neighbours than there are samples.
knn = NearestNeighbors(n_neighbors=min(20, len(features)), algorithm='kd_tree', metric='l2')
knn.fit(features)

# The cached features describe the whole (unlabeled) dataset, so the matching
# files live in UNLABELED_DATASET_PATH, not in TRAINING_PATH. Neighbour indices
# refer to positions in `image_features`, so resolve each hit through its
# image_id instead of indexing a directory listing.
file_names = listdir(UNLABELED_DATASET_PATH)
file_name_by_id = {splitext(name)[0]: name for name in file_names}

indices = knn.kneighbors([query_image_features], return_distance=False)
found_images = []
for x in indices[0]:
    image_id = image_features[x]['image_id']
    # read_img_file() guarantees RGB, so every array has the same shape and
    # the list stacks into a regular (n, 224, 224, 3) array.
    image = read_img_file(UNLABELED_DATASET_PATH + "/" + file_name_by_id[image_id])
    image = image.resize((224, 224))
    found_images.append(np.array(image))

found_images = np.array(found_images)
show_images(found_images)