In [1]:
dataset_root = "./dataset/"
images_dir = dataset_root + "complete/"

In [2]:
from tensorflow import keras
from keras.applications.vgg16 import VGG16, preprocess_input
from data_loader import NoLabelDataLoader
import numpy as np
import os
from tqdm import tqdm
import gc

batch_size = 128
image_size = (224, 224)
loader = NoLabelDataLoader(images_dir, batch_size, image_size)


feature_extractor = VGG16()
feature_extractor = keras.Model(inputs=feature_extractor.input, outputs=feature_extractor.get_layer("fc1").output)
feature_extractor = keras.Sequential([feature_extractor, keras.layers.Flatten()])
feature_extractor.summary()

images_count = len(os.listdir(images_dir))
filenames = []
extracted_features = np.zeros((images_count, feature_extractor.output_shape[1]))

batch_num = loader.number_of_batches()

for i in tqdm(range(loader.number_of_batches())):
    if i % 50 == 0:
        del feature_extractor
        keras.backend.clear_session()
        gc.collect()
        feature_extractor = VGG16()
        feature_extractor = keras.Model(inputs=feature_extractor.input, outputs=feature_extractor.get_layer("fc1").output)
        feature_extractor = keras.Sequential([feature_extractor, keras.layers.Flatten()])
    images, names = loader.get_batch(i, preprocessing=preprocess_input)
    extracted_features[i * batch_size: i * batch_size + len(images), :] = feature_extractor.predict(images, verbose=False, batch_size=16)
    for f in names:
        filenames.append(f)

2023-12-31 14:09:21.972343: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-31 14:09:21.972381: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-31 14:09:21.978029: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-31 14:09:21.993975: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-12-31 14:09:24.896116: I external/local_xla/xla/

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 model (Functional)          (None, 4096)              117479232 
                                                                 
 flatten (Flatten)           (None, 4096)              0         
                                                                 
Total params: 117479232 (448.15 MB)
Trainable params: 117479232 (448.15 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________


  0%|          | 0/1020 [00:00<?, ?it/s]2023-12-31 14:09:32.489274: W external/local_tsl/tsl/framework/cpu_allocator_impl.cc:83] Allocation of 411041792 exceeds 10% of free system memory.
2023-12-31 14:09:35.243145: I external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:454] Loaded cuDNN version 8904
2023-12-31 14:09:38.407624: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 1.74GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2023-12-31 14:09:38.408121: W external/local_tsl/tsl/framework/bfc_allocator.cc:296] Allocator (GPU_0_bfc) ran out of memory trying to allocate 1.74GiB with freed_by_count=0. The caller indicates that this is not a failure, but this may mean that there could be performance gains if more memory were available.
2023-12-31 14:09:38.458162: W external/local_tsl/tsl/framework/bfc_alloca

In [3]:
np.savetxt('extracted_features_vgg16', extracted_features)

In [4]:
file = open('vgg16_similarity_filenames.txt','w')
for name in filenames:
	file.write(name + "\n")
file.close()

In [5]:
import cv2 as cv
from scipy.spatial.distance import cdist

query_image = cv.imread("ramen.jpg")[:, :, ::-1]
query_image = cv.resize(query_image, image_size)
query_image = preprocess_input(query_image)
img_array = np.zeros((1, image_size[0], image_size[0], 3))
img_array[0] = query_image
extracted_query = feature_extractor.predict(img_array)
print(extracted_query.shape)

distances = cdist(extracted_query, extracted_features)

(1, 4096)


In [6]:
dist = np.argsort(distances[0])
dist = dist[-10:]
print(dist)
for d in dist:
    print(filenames[d])

[117472 127436  71571  20254  99680  95467 108251  28648 108372  15261]
train_113664.jpg
train_059952.jpg
train_095518.jpg
train_016896.jpg
train_036300.jpg
train_049194.jpg
train_067421.jpg
train_059940.jpg
train_062611.jpg
train_062437.jpg


In [1]:
import numpy as np
import pandas as pd

extracted_features = np.loadtxt('extracted_features_efficientnet')
filenames = pd.read_csv("./efficientnet_similarity_filenames.txt", header=None).iloc[:, 0].values

extracted_features.shape, filenames

((130469, 48),
 array(['train_104651.jpg', 'train_054035.jpg', 'train_063177.jpg', ...,
        'train_118399.jpg', 'train_095549.jpg', 'train_077670.jpg'],
       dtype=object))

In [2]:
from tensorflow import keras
from keras.applications.efficientnet_v2 import EfficientNetV2B0, preprocess_input

feature_extractor = EfficientNetV2B0()
feature_extractor = keras.Model(inputs=feature_extractor.input, outputs=feature_extractor.get_layer("block6c_se_reduce").output)
feature_extractor = keras.Sequential([feature_extractor, keras.layers.Flatten()])

import cv2 as cv
from scipy.spatial.distance import cdist
image_size = (224, 224)

query_image = cv.imread("macaron.jpeg")[:, :, ::-1]
query_image = cv.resize(query_image, image_size)
img_array = np.zeros((1, image_size[0], image_size[1], 3))
img_array[0] = query_image
extracted_query = feature_extractor.predict(preprocess_input(img_array))
print(extracted_query.shape)



2023-12-31 14:01:58.665941: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2023-12-31 14:01:58.666004: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2023-12-31 14:01:58.861013: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2023-12-31 14:01:59.253872: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.
2023-12-31 14:02:04.296106: I external/local_xla/xla/

(1, 48)


In [20]:
query_image = cv.imread("macaron.jpeg")[:, :, ::-1]
query_image = cv.resize(query_image, image_size)
img_array = np.zeros((1, image_size[0], image_size[1], 3))
img_array[0] = query_image
extracted_query = feature_extractor.predict(preprocess_input(img_array))
print(extracted_query.shape)
#distances = cdist(extracted_query, extracted_features)[0]


(1, 4096)


In [11]:
dist = np.argsort(distances)[0:10]
print(dist)
cv.destroyAllWindows()
for d in dist:
    im = cv.imread(images_dir + filenames[d])
    cv.imshow(filenames[d], im)
    cv.waitKey()

[ 77332  35074  23840  69374  12560  74015 116831  76306   9452  55098]


In [18]:
keras.applications.EfficientNetV2B0().summary()

Model: "efficientnetv2-b0"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 input_3 (InputLayer)        [(None, 224, 224, 3)]        0         []                            
                                                                                                  
 rescaling_2 (Rescaling)     (None, 224, 224, 3)          0         ['input_3[0][0]']             
                                                                                                  
 normalization_2 (Normaliza  (None, 224, 224, 3)          0         ['rescaling_2[0][0]']         
 tion)                                                                                            
                                                                                                  
 stem_conv (Conv2D)          (None, 112, 112, 32)         864       ['normalizatio

In [37]:
cv.destroyAllWindows()

In [22]:
from sklearn.preprocessing import normalize

normalized_features = normalize(extracted_features)
normalized_extracted = normalize(extracted_query)

distances = cdist(normalized_extracted, normalized_features)[0]
dist = np.argsort(distances)[0:10]
print(dist)


[ 1567 32194 32730 78419 34782 14456 97895 85115 45469  5347]


In [25]:
cv.destroyAllWindows()
for d in dist:
    im = cv.imread(images_dir + filenames[d])
    cv.imshow(filenames[d], im)
    cv.waitKey()

In [26]:
cv.destroyAllWindows()