In [1]:
import tensorflow
from tensorflow.keras.preprocessing import image
from tensorflow.keras.layers import GlobalMaxPooling2D
from tensorflow.keras.applications.resnet50 import ResNet50,preprocess_input
import numpy as np
from numpy.linalg import norm
import os
from tqdm import tqdm
import pickle
import PIL



In [2]:

# Headless ResNet50 with ImageNet weights, frozen so it is only used for inference.
# It gets its own name so that `model` always refers to the final extractor below
# instead of being rebound from one kind of object to another.
backbone = ResNet50(weights='imagenet', include_top=False, input_shape=(224, 224, 3))
backbone.trainable = False

# Append global max pooling on top of the backbone; `model` is the object the
# rest of the notebook passes to extract_features.
model = tensorflow.keras.Sequential([
    backbone,
    GlobalMaxPooling2D(),
])

#print(model.summary())

def extract_features(img_path, model):
    """Return an L2-normalised feature vector for the image at ``img_path``.

    Parameters
    ----------
    img_path : str or path-like
        Location of the image file. The image is loaded and resized to
        224x224 before being passed to the model.
    model : object exposing a Keras-style ``predict`` method
        Called on a batch containing the single preprocessed image; its
        output is flattened to one dimension.

    Returns
    -------
    numpy.ndarray
        The flattened model output divided by its Euclidean norm. If that
        norm is zero, the (all-zero) vector is returned unchanged rather
        than an array of NaNs.
    """
    img = image.load_img(img_path, target_size=(224, 224))
    img_array = image.img_to_array(img)

    # Keras models operate on batches, so wrap the single image in a batch of one.
    batch = np.expand_dims(img_array, axis=0)
    # Apply the ResNet50 input preprocessing before running inference.
    batch = preprocess_input(batch)

    # verbose=0 silences the per-call progress bar, which would otherwise be
    # printed once per image when this function is called in a loop.
    features = model.predict(batch, verbose=0).flatten()

    # Guard the normalisation: dividing by a zero norm would produce NaNs.
    magnitude = norm(features)
    if magnitude == 0:
        return features
    return features / magnitude



Downloading data from https://storage.googleapis.com/tensorflow/keras-applications/resnet/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5


In [3]:
# Download the dataset bundle from Google Cloud Storage and save it as 'archive.zip'
# in the current working directory (-O); -c lets wget resume a partial download.
# NOTE(review): the URL is pre-signed (X-Goog-Date=20221001T101507Z, X-Goog-Expires=259200),
# so it looks time-limited and will presumably need regenerating before this cell can be re-run — confirm.
!wget --header="Host: storage.googleapis.com" --header="User-Agent: Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.0.0 Safari/537.36" --header="Accept: text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9" --header="Accept-Language: en-US,en;q=0.9" --header="Referer: https://www.kaggle.com/" "https://storage.googleapis.com/kaggle-data-sets/175990/396802/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20221001%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20221001T101507Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=4f6fbe212a3d8a9493c732ba59f7df33293c236f0004050f5e8f4f2bc1f77040ab7fa49eac2eb05cfdf262a2e98dd8fd29aa9bcf6a9a2c6ff95f1e8edd9d7d8a22b9b76bfb84d2f31cc0a85836045504ce55ffcb92a7e947baa3a50e5cc3e471210c545a28be92fde909c753dae0d0522e7b6f0565c629129b71234151610488727e86a972732b92ef4ceca53439ddfce62d83227f142efae187015788e579650f11da9d75146dd90cb366644fab99e46545421a385f757ff37b58d540bd10593fc6d0187a64cf63c198300f89c462bf41043bbfb57480ae787c95dd3df856b61200fb3967ea71ffa7fc36de974d126fd1c7c7c2d2427db27a13b0c3e1d89ff3" -c -O 'archive.zip'

--2022-10-01 10:15:29--  https://storage.googleapis.com/kaggle-data-sets/175990/396802/bundle/archive.zip?X-Goog-Algorithm=GOOG4-RSA-SHA256&X-Goog-Credential=gcp-kaggle-com%40kaggle-161607.iam.gserviceaccount.com%2F20221001%2Fauto%2Fstorage%2Fgoog4_request&X-Goog-Date=20221001T101507Z&X-Goog-Expires=259200&X-Goog-SignedHeaders=host&X-Goog-Signature=4f6fbe212a3d8a9493c732ba59f7df33293c236f0004050f5e8f4f2bc1f77040ab7fa49eac2eb05cfdf262a2e98dd8fd29aa9bcf6a9a2c6ff95f1e8edd9d7d8a22b9b76bfb84d2f31cc0a85836045504ce55ffcb92a7e947baa3a50e5cc3e471210c545a28be92fde909c753dae0d0522e7b6f0565c629129b71234151610488727e86a972732b92ef4ceca53439ddfce62d83227f142efae187015788e579650f11da9d75146dd90cb366644fab99e46545421a385f757ff37b58d540bd10593fc6d0187a64cf63c198300f89c462bf41043bbfb57480ae787c95dd3df856b61200fb3967ea71ffa7fc36de974d126fd1c7c7c2d2427db27a13b0c3e1d89ff3
Resolving storage.googleapis.com (storage.googleapis.com)... 172.253.115.128, 172.253.122.128, 172.217.1.208, ...
Connecting to storage.

In [None]:
# Extract 'archive.zip' from the current working directory. A relative path is used
# so the notebook is not tied to a Colab-style '/content' location.
# -o overwrites existing files without prompting (a re-run does not stall on a prompt);
# -q suppresses the per-file listing so the cell output stays small.
!unzip -q -o "archive.zip"

In [5]:
# Build the path of every entry in the 'images' directory.
# os.listdir returns entries in arbitrary order, so the names are sorted to make
# the resulting list (and anything saved in the same order) reproducible.
filenames = [
    os.path.join('images', entry)
    for entry in sorted(os.listdir('images'))
]

In [6]:
# Sanity check: report how many paths were collected in `filenames`.
print(len(filenames))

44441


In [7]:


# Compute one feature vector per image path, in the same order as `filenames`,
# with a tqdm progress bar because this pass over the dataset is slow.
feature_list = [extract_features(path, model) for path in tqdm(filenames)]

# Persist the embeddings and their matching paths. The `with` blocks guarantee
# each file is flushed and closed once its dump completes.
with open('embeddings.pkl', 'wb') as embeddings_file:
    pickle.dump(feature_list, embeddings_file)

with open('filenames.pkl', 'wb') as filenames_file:
    pickle.dump(filenames, filenames_file)

100%|██████████| 44441/44441 [42:57<00:00, 17.24it/s]
