In [1]:
import numpy as np
from numpy.linalg import norm
import pickle
from tqdm import tqdm, tqdm_notebook
import tensorflow as tf
import os
import time
from keras.preprocessing import image
from keras.applications.efficientnet import EfficientNetB7, preprocess_input


In [2]:
# Feature extractor: EfficientNetB7 loaded with ImageNet weights and built
# without its top classification layers (include_top=False), taking
# 224x224 RGB inputs.
# NOTE(review): no `pooling` argument is passed, so extract_features has to
# flatten whatever the network returns; pooling='avg' would presumably give
# much shorter vectors — confirm before changing, since it alters the features.
model = EfficientNetB7(
    include_top=False,
    weights='imagenet',
    input_shape=(224, 224, 3),
)
def extract_features(img_path, model):
    """Return an L2-normalised, flattened feature vector for one image.

    The image is loaded at 224x224, converted to an array, wrapped in a
    batch of one, run through ``preprocess_input`` and then through
    ``model.predict``.

    Args:
        img_path: Path of the image file to load.
        model: Object exposing a Keras-style ``predict(batch, verbose=...)``
            method.

    Returns:
        1-D NumPy array holding the flattened model output divided by its
        Euclidean norm. When that norm is zero the flattened output is
        returned unscaled, so the result never contains NaNs produced by a
        division by zero.
    """
    input_shape = (224, 224, 3)
    img = image.load_img(img_path, target_size=input_shape[:2])
    img_array = image.img_to_array(img)
    # The model consumes batches, so add a leading batch axis of size 1.
    expanded_img_array = np.expand_dims(img_array, axis=0)
    preprocessed_img = preprocess_input(expanded_img_array)
    # verbose=0: this function is called once per image, and the default
    # verbosity would emit one progress bar per call.
    features = model.predict(preprocessed_img, verbose=0)
    flattened_features = features.flatten()
    feature_norm = norm(flattened_features)
    if feature_norm == 0:
        # An all-zero output cannot be normalised; keep it as is.
        return flattened_features
    return flattened_features / feature_norm

In [3]:
extensions = ['.jpg', '.JPG', '.jpeg', '.JPEG', '.png', '.PNG']
def get_file_list(root_dir):
    """Recursively collect the paths of image files below ``root_dir``.

    A file counts as an image when its name ends with one of the entries in
    the module-level ``extensions`` list. The comparison is done on the
    suffix (not anywhere in the name) and ignores case, so ``a.jpg.txt`` is
    skipped while ``b.Jpg`` is kept.

    Args:
        root_dir: Directory to walk with ``os.walk``.

    Returns:
        List of paths (``os.path.join(root, filename)``) in the order
        ``os.walk`` yields them; callers that need a stable order must sort.
    """
    # str.endswith accepts a tuple of candidate suffixes.
    suffixes = tuple(ext.lower() for ext in extensions)
    file_list = []
    for root, _directories, filenames in os.walk(root_dir):
        for filename in filenames:
            if filename.lower().endswith(suffixes):
                file_list.append(os.path.join(root, filename))
    return file_list

In [4]:
# Gather every image path under the dataset folder and put the list in
# sorted order so that positions in `filenames` are stable between runs.
# NOTE(review): absolute, machine-specific path — consider a configurable
# data directory.
root_dir = 'C:/Users/DELL/Desktop/TP2/unsplash-25k-photos'
filenames = get_file_list(root_dir)
filenames.sort()

In [5]:
%pip install -c conda-forge ipywidgets

Note: you may need to restart the kernel to use updated packages.


ERROR: Could not open requirements file: [Errno 2] No such file or directory: 'conda-forge'


In [27]:
%pip install --upgrade jupyter ipywidgets


Note: you may need to restart the kernel to use updated packages.


In [6]:
from tqdm.notebook import tqdm
# Run every image through the extractor, keeping the vectors in the same
# order as `filenames`; tqdm draws a notebook progress bar over the paths.
feature_list = []
for img_path in tqdm(filenames, total=len(filenames)):
    feature_list.append(extract_features(img_path, model))

  0%|          | 0/24996 [00:00<?, ?it/s]



In [7]:
# Persist the extracted feature vectors. The file name matches the one the
# loading cell reads (feature_resnet.pickle); the `with` block guarantees the
# handle is flushed and closed.
with open("C:/Users/DELL/Desktop/TP2/data/feature_resnet.pickle", 'wb') as features_file:
    pickle.dump(feature_list, features_file)


In [9]:
# Persist the image paths next to the features, in the data/ folder that the
# loading cell reads from; the `with` block guarantees the handle is closed.
with open("C:/Users/DELL/Desktop/TP2/data/filenames_unsplash.pickle", 'wb') as filenames_file:
    pickle.dump(filenames, filenames_file)

In [6]:
# Reload the cached image paths and feature vectors so the expensive
# extraction step does not have to be repeated.
# NOTE: pickle.load can execute arbitrary code; only load files produced by
# this notebook.
with open('C:/Users/DELL/Desktop/TP2/data/filenames_unsplash.pickle', 'rb') as filenames_file:
    filenames = pickle.load(filenames_file)
with open('C:/Users/DELL/Desktop/TP2/data/feature_resnet.pickle', 'rb') as features_file:
    feature_list = pickle.load(features_file)


In [7]:
# Sanity-check the loaded features: how many vectors there are and how long
# the first one is.
total_feature_vectors = len(feature_list)
feature_vector_length = len(feature_list[0])

print("Feature vector length:", feature_vector_length)
print("Total feature vectors:", total_feature_vectors)

Feature vector length: 125440
Total feature vectors: 24996


In [8]:
# Stack the per-image vectors into a single 2-D array whose rows have
# `n_features` columns (the length of the first vector).
feature_vector_length = len(feature_list[0])
n_features = feature_vector_length
X = np.reshape(np.array(feature_list), (-1, n_features))
print(X)

[[-0.00105543 -0.00108894 -0.00160639 ... -0.0015562  -0.00099878
   0.00120592]
 [-0.00067938 -0.00124502 -0.00158353 ... -0.00096752  0.00380869
  -0.00040059]
 [-0.00074034  0.00449317 -0.00100883 ... -0.00126282 -0.000101
   0.00382703]
 ...
 [-0.00137721 -0.00150601 -0.00143292 ... -0.00147265  0.00149508
   0.00943895]
 [-0.00114652  0.00023111  0.00126398 ...  0.00044331  0.0064351
   0.01319985]
 [-0.00065376 -0.00043297 -0.00145062 ... -0.00109635  0.00588039
   0.01067572]]


In [9]:
from sklearn.neighbors import NearestNeighbors
# Exact 5-nearest-neighbour search with Euclidean distance over the rows of X.
# algorithm='brute' is used instead of a KD tree: tree indexes stop paying
# off once vectors have more than a few dozen dimensions, and these have far
# more, so building one only adds time and memory.
neighbors = NearestNeighbors(n_neighbors=5, algorithm='brute', metric='euclidean').fit(X)
# Query with the first image's vector as a smoke test; kneighbors expects a
# 2-D input, hence the wrapping list.
distances, indices = neighbors.kneighbors([X[0]])

In [10]:
import matplotlib.pyplot as plt
import matplotlib.image as mpimg
%matplotlib inline
plt.imshow(mpimg.imread(filenames[2167]))

ModuleNotFoundError: No module named 'matplotlib'

In [None]:
# Print the five distances returned for the query, one value per line.
print(*(distances[0][rank] for rank in range(5)), sep="\n")

In [None]:
def plot_images(image_paths, distances):
    """Show images side by side, each titled with its distance.

    Args:
        image_paths: Sequence of image file paths to display in one row.
        distances: Sequence of numbers paired position-by-position with
            ``image_paths``; each is rendered with two decimals in the title.

    Returns:
        None. Nothing is drawn when ``image_paths`` is empty.
    """
    if len(image_paths) == 0:
        # A figure with zero columns cannot be created; nothing to show.
        return
    # squeeze=False always yields a 2-D grid of axes, so a single image works
    # too (with the default, one column returns a bare Axes that cannot be
    # iterated).
    fig, axes = plt.subplots(1, len(image_paths), figsize=(15, 5), squeeze=False)
    for ax, image_path, distance in zip(axes[0], image_paths, distances):
        ax.imshow(mpimg.imread(image_path))
        ax.set_title(f"Distance: {distance:.2f}")
        ax.axis('off')
    plt.show()

In [None]:
import random
# Show six random query images, each followed by its nearest neighbours.
for i in range(6):
    # Draw the query from the whole dataset instead of a hard-coded upper
    # bound, so every image can be picked and the index is always valid.
    random_image_index = random.randrange(len(feature_list))
    distances, indices = neighbors.kneighbors([feature_list[random_image_index]])
    # don't take the first closest image as it will be the same image
    similar_image_paths = [filenames[random_image_index]] + [
        filenames[neighbor_index] for neighbor_index in indices[0][1:]
    ]
    # distances[0][0] belongs to the query itself, so paths and distances
    # line up position by position.
    plot_images(similar_image_paths, distances[0])