# CREATING AN INVERSE IMAGE SEARCH INDEX WITH DEEP LEARNING

In [1]:
#!pip install opencv-python

In [2]:
#!python.exe -m pip install --upgrade pip

In [3]:
import cv2
import numpy as np
from tensorflow.keras import Model
from tensorflow.keras.datasets import fashion_mnist 
from tensorflow.keras.layers import *

DEFINE BUILD_AUTOENCODER(), WHICH INSTANTIATES THE AUTOENCODER.
FIRST, LET'S ASSEMBLE THE ENCODER PART:

In [4]:
def build_autoencoder(input_shape = (28, 28, 1),
                      encoding_size = 32,
                      negative_slope = 0.3):
    """Assemble a convolutional autoencoder and return it as a Keras ``Model``.

    The encoder stacks two stride-2 convolutions (32 then 64 filters), each
    followed by LeakyReLU and batch normalisation, then flattens and projects
    to a dense layer named ``"encoder_output"``. The decoder mirrors this
    with transposed convolutions and finishes with a sigmoid activation
    named ``"decoder_output"``.

    Parameters
    ----------
    input_shape : tuple
        Shape of one input sample, passed to ``Input``.
    encoding_size : int
        Number of units in the ``"encoder_output"`` dense layer.
    negative_slope : float
        Slope used by every ``LeakyReLU`` layer.

    Returns
    -------
    Model
        Uncompiled model mapping the input tensor to its reconstruction.
    """
    inputs = Input(shape = input_shape)

    # --- Encoder: stride-2 convolution blocks ---
    x = inputs
    for n_filters in (32, 64):
        x = Conv2D(filters = n_filters,
                   kernel_size = (3, 3),
                   strides = 2,
                   padding = "same") (x)
        x = LeakyReLU(negative_slope = negative_slope) (x)
        x = BatchNormalization() (x)

    # Remember the feature-map shape (without the batch axis) so the decoder
    # can restore it after the dense bottleneck.
    target_shape = tuple(x.shape[1:])
    x = Flatten() (x)
    encoder_output = Dense(units = encoding_size,
                           name = "encoder_output") (x)

    # --- Decoder: expand the encoding back to the pre-flatten volume ---
    x = Dense(np.prod(target_shape)) (encoder_output)
    x = Reshape(target_shape) (x)
    for n_filters in (64, 32):
        x = Conv2DTranspose(filters = n_filters,
                            kernel_size = (3, 3),
                            strides = 2,
                            padding = "same") (x)
        x = LeakyReLU(negative_slope = negative_slope) (x)
        x = BatchNormalization() (x)

    # Single-filter projection followed by a sigmoid activation.
    x = Conv2DTranspose(filters = 1,
                        kernel_size = (3, 3),
                        padding = "same") (x)
    outputs = Activation(activation = "sigmoid",
                         name = "decoder_output") (x)

    return Model(inputs, outputs)

DEFINE A FUNCTION THAT WILL OUTPUT THE EUCLIDEAN DISTANCE BETWEEN TWO VECTORS:

In [5]:
def euclidean_dist(x, y):
    """Return the Euclidean (L2) distance between arrays ``x`` and ``y``."""
    difference = x - y
    return np.linalg.norm(difference)

DEFINE THE SEARCH() FUNCTION, WHICH USES THE SEARCH INDEX (A DICTIONARY OF FEATURE VECTORS
PAIRED WITH THEIR CORRESPONDING IMAGES) TO RETRIEVE THE MOST SIMILAR RESULTS
TO A QUERY VECTOR:

In [6]:
def search(query_vector, search_index, max_results = 16):
    """Return the index entries closest to ``query_vector``.

    Parameters
    ----------
    query_vector : array-like
        Feature vector to look up.
    search_index : mapping
        Must provide ``"features"`` (a sequence of feature vectors) and
        ``"images"`` (items aligned index-for-index with the features).
    max_results : int
        Maximum number of matches to return.

    Returns
    -------
    list of tuple
        ``(distance, image)`` pairs ordered by ascending Euclidean distance,
        at most ``max_results`` long. Empty when the index has no features.
    """
    features = np.asarray(search_index["features"])
    images = search_index["images"]

    # Nothing to compare against: return an empty result list instead of None.
    if len(features) == 0:
        return []

    # Euclidean distance from the query to every indexed vector in one pass.
    # Each entry is flattened so the norm is taken over the whole vector.
    deltas = (features - np.asarray(query_vector)).reshape(len(features), -1)
    distances = np.linalg.norm(deltas, axis = 1)

    # Rank only after ALL distances are known; ranking (and returning) inside
    # the scan would stop at the first entry. A stable sort keeps index order
    # for ties.
    nearest = np.argsort(distances, kind = "stable")[:max_results]
    return [(distances[i], images[i]) for i in nearest]

LOAD THE FASHION-MNIST DATASET. KEEP ONLY THE IMAGES:

In [7]:
# load_data() yields a (train, test) pair of 2-tuples; only the first element
# of each (the images) is kept, the second is bound to `_` and never used.
(x_train, _), (x_test, _) = fashion_mnist.load_data()

NORMALIZE THE IMAGES AND ADD A COLOR CHANNEL DIMENSION:

In [8]:
# Cast both splits to float32, divide by 255 and append a trailing channel axis.
x_train, x_test = (
    np.expand_dims(split.astype("float32") / 255.0, axis = -1)
    for split in (x_train, x_test)
)

BUILD THE AUTOENCODER AND COMPILE IT.

In [9]:
# Instantiate the model with build_autoencoder()'s default arguments.
autoencoder = build_autoencoder()
# Train with the Adam optimizer against a mean-squared-error loss.
autoencoder.compile(optimizer = "adam", loss = "mse")

TRAIN THE AUTOENCODER FOR 50 EPOCHS, ON BATCHES OF 512 IMAGES AT A TIME:

In [10]:
# Training schedule: number of passes over x_train and samples per batch.
EPOCHS = 50
BATCH_SIZE = 512
# The inputs double as the targets, so the network learns to reproduce its
# own input. x_test (again paired with itself) serves as validation data.
autoencoder.fit(x_train, x_train,
               epochs = EPOCHS,
               batch_size = BATCH_SIZE,
               shuffle = True,
               validation_data = (x_test, x_test))

Epoch 1/50
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m103s[0m 803ms/step - loss: 0.0393 - val_loss: 0.1294
Epoch 2/50
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 775ms/step - loss: 0.0164 - val_loss: 0.0906
Epoch 3/50
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 773ms/step - loss: 0.0125 - val_loss: 0.0526
Epoch 4/50
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 778ms/step - loss: 0.0113 - val_loss: 0.0284
Epoch 5/50
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 777ms/step - loss: 0.0106 - val_loss: 0.0145
Epoch 6/50
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m92s[0m 784ms/step - loss: 0.0100 - val_loss: 0.0108
Epoch 7/50
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 774ms/step - loss: 0.0096 - val_loss: 0.0098
Epoch 8/50
[1m118/118[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m91s[0m 775ms/step - loss: 0.0094 - val_loss: 0.0096
Epoch 9/50
[1m

<keras.src.callbacks.history.History at 0x1751e2c85d0>

Create a new model, which we'll use as a feature extractor. It'll receive the same
inputs as the autoencoder and will output the encoding learned by the autoencoder.
In essence, we are using the encoder part of the autoencoder to turn images into
vectors:

In [13]:
# Build a second model that maps the autoencoder's input tensor to the output
# of its layer named "encoder_output", i.e. image -> encoding.
fe_input = autoencoder.input
fe_output = autoencoder.get_layer("encoder_output").output
feature_extractor = Model(inputs = fe_input,
                         outputs = fe_output)

Create the search index, comprised of the feature vectors of X_train, along with
the original images (which must be reshaped back to 28x28 and rescaled to the
range [0, 255]):

In [15]:
# Encode every training image with the feature extractor.
train_vectors = feature_extractor.predict(x_train)

# Scale the images back up by 255, store them as uint8 and drop the channel
# axis. NOTE: x_train is overwritten, so re-running this cell rescales again.
x_train = (x_train * 255.0).astype("uint8").reshape((len(x_train), 28, 28))

# The index pairs each feature vector with its image by position.
search_index = {"features": train_vectors, "images": x_train}

[1m1875/1875[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 5ms/step


Compute the feature vectors of X_test, which we will use as our sample of query
images. Also, reshape X_test to 28x28 and rescale its values to the range [0, 255]:

In [16]:
# Encode the test images; these encodings act as the query vectors.
test_vectors = feature_extractor.predict(x_test)

# Scale the images back up by 255, store them as uint8 and drop the channel
# axis. NOTE: x_test is overwritten, so re-running this cell rescales again.
x_test = (x_test * 255.0).astype("uint8").reshape((len(x_test), 28, 28))

[1m313/313[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 6ms/step


Select 16 random test images (with their corresponding feature vectors) to use as
queries:

In [17]:
# Draw the query sample from a seeded generator so a fresh "Run All" picks the
# same images every time, and sample without replacement so no test image is
# used as a query twice.
NUM_QUERIES = 16
QUERY_SEED = 42
query_rng = np.random.default_rng(QUERY_SEED)
sample_indices = query_rng.choice(x_test.shape[0],
                                  size = NUM_QUERIES,
                                  replace = False)

# Pick the images and their feature vectors with the same indices so the two
# arrays stay aligned.
sample_images = x_test[sample_indices]
sample_queries = test_vectors[sample_indices]

Perform a search for each of the images in the test sample and save a side-by-side
visual comparison of the test query, along with the results fetched from the index
(which, remember, is comprised of the train data):

In [31]:
# For every query, look up its matches in the index and write "<i>.jpg":
# the enlarged query on the left, a 4x4 mosaic of the matches on the right.
for i, (vector, image) in enumerate(zip(sample_queries, sample_images)):
    # Keep only the image from each (distance, image) result pair.
    results = [match for _, match in search(vector, search_index)]

    # The mosaic needs a full 4x4 grid; skip queries with fewer matches.
    if len(results) < 16:
        print(f"Warning: Only {len(results)} search results found for {i}, skipping.")
        continue

    # Enlarge the query 4x so its height matches the four-row mosaic.
    query_image = cv2.resize(image,
                             (28 * 4, 28 * 4),
                             interpolation=cv2.INTER_AREA)

    # Four rows of four matches each, stacked top to bottom.
    results_mosaic = np.vstack(
        [np.hstack(results[start:start + 4]) for start in range(0, 16, 4)]
    )

    result_image = np.hstack((query_image, results_mosaic))
    cv2.imwrite(f"{i}.jpg", result_image)


