<a href="https://colab.research.google.com/github/jangedoo/image-similarity-demo/blob/master/notebooks/Image_Search_Fine_tuning.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import tensorflow_addons as tfa
import tensorflow_datasets as tfds
import tensorflow_hub as hub
import functools
import plotly.express as px
import pandas as pd

In [2]:
# Load the "cars196" dataset as (image, label) pairs together with its
# metadata object (`ds_info`), which is used below to map label ids to names.
ds, ds_info = tfds.load("cars196", as_supervised=True, with_info=True)

2022-04-03 21:19:07.441077: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:26:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-04-03 21:19:07.469252: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:26:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-04-03 21:19:07.469597: I tensorflow/stream_executor/cuda/cuda_gpu_executor.cc:923] could not open file to read NUMA node: /sys/bus/pci/devices/0000:26:00.0/numa_node
Your kernel may have been built without NUMA support.
2022-04-03 21:19:07.470201: I tensorflow/core/platform/cpu_feature_guard.cc:142] This TensorFlow binary is optimized with oneAPI Deep Neural Network Library (oneDNN) to use the following CPU instructions in performance-critical operations:  SSE4.1 SSE4.2 AVX AVX2 FMA
To enable them in other operations, rebuild TensorFlow wi

In [3]:
# Callable taken from the dataset metadata that converts an integer label id
# into its string label; used when colouring the embedding plots.
int_to_class_label = ds_info.features['label'].int2str

In [4]:
# Label ids kept for this demo; examples of any other class are filtered out.
CLASSES_TO_CONSIDER = [0, 1, 5, 10, 20, 30, 40, 50, 60, 70, 80, 90, 100, 105, 110, 115, 120, 125, 130, 135, 140]
# Square spatial size every image is brought to before it is fed to the models.
IMG_WIDTH = IMG_HEIGHT = 224

def filter_by_classes(img, label):
    """Dataset predicate: keep an example only if its label is in CLASSES_TO_CONSIDER.

    The label is compared against every allowed class id at once and the
    example is kept when any comparison matches. `img` is not inspected; it is
    accepted because the dataset elements are (image, label) pairs.
    """
    return tf.reduce_any(tf.equal(label, CLASSES_TO_CONSIDER))

def normalize_img(img, label):
    """Bring an image to the model input size and scale its values by 1/255.

    The image is centrally cropped and/or zero-padded (not rescaled) to
    IMG_HEIGHT x IMG_WIDTH, cast to float32 and divided by 255. The label is
    passed through unchanged.

    Returns:
        Tuple of (processed image, label).
    """
    fitted = tf.image.resize_with_crop_or_pad(
        img, target_height=IMG_HEIGHT, target_width=IMG_WIDTH
    )
    scaled = tf.cast(fitted, tf.float32) / 255.0
    return scaled, label

# Input pipelines: keep only the selected classes, then crop/pad and rescale
# every image. Only the training split is shuffled (buffer of 1024 elements).
train_ds, test_ds = ds['train'], ds['test']

train_ds = (
    train_ds
    .filter(filter_by_classes)
    .map(normalize_img)
    .shuffle(1024)
    .prefetch(tf.data.AUTOTUNE)
)
test_ds = (
    test_ds
    .filter(filter_by_classes)
    .map(normalize_img)
    .prefetch(tf.data.AUTOTUNE)
)

In [5]:
def _collect(dataset):
    """Drain a dataset of (image, label) pairs into memory.

    Returns:
        Tuple of (images stacked into one NumPy array, labels as a Python list).
    """
    images, targets = [], []
    for image, target in dataset.cache().as_numpy_iterator():
        images.append(image)
        targets.append(target)
    return np.array(images), targets


# Materialise both splits so they can be passed directly to fit/predict.
x_train, y_train = _collect(train_ds)
x_test, y_test = _collect(test_ds)

print(len(x_train), len(y_train), len(x_test), len(y_test))

2022-04-03 21:19:08.757085: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)
2022-04-03 21:19:18.767143: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:175] Filling up shuffle buffer (this may take a while): 605 of 1024
2022-04-03 21:19:23.247193: I tensorflow/core/kernels/data/shuffle_dataset_op.cc:228] Shuffle buffer filled.


838 838 831 831


# Extract vectors and plot the embeddings

In [6]:
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE

def plot_embeddings(features, labels, random_state=None):
    """Project embeddings to 2-D with t-SNE and show them as a scatter plot.

    Points are coloured by the string class label and given a marker symbol
    per integer label id.

    Args:
        features: 2-D array-like with one embedding vector per row.
        labels: Sequence of integer label ids, one per row of `features`.
        random_state: Seed forwarded to t-SNE. The default (None) keeps the
            previous behaviour, where the layout differs between runs; pass
            an int to make the plot reproducible.
    """
    # The reducer is t-SNE (initialised from PCA), not a plain PCA.
    tsne = TSNE(n_components=2, learning_rate='auto', init='pca', random_state=random_state)
    reduced_features = tsne.fit_transform(features)
    str_labels = list(map(int_to_class_label, labels))
    fig = px.scatter(x=reduced_features[:,0], y=reduced_features[:,1], color=str_labels, symbol=labels)
    fig.show()

In [96]:
# Baseline feature extractor: the ResNet-v2-50 feature-vector module from
# TF Hub, wrapped in a Sequential model and kept frozen (trainable=False).
vectorizer = tf.keras.Sequential([
    hub.KerasLayer("https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/5", trainable=False)
])
# Build with the batch dimension left open and IMG_HEIGHT x IMG_WIDTH x 3 inputs.
vectorizer.build([None, IMG_HEIGHT, IMG_WIDTH, 3])

In [97]:
# Print the layer and parameter summary of the frozen baseline extractor.
vectorizer.summary()

Model: "sequential_16"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
keras_layer_16 (KerasLayer)  (None, 2048)              23564800  
Total params: 23,564,800
Trainable params: 0
Non-trainable params: 23,564,800
_________________________________________________________________


In [98]:
# Embed the test images with the frozen, pre-trained extractor.
pre_trained_features = vectorizer.predict(x_test)

In [99]:
# Visualise the baseline embeddings before any training on this dataset.
plot_embeddings(features=pre_trained_features, labels=y_test)


The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



In [100]:
from sklearn.neighbors import NearestNeighbors
def get_knn(features):
    """Fit a cosine-distance nearest-neighbour index on `features`.

    The index is configured for 5 neighbours by default; callers can request
    a different number per query.

    Returns:
        The fitted NearestNeighbors instance.
    """
    index = NearestNeighbors(n_neighbors=5, metric="cosine")
    return index.fit(features)

import ipywidgets as w
def show_similar_images(images, labels, vectorizer, knn, start_image_idx, n_inputs=5, n_neighbors=10):
    """Plot query images next to their nearest neighbours.

    Each row shows one query image in the first column, followed by its
    `n_neighbors` nearest neighbours looked up in `knn`.

    Args:
        images: Indexable collection of images; queries are sliced from it and
            neighbour indices returned by `knn` are resolved against it.
        labels: Labels aligned with `images`, shown under each neighbour.
        vectorizer: Model whose `predict` turns images into embedding vectors.
        knn: Fitted nearest-neighbour index exposing `kneighbors`.
        start_image_idx: Index of the first query image.
        n_inputs: Maximum number of consecutive query images (fewer are used
            when the slice runs past the end of `images`).
        n_neighbors: Number of neighbours displayed per query.
    """
    input_images = images[start_image_idx:start_image_idx+n_inputs]
    features = vectorizer.predict(input_images)
    neighbor_distances, neighbor_indices = knn.kneighbors(features, n_neighbors=n_neighbors)

    # squeeze=False keeps `axes` two-dimensional even when only one query image
    # is selected (e.g. start_image_idx at the last image); without it a single
    # row is collapsed to 1-D and `axes[i, j]` raises an IndexError.
    fig, axes = plt.subplots(
        len(input_images),
        n_neighbors+1,
        figsize=(20, len(input_images)*4),
        squeeze=False,
    )

    rows = zip(input_images, neighbor_distances, neighbor_indices)
    for i, (image, distances, nbors) in enumerate(rows):
        for j in range(n_neighbors+1):
            ax = axes[i, j]
            if j == 0:
                # First column: the query image itself.
                img = image
                ax.set_title("Input Image")
            else:
                # Remaining columns: the (j-1)-th neighbour. The title shows
                # 1 - distance, which is the cosine similarity when `knn`
                # uses the cosine metric.
                img = images[nbors[j-1]]
                ax.set_title(f"Sim: {1-distances[j-1]:.2f}")
                ax.set_xlabel(f"lbl: {labels[nbors[j-1]]}")
            ax.get_xaxis().set_ticks([])
            ax.get_yaxis().set_ticks([])
            ax.imshow(img)

In [101]:
# Index the baseline embeddings of the test set and browse neighbours
# interactively. The data, model and index are fixed; the sliders choose the
# first query image, the number of queries and the number of neighbours.
pretrained_knn = get_knn(features=pre_trained_features)
w.interact(show_similar_images, images=w.fixed(x_test), labels=w.fixed(y_test), vectorizer=w.fixed(vectorizer), knn=w.fixed(pretrained_knn),
    start_image_idx=w.IntSlider(max=len(x_test)-1, continuous_update=False),
    n_inputs=w.IntSlider(min=2, value=5, max=10, continuous_update=False),
    n_neighbors=w.IntSlider(min=2, value=5, max=10, continuous_update=False),
)

interactive(children=(IntSlider(value=0, continuous_update=False, description='start_image_idx', max=830), Int…

<function __main__.show_similar_images(images, labels, vectorizer, knn, start_image_idx, n_inputs=5, n_neighbors=10)>

The embeddings overlap and are not well separated. For this demo, we want each car label ("make model year") to form its own distinct cluster.

# Fine-tune the model

In [118]:
# Embedding model to be trained with a triplet loss:
#   random flip/rotation augmentation -> frozen ResNet-v2-50 feature extractor
#   -> Dense(384, relu) -> Dropout(0.3) -> Dense(256) -> L2 normalisation.
# The hub layer starts frozen, so initially only the dense head is trained.
tuned_vectorizer = tf.keras.Sequential([
    tf.keras.layers.RandomFlip("horizontal_and_vertical"),
    tf.keras.layers.RandomRotation(0.2),
    hub.KerasLayer("https://tfhub.dev/google/imagenet/resnet_v2_50/feature_vector/5", trainable=False),
    tf.keras.layers.Dense(384, activation="relu"),
    tf.keras.layers.Dropout(0.3),
    tf.keras.layers.Dense(256, activation=None), # No activation on final dense layer
    tf.keras.layers.Lambda(lambda x: tf.math.l2_normalize(x, axis=1)) # L2 normalize embeddings
])
# Build with the batch dimension left open and IMG_HEIGHT x IMG_WIDTH x 3 inputs.
tuned_vectorizer.build([None, IMG_HEIGHT, IMG_WIDTH, 3])

In [119]:
# Print the layer and parameter summary of the embedding model.
tuned_vectorizer.summary()

Model: "sequential_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
random_flip_18 (RandomFlip)  (None, 224, 224, 3)       0         
_________________________________________________________________
random_rotation_18 (RandomRo (None, 224, 224, 3)       0         
_________________________________________________________________
keras_layer_19 (KerasLayer)  (None, 2048)              23564800  
_________________________________________________________________
dense_17 (Dense)             (None, 384)               786816    
_________________________________________________________________
dropout (Dropout)            (None, 384)               0         
_________________________________________________________________
dense_18 (Dense)             (None, 256)               98560     
_________________________________________________________________
lambda_18 (Lambda)           (None, 256)             

In [124]:
# Stage 1: train only the dense head (the hub backbone is still frozen).
initial_lr = 1e-3
epochs = 100
bs = 128
decay = initial_lr / epochs

def lr_scheduler(epoch, lr):
    """Return the learning rate for `epoch`, shrinking the current `lr`.

    The current rate is divided by (1 + decay * epoch). Because the callback
    passes in the rate produced for the previous epoch, the reduction
    compounds from epoch to epoch.
    """
    return lr * 1 / (1 + decay * epoch)

tuned_vectorizer.compile(optimizer=tf.keras.optimizers.RMSprop(learning_rate=initial_lr),
                         loss=tfa.losses.TripletHardLoss())

# `lr_scheduler` was previously defined but never handed to `fit`, so the
# decay had no effect; it is now applied through LearningRateScheduler.
# Training stops early once the validation loss has not improved for 5 epochs.
history = tuned_vectorizer.fit(x=x_train, y=np.array(y_train),
                               callbacks=[
                                   tf.keras.callbacks.EarlyStopping(patience=5),
                                   tf.keras.callbacks.LearningRateScheduler(lr_scheduler),
                               ],
                               epochs=epochs,
                               batch_size=bs,
                               validation_data=(x_test, np.array(y_test)),
                               validation_batch_size=bs,
                              )

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100
Epoch 23/100
Epoch 24/100
Epoch 25/100
Epoch 26/100
Epoch 27/100
Epoch 28/100
Epoch 29/100
Epoch 30/100
Epoch 31/100
Epoch 32/100
Epoch 33/100
Epoch 34/100
Epoch 35/100
Epoch 36/100
Epoch 37/100
Epoch 38/100
Epoch 39/100
Epoch 40/100
Epoch 41/100
Epoch 42/100
Epoch 43/100
Epoch 44/100
Epoch 45/100
Epoch 46/100
Epoch 47/100
Epoch 48/100
Epoch 49/100
Epoch 50/100
Epoch 51/100
Epoch 52/100
Epoch 53/100
Epoch 54/100
Epoch 55/100
Epoch 56/100
Epoch 57/100
Epoch 58/100
Epoch 59/100
Epoch 60/100
Epoch 61/100
Epoch 62/100
Epoch 63/100
Epoch 64/100
Epoch 65/100
Epoch 66/100
Epoch 67/100
Epoch 68/100
Epoch 69/100
Epoch 70/100
Epoch 71/100
Epoch 72/100
Epoch 73/100
Epoch 74/100
Epoch 75/100
Epoch 76/100
Epoch 77/100
Epoch 78

Epoch 84/100
Epoch 85/100
Epoch 86/100
Epoch 87/100
Epoch 88/100
Epoch 89/100
Epoch 90/100
Epoch 91/100
Epoch 92/100
Epoch 93/100
Epoch 94/100
Epoch 95/100
Epoch 96/100
Epoch 97/100
Epoch 98/100
Epoch 99/100
Epoch 100/100


In [125]:
# Embed the test images with the model as it stands after the first training
# stage, i.e. with the hub backbone still frozen; show the resulting shape.
frozen_features = tuned_vectorizer.predict(x_test)
frozen_features.shape

(831, 256)

In [126]:
# Visualise the embeddings produced with the frozen backbone and trained head.
plot_embeddings(features=frozen_features, labels=y_test)


The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



In [127]:
# Same interactive neighbour browser as before, now backed by the embeddings
# of the model whose backbone is still frozen.
frozen_knn = get_knn(features=frozen_features)
w.interact(show_similar_images, images=w.fixed(x_test), labels=w.fixed(y_test), vectorizer=w.fixed(tuned_vectorizer), knn=w.fixed(frozen_knn),
    start_image_idx=w.IntSlider(max=len(x_test)-1, continuous_update=False),
    n_inputs=w.IntSlider(min=2, value=5, max=10, continuous_update=False),
    n_neighbors=w.IntSlider(min=2, value=5, max=10, continuous_update=False),
)

interactive(children=(IntSlider(value=0, continuous_update=False, description='start_image_idx', max=830), Int…

<function __main__.show_similar_images(images, labels, vectorizer, knn, start_image_idx, n_inputs=5, n_neighbors=10)>

In [128]:
# Stage 2: unfreeze the backbone. Index 2 is the hub.KerasLayer (it follows
# the RandomFlip and RandomRotation layers in the Sequential definition).
tuned_vectorizer.layers[2].trainable = True
# Re-compile so the change in `trainable` takes effect, switching to SGD with
# a much smaller learning rate than the 1e-3 used in the first stage.
tuned_vectorizer.compile(optimizer=tf.keras.optimizers.SGD(learning_rate=1e-5),
                         loss=tfa.losses.TripletHardLoss())
tuned_vectorizer.summary()

Model: "sequential_19"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
random_flip_18 (RandomFlip)  (None, 224, 224, 3)       0         
_________________________________________________________________
random_rotation_18 (RandomRo (None, 224, 224, 3)       0         
_________________________________________________________________
keras_layer_19 (KerasLayer)  (None, 2048)              23564800  
_________________________________________________________________
dense_17 (Dense)             (None, 384)               786816    
_________________________________________________________________
dropout (Dropout)            (None, 384)               0         
_________________________________________________________________
dense_18 (Dense)             (None, 256)               98560     
_________________________________________________________________
lambda_18 (Lambda)           (None, 256)             

In [129]:
# Fine-tune the whole network for up to 20 epochs with a smaller batch size.
# NOTE(review): this rebinds `epochs`, `bs` and `history` from the first
# training stage, so re-running the earlier cell afterwards needs care.
epochs = 20
bs = 16
# Training stops early once the validation loss has not improved for 5 epochs.
history = tuned_vectorizer.fit(x=x_train, y=np.array(y_train),
                               callbacks=[tf.keras.callbacks.EarlyStopping(patience=5)],
                               epochs=epochs, 
                               batch_size=bs, 
                               validation_data=(x_test, np.array(y_test)),
                               validation_batch_size=bs,
                              )

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20


In [130]:
# Triplet loss of the fine-tuned model on the test set.
tuned_vectorizer.evaluate(x_test, np.array(y_test))



1.2544819116592407

In [131]:
# Embed the test images with the fully fine-tuned model.
fine_tuned_features = tuned_vectorizer.predict(x_test)

In [132]:
# Visualise the embeddings after fine-tuning the backbone.
plot_embeddings(features=fine_tuned_features, labels=y_test)


The PCA initialization in TSNE will change to have the standard deviation of PC1 equal to 1e-4 in 1.2. This will ensure better convergence.



In [133]:
# Same interactive neighbour browser, now backed by the embeddings of the
# fully fine-tuned model.
fine_tuned_knn = get_knn(features=fine_tuned_features)
w.interact(show_similar_images, images=w.fixed(x_test), labels=w.fixed(y_test), vectorizer=w.fixed(tuned_vectorizer), knn=w.fixed(fine_tuned_knn),
    start_image_idx=w.IntSlider(max=len(x_test)-1, continuous_update=False),
    n_inputs=w.IntSlider(min=2, value=5, max=10, continuous_update=False),
    n_neighbors=w.IntSlider(min=2, value=5, max=10, continuous_update=False),
)

interactive(children=(IntSlider(value=0, continuous_update=False, description='start_image_idx', max=830), Int…

<function __main__.show_similar_images(images, labels, vectorizer, knn, start_image_idx, n_inputs=5, n_neighbors=10)>