Euclidean Distance

In [None]:
import numpy as np
import matplotlib.pyplot as plt
import pickle
import os

from PIL import Image
import tensorflow as tf
from tensorflow import keras
from keras.datasets import cifar10

In [None]:
def load_batch(file_path, label_key='labels'):
    """Load one pickled CIFAR batch file.

    Args:
        file_path: Path to the pickled batch file.
        label_key: Key under which the labels are stored in the batch
            dictionary (defaults to 'labels').

    Returns:
        A ``(data, labels)`` tuple. ``data`` is the array stored under
        'data', reshaped to (num_samples, 3, 32, 32) and then transposed
        to channels-last (num_samples, 32, 32, 3). ``labels`` is the
        object stored under ``label_key``, returned unchanged.

    Raises:
        KeyError: If 'data' or ``label_key`` is missing from the batch.
    """
    # NOTE: unpickling can execute arbitrary code; only load batch files
    # that come from a trusted source.
    with open(file_path, 'rb') as f:
        raw = pickle.load(f, encoding='bytes')

    # With encoding='bytes', keys written by Python 2 arrive as bytes while
    # keys written by Python 3 are already str; normalise both to str.
    batch = {
        (key.decode('utf8') if isinstance(key, bytes) else key): value
        for key, value in raw.items()
    }

    data = batch['data']
    labels = batch[label_key]

    # Each row is a flattened channel-first image; restore the image axes
    # and move the channel axis last.
    data = data.reshape(data.shape[0], 3, 32, 32).transpose(0, 2, 3, 1)
    return data, labels


def load_data(path):
    """Load the CIFAR10 train and test splits from batch files in `path`.

    The files `data_batch_1` .. `data_batch_5` are read in order, each one
    filling a 10000-row slice of preallocated uint8 arrays; `test_batch`
    supplies the test split. Labels are reshaped into column vectors of
    shape (num_samples, 1).

    Returns:
        ((x_train, y_train), (x_test, y_test))
    """
    batch_size = 10000
    num_batches = 5
    num_train_samples = 50000

    images = np.empty((num_train_samples, 32, 32, 3), dtype='uint8')
    targets = np.empty((num_train_samples,), dtype='uint8')

    for batch_no in range(1, num_batches + 1):
        # Rows of the preallocated arrays that this batch fills.
        rows = slice((batch_no - 1) * batch_size, batch_no * batch_size)
        batch_images, batch_targets = load_batch(
            os.path.join(path, 'data_batch_{}'.format(batch_no)))
        images[rows] = batch_images
        targets[rows] = batch_targets

    test_images, test_targets = load_batch(os.path.join(path, 'test_batch'))

    # Turn the flat label sequences into (num_samples, 1) column vectors.
    targets = np.reshape(targets, (len(targets), 1))
    test_targets = np.reshape(test_targets, (len(test_targets), 1))

    return (images, targets), (test_images, test_targets)

In [None]:
# Read the dataset from the local batch directory. The Keras loader is an
# alternative source: (x_train, y_train), (x_test, y_test) = cifar10.load_data()
path = 'cifar-10-batches-py'
(x_train, y_train), (x_test, y_test) = load_data(path)

# Report the shape of every split.
for _caption, _array in (
        ("Train data (x_train): ", x_train),
        ("Train labels (y_train): ", y_train),
        ("Test data (x_test): ", x_test),
        ("Test labels (y_test): ", y_test),
):
    print(_caption, _array.shape)

In [None]:
# Class names; a name's position in the list is its integer class id.
label = [
    "airplane",
    "automobile",
    "bird",
    "cat",
    "deer",
    "dog",
    "frog",
    "horse",
    "ship",
    "truck",
]

In [None]:
# Preview a few randomly chosen training images, titled with their class name.
num_plot = 5
figure, axes = plt.subplots(1, num_plot)
for ax in axes.flat:
    img_id = np.random.randint(0, len(x_train))
    ax.set_title(label[y_train[img_id][0]])
    ax.imshow(x_train[img_id])

figure.set_facecolor('w')
figure.tight_layout()
plt.show()

In [None]:
def filter_class(class_name):
    """Return only the train and test samples labelled `class_name`.

    Reads the module-level `x_train`, `y_train`, `x_test`, `y_test` arrays
    and the `label` list.

    Returns:
        ((x_train_filter, y_train_filter), (x_test_filter, y_test_filter))

    Raises:
        ValueError: If `class_name` is not present in `label`.
    """
    class_id = label.index(class_name)

    def _select(images, targets):
        # One boolean per sample, flattened so it can index the first axis.
        mask = (targets == class_id).reshape(images.shape[0])
        return images[mask], targets[mask]

    return _select(x_train, y_train), _select(x_test, y_test)

In [None]:
def rgb2gray(rgb):
    """Convert colour pixel data to grayscale with a weighted channel sum.

    Only the first three entries of the last axis are used, so any extra
    channel (e.g. alpha) is ignored. The result has the input's shape
    without the last axis.
    """
    channel_weights = [0.2989, 0.5870, 0.1140]  # applied to R, G, B in order
    color_channels = rgb[..., :3]
    return np.dot(color_channels, channel_weights)

In [None]:
def euclidean_distance(image1, image2):
    """Return the Euclidean distance between two images in grayscale.

    Both images are converted with `rgb2gray`; the result is the square
    root of the summed squared per-pixel differences.
    """
    difference = rgb2gray(image1) - rgb2gray(image2)
    return np.sqrt(np.square(difference).sum())

In [None]:
# Pick a random query image from the test set and rank every test image
# (the query included) by its grayscale Euclidean distance to the query.
num_samples = x_test.shape[0]
take_one_img = np.random.randint(0, num_samples)

distances = [euclidean_distance(x_test[take_one_img], candidate)
             for candidate in x_test]
min_distance_ids = np.argsort(distances)

# Show the closest matches, each titled with its class name and distance.
num_plot = 6
figure, axes = plt.subplots(1, num_plot)

for rank, ax in enumerate(axes.flat):
    img_id = min_distance_ids[rank]
    ax.imshow(x_test[img_id])
    ax.set_title('\n'.join((label[y_test[img_id][0]],
                            str(distances[img_id]))))

figure.tight_layout()
figure.set_facecolor('w')
figure.subplots_adjust(top=1.0, right=2.0)
plt.show()
