In [184]:
# util for printing
from utils import score_fn 

In [1]:
import tensorflow_datasets as tf_ds
import tensorflow as tf
import matplotlib.pyplot as plt
import numpy as np


## Data preparation

In [7]:

# download the data
data = tf_ds.load('cifar10_1/v6', split='test')

In [8]:
data

<PrefetchDataset element_spec={'image': TensorSpec(shape=(32, 32, 3), dtype=tf.uint8, name=None), 'label': TensorSpec(shape=(), dtype=tf.int64, name=None)}>

#### labels

airplane : 0

automobile : 1

bird : 2

cat : 3

deer : 4

dog : 5

frog : 6

horse : 7

ship : 8

truck : 9



In [9]:
tensor_dict = next(iter(data))
print('Shape of image', tensor_dict['image'].shape)
print('Shape of label', tensor_dict['label'].shape)

Shape of image (32, 32, 3)
Shape of label ()


In [10]:
images, labels = [], []

for example in data:
    images.append(example['image'].numpy())
    labels.append(example['label'].numpy())
        

In [11]:
images = np.array(images)
labels = np.array(labels)

print('shape of images',images.shape)
print('shape of labels',labels.shape)

shape of images (2000, 32, 32, 3)
shape of labels (2000,)


In [12]:
# train test split

train_images = images[:1500]
test_images = images[1500:]
train_labels = labels[:1500]
test_labels = labels[1500:]


In [13]:
test_images.reshape(test_images.shape[0],-1).shape

(500, 3072)

## Nearest Neighbours

In [222]:
%%writefile './Nearest_Neighbours.py'

import numpy as np

class NearestNeighbours:

    def __init__(self, images, labels, norm='l1'):
        self.train_images = images.reshape(images.shape[0],-1)
        self.train_labels = labels
        self.norm=norm.lower()
    
    def distance_measure(self, train, test, i):
        if self.norm == 'l2':
            return np.sum(np.sqrt( np.square( train-test[i,:] )), axis=1)
        else:
            return np.sum(np.abs(train - test[i,:]), axis=1)


    def __call__(self, images):
        num_images = images.shape[0]
        test_images = images.reshape(num_images,-1)
        preds = np.zeros(num_images, dtype=np.int32)

        # find nearest training image for each of the test-images
        # using L1-distance (sum of absolute differences)
        # or     
        # using L2-distance (sum of squared differences)
        for i in range(num_images):
            distance = self.distance_measure(self.train_images, test_images, i)
            least_dist_id = np.argmin(distance)
            preds[i]=self.train_labels[least_dist_id]
        
        return preds

Overwriting ./Nearest_Neighbours.py


In [223]:
# create instances with L1 and L2 penalty 
neighbour_l1 = NearestNeighbours(train_images, train_labels,'L1')
neighbour_l2 = NearestNeighbours(train_images, train_labels,'L2')

In [224]:
%%time
predictions_l1 = neighbour_l1(test_images)

CPU times: total: 4.94 s
Wall time: 4.97 s


In [225]:
%%time
predictions_l2 = neighbour_l2(test_images)

CPU times: total: 44.9 s
Wall time: 45.5 s


In [226]:
print('Nearest Neighbour classifier with L1 penalty')
accuracy = np.sum(predictions_l1  == test_labels) / len(predictions_l1)
print('Model accuracy',accuracy)

# correct prediction (true positives)
t = np.sum(predictions_l1==test_labels)
f = len(predictions_l1) - t

print('Correct prediction ',t)
print('Incorrect prediction ',f)

Nearest Neighbour classifier with L1 penalty
Model accuracy 0.168
Correct prediction  84
Incorrect prediction  416


In [227]:
print('Nearest Neighbour classifier with L2 penalty')

score_fn(predictions_l2, test_labels)

Nearest Neighbour classifier with L2 penalty
Model accuracy 0.162
Correct prediction  81
Incorrect prediction  419


## K Nearest Neighbours

In [234]:
%%writefile K_Nearest_Neighbours.py

import numpy as np
from collections import Counter

class KNearestNeighbours:

    def __init__(self, images, labels, k=1, norm='l1'):
        self.train_images = images.reshape(images.shape[0],-1)
        self.train_labels = labels
        self.norm=norm.lower()
        self.k=k
    
    def distance_measure(self, train, test, i):
        if self.norm == 'l2':
            return np.sum(np.sqrt( np.square( train - test[i,:] )), axis=1)
        else:
            return np.sum(np.abs(train - test[i,:]), axis=1)


    def __call__(self, images):
        num_images = images.shape[0]
        test_images = images.reshape(num_images,-1)
        predictions = np.zeros(num_images, dtype=np.int32)

        # find nearest training image for each of the test-images
        # using L1-distance (sum of absolute differences)
        # or     
        # using L2-distance (sum of squared differences)
        for i in range(num_images):
            distance = self.distance_measure(self.train_images, test_images, i)
            ids = np.argsort(distance)

            preds = self.train_labels[ids]
            votes = Counter(preds[:self.k])
            winner = sorted(votes.items(), key=lambda x:x[1], reverse=True)[0]
            
            predictions[i] = winner[0]

        return predictions

Writing K_Nearest_Neighbours.py


In [229]:
%%time

# create instances with L1 norm and k = 6
knn_l1 = KNearestNeighbours(train_images, train_labels, k=6, norm='L1')

knn_predictions_l1 = knn_l1(test_images)

print('KNearest Neighbour classifier with L1 norm')
score_fn(knn_predictions_l1, test_labels)

KNearest Neighbour classifier with L1 norm
Model accuracy 0.196
Correct prediction  98
Incorrect prediction  402
CPU times: total: 5.08 s
Wall time: 5.21 s


In [236]:
%%time

# create instances with L2 norm 

knn_l2 = KNearestNeighbours(train_images, train_labels, k=3, norm='L2')

knn_predictions_l2 = knn_l2(test_images)

print('KNearest Neighbour classifier with L2 norm')
score_fn(knn_predictions_l2, test_labels)

KNearest Neighbour classifier with L2 norm
Model accuracy 0.166
Correct prediction  83
Incorrect prediction  417
CPU times: total: 46.8 s
Wall time: 51.3 s
