In [36]:
import torchvision
import torchvision.transforms as transforms
from torchvision.datasets import CIFAR10
from torch.utils.data import Dataset, DataLoader
import numpy as np

In [37]:
trainset  = CIFAR10(root='./data', train=True , download=True)
testset   = CIFAR10(root='./data', train=False, download=True)
classDict = {'plane':0, 'car':1, 'bird':2, 'cat':3, 'deer':4, 'dog':5, 'frog':6, 'horse':7, 'ship':8, 'truck':9}


# Separating trainset/testset data/label
Xtr  = trainset.data
Xte  = testset.data
Ytr  = np.array(trainset.targets)
Yte   = np.array(testset.targets)

# flatten out all images to be one-dimensional
Xtr_rows = Xtr.reshape(Xtr.shape[0], 32 * 32 * 3) # Xtr_rows becomes 50000 x 3072
Xte_rows = Xte.reshape(Xte.shape[0], 32 * 32 * 3) # Xte_rows becomes 10000 x 3072

Files already downloaded and verified
Files already downloaded and verified


In [38]:
class NearestNeighbor(object):
  def __init__(self):
    pass

  def train(self, X, y):
    self.Xtr = X
    self.ytr = y

  def predict(self, X):
    num_test = X.shape[0]

    Ypred = np.zeros(num_test, dtype = self.ytr.dtype)
    xrange=range
    
    for i in xrange(num_test):
      distances = np.sum(np.abs(self.Xtr - X[i,:]), axis = 1)
      min_index = np.argmin(distances) 
      Ypred[i] = self.ytr[min_index] 
    return Ypred  


nn = NearestNeighbor()
nn.train(Xtr_rows, Ytr) 
NN_Yte_predict = nn.predict(Xte_rows) 


In [44]:
class KNearestNeighbor(object):
  def __init__(self):
    pass

  def train(self, X, y):
    self.Xtr = X
    self.ytr = y

  def predict(self, X, k):
    num_test = X.shape[0]

    Ypred = np.zeros(num_test, dtype = self.ytr.dtype)
    xrange=range
    
    for i in xrange(num_test):
      distances = np.sum(np.abs(self.Xtr - X[i,:]), axis = 1)
      sorted_distance = np.argsort(distances)[:k]
        
      if(np.size(np.where(np.bincount(self.ytr[sorted_distance]) > 1)[0])==0):
         Ypred[i] = self.ytr[sorted_distance[0]];
      else:
         Ypred[i] = np.bincount(self.ytr[sorted_distance]).argmax();
          
    return Ypred  


KNN_Yte_predict = np.zeros(shape=(2,Xte_rows.shape[0]));

index = 0;
for k in [3,5]:
    knn = KNearestNeighbor()
    knn.train(Xtr_rows, Ytr)
    KNN_Yte_predict[index] = knn.predict(Xte_rows,k) 
    index = index + 1

In [77]:
import pandas as pd
from sklearn.metrics import accuracy_score
from sklearn.metrics import f1_score

y_Actual =  Yte.tolist()
NN_y_Predicted = NN_Yte_predict.astype('int32').tolist()

print("Nearest Neighbour => ")
print()
data = {'y_Actual':  y_Actual,
        'NN_y_Predicted': NN_y_Predicted
        }

df = pd.DataFrame(data, columns=['y_Actual','NN_y_Predicted'])
confusion_matrix = pd.crosstab(df['y_Actual'], df['NN_y_Predicted'], rownames=['Actual'], colnames=['Predicted'])
print (confusion_matrix)
print ("Accuracy Score =>")
print(accuracy_score(y_Actual, NN_y_Predicted))
print ("F1 Score =>")
print(f1_score(y_Actual, NN_y_Predicted, average=None))
print()
print()



K = [3,5]
for i in range(KNN_Yte_predict.shape[0]):
    KNN_y_Predicted = KNN_Yte_predict[i].astype('int32').tolist()
    print("K - Nearest Neighbour => ")
    print("K = ", end ='')
    print(K[i])
    print()
    data = {'y_Actual':  y_Actual,
            'KNN_y_Predicted': KNN_y_Predicted
            }

    df = pd.DataFrame(data, columns=['y_Actual','KNN_y_Predicted'])
    confusion_matrix = pd.crosstab(df['y_Actual'], df['KNN_y_Predicted'], rownames=['Actual'], colnames=['Predicted'])
    print (confusion_matrix)
    print ("Accuracy Score =>" , end='')
    print(accuracy_score(y_Actual, KNN_y_Predicted))
    print()
    print ("F1 Score =>" , end='')
    print(f1_score(y_Actual, KNN_y_Predicted, average=None))
    print()
    print()


# KNN_Yte_predict = KNN_Yte_predict.astype('int32') 
# NN_Yte_predict

Nearest Neighbour => 

Predicted    0    1    2    3    4    5    6    7    8   9
Actual                                                    
0          524   24  155   24   31   18   42   12  161   9
1          189  126   99   68   81   71   53   30  236  47
2          233    2  313   88  140   65   52   20   76  11
3          196   17  161  171  106  146   86   28   80   9
4          178    6  259   66  246   80   51   16   94   4
5          195    7  138  149   82  242   60   37   82   8
6          114    3  233  114  154  124  165   10   78   5
7          190   16  167   99  160   65   65  133   88  17
8          273   45   64   32   36   27   17   16  481   9
9          219   89   76   64   60   74   53   42  232  91
Accuracy Score =>
0.2492
F1 Score =>
[0.31652069 0.18876404 0.23489681 0.1824     0.23473282 0.25313808
 0.20072993 0.19791667 0.36886503 0.15041322]


K - Nearest Neighbour => 
K = 3

Predicted    0    1    2    3    4    5    6    7    8   9
Actual                   