# KNN

## Initialization

In [122]:
import numpy as np
from scipy.stats import mode
from sklearn.metrics import confusion_matrix

## Define the Class

In [123]:
class KNN(object):
    def __init__(self):
        self.x_train = None
        self.label = None

    def fit(self, x_train, label):
        self.x_train = x_train
        if self.x_train.shape[0] < self.x_train.shape[1]:
            self.x_train = self.x_train.T
        self._input_dimension = self.x_train.shape[1]
        self.label = label

    def predict(self, x, k):
        self.k = k
        self._xtest = x
        self.__distance(k)
        return self.final_label

    def __distance(self, k):
        self._metrics = np.linalg.norm(self._xtest.reshape(1, self._input_dimension) - self.x_train, axis=1)
        self.__sort(k)

    def __sort(self, k):
        index = np.argsort(self._metrics)
        self.label_sort = self.label[index]
        self.final_label = mode(self.label_sort[0:k])[0][0]


In [124]:
N = 1000
X1 = 2 + np.random.randn(N, 1)
y1 = np.zeros((N, 1))
X2 = 4 + np.random.randn(N, 1)
y2 = np.ones((N, 1))
X_train = np.vstack((X1, X2))
y_train = np.vstack((y1, y2))

X1 = 2 + np.random.randn(int(N/2), 1)
y1 = np.zeros((int(N/2), 1))
X2 = 4 + np.random.randn(int(N/2), 1)
y2 = np.ones((int(N/2), 1))

X_test = np.vstack((X1, X2))
y_test = np.vstack((y1, y2))


## Create an object 

In [131]:
model = KNN()
model.fit(X_train, y_train)

In [133]:
y_pre = []
k = 10
for i in range(len(X_test)):
    y_pre.append(model.predict(X_test[i, ], k))

In [134]:
2*confusion_matrix(y_pre, y_test)/len(X_test)

array([[0.81 , 0.202],
       [0.19 , 0.798]])

## Using Sklearn Package 

In [135]:
from sklearn.neighbors import KNeighborsClassifier
model = KNeighborsClassifier(n_neighbors=10)
model.fit(X_train, y_train)

  This is separate from the ipykernel package so we can avoid doing imports until


KNeighborsClassifier(n_neighbors=10)

In [136]:
y_pre = model.predict(X_test)
2*confusion_matrix(y_pre, y_test)/len(X_test)

array([[0.81 , 0.202],
       [0.19 , 0.798]])

# Parzen density estimation


In [137]:
class Parzen(object):
    def __init__(self):
        self.x_train = None

    def fit(self, x_train, h):
        self.h = h
        self.x_train = x_train
        self.dimension = self.x_train.shape[1]

    def __kernel(self, x):
        self.__distance = (x.reshape(1, self.dimension) - self.x_train)/self.h

    def __p(self, x):
        self.__kernel(x)
        index = np.where(np.abs(self.__distance) < 0.55)
        self.__distance[index[0]] = 1
        self.__distance[self.__distance != 1] = 0
        self.sum = np.sum(self.__distance)
        return np.sum(self.__distance)/(len(self.x_train)*(self.h**self.dimension))

    def predict(self, x):
        y_pre = self.__p(x)
        return y_pre

In [138]:
model1 = Parzen()
model2 = Parzen()
h = 1
model1.fit(X1, h)
model2.fit(X2, h)
y_pre = []
for i in range(len(X_test)):
    p1 = model1.predict(X_test[i, ])
    p2 = model2.predict(X_test[i, ])
    if p1 >= p2:
        y_pre.append(0)
    else:
        y_pre.append(1) 

In [139]:
2*confusion_matrix(y_pre, y_test)/len(X_test)

array([[0.832, 0.168],
       [0.168, 0.832]])