# K-Nearest Neighbors

### Implementation 

In [12]:
import numpy as np
from collections import Counter

In [21]:
class KNearestNeighbor:
    """k-nearest-neighbour classifier using Euclidean (L2) distance and majority vote.

    Training data is simply memorised by ``fit``. ``predict`` labels each query
    sample with the most common label among its ``k`` closest training samples.

    Parameters
    ----------
    k : int
        Number of neighbours that vote on each prediction. Must be >= 1.
        If ``k`` exceeds the number of training samples, all of them vote.

    Attributes
    ----------
    X : numpy.ndarray or None
        Training features as a float array of shape (n_samples, n_features);
        ``None`` until ``fit`` is called.
    y : numpy.ndarray or None
        Training labels, one per row of ``X``; ``None`` until ``fit`` is called.
    """

    def __init__(self, k):
        # bool is a subclass of int, so reject it explicitly.
        if isinstance(k, bool) or not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = k
        self.X = None
        self.y = None

    @staticmethod
    def _as_feature_matrix(X):
        """Coerce array-like ``X`` to a float array of shape (n_samples, n_features)."""
        # Casting to float prevents wrap-around when subtracting unsigned
        # integer features (e.g. uint8 pixel values).
        arr = np.asarray(X, dtype=float)
        if arr.ndim == 0:
            raise ValueError("expected a sequence of samples, got a scalar")
        # Flatten every sample to one row so that scalar samples (1-D input)
        # and multi-dimensional samples are handled uniformly.
        n_features = int(np.prod(arr.shape[1:], dtype=int))
        return arr.reshape(arr.shape[0], n_features)

    def fit(self, X_train, y_train):
        """Memorise the training set.

        Parameters
        ----------
        X_train : array-like
            Training samples; the first axis indexes samples.
        y_train : array-like
            One label per training sample.

        Raises
        ------
        ValueError
            If the training set is empty or the number of labels does not
            match the number of samples.
        """
        X = self._as_feature_matrix(X_train)
        # Converting to an array guarantees positional indexing of the labels,
        # whatever container they arrived in.
        y = np.asarray(y_train)
        if X.shape[0] == 0:
            raise ValueError("training set is empty")
        if y.ndim == 0 or y.shape[0] != X.shape[0]:
            raise ValueError(
                "X_train and y_train must contain the same number of samples"
            )
        self.X = X
        self.y = y

    def predict(self, X_test):
        """Predict a label for every sample in ``X_test``.

        Parameters
        ----------
        X_test : array-like
            Query samples with the same number of features as the training data.

        Returns
        -------
        list
            One predicted label per query sample, in input order. When several
            labels share the highest vote count, the label of the nearest
            neighbour among them wins.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If the query samples do not have the same number of features as
            the training samples.
        """
        if self.X is None or self.y is None:
            raise RuntimeError("fit must be called before predict")

        # Re-coerce in case the attributes were assigned directly instead of via fit().
        X_train = self._as_feature_matrix(self.X)
        y_train = np.asarray(self.y)
        X_query = self._as_feature_matrix(X_test)

        if X_query.shape[0] and X_query.shape[1] != X_train.shape[1]:
            raise ValueError(
                f"X_test has {X_query.shape[1]} features per sample, "
                f"but the model was fitted with {X_train.shape[1]}"
            )

        predictions = []
        for xi_test in X_query:
            # L2 distance from this query sample to every training sample at once.
            distances = np.sqrt(np.sum((X_train - xi_test) ** 2, axis=1))

            # Indices of the k smallest distances, nearest first. A stable sort
            # makes equal distances resolve to the lower training index.
            nearest = np.argsort(distances, kind="stable")[:self.k]

            # Majority vote; Counter keeps first-seen order among equal counts,
            # so vote ties go to the label that appears nearest.
            # Counter example : [('class_1', 3), ('class_2', 2)]
            votes = Counter(y_train[nearest])
            predictions.append(votes.most_common(1)[0][0])

        return predictions

In [22]:
from sklearn import datasets
from sklearn.model_selection import train_test_split
from sklearn import metrics
from sklearn import preprocessing

In [23]:
# Fixed seed so the train/test split is identical on every Restart & Run All.
RANDOM_STATE = 42

wine = datasets.load_wine()
X_train, X_test, y_train, y_test = train_test_split(
    wine.data, wine.target, test_size=0.3, random_state=RANDOM_STATE
)

# Standardize the features. The scaler is fitted on the training split only and
# then applied to both splits, so no test-set statistics leak into training.
scaler = preprocessing.StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

In [25]:
# Evaluate the classifier with a single neighbour (K = 1).
knn = KNearestNeighbor(k=1)
knn.fit(X_train, y_train)

# Predict on both splits. The training samples are queried against
# themselves here, so each one is at distance 0 from its own stored copy.
y_train_predict = knn.predict(X_train)
y_test_predict = knn.predict(X_test)

train_accuracy = metrics.accuracy_score(y_train, y_train_predict)
test_accuracy = metrics.accuracy_score(y_test, y_test_predict)

print("Train Accuracy:", train_accuracy)
print("Test Accuracy:", test_accuracy)

Train Accuracy: 1.0
Test Accuracy: 0.9444444444444444
