# K Nearest Neighbors
### from Scratch

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

### Reading the Telecommunication Customer Data

In [2]:
# Read the customer CSV into a DataFrame, then show its first rows as the
# cell output so the column layout can be checked by eye.
data = pd.read_csv(filepath_or_buffer='../data/teleCust1000t.csv')
data.head()

Unnamed: 0,region,tenure,age,marital,address,income,ed,employ,retire,gender,reside,custcat
0,2,13,44,1,9,64.0,4,5,0.0,0,2,1
1,3,11,33,1,7,136.0,5,5,0.0,0,6,4
2,3,68,52,1,24,116.0,1,29,0.0,1,2,3
3,2,33,33,0,12,33.0,2,0,0.0,1,1,1
4,2,23,30,1,9,30.0,1,2,0.0,0,4,3


### Normalizing X (independent variables) and Flattening Y (labels)

In [3]:
# Labels: the last column ('custcat'), selected as a one-column frame for now.
y = data[['custcat']]
# Features: every column except the trailing label column. 'Unnamed: 0' is the
# name pandas gives to a header-less CSV column; in the preview above it simply
# repeats the row number, so it is dropped rather than being allowed to
# influence the neighbour distances. errors='ignore' keeps this cell working
# if the file is ever loaded without that column.
x = data[list(data.columns[:-1])].drop(columns=['Unnamed: 0'], errors='ignore')

from sklearn import preprocessing
# Standardize every feature so that large-valued columns (e.g. income) do not
# dominate the Euclidean distance.
# NOTE(review): the scaler is fit on all rows, i.e. before the train/test
# split, so test rows contribute to the scaling statistics. Fitting on the
# training rows only would avoid that leakage.
x = preprocessing.StandardScaler().fit(x).transform(x.astype(float))
y = np.asanyarray(y).ravel()  # flatten the (n, 1) label column into a 1-D array

### Splitting the Data 80:20 Train/Test

In [4]:
from sklearn import model_selection
# Fixed seed so the 80/20 split, and therefore every accuracy figure computed
# from it, is identical after a kernel restart.
RANDOM_STATE = 42
x_train, x_test, y_train, y_test = model_selection.train_test_split(
    x, y, test_size=0.2, random_state=RANDOM_STATE
)

### K-Nearest Neighbors Class

In [5]:
from scipy.spatial import distance 
import heapq
from collections import Counter

class KNNScrappy():
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    ``fit`` only memorises the training data; all distance work happens in
    ``predict``/``closest``, which compare the query against every stored
    sample.
    """

    def fit(self, x, y, k):
        """Store the training samples, their labels and the neighbour count.

        Parameters
        ----------
        x : array-like
            Training samples, one sample per row.
        y : array-like
            One label per row of ``x``.
        k : int
            Number of neighbours that vote; must satisfy
            ``1 <= k <= len(x)``.

        Raises
        ------
        ValueError
            If ``x`` and ``y`` differ in length, or ``k`` is outside
            ``1..len(x)``.
        """
        # Coerce to arrays so rows/labels are addressed by position; a pandas
        # object with a shuffled index would otherwise be looked up by label.
        x = np.asarray(x)
        y = np.asarray(y)

        if len(x) != len(y):
            raise ValueError(
                "x and y must have the same length, got %d and %d" % (len(x), len(y))
            )
        # Reject bad k up front instead of failing inside predict with an
        # IndexError from an exhausted heap or an empty vote count.
        if not 1 <= k <= len(x):
            raise ValueError(
                "k must be between 1 and the number of training samples (%d), got %r"
                % (len(x), k)
            )

        self.x = x
        self.y = y
        self.k = k

    def predict(self, x_test):
        """Return a list with the predicted label for each row of ``x_test``."""
        # np.asarray makes iteration row-wise for DataFrames and nested lists.
        return [self.closest(row) for row in np.asarray(x_test)]

    def closest(self, row):
        """Return the majority label among the ``k`` training samples nearest to ``row``."""
        scored = (
            (distance.euclidean(row, sample), label)
            for sample, label in zip(self.x, self.y)
        )
        # nsmallest yields the k smallest (distance, label) pairs in ascending
        # order without keeping a heap of every training sample.
        nearest = heapq.nsmallest(self.k, scored)

        # Counter keeps first-seen order among equal counts, so a tied vote
        # goes to the label whose neighbour is nearest.
        votes = Counter(label for _, label in nearest)
        return votes.most_common(1)[0][0]

### Training KNN Scrappy Classifier and Finding Best K

In [None]:
from sklearn import metrics

# Sweep k = 1..Ks with the from-scratch classifier and record the test-set
# accuracy of each run; mean_accuracy[k - 1] holds the score for k.
Ks = 25
k_values = np.arange(1, Ks + 1)
mean_accuracy = np.zeros(Ks)

for i in range(1, Ks + 1):
    clf = KNNScrappy()
    clf.fit(x_train, y_train, k=i)

    y_hat = clf.predict(x_test)
    mean_accuracy[i - 1] = metrics.accuracy_score(y_test, y_hat)

# Plot against the actual k values (1..Ks); using arange(Ks) would shift the
# curve one step to the left of the k reported in the print below.
plt.plot(k_values, mean_accuracy)
plt.title('KNNScrappy: accuracy vs. K')
plt.xlabel('K-values')
plt.ylabel('Accuracy')
plt.show()

# argmax is a 0-based position, hence the + 1 to get back to k.
print("Best Accuracy: ", max(mean_accuracy), ", with K = ", mean_accuracy.argmax() + 1)

### Training KNN Classifier & Finding Best K (for Comparison Purposes)

In [None]:
from sklearn import neighbors

# Same sweep over k = 1..Ks, this time with scikit-learn's classifier, so the
# two accuracy curves can be compared. Relies on Ks and metrics from the
# previous cell. mean_accuracy[k - 1] holds the score for k.
k_values = np.arange(1, Ks + 1)
mean_accuracy = np.zeros(Ks)

for i in range(1, Ks + 1):
    clf = neighbors.KNeighborsClassifier(n_neighbors=i)
    clf.fit(x_train, y_train)

    y_hat = clf.predict(x_test)
    mean_accuracy[i - 1] = metrics.accuracy_score(y_test, y_hat)

# Plot against the actual k values (1..Ks); using arange(Ks) would shift the
# curve one step to the left of the k reported in the print below.
plt.plot(k_values, mean_accuracy)
plt.title('sklearn KNeighborsClassifier: accuracy vs. K')
plt.xlabel('K-values')
plt.ylabel('Accuracy')
plt.show()

# argmax is a 0-based position, hence the + 1 to get back to k.
print("Best Accuracy: ", max(mean_accuracy), ", with K = ", mean_accuracy.argmax() + 1)