In [10]:
from sklearn import datasets
from collections import Counter 
from sklearn.neighbors import KNeighborsClassifier
import numpy as np
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split 
from sklearn import preprocessing
from sklearn.model_selection import cross_val_score

In [17]:
# Load the breast-cancer dataset bundled with scikit-learn.
bdata = datasets.load_breast_cancer()

x = bdata.data
y = bdata.target

# Split BEFORE scaling so that the held-out rows never influence the scaler's
# mean/variance (fitting the scaler on the full matrix leaks test-set
# statistics into preprocessing). The split itself is driven only by
# random_state and the number of rows, so it does not depend on the scaling.
# NOTE: scores recorded from earlier runs of this notebook were produced with
# the scaler fitted on the full dataset, so re-running may shift them slightly.
x_train_raw, x_test_raw, y_train, y_test = train_test_split(
    x, y, random_state=1, test_size=0.3
)

# Feature scaling is important with KNN; learn it from the training rows only.
scaler = preprocessing.StandardScaler()
scaler.fit(x_train_raw)

x_train = scaler.transform(x_train_raw)
x_test = scaler.transform(x_test_raw)

# Whole dataset under the same scaling, kept under its original name for any
# other cell that refers to it.
x1 = scaler.transform(x)

In [20]:
# Baseline: accuracy of scikit-learn's built-in KNN classifier (k = 7).

# fit() returns the estimator itself, so construction and fitting can be chained.
clf = KNeighborsClassifier(n_neighbors=7).fit(x_train, y_train)
y_pred = clf.predict(x_test)

builtin_score = accuracy_score(y_test, y_pred)
print("Inbuilt KNN gives score :", builtin_score)

Inbuilt KNN gives score : 0.959064327485


## Our implementation

In [58]:
# The fit function does nothing: this KNN keeps no model, and the training
# data is passed directly to the predict functions instead.

def fit(x_train, y_train):
    """Placeholder training step: accepts the training data and returns None."""
    return None

# predict_one predicts the class for a single row of x_test.

def predict_one(x_train, y_train, x_test_row, k):
    """Classify one sample by majority vote among its k nearest training rows.

    Parameters
    ----------
    x_train : array-like, shape (n_train, n_features)
        Training feature matrix.
    y_train : sequence of length n_train
        Class label of each training row (indexed by integer position).
    x_test_row : array-like, shape (n_features,)
        The single sample to classify.
    k : int
        Number of neighbours that vote; must satisfy 1 <= k <= n_train.

    Returns
    -------
    The label that occurs most often among the k nearest training rows.
    When several labels share the top count, the one met first while walking
    the neighbours from nearest to farthest is returned.

    Raises
    ------
    ValueError
        If k is smaller than 1 or larger than the number of training rows.
    """
    train = np.asarray(x_train)
    query = np.asarray(x_test_row)
    n_train = len(train)

    # Fail early with a clear message instead of an IndexError further down.
    if not 1 <= k <= n_train:
        raise ValueError(
            "k must be between 1 and the number of training rows "
            "(%d), got %r" % (n_train, k)
        )

    # Squared Euclidean distance to every training row in one vectorised step.
    # The square root is skipped because it does not change the ordering.
    sq_dists = ((train - query) ** 2).reshape(n_train, -1).sum(axis=1)

    # A stable sort keeps equally distant rows in training order, which is the
    # same ordering obtained by sorting [distance, index] pairs.
    nearest = np.argsort(sq_dists, kind="stable")[:k].tolist()

    # Majority vote over the labels of the k closest rows.
    votes = Counter(y_train[i] for i in nearest)
    return votes.most_common(1)[0][0]

# predict_all classifies every row of x_test.

def predict_all(x_train, y_train, x_test, k):
    """Return a list holding predict_one's result for each row of x_test, in order."""
    return [predict_one(x_train, y_train, row, k) for row in x_test]

In [59]:
# How Counter (from Python's collections module) works

arr = [1,2,3,2,1,2,2,2,1,1,1,2,23,3]

# Build the tally once and reuse it for every print below.
counts = Counter(arr)
print(counts)

# most_common(n) gives the n most frequent values as (value, count) pairs.
top = counts.most_common(1)
print(top)

print(counts.most_common(2))

# First pair of the list, then just the value from that pair.
print(top[0])

print(top[0][0])

Counter({2: 6, 1: 5, 3: 2, 23: 1})
[(2, 6)]
[(2, 6), (1, 5)]
(2, 6)
2


In [62]:
# Run the hand-written KNN with k = 7, the same k used for the built-in classifier.
y_pred = predict_all(x_train, y_train, x_test, k=7)

custom_score = accuracy_score(y_test, y_pred)
print("Implemented KNN gives result accuracy:", custom_score)

Implemented KNN gives result accuracy: 0.959064327485


# The two accuracy scores match: our implementation agrees with scikit-learn's built-in KNN classifier for k = 7.