In [7]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.model_selection import train_test_split
from sklearn.model_selection import LeaveOneOut
from sklearn.preprocessing import Normalizer
from sklearn.preprocessing import normalize
from sklearn import datasets
from sklearn import metrics
import numpy as np

In [8]:
#1 Normal KNN
def normalKNN(trainX, testX, trainY, testY, n_neighbors=3):
  """Fit one ball-tree KNN classifier and print its accuracy on the test split.

  Args:
    trainX: training feature matrix passed to ``fit``.
    testX: test feature matrix passed to ``predict``.
    trainY: training labels passed to ``fit``.
    testY: true test labels the predictions are scored against.
    n_neighbors: number of neighbours used for the vote. Defaults to 3, the
      value that used to be hard-coded, so existing calls behave as before.

  Returns:
    None. The accuracy is printed, not returned.
  """
  model = KNeighborsClassifier(n_neighbors=n_neighbors, algorithm='ball_tree')
  model.fit(trainX, trainY)
  predicted = model.predict(testX)
  print("Accuracy Score = ", metrics.accuracy_score(testY, predicted))

Leave-one-out cross-validation is K-fold cross-validation taken to its logical extreme, with K equal to N, the number of data points in the set. That means that, N separate times, the function approximator is trained on all the data except for one point, and a prediction is made for that held-out point.

In [9]:
#2 Local Unrestricted KNN
# Candidate neighbourhood sizes shared by every helper in this cell:
# k runs over K_MIN .. K_MAX - 1, and trainedModels[i] is the model for k = K_MIN + i.
K_MIN = 2
K_MAX = 30


def getValidk(trainX, trainY=None):
  """Record, for every training sample, the k values that classify it correctly.

  Runs leave-one-out over ``trainX``: each sample is held out in turn, a
  KNeighborsClassifier is fitted on the remaining samples for every candidate
  k, and k is kept when the held-out sample is predicted correctly. This fits
  one classifier per (sample, k) pair, so it is slow on large training sets.

  Args:
    trainX: training feature matrix, one row per sample.
    trainY: training labels aligned with ``trainX``. Optional only for
      backward compatibility: when omitted, the notebook-global ``trainY`` is
      used, which is what the original implementation did implicitly.

  Returns:
    1-D object array of length ``len(trainX)``; element i is the list of k
    values that classified sample i correctly (possibly empty).

  Raises:
    ValueError: if no labels are available or the lengths disagree.
  """
  if trainY is None:
    # Legacy call style getValidk(trainX): keep it working, but fail with a
    # clear message instead of a NameError when the global is missing.
    try:
      trainY = globals()["trainY"]
    except KeyError:
      raise ValueError("getValidk needs the training labels: pass trainY explicitly") from None
  trainX = np.asarray(trainX)
  trainY = np.asarray(trainY)
  if len(trainX) != len(trainY):
    raise ValueError("trainX and trainY must have the same number of samples")

  # Object array filled slot by slot: the per-sample lists are ragged, and
  # np.array() on ragged nested lists is deprecated/rejected by recent numpy.
  validK = np.empty(len(trainX), dtype=object)
  for train_index, test_index in LeaveOneOut().split(trainX):
    trainData, trainLabel = trainX[train_index], trainY[train_index]
    testData, testLabel = trainX[test_index], trainY[test_index]
    validSampleK = []
    # k cannot exceed the number of samples left after holding one out.
    for k in range(K_MIN, min(K_MAX, len(trainData) + 1)):
      model = KNeighborsClassifier(n_neighbors=k)
      model.fit(trainData, trainLabel)
      if model.predict(testData)[0] == testLabel[0]:
        validSampleK.append(k)
    validK[test_index[0]] = validSampleK
  return validK


def getPreTrainedModels(trainX, trainY):
  """Fit one ball-tree KNN classifier per candidate k on the full training set.

  Returns:
    List of fitted models; element i was built with ``n_neighbors = K_MIN + i``.
  """
  return [
    KNeighborsClassifier(n_neighbors=k, algorithm='ball_tree').fit(trainX, trainY)
    for k in range(K_MIN, K_MAX)
  ]


def getPrediction(validK, trainedModels, M, trainX, testX, trainY):
  """Predict each test sample with the k preferred by its M nearest training samples.

  For every test sample the M closest training samples (Euclidean distance)
  each vote for all k values listed for them in ``validK``. The k with the
  most votes wins (the smallest k on ties) and the matching pre-trained model
  makes the prediction. If the neighbours cast no vote at all, the model for
  the smallest candidate k (``trainedModels[0]``) is used.

  Args:
    validK: per-training-sample collections of valid k values, indexable by
      training-sample position (as returned by ``getValidk``).
    trainedModels: fitted models where element i corresponds to k = K_MIN + i.
    M: number of nearest training samples that vote; values below 1 mean no
      votes are cast and the fallback model is used.
    trainX: training feature matrix, one row per sample.
    testX: test feature matrix, one row per sample.
    trainY: unused; kept so existing positional calls keep working.

  Returns:
    List with one entry per test sample, each being whatever the chosen
    model's ``predict`` returned for that single sample.

  Raises:
    ValueError: if ``trainedModels`` is empty.
  """
  if len(trainedModels) == 0:
    raise ValueError("trainedModels must contain at least one fitted model")
  trainX = np.asarray(trainX)
  testX = np.asarray(testX)

  finalPredictions = []
  for sample in testX:
    # Distance from this test sample to every training sample in one shot.
    distances = np.linalg.norm(trainX - sample, axis=1)
    # Stable sort keeps tie-breaking deterministic (lowest index first).
    nebrIndices = np.argsort(distances, kind="stable")[:max(M, 0)]

    # votes[i] counts how many neighbours were classified correctly with k = K_MIN + i.
    votes = np.zeros(len(trainedModels))
    for ind in nebrIndices:
      for k in validK[ind]:
        slot = k - K_MIN
        # Ignore k values we have no pre-trained model for.
        if 0 <= slot < len(trainedModels):
          votes[slot] += 1
    # argmax of an all-zero tally is 0, i.e. the smallest candidate k.
    best = int(np.argmax(votes))
    finalPredictions.append(trainedModels[best].predict(sample.reshape(1, -1)))
  return finalPredictions


def localUnrestrictedKNN(trainX, testX, trainY, testY, M):
  """Run the local-k KNN pipeline end to end and print its test accuracy.

  Builds the leave-one-out valid-k table, pre-trains one model per candidate
  k, predicts every test sample via ``getPrediction`` and prints the accuracy.

  Returns:
    None. The accuracy is printed, not returned.
  """
  validK = getValidk(trainX, trainY)
  trainedModels = getPreTrainedModels(trainX, trainY)
  predicted = getPrediction(validK, trainedModels, M, trainX, testX, trainY)
  # Each prediction is a length-1 array; hand the metric a flat label vector.
  predicted = np.asarray(predicted).ravel()
  print("Accuracy Score = ", metrics.accuracy_score(testY, predicted))

In [10]:
def get_dataset():
  """Load scikit-learn's bundled digits dataset and return ``(data, target)``."""
  digits = datasets.load_digits()
  return digits.data, digits.target

# Load the full feature matrix and label vector once.
X, y = get_dataset()

# Hold out 20% of the samples for testing; the fixed seed keeps the split reproducible.
trainX, testX, trainY, testY = train_test_split(
  X, y, test_size=0.2, random_state=42
)

# Apply normalize() with its defaults to each split independently
# (per scikit-learn's docs this rescales every sample to unit L2 norm).
normalized_x_train, normalized_x_test = (
  normalize(split) for split in (trainX, testX)
)

In [11]:
# Baseline: plain KNN evaluated on the normalized train/test split.
normalKNN(
  trainX=normalized_x_train,
  testX=normalized_x_test,
  trainY=trainY,
  testY=testY,
)

Accuracy Score =  0.9861111111111112


In [12]:
# Local-k variant on the same normalized split, with neighbourhood size M = 1.
localUnrestrictedKNN(
  trainX=normalized_x_train,
  testX=normalized_x_test,
  trainY=trainY,
  testY=testY,
  M=1,
)



Accuracy Score =  0.9861111111111112
