## Libraries

In [1]:
import numpy as np
import pandas as pd
import math

from sklearn.datasets import load_digits

from sklearn.model_selection import train_test_split

from sklearn.neighbors import KNeighborsClassifier

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

## Dataset

In [2]:
# Load the digits dataset once and take features, targets and class names
# from the same Bunch (previously the dataset was loaded a second time just
# to read target_names).
digits = load_digits()
X, Y = digits.data, digits.target
classes = digits.target_names
# Hold out 30% of the samples for testing; random_state is fixed so the split
# (and therefore every metric below) is reproducible.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.30, random_state=0)

## K Neighbors Classifier

In [3]:
class myKNeighborsClassifier:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    Every query sample is compared against every stored training sample; the
    predicted class is the most frequent label among the ``k`` closest
    training samples.

    Parameters
    ----------
    k : int, default 5
        Number of neighbours that vote. If ``k`` exceeds the number of
        training samples, all training samples vote.

    Notes
    -----
    Labels are counted with ``np.bincount``, so they must be non-negative
    integers. When several classes receive the same number of votes the
    smallest label wins (``np.argmax`` returns the first maximum). When
    several training samples are equidistant from a query, the one with the
    lowest position in the training set is preferred (stable sort).
    """

    def __init__(self, k=5):
        self.k = k
        self.Xtrain = None
        self.Ytrain = None

    @staticmethod
    def _as_samples(data):
        """Return ``data`` as a float array of shape (n_samples, n_features)."""
        # Casting to float avoids wrap-around when the features are unsigned
        # integers (e.g. uint8 images), and np.asarray makes pandas inputs
        # positionally indexable.
        arr = np.asarray(data, dtype=float)
        n_samples = arr.shape[0]
        if arr.size == 0:
            return arr.reshape(n_samples, 0)
        # Flatten everything after the first axis so 1-D and image-shaped
        # samples are handled the same way as plain feature vectors.
        return arr.reshape(n_samples, -1)

    def fit(self, Xtrain, Ytrain):
        """Store the training samples and their labels.

        Parameters
        ----------
        Xtrain : array-like, one row per training sample.
        Ytrain : array-like of non-negative integer labels, one per sample.

        Returns
        -------
        self : the fitted classifier, so calls can be chained.

        Raises
        ------
        ValueError
            If ``Xtrain`` and ``Ytrain`` do not have the same length.
        """
        samples = self._as_samples(Xtrain)
        # Plain ndarray so that later lookups are positional, not label-based
        # (a pandas Series with a shuffled index would otherwise be misread).
        labels = np.asarray(Ytrain)
        if len(samples) != len(labels):
            raise ValueError(
                "Xtrain and Ytrain must have the same length, got %d and %d"
                % (len(samples), len(labels))
            )
        self.Xtrain = samples
        self.Ytrain = labels
        return self

    def predict(self, X):
        """Predict a class label for every row of ``X``.

        Parameters
        ----------
        X : array-like, one row per query sample, same feature layout as the
            training data.

        Returns
        -------
        numpy.ndarray of shape (len(X),) and dtype float64 holding the
        predicted labels (float dtype is kept for backward compatibility).

        Raises
        ------
        ValueError
            If the classifier has not been fitted, if ``k`` is smaller than 1,
            or if the training set is empty while ``X`` is not.
        """
        if self.Xtrain is None or self.Ytrain is None:
            raise ValueError("This classifier has not been fitted yet; call fit() first.")
        if self.k < 1:
            raise ValueError("k must be a positive integer, got %r" % (self.k,))

        queries = self._as_samples(X)
        labels = np.asarray(self.Ytrain)
        n = len(self.Xtrain)
        m = len(queries)

        Ypred = np.zeros(m)
        if m == 0:
            return Ypred
        if n == 0:
            raise ValueError("Cannot predict from an empty training set.")

        # 1. Distance from every training sample (rows) to every query (columns).
        distances = self.calculateEuclideanDistance(np.zeros((n, m)), queries).to_numpy()

        # 2. Rank the training samples per query in a single pass; the first k
        #    rows are the positions of the k nearest neighbours of each query.
        nearest = np.argsort(distances, axis=0, kind="stable")[: self.k]
        neighbour_labels = labels[nearest]

        # 3. Majority vote among the neighbours of each query.
        for j in range(m):
            Ypred[j] = np.argmax(np.bincount(neighbour_labels[:, j]))

        # 4. Return the predictions.
        return Ypred

    def calculateEuclideanDistance(self, distanceMatrix, X):
        """Fill ``distanceMatrix`` with train-to-query Euclidean distances.

        Parameters
        ----------
        distanceMatrix : numpy.ndarray of shape (n, m)
            Buffer that is overwritten in place; entry ``[i, j]`` becomes the
            distance between training sample ``i`` and query sample ``j``.
        X : array-like with at least ``m`` query samples.

        Returns
        -------
        pandas.DataFrame wrapping the filled matrix.
        """
        train = self._as_samples(self.Xtrain)
        queries = self._as_samples(X)
        n, m = distanceMatrix.shape

        # One vectorised pass over all training samples per query instead of
        # n*m scalar iterations.
        for j in range(m):
            deltas = train[:n] - queries[j]
            distanceMatrix[:, j] = np.sqrt(np.sum(deltas * deltas, axis=1))

        return pd.DataFrame(distanceMatrix)

In [4]:
# Train the hand-written classifier with its default k and predict the test set.
KNeighborsModel = myKNeighborsClassifier()
KNeighborsModel.fit(Xtrain, Ytrain)
# predict() fills a float64 array; cast back to the integer label dtype so the
# predictions have the same type as the ground-truth labels.
Ypred = KNeighborsModel.predict(Xtest).astype(Ytest.dtype)

print('Accuracy:', accuracy_score(Ytest, Ypred))
# Pass the labels explicitly so the matrix always has one row/column per class,
# even if some class were missing from both Ytest and Ypred; otherwise the
# DataFrame below could not be built with `classes` as index/columns.
m = confusion_matrix(Ytest, Ypred, labels=classes)
# Rows are the true classes, columns the predicted classes.
df = pd.DataFrame(m, index=classes, columns=classes)
df

Accuracy: 0.9814814814814815


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,45,0,0,0,0,0,0,0,0,0
1,0,51,0,0,0,1,0,0,0,0
2,0,0,52,0,0,0,0,1,0,0
3,0,0,1,53,0,0,0,0,0,0
4,0,0,0,0,47,0,0,1,0,0
5,0,0,0,0,0,55,1,0,0,1
6,0,0,0,0,0,0,60,0,0,0
7,0,0,0,0,0,0,0,53,0,0
8,0,1,0,1,0,0,1,0,58,0
9,0,0,0,0,0,1,0,0,0,56


### Own KNeighborsClassifier vs. Sklearn KNeighborsClassifier

In [5]:
# Reference model: scikit-learn's k-NN classifier with its default settings,
# trained and evaluated on exactly the same split as the hand-written one.
SKNeighborsClassifierModel = KNeighborsClassifier().fit(Xtrain, Ytrain)
Ypred2 = SKNeighborsClassifierModel.predict(Xtest)

In [6]:
# Report the test-set accuracy of both classifiers side by side.
for caption, predictions in (
    ('Own KNeighborsClassifier Accuracy:', Ypred),
    ('Sklearn KNeighborsClassifier Accuracy:', Ypred2),
):
    print(caption, accuracy_score(Ytest, predictions))

Own KNeighborsClassifier Accuracy: 0.9814814814814815
Sklearn KNeighborsClassifier Accuracy: 0.9814814814814815
