## Librerías

In [25]:
import numpy as np
import pandas as pd
import math

from sklearn.datasets import load_digits

from sklearn.model_selection import train_test_split

from sklearn.neighbors import NearestCentroid

from sklearn.metrics import accuracy_score
from sklearn.metrics import confusion_matrix

## Dataset

In [26]:
# Load the digits bundle once and read features, labels and class names from
# the same object instead of calling load_digits() a second time.
digits = load_digits()
X, Y = digits.data, digits.target
classes = digits.target_names

# Hold out 30% of the samples for testing; the fixed random_state keeps the
# split (and therefore the reported accuracies) reproducible across runs.
Xtrain, Xtest, Ytrain, Ytest = train_test_split(X, Y, test_size=0.30, random_state=0)

## Nearest Centroid

In [27]:
class myNearestCentroid:
    """Nearest-centroid classifier.

    Every class is summarised by a single centroid, computed as the
    per-feature median of its training samples. A sample is assigned the
    label of the centroid it is closest to.

    NOTE(review): scikit-learn's ``NearestCentroid`` documents the mean as the
    centroid for the euclidean metric. The median is kept here so existing
    results do not change -- confirm that this is intended.

    Parameters
    ----------
    metric : {'euclidean', 'manhattan'}, default='euclidean'
        Distance used by ``predict`` to compare samples with centroids.

    Attributes
    ----------
    centroids : ndarray of shape (n_classes, n_features), or None before fit
        One centroid per class, in the order given by ``classes_``.
    classes_ : ndarray of shape (n_classes,), or None before fit
        Sorted unique labels seen during ``fit``.
    """

    _METRICS = ('euclidean', 'manhattan')

    def __init__(self, metric='euclidean'):
        if metric not in self._METRICS:
            raise ValueError(
                "metric must be one of %r, got %r" % (self._METRICS, metric)
            )
        self.metric = metric
        self.centroids = None
        self.classes_ = None

    def fit(self, Xtrain, Ytrain):
        """Compute one centroid per class found in ``Ytrain``.

        Parameters
        ----------
        Xtrain : array-like of shape (n_samples, n_features)
            Training samples.
        Ytrain : array-like of shape (n_samples,)
            Class label of each training sample. Labels may be arbitrary
            values; they do not need to be the integers 0..n_classes-1.

        Returns
        -------
        self : myNearestCentroid
            The fitted estimator.
        """
        Xtrain = np.asarray(Xtrain)
        Ytrain = np.asarray(Ytrain)

        # Keep references to the training data on the instance.
        self.Xtrain = Xtrain
        self.Ytrain = Ytrain

        n_samples, n_features = Xtrain.shape

        # 1. Find the distinct class labels (np.unique returns them sorted).
        self.classes_ = np.unique(Ytrain)

        # 2. Compute the centroid of each class. The mask compares against
        #    the actual label rather than the loop index, so label sets that
        #    are not 0..n_classes-1 are handled correctly.
        self.centroids = np.zeros((len(self.classes_), n_features))
        for row, label in enumerate(self.classes_):
            self.centroids[row] = np.median(Xtrain[Ytrain == label], axis=0)

        return self

    def predict(self, X):
        """Predict the class label of every sample in ``X``.

        Parameters
        ----------
        X : array-like of shape (n_samples, n_features)
            Samples to classify.

        Returns
        -------
        ndarray of shape (n_samples,)
            Label of the nearest centroid for each sample, with the same
            dtype as the labels passed to ``fit``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If ``self.metric`` is not a supported metric.
        """
        X = np.asarray(X)
        n_samples, n_features = X.shape  # also enforces a 2-D input

        # 1. Distance from every sample to every centroid,
        #    shape (n_samples, n_classes).
        if self.metric == 'euclidean':
            distances = self.calculateEuclideanDistance(X)
        elif self.metric == 'manhattan':
            distances = self.calculateManhattanDistance(X)
        else:
            raise ValueError(
                "metric must be one of %r, got %r" % (self._METRICS, self.metric)
            )

        # 2. Row index of the closest centroid; argmin avoids a full sort.
        nearest = np.argmin(distances, axis=1)

        # 3. Translate centroid indices back into the original class labels.
        return self.classes_[nearest]

    def calculateEuclideanDistance(self, X):
        """Euclidean distance from ``X`` to every centroid.

        ``X`` may be one sample of shape (n_features,), giving a result of
        shape (n_classes,), or a batch of shape (n_samples, n_features),
        giving a result of shape (n_samples, n_classes).
        """
        self._check_is_fitted()
        # The inserted axis broadcasts each sample against all centroids.
        diff = np.asarray(X)[..., np.newaxis, :] - self.centroids
        return np.sqrt(np.sum(np.power(diff, 2), axis=-1))

    def calculateManhattanDistance(self, X):
        """Manhattan (L1) distance from ``X`` to every centroid.

        Accepts the same input shapes and returns the same output shapes as
        ``calculateEuclideanDistance``.
        """
        self._check_is_fitted()
        diff = np.asarray(X)[..., np.newaxis, :] - self.centroids
        return np.sum(np.absolute(diff), axis=-1)

    def _check_is_fitted(self):
        """Raise a clear error when the centroids have not been computed."""
        if self.centroids is None:
            raise RuntimeError(
                "This myNearestCentroid instance is not fitted yet; "
                "call fit() before using it."
            )
    

In [28]:
# Train the hand-written classifier and label the held-out samples.
NearestCentroidModel = myNearestCentroid()
NearestCentroidModel.fit(Xtrain, Ytrain)
Ypred = NearestCentroidModel.predict(Xtest)

# Build the confusion matrix (true labels first, predicted labels second)
# and wrap it in a DataFrame labelled with the class names on both axes.
m = confusion_matrix(Ytest, Ypred)
df = pd.DataFrame(data=m, index=classes, columns=classes)

# Report the overall accuracy, then show the table as the cell output.
print('Accuracy:', accuracy_score(Ytest, Ypred))
df

Accuracy: 0.8833333333333333


Unnamed: 0,0,1,2,3,4,5,6,7,8,9
0,45,0,0,0,0,0,0,0,0,0
1,0,35,4,0,0,1,1,0,2,9
2,1,0,44,3,0,0,0,3,1,1
3,0,0,1,45,0,0,0,2,2,4
4,0,0,0,0,45,0,0,3,0,0
5,0,0,0,0,0,48,1,0,0,8
6,0,1,0,0,0,0,59,0,0,0
7,0,0,0,0,0,1,0,51,1,0
8,0,3,0,1,0,1,0,1,51,4
9,0,0,0,0,0,2,0,1,0,54


### Own Nearest Centroid vs. Sklearn Nearest Centroid

In [29]:
# Reference model: scikit-learn's NearestCentroid with default settings,
# trained on the same split. fit() returns the estimator itself, so
# construction and fitting can be chained.
SNearestCentroidModel = NearestCentroid().fit(Xtrain, Ytrain)
Ypred2 = SNearestCentroidModel.predict(Xtest)

In [30]:
# Print both accuracies, each scored against the same held-out labels.
for label, predictions in (
    ('Own Nearest Centroid Accuracy:', Ypred),
    ('Sklearn Nearest Centroid Accuracy:', Ypred2),
):
    print(label, accuracy_score(Ytest, predictions))

Own Nearest Centroid Accuracy: 0.8833333333333333
Sklearn Nearest Centroid Accuracy: 0.8925925925925926
