In [15]:
import numpy as np

In [16]:
# Euclidean distance between two points
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Both inputs are converted to float arrays before subtracting, so
    unsigned-integer inputs cannot wrap around on subtraction and plain
    Python sequences are accepted as well as numpy arrays.

    Args:
        x1: Coordinates of the first point (array-like of numbers).
        x2: Coordinates of the second point, broadcast-compatible with ``x1``.

    Returns:
        The distance as a numpy float: sqrt(sum((x1 - x2) ** 2)).
    """
    diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

# K-NN classifier
class KNNClassifier:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    ``fit`` only stores the training data; all the work happens in
    ``predict``, which compares every query sample against every training
    sample.
    """

    def __init__(self, k):
        """Create a classifier that votes among the ``k`` nearest neighbours.

        Args:
            k: Number of neighbours that take part in the vote (>= 1).

        Raises:
            ValueError: If ``k`` is smaller than 1.
        """
        if k < 1:
            raise ValueError("k must be a positive integer, got {!r}".format(k))
        self.k = k

    def fit(self, X_train, y_train):
        """Store the training samples and their labels.

        Args:
            X_train: Sequence/array of training samples, one per row.
            y_train: Sequence/array of labels, aligned with ``X_train``.

        Raises:
            ValueError: If the number of samples and labels differ.
        """
        if len(X_train) != len(y_train):
            raise ValueError(
                "X_train and y_train must have the same length, got {} and {}".format(
                    len(X_train), len(y_train)
                )
            )
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        """Predict a label for every sample in ``X_test``.

        Args:
            X_test: Sequence/array of query samples, one per row.

        Returns:
            A list with one predicted label per query sample. When several
            classes tie in the vote, the class of the closest tied
            neighbour wins.

        Raises:
            ValueError: If the stored training set is empty.
        """
        n_train = len(self.X_train)
        if n_train == 0:
            raise ValueError("cannot predict: the training set is empty")
        if len(X_test) == 0:
            return []

        # Flatten each sample to one row of floats: avoids integer wrap-around
        # on subtraction and lets one numpy expression replace the inner loop.
        train = np.asarray(self.X_train, dtype=float).reshape(n_train, -1)
        queries = np.asarray(X_test, dtype=float).reshape(len(X_test), -1)

        y_pred = []
        for query in queries:
            distances = np.sqrt(np.sum((train - query) ** 2, axis=1))
            # Stable sort keeps training order among equal distances.
            nearest = np.argsort(distances, kind="stable")[:self.k]
            classes = [self.y_train[j] for j in nearest]
            # Majority vote. Iterating the distance-ordered list (not a set)
            # makes max() return the nearest class when counts are tied.
            y_pred.append(max(classes, key=classes.count))
        return y_pred
    

In [26]:
def readArchive(fileName):
    """Read a comma-separated file of integers into a 2-D numpy array.

    Blank lines (for example a trailing empty line at the end of the file)
    are skipped, so they do not produce a ragged row that breaks the
    integer conversion.

    Args:
        fileName: Path of the text file to read.

    Returns:
        A numpy integer array with one row per non-blank line.

    Raises:
        OSError: If the file cannot be opened.
        ValueError: If a field is not an integer or rows differ in length.
    """
    with open(fileName, 'r') as file:
        # Strip the newline/whitespace first so blank lines become '' and are dropped
        stripped = (line.strip() for line in file)
        rows = [line.split(',') for line in stripped if line]

    # Convert the list of string fields into an integer matrix
    return np.array(rows, dtype=int)

# Training split: the first 64 columns are used as features, column 64 as the label
dataTrain = readArchive('optdigits.tra')
X_train, Y_train = dataTrain[:, 0:64], dataTrain[:, 64]

# Test split, sliced with the same column layout
dataTest = readArchive('optdigits.tes')
X_test, Y_test = dataTest[:, 0:64], dataTest[:, 64]

# Distinct training labels, and an all-zero square matrix with one row/column per label
clases = np.unique(Y_train)
confussionMatrix = np.zeros((clases.size, clases.size), dtype=int)

In [18]:
# Fit a 3-nearest-neighbour classifier on the training split, then label the test split
knn = KNNClassifier(3)
knn.fit(X_train, Y_train)
predictions = knn.predict(X_test)

[0, 1, 1, 3, 4, 9, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 9, 5, 5, 6, 5, 0, 9, 8, 9, 8, 4, 1, 7, 7, 3, 5, 1, 0, 0, 2, 2, 7, 8, 2, 0, 1, 1, 6, 3, 3, 7, 3, 3, 4, 6, 6, 6, 4, 9, 1, 5, 0, 9, 5, 1, 8, 1, 0, 0, 1, 7, 6, 3, 2, 1, 7, 1, 6, 3, 1, 3, 9, 1, 7, 6, 8, 4, 3, 1, 4, 0, 5, 3, 6, 9, 6, 1, 7, 5, 4, 4, 7, 2, 8, 2, 2, 5, 7, 9, 5, 4, 8, 8, 4, 9, 0, 8, 9, 1, 0, 1, 2, 3, 1, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 9, 5, 5, 6, 5, 0, 9, 8, 9, 8, 4, 1, 7, 7, 3, 5, 1, 0, 0, 2, 2, 7, 8, 2, 0, 1, 2, 6, 3, 3, 7, 3, 3, 4, 6, 6, 6, 4, 9, 1, 5, 0, 9, 5, 2, 8, 2, 0, 0, 1, 7, 6, 3, 2, 1, 7, 3, 1, 3, 9, 1, 7, 6, 8, 4, 3, 1, 4, 0, 5, 3, 6, 9, 6, 1, 7, 5, 4, 4, 7, 2, 1, 2, 2, 5, 5, 4, 8, 8, 4, 9, 0, 8, 9, 8, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 0, 9, 5, 5, 6, 5, 0, 9, 8, 9, 8, 4, 1, 7, 7, 3, 5, 1, 0, 0, 2, 2, 7, 8, 2, 0, 1, 2, 6, 3, 3, 7, 3, 3, 4, 6, 6, 6, 4, 9, 1, 5, 0, 9, 5, 2, 8, 

In [27]:
# Start from a fresh zero matrix so re-running this cell does not add the same
# counts on top of a previous run.
# Rows are indexed by the predicted label and columns by the true label, so the
# labels must be the integers 0 .. len(clases) - 1.
confussionMatrix = np.zeros((len(clases), len(clases)), dtype=int)
for predicted, actual in zip(predictions, Y_test):
    confussionMatrix[predicted, actual] += 1

print(confussionMatrix)

1797
[[178   0   0   0   0   0   0   0   0   0]
 [  0 180   4   0   2   0   0   0   9   0]
 [  0   0 173   0   0   0   0   0   0   0]
 [  0   0   0 180   0   0   0   0   1   2]
 [  0   0   0   0 178   1   0   0   0   0]
 [  0   0   0   0   0 179   0   0   0   1]
 [  0   1   0   0   0   0 181   0   0   0]
 [  0   0   0   1   0   0   0 172   0   0]
 [  0   1   0   1   1   0   0   1 162   1]
 [  0   0   0   1   0   2   0   6   2 176]]


In [28]:
# Accuracy = correctly classified samples (matrix diagonal) over all counted samples
accuracy = confussionMatrix.trace() / confussionMatrix.sum()
print(accuracy)

0.9788536449638287


In [None]:
def knnAlgorithm(X_train, Y_train, X_test, Y_test, k):
    """Train a k-NN classifier and return its accuracy on a test set.

    Accuracy is computed directly as the fraction of predictions that equal
    the true label. This is the same value as trace/sum of a confusion
    matrix, but it does not require the labels to be the integers
    0 .. n_classes - 1.

    Args:
        X_train: Training samples, one per row.
        Y_train: Training labels aligned with ``X_train``.
        X_test: Test samples, one per row.
        Y_test: True labels aligned with ``X_test``.
        k: Number of neighbours used in the vote.

    Returns:
        The accuracy as a numpy float in [0, 1] (NaN if the test set is empty).

    Raises:
        ValueError: If ``X_test`` and ``Y_test`` differ in length.
    """
    if len(X_test) != len(Y_test):
        raise ValueError(
            "X_test and Y_test must have the same length, got {} and {}".format(
                len(X_test), len(Y_test)
            )
        )
    knn = KNNClassifier(k)
    knn.fit(X_train, Y_train)
    predictions = knn.predict(X_test)
    # Element-wise comparison gives a boolean array; its mean is the hit rate
    hits = np.asarray(predictions) == np.asarray(Y_test)
    return np.mean(hits)
    
    