In [7]:
from math import sqrt
import numpy as np

In [8]:
class KNNClassifier:
    """k-nearest-neighbours classifier (Euclidean distance, majority vote).

    The classifier is "lazy": ``fit`` only stores the training data and every
    prediction scans all stored samples.
    """

    def __init__(self, k):
        """Create a classifier that votes among the ``k`` closest samples.

        Args:
            k: number of neighbours taking part in each vote.

        Raises:
            ValueError: if ``k`` is smaller than 1, since no neighbour could
                vote and every prediction would be meaningless.
        """
        if k < 1:
            raise ValueError("k must be a positive integer, got {!r}".format(k))
        self.k = k
        self.data_train = None
        self.label_train = None

    def fit(self, data_train, label_train):
        """Store the training samples and their labels.

        Args:
            data_train: sequence of feature vectors.
            label_train: sequence of labels, one per feature vector.

        Raises:
            ValueError: if the two sequences differ in length.
        """
        if len(data_train) != len(label_train):
            raise ValueError(
                "data_train and label_train must have the same length "
                "({} != {})".format(len(data_train), len(label_train))
            )
        self.data_train = data_train
        self.label_train = label_train

    def euclidean_distance(self, x1, x2):
        """Return the Euclidean (L2) distance between two vectors.

        Raises:
            ValueError: if the vectors have different lengths.
        """
        if len(x1) != len(x2):
            raise ValueError(
                "vectors must have the same length "
                "({} != {})".format(len(x1), len(x2))
            )
        return sqrt(sum((a - b) ** 2 for a, b in zip(x1, x2)))

    def get_neighbors(self, x):
        """Return the ``k`` training samples closest to ``x``.

        Args:
            x: feature vector to look up.

        Returns:
            A list of ``(distance, label)`` tuples in ascending distance
            order. It is shorter than ``k`` when fewer training samples are
            stored. Samples at equal distance keep their training order.

        Raises:
            RuntimeError: if ``fit`` has not been called yet.
        """
        if self.data_train is None or self.label_train is None:
            raise RuntimeError("fit() must be called before looking up neighbors")

        distances = [
            (self.euclidean_distance(x, sample), label)
            for sample, label in zip(self.data_train, self.label_train)
        ]
        # Sort on the distance only, so labels are never compared with each
        # other; the built-in sort is stable, which makes ties deterministic.
        distances.sort(key=lambda pair: pair[0])
        return distances[:self.k]

    def predict(self, data_train):
        """Predict a label for every sample in ``data_train``.

        Despite the parameter name, ``data_train`` holds the samples to
        classify; the stored training set is not modified.

        Args:
            data_train: iterable of feature vectors to classify.

        Returns:
            A list with one predicted label per input sample. When the vote
            is tied, the label whose closest member is nearest wins.
        """
        y_pred = []
        for sample in data_train:
            label_counts = {}
            for _, label in self.get_neighbors(sample):
                label_counts[label] = label_counts.get(label, 0) + 1

            # Neighbours are inserted nearest-first and max() keeps the first
            # key among equal counts, which gives the tie-break described
            # above. default=None covers an empty training set.
            y_pred.append(max(label_counts, key=label_counts.get, default=None))

        return y_pred



In [9]:
def getNPmatrix(archive):
    """Load a comma-separated numeric text file into a float array.

    Args:
        archive: path of a text file with one record per line and
            comma-separated numeric fields.

    Returns:
        A ``numpy.ndarray`` of dtype float with one row per non-blank line.

    Raises:
        ValueError: raised by numpy when a field is not numeric or the rows
            do not all have the same number of fields.
    """
    with open(archive, 'r') as file:
        # Blank lines (for example a trailing empty line at the end of the
        # file) would become a one-field row and break the float conversion,
        # so they are skipped.
        rows = [line.strip().split(',') for line in file if line.strip()]

    return np.array(rows, dtype=float)

# Load the full dataset; the slices below treat the first four columns as
# features and the fifth column as the label.
data = getNPmatrix('irism.data')
print(data.shape)

# Training split: rows 0-89.
dataTrain, label_train = data[:90, :4], data[:90, 4]
print(dataTrain.shape, label_train.shape, sep='\n')

# Test split: rows 90-149.
dataTest, label_test = data[90:150, :4], data[90:150, 4]
print(dataTest.shape, label_test.shape, sep='\n')

(150, 5)
(90, 4)
(90,)
(60, 4)
(60,)


In [10]:
def knnAlgorithm(data_train, label_train, X_test, Y_test, k):
    """Fit a ``KNNClassifier``, classify ``X_test`` and print the results.

    Prints the accuracy (as a percentage) followed by the confusion matrix.
    Matrix rows are the predicted class and columns are the true class, both
    in sorted label order.

    Args:
        data_train: training feature vectors.
        label_train: labels of the training vectors.
        X_test: feature vectors to classify.
        Y_test: true labels of ``X_test``.
        k: number of neighbours used by the classifier.

    Returns:
        None. The results are only printed.
    """
    # Every label that can appear on either axis of the matrix, sorted.
    clases = np.unique(np.concatenate((np.ravel(label_train), np.ravel(Y_test))))
    confussionMatrix = np.zeros((len(clases), len(clases)), dtype=int)

    knn = KNNClassifier(k)
    knn.fit(data_train, label_train)
    predictions = knn.predict(X_test)

    # Map each label to its position in ``clases`` instead of assuming the
    # labels are exactly 1..n: a label of 0 would otherwise wrap around to
    # the last row/column and silently corrupt the matrix.
    predicted_idx = np.searchsorted(clases, np.asarray(predictions, dtype=clases.dtype))
    actual_idx = np.searchsorted(clases, np.asarray(Y_test, dtype=clases.dtype))
    for row, col in zip(predicted_idx, actual_idx):
        confussionMatrix[row][col] += 1

    # Correct predictions lie on the diagonal.
    accuracy = np.trace(confussionMatrix)/np.sum(confussionMatrix)
    # Print the accuracy and the confusion matrix.
    print("the accuracy with k = ",k, " is ",accuracy*100, "%")
    print(confussionMatrix, "\n")

In [11]:
# Evaluate the classifier on the held-out split with k = 1 and k = 5.
knnAlgorithm(
    data_train=dataTrain,
    label_train=label_train,
    X_test=dataTest,
    Y_test=label_test,
    k=1,
)
knnAlgorithm(
    data_train=dataTrain,
    label_train=label_train,
    X_test=dataTest,
    Y_test=label_test,
    k=5,
)


the accuracy with k =  1  is  95.0 %
[[20  0  0]
 [ 0 18  1]
 [ 0  2 19]] 

the accuracy with k =  5  is  96.66666666666667 %
[[20  0  0]
 [ 0 18  0]
 [ 0  2 20]] 

