In [24]:
from math import sqrt
import numpy as np

class KNNClassifier:
    def __init__(self, k):
        self.k = k
        self.X_train = None
        self.y_train = None

    def fit(self, X_train, y_train):
        self.X_train = X_train
        self.y_train = y_train

    def euclidean_distance(self, x1, x2):
        squared_diff_sum = 0
        for i in range(len(x1)):
            squared_diff_sum += (x1[i] - x2[i]) ** 2
        return sqrt(squared_diff_sum)

    def get_neighbors(self, x):
        distances = []
        # Calculate the Euclidean distance between the test point (x) and each training point
        for i in range(len(self.X_train)):
            distance = self.euclidean_distance(x, self.X_train[i])
            distances.append((distance, self.y_train[i]))

        # Sort the distances in ascending order
        for i in range(len(distances)):
            min_idx = i
            for j in range(i + 1, len(distances)):
                if distances[j][0] < distances[min_idx][0]:
                    min_idx = j
            distances[i], distances[min_idx] = distances[min_idx], distances[i]

        # Return the k nearest neighbors
        k_nearest_neighbors = distances[:self.k]

        return k_nearest_neighbors


    def predict(self, X_test):
        y_pred = []
        for x_test in X_test:
            print(x_test)
            neighbors = self.get_neighbors(x_test)
            print(neighbors)
            label_counts = {}
            for neighbor in neighbors:
                label = neighbor[1]
                if label in label_counts:
                    label_counts[label] += 1
                else:
                    label_counts[label] = 1

            most_common_label = None
            max_count = -1
            for label, count in label_counts.items():
                if count > max_count:
                    max_count = count
                    most_common_label = label

            y_pred.append(most_common_label)

        return y_pred

In [17]:
def getNPmatrix(archive):
    with open(archive, 'r') as file:
        lines = file.readlines()
        
    data = [line.strip().split(',') for line in lines]

    return np.array(data, dtype=float)

data = getNPmatrix('irism.data')
print(data.shape)

dataTrain = data[:90, :4]
label_train = data[:90, 4]
print(dataTrain.shape)
print(label_train.shape)

dataTest = data[90:150, :4]
label_test = data[90:150, 4]
print(dataTest.shape)
print(label_test.shape)

(150, 5)
(90, 4)
(90,)
(60, 4)
(60,)


In [18]:
def knnAlgorithm(data_train, label_train, X_test, Y_test, k):
    clases = np.unique(label_train)
    confussionMatrix = np.zeros((len(clases),len(clases)), dtype=int)
    knn = KNNClassifier(k)
    knn.fit(data_train, label_train)
    predictions = knn.predict(X_test)
    for i in range (len(predictions)):
        confussionMatrix[int(predictions[i])-1][int(Y_test[i])-1] += 1
    accuracy = np.trace(confussionMatrix)/np.sum(confussionMatrix)
    #se imprime matriz de confusion
    print("the accuracy with k = ",k, " is ",accuracy*100, "%")
    print(confussionMatrix, "\n")
    #return accuracy

In [25]:
knnAlgorithm(dataTrain, label_train, dataTest, label_test, 3)

[5.  3.5 1.3 0.3]
[(0.14142135623730917, 1.0), (0.2645751311064589, 1.0), (0.2645751311064592, 1.0)]
[4.5 2.3 1.3 0.3]
[(0.7141428428542852, 1.0), (0.7810249675906656, 1.0), (0.7937253933193773, 1.0)]
[4.4 3.2 1.3 0.2]
[(0.20000000000000018, 1.0), (0.3162277660168382, 1.0), (0.42426406871192845, 1.0)]
[5.  3.5 1.6 0.6]
[(0.22360679774997896, 1.0), (0.264575131106459, 1.0), (0.31622776601683794, 1.0)]
[5.1 3.8 1.9 0.4]
[(0.41231056256176585, 1.0), (0.41231056256176596, 1.0), (0.5099019513592782, 1.0)]
[4.8 3.  1.4 0.3]
[(0.19999999999999998, 1.0), (0.24494897427831797, 1.0), (0.2645751311064593, 1.0)]
[5.1 3.8 1.6 0.2]
[(0.14142135623730953, 1.0), (0.24494897427831772, 1.0), (0.33166247903554, 1.0)]
[4.6 3.2 1.4 0.2]
[(0.22360679774997935, 1.0), (0.2999999999999996, 1.0), (0.3000000000000002, 1.0)]
[5.3 3.7 1.5 0.2]
[(0.10000000000000053, 1.0), (0.22360679774997896, 1.0), (0.2449489742783178, 1.0)]
[5.  3.3 1.4 0.2]
[(0.17320508075688762, 1.0), (0.22360679774997877, 1.0), (0.22360679774