In [1]:
import heapq
import math
import operator
from collections import Counter

class KNearest:
    """k-nearest-neighbours classifier using Euclidean distance and majority vote.

    ``fit`` only stores the training data; all work happens at prediction
    time, where the query is compared against every stored sample.
    """

    def __init__(self, k: int):
        """Create a classifier that votes among the ``k`` closest training samples.

        Args:
            k: Number of neighbours taking part in the vote; an integer >= 1.

        Raises:
            ValueError: If ``k`` is smaller than 1.
            TypeError: If ``k`` is not an integer (for example ``2.5``).
        """
        if k < 1:
            raise ValueError("K must be >= 1")
        # k is later used as a neighbour count, so reject non-integers here
        # instead of letting them fail deep inside a prediction.
        # operator.index accepts any integer-like object (incl. numpy ints).
        self.k = operator.index(k)
        self.X_train = None
        self.y_train = None

    def fit(self, X, y):
        """Store the training samples and their labels.

        The sequences are kept by reference, not copied.

        Args:
            X: Sized sequence of feature vectors.
            y: Sized sequence of labels, one per entry of ``X``.

        Raises:
            ValueError: If ``X`` and ``y`` differ in length, or if there are
                fewer training samples than ``k``.
        """
        if len(X) != len(y):
            raise ValueError("X and Y must have the same length")
        if len(X) < self.k:
            raise ValueError("k cannot be larger than number of training samples")
        self.X_train = X
        self.y_train = y

    def predict(self, x):
        """Predict a label for every sample in ``x``.

        Args:
            x: Iterable of feature vectors.

        Returns:
            A list with one predicted label per sample, in input order.

        Raises:
            RuntimeError: If ``fit`` has not been called yet.
        """
        self._require_fitted()
        return [self._predictSingle(sample) for sample in x]

    def _require_fitted(self):
        """Raise a clear error when no training data has been stored yet."""
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("fit must be called before predict")

    def _predictSingle(self, x):
        """Return the majority label among the ``k`` training samples nearest to ``x``.

        When several labels share the highest vote count, the one whose
        closest sample is nearest to ``x`` wins.

        Raises:
            RuntimeError: If ``fit`` has not been called yet.
        """
        self._require_fitted()

        # Only the k smallest distances are needed, so avoid sorting all of
        # them. nsmallest returns the pairs closest-first and, like a stable
        # sort, keeps training order among equal distances.
        nearest = heapq.nsmallest(
            self.k,
            ((math.dist(x, xtr), ytr) for xtr, ytr in zip(self.X_train, self.y_train)),
            key=lambda pair: pair[0],
        )
        k_labels = [label for _, label in nearest]

        # Majority vote
        counts = Counter(k_labels)
        max_count = max(counts.values())
        ties = [label for label, count in counts.items() if count == max_count]

        if len(ties) == 1:
            return ties[0]

        # Every tied label occurs among the k nearest, and k_labels is ordered
        # closest-first, so the first tied label seen is the nearest one.
        for label in k_labels:
            if label in ties:
                return label

In [None]:
# Load the Data
import pandas as pd
import random
dataset = pd.read_csv("processed.csv")

# The first column is used as the label vector y; every remaining column
# goes into the feature matrix X.
y = dataset.iloc[:, 0].values
X = dataset.iloc[:, 1:].values

def train_test_split(X, y, test_ratio=0.2, seed=42):
    """Shuffle the samples reproducibly and split them into a test and a train part.

    Args:
        X: Indexable, sized sequence of feature vectors.
        y: Indexable, sized sequence of labels, one per entry of ``X``.
        test_ratio: Fraction of the samples (between 0 and 1) that goes into
            the test part; the resulting count is rounded down.
        seed: Seed for the shuffle. The same seed always produces the same
            split.

    Returns:
        A tuple ``(x_test, y_test, X_train, y_train)`` of lists. Note that the
        test part comes first.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length, or if ``test_ratio``
            is outside the range 0..1.
    """
    if len(X) != len(y):
        raise ValueError("X and y must have the same length")
    if not 0 <= test_ratio <= 1:
        raise ValueError("test_ratio must be between 0 and 1")

    # A private generator yields the same permutation as seeding the global
    # `random` module would, but leaves the process-wide RNG state untouched.
    rng = random.Random(seed)
    index = list(range(len(X)))
    rng.shuffle(index)

    n_test = int(len(index) * test_ratio)
    test_index = index[:n_test]
    train_index = index[n_test:]

    X_train = [X[i] for i in train_index]
    y_train = [y[i] for i in train_index]
    x_test = [X[i] for i in test_index]
    y_test = [y[i] for i in test_index]

    return x_test, y_test, X_train, y_train
# Hold out 20% of the rows; the helper returns the test pair before the train pair.
x_test, y_test, X_train, y_train = train_test_split(X, y, seed=42, test_ratio=0.2)

# Fit a 5-neighbour classifier on the training rows and label the held-out rows.
model = KNearest(k=5)
model.fit(X_train, y_train)
predictions = model.predict(x_test)

# Accuracy is the share of held-out rows whose predicted label equals the true label.
correct = len([1 for guess, truth in zip(predictions, y_test) if guess == truth])
accuracy = correct / len(y_test)
print(f"Accuracy: {accuracy}")