In [5]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from collections import Counter

In [2]:
class KNNRegressor:
    """Brute-force k-nearest-neighbours regressor using Euclidean distance.

    A prediction is the unweighted mean of the targets of the
    ``n_neighbors`` training samples closest to the query sample.

    Parameters
    ----------
    n_neighbors : int, default=5
        Number of neighbours averaged per prediction. It is validated in
        ``predict`` (not here) because the attribute may be reassigned
        after construction.

    Attributes
    ----------
    X : numpy.ndarray or None
        Training samples stored by ``fit``; ``None`` until then.
    y : numpy.ndarray or None
        Training targets stored by ``fit``; ``None`` until then.
    """

    def __init__(self, n_neighbors=5):
        self.n_neighbors = n_neighbors
        self.X = None
        self.y = None

    def fit(self, X_train, y_train):
        """Memorise the training data (no model is built ahead of time).

        Parameters
        ----------
        X_train : array-like
            Training samples, one per entry along the first axis.
        y_train : array-like
            Training targets, one per training sample.

        Raises
        ------
        ValueError
            If ``X_train`` and ``y_train`` hold a different number of samples.
        """
        X = np.asarray(X_train)
        y = np.asarray(y_train)
        # A length mismatch would otherwise surface later as an IndexError
        # (y too short) or silently ignore trailing targets (y too long).
        if X.shape[:1] != y.shape[:1]:
            raise ValueError(
                "X_train and y_train must contain the same number of samples; "
                f"got shapes {X.shape} and {y.shape}"
            )
        self.X = X
        self.y = y

    def _checked_n_neighbors(self):
        """Return ``n_neighbors`` after checking the estimator is usable."""
        if self.X is None or self.y is None:
            raise RuntimeError(
                f"{type(self).__name__} has not been fitted; call fit() first"
            )
        k = self.n_neighbors
        n_train = self.X.shape[0]
        if not 1 <= k <= n_train:
            raise ValueError(
                f"n_neighbors must be between 1 and the number of training "
                f"samples ({n_train}); got {k}"
            )
        return k

    def predict(self, X_test):
        """Predict a target for every sample in ``X_test``.

        Parameters
        ----------
        X_test : array-like
            Query samples; each entry along the first axis is one sample.

        Returns
        -------
        numpy.ndarray
            One prediction per query sample.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If ``n_neighbors`` is smaller than 1 or larger than the number
            of training samples.
        """
        X_test = np.asarray(X_test)
        k = self._checked_n_neighbors()

        # One row per training sample, so a single vectorised norm per query
        # replaces the Python loop over training samples.
        train = self.X.reshape(self.X.shape[0], -1)

        predictions = []
        for query in X_test:
            distances = np.linalg.norm(train - np.ravel(query), axis=1)
            # Stable sort: equidistant neighbours are taken in training order,
            # which makes tie-breaking deterministic.
            nearest = np.argsort(distances, kind="stable")[:k]
            predictions.append(np.mean(self.y[nearest]))
        return np.array(predictions)

    def score(self, y_pred, y_test):
        """Return the coefficient of determination (R^2) of the predictions.

        Note the argument order: predictions first, ground truth second.

        Parameters
        ----------
        y_pred : array-like
            Predicted targets.
        y_test : array-like
            True targets.

        Returns
        -------
        float
            ``1 - SS_res / SS_tot``. When the true targets are constant
            (``SS_tot == 0``) the ratio is undefined; 1.0 is returned for
            perfect predictions and 0.0 otherwise. Empty input yields NaN.

        Raises
        ------
        ValueError
            If the two inputs hold a different number of values.
        """
        # Flatten so that a column vector scored against a flat vector is not
        # silently broadcast into an (n, n) matrix.
        y_pred = np.ravel(np.asarray(y_pred))
        y_test = np.ravel(np.asarray(y_test))
        if y_pred.size != y_test.size:
            raise ValueError(
                "y_pred and y_test must contain the same number of values; "
                f"got {y_pred.size} and {y_test.size}"
            )
        u = ((y_test - y_pred) ** 2).sum()
        v = ((y_test - y_test.mean()) ** 2).sum()
        if y_test.size and v == 0:
            # Constant targets: avoid dividing by zero.
            return np.float64(1.0 if u == 0 else 0.0)
        return 1 - u / v
        

In [4]:
# Hand-checkable smoke test for KNNRegressor: the query (1.5, 2.5) is
# equidistant from the first two training rows and farther from the third,
# so with n_neighbors=2 the prediction is the mean of their targets,
# (10 + 15) / 2.
df_X = pd.DataFrame([[1, 2], [2, 3], [3, 4]])
s_y = pd.Series([10, 15, 20])
df_test = pd.DataFrame([[1.5, 2.5]])

knn = KNNRegressor(n_neighbors=2)
knn.fit(df_X, s_y)
preds = knn.predict(df_test)
print(preds)


[12.5]


In [8]:
class KNNClassifier:
    """Brute-force k-nearest-neighbours classifier using Euclidean distance.

    A prediction is the most common label among the ``n_neighbors``
    training samples closest to the query sample. When several labels are
    equally common, the one whose first occurrence is nearest to the query
    wins.

    Parameters
    ----------
    n_neighbors : int, default=5
        Number of neighbours that vote per prediction. It is validated in
        ``predict`` (not here) because the attribute may be reassigned
        after construction.

    Attributes
    ----------
    X : numpy.ndarray or None
        Training samples stored by ``fit``; ``None`` until then.
    y : numpy.ndarray or None
        Training labels stored by ``fit``; ``None`` until then.
    """

    def __init__(self, n_neighbors=5):
        self.n_neighbors = n_neighbors
        self.X = None
        self.y = None

    def fit(self, X_train, y_train):
        """Memorise the training data (no model is built ahead of time).

        Parameters
        ----------
        X_train : array-like
            Training samples, one per entry along the first axis.
        y_train : array-like
            Training labels, one per training sample.

        Raises
        ------
        ValueError
            If ``X_train`` and ``y_train`` hold a different number of samples.
        """
        X = np.asarray(X_train)
        y = np.asarray(y_train)
        # A length mismatch would otherwise surface later as an IndexError
        # (y too short) or silently ignore trailing labels (y too long).
        if X.shape[:1] != y.shape[:1]:
            raise ValueError(
                "X_train and y_train must contain the same number of samples; "
                f"got shapes {X.shape} and {y.shape}"
            )
        self.X = X
        self.y = y

    def _checked_n_neighbors(self):
        """Return ``n_neighbors`` after checking the estimator is usable."""
        if self.X is None or self.y is None:
            raise RuntimeError(
                f"{type(self).__name__} has not been fitted; call fit() first"
            )
        k = self.n_neighbors
        n_train = self.X.shape[0]
        if not 1 <= k <= n_train:
            raise ValueError(
                f"n_neighbors must be between 1 and the number of training "
                f"samples ({n_train}); got {k}"
            )
        return k

    def predict(self, X_test):
        """Predict a label for every sample in ``X_test``.

        Parameters
        ----------
        X_test : array-like
            Query samples; each entry along the first axis is one sample.

        Returns
        -------
        numpy.ndarray
            One predicted label per query sample.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        ValueError
            If ``n_neighbors`` is smaller than 1 or larger than the number
            of training samples.
        """
        X_test = np.asarray(X_test)
        k = self._checked_n_neighbors()

        # One row per training sample, so a single vectorised norm per query
        # replaces the Python loop over training samples.
        train = self.X.reshape(self.X.shape[0], -1)

        predictions = []
        for query in X_test:
            distances = np.linalg.norm(train - np.ravel(query), axis=1)
            # Stable sort: equidistant neighbours are taken in training order,
            # which makes tie-breaking deterministic.
            nearest = np.argsort(distances, kind="stable")[:k]
            # Counter keeps insertion order, so among equally common labels
            # the one seen first (i.e. nearest) is returned.
            votes = Counter(self.y[nearest])
            predictions.append(votes.most_common(1)[0][0])
        return np.array(predictions)

    def score(self, y_pred, y_true):
        """Return the accuracy (fraction of exact label matches).

        Note the argument order: predictions first, ground truth second.

        Parameters
        ----------
        y_pred : array-like
            Predicted labels.
        y_true : array-like
            True labels.

        Returns
        -------
        float
            Share of positions where ``y_pred`` equals ``y_true``.
            Empty input yields NaN.

        Raises
        ------
        ValueError
            If the two inputs hold a different number of labels.
        """
        # Flatten and size-check: comparing arrays of different shapes either
        # broadcasts silently or behaves differently across NumPy versions.
        y_pred = np.ravel(np.asarray(y_pred))
        y_true = np.ravel(np.asarray(y_true))
        if y_pred.size != y_true.size:
            raise ValueError(
                "y_pred and y_true must contain the same number of labels; "
                f"got {y_pred.size} and {y_true.size}"
            )
        return (y_pred == y_true).mean()


In [9]:
import pandas as pd

# Dummy classification dataset: five rows, two numeric features, labels 'A'/'B'.
df = pd.DataFrame({
    'feature1': [1, 2, 3, 6, 7],
    'feature2': [5, 6, 7, 2, 3],
    'label':    ['A', 'A', 'A', 'B', 'B']
})

X = df[['feature1', 'feature2']]
y = df['label']

# NOTE(review): `train_test_split` is not imported in any cell visible here
# (presumably sklearn.model_selection.train_test_split). Confirm the import
# exists in the imports cell, otherwise this cell raises NameError on a
# fresh kernel ("Restart & Run All").
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.4, random_state=42)

# NOTE(review): the recorded output shows two predictions, which suggests
# three of the five rows end up in the training set. With n_neighbors=3
# every training row would then vote, so each prediction is just the
# majority training label. Consider a smaller n_neighbors or more data for
# a meaningful demo.
clf = KNNClassifier(n_neighbors=3)
clf.fit(X_train, y_train)
predictions = clf.predict(X_test)

print(predictions)


['A' 'A']
