### K-Nearest Neighbor Algorithmus:

#### Parameter:

dataset = (X, Y)  
mit X := Features  
und Y := Classes

K := Hyperparameter, Anzahl der nächsten Nachbarn, die berücksichtigt werden  
sample := Neuer Datenpunkt zum Klassifizieren

#### Pseudo-Code:

kNN (dataset, K, sample):

-   Bestimme für jeden Punkt $p \in dataset$ die Distanz zu $sample$ mit der gegebenen Distanzfunktion.
-   Bestimme die $K$ nächstgelegenen Nachbarn und weise $sample$ die Klasse zu, die unter diesen Nachbarn am häufigsten vorkommt.

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Seed NumPy's global random generator so the sampled dataset is the same on every run.
np.random.seed(0)



def generate_dataset() -> tuple:
    """Draw a 2-D toy dataset of 29 points spread over three Gaussian classes.

    The points are sampled from NumPy's global random generator, so the
    result depends on its current seed/state.

    Returns:
        A pair ``(data, classes)``: ``data`` holds one row per point and
        ``classes`` the matching class label (0, 1 or 2) for each row.
    """
    identity_cov = np.array([[1, 0], [0, 1]])
    # (cluster mean, number of points) per class, listed in label order.
    class_specs = (
        (np.array([0, 0]), 10),
        (np.array([-10, 4]), 6),
        (np.array([10, 10]), 13),
    )

    # Sample the clusters in label order so the random generator is
    # consumed in a fixed sequence.
    clusters = [
        np.random.multivariate_normal(center, identity_cov, count)
        for center, count in class_specs
    ]
    data = np.concatenate(clusters, axis=0)

    # Repeat each label once per point of its cluster, matching the row
    # order of ``data``.
    labels = np.array(
        [
            label
            for label, (_, count) in enumerate(class_specs)
            for _ in range(count)
        ]
    )
    return data, labels

# Build the toy dataset: x holds the points, y the class label of each point.
x, y = generate_dataset()

# Print the array shapes first ...
print(f"\nx array: {x.shape}")
# NOTE(review): the ':' right after the shape looks unintentional — confirm.
print(f"y array: {y.shape}:\n")

# ... then the full contents of both arrays.
print(f"x:\n{x[:]}")
print(f"\ny:\n{y[:]}")

In [None]:

def plot_dataset(x: np.ndarray, y: np.ndarray) -> None:
    """Show a scatter plot of the dataset with an individual color per class.

    Args:
        x: Data points, one row per point; the first two columns are used
            as the horizontal and vertical plot coordinates.
        y: Integer class label of each row in ``x``. The label is used as
            an index into the three-entry color list (red, blue, green).
    """
    colors = ["red", "blue", "green"]
    # Resolve every point's color up front so the whole dataset is drawn
    # with one scatter call instead of one call (and one artist) per point.
    point_colors = [colors[label] for label in y]
    plt.scatter(x[:, 0], x[:, 1], c=point_colors)
    plt.show()
    
# Visualize the generated points, colored by their class label.
plot_dataset(x, y)

## KNN Implementation

In [None]:
class KNeighborClassifier:
    """k-nearest-neighbor classifier named after the scikit-learn conventions.

    ``fit`` stores the training data; ``predict`` assigns every query sample
    the class that occurs most often among its ``n_neighbors`` closest
    training points (Euclidean distance). Class labels must be non-negative
    integers, but they do not have to form the contiguous range 0..n-1.
    """

    def __init__(self, n_neighbors: int = 5) -> None:
        """Store the hyperparameter; the training data is attached by ``fit``.

        Args:
            n_neighbors: Number of nearest training points that take part
                in the vote.

        Raises:
            ValueError: If ``n_neighbors`` is smaller than 1.
        """
        if n_neighbors < 1:
            raise ValueError(f"n_neighbors must be >= 1, got {n_neighbors}")
        self.n_neighbors = n_neighbors
        # Type declarations only; the attributes are assigned in ``fit``.
        self.X: np.ndarray  # training features, one row per sample
        self.y: np.ndarray  # training labels, one entry per row of X
        self.num_classes: int  # number of distinct labels in y

    def _distance(self, p1: np.ndarray, p2: np.ndarray) -> "np.ndarray | float":
        """Return the Euclidean distance between ``p1`` and ``p2``.

        The norm is taken over the last axis: two single points yield a
        scalar, broadcastable stacks of points yield an array of distances.
        """
        return np.linalg.norm(p1 - p2, axis=-1)

    def kneighbors(self, X: np.ndarray) -> np.ndarray:
        """Find the nearest training points for every sample in ``X``.

        Args:
            X: Query samples, one row per sample.

        Returns:
            Integer array with one row per query sample. Each row holds the
            indices (into the training data) of that sample's
            ``n_neighbors`` closest training points, nearest first. If
            fewer training points exist, all of them are returned.
        """
        X = np.asarray(X)
        # Broadcasting (n_queries, 1, d) against (1, n_train, d) yields the
        # full (n_queries, n_train) distance matrix in a single call.
        distances = self._distance(X[:, np.newaxis, :], self.X[np.newaxis, :, :])
        # A stable sort resolves equal distances deterministically in favor
        # of the lower training index.
        sorted_distances_idxs = np.argsort(distances, axis=1, kind="stable")
        return sorted_distances_idxs[:, : self.n_neighbors]

    def fit(self, X: np.ndarray, y: np.ndarray) -> None:
        """Store the training data (kNN has no separate training step).

        Args:
            X: Training features, one row per sample.
            y: Class label of each row in ``X``.

        Raises:
            ValueError: If ``X`` and ``y`` differ in their number of samples.
        """
        X = np.asarray(X)
        y = np.asarray(y)
        if len(X) != len(y):
            raise ValueError(
                f"X and y must have the same number of samples, "
                f"got {len(X)} and {len(y)}"
            )
        self.X = X
        self.y = y
        self.num_classes = len(np.unique(self.y))

    def _vote_class(self, kneighbors_idxs: np.ndarray) -> int:
        """Return the most frequent class among the given training indices.

        Ties are decided in favor of the smallest class label.
        """
        neighbors_classes = self.y[kneighbors_idxs]
        # bincount indexes the votes by label value and grows as needed, so
        # labels larger than ``num_classes - 1`` no longer overflow the array.
        votes = np.bincount(neighbors_classes, minlength=self.num_classes)
        return int(np.argmax(votes))

    def predict(self, X: np.ndarray) -> np.ndarray:
        """Predict the class of every sample in ``X`` by majority vote.

        Args:
            X: Query samples, one row per sample.

        Returns:
            Array with one predicted class label per query sample.
        """
        kneighbors_idxs = self.kneighbors(X)
        return np.array(
            [self._vote_class(sample_idxs) for sample_idxs in kneighbors_idxs]
        )

    def score(self, X: np.ndarray, y: np.ndarray) -> float:
        """Return the accuracy of the predictions for ``X``.

        Args:
            X: Query samples, one row per sample.
            y: True class label of each row in ``X``.

        Returns:
            Fraction of samples whose predicted label equals the true label.
        """
        y_pred = self.predict(X)
        return float(np.mean(y_pred == np.asarray(y)))
    
    

In [None]:
# Two query points together with the class labels expected for them.
x_test = np.array([[0.0, 4.0], [-5.0, 4.0]])
y_test = np.array([0, 1])

# Create the classifier (sklearn-style API) and hand it the training data.
clf = KNeighborClassifier(n_neighbors=3)
clf.fit(x, y)

# Predict the query points; score() runs its own prediction internally and
# compares the result against y_test.
y_pred = clf.predict(x_test)
accuracy = clf.score(x_test, y_test)


# Report inputs, expected labels, predicted labels and the accuracy.
print(f"x_test:\n{x_test}")
print(f"\ny_test:\n{y_test}")
print(f"\ny_pred:\n{y_pred}")
print(f"\nScore: {accuracy}")