# AIML231 Assignment One - Part Two
> Implementation of the K-Nearest Neighbor classifier

## Imports

In [None]:
import numpy as np
import pandas as pd
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from tqdm.notebook import tqdm

## Function Definitions

In [None]:
def euclidean_distance(point1, point2):
    """Return the straight-line (L2) distance separating two points.

    Coordinates are paired positionally; pairing stops at the end of the
    shorter input.
    """
    squared_total = 0
    for a, b in zip(point1, point2):
        # Accumulate the squared gap along each coordinate.
        squared_total = squared_total + (a - b) ** 2
    return squared_total ** 0.5

def knn_classifier(train_data, train_labels, test_data, k=3):
    """Classify each test point by majority vote of its k nearest training points.

    Distances are Euclidean, computed with ``euclidean_distance``. When
    several labels share the highest vote count, the label whose first
    occurrence is nearest to the test point wins, because
    ``Counter.most_common`` keeps first-seen order for equal counts and the
    neighbours are visited from nearest to farthest.

    Args:
        train_data: Sequence of training points (each an iterable of numbers).
        train_labels: Labels aligned positionally with ``train_data``. Any
            array-like is accepted, including a pandas Series whose index is
            not ``0..n-1``.
        test_data: Iterable of points to classify.
        k: Number of neighbours that vote; must be at least 1. A value larger
            than the training set simply uses every training point.

    Returns:
        numpy.ndarray holding one predicted label per test point.

    Raises:
        ValueError: If ``k`` is smaller than 1.
    """
    # k == 0 would leave no voters and a negative k would slice "all but the
    # last |k|" neighbours, so reject both explicitly.
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    # Integer indexing on a pandas Series is label-based; converting to an
    # array guarantees the neighbour indices are used as positions.
    labels = np.asarray(train_labels)

    predictions = []
    for test_point in tqdm(test_data):
        # Distance from the current test point to every training point.
        distances = [euclidean_distance(test_point, train_point) for train_point in train_data]

        # A stable sort keeps equidistant training points in their original
        # order, so the chosen neighbours are reproducible when distances tie.
        k_indices = np.argsort(distances, kind="stable")[:k]

        # Tally the neighbours' labels (nearest first) and keep the winner.
        votes = Counter(labels[k_indices])
        predictions.append(votes.most_common(1)[0][0])

    return np.array(predictions)

## Read and Split the Dataset

In [None]:
# Load the banknote dataset from disk.
df = pd.read_csv('data/banknotes_new.csv')

# The 'Class' column is the target; every remaining column is a feature.
y = df['Class']
X = df.drop(columns='Class')

# Normalise the value range of each feature with StandardScaler
# (fitted on the full dataset, before the split).
X_std = StandardScaler().fit_transform(X)

# Random 50:50 train/test partition, seeded with random_state=100.
X_train, X_test, y_train, y_test = train_test_split(
    X_std, y, random_state=100, test_size=0.5
)

# Keep only the underlying label arrays (knn_classifier looks labels up by
# integer position).
y_train, y_test = y_train.values, y_test.values

## Set K Value and Train the Model

In [None]:
# Number of neighbours that vote on each prediction.
k = 400

# Predict a label for every sample in the test split.
test_label = knn_classifier(X_train, y_train, X_test, k=k)

# Display the prediction array.
print(test_label)

## View the Accuracy of the Trained Model

In [None]:
# Accuracy is the fraction of test samples whose predicted label matches the
# true label. Comparing labels for inequality (rather than subtracting them)
# keeps the result correct for any label encoding, not only for classes whose
# numeric values differ by exactly 1.
accuracy = 1.0 - np.mean(y_test != test_label)
print(f'The accuracy of the knn classifier is {accuracy} when k={k}')