# Assignment: K Nearest Neighbor
## Problem 1 (15 points)
In this problem, you will implement a Distance-Weighted Nearest Neighbor
Classifier and run it on a 2-dimensional dataset. (You can’t use built-in
knn functions in Python to do this problem). You have to experiment
with different distance measures and observe their influence on the classification
performance.
The training data has two classes as shown in figure 1. The training set
along with test points and their correct label are saved in knnData.csv.
Apply 3-NN and report your accuracy rate on test points for the following
distance measures:
1. L2 norm
2. L1 norm
3. L∞ norm

In [2]:
import pandas as pd
import numpy as np
from collections import Counter

# Reading the csv file (training and test tables are stored side by side)
df = pd.read_csv('knnData.csv')

# Obtaining training and test data: the first three columns are used as the
# training set and the last three as the test set (two features + label each).
# If the two tables have different lengths, the shorter one is padded with NaN
# when the file is read; dropping those rows keeps NaN points out of the
# distance computations and out of the accuracy denominator. On a file with
# no missing values this is a no-op.
train_data = df.iloc[:, 0:3].dropna()
test_data = df.iloc[:, -3:].dropna()


# Building functions to obtain L2, L1 and Linf distances
def l2_distance(x1, x2):
    """Return the L2 (Euclidean) distance between ``x1`` and ``x2``.

    Both arguments must support element-wise subtraction (NumPy arrays of
    equal shape). The result is the square root of the sum of the squared
    coordinate differences.
    """
    delta = x1 - x2
    squared_total = np.sum(delta ** 2)
    return np.sqrt(squared_total)

def l1_distance(x1, x2):
    """Return the L1 (Manhattan) distance between ``x1`` and ``x2``.

    Both arguments must support element-wise subtraction (NumPy arrays of
    equal shape). The result is the sum of the absolute coordinate
    differences.
    """
    absolute_gaps = np.abs(x1 - x2)
    return np.sum(absolute_gaps)

def linf_distance(x1, x2):
    """Return the L-infinity (Chebyshev) distance between ``x1`` and ``x2``.

    Both arguments must support element-wise subtraction (NumPy arrays of
    equal shape). The result is the largest absolute coordinate difference.
    """
    absolute_gaps = np.abs(x1 - x2)
    return np.max(absolute_gaps)

# Building a function to obtain the k nearest neighbors
def get_k_nearest_neighbors(train_data, test_point, k, distance_function):
    """Return the ``k`` training samples closest to ``test_point``.

    Args:
        train_data: DataFrame whose first two columns are the point
            coordinates and whose third column is the class label.
        test_point: Sequence holding the two coordinates of the query point.
        k: Number of neighbors to keep.
        distance_function: Callable taking two NumPy arrays and returning
            the distance between them.

    Returns:
        A list of ``(distance, label)`` tuples ordered by increasing
        distance, truncated to the first ``k`` entries. Samples at equal
        distance keep their order of appearance in ``train_data``.
    """
    scored_samples = [
        (
            distance_function(
                np.array([sample.iloc[0], sample.iloc[1]]),
                np.array(test_point),
            ),
            sample.iloc[2],
        )
        for _, sample in train_data.iterrows()
    ]
    # sorted() is stable, so ties stay in training-set order.
    ranked = sorted(scored_samples, key=lambda pair: pair[0])
    return ranked[:k]

# Building a function to implement a distance-weighted knn classifier
def weighted_knn_classification(train_data, test_point, k, distance_function):
    """Classify ``test_point`` with a distance-weighted k-NN vote.

    The ``k`` nearest training samples are obtained from
    ``get_k_nearest_neighbors``; each one votes for its class with weight
    ``1 / (distance**2 + 1e-5)``, and the class with the largest total
    weight is returned.

    Args:
        train_data: Training DataFrame passed through to
            ``get_k_nearest_neighbors``.
        test_point: Coordinates of the point to classify.
        k: Number of neighbors taking part in the vote.
        distance_function: Callable computing the distance between two points.

    Returns:
        The label of the class with the highest accumulated weight.
    """
    nearest = get_k_nearest_neighbors(train_data, test_point, k, distance_function)

    votes = Counter()
    for dist, label in nearest:
        # The small constant keeps the weight finite when dist is zero.
        votes[label] += 1 / (dist**2 + 1e-5)

    top_entries = votes.most_common(1)
    winning_label, _winning_weight = top_entries[0]
    return winning_label

# Building a function to report accuracy rate
def compute_accuracy(train_data, test_data, k, distance_function):
    """Return the fraction of test points classified correctly.

    Each row of ``test_data`` is classified with
    ``weighted_knn_classification`` and the prediction is compared with the
    row's label.

    Args:
        train_data: Training DataFrame forwarded to the classifier.
        test_data: DataFrame whose first two columns are the point
            coordinates and whose third column is the true label.
        k: Number of neighbors used by the classifier.
        distance_function: Callable computing the distance between two points.

    Returns:
        Number of correct predictions divided by the number of test rows.

    Raises:
        ValueError: If ``test_data`` has no rows, since the accuracy is
            undefined in that case.
    """
    total = len(test_data)
    if total == 0:
        # Fail with a clear message instead of a bare ZeroDivisionError.
        raise ValueError('test_data is empty; accuracy is undefined')

    hits = 0
    for _, sample in test_data.iterrows():
        query_point = [sample.iloc[0], sample.iloc[1]]
        predicted = weighted_knn_classification(train_data, query_point, k, distance_function)
        if predicted == sample.iloc[2]:
            hits += 1
    return hits / total

# Evaluating the 3-NN classifier once per distance measure (L2, L1, Linf)

k = 3
accuracy_l2, accuracy_l1, accuracy_linf = (
    compute_accuracy(train_data, test_data, k, metric)
    for metric in (l2_distance, l1_distance, linf_distance)
)

print(f'Accuracy using L2 norm: {accuracy_l2:.3f}')
print(f'Accuracy using L1 norm: {accuracy_l1:.3f}')
print(f'Accuracy using Linf norm: {accuracy_linf:.3f}')





Accuracy using L2 norm: 0.825
Accuracy using L1 norm: 0.875
Accuracy using Linf norm: 0.875
