# Implementing K Nearest Neighbours

In [1]:
import numpy as np 
import matplotlib.pyplot as plt

In [7]:
def calculate_distance(point1, point2):
    return np.sqrt(np.sum((np.array(point1) - np.array(point2))**2))


In [8]:
def k_nearest_neighbours(query_point, data, labels, k):
    distance_list = []
    for i in range(len(data)):
        distance = calculate_distance(query_point, data[i])
        distance_list.append((distance, labels[i]))
    
    distance_list.sort(key=(lambda x : x[0]))
    k_points = distance_list[:k]
    return k_points

In [9]:
def predict_value(query_point, data, values, k):
    nearest_neighbours = k_nearest_neighbours(query_point, data, values, k)
    return np.mean(nearest_neighbours)

In [10]:
from collections import Counter

# Step 3: Make a Prediction for Classification
def predict_class(query_point, data, labels, k):
    # Get the k nearest neighbors
    neighbors = k_nearest_neighbours(query_point, data, labels, k)
    
    # Count the occurrences of each class label among the neighbors
    label_counts = Counter(neighbors)
    
    # Return the most common label (majority vote)
    return label_counts.most_common(1)[0][0]

In [11]:
def evaluate_knn(data, labels, test_data, test_labels, k, is_classification=True):
    predictions = []
    for i in range(len(test_data)):
        if is_classification:
            pred = predict_class(test_data[i], data, labels, k)
        else:
            pred = predict_value(test_data[i], data, labels, k)
        predictions.append(pred)
    
    if is_classification:
        accuracy = sum(1 for i in range(len(test_labels)) if predictions[i] == test_labels[i]) / len(test_labels)
        return accuracy
    else:
        mse = np.mean((np.array(predictions) - np.array(test_labels)) ** 2)
        return mse

In [12]:
# Example Usage (Classification)
# Let's say you have a dataset with points and labels
data = [[1, 2], [2, 3], [3, 3], [6, 8], [7, 9], [8, 8]]
labels = ['A', 'A', 'A', 'B', 'B', 'B']
query_point = [3, 4]

# Predict class for the query point
k = 3
predicted_class = predict_class(query_point, data, labels, k)
print("Predicted Class:", predicted_class)

# Example Usage (Regression)
# Let's say you have data points with continuous values instead of labels
values = [1.5, 1.7, 2.0, 6.0, 6.5, 7.0]
predicted_value = predict_value(query_point, data, values, k)
print("Predicted Value:", predicted_value)

# Evaluating the model on test data (Example for Classification)
test_data = [[1, 2], [2, 4], [3, 2], [8, 9]]
test_labels = ['A', 'A', 'A', 'B']
accuracy = evaluate_knn(data, labels, test_data, test_labels, k, is_classification=True)
print("Accuracy:", accuracy)

# Example Evaluation for Regression (using continuous values as labels)
test_values = [1.4, 1.6, 1.8, 6.8]
mse = evaluate_knn(data, values, test_data, test_values, k, is_classification=False)
print("Mean Squared Error:", mse)

Predicted Class: (np.float64(1.0), 'A')
Predicted Value: 1.740440114519881
Accuracy: 0.0
Mean Squared Error: 2.0336754156841628
