<a href="https://colab.research.google.com/github/Thorfinn05/Machine-Learning-4thSem/blob/main/KNN_ErrorRate.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# KNN with Error Rate

In [2]:
import math
from collections import Counter


Distance function

In [3]:
def distance(p1, p2):
    """Return the Euclidean distance between two points.

    Args:
        p1: Iterable of numeric coordinates.
        p2: Iterable of numeric coordinates with as many entries as ``p1``.

    Returns:
        The straight-line distance between the two points, as a float.

    Raises:
        ValueError: If the points do not have the same number of coordinates.
    """
    a, b = tuple(p1), tuple(p2)
    # zip() stops at the shorter input, so without this check a dimension
    # mismatch would be silently truncated into a wrong distance.
    if len(a) != len(b):
        raise ValueError(
            f"points must have the same dimension, got {len(a)} and {len(b)}"
        )
    return math.sqrt(sum((x - y) ** 2 for x, y in zip(a, b)))


KNN function

In [4]:
def knn(train_data, test_point, k=3):
    """Classify ``test_point`` by majority vote among its k nearest neighbours.

    Args:
        train_data: Iterable of ``(features, label)`` pairs.
        test_point: Feature vector to classify.
        k: Number of nearest neighbours that vote; must be at least 1.
            If it exceeds the number of training points, all of them vote.

    Returns:
        The label that occurs most often among the k nearest training points.

    Raises:
        ValueError: If ``k`` is smaller than 1 or ``train_data`` is empty.
    """
    # A negative k would make the slice below drop neighbours from the far
    # end instead of selecting the nearest ones, so reject it up front.
    if k < 1:
        raise ValueError(f"k must be at least 1, got {k}")

    distances = [
        (distance(features, test_point), label)
        for features, label in train_data
    ]
    if not distances:
        raise ValueError("train_data must contain at least one sample")

    # Sort by distance only; the sort is stable, so equally distant
    # points keep their training order.
    distances.sort(key=lambda pair: pair[0])

    # Majority vote over the labels of the k closest points.
    labels = [label for _, label in distances[:k]]
    return Counter(labels).most_common(1)[0][0]


Training data (features, label)

In [5]:
# Labelled training set; every entry is a (features, label) pair.
train_data = [
    # ClassA samples
    ([1, 2], "ClassA"), ([2, 3], "ClassA"), ([3, 3], "ClassA"),
    # ClassB samples
    ([6, 8], "ClassB"), ([7, 7], "ClassB"), ([8, 9], "ClassB"),
]

Test the model on a single point

In [6]:
# Classify a single query point using its three nearest training samples.
test_point = [5, 5]
predicted_class = knn(train_data=train_data, test_point=test_point, k=3)

In [7]:
# Show the query point that was classified.
print(f"Test Point: {test_point}")

Test Point: [5, 5]


In [8]:
# Show the label chosen by the k-nearest-neighbour vote.
print(f"Predicted Class: {predicted_class}")

Predicted Class: ClassB


Test data with some tricky points

In [9]:
# Labelled evaluation set; every entry is a (features, expected_label) pair.
test_data = [
    # Points lying among the ClassA training samples.
    ([1.5, 2.0], "ClassA"),
    ([2.5, 3.0], "ClassA"),
    # Point lying among the ClassB training samples.
    ([7.0, 8.0], "ClassB"),
    # Roughly midway between the two groups, so the vote can go either way.
    ([5.0, 5.0], "ClassA"),
    # Labelled ClassA but sits next to ClassB samples; expected to be misclassified.
    ([6.5, 6.5], "ClassA"),
]

Evaluate error rate

In [10]:
# Classify every test point with k neighbours and count the misclassifications.
k = 3
total = len(test_data)
errors = 0
for features, true_label in test_data:
    pred = knn(train_data, features, k)
    print(f"Point {features} → Predicted: {pred}, Actual: {true_label}")
    # A mismatch contributes 1 to the error count, a match contributes 0.
    errors += int(pred != true_label)

# Fraction of test points whose prediction differs from the true label.
error_rate = errors / total


Point [1.5, 2.0] → Predicted: ClassA, Actual: ClassA
Point [2.5, 3.0] → Predicted: ClassA, Actual: ClassA
Point [7.0, 8.0] → Predicted: ClassB, Actual: ClassB
Point [5.0, 5.0] → Predicted: ClassB, Actual: ClassA
Point [6.5, 6.5] → Predicted: ClassB, Actual: ClassA


In [11]:
# Report how many test points were evaluated (leading blank line kept).
print(f"\nTotal Test Points: {total}")


Total Test Points: 5


In [12]:
# Report the number of misclassified test points.
print(f"Errors: {errors}")

Errors: 2


In [13]:
# Report the fraction of test points that were misclassified.
print(f"Error Rate: {error_rate}")

Error Rate: 0.4


In [14]:
# Accuracy is the complement of the error rate.
print(f"Accuracy: {1 - error_rate}")

Accuracy: 0.6
