In [2]:
from sklearn.datasets import load_diabetes
import matplotlib.pyplot as plt
import numpy as np
import math
import pandas as pd

# Distance metrics: Euclidean, Manhattan and Chebyshev (Minkowski family), Cosine Similarity / Cosine Distance

In [3]:
def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    Both arguments may be any array-like (NumPy arrays, lists, tuples).
    They are converted to float64 before subtracting, so plain Python
    sequences are accepted and unsigned-integer arrays cannot wrap around
    during the subtraction or the squaring.

    Parameters
    ----------
    x1, x2 : array-like
        Coordinates of the two points; shapes must be equal or broadcastable.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared coordinate differences.
    """
    diff = np.asarray(x1, dtype=np.float64) - np.asarray(x2, dtype=np.float64)
    return np.sqrt(np.sum(diff ** 2))

def manhattan_distance(x1, x2):
    """Return the Manhattan (L1) distance: the sum of absolute coordinate gaps."""
    gaps = np.abs(x1 - x2)
    return np.sum(gaps)

def cosine_similarity(x1, x2):
    """Return the cosine of the angle between two vectors.

    Computed as the dot product divided by the product of the two L2 norms.
    No guard is applied for zero-length vectors, whose norm makes the
    denominator zero.
    """
    inner_product = np.dot(x1, x2)
    norm_product = np.linalg.norm(x1) * np.linalg.norm(x2)
    return inner_product / norm_product

def chebyshev_distance(x1, x2):
    """Return the Chebyshev (L-infinity) distance: the largest absolute coordinate gap."""
    gaps = np.abs(x1 - x2)
    return np.max(gaps)

In [4]:
# Exercise 1: Euclidean and Manhattan distance between (3, 4) and the origin.
print("Exercise 1: ")
x, y = np.array([3, 4]), np.array([0, 0])
print("Euclidean distance:", euclidean_distance(x, y))
print("Manhattan distance:", manhattan_distance(x, y))

print("\n")
# Exercise 2: cosine similarity and cosine distance of two parallel vectors.
print("Exercise 2: ")
x, y = np.array([1, 2]), np.array([2, 4])
similarity = cosine_similarity(x, y)
print("Cosine Similarity:", similarity)
print("Cosine Distance:", 1 - similarity)

print("\n")
# Exercise 3: compare three distance metrics on the same pair of 3-D points.
print("Exercise 3: ")
x, y = np.array([5, 8, 2]), np.array([3, 5, 4])
for title, metric in (
    ("Euclidean_distance:", euclidean_distance),
    ("Manhattan_distance:", manhattan_distance),
    ("Chebyshev_distance:", chebyshev_distance),
):
    print(title, metric(x, y))

Exercise 1: 
Euclidean distance: 5.0
Manhattan distance: 7


Exercise 2: 
Cosine Similarity: 0.9999999999999998
Cosine Distance: 2.220446049250313e-16


Exercise 3: 
Euclidean_distance: 4.123105625617661
Manhattan_distance: 7
Chebyshev_distance: 3


# K-nearest Neighbor

In [5]:
def compute_Knearest_neighbors(distances, k):
    """Return the labels of the k records with the smallest distance.

    Parameters
    ----------
    distances : iterable of tuple
        Records whose first element is the distance and whose second element
        is the label; any further elements are ignored.
    k : int
        Number of nearest neighbours to keep.

    Returns
    -------
    list
        Labels of the k closest records, nearest first.

    Notes
    -----
    The caller's sequence is left untouched: the records are ordered through
    ``sorted()``, which returns a new list. The sort is stable, so records at
    equal distance keep their input order.
    """
    nearest = sorted(distances, key=lambda record: record[0])[:k]
    return [record[1] for record in nearest]

def vote_distance(lables):
    """Return the label that occurs most often in ``lables`` (majority vote).

    Ties go to the label that was seen first, because the tally dict keeps
    insertion order and ``max`` returns the first maximal key.
    """
    tally = {}
    for candidate in lables:
        tally[candidate] = tally.get(candidate, 0) + 1
    return max(tally, key=tally.get)

    

In [6]:
# Toy training set: every record maps one point name to its coordinates and
# also carries the class label of that point.
df = [
    {"A": [1, 2], "label": 0},
    {"B": [3, 4], "label": 0},
    {"C": [5, 6], "label": 1},
    {"D": [7, 8], "label": 1},
]
# Query point to classify.
P = np.array([2, 2])

distances = []
for item in df:
    for name, coords in item.items():
        # The "label" entry is metadata, not a point.
        if name == "label":
            continue
        distance = round(float(euclidean_distance(P, coords)), 2)
        print(f"Distance from {P} to {name}: {distance}")
        distances.append((distance, item["label"], name))

nearest_three = compute_Knearest_neighbors(distances, 3)
print("Label of P(2,2) with K-nearest neighbors = 3:", vote_distance(nearest_three))
nearest_one = compute_Knearest_neighbors(distances, 1)
print("Label of P(2,2) with K-nearest neighbors = 1:", vote_distance(nearest_one))


Distance from [2 2] to A: 1.0
Distance from [2 2] to B: 2.24
Distance from [2 2] to C: 5.0
Distance from [2 2] to D: 7.81
Label of P(2,2) with K-nearest neighbors = 3: 0
Label of P(2,2) with K-nearest neighbors = 1: 0


# K-nearest neighbors in 3D

In [7]:
# Two points with three coordinates each; the helper sums over every coordinate.
x, y = np.array([2, 1, 4]), np.array([5, 3, 0])
print("Euclidean distance:", euclidean_distance(x, y))

Euclidean distance: 5.385164807134504


In [8]:
# Load scikit-learn's diabetes dataset.
# NOTE(review): nothing in the visible notebook reads this object before `data`
# is rebound to a hand-written list of points in the next cell — confirm
# whether this load is still needed.
data = load_diabetes ()

# Impact of K on result

In [9]:
# Toy 2-D data set: five points labelled "A" followed by four points labelled "B".
_CLASS_POINTS = {
    "A": [(1, 2), (2, 3), (3, 1), (1, 4), (2, 2)],
    "B": [(6, 5), (7, 6), (8, 5), (7, 7)],
}
data = [
    {"point": np.array(coords), "label": label}
    for label, members in _CLASS_POINTS.items()
    for coords in members
]

In [10]:
# The distances from the query point P (defined in the earlier toy-example
# cell) to every sample do not depend on k, so compute them once up front.
distances = []
for item in data:
    distance = round(float(euclidean_distance(P, item["point"])), 2)
    distances.append((distance, item["label"]))

# Repeat the majority vote for every neighbourhood size from 1 to len(data).
for k in range(1, len(data) + 1):
    print("Label of P(2,2) with K-nearest neighbors =", k, ":", vote_distance(compute_Knearest_neighbors(distances, k)))

Label of P(2,2) with K-nearest neighbors = 1 : A
Label of P(2,2) with K-nearest neighbors = 2 : A
Label of P(2,2) with K-nearest neighbors = 3 : A
Label of P(2,2) with K-nearest neighbors = 4 : A
Label of P(2,2) with K-nearest neighbors = 5 : A
Label of P(2,2) with K-nearest neighbors = 6 : A
Label of P(2,2) with K-nearest neighbors = 7 : A
Label of P(2,2) with K-nearest neighbors = 8 : A
Label of P(2,2) with K-nearest neighbors = 9 : A


# KNN Binary Classification

In [21]:
import math
from collections import Counter

def euclidean_distance(x1, x2):
    """Return the Euclidean (L2) distance between two points.

    This re-defines the helper of the same name from the first code cell, so
    it is the version used by every cell that runs after this one.

    Both arguments may be any array-like (NumPy arrays, lists, tuples).
    They are converted to float64 before subtracting, so plain Python
    sequences are accepted and unsigned-integer arrays cannot wrap around
    during the subtraction or the squaring.

    Parameters
    ----------
    x1, x2 : array-like
        Coordinates of the two points; shapes must be equal or broadcastable.

    Returns
    -------
    numpy.float64
        Square root of the sum of squared coordinate differences.
    """
    diff = np.asarray(x1, dtype=np.float64) - np.asarray(x2, dtype=np.float64)
    return np.sqrt(np.sum(diff ** 2))

def knn_predict_binary(X_train, y_train, X_test, k=3):
    """Classify each test point by majority vote among its k nearest training points.

    Parameters
    ----------
    X_train : iterable of array-like
        Training points.
    y_train : iterable
        Labels paired positionally with ``X_train``; each label must be
        convertible with ``int()``.
    X_test : iterable of array-like
        Points to classify.
    k : int, default 3
        Number of neighbours that vote; must be at least 1.

    Returns
    -------
    list of int
        One predicted label per test point, in input order.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1. A negative k would otherwise be treated
        as a slice bound and silently drop only the farthest neighbours.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")

    predictions = []
    for point in X_test:
        distances = [
            (euclidean_distance(point, train_point), label)
            for train_point, label in zip(X_train, y_train)
        ]
        # Order by distance only, so ties never fall back to comparing labels.
        distances.sort(key=lambda pair: pair[0])
        k_nearest_labels = [label for _, label in distances[:k]]
        # most_common(1) returns [(label, count)]; keep just the label.
        winner = Counter(k_nearest_labels).most_common(1)[0][0]
        predictions.append(int(winner))
    return predictions

# Three training points of class 0 and one of class 1; the query (2, 2) is given twice.
X_train = np.array([[1, 2], [2, 3], [3, 1], [6, 5]])
y_train = np.array([0, 0, 0, 1])
X_test = np.array([[2, 2], [2, 2]])
predicted = knn_predict_binary(X_train, y_train, X_test, k=3)
print("Predicted label:", ", ".join(str(label) for label in predicted))

Predicted label: 0, 0


# KNN Regression

In [12]:
def knn_regression(X_train, y_train, x_test, k=3):
    """Predict a value for each test point as the mean target of its k nearest training points.

    Parameters
    ----------
    X_train : iterable of array-like
        Training points.
    y_train : iterable of numbers
        Target values paired positionally with ``X_train``.
    x_test : iterable of array-like
        Points to predict for.
    k : int, default 3
        Number of neighbours to average; must be at least 1.

    Returns
    -------
    list of float
        One prediction per test point, in input order.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1. A negative k would otherwise be treated
        as a slice bound and silently average the wrong neighbours.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")

    predictions = []
    for point in x_test:
        distances = [
            (euclidean_distance(point, train_point), target)
            for train_point, target in zip(X_train, y_train)
        ]
        # Sort once, after every distance is known. The sort is stable, so
        # equally distant points keep their training order.
        distances.sort(key=lambda pair: pair[0])
        nearest_targets = [target for _, target in distances[:k]]
        predictions.append(float(np.mean(nearest_targets)))
    return predictions

In [13]:
# One-feature toy data in which the target grows linearly with the feature.
X_train = np.array([100, 150, 200, 250, 300]).reshape(-1, 1)
y_train = np.array([1.0, 1.5, 2.0, 2.5, 3.0])
x_test = np.array([200, 150]).reshape(-1, 1)
predicted_prices = knn_regression(X_train, y_train, x_test, k=2)
print("Predicted price:", predicted_prices)

Predicted price: [1.75, 1.25]


# Weighted Voting

In [14]:
def knn_weighted(X_train, y_train, x_test, k=3):
    """Classify each test point with inverse-distance-weighted k-NN voting.

    Each of the k nearest training points votes for its label with weight
    ``1 / distance``; the label with the largest total weight wins.

    Parameters
    ----------
    X_train : iterable of array-like
        Training points.
    y_train : iterable
        Labels paired positionally with ``X_train``; each label must be
        hashable and convertible with ``int()``.
    x_test : iterable of array-like
        Points to classify.
    k : int, default 3
        Number of neighbours that vote; must be at least 1.

    Returns
    -------
    list of int
        One predicted label per test point, in input order.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1. A negative k would otherwise be treated
        as a slice bound and silently drop only the farthest neighbours.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k}")

    predictions = []
    for point in x_test:
        neighbours = []
        for train_point, label in zip(X_train, y_train):
            # Measure from the current query point. Passing the whole x_test
            # matrix would broadcast and mix every test point into one distance.
            distance = euclidean_distance(point, train_point)
            # An exact match gets infinite weight so that it dominates the vote.
            weight = float('inf') if distance == 0 else 1 / distance
            neighbours.append((distance, weight, label))
        neighbours.sort(key=lambda record: record[0])

        votes = {}
        for _, weight, label in neighbours[:k]:
            votes[label] = votes.get(label, 0) + weight
        predictions.append(int(max(votes, key=votes.get)))
    return predictions

In [15]:
# Same toy data as the binary-classification example; the query (2, 2) is given twice.
X_train = np.array([[1, 2], [2, 3], [3, 1], [6, 5]])
y_train = np.array([0, 0, 0, 1])
x_test = np.array([[2, 2], [2, 2]])
weighted_labels = knn_weighted(X_train, y_train, x_test, k=3)
print("Predicted label: ", weighted_labels)

Predicted label:  [0, 0]
