In [None]:
# K-Nearest Neighbor For Classification
import numpy as np

# Training samples for the classifier: one row per animal, columns are (size, weight).
X = np.array([[2, 3], [4, 5], [6, 7], [7, 8]])

# Class label for each row of X, listed in the same row order.
Y = [1, 1, 0, 0]  # 1 -> Cat, 0 -> Dog

# Display name for each class label; used to turn the winning label into a string.
classes = {0: "dog", 1: "cat"}


def find_k(n):
    """Pick an odd neighbour count K near the square root of the sample count.

    Parameters
    ----------
    n : object with a ``shape`` attribute (e.g. ``np.ndarray``)
        The training data. Only ``n.shape[0]`` (the number of rows) is read.

    Returns
    -------
    int
        ``floor(sqrt(rows))``, increased by one when that value is even, so
        the result is always odd.
    """
    k = int(np.sqrt(n.shape[0]))
    # Test parity on the truncated integer. Testing the float root instead lets
    # every non-perfect square through unchanged (5 rows -> 2.236 -> K = 2).
    return k if k % 2 == 1 else k + 1


def euclidean_distance(X, X_new):
    """Return the Euclidean distance from ``X_new`` to every row of ``X``.

    The squared differences are summed along axis 1, and a length-1 axis is
    inserted at position 1 of the result, so a 2-D ``X`` with ``n`` rows
    yields an ``(n, 1)`` column of distances.
    """
    offsets = X_new - X
    squared_sums = np.sum(offsets ** 2, axis=1)
    return np.sqrt(squared_sums)[:, np.newaxis]


def weighted_distance(d, eps=1e-5):
    """Convert distances into inverse-distance weights.

    Parameters
    ----------
    d : float or np.ndarray
        Non-negative distance(s).
    eps : float, optional
        Small constant added to the distance so a zero distance still gives
        a finite weight. Defaults to ``1e-5``.

    Returns
    -------
    float or np.ndarray
        ``1 / (d + eps)``; smaller distances produce larger weights.
    """
    # The epsilon must be ADDED: subtracting it makes the weight of an exact
    # match (d == 0) negative and divides by zero when d == eps.
    return 1 / (d + eps)


def classification_majority_vote(X, X_new, labels=None, class_names=None):
    """Classify one point with a distance-weighted K-nearest-neighbour vote.

    Parameters
    ----------
    X : np.ndarray
        Training samples, one row per sample.
    X_new : array-like
        The single point to classify, one value per column of ``X``.
    labels : array-like of non-negative ints, optional
        Class label for each row of ``X``. Defaults to the module-level ``Y``.
    class_names : mapping, optional
        Maps a class label to the value that is returned. Defaults to the
        module-level ``classes``.

    Returns
    -------
    The ``class_names`` entry for the label whose neighbours carry the largest
    summed weight among the K nearest training samples.
    """
    # Fall back to the notebook globals so existing two-argument calls still work.
    if labels is None:
        labels = Y
    if class_names is None:
        class_names = classes

    mean = np.mean(X, axis=0)
    std = np.std(X, axis=0)
    # A constant feature has std == 0; the epsilon keeps the division finite
    # (same guard as regression_average).
    scale = std + 1e-8

    # Standardise both the training data and the query so that features with
    # larger numeric ranges do not dominate the distances.
    X_scaled = (X - mean) / scale
    X_new_scaled = (X_new - mean) / scale

    K = find_k(X)  # number of neighbours that get a vote
    # euclidean_distance returns a column; flatten it to one distance per row.
    distances = euclidean_distance(X_scaled, X_new_scaled)[:, 0]

    # Row indices of the K closest samples. A stable sort makes ties resolve
    # deterministically in favour of the lower row index.
    nearest = np.argsort(distances, kind="stable")[:K]

    # Inverse-distance weights, so closer neighbours have more influence.
    weights = weighted_distance(distances[nearest])
    nearest_labels = np.asarray(labels)[nearest].astype(np.int64)

    # Sum the weights per class label and return the name of the heaviest class.
    vote_sums = np.bincount(nearest_labels, weights=weights)
    return class_names[int(np.argmax(vote_sums))]


# Classify one unseen (size, weight) point against the training set X and
# print the resulting class name.
new_point = [5, 6]
prediction = classification_majority_vote(X, new_point)
print(prediction)


dog


In [None]:
# K-Nearest Neighbor For Regression
import numpy as np

# Toy regression data: one feature per row of X, one target value per row in Y.
# NOTE: this rebinds the module-level X and Y that the classification cell also
# defines. classification_majority_vote reads the global Y at call time, so
# re-run the classification cell before classifying again.
X = np.array([[1], [2], [4], [6]])
Y = np.array([100, 150, 300, 360])


def regression_average(X, Y, X_new):
    """Predict a target for each query point with distance-weighted KNN.

    Parameters
    ----------
    X : np.ndarray
        Training features, one row per sample.
    Y : np.ndarray
        Training targets, indexed by the same row positions as ``X``.
    X_new : iterable
        Query points; each element is compared against every row of ``X``.

    Returns
    -------
    np.ndarray
        One prediction per element of ``X_new``, computed as
        ``sum(w * y) / sum(w)`` over the K nearest samples, where
        ``w = 1 / (distance + 1e-5)`` and ``K = int(sqrt(number of rows))``.
    """
    center = np.mean(X, axis=0)
    spread = np.std(X, axis=0)

    # Standardise the training features; the epsilon guards a zero spread.
    train_scaled = (X - center) / (spread + 1e-8)
    neighbor_count = int(np.sqrt(X.shape[0]))

    def predict_one(query):
        # Put the query on the same scale as the training features.
        query_scaled = (query - center) / (spread + 1e-8)

        # Euclidean distance from the query to every training row.
        gaps = np.sqrt(np.sum((query_scaled - train_scaled) ** 2, axis=1))

        # Indices of the closest rows, nearest first.
        closest = np.argsort(gaps)[:neighbor_count]

        # Inverse-distance weights: nearer rows pull the average harder.
        inverse = 1 / (gaps[closest] + 1e-5)

        # Weighted mean of the neighbours' targets.
        return np.sum(inverse * Y[closest]) / np.sum(inverse)

    return np.array([predict_one(query) for query in X_new])





# Demo: predict targets for two query points in a single call.
# X_train / Y_train hold the same values as the X / Y defined earlier in this cell.
X_train = np.array([[1], [2], [4], [6]])
Y_train = np.array([100, 150, 300, 360])
X_test = np.array([[3], [5]])  # Two points at once
# Bare last expression: its value is what the cell displays as output.
regression_average(X_train, Y_train,  X_test)


array([225., 330.])