<a href="https://colab.research.google.com/github/PXDHU/Machine-Learning/blob/main/ML_K_Nearest_Neighbour_Algorithm.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# K-Nearest Neighbour Algorithm


### 1. Implement a K-NN classification algorithm with Euclidean and Manhattan distance metrics. The program should be generic and should work for any value of k on the Iris dataset.
- Keep 80% of the samples for training and the rest for testing.
- Show the results using both distance metrics.
- Compare your results with the corresponding scikit-learn library function.


In [None]:
import numpy as np
from sklearn.datasets import load_iris
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

In [None]:
class KNNClassifier:
    """k-nearest-neighbour classifier with a selectable distance metric.

    Each query point receives the most common label among its ``k`` closest
    training samples. Labels may be of any sortable type (integers, strings,
    ...). When several labels share the highest vote count, the smallest
    label in sorted order wins.

    Parameters
    ----------
    k : int, default=3
        Number of neighbours that vote. Must be a positive integer. If it is
        larger than the number of training samples, every sample votes.
    distance_metric : {'euclidean', 'manhattan'}, default='euclidean'
        Metric used to rank the training samples. The value is checked when a
        distance is first computed, not at construction time.
    """

    def __init__(self, k=3, distance_metric='euclidean'):
        self.k = k
        self.distance_metric = distance_metric

    def euclidean_distance(self, point1, point2):
        """Return the Euclidean (L2) distance between two points."""
        difference = np.asarray(point1) - np.asarray(point2)
        return np.sqrt(np.sum(difference ** 2))

    def manhattan_distance(self, point1, point2):
        """Return the Manhattan (L1) distance between two points."""
        difference = np.asarray(point1) - np.asarray(point2)
        return np.sum(np.abs(difference))

    def _get_distance(self, point1, point2):
        """Dispatch to the configured metric.

        Raises
        ------
        ValueError
            If ``distance_metric`` is neither 'euclidean' nor 'manhattan'.
        """
        if self.distance_metric == 'euclidean':
            return self.euclidean_distance(point1, point2)
        elif self.distance_metric == 'manhattan':
            return self.manhattan_distance(point1, point2)
        else:
            raise ValueError('Invalid distance metric specified.')

    def _check_k(self):
        """Raise unless ``self.k`` is a positive integer."""
        if not isinstance(self.k, (int, np.integer)):
            raise TypeError('k must be an integer, got %r.' % (self.k,))
        if self.k < 1:
            # A non-positive k would otherwise slice from the wrong end of the
            # ranking (negative k) or leave no voters at all (k == 0).
            raise ValueError('k must be at least 1, got %r.' % (self.k,))

    def _get_k_nearest_labels(self, X_train, y_train, test_point):
        """Return the labels of the ``k`` training samples nearest to ``test_point``.

        The labels are returned as a list ordered from nearest to farthest.
        """
        self._check_k()
        # Coerce to arrays so positional indexing works for lists and for
        # pandas objects whose index is no longer 0..n-1 after shuffling.
        train_points = np.asarray(X_train)
        train_labels = np.asarray(y_train)

        distances = np.array(
            [self._get_distance(test_point, train_point) for train_point in train_points]
        )
        # A stable sort keeps equidistant samples in their training order, so
        # tie-breaking is deterministic.
        nearest = np.argsort(distances, kind='stable')[:self.k]
        return list(train_labels[nearest])

    def _get_majority_vote(self, labels):
        """Return the most frequent label; ties go to the smallest label.

        Raises
        ------
        ValueError
            If ``labels`` is empty.
        """
        labels = np.asarray(labels)
        if labels.size == 0:
            raise ValueError('Cannot take a majority vote over zero labels.')
        # np.unique returns the distinct labels sorted, with their counts, and
        # works for any sortable dtype (np.bincount only accepts non-negative
        # integers). argmax picks the first maximum, i.e. the smallest label.
        unique_labels, counts = np.unique(labels, return_counts=True)
        return unique_labels[np.argmax(counts)]

    def fit(self, X_train, y_train):
        """Store the training samples and their labels.

        Raises
        ------
        ValueError
            If ``X_train`` and ``y_train`` differ in length or are empty.
        """
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)
        if X_train.shape[:1] != y_train.shape[:1]:
            raise ValueError('X_train and y_train must contain the same number of samples.')
        if X_train.size == 0:
            raise ValueError('Training data must contain at least one sample.')
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        """Return a list with one predicted label per row of ``X_test``."""
        return [
            self._get_majority_vote(
                self._get_k_nearest_labels(self.X_train, self.y_train, test_point)
            )
            for test_point in np.asarray(X_test)
        ]


In [None]:
# Load the Iris dataset
# `iris.data` is used as the feature matrix X and `iris.target` as the label
# vector y for the classification experiment below.
iris = load_iris()
X = iris.data
y = iris.target

In [None]:
# Split the dataset into training and testing sets
# test_size=0.2 holds out 20% of the samples for testing (80% for training);
# random_state=42 fixes the shuffle so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [None]:
# Custom K-NN Classifier
# Fit the hand-written classifier with k=5 neighbours and the Euclidean metric,
# then predict a label for every held-out test sample.
# NOTE(review): only the Euclidean metric is exercised here, although the task
# statement asks for results with both metrics; distance_metric='manhattan'
# selects the other one.
knn_classifier_custom = KNNClassifier(k=5, distance_metric='euclidean')
knn_classifier_custom.fit(X_train, y_train)
predictions_custom = knn_classifier_custom.predict(X_test)

In [None]:
# scikit-learn KNeighborsClassifier
# Reference model configured with the same neighbour count (n_neighbors=5) and
# metric as the custom classifier, trained and evaluated on the same split.
knn_classifier_sklearn = KNeighborsClassifier(n_neighbors=5, metric='euclidean')
knn_classifier_sklearn.fit(X_train, y_train)
predictions_sklearn = knn_classifier_sklearn.predict(X_test)

In [None]:
# Compare accuracy
# Score each model's predictions against the true labels of the test split.
accuracy_custom = accuracy_score(y_test, predictions_custom)
accuracy_sklearn = accuracy_score(y_test, predictions_sklearn)

In [None]:
# Report the two accuracies side by side for comparison.
print("Custom K-NN Classifier accuracy:", accuracy_custom)
print("scikit-learn KNeighborsClassifier accuracy:", accuracy_sklearn)

Custom K-NN Classifier accuracy: 1.0
scikit-learn KNeighborsClassifier accuracy: 1.0


### 2. Modify your K-NN implementation for a regression problem.
- Make an auxiliary dataset from the Iris.csv file consisting of only sepal length and sepal width. Assume you want to predict sepal width from sepal length.
- Keep 80% of the samples for training and the rest for testing.
- Show the results using the Euclidean metric and different values of k.
- Use the appropriate scikit-learn library function to apply K-NN regression to the given dataset and compare the results with your implementation.


In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsRegressor
from sklearn.metrics import mean_squared_error

In [None]:
# Custom K-NN Regressor
class KNNRegressor:
    """k-nearest-neighbour regressor with a selectable distance metric.

    The prediction for a query point is the unweighted mean of the target
    values of its ``k`` closest training samples.

    Parameters
    ----------
    k : int, default=3
        Number of neighbours to average. Must be a positive integer. If it is
        larger than the number of training samples, every sample is averaged.
    distance_metric : {'euclidean', 'manhattan'}, default='euclidean'
        Metric used to rank the training samples. The value is checked when a
        distance is first computed, not at construction time.
    """

    def __init__(self, k=3, distance_metric='euclidean'):
        self.k = k
        self.distance_metric = distance_metric

    def euclidean_distance(self, point1, point2):
        """Return the Euclidean (L2) distance between two points."""
        difference = np.asarray(point1) - np.asarray(point2)
        return np.sqrt(np.sum(difference ** 2))

    def manhattan_distance(self, point1, point2):
        """Return the Manhattan (L1) distance between two points."""
        difference = np.asarray(point1) - np.asarray(point2)
        return np.sum(np.abs(difference))

    def _get_distance(self, point1, point2):
        """Dispatch to the configured metric.

        Raises
        ------
        ValueError
            If ``distance_metric`` is neither 'euclidean' nor 'manhattan'.
        """
        if self.distance_metric == 'euclidean':
            return self.euclidean_distance(point1, point2)
        elif self.distance_metric == 'manhattan':
            return self.manhattan_distance(point1, point2)
        else:
            raise ValueError('Invalid distance metric specified.')

    def _check_k(self):
        """Raise unless ``self.k`` is a positive integer."""
        if not isinstance(self.k, (int, np.integer)):
            raise TypeError('k must be an integer, got %r.' % (self.k,))
        if self.k < 1:
            # k == 0 would average an empty list (nan); a negative k would
            # slice the ranking from the wrong end.
            raise ValueError('k must be at least 1, got %r.' % (self.k,))

    def _get_k_nearest_values(self, X_train, y_train, test_point):
        """Return the targets of the ``k`` training samples nearest to ``test_point``.

        The values are returned as a list ordered from nearest to farthest.
        """
        self._check_k()
        # Coerce to arrays so positional indexing works for lists and for
        # pandas objects whose index is no longer 0..n-1 after shuffling.
        train_points = np.asarray(X_train)
        train_targets = np.asarray(y_train)

        distances = np.array(
            [self._get_distance(test_point, train_point) for train_point in train_points]
        )
        # A stable sort keeps equidistant samples in their training order, so
        # tie-breaking is deterministic.
        nearest = np.argsort(distances, kind='stable')[:self.k]
        return list(train_targets[nearest])

    def _get_average_value(self, values):
        """Return the arithmetic mean of ``values``."""
        return np.mean(values)

    def fit(self, X_train, y_train):
        """Store the training samples and their target values.

        Raises
        ------
        ValueError
            If ``X_train`` and ``y_train`` differ in length or are empty.
        """
        X_train = np.asarray(X_train)
        y_train = np.asarray(y_train)
        if X_train.shape[:1] != y_train.shape[:1]:
            raise ValueError('X_train and y_train must contain the same number of samples.')
        if X_train.size == 0:
            raise ValueError('Training data must contain at least one sample.')
        self.X_train = X_train
        self.y_train = y_train

    def predict(self, X_test):
        """Return a list with one predicted value per row of ``X_test``."""
        return [
            self._get_average_value(
                self._get_k_nearest_values(self.X_train, self.y_train, test_point)
            )
            for test_point in np.asarray(X_test)
        ]

In [None]:
# Load the Iris dataset and extract sepal length and sepal width
# NOTE(review): 'Iris.csv' is resolved relative to the working directory, so the
# file presumably has to be placed there first (e.g. uploaded to the Colab
# session) — confirm.
# This rebinds X and y, replacing the classification data loaded earlier.
iris_df = pd.read_csv('Iris.csv')
# Double brackets select a one-column DataFrame, so X stays two-dimensional;
# y is taken from a single column and is one-dimensional.
X = iris_df[['SepalLengthCm']].values
y = iris_df['SepalWidthCm'].values


In [None]:
# Split the dataset into training and testing sets
# test_size=0.2 holds out 20% of the samples for testing (80% for training);
# random_state=42 fixes the shuffle so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)


In [None]:
# Custom K-NN Regressor
# Fit the hand-written regressor with k=3 neighbours and the Euclidean metric,
# predict sepal width for the test samples, and score with mean squared error.
# NOTE(review): only k=3 is evaluated here, although the task statement asks
# for results with different k values.
knn_regressor_custom = KNNRegressor(k=3, distance_metric='euclidean')
knn_regressor_custom.fit(X_train, y_train)
predictions_custom = knn_regressor_custom.predict(X_test)
mse_custom = mean_squared_error(y_test, predictions_custom)

In [None]:
# scikit-learn KNeighborsRegressor
# Reference model configured with the same neighbour count (n_neighbors=3) and
# metric as the custom regressor, trained and scored on the same split.
knn_regressor_sklearn = KNeighborsRegressor(n_neighbors=3, metric='euclidean')
knn_regressor_sklearn.fit(X_train, y_train)
predictions_sklearn = knn_regressor_sklearn.predict(X_test)
mse_sklearn = mean_squared_error(y_test, predictions_sklearn)

In [None]:
# Report the two mean squared errors side by side for comparison.
# NOTE(review): the values need not match exactly; with a single feature many
# training points are presumably equidistant from a query, and the two
# implementations may break such ties differently — confirm.
print("Custom K-NN Regressor MSE:", mse_custom)
print("scikit-learn KNeighborsRegressor MSE:", mse_sklearn)

Custom K-NN Regressor MSE: 0.13229629629629627
scikit-learn KNeighborsRegressor MSE: 0.14244444444444443
