<a href="https://colab.research.google.com/github/HRKhan-DS/Machine_learning_scratch/blob/main/knn_scratch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import confusion_matrix, accuracy_score
from sklearn import datasets

# Load the iris dataset and separate the feature matrix from the class labels.
iris = datasets.load_iris()
X, y = iris.data, iris.target

# Hold out 20% of the samples for testing; the fixed seed keeps the split
# reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits so no test information leaks in.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

class KnnClassifier:
    """k-nearest-neighbours classifier using Euclidean distance.

    ``fit`` stores the training samples; ``predict`` labels each query
    sample with the most frequent label among its ``k`` closest training
    samples. When several labels are equally frequent, the label of the
    nearest neighbour among the tied ones wins.
    """

    def __init__(self, k=3):
        """Create a classifier that votes among the ``k`` nearest samples.

        Args:
            k: Number of neighbours consulted per prediction (positive int).

        Raises:
            ValueError: If ``k`` is not a positive integer.
        """
        if isinstance(k, bool) or not isinstance(k, (int, np.integer)) or k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")
        self.k = int(k)
        self.X_train = None
        self.y_train = None

    def fit(self, X_train, y_train):
        """Store the training data.

        Args:
            X_train: Array-like of training samples, one sample per entry.
            y_train: Array-like of labels, one per training sample.

        Raises:
            ValueError: If no samples are given or the number of labels does
                not match the number of samples.
        """
        # Convert once to float so plain lists are accepted and unsigned
        # integer features cannot wrap around when differences are taken.
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train)
        if X_train.ndim == 0 or X_train.shape[0] == 0:
            raise ValueError("X_train must contain at least one sample")
        if y_train.shape[:1] != X_train.shape[:1]:
            raise ValueError(
                "X_train and y_train must contain the same number of samples"
            )
        self.X_train = X_train
        self.y_train = y_train

    def _euclidean_distance(self, x1, x2):
        """Return the Euclidean distance between two samples."""
        diff = np.asarray(x1, dtype=float) - np.asarray(x2, dtype=float)
        return np.sqrt(np.sum(diff ** 2))

    def _get_neighbors(self, X_test):
        """Return indices of the ``k`` training samples closest to one query.

        Args:
            X_test: A single query sample.

        Returns:
            Array of at most ``k`` training indices, nearest first.
        """
        sample = np.asarray(X_test, dtype=float)
        # One vectorised pass over all training samples; flattening the
        # trailing axes lets scalar (1-D training data) samples work too.
        diffs = (self.X_train - sample).reshape(len(self.X_train), -1)
        distances = np.sqrt(np.sum(diffs ** 2, axis=1))
        # A stable sort keeps equidistant samples in training order, so the
        # selected neighbours are deterministic.
        neighbors_indices = np.argsort(distances, kind="stable")[:self.k]
        return neighbors_indices

    def _vote(self, neighbors):
        """Return the most common label among the given training indices."""
        neighbor_labels = [self.y_train[i] for i in neighbors]
        # Counter keeps first-seen order, so on a tie the label of the
        # closest neighbour is returned.
        most_common = Counter(neighbor_labels).most_common(1)[0][0]
        return most_common

    def predict(self, X_test):
        """Predict a label for every sample in ``X_test``.

        Args:
            X_test: Array-like of query samples, one sample per entry.

        Returns:
            numpy array holding one predicted label per query sample.

        Raises:
            RuntimeError: If called before ``fit``.
        """
        if getattr(self, "X_train", None) is None:
            raise RuntimeError("fit must be called before predict")
        X_test = np.asarray(X_test, dtype=float)
        predictions = [self._vote(self._get_neighbors(x)) for x in X_test]
        return np.array(predictions)
# Build a 3-nearest-neighbour classifier and hand it the training split
knn = KnnClassifier(k=3)
knn.fit(X_train, y_train)

# Label the training split first, then the held-out test split
y_train_pred, y_test_pred = (knn.predict(split) for split in (X_train, X_test))

# Score the predictions against the true labels of each split
train_accuracy = accuracy_score(y_train, y_train_pred)
test_accuracy = accuracy_score(y_test, y_test_pred)

# Per-class breakdown of the test predictions
conf_matrix = confusion_matrix(y_test, y_test_pred)

# Report accuracies to two decimals, followed by the confusion matrix
print(f"Train Accuracy: {train_accuracy:.2f}")
print(f"Test Accuracy: {test_accuracy:.2f}")
print("\nConfusion Matrix (Test Data):")
print(conf_matrix)


Train Accuracy: 0.94
Test Accuracy: 1.00

Confusion Matrix (Test Data):
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]
