# KNN

In [1]:
import numpy as np
import matplotlib.pyplot as plt



# The constructor lets the user create a KNN instance with a chosen number of neighbours (K) and decide whether the data should be normalized.
# Training and prediction happen afterwards, by calling the fit and predict methods on the created object.
# Both self.X_train and self.y_train are initialized to None until the fit method is called to train the model.
class KNN:
    """K-nearest-neighbours classifier using Euclidean distance and majority vote.

    Parameters
    ----------
    K : int, default 3
        Number of nearest training samples that vote on each prediction.
    normalize : bool, default False
        If True, ``fit`` standardizes every feature (zero mean, unit standard
        deviation) and ``predict`` applies the *same* transformation to the
        test data.

    Attributes
    ----------
    X_train, y_train : numpy.ndarray or None
        Stored training features and labels; ``None`` until ``fit`` is called.
    """

    def __init__(self, K=3, normalize=False):
        self.K = K
        self.normalize = normalize
        self.X_train = None
        self.y_train = None
        # Per-feature statistics learned in fit(); they stay None when the
        # model was fitted without normalization.
        self._mean = None
        self._std = None

    def fit(self, X, y):
        """Store the training data, standardizing it first if requested.

        Parameters
        ----------
        X : array-like
            Training features, one row per sample. A 1-D input is treated as
            a single-feature column.
        y : array-like
            One label per training sample.

        Raises
        ------
        ValueError
            If ``X`` is empty or ``X`` and ``y`` differ in length.
        """
        # Converting up front lets plain Python lists work and avoids
        # wrap-around when subtracting unsigned-integer features.
        X = np.asarray(X, dtype=float)
        y = np.asarray(y)
        if X.ndim == 1:
            X = X.reshape(-1, 1)
        if len(X) == 0:
            raise ValueError("X must contain at least one training sample")
        if len(X) != len(y):
            raise ValueError("X and y must contain the same number of samples")

        if self.normalize:
            self._mean = X.mean(axis=0)
            std = X.std(axis=0)
            # A constant feature has zero spread; dividing by 1 instead keeps
            # it at 0 after centring rather than turning it into NaN.
            self._std = np.where(std == 0, 1.0, std)
            X = self._normalize(X)
        else:
            self._mean = None
            self._std = None

        self.X_train = X
        self.y_train = y

    def predict(self, X_test):
        """Predict a label for every sample in ``X_test``.

        Parameters
        ----------
        X_test : array-like
            Test features, one row per sample.

        Returns
        -------
        list
            The predicted label for each test sample, in order. When several
            labels tie for the majority, the smallest label wins.

        Raises
        ------
        RuntimeError
            If the model has not been fitted.
        ValueError
            If ``K`` is smaller than 1.
        """
        if self.X_train is None or self.y_train is None:
            raise RuntimeError("KNN.predict called before fit")
        if self.K < 1:
            raise ValueError("K must be a positive integer")

        X_test = np.asarray(X_test, dtype=float)
        # Scale with the statistics learned during fit (not the test batch's
        # own) so test points live in the same space as the training points.
        if self._mean is not None:
            X_test = self._normalize(X_test)

        predictions = []
        for x in X_test:
            # Euclidean distance from x to every training sample at once.
            distances = np.linalg.norm(self.X_train - x, axis=1)
            nearest_neighbours = np.argsort(distances)[:self.K]
            # np.unique counts votes for any label type, not only the
            # non-negative integers that np.bincount would accept.
            labels, counts = np.unique(
                self.y_train[nearest_neighbours], return_counts=True
            )
            predictions.append(labels[np.argmax(counts)])

        return predictions

    def _normalize(self, X):
        """Standardize ``X`` with the mean and std learned from the training data."""
        return (X - self._mean) / self._std


# Demo: train the classifier on random 2-D points, then label a few new ones.
# Synthetic training set (seeded so every run draws the same points)
np.random.seed(42)
X_train = 10 * np.random.rand(100, 2)
y_train = np.random.randint(0, 2, size=100)

# Ask the user for the model settings
Ks = int(input("Enter the number of neighbors (K): "))
normalize_option = 'yes' == input("Normalize data? (yes/no): ").lower()

# Build the model from the user's choices and train it
knn = KNN(normalize=normalize_option, K=Ks)
knn.fit(X_train, y_train)

# Random query points to classify
X_test = 10 * np.random.rand(10, 2)

# Classify the query points and show the result
predicted_labels = knn.predict(X_test)
print("Predicted labels:", predicted_labels)

Predicted labels: [1, 1, 1, 0, 1, 1, 1, 0, 1, 1]
