<a href="https://colab.research.google.com/github/Nikhilesh-075/6thSem-ML-Lab/blob/main/KNN.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import numpy as np
from collections import Counter

# 1. Load the dataset (using Iris dataset as an example)
def load_iris_data():
    """Fetch the Iris dataset that ships with scikit-learn.

    Returns:
        A ``(X, y)`` pair taken from the ``data`` and ``target`` attributes
        of the object returned by ``sklearn.datasets.load_iris()``.
    """
    # Imported inside the function, so scikit-learn is only needed when
    # this loader is actually called.
    from sklearn.datasets import load_iris

    iris = load_iris()
    return iris.data, iris.target

# 2. Calculate Euclidean distance between two points
def euclidean_distance(point1, point2):
    """Return the Euclidean (L2) distance between two points.

    Both inputs are converted to floating-point arrays before they are
    subtracted. This lets plain Python sequences be passed directly and
    prevents small integer dtypes (for example ``uint8``) from wrapping
    around when the difference is taken and squared.

    Args:
        point1: Array-like of numeric coordinates.
        point2: Array-like of numeric coordinates, broadcastable against
            ``point1``.

    Returns:
        The square root of the sum of squared coordinate differences.
    """
    diff = np.asarray(point1, dtype=float) - np.asarray(point2, dtype=float)
    return np.sqrt(np.sum(diff ** 2))

# 3. KNN Algorithm - Predict a label for a single data point
def knn_predict(X_train, y_train, test_point, k=3):
    """Predict the label of one point by majority vote of its nearest neighbours.

    Distances from ``test_point`` to every training sample are computed in a
    single vectorised NumPy expression, in floating point so that integer
    inputs cannot overflow.

    Args:
        X_train: Array-like of training samples, one sample per entry along
            the first axis.
        y_train: Sequence of labels, one per training sample.
        test_point: Array-like with the same shape as a single training
            sample.
        k: Number of neighbours that vote. If ``k`` exceeds the number of
            training samples, every sample votes.

    Returns:
        The most frequent label among the ``k`` nearest neighbours. When
        several labels share the top count, the one whose first voter is
        closest to ``test_point`` wins.

    Raises:
        ValueError: If ``k`` is smaller than 1, the training set is empty,
            or ``X_train`` and ``y_train`` have different lengths.
    """
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    train = np.asarray(X_train, dtype=float)
    n_samples = len(train)
    if n_samples == 0:
        raise ValueError("X_train must contain at least one sample")
    if len(y_train) != n_samples:
        raise ValueError(
            f"X_train has {n_samples} samples but y_train has {len(y_train)} labels"
        )

    # Flatten each sample's offset so that points of any dimensionality
    # reduce to one distance per training sample.
    offsets = (train - np.asarray(test_point, dtype=float)).reshape(n_samples, -1)
    distances = np.sqrt(np.sum(offsets ** 2, axis=1))

    # A stable sort keeps equally distant neighbours in training order,
    # which makes the selection deterministic.
    nearest = np.argsort(distances, kind="stable")[:k]

    # Counter preserves insertion order, so ties in the vote are resolved
    # in favour of the label seen first, i.e. the nearest neighbour's.
    votes = Counter(y_train[i] for i in nearest)
    return votes.most_common(1)[0][0]

# 4. KNN Algorithm - Predict labels for all test points
def knn_predict_all(X_train, y_train, X_test, k=3):
    """Classify every point in ``X_test`` with ``knn_predict``.

    Args:
        X_train: Training samples, passed through to ``knn_predict``.
        y_train: Training labels, passed through to ``knn_predict``.
        X_test: Iterable of points to classify.
        k: Number of neighbours used for each prediction.

    Returns:
        A list with one predicted label per point, in the order the points
        appear in ``X_test``.
    """
    labels = []
    for query in X_test:
        labels.append(knn_predict(X_train, y_train, query, k))
    return labels

# 5. Evaluate the accuracy of the model
def evaluate_model(X_train, y_train, X_test, y_test, k=3):
    """Return the fraction of test points that are classified correctly.

    Args:
        X_train: Training samples.
        y_train: Training labels.
        X_test: Points to classify.
        y_test: True labels for ``X_test``, one per point.
        k: Number of neighbours used for each prediction.

    Returns:
        The mean of the element-wise "prediction equals true label"
        comparison, i.e. the accuracy as a value between 0 and 1.

    Raises:
        ValueError: If the number of predictions differs from the number of
            labels in ``y_test``.
    """
    predicted = np.asarray(knn_predict_all(X_train, y_train, X_test, k))
    expected = np.asarray(y_test)
    if len(predicted) != len(expected):
        raise ValueError(
            f"got {len(predicted)} predictions for {len(expected)} test labels"
        )

    # Compare as arrays: comparing two plain lists with == yields a single
    # bool, which would collapse the accuracy to 0.0 or 1.0.
    return np.mean(predicted == expected)

# 6. Split the data into training and testing sets
def train_test_split(X, y, test_size=0.2, random_state=None):
    """Randomly split samples and labels into training and test subsets.

    Args:
        X: Array-like of samples, indexed along its first axis.
        y: Array-like of labels with one entry per sample in ``X``.
        test_size: Fraction of the samples (between 0 and 1) placed in the
            test subset. The resulting count is truncated to an integer.
        random_state: Optional seed. When given, the shuffle is drawn from a
            dedicated generator seeded with this value, so the split is
            reproducible. When ``None``, the global NumPy random state is
            used.

    Returns:
        A tuple ``(X_train, X_test, y_train, y_test)`` of NumPy arrays.

    Raises:
        ValueError: If ``X`` and ``y`` differ in length or ``test_size`` is
            outside the range 0 to 1.
    """
    # Integer-array indexing below needs ndarrays; plain lists would fail.
    X = np.asarray(X)
    y = np.asarray(y)
    if len(X) != len(y):
        raise ValueError(f"X has {len(X)} samples but y has {len(y)} labels")
    if not 0 <= test_size <= 1:
        raise ValueError(f"test_size must be between 0 and 1, got {test_size!r}")

    # Calculate the number of test samples
    num_test_samples = int(len(X) * test_size)

    # Shuffle the dataset indices
    if random_state is None:
        indices = np.random.permutation(len(X))
    else:
        indices = np.random.default_rng(random_state).permutation(len(X))

    # The first num_test_samples shuffled indices form the test subset and
    # the remainder form the training subset.
    test_idx = indices[:num_test_samples]
    train_idx = indices[num_test_samples:]

    return X[train_idx], X[test_idx], y[train_idx], y[test_idx]

# 7. Example of usage
# Demo: load the Iris data, hold out part of it, and report k-NN accuracy.
if __name__ == "__main__":
    # Load the feature matrix X and the label vector y
    X, y = load_iris_data()

    # Hold out 20% of the samples for testing. The split is shuffled
    # randomly, so the reported accuracy can differ from run to run.
    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)

    # Classify each held-out sample from its 3 nearest training samples and
    # measure the fraction predicted correctly
    accuracy = evaluate_model(X_train, y_train, X_test, y_test, k=3)

    # accuracy is a fraction; print it as a percentage with two decimals
    print(f"Accuracy: {accuracy * 100:.2f}%")

Accuracy: 96.67%
