In [2]:
import numpy as np
import pandas as pd
from collections import Counter
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

# Load the Iris dataset from a local CSV file (it must already be present in the working directory)
data = pd.read_csv('Iris.csv').iloc[:, 1:]  # Drop the first column (the ID column)

X = data.iloc[:, :-1].values  # Features: every remaining column except the last
y = data.iloc[:, -1].values   # Target: the last column

# Split the dataset: 90% training and 10% testing.
# random_state=42 fixes the shuffle so the split is the same on every run.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.1, random_state=42)


In [3]:
# Step 1: Euclidean distance helper
def euclidean_distance(point1, point2):
    """Return the Euclidean (straight-line) distance between two points.

    Both arguments are converted with ``np.array`` before subtracting, so
    lists, tuples and NumPy arrays are all accepted.

    Parameters
    ----------
    point1, point2 : array-like
        Coordinates of the two points.

    Returns
    -------
    The square root of the sum of squared coordinate differences.
    """
    delta = np.array(point1) - np.array(point2)
    squared_total = np.sum(delta ** 2)
    return np.sqrt(squared_total)

# Step 2: Define the KNN function
def knn_predict(X_train, y_train, test_point, k=3):
    """Predict the label of ``test_point`` by majority vote of its k nearest training points.

    Distances are Euclidean and are computed for the whole training set in one
    vectorised NumPy expression. Neighbours are ranked by ascending distance
    with a stable sort, so equidistant training points keep their original
    order. When several labels share the highest vote count, the label that
    appears first among the ranked neighbours (i.e. the closest one) wins.

    Parameters
    ----------
    X_train : array-like, one entry per training sample
        Numeric feature values of the training samples.
    y_train : sequence, same length as ``X_train``
        Label of each training sample.
    test_point : array-like
        Numeric feature values of the point to classify.
    k : int, default 3
        Number of neighbours that vote. If ``k`` exceeds the number of
        training samples, every training sample votes.

    Returns
    -------
    The winning label, taken unchanged from ``y_train``.

    Raises
    ------
    ValueError
        If ``k`` is smaller than 1, if the training set is empty, or if
        ``X_train`` and ``y_train`` have different lengths.
    """
    # A non-positive k would otherwise slice the wrong neighbours (negative k)
    # or fail with an opaque IndexError on an empty vote (k == 0).
    if k < 1:
        raise ValueError(f"k must be a positive integer, got {k!r}")

    n_samples = len(X_train)
    if n_samples == 0:
        raise ValueError("X_train must contain at least one sample")
    if len(y_train) != n_samples:
        raise ValueError(
            f"X_train and y_train must have the same length, "
            f"got {n_samples} and {len(y_train)}"
        )

    # One row per training sample; flattening the test point lets it broadcast
    # against every row in a single subtraction.
    train = np.asarray(X_train, dtype=float).reshape(n_samples, -1)
    point = np.asarray(test_point, dtype=float).reshape(-1)
    distances = np.sqrt(np.sum((train - point) ** 2, axis=1))

    # Stable sort keeps the original order of equidistant samples.
    nearest = np.argsort(distances, kind="stable")[:k]

    # Index y_train element by element so plain lists work and labels keep their type.
    k_nearest_labels = [y_train[i] for i in nearest]

    # Predict the most common label
    return Counter(k_nearest_labels).most_common(1)[0][0]

# Step 3: Prediction for all test points
def knn_predict_all(X_train, y_train, X_test, k=3):
    """Classify every point in ``X_test`` with ``knn_predict``.

    Parameters
    ----------
    X_train, y_train
        Training features and labels, passed through to ``knn_predict``.
    X_test : iterable
        Points to classify; each item is passed as the ``test_point``.
    k : int, default 3
        Number of neighbours, passed through to ``knn_predict``.

    Returns
    -------
    list
        One prediction per item of ``X_test``, in the same order.
    """
    return [knn_predict(X_train, y_train, row, k) for row in X_test]


In [4]:
# Step 4: Grid search to find optimal value of k
def grid_search_k(X_train, y_train, X_test, y_test, k_values):
    """Evaluate each candidate k and return the one with the highest accuracy.

    For every ``k`` in ``k_values`` the points in ``X_test`` are classified
    with ``knn_predict_all`` and scored against ``y_test`` with
    ``accuracy_score``. One "K=..., Accuracy=..." line is printed per candidate.

    Parameters
    ----------
    X_train, y_train
        Training features and labels.
    X_test, y_test
        Features and true labels used to score each candidate.
    k_values : sequence of int
        Candidate neighbour counts, tried in order. Must be indexable, since
        its first element is the initial best guess.

    Returns
    -------
    tuple
        ``(best_k, best_accuracy)``. A candidate only replaces the current
        best when it is strictly more accurate, so ties keep the earliest k.
        If no candidate scores above 0, the result is ``(k_values[0], 0)``.
    """
    winner_k, winner_accuracy = k_values[0], 0

    for k in k_values:
        accuracy = accuracy_score(
            y_test, knn_predict_all(X_train, y_train, X_test, k)
        )

        # Strict comparison: an equal score never displaces an earlier k.
        if accuracy > winner_accuracy:
            winner_k, winner_accuracy = k, accuracy

        print(f"K={k}, Accuracy={accuracy}")

    return winner_k, winner_accuracy

# Candidate neighbour counts to evaluate: k = 1 through 10
k_values = list(range(1, 11))

# Perform the grid search, scoring every candidate k on the held-out test split
best_k, best_accuracy = grid_search_k(X_train, y_train, X_test, y_test, k_values)

# NOTE(review): k is selected on the same test split that is used to report accuracy,
# so the "best" accuracy printed here is an optimistic estimate. A separate validation
# split or cross-validation on the training data would give an unbiased figure.
print(f"Best K: {best_k} with Accuracy: {best_accuracy}")


K=1, Accuracy=1.0
K=2, Accuracy=1.0
K=3, Accuracy=1.0
K=4, Accuracy=1.0
K=5, Accuracy=1.0
K=6, Accuracy=1.0
K=7, Accuracy=0.9333333333333333
K=8, Accuracy=1.0
K=9, Accuracy=1.0
K=10, Accuracy=1.0
Best K: 1 with Accuracy: 1.0
