In [86]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import precision_score, recall_score, f1_score, accuracy_score, confusion_matrix
from sklearn.model_selection import KFold
from sklearn.neighbors import KNeighborsClassifier
import statistics
import math

In [87]:
# Euclidean distance function
def euclidean_distance(x1, x2):
    sum = 0
    for i in range(len(x1)):
      sum += (x1[i]-x2[i])**2
    return math.sqrt(sum)

# KNN classifier
def knn_predict(X_train, y_train, x_test, k):
    distances = [euclidean_distance(x, x_test) for x in X_train]
    nearest_indices = np.argsort(distances)[:k]
    nearest_labels = [y_train[i] for i in nearest_indices]
    most_common_string = max(set(nearest_labels), key=nearest_labels.count)
    return most_common_string

# Evaluate KNN classifier
def evaluate_classifier(X_train, y_train, X_test, y_test, k):
    predictions = [knn_predict(X_train, y_train, x_test, k) for x_test in X_test]

    # Calculate evaluation metrics
    precision = precision_score(y_test, predictions, average='weighted')
    recall = recall_score(y_test, predictions, average='weighted')
    f1 = f1_score(y_test, predictions, average='weighted')
    accuracy = accuracy_score(y_test, predictions)
    # print(list(y_test))
    confusion_mat = confusion_matrix(list(y_test), predictions)
    # print(list(y_test),(predictions))
    return precision, recall, f1, accuracy, confusion_mat

In [88]:
# Load and split data
data = pd.read_csv("iris.csv")
data = data.values.tolist()
data = np.array(data)
X = data[:,:4]
X = X.astype(float)
y = data[:,4]

In [89]:
y

array(['Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-setosa', 'Iris-setosa',
       'Iris-setosa', 'Iris-setosa', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versicolor',
       'Iris-versicolor', 'Iris-versicolor', 'Iris-versic

## Without Cross Validation and without in-build KNN model

In [90]:
# Perform classification for k=3 and k=5
k_values = [3, 5]
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
for k in k_values:
    precision, recall, f1, accuracy, confusion_mat = evaluate_classifier(X_train, y_train, X_test, y_test, k)
    print(f"Results for k={k}:")
    print("Precision:", precision)
    print("Recall:", recall)
    print("F1 Score:", f1)
    print("Accuracy:", accuracy)
    print("Confusion Matrix:")
    print(confusion_mat)
    print("\n")

Results for k=3:
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Accuracy: 1.0
Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]


Results for k=5:
Precision: 1.0
Recall: 1.0
F1 Score: 1.0
Accuracy: 1.0
Confusion Matrix:
[[10  0  0]
 [ 0  9  0]
 [ 0  0 11]]




## With Cross Validation and without in-build KNN model

In [92]:
k_values = [3, 5]

k_fold = KFold(n_splits=10, shuffle=True, random_state=42)

precisions = []
recalls = []
f1s = []
accuracies = []


# count = 0

for k in k_values:
  count = 0
  confusion_matx = []
  for train_index, test_index in k_fold.split(X_train):
    x_train = X[train_index]
    Y_train = y[train_index]
    precision, recall, f1, accuracy, confusion_mat = evaluate_classifier(x_train, Y_train, X_test, y_test, k)
    # print(confusion_mat)
    if(count==0):
      count+=1
      confusion_matx = confusion_mat
    else:
      confusion_matx+=confusion_mat
    precisions.append(precision)
    recalls.append(recall)
    f1s.append(f1)
    accuracies.append(accuracy)
  print(f"Results for k={k}:")
  print("Precision:", statistics.mean(precisions))
  print("Recall:", statistics.mean(recalls))
  print("F1 Score:", statistics.mean(f1s))
  print("Accuracy:", statistics.mean(accuracies))
  print("Confusion Matrix:")
  print(confusion_matx/10)
  print("\n")

Results for k=3:
Precision: 0.9700000000000001
Recall: 0.9666666666666667
F1 Score: 0.966750208855472
Accuracy: 0.9666666666666667
Confusion Matrix:
[[10.  0.  0.]
 [ 0.  9.  0.]
 [ 0.  1. 10.]]


Results for k=5:
Precision: 0.9700000000000001
Recall: 0.9666666666666667
F1 Score: 0.966750208855472
Accuracy: 0.9666666666666667
Confusion Matrix:
[[10.  0.  0.]
 [ 0.  9.  0.]
 [ 0.  1. 10.]]


