<a href="https://colab.research.google.com/github/Atikur295/Machine-Learning-Lab/blob/main/Lab_report_2.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import numpy as np
from collections import Counter

class CustomKNN:
    def __init__(self, k=3):
        self.k = k

    def fit(self, X_train, y_train):
        self.X_train = np.array(X_train)
        self.y_train = np.array(y_train)

    def predict(self, X_test):
        return [self._predict_point(x) for x in X_test]

    def _predict_point(self, x):
        distances = np.linalg.norm(self.X_train - x, axis=1)
        k_indices = distances.argsort()[:self.k]
        k_nearest_labels = self.y_train[k_indices]
        most_common = Counter(k_nearest_labels).most_common(1)
        return most_common[0][0]


In [5]:
def accuracy(y_true, y_pred):
    return np.mean(np.array(y_true) == np.array(y_pred))

def confusion_matrix(y_true, y_pred):
    labels = np.unique(y_true)
    cm = np.zeros((len(labels), len(labels)), dtype=int)
    label_to_index = {label: idx for idx, label in enumerate(labels)}
    for true, pred in zip(y_true, y_pred):
        cm[label_to_index[true]][label_to_index[pred]] += 1
    return cm

def precision_recall_f1(y_true, y_pred):
    cm = confusion_matrix(y_true, y_pred)
    TP = np.diag(cm)
    FP = np.sum(cm, axis=0) - TP
    FN = np.sum(cm, axis=1) - TP
    precision = np.mean(TP / (TP + FP + 1e-10))
    recall = np.mean(TP / (TP + FN + 1e-10))
    f1 = 2 * precision * recall / (precision + recall + 1e-10)
    return precision, recall, f1


In [8]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

data = load_iris()
X = data.data
y = data.target

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)


In [9]:
# Train custom KNN
model = CustomKNN(k=3)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

# Evaluate using custom metrics
acc = accuracy(y_test, y_pred)
prec, rec, f1 = precision_recall_f1(y_test, y_pred)

print("Accuracy:", acc)
print("Precision:", prec)
print("Recall:", rec)
print("F1-score:", f1)


Accuracy: 1.0
Precision: 0.9999999999931175
Recall: 0.9999999999931175
F1-score: 0.9999999999431175


In [14]:
def evaluate_knn_custom(X, y, k_values, split_ratios):
    results = []
    for split in split_ratios:
        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=split, random_state=42)
        for k in k_values:
            model = CustomKNN(k)
            model.fit(X_train, y_train)
            y_pred = model.predict(X_test)
            acc = accuracy(y_test, y_pred)
            prec, rec, f1 = precision_recall_f1(y_test, y_pred)
            results.append((k, split, acc, prec, rec, f1))
    return results

k_values = [1, 3, 5, 7]
split_ratios = [0.2, 0.3, 0.4]
results = evaluate_knn_custom(X, y, k_values, split_ratios)

for r in results:
    print(f"K={r[0]}, Split={r[1]}, Acc={r[2]:.2f}, Prec={r[3]:.2f}, Rec={r[4]:.2f}, F1={r[5]:.2f}")


K=1, Split=0.2, Acc=1.00, Prec=1.00, Rec=1.00, F1=1.00
K=3, Split=0.2, Acc=1.00, Prec=1.00, Rec=1.00, F1=1.00
K=5, Split=0.2, Acc=1.00, Prec=1.00, Rec=1.00, F1=1.00
K=7, Split=0.2, Acc=0.97, Prec=0.97, Rec=0.96, F1=0.97
K=1, Split=0.3, Acc=1.00, Prec=1.00, Rec=1.00, F1=1.00
K=3, Split=0.3, Acc=1.00, Prec=1.00, Rec=1.00, F1=1.00
K=5, Split=0.3, Acc=1.00, Prec=1.00, Rec=1.00, F1=1.00
K=7, Split=0.3, Acc=1.00, Prec=1.00, Rec=1.00, F1=1.00
K=1, Split=0.4, Acc=0.98, Prec=0.98, Rec=0.98, F1=0.98
K=3, Split=0.4, Acc=0.98, Prec=0.98, Rec=0.98, F1=0.98
K=5, Split=0.4, Acc=0.98, Prec=0.98, Rec=0.98, F1=0.98
K=7, Split=0.4, Acc=0.98, Prec=0.98, Rec=0.98, F1=0.98


In [11]:
from sklearn.neighbors import KNeighborsClassifier

def evaluate_sklearn_knn(X_train, X_test, y_train, y_test, k):
    model = KNeighborsClassifier(n_neighbors=k)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    acc = accuracy(y_test, y_pred)
    prec, rec, f1 = precision_recall_f1(y_test, y_pred)
    return acc, prec, rec, f1

# Example comparison using best k=3 and split=0.3
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)
acc, prec, rec, f1 = evaluate_sklearn_knn(X_train, X_test, y_train, y_test, k=3)

print("Sklearn KNN - Accuracy:", acc)
print("Precision:", prec)
print("Recall:", rec)
print("F1-score:", f1)


Sklearn KNN - Accuracy: 1.0
Precision: 0.9999999999931175
Recall: 0.9999999999931175
F1-score: 0.9999999999431175
