In [1]:

import pandas as pd
from sklearn.model_selection import cross_val_score
import numpy as np
from scipy.spatial.distance import mahalanobis
from collections import Counter
from sklearn.metrics import confusion_matrix, accuracy_score, precision_score, recall_score, f1_score

# Load the train / validation / test splits of the 10-component PCA dataset.
df_train, df_valid, df_test = map(
    pd.read_csv,
    (
        'IntClasses/MinMax/train_Int_MM_PCA10.csv',
        'IntClasses/MinMax/valid_Int_MM_PCA10.csv',
        'IntClasses/MinMax/test_Int_MM_PCA10.csv',
    ),
)

In [2]:
class KNNMahalanobis:
    """k-nearest-neighbour classifier based on the Mahalanobis distance.

    ``fit`` stores the training rows and the inverse of their feature
    covariance matrix; ``predict`` labels each query row by majority vote
    among the ``k`` training rows with the smallest Mahalanobis distance.
    """

    def __init__(self, k=3):
        """Store the neighbourhood size.

        Args:
            k: Number of nearest training rows that vote on each prediction.
        """
        self.k = k

    def fit(self, X_train, y_train):
        """Memorise the training data and estimate the inverse covariance.

        Args:
            X_train: Array-like of shape (n_samples, n_features).
            y_train: Array-like of labels, one per row of ``X_train``.
        """
        self.X_train = np.array(X_train, dtype=float)
        self.y_train = np.array(y_train)
        # np.cov squeezes its result, so a single feature yields a 0-d array
        # that np.linalg.inv cannot handle; force a (d, d) matrix.
        cov_matrix = np.atleast_2d(np.cov(self.X_train.T))
        try:
            self.S_inv = np.linalg.inv(cov_matrix)
        except np.linalg.LinAlgError:
            # Singular covariance (e.g. a zero-variance feature): fall back
            # to the pseudo-inverse instead of aborting the fit.
            self.S_inv = np.linalg.pinv(cov_matrix)

    def _mahalanobis_distances(self, x):
        """Return the distance from one query row to every training row.

        Args:
            x: 1-D array holding the features of a single query row.

        Returns:
            1-D ndarray with one distance per training row, in training order.
        """
        delta = self.X_train - x
        # Row-wise quadratic form delta_i @ S_inv @ delta_i, evaluated for
        # all training rows at once rather than one Python call per row.
        squared = ((delta @ self.S_inv) * delta).sum(axis=1)
        # Guard against tiny negative values caused by round-off.
        return np.sqrt(np.maximum(squared, 0.0))

    def predict(self, X_test):
        """Predict a label for every row of ``X_test``.

        Args:
            X_test: Array-like of shape (n_queries, n_features).

        Returns:
            List with one predicted label per query row. If ``k`` exceeds the
            number of training rows, all training rows vote.

        Raises:
            ValueError: If ``self.k`` is smaller than 1.
        """
        if self.k < 1:
            raise ValueError(f"k must be a positive integer, got {self.k!r}")
        X_test = np.array(X_test, dtype=float)
        predictions = []
        for x in X_test:
            distances = self._mahalanobis_distances(x)
            # Stable sort: equidistant neighbours keep training order, so
            # tie-breaking is reproducible.
            k_indices = np.argsort(distances, kind="stable")[:self.k]
            k_labels = self.y_train[k_indices]
            # Counter breaks vote ties in favour of the label seen first,
            # i.e. the one belonging to the closest neighbour.
            most_common = Counter(k_labels).most_common(1)[0][0]
            predictions.append(most_common)
        return predictions



# Split each frame into features (all but the last column) and the label
# (the last column).
X_train_orig, y_train_orig = df_train.iloc[:, :-1], df_train.iloc[:, -1]
X_test_orig, y_test_orig = df_test.iloc[:, :-1], df_test.iloc[:, -1]

# Fit the Mahalanobis k-NN on the training split and label the test split.
k = 7
knn = KNNMahalanobis(k=k)
knn.fit(X_train_orig, y_train_orig)
y_pred = knn.predict(X_test_orig)

# Score the predictions against the test labels.
cm = confusion_matrix(y_test_orig, y_pred)
acc = accuracy_score(y_test_orig, y_pred)
precision, recall, f1 = (
    metric(y_test_orig, y_pred, zero_division=0)
    for metric in (precision_score, recall_score, f1_score)
)

print(f"K={k}")
print("Confusion Matrix:")
print(cm)
print(f"Accuracy: {acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}\n")

# Start a fresh accuracy log holding just this run.
accuracy_scores = [acc]


K=7
Confusion Matrix:
[[2814  230]
 [ 707  249]]
Accuracy: 0.7658, Precision: 0.5198, Recall: 0.2605, F1-score: 0.3470



In [3]:
# Load the train / validation / test splits of the 20-component PCA dataset.
df_train, df_valid, df_test = map(
    pd.read_csv,
    (
        'IntClasses/MinMax/train_Int_MM_PCA20.csv',
        'IntClasses/MinMax/valid_Int_MM_PCA20.csv',
        'IntClasses/MinMax/test_Int_MM_PCA20.csv',
    ),
)

In [4]:

# Features are every column except the last; the last column is the label.
X_train_orig, y_train_orig = df_train.iloc[:, :-1], df_train.iloc[:, -1]
X_test_orig, y_test_orig = df_test.iloc[:, :-1], df_test.iloc[:, -1]

# Train on the training split, then classify the test split with k = 7.
k = 7
knn = KNNMahalanobis(k=k)
knn.fit(X_train_orig, y_train_orig)
y_pred = knn.predict(X_test_orig)

# Confusion matrix plus the four summary metrics for this run.
cm = confusion_matrix(y_test_orig, y_pred)
acc = accuracy_score(y_test_orig, y_pred)
precision, recall, f1 = (
    metric(y_test_orig, y_pred, zero_division=0)
    for metric in (precision_score, recall_score, f1_score)
)

print(f"K={k}")
print("Confusion Matrix:")
print(cm)
print(f"Accuracy: {acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}\n")

# The accuracy log is reset here and contains only this run's score.
accuracy_scores = [acc]


K=7
Confusion Matrix:
[[2980   64]
 [ 420  536]]
Accuracy: 0.8790, Precision: 0.8933, Recall: 0.5607, F1-score: 0.6889



In [5]:
# Load the train / validation / test splits of the 25-component PCA dataset.
df_train, df_valid, df_test = map(
    pd.read_csv,
    (
        'IntClasses/MinMax/train_Int_MM_PCA25.csv',
        'IntClasses/MinMax/valid_Int_MM_PCA25.csv',
        'IntClasses/MinMax/test_Int_MM_PCA25.csv',
    ),
)

In [6]:
class KNNMahalanobis:
    """Mahalanobis k-NN classifier that caches the test-to-train distances.

    The distance matrix is computed once by ``compute_distance_matrix`` and
    then reused by ``predict`` for any number of ``k`` values.

    Note: this definition shadows the earlier ``KNNMahalanobis`` class in
    this notebook and has a different interface (``k`` is passed to
    ``predict`` rather than to the constructor).
    """

    def __init__(self):
        """Create an unfitted classifier with no cached distances."""
        self.X_train = None
        self.y_train = None
        self.S_inv = None
        self.distance_matrix = None

    def fit(self, X_train, y_train):
        """Memorise the training data and estimate the inverse covariance.

        Args:
            X_train: Array-like of shape (n_samples, n_features).
            y_train: Array-like of labels, one per row of ``X_train``.
        """
        self.X_train = np.array(X_train, dtype=float)
        self.y_train = np.array(y_train)
        # np.cov squeezes its result, so a single feature yields a 0-d array
        # that np.linalg.inv cannot handle; force a (d, d) matrix.
        cov_matrix = np.atleast_2d(np.cov(self.X_train.T))
        try:
            self.S_inv = np.linalg.inv(cov_matrix)
        except np.linalg.LinAlgError:
            # Singular covariance (e.g. a zero-variance feature): fall back
            # to the pseudo-inverse instead of aborting the fit.
            self.S_inv = np.linalg.pinv(cov_matrix)

    def compute_distance_matrix(self, X_test):
        """Compute and cache the distance from every test row to every training row.

        The result is stored in ``self.distance_matrix`` with shape
        (n_test, n_train).

        Args:
            X_test: Array-like of shape (n_test, n_features).
        """
        X_test = np.array(X_test, dtype=float)
        self.distance_matrix = np.zeros((len(X_test), len(self.X_train)))

        # One vectorised pass per test row: avoids a Python-level call for
        # each (test, train) pair while never materialising an
        # (n_test, n_train, n_features) intermediate.
        for i, test_point in enumerate(X_test):
            delta = self.X_train - test_point
            squared = ((delta @ self.S_inv) * delta).sum(axis=1)
            # Guard against tiny negative values caused by round-off.
            self.distance_matrix[i] = np.sqrt(np.maximum(squared, 0.0))

    def predict(self, k):
        """Predict a label for every cached test row using ``k`` neighbours.

        Args:
            k: Number of nearest training rows that vote. If it exceeds the
                number of training rows, all training rows vote.

        Returns:
            List with one predicted label per row of the cached distance matrix.

        Raises:
            RuntimeError: If ``compute_distance_matrix`` has not been called.
            ValueError: If ``k`` is smaller than 1.
        """
        if self.distance_matrix is None:
            raise RuntimeError(
                "compute_distance_matrix() must be called before predict()"
            )
        if k < 1:
            raise ValueError(f"k must be a positive integer, got {k!r}")

        predictions = []
        for dist_row in self.distance_matrix:
            # Stable sort: equidistant neighbours keep training order, so
            # tie-breaking is reproducible.
            k_indices = np.argsort(dist_row, kind="stable")[:k]
            k_labels = self.y_train[k_indices]
            # Counter breaks vote ties in favour of the label seen first,
            # i.e. the one belonging to the closest neighbour.
            most_common = Counter(k_labels).most_common(1)[0][0]
            predictions.append(most_common)
        return predictions

# Features are every column except the last; the last column is the label.
X_train_orig, y_train_orig = df_train.iloc[:, :-1], df_train.iloc[:, -1]
X_test_orig, y_test_orig = df_test.iloc[:, :-1], df_test.iloc[:, -1]

# Fit once and cache the test-to-train distances so that every k below
# reuses the same matrix.
knn = KNNMahalanobis()
knn.fit(X_train_orig, y_train_orig)
knn.compute_distance_matrix(X_test_orig)

k_values = [1, 3, 5, 7, 11]
accuracy_scores = []

# NOTE(review): every k is scored on the test split; consider selecting k
# on a validation split instead.
for k in k_values:
    y_pred = knn.predict(k)

    cm = confusion_matrix(y_test_orig, y_pred)
    acc = accuracy_score(y_test_orig, y_pred)
    precision, recall, f1 = (
        metric(y_test_orig, y_pred, zero_division=0)
        for metric in (precision_score, recall_score, f1_score)
    )

    print(f"K={k}")
    print("Confusion Matrix:")
    print(cm)
    print(f"Accuracy: {acc:.4f}, Precision: {precision:.4f}, Recall: {recall:.4f}, F1-score: {f1:.4f}\n")

    accuracy_scores.append(acc)


K=1
Confusion Matrix:
[[2985   59]
 [ 134  822]]
Accuracy: 0.9517, Precision: 0.9330, Recall: 0.8598, F1-score: 0.8949

K=3
Confusion Matrix:
[[3023   21]
 [  99  857]]
Accuracy: 0.9700, Precision: 0.9761, Recall: 0.8964, F1-score: 0.9346

K=5
Confusion Matrix:
[[3032   12]
 [  89  867]]
Accuracy: 0.9748, Precision: 0.9863, Recall: 0.9069, F1-score: 0.9450

K=7
Confusion Matrix:
[[3035    9]
 [  92  864]]
Accuracy: 0.9748, Precision: 0.9897, Recall: 0.9038, F1-score: 0.9448

K=11
Confusion Matrix:
[[3036    8]
 [  82  874]]
Accuracy: 0.9775, Precision: 0.9909, Recall: 0.9142, F1-score: 0.9510

