In [14]:
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_wine
from sklearn.decomposition import PCA
import pandas as pd
import numpy as np

In [7]:
# Load the wine dataset bundle and pull out the pieces used by the later cells:
# the feature matrix, the class labels and the ordered list of column names.
wine = load_wine()
X, y = wine.data, wine.target
feature_names = wine.feature_names

In [8]:
# 70/30 train/test split; stratifying on y keeps the class proportions of the
# labels in both parts, and the fixed seed makes the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    train_size=0.7,
    random_state=42,
    stratify=y,
)

# Sanity check on the resulting partition sizes.
print("Train:", X_train.shape, "Test:", X_test.shape)

Train: (124, 13) Test: (54, 13)


In [10]:
# Baseline: 3-nearest-neighbour classifier fitted on the features exactly as loaded.
# NOTE(review): the features are passed to KNN without any scaling step; because
# KNN is distance-based, columns with large numeric ranges may dominate the
# neighbour search - confirm whether an unscaled baseline is intentional.
knn_orig = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Evaluate on the held-out split.
y_pred_orig = knn_orig.predict(X_test)
acc_orig = accuracy_score(y_test, y_pred_orig)

print("KNN (original features) accuracy =", acc_orig)

KNN (original features) accuracy = 0.6851851851851852


In [13]:
# Reduce to two principal components. The projection is learned from the
# training split only and then re-used, unchanged, on the test split.
# NOTE(review): PCA is applied to unstandardised features, so the components
# may mostly reflect whichever columns have the largest variance - confirm
# this is the intended comparison.
pca = PCA(n_components=2)
X_train_pca, X_test_pca = pca.fit_transform(X_train), pca.transform(X_test)

# Same 3-NN classifier as the baseline, now on the 2-D projection.
knn_pca = KNeighborsClassifier(n_neighbors=3).fit(X_train_pca, y_train)
y_pred_pca = knn_pca.predict(X_test_pca)
acc_pca = accuracy_score(y_test, y_pred_pca)

print("KNN (PCA=2 features) accuracy =", acc_pca)

KNN (PCA=2 features) accuracy = 0.6666666666666666


In [16]:
# Column position of the "alcohol" feature, looked up by name in feature_names;
# rule_based_predict reads this module-level value to select the column.
alcohol_index = feature_names.index("alcohol")

def rule_based_predict(X, feature_index=None, thresholds=(12, 13)):
    """Assign a class label from a single feature using two fixed cut-offs.

    The selected column is bucketed into three classes:

    * value <= low          -> 0
    * low < value <= high   -> 1
    * value > high          -> 2

    NaN values compare false against both cut-offs and therefore end up in
    class 0, which matches the behaviour of the previous mask-based version.

    Parameters
    ----------
    X : array-like of shape (n_samples, n_features)
        Feature matrix; converted with ``np.asarray`` before indexing.
    feature_index : int, optional
        Column to threshold. When omitted, the module-level ``alcohol_index``
        is used, so existing ``rule_based_predict(X)`` calls are unchanged.
    thresholds : tuple of (low, high), optional
        The two cut-offs, defaulting to the original hard-coded ``(12, 13)``.

    Returns
    -------
    numpy.ndarray of int, shape (n_samples,)
        Predicted label (0, 1 or 2) for every row of ``X``.

    Raises
    ------
    ValueError
        If ``low`` is greater than ``high``.
    """
    if feature_index is None:
        # Fall back to the notebook-level column index for backward compatibility.
        feature_index = alcohol_index

    low, high = thresholds
    if low > high:
        raise ValueError("thresholds must be given as (low, high) with low <= high")

    values = np.asarray(X)[:, feature_index]
    # Test the upper cut-off first so each value is classified exactly once.
    return np.where(values > high, 2, np.where(values > low, 1, 0)).astype(int)

# Score the hand-written alcohol rule on the held-out split for comparison
# with the learned classifiers.
y_pred_rule = rule_based_predict(X_test)
acc_rule = accuracy_score(y_true=y_test, y_pred=y_pred_rule)
print("Rule-Based accuracy =", acc_rule)

Rule-Based accuracy = 0.42592592592592593


In [None]:
# Compare 3-NN accuracy on the 2-D PCA projection across several distance metrics.
metrics = ["euclidean", "manhattan", "cosine", "mahalanobis", "chebyshev"]

# The Mahalanobis metric is given the inverse covariance matrix of the
# training projection (rows are samples, hence the transpose for np.cov).
cov = np.cov(X_train_pca.T)
VI = np.linalg.inv(cov)

results = []
for metric in metrics:
    # Only Mahalanobis takes extra metric parameters; every other metric is
    # constructed with the defaults.
    extra_kwargs = {"metric_params": {'VI': VI}} if metric == "mahalanobis" else {}
    knn = KNeighborsClassifier(n_neighbors=3, metric=metric, **extra_kwargs)

    y_pred = knn.fit(X_train_pca, y_train).predict(X_test_pca)
    results.append((metric, accuracy_score(y_test, y_pred)))

print("Distance Metric Results:")
for metric_name, metric_accuracy in results:
    print(metric_name, ":", metric_accuracy)