<a href="https://colab.research.google.com/github/Sugaminni/Machine-Learning-Assignment-1/blob/main/ML_Assignment_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Imports required libraries
from sklearn.datasets import load_wine
import pandas as pd
import numpy as np

# Fetches the Wine dataset via scikit-learn's load_wine
wine = load_wine()

# Builds a DataFrame from the feature matrix, then appends the class labels
# as a trailing 'target' column
wine_df = pd.DataFrame(data=wine.data, columns=wine.feature_names)
wine_df = wine_df.assign(target=wine.target)

In [None]:
# Separates features and labels
X = wine_df.drop("target", axis=1).values
y = wine_df["target"].values

# Shuffles data with a fixed seed so the train/test split is identical on
# every run; an unseeded shuffle makes the accuracies impossible to reproduce
rng = np.random.default_rng(42)
idx = rng.permutation(len(X))
X, y = X[idx], y[idx]

# 70% training, 30% testing split
split = int(0.7 * len(X))
Xtr, Xte = X[:split], X[split:]
ytr, yte = y[:split], y[split:]

# Standardizes using training statistics only, so no information from the
# test rows leaks into the scaling
mu = Xtr.mean(axis=0)
sd = Xtr.std(axis=0)
sd[sd == 0] = 1  # constant features: avoid dividing by zero
Xtr = (Xtr - mu) / sd
Xte = (Xte - mu) / sd

In [None]:
def knn1(Xtr, ytr, Xte):
    """Classify each row of Xte with the label of its closest training row.

    Closeness is the Euclidean distance between a test row and every row
    of Xtr. Returns a NumPy array holding one predicted label per row of
    Xte, in the same order.
    """
    def nearest_label(sample):
        # Distance from this sample to every training row
        gaps = np.linalg.norm(Xtr - sample, axis=1)
        # Label attached to the closest training row
        return ytr[np.argmin(gaps)]

    return np.array([nearest_label(sample) for sample in Xte])

def accuracy(y_true, y_pred):
    """Return the fraction of predictions that equal the true labels.

    Both arguments may be NumPy arrays or plain sequences; they are
    converted with np.asarray before comparison.

    Raises:
        ValueError: if the two inputs do not have the same shape. Without
            this check, shapes such as (n,) and (n, 1) would broadcast to
            an n-by-n comparison and yield a meaningless score.
    """
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"shape mismatch: y_true {y_true.shape} vs y_pred {y_pred.shape}"
        )
    return (y_true == y_pred).mean()

# 1-nearest-neighbor classification on the full standardized feature set
yp_knn = knn1(Xtr, ytr, Xte)
knn_accuracy = accuracy(yte, yp_knn)
print("KNN accuracy:", knn_accuracy)


In [None]:
def pca(X, k):
    """Return the k leading principal directions of X as matrix columns.

    X is treated as samples in rows and features in columns. The result
    holds the eigenvectors of the feature covariance matrix that belong
    to the k largest eigenvalues, ordered from largest to smallest.
    """
    # Remove the per-feature mean
    centered = X - X.mean(axis=0)
    # Feature-by-feature covariance (columns are the variables)
    cov = np.cov(centered, rowvar=False)
    # Symmetric eigen-decomposition
    eigvals, eigvecs = np.linalg.eigh(cov)
    # Rank eigenvalues from largest to smallest and keep the first k
    order = np.argsort(eigvals)[::-1]
    return eigvecs[:, order[:k]]

# Learns a 2-component PCA basis from the training split only
W_pca = pca(Xtr, 2)

# Projects both splits onto that basis
Xtr_p, Xte_p = Xtr @ W_pca, Xte @ W_pca

# 1-nearest-neighbor classification in the PCA-reduced space
yp_pca = knn1(Xtr_p, ytr, Xte_p)
pca_accuracy = accuracy(yte, yp_pca)
print("PCA + KNN accuracy:", pca_accuracy)

In [None]:
def lda(X, y, k):
    """Return k linear-discriminant directions for X as matrix columns.

    X holds samples in rows and y the class label of each row. The
    within-class and between-class scatter matrices are accumulated class
    by class; the returned columns are the real parts of the eigenvectors
    of pinv(Sw) @ Sb with the k largest (real-part) eigenvalues.
    """
    n_features = X.shape[1]
    grand_mean = X.mean(axis=0)

    within = np.zeros((n_features, n_features))  # Sw: within-class scatter
    between = np.zeros_like(within)              # Sb: between-class scatter

    for label in np.unique(y):
        members = X[y == label]
        centroid = members.mean(axis=0)

        # Scatter of this class around its own centroid
        spread = members - centroid
        within += spread.T @ spread

        # Outer product of the centroid's offset from the overall mean,
        # weighted by the number of samples in the class
        offset = centroid - grand_mean
        between += len(members) * (offset[:, None] * offset[None, :])

    # pinv(Sw) @ Sb is not symmetric in general, hence eig rather than eigh;
    # only the real parts are used for ranking and for the output
    eigvals, eigvecs = np.linalg.eig(np.linalg.pinv(within) @ between)
    order = np.argsort(eigvals.real)[::-1]
    return eigvecs[:, order[:k]].real

# Learns a 2-component LDA basis from the labeled training split
W_lda = lda(Xtr, ytr, 2)

# Projects both splits onto that basis
Xtr_l, Xte_l = Xtr @ W_lda, Xte @ W_lda

# 1-nearest-neighbor classification in the LDA-reduced space
yp_lda = knn1(Xtr_l, ytr, Xte_l)
lda_accuracy = accuracy(yte, yp_lda)
print("LDA + KNN accuracy:", lda_accuracy)

In [None]:
def lvq(Xtr, ytr, Xte, lr=0.01, epochs=30):
    """Train an LVQ classifier with one prototype per class and label Xte.

    Each class starts with its first training sample as prototype. For
    every training sample, in order, the nearest prototype is moved toward
    the sample when their labels agree and away from it otherwise.

    Args:
        Xtr: training samples, one per row.
        ytr: class label of each training row.
        Xte: samples to classify, one per row.
        lr: step size of each prototype update.
        epochs: number of full passes over the training set.

    Returns:
        NumPy array with one predicted label per row of Xte, taken from
        the nearest prototype.
    """
    classes = np.unique(ytr)

    # Initialize one prototype per class. Forcing a float dtype lets the
    # fractional in-place updates below work even when Xtr holds integers
    # (an integer prototype array would raise a casting error on `+=`).
    protos = np.array([Xtr[ytr == c][0] for c in classes], dtype=float)
    proto_labels = classes.copy()

    # Trains LVQ
    for _ in range(epochs):
        for x, label in zip(Xtr, ytr):
            i = np.argmin(np.linalg.norm(protos - x, axis=1))
            step = lr * (x - protos[i])
            if proto_labels[i] == label:
                protos[i] += step  # attract: same class
            else:
                protos[i] -= step  # repel: different class

    # Classifies test data by nearest prototype
    return np.array([
        proto_labels[np.argmin(np.linalg.norm(protos - x, axis=1))]
        for x in Xte
    ])

# LVQ classification with the default learning rate and epoch count
yp_lvq = lvq(Xtr, ytr, Xte)
lvq_accuracy = accuracy(yte, yp_lvq)
print("LVQ accuracy:", lvq_accuracy)

The KNN classifier (k = 1) achieved high accuracy by labeling each sample according to its nearest neighbor. PCA combined with KNN produced similar but lower accuracy, since PCA does not use class labels. LDA combined with KNN showed improved classification by maximizing class separation in the reduced feature space.
In contrast, the LVQ algorithm showed lower accuracy, since each class was represented by only one prototype. Overall, LDA gave the best classification results.