#**HOG + LDA + KMEANS**

In [None]:
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import accuracy_score
from sklearn.metrics import classification_report

# --- Load pre-computed HoG features and their labels ------------------------
x_train_hog = np.load('/content/drive/MyDrive/features_prml/hog_train.npy')
x_test_hog = np.load('/content/drive/MyDrive/features_prml/hog_test.npy')
# Labels are flattened to 1-D right after loading.
y_train = np.load('/content/drive/MyDrive/features_prml/y_train.npy').flatten()
y_test = np.load('/content/drive/MyDrive/features_prml/y_test.npy').flatten()

print(f"Train HoG Shape: {x_train_hog.shape}, Test HoG Shape: {x_test_hog.shape}")

# --- Standardize features ---------------------------------------------------
# The scaler is fitted on the training split only and then applied to both
# splits, so no test statistics influence the normalization.
scaler = StandardScaler().fit(x_train_hog)
x_train_hog = scaler.transform(x_train_hog)
x_test_hog = scaler.transform(x_test_hog)

# --- LDA for dimensionality reduction ---------------------------------------
# n_components = classes - 1
lda = LDA(n_components=9).fit(x_train_hog, y_train)
x_train_lda = lda.transform(x_train_hog)
x_test_lda = lda.transform(x_test_hog)

# --- K-Means clustering with 10 clusters ------------------------------------
# Centroids are learned in the LDA space of the training split; both splits
# are then assigned to their nearest centroid.
kmeans = KMeans(n_clusters=10, random_state=42, n_init=50).fit(x_train_lda)
y_train_pred, y_test_pred = (
    kmeans.predict(features) for features in (x_train_lda, x_test_lda)
)

def map_labels(y_true, y_pred):
    """Relabel cluster ids with the majority true class of their members.

    For every distinct cluster id found in ``y_pred``, the most frequent
    value of ``y_true`` among that cluster's samples is chosen as the
    cluster's class (majority voting). Ties go to the smallest label,
    because ``argmax`` returns the first maximum of the bin counts.

    The cluster ids are taken from the data itself, so any number of
    clusters and any id values are supported (not only ``0..9``).

    Args:
        y_true: 1-D array-like of non-negative integer class labels
            (a requirement of ``np.bincount``).
        y_pred: 1-D array-like of cluster ids, aligned with ``y_true``.

    Returns:
        A NumPy array with one mapped class label per entry of ``y_pred``.
        It is empty when ``y_pred`` is empty.
    """
    # Accept plain Python sequences as well as ndarrays.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    # `inverse[i]` is the position of y_pred[i] within the sorted `clusters`.
    clusters, inverse = np.unique(y_pred, return_inverse=True)

    # Majority class per cluster; every cluster listed here has >= 1 member.
    majority = np.array([
        np.bincount(y_true[inverse == position]).argmax()
        for position in range(len(clusters))
    ])

    # Fancy indexing broadcasts each cluster's class back to its samples.
    return majority[inverse]

# Relabel the training clusters by majority vote over the training labels.
y_train_pred_mapped = map_labels(y_train, y_train_pred)

# Build the cluster -> class lookup from TRAINING data only and reuse it for
# the test split. Deriving the mapping from y_test would leak test labels
# into the very predictions that are scored against y_test.
cluster_to_class = {
    cluster: np.bincount(y_train[y_train_pred == cluster]).argmax()
    for cluster in np.unique(y_train_pred)
}
# A test sample landing in a cluster with no training members falls back to
# the most frequent training class.
fallback_class = np.bincount(y_train).argmax()
y_test_pred_mapped = np.array(
    [cluster_to_class.get(cluster, fallback_class) for cluster in y_test_pred]
)

# Calculate accuracy
train_accuracy = accuracy_score(y_train, y_train_pred_mapped)
test_accuracy = accuracy_score(y_test, y_test_pred_mapped)

print(f"Train Accuracy: {train_accuracy * 100:.2f}%")
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

print("Classification Report (Test):")
print(classification_report(y_test, y_test_pred_mapped))

Train HoG Shape: (50000, 324), Test HoG Shape: (10000, 324)
Train Accuracy: 52.30%
Test Accuracy: 51.07%
Classification Report (Test):
              precision    recall  f1-score   support

           0       0.66      0.49      0.56      1000
           1       0.62      0.65      0.63      1000
           2       0.50      0.32      0.39      1000
           3       0.41      0.31      0.35      1000
           4       0.38      0.58      0.46      1000
           5       0.42      0.40      0.41      1000
           6       0.45      0.60      0.52      1000
           7       0.54      0.57      0.56      1000
           8       0.60      0.55      0.58      1000
           9       0.61      0.64      0.63      1000

    accuracy                           0.51     10000
   macro avg       0.52      0.51      0.51     10000
weighted avg       0.52      0.51      0.51     10000

