# **CNN + LDA + K-Means**

In [None]:
import numpy as np
from sklearn.cluster import KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.discriminant_analysis import LinearDiscriminantAnalysis as LDA
from sklearn.metrics import accuracy_score

# Read the pre-extracted CNN feature matrices and the label arrays from Drive,
# in the order: train features, test features, train labels, test labels.
x_train_cnn, x_test_cnn, y_train, y_test = (
    np.load(npy_path)
    for npy_path in (
        '/content/drive/MyDrive/features_prml/cnn_train.npy',
        '/content/drive/MyDrive/features_prml/cnn_test.npy',
        '/content/drive/MyDrive/features_prml/y_train.npy',
        '/content/drive/MyDrive/features_prml/y_test.npy',
    )
)

# Collapse both label arrays to 1-D.
y_train, y_test = y_train.flatten(), y_test.flatten()

print(f"Train cnn Shape: {x_train_cnn.shape}, Test cnn Shape: {x_test_cnn.shape}")


# Perform LDA for dimensionality reduction: fit on the training features and
# labels, then project both splits with the fitted model.
# n_components = classes - 1
lda = LDA(n_components=9).fit(x_train_cnn, y_train)
x_train_lda = lda.transform(x_train_cnn)
x_test_lda = lda.transform(x_test_cnn)


# Apply K-Means clustering with 10 clusters on the LDA projection of the
# training split, then assign a cluster id to every train and test sample.
kmeans_cnn_lda = KMeans(n_clusters=10, random_state=42, n_init=50).fit(x_train_lda)
y_train_pred = kmeans_cnn_lda.predict(x_train_lda)
y_test_pred = kmeans_cnn_lda.predict(x_test_lda)


def map_labels(y_true, y_pred):
    """Map cluster ids to class labels by majority vote.

    Every sample is relabelled with the most frequent true label among the
    samples that share its cluster id. Ties are resolved in favour of the
    smallest label.

    Parameters
    ----------
    y_true : array-like of shape (n_samples,)
        Ground-truth labels.
    y_pred : array-like of shape (n_samples,)
        Cluster id assigned to each sample. Any set of ids is accepted; the
        ids do not have to lie in ``range(10)``.

    Returns
    -------
    numpy.ndarray of shape (n_samples,)
        The majority label of each sample's cluster, with the dtype of
        ``y_true``.

    Raises
    ------
    ValueError
        If ``y_true`` and ``y_pred`` do not contain the same number of samples.
    """
    y_true = np.asarray(y_true).ravel()
    y_pred = np.asarray(y_pred).ravel()
    if y_true.shape != y_pred.shape:
        raise ValueError(
            f"y_true and y_pred must have the same length, "
            f"got {y_true.shape[0]} and {y_pred.shape[0]}"
        )

    mapped = np.empty(y_pred.shape, dtype=y_true.dtype)
    # Iterate over the cluster ids that actually occur instead of a fixed range,
    # so every id present in y_pred is guaranteed to receive a label.
    for cluster_id in np.unique(y_pred):
        members = y_pred == cluster_id
        # np.unique returns the labels sorted, so argmax on the counts picks the
        # smallest label among ties (same tie-breaking as bincount().argmax()).
        labels, counts = np.unique(y_true[members], return_counts=True)
        mapped[members] = labels[counts.argmax()]
    return mapped

# Learn the cluster -> class assignment (majority vote) on the training split.
y_train_pred_mapped = map_labels(y_train, y_train_pred)

# All members of a cluster receive the same mapped label, so pairing each
# training sample's cluster id with its mapped label recovers the lookup table.
cluster_to_label = dict(zip(y_train_pred.tolist(), y_train_pred_mapped.tolist()))

# Apply the mapping learned on the training data to the test clusters. Deriving
# the mapping from y_test would leak test labels into the reported test accuracy.
unseen_clusters = np.setdiff1d(y_test_pred, y_train_pred)
if unseen_clusters.size:
    raise ValueError(
        f"Test samples fall into clusters with no training members: {unseen_clusters.tolist()}"
    )
y_test_pred_mapped = np.array([cluster_to_label[cluster_id] for cluster_id in y_test_pred.tolist()])


# Calculate Accuracy
train_accuracy = accuracy_score(y_train, y_train_pred_mapped)
test_accuracy = accuracy_score(y_test, y_test_pred_mapped)

print(f"Train Accuracy: {train_accuracy * 100:.2f}%")
print(f"Test Accuracy: {test_accuracy * 100:.2f}%")

from sklearn.metrics import classification_report


print("Classification Report (Test):")
print(classification_report(y_test, y_test_pred_mapped))


Train cnn Shape: (50000, 2048), Test cnn Shape: (10000, 2048)
Train Accuracy: 90.99%
Test Accuracy: 89.02%
Classification Report (Test):
              precision    recall  f1-score   support

           0       0.89      0.94      0.91      1000
           1       0.96      0.92      0.94      1000
           2       0.92      0.82      0.87      1000
           3       0.70      0.89      0.78      1000
           4       0.83      0.90      0.86      1000
           5       0.92      0.78      0.85      1000
           6       0.92      0.90      0.91      1000
           7       0.95      0.87      0.91      1000
           8       0.95      0.95      0.95      1000
           9       0.94      0.93      0.93      1000

    accuracy                           0.89     10000
   macro avg       0.90      0.89      0.89     10000
weighted avg       0.90      0.89      0.89     10000



In [None]:
import pickle
import os

save_dir = "/content/drive/MyDrive/prml/checkpoints"
os.makedirs(save_dir, exist_ok=True)

# Fitted models, training-set projections / cluster ids and the accuracies
# computed earlier in the notebook.
checkpoint = {
    "kmeans_model": kmeans_cnn_lda,
    "lda_model": lda,
    "x_train_lda": x_train_lda,
    "y_train_pred": y_train_pred,  # for finding cluster members
    "train_accuracy": train_accuracy,
    "test_accuracy": test_accuracy
}

# x_train_images is created by the CIFAR-10 loading cell, which sits below this
# cell in the notebook. On a fresh kernel run top-to-bottom it does not exist
# yet, so include it only when it is already defined instead of raising NameError.
if "x_train_images" in globals():
    checkpoint["x_train_images"] = x_train_images  # shape: (50000, 32, 32, 3)
else:
    print("Warning: x_train_images is not defined yet; saving checkpoint without images.")

with open(os.path.join(save_dir, "cnn_kmeans_lda_checkpoint.pkl"), "wb") as f:
    pickle.dump(checkpoint, f)

print("Checkpoint saved successfully.")


Checkpoint saved successfully.


In [None]:
# Mount Google Drive at /content/drive. Other cells in this notebook read from and
# write to paths under /content/drive/MyDrive, so this cell has to be run before them.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import numpy as np
import torchvision
from torchvision import datasets, transforms

# Load CIFAR-10 training images
transform = transforms.Compose([
    transforms.ToTensor()
])

trainset = datasets.CIFAR10(root="./data", train=True, download=True, transform=transform)

# Convert images to one uint8 numpy array. Each tensor is moved from
# channel-first to channel-last order and rescaled by 255. The cast to uint8 is
# done per image, before stacking, so the whole dataset is never held in memory
# as a float array; the resulting values are the same as casting after stacking.
x_train_images = np.stack([
    (np.transpose(img.numpy(), (1, 2, 0)) * 255).astype(np.uint8)
    for img, _ in trainset
])


In [None]:
import pickle
import os

# Bundle the fitted models, the training-set projections / cluster ids, the
# training images and the accuracies into one pickle file on Drive.
save_dir = "/content/drive/MyDrive/prml/checkpoints"
os.makedirs(save_dir, exist_ok=True)
checkpoint_path = os.path.join(save_dir, "cnn_kmeans_lda_checkpoint.pkl")

checkpoint = {
    "kmeans_model": kmeans_cnn_lda,
    "lda_model": lda,
    "x_train_lda": x_train_lda,
    # cluster id per training sample, for finding cluster members
    "y_train_pred": y_train_pred,
    # shape: (50000, 32, 32, 3)
    "x_train_images": x_train_images,
    "train_accuracy": train_accuracy,
    "test_accuracy": test_accuracy,
}

with open(checkpoint_path, "wb") as checkpoint_file:
    pickle.dump(checkpoint, checkpoint_file)

print("Checkpoint saved successfully.")


Checkpoint saved successfully.
