In [8]:
import numpy as np
from sklearn.cluster import DBSCAN
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.neighbors import KNeighborsClassifier
import tensorflow as tf  # For CIFAR-10 loading
from sklearn.metrics import accuracy_score

# Load and preprocess CIFAR-10
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.cifar10.load_data()
x_train = x_train.astype('float32') / 255.0
x_test = x_test.astype('float32') / 255.0
x_train = x_train.reshape(x_train.shape[0], -1)  # Flatten
x_test = x_test.reshape(x_test.shape[0], -1)
y_train = y_train.flatten()
y_test = y_test.flatten()

# Standardize the data
scaler = StandardScaler()
x_train_scaled = scaler.fit_transform(x_train)
x_test_scaled = scaler.transform(x_test)

# Apply PCA
pca = PCA(n_components=100)  # Retain more variance
train_pca = pca.fit_transform(x_train_scaled)
test_pca = pca.transform(x_test_scaled)
train_labels = y_train
test_labels = y_test

# Print explained variance to verify PCA
print(f"Explained Variance Ratio (cumulative): {np.sum(pca.explained_variance_ratio_):.4f}")

# Step 1: Apply DBSCAN on training data
def apply_dbscan(train_pca, eps=20, min_samples=7):
    dbscan = DBSCAN(eps=eps, min_samples=min_samples)
    cluster_labels = dbscan.fit_predict(train_pca)
    return cluster_labels

train_cluster_labels = apply_dbscan(train_pca)

# Step 2: Map clusters to actual class labels
def map_clusters_to_labels(cluster_labels, true_labels):
    cluster_mapping = {}
    unique_clusters = set(cluster_labels) - {-1}  # Ignore noise points

    for cluster in unique_clusters:
        indices = np.where(cluster_labels == cluster)[0]
        cluster_labels = true_labels[indices]

        if len(cluster_labels) > 0:
            most_common_label = np.bincount(cluster_labels).argmax()
            cluster_mapping[cluster] = most_common_label

    return cluster_mapping

cluster_mapping = map_clusters_to_labels(train_cluster_labels, train_labels)

# Step 3: Train kNN classifier and predict test labels
def predict_with_knn(train_pca, train_labels, test_pca, k=10):
    knn = KNeighborsClassifier(n_neighbors=k)
    knn.fit(train_pca, train_labels)
    return knn.predict(test_pca)

test_cluster_labels = predict_with_knn(train_pca, train_labels, test_pca)

# Map test cluster labels to actual class labels
test_predictions = np.array([cluster_mapping.get(label, label) for label in test_cluster_labels])

# Calculate accuracy
def compute_accuracy(true_labels, predicted_labels):
    return accuracy_score(true_labels, predicted_labels)

accuracy = compute_accuracy(test_labels, test_predictions)
print("Predicted Test Labels:", test_predictions)
print(f"Accuracy: {accuracy * 100:.2f}%")

Downloading data from https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
[1m170498071/170498071[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 0us/step
Explained Variance Ratio (cumulative): 0.8983
Predicted Test Labels: [2 8 8 ... 5 6 4]
Accuracy: 30.64%
