In [None]:
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.cluster import KMeans
from sklearn.decomposition import PCA
from sklearn.manifold import TSNE
import tensorflow as tf
from tensorflow.keras import layers, models

In [None]:
# Number of MNIST test images used by every embedding in this notebook.
# Kept in one place so the slice and the reshape below can never disagree.
N_SAMPLES = 2000

# The test labels are bound to `y_test` rather than `_`: in IPython/Jupyter,
# assigning to `_` overrides the automatic "last output" variable for the
# rest of the session.
(x_train, y_train), (x_test, y_test) = tf.keras.datasets.mnist.load_data()

# Take the first N_SAMPLES test images and rescale by 1/255 as float32.
x_data = x_test[:N_SAMPLES].astype('float32') / 255.0

# One row per image; -1 lets NumPy infer the flattened pixel count.
x_flat = x_data.reshape((len(x_data), -1))

In [None]:

# Baseline: project the flattened images onto their first two principal
# components, then split that 2-D projection into ten k-means clusters.
# random_state is fixed on both estimators so the projection and the cluster
# labels (and therefore the plot colours) are the same on every run.
pca_results = PCA(n_components=2, random_state=42).fit_transform(x_flat)
ml_labels = KMeans(n_clusters=10, n_init=10, random_state=42).fit_predict(pca_results)

In [None]:
# Manifold method: embed the flattened images in 2-D with t-SNE, then run
# k-means with ten clusters on the embedding.
# t-SNE's optimisation and k-means' centroid initialisation are both
# randomised; random_state is fixed so the embedding and the labels are
# repeatable between runs.
tsne_results = TSNE(n_components=2, perplexity=30, random_state=42).fit_transform(x_flat)
tsne_labels = KMeans(n_clusters=10, n_init=10, random_state=42).fit_predict(tsne_results)

In [None]:
# Seed Python's `random`, NumPy and TensorFlow in one call so the layer
# weights created below start from the same values on every run.
# NOTE: tf.keras.utils.set_random_seed requires TensorFlow >= 2.7.
tf.keras.utils.set_random_seed(42)

# Add a trailing channel axis for the convolutional layers. -1 lets NumPy
# infer the sample count from x_data instead of repeating it here.
x_conv = x_data.reshape((-1, 28, 28, 1))

# Encoder: two conv + max-pool stages (28x28 -> 14x14 -> 7x7 spatially),
# then flatten and project to a 2-unit latent vector.
encoder_input = layers.Input(shape=(28, 28, 1))
x = layers.Conv2D(16, (3, 3), activation='relu', padding='same')(encoder_input)
x = layers.MaxPooling2D((2, 2), padding='same')(x)
x = layers.Conv2D(8, (3, 3), activation='relu', padding='same')(x)
encoded = layers.MaxPooling2D((2, 2), padding='same')(x)
flat = layers.Flatten()(encoded)
# No activation is passed, so the 2-D latent code is a linear projection.
latent = layers.Dense(2)(flat)

In [None]:
# Decoder: expand the 2-unit latent code back to a 7x7x8 feature map, then
# upsample twice (7x7 -> 14x14 -> 28x28) with a transposed convolution after
# each upsampling step.
d = layers.Dense(units=7 * 7 * 8, activation='relu')(latent)
d = layers.Reshape(target_shape=(7, 7, 8))(d)

d = layers.UpSampling2D(size=(2, 2))(d)
d = layers.Conv2DTranspose(
    filters=16, kernel_size=(3, 3), activation='relu', padding='same'
)(d)

d = layers.UpSampling2D(size=(2, 2))(d)
# Single-channel output with a sigmoid activation.
decoded = layers.Conv2DTranspose(
    filters=1, kernel_size=(3, 3), activation='sigmoid', padding='same'
)(d)

In [None]:
# Two models over the same layer graph: `autoencoder` maps an image to its
# reconstruction, `encoder` stops at the 2-unit latent layer. They share
# weights, so fitting the autoencoder also trains the encoder.
autoencoder = models.Model(encoder_input, decoded)
encoder = models.Model(encoder_input, latent)

# The input is also the target: the network is trained to reconstruct it.
autoencoder.compile(optimizer='adam', loss='mse')
autoencoder.fit(x_conv, x_conv, epochs=50, batch_size=64, verbose=0)

# verbose=0 matches the silent fit above and keeps the progress bar out of
# the cell output.
dl_latent = encoder.predict(x_conv, verbose=0)

# random_state fixes the k-means initialisation so that a given set of latent
# codes always produces the same labels. The latent codes can still differ
# between runs unless the TensorFlow/Keras RNG is seeded before the model is
# built and trained.
dl_labels = KMeans(n_clusters=10, n_init=10, random_state=42).fit_predict(dl_latent)

In [None]:
# Side-by-side comparison of the three 2-D embeddings, each coloured by its
# own k-means labels and overlaid with faint per-cluster density contours.
plt.style.use('dark_background')
fig, axes = plt.subplots(1, 3, figsize=(24, 8))

titles = ["Baseline: PCA + KMeans", "Manifold: t-SNE", "Deep: Convolutional Autoencoder"]
data_sources = [pca_results, tsne_results, dl_latent]
cluster_labels = [ml_labels, tsne_labels, dl_labels]

for ax, points, assignments, heading in zip(axes, data_sources, cluster_labels, titles):
    ax.scatter(
        points[:, 0], points[:, 1],
        c=assignments, cmap='Spectral', s=25, alpha=0.7,
    )

    # One white KDE contour set per cluster id; ids with no members are skipped.
    for cluster_id in range(10):
        members = points[assignments == cluster_id]
        if len(members) == 0:
            continue
        sns.kdeplot(
            x=members[:, 0], y=members[:, 1],
            ax=ax, levels=2, color='white', alpha=0.2,
        )

    ax.set_title(heading, fontsize=22, fontweight='bold', color='#00FFCC', pad=20)
    ax.axis('off')

plt.tight_layout()
plt.show()