In [None]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import tensorflow as tf

from sklearn.metrics import accuracy_score, precision_score, recall_score
from sklearn.model_selection import train_test_split
from tensorflow.keras import layers, losses
from tensorflow.keras.datasets import mnist
from tensorflow.keras.models import Model


In [None]:
# Hyperparameters shared by the cells below
input_dim = 28 * 28  # Length of one flattened input image (784)
encoding_dim = 64  # Width of the code (bottleneck) layer
num_clusters = 10  # How many clusters to form
lambda_value = 0.1  # Weight of the clustering term relative to the reconstruction term


In [None]:
# Fetch MNIST and keep only the images; the label arrays are not used here.
train_split, test_split = mnist.load_data()

# Convert to float32 and divide by 255
x_train = train_split[0].astype('float32') / 255.
x_test = test_split[0].astype('float32') / 255.

print(x_train.shape)
print(x_test.shape)

In [None]:
# Seed NumPy and TensorFlow so the two random draws below repeat across
# Restart & Run All; without a seed every run starts from a different state.
SEED = 42
np.random.seed(SEED)
tf.random.set_seed(SEED)

# Initialize sample assignment randomly: one cluster id in [0, num_clusters) per training sample
sample_assignments = np.random.randint(num_clusters, size=len(x_train))

# Initialize cluster centers as random normal points, one row per cluster in the code space
cluster_centers = tf.Variable(tf.random.normal(shape=(num_clusters, encoding_dim)))

In [None]:
# Derive the code width from encoding_dim instead of repeating the literal:
# cluster_centers is shaped (num_clusters, encoding_dim), so the encoder output
# must have the same width or the distance computation cannot broadcast.
latent_dim = encoding_dim

class Autoencoder(Model):
  """Dense autoencoder with a single hidden (code) layer.

  The encoder flattens its input and maps it to `latent_dim` ReLU units; the
  decoder maps the code back through a sigmoid layer and reshapes the result
  to `image_shape`.

  Args:
    latent_dim: Number of units in the code layer.
    image_shape: Shape of one reconstructed sample (without the batch axis).
      Defaults to (28, 28), which reproduces the original 784-unit decoder.
  """

  def __init__(self, latent_dim, image_shape=(28, 28)):
    super().__init__()
    self.latent_dim = latent_dim
    image_shape = tuple(image_shape)
    # Number of scalar values in one sample; 784 for the default shape.
    flat_dim = int(np.prod(image_shape))
    self.encoder = tf.keras.Sequential([
      layers.Flatten(),
      layers.Dense(latent_dim, activation='relu'),
    ])
    self.decoder = tf.keras.Sequential([
      layers.Dense(flat_dim, activation='sigmoid'),
      layers.Reshape(image_shape)
    ])

  def call(self, x):
    """Encode `x` and return its reconstruction."""
    encoded = self.encoder(x)
    decoded = self.decoder(encoded)
    return decoded



autoencoder = Autoencoder(latent_dim)


In [None]:
def clustering_loss(y_true, y_pred):
  """Reconstruction error plus a weighted pull of each code toward its nearest center.

  The model is fit with the inputs as targets, so `y_true` is also the batch
  that gets encoded here. Reads the module-level `autoencoder`,
  `cluster_centers` and `lambda_value`.

  Args:
    y_true: Batch of target samples (the original inputs).
    y_pred: Batch of reconstructions produced by the model.

  Returns:
    The Keras mean-squared-error tensor for the batch with the scalar,
    lambda-weighted clustering term added to every element.
  """
  # Compute the reconstruction loss
  reconstruction_loss = tf.keras.losses.mean_squared_error(y_true, y_pred)

  # Squared Euclidean distance from every code to every cluster center:
  # (batch, 1, dim) - (1, clusters, dim) summed over the last axis.
  encoded_inputs = autoencoder.encoder(y_true)  # Get the encoded representations
  expanded_centers = tf.expand_dims(cluster_centers, axis=0)
  distances = tf.reduce_sum(tf.square(tf.expand_dims(encoded_inputs, axis=1) - expanded_centers), axis=2)

  # Only the distance to the closest center counts. Summing over all centers
  # would pull every code toward every center at once.
  closest_distance = tf.reduce_min(distances, axis=1)
  clustering_term = tf.reduce_mean(closest_distance)

  # The term is added: the loss is minimised, so subtracting it would reward
  # codes for moving away from the centers and make the loss unbounded below.
  total_loss = reconstruction_loss + lambda_value * clustering_term

  return total_loss



In [None]:
# Define the update equation for cluster centers
def update_cluster_centers(X, sample_assignments, previous_centers=None):
    """Recompute each cluster center as the mean of the samples assigned to it.

    Args:
        X: Encoded samples, one row per sample.
        sample_assignments: Cluster id per row of `X` (NumPy array or TF tensor).
        previous_centers: Centers to fall back on for clusters that currently
            have no samples. Defaults to the module-level `cluster_centers`.

    Returns:
        A list with `num_clusters` tensors, one center per cluster.
    """
    # Work on NumPy views so the boolean mask indexes X the same way whether
    # the assignments arrive as a NumPy array or as a TensorFlow tensor.
    X = np.asarray(X)
    assignments = np.asarray(sample_assignments)
    if previous_centers is None:
        previous_centers = cluster_centers

    new_centers = []
    for j in range(num_clusters):
        samples_in_cluster = X[assignments == j]
        if len(samples_in_cluster) == 0:
            # The mean of zero rows is NaN, which would poison every later
            # distance computation; keep the old center for an empty cluster.
            new_centers.append(tf.cast(previous_centers[j], X.dtype))
        else:
            new_centers.append(tf.reduce_mean(samples_in_cluster, axis=0))
    return new_centers

In [None]:
# Create a fresh model instance, then attach the Adam optimizer and the custom loss.
autoencoder = Autoencoder(latent_dim)
autoencoder.compile(loss=clustering_loss, optimizer='adam')

In [None]:
# Training loop: alternate between fitting the auto-encoder, recomputing the
# cluster centers, and reassigning every sample to its nearest center.
max_iterations = 200  # Maximum number of iterations
x_train_reshaped = x_train.reshape(-1, 784)  # flattened copy of the inputs; not used by the loop itself
for t in range(max_iterations):
    # Train the auto-encoder for one epoch with the inputs as targets
    autoencoder.fit(x_train, x_train, batch_size=6400, epochs=1)

    # Encode the training set once per iteration and reuse the codes for both
    # the center update and the reassignment (the encoder is unchanged between them).
    encoded_samples = autoencoder.encoder.predict(x_train)

    # Update the cluster centers from the current assignments
    cluster_centers.assign(update_cluster_centers(encoded_samples, sample_assignments))

    # Compute distances between encoded samples and cluster centers
    distances = tf.reduce_sum(tf.square(tf.expand_dims(encoded_samples, axis=1) - cluster_centers), axis=2)

    # Assign each sample to the closest cluster. Converting to NumPy keeps
    # sample_assignments the same type as its initial value on every iteration.
    sample_assignments = tf.argmin(distances, axis=1).numpy()


# Final sample assignment
final_sample_assignment = sample_assignments


In [None]:
# Same assignment as the last line of the training cell; running this cell
# re-binds final_sample_assignment to the current sample_assignments.
final_sample_assignment = sample_assignments

In [None]:
from sklearn.datasets import fetch_openml

# Bind the OpenML dataset to its own name. Rebinding `mnist` would shadow the
# Keras `mnist` module imported at the top of the notebook, so re-running the
# data-loading cell (`mnist.load_data()`) afterwards would fail.
mnist_openml = fetch_openml('mnist_784')

# Convert the targets to plain Python ints.
# NOTE(review): later cells pair the first 60000 of these labels with the Keras
# training images; this assumes both sources use the same sample order — confirm.
labels = [int(i) for i in mnist_openml.target]

In [None]:
from sklearn.metrics import adjusted_rand_score, silhouette_score

In [None]:
# Adjusted Rand index between the true digit labels and the cluster ids.
# The assignments are converted to a NumPy array first so scikit-learn gets a
# plain array even if the training loop left a TensorFlow tensor behind, and
# the label slice follows the number of clustered samples instead of a literal.
predicted_clusters = np.asarray(final_sample_assignment)
ari = adjusted_rand_score(labels[:len(predicted_clusters)], predicted_clusters)
ari  # display the score as the cell output

In [None]:
# Peek at the first ten true labels
labels[:10]

In [None]:
# Peek at the first ten cluster ids for comparison with the labels
final_sample_assignment[:10]