# Autoencoder for Clustering

In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import numpy as np
import matplotlib.pyplot as plt

In [None]:
# Generate synthetic 2D data for clustering
def generate_data(n_per_cluster=100, centers=((2, 2), (-2, -2)), seed=None):
    """Generate shuffled synthetic points scattered around cluster centers.

    Every cluster is standard-normal noise shifted to its center. Called with
    no arguments this yields two 2D clusters of 100 points each, centered at
    (2, 2) and (-2, -2).

    Args:
        n_per_cluster: Number of points drawn for each center.
        centers: Sequence of cluster centers, all of the same dimensionality.
            A single flat center such as ``(0, 0)`` is also accepted.
        seed: Optional seed for reproducible output. When ``None``, NumPy's
            global random state is used, so ``np.random.seed`` still applies.

    Returns:
        Float array of shape ``(n_per_cluster * n_centers, dim)`` whose rows
        are in random order.
    """
    # Fall back to the module-level generator so the default call keeps
    # drawing from the global random state.
    rng = np.random if seed is None else np.random.RandomState(seed)
    centers = np.atleast_2d(np.asarray(centers, dtype=float))
    dim = centers.shape[1]
    clusters = [rng.randn(n_per_cluster, dim) + center for center in centers]
    data = np.vstack(clusters)
    # Shuffle rows in place so the cluster of origin is not encoded in the order.
    rng.shuffle(data)
    return data

In [None]:
# Neural Network Autoencoder for clustering
class Autoencoder(nn.Module):
    """Small fully connected autoencoder with one hidden layer on each side.

    The encoder maps ``input_dim -> hidden_dim -> latent_dim`` and the decoder
    mirrors it back to ``input_dim``, with a ReLU between the two linear
    layers of each half.

    Args:
        input_dim: Number of features per input sample.
        hidden_dim: Width of the hidden layer in both encoder and decoder.
        latent_dim: Size of the encoded representation.
    """

    def __init__(self, input_dim=2, hidden_dim=4, latent_dim=2):
        super().__init__()
        self.encoder = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, latent_dim),
        )
        self.decoder = nn.Sequential(
            nn.Linear(latent_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, input_dim),
        )

    def forward(self, x):
        """Encode ``x`` and reconstruct it.

        Returns:
            A tuple ``(encoded, decoded)``: the latent representation and the
            reconstruction of ``x`` produced from it.
        """
        encoded = self.encoder(x)
        decoded = self.decoder(encoded)
        return encoded, decoded


In [None]:
# Draw the synthetic samples and copy them into a float32 tensor for the model
data = generate_data()
data_tensor = torch.tensor(data, dtype=torch.float32)

In [None]:
# Visualize the randomly generated data
plt.figure(figsize=(8, 6))
plt.scatter(data[:, 0], data[:, 1], c='gray', marker='o', label='Data points')
plt.title('Randomly Generated Data')
plt.xlabel('Dimension 1')
plt.ylabel('Dimension 2')
# The scatter's label is only rendered when a legend is drawn.
plt.legend()
plt.show()

In [None]:
# Build the network, a mean-squared reconstruction loss, and an Adam optimizer
# over all of the network's parameters
model = Autoencoder()
criterion = nn.MSELoss(reduction="mean")
optimizer = optim.Adam(params=model.parameters(), lr=1e-2)

In [None]:
# Training loop: every epoch is one optimizer step on the whole dataset
epochs = 1000
for epoch in range(epochs):
    # Forward pass: reconstruct the inputs and score the reconstruction
    encoded, decoded = model(data_tensor)
    loss = criterion(decoded, data_tensor)

    # Backward pass: clear stale gradients, backpropagate, update the weights
    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    # Report progress only on every 100th epoch
    if (epoch + 1) % 100 != 0:
        continue
    print(f"Epoch [{epoch + 1}/{epochs}], Loss: {loss.item():.4f}")


In [None]:
# Get the encoded representations.
# Only the encoder output is used, so the decoder is skipped, and no_grad()
# avoids recording an autograd graph for this inference-only pass.
with torch.no_grad():
    encoded_data = model.encoder(data_tensor).numpy()


In [None]:
# Perform simple clustering based on encoded values.
# The latent space is not guaranteed to be centered at 0 or to separate the
# data along its first axis, so split along the latent dimension with the
# largest spread, using that dimension's mean as the threshold.
split_dim = int(np.argmax(encoded_data.var(axis=0)))
split_values = encoded_data[:, split_dim]
cluster_labels = (split_values > split_values.mean()).astype(int)

In [None]:
# Plot the original points, colored by the cluster label assigned to each one
fig, ax = plt.subplots(figsize=(8, 6))
points = ax.scatter(data[:, 0], data[:, 1], c=cluster_labels, cmap='viridis', s=50)
ax.set_title("Clustering with Autoencoder")
ax.set_xlabel("X1")
ax.set_ylabel("X2")
fig.colorbar(points, ax=ax, label="Cluster")
plt.show()