In [44]:
import torch
import numpy as np
from torch import nn, optim
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
from torch.utils.data import DataLoader, TensorDataset

In [55]:

# Seed every RNG in play so that Restart Kernel -> Run All reproduces the same
# layer initialisation, centroid initialisation and mini-batch shuffling.
SEED = 0
torch.manual_seed(SEED)
np.random.seed(SEED)

# Load the Iris dataset; only the feature matrix is used below
iris = datasets.load_iris()
X = iris.data

# Standardize the features so that no single column dominates squared distances
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Convert to a float32 PyTorch tensor
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)

# Wrap the features in a dataset and serve them as shuffled mini-batches of 10.
# Each batch arrives as a one-element sequence whose item 0 is the feature tensor.
dataset = TensorDataset(X_tensor)
data_loader = DataLoader(dataset, batch_size=10, shuffle=True)

# Define the clustering neural network
class ClusteringNetwork(nn.Module):
    """Small MLP that maps each sample to a softmax distribution over clusters.

    Besides the MLP weights, the module owns a learnable ``centroids`` tensor
    that is registered as a parameter and therefore updated by the optimizer.
    """

    def __init__(self, input_dim, hidden_dim, n_clusters):
        """Build the MLP and the centroid parameter.

        Args:
            input_dim: Number of features per input sample.
            hidden_dim: Width of the single hidden layer.
            n_clusters: Size of the softmax output.
        """
        super().__init__()
        layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, n_clusters),
            nn.Softmax(dim=1),  # normalises along axis 1, so inputs must be batched (2-D)
        ]
        self.network = nn.Sequential(*layers)

        # Learnable centroids, drawn uniformly from [0, 1).
        # NOTE(review): the shape is (n_clusters, hidden_dim) while forward()
        # emits n_clusters-wide vectors -- confirm this geometry is intended.
        initial_centroids = torch.rand(n_clusters, hidden_dim)
        self.centroids = nn.Parameter(initial_centroids)

    def forward(self, x):
        """Return the softmax cluster probabilities for a batch.

        Args:
            x: Tensor of shape (batch, input_dim).

        Returns:
            Tensor of shape (batch, n_clusters); every row sums to 1.
        """
        return self.network(x)


# Hyperparameters for the clustering network
input_dim = 4    # Iris dataset has 4 features
hidden_dim = 10  # width of the hidden layer
n_clusters = 3   # Iris dataset has 3 classes

# Build the network, then let Adam manage every registered parameter
# (the layer weights as well as the learnable centroids).
model = ClusteringNetwork(
    input_dim=input_dim,
    hidden_dim=hidden_dim,
    n_clusters=n_clusters,
)
optimizer = optim.Adam(model.parameters(), lr=0.01)


def wcss_loss(outputs, centroids):
    """Within-Cluster Sum of Squares: summed squared distance to the nearest centroid.

    Args:
        outputs: Tensor that broadcasts against ``centroids`` to a 3-D result,
            e.g. shape (batch, 1, dim).
        centroids: Tensor of candidate centroids, e.g. shape (k, dim).

    Returns:
        Scalar tensor: for every sample the smallest squared Euclidean distance
        to any centroid, added up over the batch.
    """
    differences = outputs - centroids                   # broadcast to (batch, k, dim)
    squared_distances = differences.pow(2).sum(dim=2)   # (batch, k)
    nearest = squared_distances.min(dim=1).values       # (batch,)
    return nearest.sum()


# Train the network
def train(model, data_loader, optimizer, epochs):
    """Optimise ``model`` with the WCSS loss and print diagnostics after each epoch.

    Args:
        model: Module exposing a ``centroids`` parameter; calling it on a batch
            of features returns the per-sample outputs fed to the loss.
        data_loader: Iterable of batches; item 0 of each batch is the feature tensor.
        optimizer: Optimizer that updates the model's parameters.
        epochs: Number of full passes over ``data_loader``.
    """
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for batch in data_loader:
            optimizer.zero_grad()
            predictions = model(batch[0])
            # Insert a broadcast axis on the outputs and transpose the centroid
            # matrix so wcss_loss can compare every output with every centroid row.
            loss = wcss_loss(predictions.unsqueeze(1), model.centroids.t())
            loss.backward()
            optimizer.step()
            total_loss += loss.item()

        # Debugging prints; the gradient shown is the one left by the epoch's last batch
        print("Centroids after epoch {}: {}".format(epoch+1, model.centroids.data.t()))
        print("Gradients: {}".format(model.centroids.grad))
        print(f'Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(data_loader)}\n')

# Run 10 training epochs over the shuffled mini-batches
train(model=model, data_loader=data_loader, optimizer=optimizer, epochs=10)

Centroids after epoch 1: tensor([[0.0012, 0.6870, 0.3742],
        [0.5936, 0.0051, 0.0237],
        [0.4158, 0.1757, 0.4910],
        [0.4177, 0.7497, 0.1235],
        [0.2132, 0.5093, 0.2366],
        [0.6923, 0.1100, 0.4725],
        [0.2377, 0.3294, 0.4563],
        [0.6833, 0.9704, 0.2952],
        [0.7529, 0.8369, 0.7967],
        [0.8579, 0.2820, 0.1957]])
Gradients: tensor([[ 0.0000,  0.0000,  0.0000,  0.0000, -1.0330,  0.0000,  0.0061,  0.0000,
          0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000, -0.5641,  0.0000, -0.1602,  0.0000,
          0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000,  0.0000,  0.6947,  0.0000,  0.1996,  0.0000,
          0.0000,  0.0000]])
Epoch 1/10, Loss: 0.2735993872086207

Centroids after epoch 2: tensor([[0.0012, 0.6870, 0.3742],
        [0.5936, 0.0051, 0.0237],
        [0.4158, 0.1757, 0.4910],
        [0.4177, 0.7497, 0.1235],
        [0.2707, 0.5139, 0.2515],
        [0.6923, 0.1100, 0.4725],
        [0.2157, 0.3794, 0.4

In [None]:
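# Debug dump kept for reference -- apparently the model output for one batch of
# 10 samples: its shape first, then the per-sample softmax over the 3 clusters.
# torch.Size([10, 3])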
# tensor([[0.3036, 0.2849, 0.4116],
#         [0.3302, 0.2863, 0.3836],
#         [0.3303, 0.3056, 0.3641],
#         [0.3335, 0.3320, 0.3345],
#         [0.3299, 0.4534, 0.2166],
#         [0.3259, 0.2868, 0.3873],
#         [0.3382, 0.4570, 0.2048],
#         [0.3442, 0.2998, 0.3560],
#         [0.3183, 0.2796, 0.4022],
#         [0.3578, 0.4651, 0.1771]], grad_fn=<SoftmaxBackward0>)

In [None]:







# Evaluate the clustering performance using silhouette score
def evaluate_clustering(model, data_loader):
    """Score the model's hard cluster assignments with the silhouette coefficient.

    Every sample served by ``data_loader`` is passed through ``model``. Each
    sample is assigned to the cluster with the largest output, and the
    silhouette score is computed on the model outputs (not on the raw features).

    Args:
        model: Module returning a (batch, n_clusters) tensor for a batch of features.
        data_loader: Iterable of batches; item 0 of each batch is the feature tensor.

    Returns:
        The mean silhouette score, or ``None`` when it is undefined because the
        assignments use fewer than 2 or more than ``n_samples - 1`` clusters.

    Raises:
        ValueError: If ``data_loader`` yields no batches.
    """
    model.eval()
    batch_outputs = []
    with torch.no_grad():
        for batch in data_loader:
            # Item 0 is the whole (batch, input_dim) feature tensor. Indexing the
            # tensor itself would keep a single 1-D sample, which a network
            # ending in Softmax(dim=1) rejects.
            features = batch[0]
            batch_outputs.append(model(features))

    if not batch_outputs:
        raise ValueError("data_loader yielded no batches to evaluate")

    all_outputs = torch.cat(batch_outputs, dim=0).cpu()   # (n_samples, n_clusters)

    # Infer cluster assignments
    cluster_assignments = torch.argmax(all_outputs, dim=1).numpy()

    # silhouette_score needs between 2 and n_samples - 1 distinct labels;
    # report the degenerate case instead of raising.
    n_samples = len(cluster_assignments)
    n_found = len(set(cluster_assignments.tolist()))
    if not 2 <= n_found <= n_samples - 1:
        print(f'Silhouette Score (unsupervised): undefined '
              f'({n_found} distinct cluster(s) among {n_samples} sample(s))')
        return None

    # Calculate silhouette score using inferred cluster assignments
    silhouette_avg = silhouette_score(all_outputs.numpy(), cluster_assignments)
    print(f'Silhouette Score (unsupervised): {silhouette_avg}')
    return silhouette_avg

# Report the silhouette score of the trained model's cluster assignments
evaluate_clustering(model=model, data_loader=data_loader)
