In [1]:
import torch
import numpy as np
from torch import nn, optim
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
from torch.utils.data import DataLoader, TensorDataset

In [None]:

# add random seed for reproducibility
torch.manual_seed(0)
np.random.seed(0)

# Load the Iris dataset
iris = datasets.load_iris()
X = iris.data

# Standardize the dataset
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Convert to PyTorch tensors
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)

# Create a dataset and data loader
dataset = TensorDataset(X_tensor)
data_loader = DataLoader(dataset, batch_size=10, shuffle=True)

# Define the clustering neural network
class ClusteringNetwork(nn.Module):
    def __init__(self, input_dim, hidden_dim, n_clusters):
        super(ClusteringNetwork, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, n_clusters),
            nn.Softmax(dim=1)
        )
        # Initialize centroids as a learnable parameter
        self.centroids = nn.Parameter(torch.rand(n_clusters, hidden_dim))

    def forward(self, x):
        # Apply the network and get raw scores for each cluster
        cluster_scores = self.network(x)
        return cluster_scores


# Initialize the network and optimizer
input_dim = 4  # Iris dataset has 4 features
hidden_dim = 10
n_clusters = 3  # Iris dataset has 3 classes
model = ClusteringNetwork(input_dim, hidden_dim, n_clusters)
optimizer = optim.Adam(model.parameters(), lr=0.1)


def wcss_loss(outputs, centroids):
    """Calculate the Within-Cluster Sum of Squares (WCSS) loss."""
    norm_squared = torch.sum((outputs - centroids).pow(2), 2)
    min_norm_squared = torch.min(norm_squared, 1)[0]
    wcss = torch.sum(min_norm_squared)
    return wcss


# Train the network
def train(model, data_loader, optimizer, epochs):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for inputs in data_loader:
            features = inputs[0]  # Get the input features
            optimizer.zero_grad()
            outputs = model(features)
            loss = wcss_loss(outputs.unsqueeze(1), model.centroids.t())
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        
        # Debugging prints
        print("Centroids after epoch {}: {}".format(epoch+1, model.centroids.data.t()))
        print("Gradients: {}".format(model.centroids.grad))
        print(f'Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(data_loader)}\n')

# Train the model
train(model, data_loader, optimizer, epochs=10)

In [None]:
# torch.Size([10, 3])
# tensor([[0.3036, 0.2849, 0.4116],
#         [0.3302, 0.2863, 0.3836],
#         [0.3303, 0.3056, 0.3641],
#         [0.3335, 0.3320, 0.3345],
#         [0.3299, 0.4534, 0.2166],
#         [0.3259, 0.2868, 0.3873],
#         [0.3382, 0.4570, 0.2048],
#         [0.3442, 0.2998, 0.3560],
#         [0.3183, 0.2796, 0.4022],
#         [0.3578, 0.4651, 0.1771]], grad_fn=<SoftmaxBackward0>)

In [None]:
# Evaluate the clustering performance using silhouette score
def evaluate_clustering(model, data_loader):
    model.eval()
    all_outputs = []
    with torch.no_grad():
        for inputs, in data_loader:
            inputs = inputs[0]  # Get the input features
            outputs = model(inputs)
            all_outputs.extend(outputs.numpy())
    
    # Infer cluster assignments
    cluster_assignments = torch.argmax(torch.tensor(all_outputs), dim=1).numpy()
    
    # Calculate silhouette score using inferred cluster assignments
    silhouette_avg = silhouette_score(all_outputs, cluster_assignments)
    print(f'Silhouette Score (unsupervised): {silhouette_avg}')

# Evaluate the model
evaluate_clustering(model, data_loader)


In [5]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn import datasets
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import silhouette_score
from torch.utils.data import DataLoader, TensorDataset



# add random seed for reproducibility
# torch.manual_seed(10)
# np.random.seed(10)


# Load the Iris dataset
iris = datasets.load_iris()
X = iris.data

# Standardize the dataset
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

# Convert to PyTorch tensors
X_tensor = torch.tensor(X_scaled, dtype=torch.float32)

# Create a dataset and data loader
dataset = TensorDataset(X_tensor)
data_loader = DataLoader(dataset, batch_size=10, shuffle=True)

# Define the clustering neural network
class ClusteringNetwork(nn.Module):
    def __init__(self, input_dim, hidden_dim, n_clusters):
        super(ClusteringNetwork, self).__init__()
        self.network = nn.Sequential(
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Linear(hidden_dim, hidden_dim),
            # nn.ReLU()
        )
        # Initialize centroids as a learnable parameter
        # self.centroids = nn.Parameter(torch.rand(n_clusters, hidden_dim))
        # Initialize centroids as a learnable parameter using Xavier initialization
        self.centroids = nn.Parameter(torch.empty(n_clusters, 1))
        torch.nn.init.xavier_uniform_(self.centroids)


    def forward(self, x):
        # Apply the network to get features for each cluster
        features = self.network(x)
        return features

# Initialize the network and optimizer
input_dim = 4  # Iris dataset has 4 features
hidden_dim = 10
n_clusters = 3  # Iris dataset has 3 classes
model = ClusteringNetwork(input_dim, hidden_dim, n_clusters)
optimizer = optim.Adam(model.parameters(), lr=0.1)
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=50, gamma=0.5)

def wcss_loss(outputs, centroids):
    """Calculate the Within-Cluster Sum of Squares (WCSS) loss."""
    norm_squared = torch.sum((outputs.unsqueeze(1) - centroids.unsqueeze(0)).pow(2), dim=2)
    min_norm_squared = torch.min(norm_squared, dim=1)[0]
    wcss = torch.sum(min_norm_squared)
    return wcss

# Train the network
def train(model, data_loader, optimizer, scheduler, epochs):
    model.train()
    for epoch in range(epochs):
        total_loss = 0
        for inputs in data_loader:
            features = inputs[0]  # Get the input features
            optimizer.zero_grad()
            outputs = model(features)
            loss = wcss_loss(outputs, model.centroids)
            loss.backward()
            optimizer.step()
            total_loss += loss.item()
        scheduler.step()  # Update the learning rate
        print("Centroids after epoch {}: {}".format(epoch+1, model.centroids.data.t()))
        # print out gradients
        print("Gradients: {}".format(model.centroids.grad))
        print(f'Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(data_loader)}\n')

# Train the model
train(model, data_loader, optimizer, scheduler, epochs=100)

# # Evaluate the clustering performance using silhouette score
# model.eval()
# with torch.no_grad():
#     features = model(X_tensor)
#     centroids = model.centroids.data
#     # Assign clusters based on closest centroid
#     distances = torch.cdist(features, centroids)
#     cluster_assignments = torch.argmin(distances, dim=1).numpy()
#     score = silhouette_score(X_scaled, cluster_assignments)
#     print(f'Silhouette Score: {score}')


Centroids after epoch 1: tensor([[-0.5226,  0.7414, -0.2015]])
Gradients: tensor([[0.0000],
        [0.0000],
        [4.5320]])
Epoch 1/100, Loss: 5.031632578372955

Centroids after epoch 2: tensor([[-0.3001,  0.7414, -0.2077]])
Gradients: tensor([[0.0000],
        [0.0000],
        [3.8936]])
Epoch 2/100, Loss: 0.3410863310098648

Centroids after epoch 3: tensor([[-0.2383,  0.7414, -0.2162]])
Gradients: tensor([[0.0000],
        [0.0000],
        [1.1383]])
Epoch 3/100, Loss: 0.04908597556253274

Centroids after epoch 4: tensor([[-0.3020,  0.7414, -0.2149]])
Gradients: tensor([[ 0.0000],
        [ 0.0000],
        [-0.5082]])
Epoch 4/100, Loss: 0.018527679983526467

Centroids after epoch 5: tensor([[-0.3411,  0.7414, -0.2119]])
Gradients: tensor([[ 0.0000],
        [ 0.0000],
        [-0.1182]])
Epoch 5/100, Loss: 0.002193997660651803

Centroids after epoch 6: tensor([[-0.3501,  0.7414, -0.2116]])
Gradients: tensor([[0.0000],
        [0.0000],
        [0.0914]])
Epoch 6/100, Loss: 0.