In [1]:
import os

import time

import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init

import torchvision.transforms as transforms

from PIL import Image

from sklearn.cluster import KMeans, AgglomerativeClustering, SpectralClustering

from torch.utils.data import Dataset, DataLoader

from sklearn.metrics import silhouette_score, calinski_harabasz_score

In [6]:
class LazyDataset(Dataset):
    """Image-folder dataset that decodes each file only when it is requested.

    Args:
        image_dir: Directory whose regular files are treated as images.
        transform: Optional callable applied to the RGB image before it is
            returned from ``__getitem__``.
    """

    def __init__(self, image_dir, transform=None):
        self.image_dir = image_dir
        # os.listdir() returns entries in arbitrary order, so sort them to keep
        # the index -> filename mapping (and therefore the reported cluster of
        # each file) stable across runs. Sub-directories such as
        # ".ipynb_checkpoints" are skipped because they cannot be opened as
        # images.
        self.image_filenames = sorted(
            name
            for name in os.listdir(image_dir)
            if os.path.isfile(os.path.join(image_dir, name))
        )
        self.transform = transform

    def __len__(self):
        """Return the number of files found in ``image_dir``."""
        return len(self.image_filenames)

    def __getitem__(self, idx):
        """Load the ``idx``-th file as RGB and apply ``transform`` if one was given."""
        img_path = os.path.join(self.image_dir, self.image_filenames[idx])
        # convert() returns a new, fully loaded image, so the underlying file
        # can be closed as soon as the conversion is done.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert("RGB")
        if self.transform:
            image = self.transform(image)
        return image

In [7]:
def get_transform():
    """Build the preprocessing pipeline: resize to 368x368, then convert to a tensor."""
    target_size = (368,368)
    steps = [
        transforms.Resize(target_size),
        transforms.ToTensor(),
    ]
    return transforms.Compose(steps)

In [None]:
class Autoencoder(nn.Module):
    """Convolutional autoencoder with four stride-2 stages in each direction.

    The encoder halves the spatial resolution four times (a 368x368 input
    becomes a 23x23 grid with ``latent_dim`` channels); the decoder mirrors it
    and ends in a sigmoid.

    Args:
        latent_dim: Number of channels of the encoder output.
        init_weights: When True, re-initialise every (transposed) convolution
            with ``_init_weights``. Defaults to False, which keeps PyTorch's
            default initialisation (the behaviour before this flag existed).
    """

    def __init__(self, latent_dim, init_weights=False):
        super().__init__()
        self.latent_dim = latent_dim

        # Encoder
        self.encoder = nn.Sequential(
            nn.Conv2d(3, 32, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(32, 64, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(64, 128, 3, stride=2, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, latent_dim, 3, stride=2, padding=1),
        )

        # Decoder
        self.decoder = nn.Sequential(
            nn.ConvTranspose2d(latent_dim, 128, 3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(128, 64, 3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(64, 32, 3, stride=2, padding=1, output_padding=1),
            nn.ReLU(),
            nn.ConvTranspose2d(32, 3, 3, stride=2, padding=1, output_padding=1),
            nn.Sigmoid(),
        )

        # The initialiser used to be defined but never invoked; it is now
        # reachable, but only on request so existing results do not change.
        if init_weights:
            self.apply(self._init_weights)

    def _init_weights(self, m):
        """Kaiming-uniform weights and zero biases for (transposed) convolutions."""
        if isinstance(m, (nn.Conv2d, nn.ConvTranspose2d)):
            init.kaiming_uniform_(m.weight, nonlinearity='relu')
            if m.bias is not None:
                init.zeros_(m.bias)

    def forward(self, x):
        """Encode ``x`` and return its reconstruction."""
        z = self.encoder(x)
        x_rec = self.decoder(z)
        return x_rec

    def encode(self, x):
        """Return the encoder output flattened to ``(batch, features)``."""
        z = self.encoder(x)
        return z.view(z.size(0), -1)

    def decode(self, z):
        """Reconstruct images from latents produced by ``encode``.

        The spatial side of the latent grid is inferred from the number of
        features instead of being fixed to 23, so any input size that yields a
        square latent grid works (368x368 inputs still map to 23x23).

        Raises:
            ValueError: If the per-sample feature count is not
                ``latent_dim * side * side`` for some integer ``side``.
        """
        batch_size = z.size(0)

        n_features = 1
        for dim in z.shape[1:]:
            n_features *= dim

        n_cells, remainder = divmod(n_features, self.latent_dim)
        side = int(round(n_cells ** 0.5))
        if remainder != 0 or side * side != n_cells:
            raise ValueError(
                f"Cannot reshape {n_features} latent features into "
                f"({self.latent_dim}, side, side)"
            )

        z = z.reshape(batch_size, self.latent_dim, side, side)
        return self.decoder(z)

In [None]:
def target_distribution(q):
    """Sharpen soft assignments ``q`` into the auxiliary target distribution.

    Each entry is squared and divided by its column sum (dim 0); every row is
    then renormalised so that it sums to one.
    """
    column_totals = q.sum(dim=0)
    sharpened = q.pow(2) / column_totals
    row_totals = sharpened.sum(dim=1, keepdim=True)
    return sharpened / row_totals


class IDECModel(nn.Module): # DCNModel
    """Autoencoder plus trainable cluster centroids.

    Args:
        autoencoder: Module exposing ``encode`` and ``decode``.
        cluster_centers: Initial centroids; copied into a float32 parameter.
        alpha: Value used both to scale the squared distance and in the
            exponent ``(alpha + 1) / 2`` of the assignment kernel.
    """

    def __init__(self, autoencoder, cluster_centers, alpha=1.0):
        super().__init__()
        self.autoencoder = autoencoder
        initial_centers = torch.tensor(cluster_centers, dtype=torch.float32)
        self.cluster_centers = nn.Parameter(initial_centers)
        self.alpha = alpha

    def forward(self, x):
        """Return ``(q, x_rec)``: row-normalised soft assignments and the reconstruction."""
        embedding = self.autoencoder.encode(x)

        # Squared Euclidean distance from every embedding to every centroid.
        offsets = embedding.unsqueeze(1) - self.cluster_centers
        sq_dist = offsets.pow(2).sum(dim=2)

        exponent = (self.alpha + 1.0) / 2.0
        kernel = (1.0 / (1.0 + sq_dist / self.alpha)).pow(exponent)
        q = kernel / kernel.sum(dim=1, keepdim=True)

        return q, self.autoencoder.decode(embedding)

In [12]:
def train_autoencoder(model, dataset, device):
    """Pre-train ``model`` to reconstruct the images in ``dataset``.

    Uses MSE reconstruction loss with NAdam. Reads the module-level constants
    ``AE_LEARNING_RATE``, ``AE_BATCH_SIZE`` and ``AE_EPOCHS``.

    Args:
        model: Autoencoder to train; moved to ``device`` and put in train mode.
        dataset: Dataset yielding image tensors.
        device: Device on which training runs.

    Returns:
        The same ``model`` instance after training.
    """
    print("[AE] Iniciando pre-entrenamiento del Autoencoder")
    mse = nn.MSELoss()
    nadam = optim.NAdam(model.parameters(), weight_decay=1e-5, lr=AE_LEARNING_RATE)  # TODO: compare with Keras Nadam https://keras.io/api/optimizers/Nadam/

    model.to(device)
    model.train()

    batches = DataLoader(dataset, batch_size=AE_BATCH_SIZE, shuffle=True)

    print(f"[AE] Entrenando Autoencoder por {AE_EPOCHS} épocas...")

    start_time = time.time()
    for epoch in range(AE_EPOCHS):
        running_loss = 0.0

        for images in batches:
            images = images.to(device)

            nadam.zero_grad()
            reconstruction = model(images)
            batch_loss = mse(reconstruction, images)
            batch_loss.backward()
            nadam.step()

            # Weight by batch size so the epoch figure is a per-sample mean.
            running_loss += batch_loss.item() * images.size(0)

        avg_epoch_loss = running_loss / len(dataset)
        print(f"\t[AE] Época {epoch+1} completada — loss promedio: {avg_epoch_loss:.6f}")

    end_time = time.time()
    print(f"[AE] Entrenamiento completado en {end_time - start_time:.2f} segundos.")
    return model

In [None]:
class _IndexedDataset(Dataset):
    """Wrap a dataset so that each item comes back as ``(index, item)``.

    The joint-training loop shuffles its batches, so it needs the dataset
    position of every sample to pick the matching row of the target
    distribution.
    """

    def __init__(self, base):
        self.base = base

    def __len__(self):
        return len(self.base)

    def __getitem__(self, idx):
        return idx, self.base[idx]


def train_deep_joint_clustering(dataset):
    """Pre-train an autoencoder, then jointly refine it and the cluster centroids.

    Steps:
        1. Seed torch / numpy and pre-train an ``Autoencoder`` on ``dataset``.
        2. Encode the whole dataset and run KMeans on the latents to obtain the
           initial centroids and labels.
        3. Train an ``IDECModel`` with ``GAMMA_IMAGES * KL(p || q) + MSE``,
           recomputing the target distribution ``p`` every ``UPDATE_INTERVAL``
           epochs and stopping early once the fraction of changed labels drops
           below ``TOL``.
        4. Encode the dataset once more and return the final assignments.

    Reads the module-level constants ``RANDOM_STATE``, ``USE_GPU``,
    ``ENCODING_DIM``, ``AE_BATCH_SIZE``, ``AE_LEARNING_RATE``, ``NUM_CLUSTERS``,
    ``JOINT_EPOCHS``, ``JOINT_BATCH_SIZE``, ``UPDATE_INTERVAL``, ``TOL`` and
    ``GAMMA_IMAGES``.

    Args:
        dataset: Dataset yielding image tensors; must support ``len()`` and
            integer indexing.

    Returns:
        Tuple ``(new_latents, y_pred_final, idec_model)``: the flattened
        latents as a numpy array, the argmax cluster label per sample as a
        numpy array (both in dataset order), and the trained model in eval mode.
    """
    torch.manual_seed(RANDOM_STATE)
    torch.cuda.manual_seed_all(RANDOM_STATE)
    np.random.seed(RANDOM_STATE)

    # Fall back to the CPU instead of crashing when CUDA is requested but
    # not available on this machine.
    use_cuda = USE_GPU and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")
    print(f"Usando dispositivo: {device}\n")

    autoencoder = Autoencoder(ENCODING_DIM)
    autoencoder = train_autoencoder(autoencoder, dataset, device)

    autoencoder.to(device)
    autoencoder.eval()

    print("Extrayendo representaciones latentes para Clustering…")
    latents = []
    with torch.no_grad():
        for batch in DataLoader(dataset, batch_size=AE_BATCH_SIZE, shuffle=False):
            x = batch.to(device)
            z = autoencoder.encode(x)
            latents.append(z.cpu())

    latents = torch.cat(latents, dim=0).numpy()
    print(f"Latents extraídas: shape = {latents.shape}\n")

    print(f"Inicializando KMeans con {NUM_CLUSTERS} clusters…")
    # TODO: evaluate MiniBatchKMeans as a cheaper alternative.
    kmeans = KMeans(n_clusters=NUM_CLUSTERS, random_state=RANDOM_STATE, n_init=30, init="k-means++")
    y_pred_kmeans = kmeans.fit_predict(latents)
    print("KMeans completado. Centroides iniciales obtenidos.\n")

    cluster_centers_init = kmeans.cluster_centers_
    y_pred_last = y_pred_kmeans

    # Other initialisers that were tried (AgglomerativeClustering with cosine /
    # average linkage, SpectralClustering with a nearest-neighbours affinity)
    # can be plugged in here: fit_predict on `latents`, use the labels as
    # `y_pred_last`, and the per-cluster mean of `latents` as
    # `cluster_centers_init`.

    # ------------------------------------------------------------------------

    idec_model = IDECModel(autoencoder, cluster_centers_init, alpha=1.0)
    idec_model.to(device)

    optimizer = optim.NAdam(idec_model.parameters(), weight_decay=1e-5, lr=AE_LEARNING_RATE)

    recon_criterion = nn.MSELoss()
    kl_criterion    = nn.KLDivLoss(reduction="batchmean")

    # Training batches are shuffled, so every image travels with its dataset
    # index; that index selects the right rows of p_target.
    indexed_dataset = _IndexedDataset(dataset)
    p_target = None

    print("[Joint] Entrenamiento conjunto arrancando…")

    start_time = time.time()
    for epoch in range(JOINT_EPOCHS):
        if epoch % UPDATE_INTERVAL == 0:
            print(f"\t[Joint] Actualizando p_target (epoch % {UPDATE_INTERVAL} == 0)")

            idec_model.eval()
            all_q = []
            with torch.no_grad():
                for batch in DataLoader(dataset, batch_size=JOINT_BATCH_SIZE, shuffle=False):
                    batch = batch.to(device)
                    q_batch, _ = idec_model(batch)
                    all_q.append(q_batch)
            # Rows of q_all / p_target are in dataset order (shuffle=False above).
            q_all = torch.cat(all_q, dim=0)
            p_target = target_distribution(q_all)

            y_pred_current = torch.argmax(q_all, dim=1).cpu().numpy()

            # Fraction of samples whose hard assignment changed since the last update.
            delta = np.mean(y_pred_current != y_pred_last)
            if epoch > 0 and delta < TOL:
                print(f"[Joint] Convergencia alcanzada (Δ={delta:.4f} < {TOL}). Deteniendo entrenamiento.")
                break
            y_pred_last = y_pred_current

        idec_model.train()
        epoch_loss = 0.0
        for sample_idx, batch in DataLoader(indexed_dataset, batch_size=JOINT_BATCH_SIZE, shuffle=True):
            batch = batch.to(device)
            q_batch, x_rec = idec_model(batch)

            # Pick the targets of exactly the samples in this (shuffled) batch.
            # Slicing p_target by batch position would pair each sample with the
            # target of an unrelated one.
            p_batch = p_target[sample_idx.to(device)]

            # Clamp before the log so an underflowed probability cannot turn
            # the loss into NaN.
            log_q = torch.clamp(q_batch, min=1e-10).log()
            loss_kl = kl_criterion(log_q, p_batch)  # TODO: double-check against the reference equation
            loss_recon = recon_criterion(x_rec, batch)  # TODO: double-check against the reference equation

            loss = GAMMA_IMAGES * loss_kl + loss_recon

            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            epoch_loss += loss.item() * batch.size(0)

        avg_joint_loss = epoch_loss / len(dataset)
        print(f"\t[Joint] Época {epoch+1} finalizada — loss promedio: {avg_joint_loss:.6f}")

    end_time = time.time()
    print(f"[Joint] Entrenamiento completado en {end_time - start_time:.2f} segundos.")



    print("Obteniendo predicciones finales…")
    idec_model.eval()

    all_q = []
    new_latents = []
    with torch.no_grad():
        for batch in DataLoader(dataset, batch_size=JOINT_BATCH_SIZE, shuffle=False):
            batch = batch.to(device)

            z = idec_model.autoencoder.encode(batch)
            new_latents.append(z.cpu())

            q_batch, _ = idec_model(batch)
            all_q.append(q_batch)

    q_final = torch.cat(all_q, dim=0)
    y_pred_final = torch.argmax(q_final, dim=1).cpu().numpy()
    new_latents = torch.cat(new_latents, dim=0).numpy()
    print("Predicciones finales obtenidas.\n")

    return new_latents, y_pred_final, idec_model

# Dataset Blood Cell

In [18]:
# --- Configuration for the blood-cell run ---------------------------------
IMAGE_DIR = "./storage/clean/blood_cell/segmenter"
USE_GPU = True
NUM_CLUSTERS = 5

# Autoencoder pre-training
AE_EPOCHS = 30
AE_BATCH_SIZE = 64
AE_LEARNING_RATE = 1e-3

# Joint clustering / reconstruction phase
JOINT_EPOCHS = 200
JOINT_BATCH_SIZE = 64
GAMMA_IMAGES = 0.05  # weight of the KL term in the joint loss
TOL = 1e-2  # early-stop threshold on the fraction of changed labels
UPDATE_INTERVAL = 16  # epochs between target-distribution refreshes

RANDOM_STATE = 42
ENCODING_DIM = 128
INPUT_DIM = 368 * 368 * 3

# --- Train ----------------------------------------------------------------
dataset = LazyDataset(image_dir=IMAGE_DIR, transform=get_transform())
image_filenames = dataset.image_filenames

latents_final, cluster_labels, trained_idec_model_images = train_deep_joint_clustering(dataset)

# --- Report ---------------------------------------------------------------
print("\n--- Final Image Clustering Results ---")
print(f"Predicted cluster assignments shape: {cluster_labels.shape}")

# Show at most the first ten assignments.
n_preview = min(10, len(image_filenames))
for i in range(n_preview):
    print(f"Image: {image_filenames[i]}, Cluster ID: {cluster_labels[i]}")
if n_preview < len(image_filenames):
    print("...")

unique_clusters, counts = np.unique(cluster_labels, return_counts=True)
print("\nCluster distribution:")
for cluster_id, count in zip(unique_clusters, counts):
    print(f"Cluster {cluster_id}: {count} images")


# Internal validity metrics need at least two clusters and more samples than clusters.
metrics_applicable = len(set(cluster_labels)) > 1 and len(cluster_labels) > NUM_CLUSTERS
if not metrics_applicable:
    print("Insuficientes clusters para métricas.")
else:
    # Each metric is attempted independently so one failure does not hide the other.
    try:
        score = silhouette_score(latents_final, cluster_labels)
        print(f"\nSilhouette Score: {score:.4f}")
    except Exception as e:
        print(f"Error Silhouette: {e}")
    try:
        ch_score = calinski_harabasz_score(latents_final, cluster_labels)
        print(f"Calinski-Harabasz Index: {ch_score:.2f}")
    except Exception as e:
        print(f"Error CH Index: {e}")

Usando dispositivo: cuda

[AE] Iniciando pre-entrenamiento del Autoencoder
[AE] Entrenando Autoencoder por 30 épocas...
	[AE] Época 1 completada — loss promedio: 0.052959
	[AE] Época 2 completada — loss promedio: 0.016883
	[AE] Época 3 completada — loss promedio: 0.009705
	[AE] Época 4 completada — loss promedio: 0.007372
	[AE] Época 5 completada — loss promedio: 0.006744
	[AE] Época 6 completada — loss promedio: 0.006210
	[AE] Época 7 completada — loss promedio: 0.006061
	[AE] Época 8 completada — loss promedio: 0.005790
	[AE] Época 9 completada — loss promedio: 0.005667
	[AE] Época 10 completada — loss promedio: 0.005452
	[AE] Época 11 completada — loss promedio: 0.005247
	[AE] Época 12 completada — loss promedio: 0.005135
	[AE] Época 13 completada — loss promedio: 0.005028
	[AE] Época 14 completada — loss promedio: 0.004853
	[AE] Época 15 completada — loss promedio: 0.004664
	[AE] Época 16 completada — loss promedio: 0.004603
	[AE] Época 17 completada — loss promedio: 0.004449
	[AE]

In [None]:
# --- Configuration for the CIFAR run --------------------------------------
# NOTE(review): the directory name suggests the pickled "python version"
# batches of CIFAR-10 rather than individual image files. LazyDataset opens
# every directory entry as an image, so confirm this folder really holds
# images before running the cell.
IMAGE_DIR = "./storage/raw/cifar/cifar-10-batches-py"
USE_GPU = True
NUM_CLUSTERS = 5

# Autoencoder pre-training
AE_EPOCHS = 30
AE_BATCH_SIZE = 64
AE_LEARNING_RATE = 1e-3

# Joint clustering / reconstruction phase
JOINT_EPOCHS = 200
JOINT_BATCH_SIZE = 64
GAMMA_IMAGES = 0.05  # weight of the KL term in the joint loss
TOL = 1e-2  # early-stop threshold on the fraction of changed labels
UPDATE_INTERVAL = 16  # epochs between target-distribution refreshes

RANDOM_STATE = 42
ENCODING_DIM = 128
INPUT_DIM = 368 * 368 * 3

# --- Train ----------------------------------------------------------------
dataset = LazyDataset(image_dir=IMAGE_DIR, transform=get_transform())
image_filenames = dataset.image_filenames

latents_final, cluster_labels, trained_idec_model_images = train_deep_joint_clustering(dataset)

# --- Report ---------------------------------------------------------------
print("\n--- Final Image Clustering Results ---")
print(f"Predicted cluster assignments shape: {cluster_labels.shape}")

# Show at most the first ten assignments.
n_preview = min(10, len(image_filenames))
for i in range(n_preview):
    print(f"Image: {image_filenames[i]}, Cluster ID: {cluster_labels[i]}")
if n_preview < len(image_filenames):
    print("...")

unique_clusters, counts = np.unique(cluster_labels, return_counts=True)
print("\nCluster distribution:")
for cluster_id, count in zip(unique_clusters, counts):
    print(f"Cluster {cluster_id}: {count} images")


# Internal validity metrics need at least two clusters and more samples than clusters.
metrics_applicable = len(set(cluster_labels)) > 1 and len(cluster_labels) > NUM_CLUSTERS
if not metrics_applicable:
    print("Insuficientes clusters para métricas.")
else:
    # Each metric is attempted independently so one failure does not hide the other.
    try:
        score = silhouette_score(latents_final, cluster_labels)
        print(f"\nSilhouette Score: {score:.4f}")
    except Exception as e:
        print(f"Error Silhouette: {e}")
    try:
        ch_score = calinski_harabasz_score(latents_final, cluster_labels)
        print(f"Calinski-Harabasz Index: {ch_score:.2f}")
    except Exception as e:
        print(f"Error CH Index: {e}")