In [30]:
import os
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torchvision import models
import numpy as np
import pandas as pd
import sys
from sklearn.model_selection import train_test_split
import time
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader,Dataset
from tqdm.notebook import tqdm # Progession bar
from PIL import Image


In [31]:
def make_data_loaders(train_csv, val_csv, test_csv, images_dir, batch_size, image_size):
    from torch.utils.data import DataLoader, Dataset
    from PIL import Image

    class ImageDataset(Dataset):
        def __init__(self, csv_file, root_dir, transform=None):
            self.data = pd.read_csv(csv_file)
            self.root_dir = root_dir
            self.transform = transform

        def __len__(self):
            return len(self.data)

        def __getitem__(self, idx):
            img_name = os.path.join(self.root_dir, self.data.iloc[idx, 0])
            image = Image.open(img_name).convert('RGB')
            if self.transform:
                image = self.transform(image)
            return image

    transform = transforms.Compose([
        transforms.Resize((image_size, image_size)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    train_dataset = ImageDataset(train_csv, images_dir, transform)
    val_dataset = ImageDataset(val_csv, images_dir, transform)
    test_dataset = ImageDataset(test_csv, images_dir, transform)

    dataloaders = {
        'train': DataLoader(train_dataset, batch_size=batch_size, shuffle=True),
        'val': DataLoader(val_dataset, batch_size=batch_size, shuffle=False),
        'test': DataLoader(test_dataset, batch_size=batch_size, shuffle=False)
    }
    return dataloaders


In [32]:

train_csv = "C:/Users/RSCBAL04/Documents/GitHub/ChestX-ray8_classification/data/metadata/val_metadata.csv"
val_csv =train_csv
test_csv =train_csv
images_dir = "C:/Users/RSCBAL04/Documents/GitHub/ChestX-ray8_classification/data/images"
dataloaders = make_data_loaders(train_csv, val_csv, test_csv, images_dir, batch_size=16, image_size=224)

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")



In [33]:
class DeepClusterModel(nn.Module):
    def __init__(self, base_model):
        super(DeepClusterModel, self).__init__()
        self.features = nn.Sequential(*list(base_model.children())[:-1])
        self.fc = nn.Linear(base_model.fc.in_features, 128)  # Dimensión del espacio de características

    def forward(self, x):
        x = self.features(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

In [34]:

from torchvision.models import ResNet50_Weights
def initialize_model():
    base_model = models.resnet50(weights=ResNet50_Weights.IMAGENET1K_V1)
    model = DeepClusterModel(base_model)
    return model

In [35]:
from sklearn.cluster import KMeans
def run_kmeans(features, num_clusters):
    kmeans = KMeans(n_clusters=num_clusters, n_init=20, verbose=1)
    assignments = kmeans.fit_predict(features)
    return assignments

In [36]:
def train_deepcluster(model, dataloader, num_clusters, num_epochs):
    optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9)
    criterion = nn.CrossEntropyLoss()

    for epoch in range(num_epochs):
        model.train()
        features = []
        print(features)
        for inputs in dataloader:
           
            inputs = inputs.cuda()
            outputs = model(inputs)
           
            features.append(outputs.detach().cpu().numpy())

        features = np.vstack(features)
        assignments = run_kmeans(features, num_clusters)

        model.train()
        for batch_idx, inputs in enumerate(dataloader):
            inputs = inputs.cuda()
            optimizer.zero_grad()
            outputs = model(inputs)
            batch_assignments = torch.tensor(assignments[batch_idx * inputs.size(0):(batch_idx + 1) * inputs.size(0)], dtype=torch.long).cuda()
            loss = criterion(outputs, batch_assignments)
            loss.backward()
            optimizer.step()




In [37]:
for i in dataloaders['train']:
    print(i)
    break

tensor([[[[-1.9467, -1.9467, -1.9467,  ..., -1.9295, -1.7925, -1.5185],
          [-1.9467, -1.9467, -1.9467,  ..., -1.9295, -1.8097, -1.5528],
          [-1.9467, -1.9467, -1.9467,  ..., -1.9467, -1.8268, -1.5699],
          ...,
          [-1.9467, -1.9467, -1.9467,  ..., -1.9295, -1.8782, -1.8097],
          [-1.9467, -1.9467, -1.9467,  ..., -1.8953, -1.8097, -1.7412],
          [-1.9124, -1.9295, -1.9295,  ..., -1.8610, -1.7240, -1.6213]],

         [[-1.8606, -1.8606, -1.8606,  ..., -1.8431, -1.7031, -1.4230],
          [-1.8606, -1.8606, -1.8606,  ..., -1.8431, -1.7206, -1.4580],
          [-1.8606, -1.8606, -1.8606,  ..., -1.8606, -1.7381, -1.4755],
          ...,
          [-1.8606, -1.8606, -1.8606,  ..., -1.8431, -1.7906, -1.7206],
          [-1.8606, -1.8606, -1.8606,  ..., -1.8081, -1.7206, -1.6506],
          [-1.8256, -1.8431, -1.8431,  ..., -1.7731, -1.6331, -1.5280]],

         [[-1.6302, -1.6302, -1.6302,  ..., -1.6127, -1.4733, -1.1944],
          [-1.6302, -1.6302, -

In [38]:
model = initialize_model().cuda()
train_deepcluster(model, dataloaders['train'], num_clusters=10, num_epochs=5)

[]
Initialization complete
Iteration 0, inertia 27941.025390625.
Iteration 1, inertia 17876.740234375.
Iteration 2, inertia 17582.34375.
Iteration 3, inertia 17475.92578125.
Iteration 4, inertia 17423.982421875.
Iteration 5, inertia 17392.044921875.
Iteration 6, inertia 17371.62109375.
Iteration 7, inertia 17352.24609375.
Iteration 8, inertia 17336.73828125.
Iteration 9, inertia 17322.45703125.
Iteration 10, inertia 17307.904296875.
Iteration 11, inertia 17293.9296875.
Iteration 12, inertia 17281.36328125.
Iteration 13, inertia 17270.771484375.
Iteration 14, inertia 17260.171875.
Iteration 15, inertia 17248.9375.
Iteration 16, inertia 17239.626953125.
Iteration 17, inertia 17233.00390625.
Iteration 18, inertia 17227.248046875.
Iteration 19, inertia 17223.28515625.
Iteration 20, inertia 17220.703125.
Iteration 21, inertia 17219.109375.
Iteration 22, inertia 17217.7265625.
Iteration 23, inertia 17216.150390625.
Iteration 24, inertia 17214.892578125.
Iteration 25, inertia 17214.0625.
Iter

In [41]:
torch.save(model.state_dict(), 'deepcluster_model.pth')

In [43]:
import numpy as np


np.save('assignments.npy', assignments)

NameError: name 'assignments' is not defined

In [40]:
import matplotlib.pyplot as plt

def visualize_clusters(images, assignments, num_clusters=10):
    fig, axes = plt.subplots(num_clusters, 10, figsize=(20, 20))
    for cluster in range(num_clusters):
        cluster_indices = [i for i, a in enumerate(assignments) if a == cluster]
        for i, idx in enumerate(cluster_indices[:10]):
            img = images[idx].permute(1, 2, 0).numpy()  # Convertir tensor a numpy array
            axes[cluster, i].imshow(img)
            axes[cluster, i].axis('off')
    plt.show()

# Obtener imágenes y asignaciones de clúster
images = [traindataset[i] for i in range(len(dataset))]
assignments = run_kmeans(features, num_clusters=10)

# Visualizar clústeres
visualize_clusters(images, assignments)

NameError: name 'dataset' is not defined