In [11]:
import os
import torch
import pandas as pd
import numpy as np
import random
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from PIL import Image
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
import itertools
import matplotlib.pyplot as plt


In [12]:
# Directorios de imÃ¡genes
train_image_dir = "/home/angel/Documentos/ProyectoSistemasInteligentes/datasets/SPIE_BreastPathQ2019_Training_Validation/breastpathq/datasets/train"
val_image_dir = "/home/angel/Documentos/ProyectoSistemasInteligentes/datasets/SPIE_BreastPathQ2019_Training_Validation/breastpathq/datasets/validation"
test_image_dir = "/home/angel/Documentos/ProyectoSistemasInteligentes/datasets/SPIE_BreastPathQ2019_Testing/breastpathq-test/test_patches"

# Rutas de etiquetas
train_label_path = "/home/angel/Documentos/ProyectoSistemasInteligentes/datasets/SPIE_BreastPathQ2019_Training_Validation/breastpathq/datasets/train_labels.csv"
test_label_path = "/home/angel/Documentos/ProyectoSistemasInteligentes/datasets/SPIE_BreastPathQ2019_Testing/breastpathq-test/val_labels.csv"

# Cargar datasets
df_train = pd.read_csv(train_label_path)
df_test = pd.read_csv(test_label_path)

# Agregar nombres de archivos a los DataFrames
df_train["image_name"] = df_train["slide"].astype(str) + "_" + df_train["rid"].astype(str) + ".tif"
df_test["image_name"] = df_test["slide"].astype(str) + "_" + df_test["rid"].astype(str) + ".tif"

# Cargar imÃ¡genes de validaciÃ³n
val_images = [f for f in os.listdir(val_image_dir) if f.endswith(".tif")]
df_val = pd.DataFrame({"image_name": val_images})

# Verificar los tamaÃ±os de los datasets
print(f"ðŸ“Œ Train: {len(df_train)} imÃ¡genes")
print(f"ðŸ“Œ Test: {len(df_test)} imÃ¡genes")
print(f"ðŸ“Œ Validation: {len(df_val)} imÃ¡genes")


ðŸ“Œ Train: 2394 imÃ¡genes
ðŸ“Œ Test: 185 imÃ¡genes
ðŸ“Œ Validation: 185 imÃ¡genes


In [13]:
class CustomDataset(Dataset):
    def __init__(self, image_dir, labels_df, transform=None, labeled=True):
        self.image_dir = image_dir
        self.labels_df = labels_df
        self.transform = transform
        self.labeled = labeled

    def __len__(self):
        return len(self.labels_df)

    def __getitem__(self, idx):
        img_name = self.labels_df.iloc[idx]["image_name"]
        img_path = os.path.join(self.image_dir, img_name)
        image = Image.open(img_path).convert("RGB")

        if self.transform:
            image = self.transform(image)

        if self.labeled:
            label = torch.tensor(self.labels_df.iloc[idx]["y"], dtype=torch.float32)
            return image, label
        else:
            return image, img_name


In [14]:
# Definir transformaciones para las imÃ¡genes
transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

# Crear datasets
train_dataset = CustomDataset(train_image_dir, df_train, transform=transform, labeled=True)
validation_dataset = CustomDataset(val_image_dir, df_val, transform=transform, labeled=False)
test_dataset = CustomDataset(test_image_dir, df_test, transform=transform, labeled=False)

# Crear DataLoaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True, num_workers=2)
validation_loader = DataLoader(validation_dataset, batch_size=32, shuffle=False, num_workers=2)
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False, num_workers=2)

print(f"ðŸ“Œ DataLoader de entrenamiento creado con {len(train_dataset)} imÃ¡genes")
print(f"ðŸ“Œ DataLoader de validaciÃ³n creado con {len(validation_dataset)} imÃ¡genes")
print(f"ðŸ“Œ DataLoader de test creado con {len(test_dataset)} imÃ¡genes")


ðŸ“Œ DataLoader de entrenamiento creado con 2394 imÃ¡genes
ðŸ“Œ DataLoader de validaciÃ³n creado con 185 imÃ¡genes
ðŸ“Œ DataLoader de test creado con 185 imÃ¡genes


In [15]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"ðŸ“Œ Dispositivo en uso: {device}")

# Cargar modelo preentrenado
resnet34 = models.resnet34(weights=models.ResNet34_Weights.DEFAULT)
resnet34.fc = nn.Linear(resnet34.fc.in_features, 1)  # Modificar la capa final para regresiÃ³n
resnet34 = resnet34.to(device)

# Definir funciÃ³n de pÃ©rdida y optimizador
criterion = nn.MSELoss()
optimizer = optim.Adam(resnet34.parameters(), lr=0.0001)  # Definir learning rate


ðŸ“Œ Dispositivo en uso: cuda


In [16]:
num_epochs = 10

for epoch in range(num_epochs):
    resnet34.train()
    running_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        outputs = resnet34(images).squeeze()
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()

    print(f"ðŸ“Œ Ã‰poca {epoch+1}/{num_epochs} - PÃ©rdida: {running_loss/len(train_loader):.4f}")

print("âœ… Entrenamiento finalizado")


ðŸ“Œ Ã‰poca 1/10 - PÃ©rdida: 0.1386
ðŸ“Œ Ã‰poca 2/10 - PÃ©rdida: 0.0208
ðŸ“Œ Ã‰poca 3/10 - PÃ©rdida: 0.0134
ðŸ“Œ Ã‰poca 4/10 - PÃ©rdida: 0.0104
ðŸ“Œ Ã‰poca 5/10 - PÃ©rdida: 0.0084
ðŸ“Œ Ã‰poca 6/10 - PÃ©rdida: 0.0077
ðŸ“Œ Ã‰poca 7/10 - PÃ©rdida: 0.0058
ðŸ“Œ Ã‰poca 8/10 - PÃ©rdida: 0.0053
ðŸ“Œ Ã‰poca 9/10 - PÃ©rdida: 0.0044
ðŸ“Œ Ã‰poca 10/10 - PÃ©rdida: 0.0047
âœ… Entrenamiento finalizado


In [20]:
def calculate_pk(labels, predictions):
    P, Q, T = 0, 0, 0
    for (pred_i, true_i), (pred_j, true_j) in itertools.combinations(zip(predictions, labels), 2):
        if (true_i < true_j and pred_i < pred_j) or (true_i > true_j and pred_i > pred_j):
            P += 1
        elif (true_i < true_j and pred_i > pred_j) or (true_i > true_j and pred_i < pred_j):
            Q += 1
        elif pred_i == pred_j:
            T += 1
    return (((P - Q) / (P + Q + T)) + 1) / 2 if (P + Q + T) != 0 else 0

def evaluate_model(model, data_loader, labeled=True):
    """
    EvalÃºa el modelo en el conjunto de datos usando MSE, MAE, RÂ² y PK.
    Si `labeled` es False, solo genera predicciones sin comparar con etiquetas.
    """
    model.eval()
    all_labels, all_predictions = [], []
    
    with torch.no_grad():
        for batch in data_loader:
            if labeled:
                images, labels = batch
                images, labels = images.to(device), labels.to(device).float().unsqueeze(1)
            else:
                images, image_names = batch
                images = images.to(device)
            
            outputs = model(images).squeeze().cpu().numpy()
            
            if labeled:
                all_labels.extend(labels.cpu().numpy())
                all_predictions.extend(outputs)

    if labeled:
        mse = mean_squared_error(all_labels, all_predictions)
        mae = mean_absolute_error(all_labels, all_predictions)
        r2 = r2_score(all_labels, all_predictions)
        pk = calculate_pk(all_labels, all_predictions)
        print(f"ðŸ“Œ MSE: {mse:.4f}, MAE: {mae:.4f}, RÂ²: {r2:.4f}, PK: {pk:.4f}")
    else:
        print("âœ… GeneraciÃ³n de predicciones completada.")

# Llamar a la evaluaciÃ³n solo con conjuntos etiquetados
evaluate_model(resnet34, validation_loader, labeled=True)



AttributeError: 'tuple' object has no attribute 'to'

In [None]:
import torch.nn.functional as F

resnet34.eval()
test_predictions = []

with torch.no_grad():
    for images, image_names in test_loader:
        images = images.to(device)
        outputs = resnet34(images).squeeze().cpu().numpy()

        # Asegurar que los valores de salida estÃ©n en el rango [0,1]
        outputs = np.clip(outputs, 0, 1)

        for img_name, pred in zip(image_names, outputs):
            slide, rid = img_name.replace(".tif", "").split("_")
            test_predictions.append([int(slide), int(rid), pred])

# Crear el DataFrame con el formato requerido
df_test_predictions = pd.DataFrame(test_predictions, columns=["slide", "rid", "score"])

# Guardar en CSV
submission_test_path = "submission_test.csv"
df_test_predictions.to_csv(submission_test_path, index=False)

print(f"âœ… Archivo de predicciones generado correctamente: {submission_test_path}")


âœ… Archivo de predicciones generado correctamente: submission_test.csv
