In [1]:
# Prints a greeting; presumably a quick check that the kernel is executing cells.
print("hola mundo")

hola mundo


In [2]:
import os
import random
import torch
import torch.nn as nn
import torch.optim as optim
import pandas as pd
import torchvision.transforms as transforms
from PIL import Image
import numpy as np
import matplotlib.pyplot as plt
from torch.utils.data import Dataset, DataLoader
import torchvision.models as models
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score
from scipy.stats import pearsonr
import itertools


In [3]:
# Dataset root. Defaults to the original local path; set the BREASTPATHQ_ROOT
# environment variable to run the notebook on another machine.
DATA_ROOT = os.environ.get(
    "BREASTPATHQ_ROOT",
    "/home/angel/Documentos/ProyectoSistemasInteligentes/datasets",
)
_TRAIN_VAL_ROOT = os.path.join(
    DATA_ROOT, "SPIE_BreastPathQ2019_Training_Validation", "breastpathq", "datasets"
)
_TEST_ROOT = os.path.join(DATA_ROOT, "SPIE_BreastPathQ2019_Testing", "breastpathq-test")

# Image directories
train_image_dir = os.path.join(_TRAIN_VAL_ROOT, "train")
val_image_dir = os.path.join(_TRAIN_VAL_ROOT, "validation")
test_image_dir = os.path.join(_TEST_ROOT, "test_patches")

# Label files (the validation labels live under the Testing download)
train_label_path = os.path.join(_TRAIN_VAL_ROOT, "train_labels.csv")
val_label_path = os.path.join(_TEST_ROOT, "val_labels.csv")


def _index_patches(image_dir):
    """Build a DataFrame with one row per `<slide>_<rid>.tif` file in `image_dir`.

    Args:
        image_dir: Directory to scan (non-recursive).

    Returns:
        DataFrame with columns `image_name` (str), `slide` (int) and `rid` (int),
        sorted by file name so the row order does not depend on `os.listdir`.

    Raises:
        ValueError: If a `.tif` file name does not contain `<digits>_<digits>.tif`.
    """
    names = sorted(f for f in os.listdir(image_dir) if f.endswith(".tif"))
    frame = pd.DataFrame({"image_name": names})
    # The dot is escaped and the pattern anchored at the end so only a real
    # ".tif" suffix is accepted (the unescaped "." matched any character).
    ids = frame["image_name"].str.extract(r'(\d+)_(\d+)\.tif$')
    unparsed = frame.loc[ids.isna().any(axis=1), "image_name"].tolist()
    if unparsed:
        # Fail with the offending names instead of an opaque NaN -> int cast error.
        raise ValueError(f"Unexpected patch file names in {image_dir}: {unparsed[:5]}")
    frame[["slide", "rid"]] = ids.astype(int)
    return frame


# 1) Training set: labels come with the CSV; the file name is rebuilt from slide/rid.
df_train = pd.read_csv(train_label_path)
df_train["image_name"] = df_train["slide"].astype(str) + "_" + df_train["rid"].astype(str) + ".tif"

# 2) Validation set: list the images on disk, then attach labels by (slide, rid).
df_val = _index_patches(val_image_dir)
df_val_labels = pd.read_csv(val_label_path)
# Left join keeps every image on disk, even one without a label row.
df_val = df_val.merge(df_val_labels, on=["slide", "rid"], how="left")

# 3) Test patches: images only, no labels.
df_test = _index_patches(test_image_dir)

# 4) Summary of the three splits.
print("\n✅ Organización Final de Conjuntos:")
print(f"  - Train: {len(df_train)} imágenes")
print(f"  - Validation: {len(df_val)} imágenes")
print(f"  - Test_patches: {len(df_test)} imágenes (Debe ser 1119)")



✅ Organización Final de Conjuntos:
  - Train: 2394 imágenes
  - Validation: 185 imágenes
  - Test_patches: 1119 imágenes (Debe ser 1119)


In [4]:
class BreastDataset(Dataset):
    """Image-patch dataset driven by a DataFrame of file names.

    Each row of `df` must have an `image_name` column; labelled datasets
    (`is_test=False`) must also have a `y` column holding the regression target.
    """

    def __init__(self, df, image_dir, transform=None, is_test=False):
        """
        Args:
            df: DataFrame with one row per image.
            image_dir: Directory that contains the files named in `image_name`.
            transform: Optional callable applied to the RGB PIL image.
            is_test: If True, items are `(image, image_name)` instead of
                `(image, label)`.
        """
        self.df = df
        self.image_dir = image_dir
        self.transform = transform
        self.is_test = is_test

    def __len__(self):
        """Number of images (rows in the backing DataFrame)."""
        return len(self.df)

    def __getitem__(self, idx):
        """Load the image at positional index `idx`.

        Returns:
            `(image, image_name)` when `is_test` is True, otherwise
            `(image, label)` with `label` a float32 scalar tensor.
        """
        # Positional lookup, done once, so a non-default DataFrame index is fine.
        row = self.df.iloc[idx]
        img_name = row["image_name"]
        img_path = os.path.join(self.image_dir, img_name)

        # Close the file handle as soon as the pixels are decoded; otherwise a
        # descriptor stays open per sample until garbage collection.
        with Image.open(img_path) as img:
            image = img.convert("RGB")

        if self.transform:
            image = self.transform(image)

        if self.is_test:
            return image, img_name
        return image, torch.tensor(float(row["y"]), dtype=torch.float32)

# Preprocessing shared by every split: resize to 224x224, then convert to a tensor.
transform = transforms.Compose(
    [transforms.Resize((224, 224)), transforms.ToTensor()]
)

# One dataset per split; only the test split yields (image, file name) pairs.
train_dataset = BreastDataset(df_train, train_image_dir, transform=transform)
val_dataset = BreastDataset(df_val, val_image_dir, transform=transform)
test_dataset = BreastDataset(df_test, test_image_dir, transform=transform, is_test=True)

# Only the training loader shuffles; validation and test keep the DataFrame order.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=16)
val_loader, test_loader = (
    DataLoader(split, shuffle=False, batch_size=16)
    for split in (val_dataset, test_dataset)
)


In [5]:
# Seed the Python, NumPy and PyTorch RNGs so the new head's initialisation and
# the training shuffle order are repeatable across Restart & Run All.
# NOTE(review): some GPU kernels may still be non-deterministic.
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"📌 Dispositivo en uso: {device}")

# ImageNet-pretrained ResNet-34 with the classifier replaced by a single-output
# regression head. The input width is read from the existing layer instead of
# hard-coding 512.
resnet34 = models.resnet34(weights="IMAGENET1K_V1")
resnet34.fc = nn.Linear(resnet34.fc.in_features, 1)
resnet34 = resnet34.to(device)

criterion = nn.MSELoss()
optimizer = optim.Adam(resnet34.parameters(), lr=0.001)


📌 Dispositivo en uso: cuda


In [6]:
# Fine-tune the whole network with MSE on the raw (un-squashed) regression output.
epochs = 10
for epoch in range(epochs):
    resnet34.train()
    epoch_loss = 0.0

    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)

        optimizer.zero_grad()
        # Flatten (batch, 1) -> (batch,). Unlike a bare squeeze(), this keeps a
        # 1-D tensor for a batch of one, so it always matches `labels`.
        outputs = resnet34(images).reshape(-1)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        epoch_loss += loss.item()

    # Reported value is the mean of the per-batch mean losses.
    print(f"📌 Época {epoch+1}/{epochs} - Pérdida: {epoch_loss/len(train_loader):.4f}")


📌 Época 1/10 - Pérdida: 0.1460
📌 Época 2/10 - Pérdida: 0.0274
📌 Época 3/10 - Pérdida: 0.0196
📌 Época 4/10 - Pérdida: 0.0180
📌 Época 5/10 - Pérdida: 0.0200
📌 Época 6/10 - Pérdida: 0.0133
📌 Época 7/10 - Pérdida: 0.0119
📌 Época 8/10 - Pérdida: 0.0084
📌 Época 9/10 - Pérdida: 0.0072
📌 Época 10/10 - Pérdida: 0.0085


In [7]:
import torch.nn.functional as F

# Predict a score for every unlabeled test patch and write the submission CSV.
submission_path = "submission_test.csv"

resnet34.eval()
test_predictions = []

with torch.no_grad():
    for images, image_names in test_loader:
        images = images.to(device)
        # Flatten (batch, 1) -> (batch,) so a final batch of one image still
        # yields a 1-D array that can be zipped with the file names.
        outputs = resnet34(images).reshape(-1)
        # The network is trained and validated on its raw output, so a sigmoid
        # here would distort the scores (a prediction of 0.0 would be written
        # as 0.5). Clamp instead to keep scores in [0, 1] on the label scale.
        scores = outputs.clamp(0.0, 1.0).cpu().numpy()

        for img_name, pred in zip(image_names, scores):
            slide, rid = img_name.replace(".tif", "").split("_")
            test_predictions.append([int(slide), int(rid), pred])

df_test_predictions = pd.DataFrame(test_predictions, columns=["slide", "rid", "score"])
df_test_predictions.to_csv(submission_path, index=False)

print(f"✅ Archivo de predicciones generado: {submission_path}")


✅ Archivo de predicciones generado: submission_test.csv


In [8]:
def calculate_pk(labels, predictions):
    """Prediction probability (PK) between ground-truth labels and predictions.

    Every pair of samples whose ground-truth values differ is classified as
    concordant (P: predictions ordered like the truth), discordant (Q: ordered
    the opposite way) or tied (T: equal predictions). Pairs with equal ground
    truth carry no ordering information and are ignored.

    Args:
        labels: Sequence of ground-truth values.
        predictions: Sequence of predicted values, aligned with `labels`.

    Returns:
        ((P - Q) / (P + Q + T) + 1) / 2, i.e. 1.0 for a perfect ranking, 0.5 for
        a chance-level one and 0.0 for a fully inverted one. Returns 0 when no
        pair can be scored (fewer than two samples, or all labels equal).
    """
    P, Q, T = 0, 0, 0
    for (pred_i, true_i), (pred_j, true_j) in itertools.combinations(zip(predictions, labels), 2):
        if (true_i < true_j and pred_i < pred_j) or (true_i > true_j and pred_i > pred_j):
            P += 1
        elif (true_i < true_j and pred_i > pred_j) or (true_i > true_j and pred_i < pred_j):
            Q += 1
        elif true_i != true_j and pred_i == pred_j:
            # Count a prediction tie only when the truth differs; a pair tied in
            # both would otherwise inflate the denominator and lower PK.
            T += 1
    scored_pairs = P + Q + T
    return (((P - Q) / scored_pairs) + 1) / 2 if scored_pairs != 0 else 0


In [9]:

def evaluate_model(model, data_loader, criterion, device):
    """
    Evaluate a regression model on a labelled loader and report MSE, MAE, R² and PK.

    Args:
        model: Network whose output has one value per sample.
        data_loader: Iterable of `(images, labels)` batches.
        criterion: Unused; kept so existing calls keep working (the metrics
            below are computed from the collected predictions instead).
        device: Device the image batches are moved to before the forward pass.

    Returns:
        Tuple `(mse, mae, r2, pk)`. The same values are also printed.
    """
    model.eval()
    all_labels, all_predictions = [], []

    with torch.no_grad():
        for images, labels in data_loader:
            # Flatten to 1-D: a bare squeeze() turns a batch of one into a 0-d
            # tensor, which cannot be iterated when extending the lists.
            outputs = model(images.to(device)).reshape(-1)

            all_labels.extend(labels.reshape(-1).cpu().numpy())
            all_predictions.extend(outputs.cpu().numpy())

    mse = mean_squared_error(all_labels, all_predictions)
    mae = mean_absolute_error(all_labels, all_predictions)
    r2 = r2_score(all_labels, all_predictions)
    pk = calculate_pk(all_labels, all_predictions)

    print(f"📌 MSE: {mse:.4f}, MAE: {mae:.4f}, R²: {r2:.4f}, PK: {pk:.4f}")

    return mse, mae, r2, pk


In [10]:
# Score the trained ResNet-34 on the validation loader; the returned
# (mse, mae, r2, pk) tuple is the cell's displayed output.
evaluate_model(resnet34, val_loader, criterion, device)


📌 MSE: 0.0428, MAE: 0.1418, R²: 0.5074, PK: 0.8181


(0.04278333514366463,
 0.14183138127665262,
 0.5073858931966435,
 0.8181243280844874)