In [32]:
import os
import sys
import time
from pathlib import Path

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torchvision import models  # torchvision model zoo (resnet50, ResNet50_Weights)
from tqdm.notebook import tqdm  # Progress bar

# Project-local modules live in ../src. These `from ... import` statements only
# bind the imported class names, so `models` above keeps referring to
# torchvision.models.
sys.path.append('../src')
from data_loader import ChestXray8Dataset
from models.cnn import ResNet50

# Configuration settings: every dataset location is derived from the `data`
# folder that sits next to the notebook's working directory.
data_dir = Path.cwd().parent.joinpath('data')
images_dir = data_dir.joinpath('images')
# NOTE: despite its name, `metadata_dir` is the path of the metadata CSV file.
metadata_dir = data_dir.joinpath('metadata', 'Data_Entry_2017_v2020.csv')
train_list_path = data_dir.joinpath('metadata', 'train_val_list.txt')
test_list_path = data_dir.joinpath('metadata', 'test_list.txt')

# Prefer the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(device)

cuda


In [33]:
metadata_df = pd.read_csv(metadata_dir)

# Label names used as the model's output classes (order defines the column
# order of every label matrix built from `classes`).
classes = [
    "Atelectasis", "Consolidation", "Infiltration", "Pneumothorax",
    "Edema", "Emphysema", "Fibrosis", "Effusion", "Pneumonia",
    "Pleural_Thickening", "Cardiomegaly", "Nodule", "Mass", "Hernia",
]

# Add one 0/1 indicator column per class: 1 when the class name occurs as a
# literal substring of the row's 'Finding Labels' text, 0 otherwise.
finding_text = metadata_df['Finding Labels']
for cls in classes:
    metadata_df[cls] = finding_text.str.contains(cls, regex=False).astype('int64')


In [34]:
from torch.utils.data import Subset, WeightedRandomSampler

# Channel statistics shared by the training and evaluation pipelines, so that
# train / validation / test images are all normalised the same way.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]
BATCH_SIZE = 32
SPLIT_SEED = 42  # fixed seed so the train/validation split is reproducible

# Training pipeline: random augmentation, then normalisation.
train_transform = transforms.Compose([
    transforms.RandomResizedCrop(224),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(15),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.RandomAffine(degrees=0, translate=(0.1, 0.1)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])

# Evaluation pipeline: deterministic resize and the SAME normalisation as
# training (previously mean=[0.5], std=[0.5], which did not match training).
eval_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])
# Backward compatibility: `transform` used to end up bound to the evaluation
# pipeline once this cell had run.
transform = eval_transform

train_val_dataset = ChestXray8Dataset(
    img_dir=images_dir,
    metadata_file=metadata_dir,
    split_file=train_list_path,
    mode='train',  # Training mode
    transform=train_transform,
)

# Same split file and mode as `train_val_dataset`, but without augmentation;
# it backs the validation subset so validation metrics are deterministic.
# NOTE(review): assumes ChestXray8Dataset enumerates samples in the same order
# whenever it is built from the same files and mode — confirm.
train_val_eval_dataset = ChestXray8Dataset(
    img_dir=images_dir,
    metadata_file=metadata_dir,
    split_file=train_list_path,
    mode='train',
    transform=eval_transform,
)
if len(train_val_eval_dataset) != len(train_val_dataset):
    raise ValueError("Augmented and non-augmented train/val datasets differ in length")

test_dataset = ChestXray8Dataset(
    img_dir=images_dir,
    metadata_file=metadata_dir,
    split_file=test_list_path,
    mode='test',  # Test mode
    transform=eval_transform,
)

# 80 / 20 train / validation split with a seeded generator.
train_size = int(0.8 * len(train_val_dataset))
val_size = len(train_val_dataset) - train_size
train_dataset, val_split = torch.utils.data.random_split(
    train_val_dataset,
    [train_size, val_size],
    generator=torch.Generator().manual_seed(SPLIT_SEED),
)
# Validation uses the split's indices on the non-augmented dataset.
val_dataset = Subset(train_val_eval_dataset, val_split.indices)

# ---------------------------------------------------------------------------
# Sampling weights, aligned with `train_dataset`
# ---------------------------------------------------------------------------
# The sampler draws positions inside `train_dataset`, so it needs exactly one
# weight per training sample. Weights built from every row of `metadata_df`
# would be misaligned and would yield indices beyond the training subset.
# NOTE(review): assumes the metadata CSV has an 'Image Index' filename column,
# that the split file lists one filename per line, and that ChestXray8Dataset
# keeps samples in metadata order restricted to the split — confirm. The
# length check below only catches a mismatch in size, not in order.
with open(train_list_path) as split_fh:
    train_val_names = {line.strip() for line in split_fh if line.strip()}
in_train_val = metadata_df['Image Index'].isin(train_val_names)
train_val_labels = metadata_df.loc[in_train_val, classes].to_numpy()
if len(train_val_labels) != len(train_val_dataset):
    raise ValueError(
        f"Metadata rows for the train/val split ({len(train_val_labels)}) do not match "
        f"the dataset length ({len(train_val_dataset)}); cannot align sampling weights"
    )
train_labels = train_val_labels[train_dataset.indices]

# Per-class positive counts on the training subset only (no validation/test
# statistics). `class_counts` is also used later to build the loss pos_weight.
# clip(min=1) avoids a division by zero for a class absent from the subset.
class_counts = train_labels.sum(axis=0).clip(min=1)
weights = 1.0 / class_counts

# A sample is weighted by its rarest positive class. Samples with no positive
# class get the inverse frequency of that group instead of silently
# inheriting the weight of class 0.
has_finding = train_labels.any(axis=1)
no_finding_weight = 1.0 / max(int((~has_finding).sum()), 1)
samples_weights = (train_labels * weights).max(axis=1)
samples_weights[~has_finding] = no_finding_weight

sampler = WeightedRandomSampler(
    samples_weights.tolist(),
    num_samples=len(samples_weights),
    replacement=True,
)

# Data loaders: weighted sampling for training, fixed order for evaluation.
train_loader = DataLoader(train_dataset, batch_size=BATCH_SIZE, sampler=sampler)

val_loader = DataLoader(val_dataset, batch_size=BATCH_SIZE, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

In [None]:
# Load a ResNet-50 initialised with the IMAGENET1K_V1 weights.
model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)

# Freeze every existing parameter; only the replacement head created below
# (whose parameters are trainable by default) will receive gradients.
model.requires_grad_(False)

# Replace the final fully connected layer with one output per class.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=len(classes))
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

# -----------------------------
# Focal Loss
# -----------------------------
class FocalLoss(nn.Module):
    """Focal loss on top of element-wise binary cross-entropy with logits.

    Each element's loss is ``alpha * (1 - p_t) ** gamma * BCE``, where ``p_t``
    is the predicted probability of the element's target value, so elements
    the model already predicts confidently contribute less.

    Args:
        alpha: Constant factor applied to every element's loss.
        gamma: Focusing exponent; ``0`` reduces the loss to ``alpha * BCE``.
        reduction: ``'mean'``, ``'sum'`` or ``'none'`` (return the unreduced
            tensor, same shape as the inputs).
        pos_weight: Optional tensor forwarded to
            ``binary_cross_entropy_with_logits`` as ``pos_weight``. It is
            registered as a buffer so it follows the module across devices.

    Raises:
        ValueError: If ``reduction`` is not one of the supported values.
    """

    _REDUCTIONS = ('mean', 'sum', 'none')

    def __init__(self, alpha=0.25, gamma=2, reduction='mean', pos_weight=None):
        super().__init__()
        if reduction not in self._REDUCTIONS:
            raise ValueError(
                f"reduction must be one of {self._REDUCTIONS}, got {reduction!r}"
            )
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction
        # A buffer (rather than a plain attribute) moves with .to()/.cuda();
        # a None buffer is allowed and is still readable as self.pos_weight.
        self.register_buffer('pos_weight', pos_weight)

    def forward(self, inputs, targets):
        """Return the focal loss of ``inputs`` (logits) against ``targets``.

        ``targets`` is cast to the dtype of ``inputs`` so integer label
        tensors are accepted.
        """
        targets = targets.to(dtype=inputs.dtype)
        bce_loss = nn.functional.binary_cross_entropy_with_logits(
            inputs, targets, pos_weight=self.pos_weight, reduction='none'
        )
        probs = torch.sigmoid(inputs)
        # p_t: probability assigned to the element's target value.
        p_t = torch.where(targets == 1, probs, 1 - probs)
        focal_weight = (1 - p_t) ** self.gamma
        loss = self.alpha * focal_weight * bce_loss

        if self.reduction == 'mean':
            return loss.mean()
        if self.reduction == 'sum':
            return loss.sum()
        return loss  # 'none'

# Per-class positive weights: the largest class count divided by each class
# count, so the most frequent class gets weight 1 and rarer classes get more.
count_ratio = class_counts.max() / class_counts
pos_weights = torch.tensor(count_ratio, dtype=torch.float32).to(device)
criterion = FocalLoss(pos_weight=pos_weights, gamma=2, alpha=0.25)

# Optimizer: only the parameters of the replaced head (`model.fc`) are updated.
optimizer = optim.Adam(params=model.fc.parameters(), lr=0.001)

In [None]:
from sklearn.metrics import roc_auc_score, f1_score, accuracy_score
import csv

# Macro-averaged AUC and F1 for a set of multi-label predictions.
def compute_metrics(outputs, labels):
    """Return ``(mean_auc, mean_f1)`` averaged over the label columns.

    Args:
        outputs: Tensor of raw logits with one column per class; a sigmoid is
            applied here to obtain probabilities.
        labels: Tensor of targets with the same column layout as ``outputs``.

    Returns:
        A tuple of two floats:
        * mean ROC AUC over the classes for which it is defined, i.e. the
          classes whose label column contains more than one distinct value
          (``nan`` when it is defined for no class);
        * mean F1 over all classes, using a 0.5 probability threshold.

    Classes with an undefined AUC are skipped rather than counted as 0.0,
    which would bias the average downwards. F1 is computed independently of
    the AUC, so an undefined AUC no longer discards that class's F1.
    """
    probs = torch.sigmoid(outputs).cpu().detach().numpy()
    targets = labels.cpu().detach().numpy()

    aucs = []
    f1s = []
    for i in range(probs.shape[1]):  # one iteration per class
        y_true = targets[:, i]
        y_prob = probs[:, i]
        # ROC AUC needs both outcomes to be present in the targets.
        if np.unique(y_true).size > 1:
            aucs.append(roc_auc_score(y_true, y_prob))
        # zero_division=0 keeps the "no positives at all" case at 0.0 without
        # emitting a warning for every such class.
        f1s.append(f1_score(y_true, y_prob > 0.5, zero_division=0))

    mean_auc = float(np.mean(aucs)) if aucs else float('nan')
    mean_f1 = float(np.mean(f1s)) if f1s else float('nan')
    return mean_auc, mean_f1

# Append one epoch's metrics to a CSV file.
def save_metrics(epoch, train_loss, val_loss, train_auc, val_auc, train_f1, val_f1, file_path):
    """Append a row of metrics for ``epoch`` to the CSV at ``file_path``.

    Args:
        epoch: Zero-based epoch index; it is written as ``epoch + 1``.
        train_loss, val_loss, train_auc, val_auc, train_f1, val_f1: Values
            written, in that column order, after the epoch number.
        file_path: Destination CSV path; the file is created if missing.

    The header row is written whenever the file is missing or empty, instead
    of only when ``epoch == 0``. A run that starts at a later epoch therefore
    still gets a header, and repeating epoch 0 on a non-empty file does not
    duplicate it.
    """
    needs_header = not os.path.exists(file_path) or os.path.getsize(file_path) == 0
    with open(file_path, 'a', newline='') as f:
        writer = csv.writer(f)
        if needs_header:
            writer.writerow(['Epoch', 'Train_Loss', 'Val_Loss', 'Train_AUC', 'Val_AUC', 'Train_F1', 'Val_F1'])
        writer.writerow([epoch + 1, train_loss, val_loss, train_auc, val_auc, train_f1, val_f1])

# Training and validation routines (each performs one pass over a loader).
def train_model(model, train_loader, criterion, optimizer, device):
    """Run one training epoch and return ``(avg_loss, auc, f1)``.

    Args:
        model: Module to train; switched to training mode here.
        train_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable returning a scalar loss for ``(outputs, labels)``.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        ``avg_loss`` is the batch losses weighted by batch size and divided
        by the number of samples actually processed; ``auc`` and ``f1`` come
        from ``compute_metrics`` on all outputs/labels seen in the epoch.
    """
    model.train()
    running_loss = 0.0
    samples_seen = 0
    all_outputs = []
    all_labels = []
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_size = inputs.size(0)
        running_loss += loss.item() * batch_size
        samples_seen += batch_size
        # Detach before storing: keeping the raw outputs would hold every
        # batch's autograd graph alive until the end of the epoch.
        all_outputs.append(outputs.detach().cpu())
        all_labels.append(labels.detach().cpu())

    # Compute metrics over the whole epoch.
    all_outputs = torch.cat(all_outputs)
    all_labels = torch.cat(all_labels)
    # Divide by the samples actually drawn: when the loader uses a sampler,
    # that number need not equal len(train_loader.dataset).
    avg_loss = running_loss / samples_seen
    auc, f1 = compute_metrics(all_outputs, all_labels)
    return avg_loss, auc, f1

def validate_model(model, val_loader, criterion, device):
    """Evaluate ``model`` on ``val_loader`` and return ``(avg_loss, auc, f1)``.

    The model is put in evaluation mode and the pass runs under
    ``torch.no_grad()``. ``avg_loss`` is the batch losses weighted by batch
    size and divided by ``len(val_loader.dataset)``; ``auc`` and ``f1`` come
    from ``compute_metrics`` on the concatenated outputs and labels.
    """
    model.eval()
    loss_total = 0.0
    logit_chunks, target_chunks = [], []

    with torch.no_grad():
        for batch_inputs, batch_targets in val_loader:
            batch_inputs = batch_inputs.to(device)
            batch_targets = batch_targets.to(device)

            batch_logits = model(batch_inputs)
            batch_loss = criterion(batch_logits, batch_targets)

            loss_total += batch_loss.item() * batch_inputs.size(0)
            logit_chunks.append(batch_logits)
            target_chunks.append(batch_targets)

    # Aggregate the whole pass before scoring it.
    epoch_logits = torch.cat(logit_chunks)
    epoch_targets = torch.cat(target_chunks)
    mean_loss = loss_total / len(val_loader.dataset)
    epoch_auc, epoch_f1 = compute_metrics(epoch_logits, epoch_targets)
    return mean_loss, epoch_auc, epoch_f1

# -----------------------------
# Entrenamiento del Modelo
# -----------------------------
num_epochs = 10
metrics_file = 'training_metrics_2.csv'

# Start from an empty metrics file (created if missing, truncated otherwise).
open(metrics_file, 'w').close()

for epoch in range(num_epochs):
    # One training pass followed by one validation pass.
    train_stats = train_model(model, train_loader, criterion, optimizer, device)
    val_stats = validate_model(model, val_loader, criterion, device)
    train_loss, train_auc, train_f1 = train_stats
    val_loss, val_auc, val_f1 = val_stats

    epoch_report = (
        f"Epoch {epoch+1}/{num_epochs} - "
        f"Train Loss: {train_loss:.4f} | Val Loss: {val_loss:.4f} | "
        f"Train AUC: {train_auc:.4f} | Val AUC: {val_auc:.4f} | "
        f"Train F1: {train_f1:.4f} | Val F1: {val_f1:.4f}"
    )
    print(epoch_report)

    # Persist this epoch's metrics.
    save_metrics(epoch, train_loss, val_loss, train_auc, val_auc, train_f1, val_f1, metrics_file)

# -----------------------------
# Evaluación del Test Set
# -----------------------------
def test_model(model, test_loader, criterion, device):
    """Evaluate ``model`` on ``test_loader`` and return ``(avg_loss, auc, f1)``.

    The evaluation procedure is the same as for validation (evaluation mode,
    no gradients, size-weighted mean loss, metrics from ``compute_metrics``),
    so this delegates to ``validate_model`` instead of duplicating its body.
    """
    return validate_model(model, test_loader, criterion, device)

# Evaluate on the held-out test split. The loader is built from
# `test_dataset`; building it from `val_dataset` would report validation
# metrics under the "Test" label.
test_loader = DataLoader(test_dataset, batch_size=32, shuffle=False)

test_loss, test_auc, test_f1 = test_model(model, test_loader, criterion, device)
print(f"Test Loss: {test_loss:.4f} | Test AUC: {test_auc:.4f} | Test F1: {test_f1:.4f}")
