### Clasificacion de imagenes de Pecho con pytorch

In [13]:
from tqdm import tqdm
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.utils.data as data
import torchvision.transforms as transforms
import torch
from torch.utils.data import Subset

import medmnist
from medmnist import INFO, Evaluator

In [14]:
# data_flag = 'pneumoniamnist'
data_flag = 'chestmnist'
download = True

NUM_EPOCHS = 10
BATCH_SIZE = 128
lr = 0.001

info = INFO[data_flag]
task = info['task']
n_channels = info['n_channels']
n_classes = len(info['label'])

DataClass = getattr(medmnist, info['python_class'])

## Encapsular datos en Dataset

In [15]:
# preprocessing
data_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=[.5], std=[.5])
])

# load the data
train_dataset = DataClass(split='train', transform=data_transform, download=download)
valid_dataset = DataClass(split='val', transform=data_transform, download=download)

test_dataset = DataClass(split='test', transform=data_transform, download=download)

# Preprocesamiento para las imágenes volteadas
flipped_transform = transforms.Compose([
    transforms.RandomVerticalFlip(),
    transforms.ToTensor(),
    transforms.Normalize(mean=[.5], std=[.5])
])

# load the data
train_dataset_flipped = DataClass(split='train', transform=flipped_transform, download=download)

# Combinar los dos conjuntos de train
combined_dataset  = data.ConcatDataset([train_dataset, train_dataset_flipped])

# encapsulate data into dataloader form
train_loader = data.DataLoader(dataset=combined_dataset, batch_size=BATCH_SIZE, shuffle=True)
valid_loader = data.DataLoader(dataset=valid_dataset, batch_size=2*BATCH_SIZE, shuffle=False)
test_loader = data.DataLoader(dataset=test_dataset, batch_size=2*BATCH_SIZE, shuffle=False)


Using downloaded and verified file: /Users/cmoro/.medmnist/chestmnist.npz
Using downloaded and verified file: /Users/cmoro/.medmnist/chestmnist.npz
Using downloaded and verified file: /Users/cmoro/.medmnist/chestmnist.npz
Using downloaded and verified file: /Users/cmoro/.medmnist/chestmnist.npz


## Definición del modelo

In [16]:
# define a simple CNN model

class Net(nn.Module):
    def __init__(self, in_channels, num_classes):
        super(Net, self).__init__()

        self.layer1 = nn.Sequential(
            nn.Conv2d(in_channels, 32, kernel_size=3),
            nn.BatchNorm2d(32),
            nn.ReLU())

        self.layer2 = nn.Sequential(
            nn.Conv2d(32, 32, kernel_size=3),
            nn.BatchNorm2d(32),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.layer3 = nn.Sequential(
            nn.Conv2d(32, 64, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU())
        
        self.layer4 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3),
            nn.BatchNorm2d(64),
            nn.ReLU())

        self.layer5 = nn.Sequential(
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2))

        self.fc = nn.Sequential(
            nn.Linear(64 * 4 * 4, 128),
            nn.ReLU(),
            nn.Linear(128, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes))

    def forward(self, x):
        x = self.layer1(x)
        x = self.layer2(x)
        x = self.layer3(x)
        x = self.layer4(x)
        x = self.layer5(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

model = Net(in_channels=n_channels, num_classes=n_classes)
    
# define loss function and optimizer
if task == "multi-label, binary-class":
    criterion = nn.BCEWithLogitsLoss()
else:
    criterion = nn.CrossEntropyLoss()
    
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=0.9)

## Fase de Evaluación y Entrenamiento

In [17]:
# evaluation

def test(split):
    model.eval()
    y_true = torch.tensor([])
    y_score = torch.tensor([])
    
    # Asignar a data_loader en funcion del split: train, val o test
    if split == 'train':
        data_loader = train_loader
    elif split == 'val':
        data_loader = valid_loader
    else:
        data_loader = test_loader

    with torch.no_grad():
        for inputs, targets in data_loader:
            outputs = model(inputs)

            if task == 'multi-label, binary-class':
                targets = targets.to(torch.float32)
                outputs = outputs.softmax(dim=-1)
            else:
                targets = targets.squeeze().long()
                outputs = outputs.softmax(dim=-1)
                targets = targets.float().resize_(len(targets), 1)

            y_true = torch.cat((y_true, targets), 0)
            y_score = torch.cat((y_score, outputs), 0)

        y_true = y_true.numpy()
        y_score = y_score.detach().numpy()
        
        evaluator = Evaluator(data_flag, split)
        metrics = evaluator.evaluate(y_score)
    
        print('%s  auc: %.3f  acc:%.3f' % (split, *metrics))


In [18]:
import matplotlib.pyplot as plt

# Listas para almacenar los valores de pérdida en cada epoch
train_loss_values = []
val_loss_values = []

# Train
for epoch in range(NUM_EPOCHS):
    train_correct = 0
    train_total = 0
    test_correct = 0
    test_total = 0
    
    model.train()
    epoch_train_loss = 0.0  # Inicializar la pérdida de entrenamiento para esta epoch
    for inputs, targets in tqdm(train_loader):
        # Forward + Backward + Optimize
        optimizer.zero_grad()
        outputs = model(inputs)
        
        if task == 'multi-label, binary-class':
            targets = targets.to(torch.float32)
            loss = criterion(outputs, targets)
        else:
            targets = targets.squeeze().long()
            loss = criterion(outputs, targets)
        
        loss.backward()
        optimizer.step()

        epoch_train_loss += loss.item()  # Sumar la pérdida en cada iteración de entrenamiento

    # Calcular la media de pérdida para esta epoch de entrenamiento
    epoch_train_loss /= len(train_loader)
    train_loss_values.append(epoch_train_loss)  # Agregar el valor promedio de pérdida a la lista de pérdidas de entrenamiento

    # Validation
    model.eval()
    epoch_val_loss = 0.0  # Inicializar la pérdida de validación para esta epoch
    with torch.no_grad():
        for inputs, targets in tqdm(valid_loader):
            outputs = model(inputs)
            
            if task == 'multi-label, binary-class':
                targets = targets.to(torch.float32)
                loss = criterion(outputs, targets)
            else:
                targets = targets.squeeze().long()
                loss = criterion(outputs, targets)
            
            epoch_val_loss += loss.item()  # Sumar la pérdida en cada iteración de validación
        
        epoch_val_loss /= len(valid_loader)  # Calcular el promedio de pérdida para esta epoch de validación
        val_loss_values.append(epoch_val_loss)  # Añadir el valor promedio de pérdida a la lista de pérdidas de validación

# Trazar los valores de pérdida
print(train_loss_values)
plt.plot(train_loss_values, label='Training Loss')
plt.plot(val_loss_values, label='Validation Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Training and Validation Loss over Epochs')
plt.legend()
plt.show()

 71%|███████▏  | 875/1227 [02:16<01:00,  5.85it/s]

## Evaluar como de bueno es el modelo

In [None]:
        
print('==> Evaluating ...')
test('val')
test('test')

==> Evaluating ...
val  auc: 0.576  acc:0.949
test  auc: 0.569  acc:0.947
