Daniel Lizano Morales C04285

Esteban Castañeda Blanco C01795

Israel López Vallecillo C04396

Ariel Solís

In [1]:
import os
import shutil
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.nn as nn
from efficientnet_pytorch import EfficientNet
from tqdm import tqdm
import wandb

In [None]:
#no ejecuten esta celda a menos de que vayan a entrenar
wandb.login(key=os.getenv('WANDB_KEY'))
wandb.init(project='COVID-19_Radiography_Dataset')

In [25]:
import ssl

# Deshabilitar verificación SSL
ssl._create_default_https_context = ssl._create_unverified_context

# CNN

## CNN general

In [None]:
# Hyperparameters
num_epochs = 5
batch_size = 32
learning_rate = 0.001
classes = ['Viral_Pneumonia', 'COVID', 'Normal', 'Lung_Opacity']
num_classes = len(classes)
root_dir = '/content/drive/MyDrive/COVID-19_Radiography_Dataset/'
withoutfilter_dir = '/content/drive/MyDrive/COVID-19_Radiography_Dataset/withoutfilter'
withfilter_dir = '/content/drive/MyDrive/COVID-19_Radiography_Dataset/filtradas'

In [None]:
data_transforms = {
    'train': transforms.Compose([
        transforms.Resize((240, 240)),  # Adjust size for EfficientNet-B1
        transforms.RandomHorizontalFlip(),  # Randomly flip the image horizontally
        transforms.RandomRotation(10),  # Randomly rotate the image by up to 10 degrees
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),  # Randomly change brightness, contrast, saturation, and hue
        transforms.ToTensor(),  # Convert the image to a tensor
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # Normalize the image
    ]),
    'val': transforms.Compose([
        transforms.Resize((240, 240)),  # Adjust size for EfficientNet-B1
        transforms.ToTensor(),  # Convert the image to a tensor
        transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])  # Normalize the image
    ]),
}

In [None]:
def create_folders(base_dir):
  train_dir = os.path.join(base_dir, 'train')
  val_dir = os.path.join(base_dir, 'val')

  os.makedirs(train_dir, exist_ok=True)
  os.makedirs(val_dir, exist_ok=True)

  for folder in [train_dir, val_dir]:
    for category in classes:
      category_dir = os.path.join(folder, category)
      os.makedirs(category_dir, exist_ok=True)

In [None]:
def set_train_val_images(source_dir, base_dir):

  train_dir = os.path.join(base_dir, 'train')
  val_dir = os.path.join(base_dir, 'val')

  os.makedirs(train_dir, exist_ok=True)
  os.makedirs(val_dir, exist_ok=True)

  # Copy data to the correct directories if not already done
  for category in classes:
    category_path = os.path.join(source_dir, category, 'images')
    images = os.listdir(category_path)

    # Ensure the category directories exist in train and val directories
    train_category_dir = os.path.join(train_dir, category)
    val_category_dir = os.path.join(val_dir, category)
    os.makedirs(train_category_dir, exist_ok=True)
    os.makedirs(val_category_dir, exist_ok=True)

    train_images = images[:int(len(images) * 0.8)]
    val_images = images[int(len(images) * 0.8):]

    # Copy train images
    for img in tqdm(train_images, desc=f"Copying train images for {category}"):
        src_path = os.path.join(category_path, img)
        dst_path = os.path.join(train_category_dir, img)
        if not os.path.exists(dst_path):
            shutil.copy(src_path, dst_path)

    # Copy val images
    for img in tqdm(val_images, desc=f"Copying val images for {category}"):
        src_path = os.path.join(category_path, img)
        dst_path = os.path.join(val_category_dir, img)
        if not os.path.exists(dst_path):
            shutil.copy(src_path, dst_path)

  # Verify the directory structure
  print("\nTrain Directory Structure:")
  for root, dirs, files in os.walk(train_dir):
      print(root, "contains", len(files), "files")

  print("\nValidation Directory Structure:")
  for root, dirs, files in os.walk(val_dir):
      print(root, "contains", len(files), "files")

In [None]:
def train_model(model, criterion, optimizer, train_loader, val_loader, device, num_epochs):
  for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    # Training phase with progress bar
    train_progress = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Training", unit="batch")
    for inputs, labels in train_progress:
      inputs, labels = inputs.to(device), labels.to(device)

      optimizer.zero_grad()

      outputs = model(inputs)
      loss = criterion(outputs, labels)
      loss.backward()
      optimizer.step()

      running_loss += loss.item() * inputs.size(0)
      _, predicted = torch.max(outputs, 1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()

      train_progress.set_postfix({"Loss": running_loss / total, "Acc": correct / total})

    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_acc = correct / total

    print(f'Epoch {epoch+1}/{num_epochs}')
    print(f'Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

    # Log training metrics to W&B
    # wandb.log({"epoch": epoch + 1, "train_loss": epoch_loss, "train_accuracy": epoch_acc})

    model.eval()
    val_loss = 0.0
    val_correct = 0
    val_total = 0

    # Validation phase with progress bar
    val_progress = tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Validation", unit="batch")
    with torch.no_grad():
      for inputs, labels in val_progress:
        inputs, labels = inputs.to(device), labels.to(device)

        outputs = model(inputs)
        loss = criterion(outputs, labels)

        val_loss += loss.item() * inputs.size(0)
        _, predicted = torch.max(outputs, 1)
        val_total += labels.size(0)
        val_correct += (predicted == labels).sum().item()

        val_progress.set_postfix({"Loss": val_loss / val_total,
                                          "Acc": val_correct / val_total})

    val_loss = val_loss / len(val_loader.dataset)
    val_acc = val_correct / val_total

    print(f'Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}')

    # Log validation metrics to W&B
    #wandb.log({"epoch": epoch + 1, "val_loss": val_loss, "val_accuracy": val_acc})

    return model

In [None]:
def evaluate_model(model, dataloader, device):
  model.eval()
  correct = 0
  total = 0

  with torch.no_grad():
    for inputs, labels in dataloader:
      inputs, labels = inputs.to(device), labels.to(device)

      outputs = model(inputs)
      _, predicted = torch.max(outputs, 1)
      total += labels.size(0)
      correct += (predicted == labels).sum().item()

  accuracy = correct / total
  print(f'Accuracy: {accuracy:.4f}')
  return accuracy

## CNN sin filtros

In [None]:
withoutfilter_dir = os.path.join(root_dir, 'withoutfilter')
os.makedirs(withoutfilter_dir, exist_ok=True)
create_folders(withoutfilter_dir)

In [None]:
train_dir = '/content/drive/MyDrive/COVID-19_Radiography_Dataset/withoutfilter/train'
val_dir = '/content/drive/MyDrive/COVID-19_Radiography_Dataset/withoutfilter/val'

In [None]:
set_train_val_images(root_dir, withoutfilter_dir)

Copying train images for Viral_Pneumonia: 100%|██████████| 1076/1076 [00:22<00:00, 46.97it/s]
Copying val images for Viral_Pneumonia: 100%|██████████| 269/269 [00:03<00:00, 75.15it/s]
Copying train images for COVID: 100%|██████████| 2892/2892 [01:30<00:00, 32.07it/s]
Copying val images for COVID: 100%|██████████| 724/724 [00:07<00:00, 92.95it/s]
Copying train images for Normal: 100%|██████████| 8153/8153 [04:06<00:00, 33.03it/s]
Copying val images for Normal: 100%|██████████| 2039/2039 [00:22<00:00, 91.71it/s]
Copying train images for Lung_Opacity: 100%|██████████| 4809/4809 [07:11<00:00, 11.15it/s]
Copying val images for Lung_Opacity: 100%|██████████| 1203/1203 [00:14<00:00, 84.15it/s]



Train Directory Structure:
/content/drive/MyDrive/COVID-19_Radiography_Dataset/withoutfilter/train contains 0 files
/content/drive/MyDrive/COVID-19_Radiography_Dataset/withoutfilter/train/Viral_Pneumonia contains 1076 files
/content/drive/MyDrive/COVID-19_Radiography_Dataset/withoutfilter/train/COVID contains 2892 files
/content/drive/MyDrive/COVID-19_Radiography_Dataset/withoutfilter/train/Normal contains 8153 files
/content/drive/MyDrive/COVID-19_Radiography_Dataset/withoutfilter/train/Lung_Opacity contains 4809 files

Validation Directory Structure:
/content/drive/MyDrive/COVID-19_Radiography_Dataset/withoutfilter/val contains 0 files
/content/drive/MyDrive/COVID-19_Radiography_Dataset/withoutfilter/val/Viral_Pneumonia contains 269 files
/content/drive/MyDrive/COVID-19_Radiography_Dataset/withoutfilter/val/COVID contains 724 files
/content/drive/MyDrive/COVID-19_Radiography_Dataset/withoutfilter/val/Normal contains 2039 files
/content/drive/MyDrive/COVID-19_Radiography_Dataset/with

In [None]:
# Load datasets
train_dataset = datasets.ImageFolder(train_dir, transform=data_transforms['train'])
val_dataset = datasets.ImageFolder(val_dir, transform=data_transforms['val'])

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size, shuffle=False)

In [None]:
# Load EfficientNet-B1 model
model = EfficientNet.from_pretrained('efficientnet-b1')
num_classes = len(train_dataset.classes)
model._fc = nn.Linear(model._fc.in_features, num_classes)

# Define the device
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model = model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)

Downloading: "https://github.com/lukemelas/EfficientNet-PyTorch/releases/download/1.0/efficientnet-b1-f1951068.pth" to /root/.cache/torch/hub/checkpoints/efficientnet-b1-f1951068.pth
100%|██████████| 30.1M/30.1M [00:00<00:00, 206MB/s]


Loaded pretrained weights for efficientnet-b1


In [None]:
trained_model = train_model(model, criterion, optimizer, train_loader, val_loader, device, num_epochs)

Epoch 1/5 - Training: 100%|██████████| 530/530 [05:37<00:00,  1.57batch/s, Loss=0.207, Acc=0.928]


Epoch 1/5
Train Loss: 0.2074 Acc: 0.9276


Epoch 1/5 - Validation: 100%|██████████| 133/133 [18:47<00:00,  8.48s/batch, Loss=0.168, Acc=0.946]

Val Loss: 0.1682 Acc: 0.9459





Error: You must call wandb.init() before wandb.log()

In [None]:
val_accuracy = evaluate_model(trained_model, val_loader, device)

CNN con filtros


In [27]:
base_dir = 'COVID-19_Radiography_Dataset/filtradas' #este path es solo para la cnn con filtro

In [28]:
data_transforms = {
    'train': transforms.Compose([
        transforms.Grayscale(num_output_channels=1),  # Convertir a blanco y negro
        transforms.Resize((240, 240)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5])  # Normalizar el canal único
    ]),
    'val': transforms.Compose([
        transforms.Grayscale(num_output_channels=1),  # Convertir a blanco y negro
        transforms.Resize((240, 240)),
        transforms.ToTensor(),
        transforms.Normalize([0.5], [0.5])  # Normalizar el canal único
    ]),
}

In [29]:
# Define train and val directories
train_dir = os.path.join(base_dir, 'train')
val_dir = os.path.join(base_dir, 'val')

# Create train and val directories if they don't exist
os.makedirs(train_dir, exist_ok=True)
os.makedirs(val_dir, exist_ok=True)

In [30]:
# Check if train and val directories are already populated
if not os.listdir(train_dir) or not os.listdir(val_dir):
    # Copy data to the correct directories if not already done
    for category in ['Viral_Pneumonia', 'Covid', 'Normal', 'Lung_Opacity']:
        category_path = os.path.join(base_dir, category)
        images = os.listdir(category_path)

        # Ensure the category directories exist in train and val directories
        train_category_dir = os.path.join(train_dir, category)
        val_category_dir = os.path.join(val_dir, category)
        os.makedirs(train_category_dir, exist_ok=True)
        os.makedirs(val_category_dir, exist_ok=True)

        train_images = images[:int(len(images) * 0.8)]
        val_images = images[int(len(images) * 0.8):]

        # Copy train images
        for img in tqdm(train_images, desc=f"Copying train images for {category}"):
            src_path = os.path.join(category_path, img)
            dst_path = os.path.join(train_category_dir, img)
            if not os.path.exists(dst_path):
                shutil.copy(src_path, dst_path)

        # Copy val images
        for img in tqdm(val_images, desc=f"Copying val images for {category}"):
            src_path = os.path.join(category_path, img)
            dst_path = os.path.join(val_category_dir, img)
            if not os.path.exists(dst_path):
                shutil.copy(src_path, dst_path)

# Verify the directory structure
print("\nTrain Directory Structure:")
for root, dirs, files in os.walk(train_dir):
    print(root, "contains", len(files), "files")

print("\nValidation Directory Structure:")
for root, dirs, files in os.walk(val_dir):
    print(root, "contains", len(files), "files")



Train Directory Structure:
COVID-19_Radiography_Dataset/filtradas/train contains 0 files
COVID-19_Radiography_Dataset/filtradas/train/Viral_Pneumonia contains 1076 files
COVID-19_Radiography_Dataset/filtradas/train/Lung_Opacity contains 4809 files
COVID-19_Radiography_Dataset/filtradas/train/Normal contains 8153 files
COVID-19_Radiography_Dataset/filtradas/train/Covid contains 2892 files

Validation Directory Structure:
COVID-19_Radiography_Dataset/filtradas/val contains 0 files
COVID-19_Radiography_Dataset/filtradas/val/Viral_Pneumonia contains 269 files
COVID-19_Radiography_Dataset/filtradas/val/Lung_Opacity contains 1203 files
COVID-19_Radiography_Dataset/filtradas/val/Normal contains 2039 files
COVID-19_Radiography_Dataset/filtradas/val/Covid contains 724 files


In [31]:
# Load datasets with data augmentation
train_dataset = datasets.ImageFolder(train_dir, transform=data_transforms['train'])
val_dataset = datasets.ImageFolder(val_dir, transform=data_transforms['val'])

# Create dataloaders
train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False) 

In [34]:
# Cambiar la primera capa para aceptar un solo canal
model = EfficientNet.from_pretrained('efficientnet-b1')
num_classes = len(train_dataset.classes)

# Cambiar la primera capa para aceptar un solo canal
model._conv_stem = nn.Conv2d(1, model._conv_stem.out_channels, 
                             kernel_size=model._conv_stem.kernel_size, 
                             stride=model._conv_stem.stride, 
                             padding=model._conv_stem.padding, 
                             bias=False)

# Cambiar la última capa para el número de clases
model._fc = nn.Linear(model._fc.in_features, num_classes)
model = model.to(device)

# Define loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.001)


Loaded pretrained weights for efficientnet-b1


In [None]:
#wandb.watch(model, log="all")

[]

In [35]:
# Early Stopping parameters
patience = 5
best_val_loss = float('inf')
epochs_no_improve = 0



In [36]:
# Definir la ruta para guardar el modelo en Google Drive
model_save_path = 'COVID-19_Radiography_Dataset/models/best_model.pth'
optimizer_save_path = 'COVID-19_Radiography_Dataset/models/model_and_optimizer.pth'

# Crear el directorio si no existe
os.makedirs(os.path.dirname(model_save_path), exist_ok=True)

In [37]:
def train_model(model, criterion, optimizer, train_loader, val_loader, device, num_epochs=50, patience=5):
    global best_val_loss, epochs_no_improve

    for epoch in range(num_epochs):
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        # Training phase with progress bar
        train_progress = tqdm(train_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Training", unit="batch")
        for inputs, labels in train_progress:
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()

            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item() * inputs.size(0)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            train_progress.set_postfix({"Loss": running_loss / total, "Acc": correct / total})

        epoch_loss = running_loss / len(train_loader.dataset)
        epoch_acc = correct / total

        print(f'Epoch {epoch+1}/{num_epochs}')
        print(f'Train Loss: {epoch_loss:.4f} Acc: {epoch_acc:.4f}')

        # Log training metrics to W&B
        #wandb.log({"epoch": epoch + 1, "train_loss": epoch_loss, "train_accuracy": epoch_acc})

        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0

        # Validation phase with progress bar
        val_progress = tqdm(val_loader, desc=f"Epoch {epoch+1}/{num_epochs} - Validation", unit="batch")
        with torch.no_grad():
            for inputs, labels in val_progress:
                inputs, labels = inputs.to(device), labels.to(device)

                outputs = model(inputs)
                loss = criterion(outputs, labels)

                val_loss += loss.item() * inputs.size(0)
                _, predicted = torch.max(outputs, 1)
                val_total += labels.size(0)
                val_correct += (predicted == labels).sum().item()

                val_progress.set_postfix({"Loss": val_loss / val_total, "Acc": val_correct / val_total})

        val_loss = val_loss / len(val_loader.dataset)
        val_acc = val_correct / val_total

        print(f'Val Loss: {val_loss:.4f} Acc: {val_acc:.4f}')

        # Log validation metrics to W&B
        #wandb.log({"epoch": epoch + 1, "val_loss": val_loss, "val_accuracy": val_acc})

        # Check for improvement
        if val_loss < best_val_loss:
            best_val_loss = val_loss
            epochs_no_improve = 0
            torch.save(model.state_dict(), model_save_path)  # Save the best model
            torch.save({
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
            }, optimizer_save_path)
        else:
            epochs_no_improve += 1

        # Save the model after each epoch
        torch.save(model.state_dict(), model_save_path)
        torch.save({
            'model_state_dict': model.state_dict(),
            'optimizer_state_dict': optimizer.state_dict(),
        }, optimizer_save_path)

        if epochs_no_improve >= patience:
            print("Early stopping triggered!")
            break

    return model

In [38]:
trained_model = train_model(model, criterion, optimizer, train_loader, val_loader, device, num_epochs=20, patience=5)


Epoch 1/20 - Training: 100%|██████████| 530/530 [2:34:43<00:00, 17.52s/batch, Loss=0.449, Acc=0.835]  


Epoch 1/20
Train Loss: 0.4488 Acc: 0.8351


Epoch 1/20 - Validation: 100%|██████████| 133/133 [08:40<00:00,  3.91s/batch, Loss=0.352, Acc=0.868]


Val Loss: 0.3516 Acc: 0.8680


Epoch 2/20 - Training: 100%|██████████| 530/530 [2:49:34<00:00, 19.20s/batch, Loss=0.289, Acc=0.898]  


Epoch 2/20
Train Loss: 0.2889 Acc: 0.8979


Epoch 2/20 - Validation: 100%|██████████| 133/133 [09:00<00:00,  4.07s/batch, Loss=0.28, Acc=0.905] 


Val Loss: 0.2801 Acc: 0.9051


Epoch 3/20 - Training:  38%|███▊      | 200/530 [57:28<1:34:50, 17.24s/batch, Loss=0.271, Acc=0.905]


KeyboardInterrupt: 

In [None]:
# Load the best model for evaluation
model_load_path = 'COVID-19_Radiography_Dataset/models/best_model.pth'
model.load_state_dict(torch.load(model_load_path))

<All keys matched successfully>

In [None]:

def evaluate_model(model, dataloader, device):
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():  
        for inputs, labels in dataloader:
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    print(f'Accuracy: {accuracy:.4f}')
    return accuracy

In [None]:
val_accuracy = evaluate_model(trained_model, val_loader, device)


NameError: name 'trained_model' is not defined