# Skin Lesion Classification using Deep Learning

If you're using Tinder, all the necessary requirements are already installed in a conda environment.

To activate the environment in the terminal, use the command: ```conda activate env```

## Accessing TensorBoard:

1. Navigate to the TensorBoard logs directory:
    ```cd skin_lesion_classification/logs```

2. Start TensorBoard:
    ```tensorboard --logdir ./ --bind_all```

3. Ctrl + click the TensorBoard link printed in the terminal to open it in your browser.

Run the cell below if you haven't installed the requirements on your machine yet.

In [33]:
# !pip install -r requirements.txt

## Importing all the libraries

In [34]:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR

import pytorch_lightning
import torchvision.models as models
import torchvision.transforms as transforms
from pytorch_lightning.loggers import TensorBoardLogger

from torch.utils.data import DataLoader, Dataset, random_split
from torchvision.datasets import ImageFolder
from torchvision.utils import make_grid

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks import EarlyStopping
from efficientnet_pytorch import EfficientNet

from PIL import Image
import pandas as pd
import os

import numpy as np
import random
from sklearn.metrics import roc_curve, auc, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score
import seaborn as sns
import matplotlib.pyplot as plt
import time
import json

## Set seeds for reproducibility

In [35]:
# One fixed seed drives every random number generator used in the notebook.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Dedicated generator handed to random_split / DataLoader so the dataset
# split and the shuffling order are reproducible.
generator = torch.Generator()
generator.manual_seed(seed)

def seed_worker(worker_id):
    """Re-seed NumPy and ``random`` from the current torch seed.

    Intended to be passed as ``worker_init_fn`` to a ``DataLoader``.
    ``worker_id`` is accepted to match that callback signature but is not
    used; the seed is derived from ``torch.initial_seed()`` reduced to 32 bits.
    """
    derived_seed = torch.initial_seed() % (1 << 32)
    for reseed in (np.random.seed, random.seed):
        reseed(derived_seed)

# Force deterministic cuDNN kernels and disable the auto-tuner (benchmark mode)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

# Check whether CUDA is available and report every visible device
cuda_available = torch.cuda.is_available()
if cuda_available:
    num_cuda_devices = torch.cuda.device_count()
    print(f"Number of GPUs available: {num_cuda_devices}")
    i = 0
    while i < num_cuda_devices:
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")
        i += 1
    # Leave ``i`` with the value a ``for i in range(...)`` loop would have left
    # (the name is only defined at all if at least one device was listed).
    if num_cuda_devices > 0:
        i = num_cuda_devices - 1
    else:
        del i

Number of GPUs available: 2
GPU 0: NVIDIA GeForce GTX 1080 Ti
GPU 1: NVIDIA GeForce GTX 1080 Ti


# Loading CSV

In [36]:

class CustomDataset(Dataset):
    """Binary-labelled image dataset backed by a CSV file and two image folders.

    Column 1 of the CSV holds the image code (the file name without the
    ``.jpg`` extension) and column 2 holds the label. A label equal to 0 maps
    to class 0; every other value maps to class 1.
    """

    def __init__(self, csv_file, root_dir_1, root_dir_2, transform):
        """Load the annotation table and remember where the images live.

        Args:
            csv_file: Path to the CSV file containing data information.
            root_dir_1: First directory searched for an image.
            root_dir_2: Second directory, searched when the image is not in
                ``root_dir_1``.
            transform: Callable applied to each RGB PIL image; a falsy value
                returns the PIL image untouched.
        """
        self.annotations = pd.read_csv(csv_file)
        self.root_dir_1 = root_dir_1
        self.root_dir_2 = root_dir_2
        self.transform = transform

    def __len__(self):
        """Return the number of rows in the annotation table."""
        return len(self.annotations)

    def _resolve_image_path(self, img_name):
        """Return the first existing path for ``img_name``, or ``None``."""
        for root_dir in (self.root_dir_1, self.root_dir_2):
            candidate = os.path.join(root_dir, img_name)
            if os.path.exists(candidate):
                return candidate
        return None

    def __getitem__(self, idx):
        """Return ``(image, label)`` for row ``idx``.

        Raises:
            FileNotFoundError: If the image is in neither directory. Returning
                ``(None, None)`` instead would only fail later, inside the
                DataLoader's collate step, with a far less helpful message.
        """
        img_name = self.annotations.iloc[idx, 1] + '.jpg'
        img_path = self._resolve_image_path(img_name)
        if img_path is None:
            raise FileNotFoundError(
                f"File {img_name} (row {idx}) not found in any of the specified directories: "
                f"{self.root_dir_1!r}, {self.root_dir_2!r}."
            )

        # Convert to RGB so every sample has three channels; the context
        # manager closes the underlying file handle once the copy is made.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        # Collapse the label column to a binary target (0 stays 0, anything else is 1).
        label = torch.tensor(0 if self.annotations.iloc[idx, 2] == 0 else 1)

        if self.transform:
            image = self.transform(image)

        return image, label


# Locations of the metadata CSV and of the two image folders
csv_file = '/home/ashiley/HAM10000_metadata_alterado.csv'
data_path_1 = '/home/ashiley/HAM10000_images_part_1'
data_path_2 = '/home/ashiley/HAM10000_images_part_2'

# Augmentation pipeline: random flips, rotation, a 299x299 random crop,
# colour jitter, and finally conversion to a tensor
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(),
    transforms.RandomVerticalFlip(),
    transforms.RandomRotation(20),
    transforms.RandomResizedCrop(299, scale=(0.8, 1.0)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.2),
    transforms.ToTensor(),
])

# Dataset over both image folders
custom_dataset = CustomDataset(
    csv_file=csv_file,
    root_dir_1=data_path_1,
    root_dir_2=data_path_2,
    transform=transform,
)

# Inverse-frequency class weights, computed from the label column (column 2)
# and normalised so that they sum to 1
df = pd.read_csv(csv_file)
class_counts = df.iloc[:, 2].value_counts().sort_index().values
class_weights = torch.tensor(class_counts, dtype=torch.float).reciprocal()
class_weights = class_weights / class_weights.sum()
# ``cuda_available`` here is the flag computed earlier in the notebook
class_weights = class_weights.to('cuda' if cuda_available else 'cpu')

# Batched, shuffled loader over the whole (unsplit) dataset
data_loader = DataLoader(custom_dataset, batch_size=32, shuffle=True, generator=generator)

# Re-check CUDA availability and report it
cuda_available = torch.cuda.is_available()
if not cuda_available:
    print("CUDA is not available. You are running on CPU.")
else:
    num_cuda_devices = torch.cuda.device_count()
    print(f"CUDA is available and {num_cuda_devices} CUDA device(s) is(are) available.")

# Device handle: GPU when available, CPU otherwise
device = torch.device("cuda" if cuda_available else "cpu")

# Only one dataset is used, so the "full" dataset is simply an alias for it
full_dataset = custom_dataset

total_images = len(custom_dataset)
print(f"Total de imagens no dataset: {total_images}")

CUDA is available and 2 CUDA device(s) is(are) available.
Total de imagens no dataset: 9873


In [37]:
# 80/10/10 split; the test split absorbs the rounding remainder.
total_size = len(full_dataset)
train_size = int(0.8 * total_size)              # 80% for training
val_size = int(0.1 * total_size)                # 10% for validation
test_size = total_size - train_size - val_size  # Remaining 10% for testing

# Split the dataset into training, validation, and test sets
train_dataset, val_dataset, test_dataset = random_split(full_dataset, [train_size, val_size, test_size], generator=generator)

# The subsets returned by random_split all wrap the same underlying dataset, so
# the validation and test samples would receive the random training
# augmentations (flips, rotation, random crop, colour jitter) and the reported
# metrics would change from run to run. Re-bind the evaluation subsets, with the
# very same indices, to a copy of the dataset that only resizes to the training
# resolution and converts to a tensor.
eval_transform = transforms.Compose([
    transforms.Resize((299, 299)),
    transforms.ToTensor(),
])
eval_dataset = CustomDataset(csv_file=csv_file, root_dir_1=data_path_1, root_dir_2=data_path_2, transform=eval_transform)
val_dataset = torch.utils.data.Subset(eval_dataset, val_dataset.indices)
test_dataset = torch.utils.data.Subset(eval_dataset, test_dataset.indices)

# Create DataLoaders for the training, validation, and test sets
train_dataloader = DataLoader(train_dataset, batch_size=64, shuffle=True, worker_init_fn=seed_worker, generator=generator)
val_dataloader = DataLoader(val_dataset, batch_size=64, shuffle=False, worker_init_fn=seed_worker, generator=generator)
test_dataloader = DataLoader(test_dataset, batch_size=64, shuffle=False, worker_init_fn=seed_worker, generator=generator)

# Define the model

In [38]:
# Number of channels in the input images (RGB)
input_channels = 3
# Number of classes in the classification task (malignant or benign)
num_classes = 2

In [39]:
class GenericClassifier(pl.LightningModule):
    """Lightning module that fine-tunes the head of a pretrained backbone.

    Supported ``model_name`` values are ``'vgg'``, ``'resnet'``, ``'alexnet'``,
    ``'efficientnet'`` and ``'inception'``. The backbone is frozen and only
    the classification head is left trainable (for Inception every parameter
    whose name contains ``"fc"`` stays trainable). Predictions, probabilities
    and targets of the most recent validation / test epoch are kept in the
    ``val_*`` / ``test_*`` lists so metrics can be computed outside Lightning.
    """

    def __init__(self, model_name, num_classes, learning_rate, class_weights):
        """Build the backbone and replace its head with a ``num_classes`` layer.

        Args:
            model_name: Key selecting the backbone (see class docstring).
            num_classes: Number of output classes of the new head.
            learning_rate: Learning rate given to the Adam optimizer.
            class_weights: Per-class weights for the cross-entropy loss, or
                ``None`` for an unweighted loss.

        Raises:
            ValueError: If ``model_name`` is not one of the supported keys.
        """
        super().__init__()

        # Dictionary to map model names to their creation functions
        model_dict = {
            'vgg': models.vgg16,
            'resnet': models.resnet18,
            'alexnet': models.alexnet,
            'efficientnet': EfficientNet.from_pretrained,
            'inception': models.inception_v3
        }

        if model_name not in model_dict:
            raise ValueError(f"Model {model_name} is not supported. Choose from {list(model_dict.keys())}.")

        if model_name == 'efficientnet':
            # from_pretrained already builds the head for num_classes.
            self.model = model_dict[model_name]('efficientnet-b0', num_classes=num_classes)
            for name, param in self.model.named_parameters():
                if '_fc' not in name:
                    param.requires_grad = False
        else:
            self.model = model_dict[model_name](pretrained=True)

            # Freeze the backbone. A freshly created nn.Linear is trainable by
            # default, so the replaced head needs no explicit unfreezing.
            if model_name == 'inception':
                for name, param in self.model.named_parameters():
                    if "fc" not in name:
                        param.requires_grad = False
            else:
                for param in self.model.parameters():
                    param.requires_grad = False

            if model_name in ('vgg', 'alexnet'):
                old_head = self.model.classifier[6]
                self.model.classifier[6] = nn.Linear(old_head.in_features, num_classes)
            else:  # 'resnet' and 'inception' expose the head as ``fc``
                self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)

        self.learning_rate = learning_rate

        # Register the weights as a buffer so they follow the module to
        # whatever device it is placed on; a plain tensor attribute stays on
        # the device it was created on and breaks the loss on any other one.
        # persistent=False keeps the checkpoint state_dict unchanged.
        if class_weights is not None:
            class_weights = torch.as_tensor(class_weights, dtype=torch.float)
        self.register_buffer('class_weights', class_weights, persistent=False)

        # Per-epoch storage; also reset at the start of every val/test epoch.
        self.val_preds = []
        self.val_probs = []
        self.val_true = []
        self.test_preds = []
        self.test_probs = []
        self.test_true = []

    def forward(self, x):
        """Return the logits of the wrapped model for input batch ``x``."""
        if isinstance(self.model, models.Inception3):
            x = self.model(x)
            return x.logits if hasattr(x, 'logits') else x  # Use the main output for Inception
        return self.model(x)

    def configure_optimizers(self):
        """Return Adam plus a StepLR scheduler (x0.1 every 5 scheduler steps)."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        scheduler = StepLR(optimizer, step_size=5, gamma=0.1)
        return {
            'optimizer': optimizer,
            'lr_scheduler': scheduler,
            'monitor': 'val_loss'
        }

    def _shared_eval_step(self, batch):
        """Run one forward pass and return ``(targets, loss, preds, probs, acc)``."""
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y, weight=self.class_weights)
        preds = torch.argmax(logits, dim=1)
        acc = torch.sum(preds == y).item() / len(y)
        probs = torch.softmax(logits, dim=1)
        return y, loss, preds, probs, acc

    def training_step(self, batch, batch_idx):
        """Compute and log the weighted cross-entropy loss of a training batch."""
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y, weight=self.class_weights)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Evaluate a validation batch, store its outputs and log loss/accuracy."""
        y, loss, preds, probs, acc = self._shared_eval_step(batch)
        self.val_probs.extend(probs.detach().cpu().numpy())
        self.val_preds.extend(preds.detach().cpu().numpy())
        self.val_true.extend(y.detach().cpu().numpy())
        self.log('val_loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log('val_acc', acc, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def test_step(self, batch, batch_idx):
        """Evaluate a test batch, store its outputs and log loss/accuracy."""
        y, loss, preds, probs, acc = self._shared_eval_step(batch)
        self.test_probs.extend(probs.detach().cpu().numpy())
        self.test_preds.extend(preds.detach().cpu().numpy())
        self.test_true.extend(y.detach().cpu().numpy())
        self.log('test_loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log('test_acc', acc, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def on_validation_epoch_start(self):
        """Discard the outputs stored during the previous validation epoch."""
        self.val_preds = []
        self.val_probs = []
        self.val_true = []

    def on_test_epoch_start(self):
        """Discard the outputs stored during the previous test epoch."""
        self.test_preds = []
        self.test_probs = []
        self.test_true = []

# Train

In [40]:
def save_metrics(val_true, val_preds, test_true, test_preds, val_probs, test_probs, experiment_name, time_taken):
    """Compute, print and persist validation/test metrics for one experiment.

    Everything is written to ``results/<experiment_name>/``:
    ``metrics.json`` (macro precision/recall/F1 and the elapsed time),
    ``confusion_matrices.png``, ``metrics_comparison.png`` and four CSV files
    with the probabilities and true labels of both splits.

    Args:
        val_true, val_preds: True and predicted labels of the validation split.
        test_true, test_preds: True and predicted labels of the test split.
        val_probs, test_probs: Per-sample class probabilities of each split.
        experiment_name: Name of the sub-directory created under ``results``.
        time_taken: Elapsed time stored alongside the metrics.
    """
    output_dir = os.path.join('results', experiment_name)
    os.makedirs(output_dir, exist_ok=True)

    val_confusion = confusion_matrix(val_true, val_preds)
    test_confusion = confusion_matrix(test_true, test_preds)

    val_precision = precision_score(val_true, val_preds, average='macro')
    test_precision = precision_score(test_true, test_preds, average='macro')

    val_recall = recall_score(val_true, val_preds, average='macro')
    test_recall = recall_score(test_true, test_preds, average='macro')

    val_f1 = f1_score(val_true, val_preds, average='macro')
    test_f1 = f1_score(test_true, test_preds, average='macro')

    print("Validation Confusion Matrix:\n", val_confusion)
    print("Test Confusion Matrix:\n", test_confusion)
    print("Validation Precision: ", val_precision)
    print("Test Precision: ", test_precision)
    print("Validation Recall: ", val_recall)
    print("Test Recall: ", test_recall)
    print("Validation F1-Score: ", val_f1)
    print("Test F1-Score: ", test_f1)

    metrics = {
        "val_precision": val_precision,
        "test_precision": test_precision,
        "val_recall": val_recall,
        "test_recall": test_recall,
        "val_f1": val_f1,
        "test_f1": test_f1,
        "time_taken": time_taken
    }

    with open(os.path.join(output_dir, 'metrics.json'), 'w') as f:
        json.dump(metrics, f, indent=4)

    # Side-by-side confusion matrices
    fig, axes = plt.subplots(1, 2, figsize=(15, 5))
    for ax, matrix, split_name in zip(axes, (val_confusion, test_confusion), ('Validation', 'Test')):
        sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
        ax.set_title(f'{split_name} Confusion Matrix')
        ax.set_xlabel('Predicted')
        ax.set_ylabel('True')

    confusion_matrices_path = os.path.join(output_dir, 'confusion_matrices.png')
    plt.savefig(confusion_matrices_path)
    plt.close()

    # Long-form table with one row per (split, metric). A wide-form frame
    # would make seaborn draw one bar per metric averaged over both splits,
    # which hides the validation-vs-test comparison this figure is meant to show.
    comparison = pd.DataFrame({
        'Split': ['Validation', 'Test'] * 3,
        'Metric': ['Precision', 'Precision', 'Recall', 'Recall', 'F1-Score', 'F1-Score'],
        'Score': [val_precision, test_precision, val_recall, test_recall, val_f1, test_f1],
    })
    plt.figure(figsize=(10, 5))
    sns.barplot(data=comparison, x='Metric', y='Score', hue='Split')
    plt.title('Metrics Comparison')
    plt.ylabel('Score')

    metrics_comparison_path = os.path.join(output_dir, 'metrics_comparison.png')
    plt.savefig(metrics_comparison_path)
    plt.close()

    # Raw outputs, re-read later to draw ROC curves
    np.savetxt(os.path.join(output_dir, 'val_probs.csv'), np.array(val_probs), delimiter=',')
    np.savetxt(os.path.join(output_dir, 'val_true.csv'), np.array(val_true), delimiter=',')
    np.savetxt(os.path.join(output_dir, 'test_probs.csv'), np.array(test_probs), delimiter=',')
    np.savetxt(os.path.join(output_dir, 'test_true.csv'), np.array(test_true), delimiter=',')

In [41]:
def train_model(model_name, num_classes, experiment_name, learning_rate):
    """Train, test and report one ``GenericClassifier`` experiment.

    Uses the module-level ``class_weights`` and the train/val/test
    dataloaders. Training runs for at most 10 epochs, with early stopping and
    checkpointing both driven by ``val_acc``; TensorBoard logs go to
    ``logs/<experiment_name>``. The elapsed wall-clock time (fit + test) is
    printed and passed to ``save_metrics`` together with the stored outputs.
    """
    started_at = time.time()

    classifier = GenericClassifier(
        model_name=model_name,
        num_classes=num_classes,
        learning_rate=learning_rate,
        class_weights=class_weights,
    )

    # Order matters only for the callbacks list handed to the trainer:
    # checkpoint first, early stopping second.
    stopper = EarlyStopping(monitor='val_acc', patience=3, mode='max')
    checkpointer = ModelCheckpoint(monitor='val_acc', mode='max')
    tensorboard = TensorBoardLogger("logs", name=experiment_name)

    trainer = pl.Trainer(
        max_epochs=10,
        accelerator='auto',  # let Lightning pick the device
        logger=tensorboard,
        callbacks=[checkpointer, stopper],
    )

    trainer.fit(model=classifier, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)
    trainer.test(classifier, test_dataloader)

    time_taken = time.time() - started_at
    print(f"Time taken: {time_taken} seconds.")

    save_metrics(
        classifier.val_true, classifier.val_preds,
        classifier.test_true, classifier.test_preds,
        classifier.val_probs, classifier.test_probs,
        experiment_name,
        time_taken,
    )
    

In [42]:
def plot_auc_roc_curves(experiment_names, title):
    """Plot the test-set ROC curve of several experiments on one figure.

    For every name in ``experiment_names`` the files ``test_probs.csv`` and
    ``test_true.csv`` are read from ``results/<name>/``; column 1 of the
    probabilities is used as the positive-class score (binary classification
    is assumed). The figure is saved as
    ``results/<title>_roc_curve_comparison.png`` and then shown.

    Args:
        experiment_names: Iterable of experiment directory names.
        title: Prefix of the saved image's file name.
    """
    plt.figure(figsize=(10, 8))

    for experiment_name in experiment_names:
        output_dir = os.path.join('results', experiment_name)

        # Only the test outputs are needed here. ndmin keeps the array rank
        # fixed even when a file holds a single row, so ``[:, 1]`` stays valid.
        test_probs = np.loadtxt(os.path.join(output_dir, 'test_probs.csv'), delimiter=',', ndmin=2)
        test_true = np.loadtxt(os.path.join(output_dir, 'test_true.csv'), delimiter=',', ndmin=1)

        # Compute ROC curve and AUC
        fpr, tpr, _ = roc_curve(test_true, test_probs[:, 1])  # Assuming binary classification
        roc_auc = auc(fpr, tpr)

        plt.plot(fpr, tpr, label=f'{experiment_name} (AUC = {roc_auc:.2f})')

    # Chance-level diagonal for reference
    plt.plot([0, 1], [0, 1], 'k--')
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve for Different Models')
    plt.legend(loc='lower right')

    os.makedirs('results', exist_ok=True)
    plt.savefig(os.path.join('results', f'{title}_roc_curve_comparison.png'))
    plt.show()

# Models


# VGG

In [43]:
class VGGClassifier(pl.LightningModule):
    """Pretrained VGG16 with a frozen backbone and a new trainable last layer."""

    def __init__(self, num_classes):
        """Load VGG16, freeze it and swap ``classifier[6]`` for a new head."""
        super().__init__()

        backbone = models.vgg16(pretrained=True)

        # Freeze every pretrained weight. The replacement head created below
        # is a fresh nn.Linear, which is trainable by default.
        for weight in backbone.parameters():
            weight.requires_grad = False

        backbone.classifier[6] = nn.Linear(backbone.classifier[6].in_features, num_classes)
        self.vgg16 = backbone

        # Predictions, probabilities and true labels of the current epoch
        self.val_preds, self.val_probs, self.val_true = [], [], []
        self.test_preds, self.test_probs, self.test_true = [], [], []

    def forward(self, x):
        """Return the VGG16 logits for input batch ``x``."""
        return self.vgg16(x)

    def configure_optimizers(self):
        """Return Adam (lr=1e-4) with a StepLR scheduler (x0.1 every 2 steps)."""
        adam = torch.optim.Adam(self.parameters(), lr=1e-4)
        return {
            'optimizer': adam,
            'lr_scheduler': StepLR(adam, step_size=2, gamma=0.1),
            'monitor': 'val_loss',
        }

    def _evaluate(self, batch):
        """Forward a batch; return ``(targets, logits, loss, preds, acc)``."""
        inputs, targets = batch
        logits = self(inputs)
        loss = F.cross_entropy(logits, targets)
        preds = logits.argmax(dim=1)
        acc = (preds == targets).sum().item() / len(targets)
        return targets, logits, loss, preds, acc

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss of a training batch."""
        inputs, targets = batch
        loss = F.cross_entropy(self(inputs), targets)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Evaluate a validation batch, store its outputs, log loss/accuracy."""
        targets, logits, loss, preds, acc = self._evaluate(batch)

        # Kept so that metrics can be computed outside Lightning
        self.val_probs.extend(F.softmax(logits, dim=1).detach().cpu().numpy())
        self.val_preds.extend(preds.detach().cpu().numpy())
        self.val_true.extend(targets.detach().cpu().numpy())

        epoch_only = dict(on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log('val_loss', loss, **epoch_only)
        self.log('val_acc', acc, **epoch_only)
        return loss

    def test_step(self, batch, batch_idx):
        """Evaluate a test batch, store its outputs, log loss/accuracy."""
        targets, logits, loss, preds, acc = self._evaluate(batch)

        # Kept so that metrics can be computed outside Lightning
        self.test_probs.extend(F.softmax(logits, dim=1).detach().cpu().numpy())
        self.test_preds.extend(preds.detach().cpu().numpy())
        self.test_true.extend(targets.detach().cpu().numpy())

        epoch_only = dict(on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log('test_loss', loss, **epoch_only)
        self.log('test_acc', acc, **epoch_only)
        return loss

    def on_validation_epoch_start(self):
        """Start each validation epoch with empty storage lists."""
        self.val_preds, self.val_probs, self.val_true = [], [], []

    def on_test_epoch_start(self):
        """Start each test epoch with empty storage lists."""
        self.test_preds, self.test_probs, self.test_true = [], [], []


# ResNet

In [44]:
class ResNetClassifier(pl.LightningModule):
    """Pretrained ResNet-18 with a frozen backbone and a new trainable ``fc``."""

    def __init__(self, num_classes):
        """Load ResNet-18, freeze it and swap ``fc`` for a new head."""
        super().__init__()

        backbone = models.resnet18(pretrained=True)

        # Freeze every pretrained weight. The replacement head created below
        # is a fresh nn.Linear, which is trainable by default.
        for weight in backbone.parameters():
            weight.requires_grad = False

        backbone.fc = nn.Linear(backbone.fc.in_features, num_classes)
        self.resnet = backbone

        # Predictions, probabilities and true labels of the current epoch
        self.val_preds, self.val_probs, self.val_true = [], [], []
        self.test_preds, self.test_probs, self.test_true = [], [], []

    def forward(self, x):
        """Return the ResNet logits for input batch ``x``."""
        return self.resnet(x)

    def configure_optimizers(self):
        """Return Adam (lr=1e-3) with a StepLR scheduler (x0.1 every 5 steps)."""
        adam = torch.optim.Adam(self.parameters(), lr=1e-3)
        return {
            'optimizer': adam,
            'lr_scheduler': StepLR(adam, step_size=5, gamma=0.1),
            'monitor': 'val_loss',
        }

    def _evaluate(self, batch):
        """Forward a batch; return ``(targets, logits, loss, preds, acc)``."""
        inputs, targets = batch
        logits = self(inputs)
        loss = F.cross_entropy(logits, targets)
        preds = logits.argmax(dim=1)
        acc = (preds == targets).sum().item() / len(targets)
        return targets, logits, loss, preds, acc

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss of a training batch."""
        inputs, targets = batch
        loss = F.cross_entropy(self(inputs), targets)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Evaluate a validation batch, store its outputs, log loss/accuracy."""
        targets, logits, loss, preds, acc = self._evaluate(batch)

        # Kept so that metrics can be computed outside Lightning
        self.val_probs.extend(F.softmax(logits, dim=1).detach().cpu().numpy())
        self.val_preds.extend(preds.detach().cpu().numpy())
        self.val_true.extend(targets.detach().cpu().numpy())

        epoch_only = dict(on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log('val_loss', loss, **epoch_only)
        self.log('val_acc', acc, **epoch_only)
        return loss

    def test_step(self, batch, batch_idx):
        """Evaluate a test batch, store its outputs, log loss/accuracy."""
        targets, logits, loss, preds, acc = self._evaluate(batch)

        # Kept so that metrics can be computed outside Lightning
        self.test_probs.extend(F.softmax(logits, dim=1).detach().cpu().numpy())
        self.test_preds.extend(preds.detach().cpu().numpy())
        self.test_true.extend(targets.detach().cpu().numpy())

        epoch_only = dict(on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log('test_loss', loss, **epoch_only)
        self.log('test_acc', acc, **epoch_only)
        return loss

    def on_validation_epoch_start(self):
        """Start each validation epoch with empty storage lists."""
        self.val_preds, self.val_probs, self.val_true = [], [], []

    def on_test_epoch_start(self):
        """Start each test epoch with empty storage lists."""
        self.test_preds, self.test_probs, self.test_true = [], [], []

# AlexNet

In [45]:
class AlexNetClassifier(pl.LightningModule):
    """Pretrained AlexNet with a frozen backbone and a new trainable last layer."""

    def __init__(self, num_classes):
        """Load AlexNet, freeze it and swap ``classifier[6]`` for a new head."""
        super().__init__()

        backbone = models.alexnet(pretrained=True)

        # Freeze every pretrained weight. The replacement head created below
        # is a fresh nn.Linear, which is trainable by default.
        for weight in backbone.parameters():
            weight.requires_grad = False

        backbone.classifier[6] = nn.Linear(backbone.classifier[6].in_features, num_classes)
        self.alexnet = backbone

        # Predictions, probabilities and true labels of the current epoch
        self.val_preds, self.val_probs, self.val_true = [], [], []
        self.test_preds, self.test_probs, self.test_true = [], [], []

    def forward(self, x):
        """Return the AlexNet logits for input batch ``x``."""
        return self.alexnet(x)

    def configure_optimizers(self):
        """Return Adam (lr=1e-3) with a StepLR scheduler (x0.1 every 5 steps)."""
        adam = torch.optim.Adam(self.parameters(), lr=1e-3)
        return {
            'optimizer': adam,
            'lr_scheduler': StepLR(adam, step_size=5, gamma=0.1),
            'monitor': 'val_loss',
        }

    def _evaluate(self, batch):
        """Forward a batch; return ``(targets, logits, loss, preds, acc)``."""
        inputs, targets = batch
        logits = self(inputs)
        loss = F.cross_entropy(logits, targets)
        preds = logits.argmax(dim=1)
        acc = (preds == targets).sum().item() / len(targets)
        return targets, logits, loss, preds, acc

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss of a training batch."""
        inputs, targets = batch
        loss = F.cross_entropy(self(inputs), targets)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Evaluate a validation batch, store its outputs, log loss/accuracy."""
        targets, logits, loss, preds, acc = self._evaluate(batch)

        # Kept so that metrics can be computed outside Lightning
        self.val_probs.extend(F.softmax(logits, dim=1).detach().cpu().numpy())
        self.val_preds.extend(preds.detach().cpu().numpy())
        self.val_true.extend(targets.detach().cpu().numpy())

        epoch_only = dict(on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log('val_loss', loss, **epoch_only)
        self.log('val_acc', acc, **epoch_only)
        return loss

    def test_step(self, batch, batch_idx):
        """Evaluate a test batch, store its outputs, log loss/accuracy."""
        targets, logits, loss, preds, acc = self._evaluate(batch)

        # Kept so that metrics can be computed outside Lightning
        self.test_probs.extend(F.softmax(logits, dim=1).detach().cpu().numpy())
        self.test_preds.extend(preds.detach().cpu().numpy())
        self.test_true.extend(targets.detach().cpu().numpy())

        epoch_only = dict(on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log('test_loss', loss, **epoch_only)
        self.log('test_acc', acc, **epoch_only)
        return loss

    def on_validation_epoch_start(self):
        """Start each validation epoch with empty storage lists."""
        self.val_preds, self.val_probs, self.val_true = [], [], []

    def on_test_epoch_start(self):
        """Start each test epoch with empty storage lists."""
        self.test_preds, self.test_probs, self.test_true = [], [], []



# EfficientNet

In [46]:
class EfficientNetClassifier(pl.LightningModule):
    """Lightning module wrapping a pre-trained EfficientNet-B0 classifier.

    Every parameter whose name does not contain ``'_fc'`` is frozen. Softmax
    probabilities, predicted labels and true labels produced during validation
    and testing are accumulated in plain lists so that metrics can be computed
    outside Lightning.

    Args:
        num_classes: Number of classes passed to ``EfficientNet.from_pretrained``.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Pre-trained backbone, requested with `num_classes` outputs
        self.efficientnet = EfficientNet.from_pretrained('efficientnet-b0', num_classes=num_classes)

        # Only parameters named with '_fc' stay trainable
        for param_name, weight in self.efficientnet.named_parameters():
            if '_fc' in param_name:
                continue
            weight.requires_grad = False

        # Buffers for predictions, probabilities and true labels
        self.val_preds, self.val_probs, self.val_true = [], [], []
        self.test_preds, self.test_probs, self.test_true = [], [], []

    def forward(self, x):
        """Run ``x`` through the wrapped EfficientNet and return its output."""
        return self.efficientnet(x)

    def configure_optimizers(self):
        """Build an Adam optimizer (lr=1e-3) with a StepLR schedule (x0.1 every 5 steps)."""
        adam = torch.optim.Adam(self.parameters(), lr=1e-3)
        step_decay = StepLR(adam, step_size=5, gamma=0.1)
        return {'optimizer': adam, 'lr_scheduler': step_decay, 'monitor': 'val_loss'}

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss for one training batch."""
        inputs, targets = batch
        loss = F.cross_entropy(self(inputs), targets)

        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        return loss

    def _evaluate(self, batch):
        """Score one batch and return ``(loss, acc, probs, preds, targets)``.

        ``acc`` is a Python float (correct predictions / batch length); ``probs``
        is the softmax of the logits along dim 1.
        """
        inputs, targets = batch
        logits = self(inputs)
        loss = F.cross_entropy(logits, targets)
        preds = logits.argmax(dim=1)
        acc = (preds == targets).sum().item() / len(targets)
        probs = torch.softmax(logits, dim=1)
        return loss, acc, probs, preds, targets

    def validation_step(self, batch, batch_idx):
        """Evaluate one validation batch, buffer its outputs and log loss/accuracy."""
        loss, acc, probs, preds, targets = self._evaluate(batch)

        # Buffer outputs as NumPy values for metric computation outside Lightning
        self.val_probs.extend(probs.detach().cpu().numpy())
        self.val_preds.extend(preds.detach().cpu().numpy())
        self.val_true.extend(targets.detach().cpu().numpy())

        self.log('val_loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log('val_acc', acc, on_step=False, on_epoch=True, prog_bar=True, logger=True)

        return loss

    def test_step(self, batch, batch_idx):
        """Evaluate one test batch, buffer its outputs and log loss/accuracy."""
        loss, acc, probs, preds, targets = self._evaluate(batch)

        # Buffer outputs as NumPy values for metric computation outside Lightning
        self.test_probs.extend(probs.detach().cpu().numpy())
        self.test_preds.extend(preds.detach().cpu().numpy())
        self.test_true.extend(targets.detach().cpu().numpy())

        self.log('test_loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log('test_acc', acc, on_step=False, on_epoch=True, prog_bar=True, logger=True)

        return loss

    def on_validation_epoch_start(self):
        """Reset the validation buffers to fresh empty lists."""
        self.val_preds, self.val_probs, self.val_true = [], [], []

    def on_test_epoch_start(self):
        """Reset the test buffers to fresh empty lists."""
        self.test_preds, self.test_probs, self.test_true = [], [], []

# Inception

In [47]:
class InceptionClassifier(pl.LightningModule):
    """Lightning module wrapping a pre-trained Inception v3 classifier.

    All pre-trained parameters are frozen and the final ``fc`` layer is replaced
    by a new linear layer with ``num_classes`` outputs, which is the only
    trainable part. Softmax probabilities, predicted labels and true labels
    produced during validation and testing are accumulated in plain lists so
    that metrics can be computed outside Lightning.

    Args:
        num_classes: Number of output classes for the new classification layer.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Load a pre-trained Inception model
        # NOTE(review): newer torchvision releases deprecate `pretrained=` in
        # favour of `weights=` - confirm the installed version before switching.
        self.inception = models.inception_v3(pretrained=True)

        # Freeze every pre-trained parameter. A name filter on "fc" would also
        # leave the auxiliary head's fc layer trainable even though its output
        # never enters the loss; the real classifier is replaced just below and
        # is trainable by construction.
        for param in self.inception.parameters():
            param.requires_grad = False

        # Replace the classifier layer for the specified number of classes
        in_features = self.inception.fc.in_features
        self.inception.fc = nn.Linear(in_features, num_classes)

        # Initialize lists to store predictions, probabilities, and true labels
        self.val_preds = []
        self.val_probs = []
        self.val_true = []
        self.test_preds = []
        self.test_probs = []
        self.test_true = []

    def forward(self, x):
        """Run ``x`` through the wrapped Inception model and return its raw output."""
        return self.inception(x)

    @staticmethod
    def _main_logits(outputs):
        """Return the main-classifier logits from a forward-pass result.

        The training path historically read ``outputs.logits``; evaluation code
        used the output directly as a tensor. This accepts both shapes: an
        object exposing ``.logits`` or a plain tensor (returned unchanged).
        """
        return getattr(outputs, 'logits', outputs)

    def configure_optimizers(self):
        """Build an Adam optimizer (lr=1e-3) with a StepLR schedule (x0.1 every 5 steps)."""
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)

        # Add a scheduler
        scheduler = StepLR(optimizer, step_size=5, gamma=0.1)

        return {
                'optimizer': optimizer,
                'lr_scheduler': scheduler,
                'monitor': 'val_loss'
        }

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss for one training batch."""
        x, y = batch                                # batch is a tuple containing the input data (x) and the target labels (y).
        logits = self._main_logits(self(x))         # main logits only; works whether or not the output carries `.logits`.
        loss = F.cross_entropy(logits, y)           # cross-entropy loss between the model's predictions (logits) and the true labels (y).

        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        return loss

    def validation_step(self, batch, batch_idx):
        """Evaluate one validation batch, buffer its outputs and log loss/accuracy."""
        x, y = batch                                # batch is a tuple containing the input data (x) and the target labels (y).
        logits = self._main_logits(self(x))         # pass the input data to the model to get the predicted logits.
        loss = F.cross_entropy(logits, y)           # cross-entropy loss between the model's predictions (logits) and the true labels (y).
        preds = torch.argmax(logits, dim=1)         # get the predicted labels by taking the argmax of the logits.
        acc = torch.sum(preds == y).item() / len(y) # calculate the accuracy by comparing the predicted labels to the true labels.

        # Store probabilities, predictions and true labels to compute metrics outside Lightning
        self.val_probs.extend(torch.softmax(logits, dim=1).detach().cpu().numpy())  # Store probabilities
        self.val_preds.extend(preds.detach().cpu().numpy())                        # Store predicted labels
        self.val_true.extend(y.detach().cpu().numpy())

        self.log('val_loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log('val_acc', acc, on_step=False, on_epoch=True, prog_bar=True, logger=True)

        return loss

    def test_step(self, batch, batch_idx):
        """Evaluate one test batch, buffer its outputs and log loss/accuracy."""
        x, y = batch                                # batch is a tuple containing the input data (x) and the target labels (y).
        logits = self._main_logits(self(x))         # pass the input data to the model to get the predicted logits.
        loss = F.cross_entropy(logits, y)           # cross-entropy loss between the model's predictions (logits) and the true labels (y).
        preds = torch.argmax(logits, dim=1)         # get the predicted labels by taking the argmax of the logits.
        acc = torch.sum(preds == y).item() / len(y) # calculate the accuracy by comparing the predicted labels to the true labels.

        # Store probabilities, predictions and true labels to compute metrics outside Lightning
        self.test_probs.extend(torch.softmax(logits, dim=1).detach().cpu().numpy())  # Store probabilities
        self.test_preds.extend(preds.detach().cpu().numpy())                        # Store predicted labels (previously never filled)
        self.test_true.extend(y.detach().cpu().numpy())

        # Same logging flags as validation_step so test metrics also reach the progress bar
        self.log('test_loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log('test_acc', acc, on_step=False, on_epoch=True, prog_bar=True, logger=True)

        return loss

    def on_validation_epoch_start(self):
        """Reset the validation buffers to fresh empty lists."""
        self.val_preds = []
        self.val_probs = []
        self.val_true = []

    def on_test_epoch_start(self):
        """Reset the test buffers to fresh empty lists."""
        self.test_preds = []
        self.test_probs = []
        self.test_true = []
