# Skin Lesion Classification using Deep Learning

If you're using Tinder, all the necessary requirements are already installed in a conda environment.

To activate the environment in the terminal, use the command: ```conda activate env```

## Accessing TensorBoard:

1. Navigate to the TensorBoard logs directory:
    ```cd skin_lesion_classification/logs```

2. Start TensorBoard:
    ```tensorboard --logdir ./ --bind_all```

3. Ctrl + click the TensorBoard link printed in the terminal to open it in your browser.

Run the cell below if you haven't installed the requirements on your machine yet.

In [34]:
# !pip install -r requirements.txt

## Importing all the libraries

In [35]:

import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.optim.lr_scheduler import StepLR

import pytorch_lightning
import torchvision.models as models
import torchvision.transforms as transforms
from pytorch_lightning.loggers import TensorBoardLogger

from torch.utils.data import DataLoader, Dataset, random_split
from torchvision.datasets import ImageFolder
from torchvision.utils import make_grid

import pytorch_lightning as pl
from pytorch_lightning.callbacks import ModelCheckpoint
from pytorch_lightning.callbacks import EarlyStopping
from efficientnet_pytorch import EfficientNet

from PIL import Image
import pandas as pd
import os

import numpy as np
import random
from sklearn.metrics import roc_curve, auc, confusion_matrix, precision_score, recall_score, f1_score, roc_auc_score
import seaborn as sns
import matplotlib.pyplot as plt
import time
import json

## Set seeds for reproducibility

In [36]:
# One seed shared by every random number generator used in this notebook.
seed = 42
random.seed(seed)
np.random.seed(seed)
torch.manual_seed(seed)

# Dedicated torch generator, seeded the same way, handed to the dataset split
# and to the DataLoaders so their randomness is repeatable.
generator = torch.Generator()
generator.manual_seed(seed)


def seed_worker(worker_id):
    """Seed NumPy and Python's `random` inside a DataLoader worker.

    The seed is torch's current initial seed reduced modulo 2**32.
    `worker_id` is accepted because this function is used as a
    `worker_init_fn`, but it is not read.
    """
    derived_seed = torch.initial_seed() % (2 ** 32)
    random.seed(derived_seed)
    np.random.seed(derived_seed)


# Check CUDA availability and list the devices that are visible.
cuda_available = torch.cuda.is_available()
if cuda_available:
    num_cuda_devices = torch.cuda.device_count()
    print(f"Number of GPUs available: {num_cuda_devices}")
    for i in range(num_cuda_devices):
        print(f"GPU {i}: {torch.cuda.get_device_name(i)}")

# Deterministic cuDNN operations: disable the auto-tuner and request
# deterministic kernels.
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

Number of GPUs available: 2
GPU 0: NVIDIA GeForce GTX 1080 Ti
GPU 1: NVIDIA GeForce GTX 1080 Ti


# Loading CSV

In [37]:

class CustomDataset(Dataset):
    """Binary-labelled image dataset described by a CSV file.

    Each CSV row supplies an image code (column index 1) and a label
    (column index 2). The image file is ``<code>.jpg`` and is searched for in
    `root_dir_1` first and then in `root_dir_2`.
    """

    def __init__(self, csv_file, root_dir_1, root_dir_2, transform=None):
        """
        Args:
            csv_file: Path to the CSV file containing data information.
            root_dir_1: Path to the first directory where images are stored.
            root_dir_2: Path to the second directory where images are stored.
            transform: Optional callable applied to each loaded PIL image.
        """
        self.annotations = pd.read_csv(csv_file)
        self.root_dir_1 = root_dir_1
        self.root_dir_2 = root_dir_2
        self.transform = transform

    def __len__(self):
        """Return the number of samples (rows of the CSV)."""
        return len(self.annotations)

    def _resolve_image_path(self, img_name):
        """Return the path of `img_name` in the first directory that holds it, else None."""
        for root_dir in (self.root_dir_1, self.root_dir_2):
            candidate = os.path.join(root_dir, img_name)
            if os.path.exists(candidate):
                return candidate
        return None

    def __getitem__(self, idx):
        """Load sample `idx`.

        Returns:
            A tuple ``(image, label)``. `image` is the RGB image after
            `transform` (if one was given); `label` is ``torch.tensor(0)``
            when the CSV value equals 0 and ``torch.tensor(1)`` otherwise.

        Raises:
            FileNotFoundError: if the image is in neither directory.
        """
        img_code = self.annotations.iloc[idx, 1]      # file code from the CSV
        img_name = img_code + '.jpg'                  # add the '.jpg' extension
        img_path = self._resolve_image_path(img_name)

        # Fail here with the offending file name. Returning (None, None) would
        # only surface later, when the batch is collated, as an unrelated error.
        if img_path is None:
            raise FileNotFoundError(
                f"File {img_name} (row {idx}) not found in any of the specified directories: "
                f"{self.root_dir_1!r}, {self.root_dir_2!r}."
            )

        # Convert to RGB so every image is treated consistently; the context
        # manager releases the file handle once the pixels are loaded.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        # Collapse the CSV value to a binary target: 0 stays 0, anything else is 1.
        label = torch.tensor(0 if self.annotations.iloc[idx, 2] == 0 else 1)

        if self.transform:
            image = self.transform(image)

        return image, label


# Locations of the metadata CSV and of the two image directories.
csv_file = '/home/ashiley/HAM10000_metadata_alterado.csv'
data_path_1 = '/home/ashiley/HAM10000_images_part_1'
data_path_2 = '/home/ashiley/HAM10000_images_part_2'

# Preprocessing: resize every image to 299x299, then convert it to a tensor.
# NOTE(review): no mean/std normalisation is applied; confirm this is intended
# for the pretrained backbones used further down.
transform = transforms.Compose([transforms.Resize((299, 299)), transforms.ToTensor()])

# Dataset over both image directories, driven by the CSV.
custom_dataset = CustomDataset(csv_file, data_path_1, data_path_2, transform)

# Shuffled loader over the whole dataset, in batches of 32.
data_loader = DataLoader(custom_dataset, shuffle=True, batch_size=32, generator=generator)

# Report whether CUDA can be used and, if so, how many devices there are.
cuda_available = torch.cuda.is_available()
if not cuda_available:
    print("CUDA is not available. You are running on CPU.")
else:
    num_cuda_devices = torch.cuda.device_count()
    print("CUDA is available and {} CUDA device(s) is(are) available.".format(num_cuda_devices))

# Device handle: "cuda" when available, otherwise "cpu".
device = torch.device("cuda" if cuda_available else "cpu")

# The full dataset is simply the custom dataset (nothing is concatenated).
full_dataset = custom_dataset

total_images = len(full_dataset)
print(f"Total de imagens no dataset: {total_images}")

CUDA is available and 2 CUDA device(s) is(are) available.
Total de imagens no dataset: 9873


In [38]:
# Subset sizes: 80% training, 10% validation, and the remainder for testing.
total_size = len(full_dataset)
train_size, val_size = int(0.8 * total_size), int(0.1 * total_size)
test_size = total_size - (train_size + val_size)

# Random split into the three subsets, driven by the seeded generator.
train_dataset, val_dataset, test_dataset = random_split(
    full_dataset,
    [train_size, val_size, test_size],
    generator=generator,
)

# One loader per subset, in batches of 64; only the training loader shuffles.
train_dataloader = DataLoader(
    train_dataset,
    batch_size=64,
    shuffle=True,
    worker_init_fn=seed_worker,
    generator=generator,
)
val_dataloader = DataLoader(
    val_dataset,
    batch_size=64,
    shuffle=False,
    worker_init_fn=seed_worker,
    generator=generator,
)
test_dataloader = DataLoader(
    test_dataset,
    batch_size=64,
    shuffle=False,
    worker_init_fn=seed_worker,
    generator=generator,
)

# Model definition

In [39]:
# Constants describing the classification task.
input_channels = 3  # Number of channels in the input images (RGB)
num_classes = 2     # Number of classes in the classification task (malignant or benign)

In [40]:
class GenericClassifier(pl.LightningModule):
    """Transfer-learning image classifier with a selectable pretrained backbone.

    Supported `model_name` values are 'vgg', 'resnet', 'alexnet',
    'efficientnet' and 'inception'. The backbone is loaded with pretrained
    weights, its parameters are frozen, and the final layer is sized for
    `num_classes` and left trainable.

    During validation and testing the per-sample class probabilities,
    predicted labels and true labels are accumulated in
    `val_probs`/`val_preds`/`val_true` and `test_probs`/`test_preds`/`test_true`.
    Each group of lists is reset at the start of its validation/test epoch.
    """

    def __init__(self, model_name, num_classes, learning_rate):
        """
        Args:
            model_name: Key selecting the backbone (see class docstring).
            num_classes: Number of outputs of the replaced final layer.
            learning_rate: Learning rate given to the Adam optimizer.

        Raises:
            ValueError: if `model_name` is not one of the supported keys.
        """
        super().__init__()

        # Dictionary to map model names to their creation functions
        model_dict = {
            'vgg': models.vgg16,
            'resnet': models.resnet18,
            'alexnet': models.alexnet,
            'efficientnet': EfficientNet.from_pretrained,
            'inception': models.inception_v3
        }

        if model_name not in model_dict:
            raise ValueError(f"Model {model_name} is not supported. Choose from {list(model_dict.keys())}.")

        if model_name == 'efficientnet':
            # from_pretrained already builds the head for `num_classes`;
            # freeze every parameter whose name does not contain '_fc'.
            self.model = model_dict[model_name]('efficientnet-b0', num_classes=num_classes)
            for name, param in self.model.named_parameters():
                if '_fc' not in name:
                    param.requires_grad = False
        else:
            self.model = model_dict[model_name](pretrained=True)

            if model_name == 'inception':
                # Substring match: every parameter whose name contains "fc"
                # stays trainable.
                # NOTE(review): this presumably also keeps the auxiliary
                # classifier's fc layer trainable; confirm that is intended.
                for name, param in self.model.named_parameters():
                    if "fc" not in name:
                        param.requires_grad = False
                self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)
            else:
                # Freeze the whole backbone. The replacement head created below
                # is a fresh nn.Linear, so it is trainable without having to
                # unfreeze the layer it replaces.
                for param in self.model.parameters():
                    param.requires_grad = False

                if model_name == 'resnet':
                    self.model.fc = nn.Linear(self.model.fc.in_features, num_classes)
                else:
                    # 'vgg' and 'alexnet': the final layer is classifier[6].
                    self.model.classifier[6] = nn.Linear(self.model.classifier[6].in_features, num_classes)

        self.learning_rate = learning_rate

        # Create every accumulator up front (including the *_probs lists) so
        # the attributes exist even before the first epoch-start hook runs.
        self.val_preds = []
        self.val_probs = []
        self.val_true = []
        self.test_preds = []
        self.test_probs = []
        self.test_true = []

    def forward(self, x):
        """Return the class logits for a batch of images."""
        if isinstance(self.model, models.Inception3):
            x = self.model(x)
            return x.logits if hasattr(x, 'logits') else x  # Use the main output for Inception
        else:
            return self.model(x)

    def configure_optimizers(self):
        """Adam with a StepLR schedule (step_size=5, gamma=0.1)."""
        optimizer = torch.optim.Adam(self.parameters(), lr=self.learning_rate)
        scheduler = StepLR(optimizer, step_size=5, gamma=0.1)
        return {
            'optimizer': optimizer,
            'lr_scheduler': scheduler,
            'monitor': 'val_loss'
        }

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss of one training batch."""
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def _shared_eval_step(self, batch, probs_store, preds_store, true_store, loss_key, acc_key):
        """Score one validation/test batch, record its outputs and log its metrics.

        Softmax probabilities, argmax predictions and true labels are appended
        to the given lists; loss and batch accuracy are logged per epoch under
        `loss_key` and `acc_key`. Returns the loss tensor.
        """
        x, y = batch
        logits = self(x)
        loss = F.cross_entropy(logits, y)
        preds = torch.argmax(logits, dim=1)
        acc = torch.sum(preds == y).item() / len(y)

        probs_store.extend(torch.softmax(logits, dim=1).detach().cpu().numpy())
        preds_store.extend(preds.detach().cpu().numpy())
        true_store.extend(y.detach().cpu().numpy())

        self.log(loss_key, loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log(acc_key, acc, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Evaluate one validation batch; logs 'val_loss' and 'val_acc'."""
        return self._shared_eval_step(
            batch, self.val_probs, self.val_preds, self.val_true, 'val_loss', 'val_acc'
        )

    def test_step(self, batch, batch_idx):
        """Evaluate one test batch; logs 'test_loss' and 'test_acc'."""
        return self._shared_eval_step(
            batch, self.test_probs, self.test_preds, self.test_true, 'test_loss', 'test_acc'
        )

    def on_validation_epoch_start(self):
        """Clear the validation accumulators."""
        self.val_preds = []
        self.val_probs = []
        self.val_true = []

    def on_test_epoch_start(self):
        """Clear the test accumulators."""
        self.test_preds = []
        self.test_probs = []
        self.test_true = []

# Train

In [41]:
def save_metrics(val_true, val_preds, test_true, test_preds, val_probs, test_probs, experiment_name, time_taken):
    """Compute, print, plot and persist validation/test metrics for one experiment.

    Everything is written to ``downloads/<experiment_name>/`` (created if
    missing): ``metrics.json``, ``confusion_matrices.png``,
    ``metrics_comparison.png`` and the CSV files ``val_probs.csv``,
    ``val_true.csv``, ``test_probs.csv`` and ``test_true.csv``.

    Args:
        val_true, val_preds: True and predicted labels of the validation set.
        test_true, test_preds: True and predicted labels of the test set.
        val_probs, test_probs: Per-sample class probabilities; saved to CSV.
        experiment_name: Name of the output sub-directory.
        time_taken: Elapsed time stored alongside the metrics.

    Returns:
        None.
    """
    output_dir = os.path.join('downloads', experiment_name)
    os.makedirs(output_dir, exist_ok=True)

    val_confusion = confusion_matrix(val_true, val_preds)
    test_confusion = confusion_matrix(test_true, test_preds)

    # Macro-averaged scores for both splits.
    val_precision = precision_score(val_true, val_preds, average='macro')
    test_precision = precision_score(test_true, test_preds, average='macro')

    val_recall = recall_score(val_true, val_preds, average='macro')
    test_recall = recall_score(test_true, test_preds, average='macro')

    val_f1 = f1_score(val_true, val_preds, average='macro')
    test_f1 = f1_score(test_true, test_preds, average='macro')

    print("Validation Confusion Matrix:\n", val_confusion)
    print("Test Confusion Matrix:\n", test_confusion)
    print("Validation Precision: ", val_precision)
    print("Test Precision: ", test_precision)
    print("Validation Recall: ", val_recall)
    print("Test Recall: ", test_recall)
    print("Validation F1-Score: ", val_f1)
    print("Test F1-Score: ", test_f1)

    metrics = {
        "val_precision": val_precision,
        "test_precision": test_precision,
        "val_recall": val_recall,
        "test_recall": test_recall,
        "val_f1": val_f1,
        "test_f1": test_f1,
        "time_taken": time_taken
    }

    with open(os.path.join(output_dir, 'metrics.json'), 'w') as f:
        json.dump(metrics, f, indent=4)

    # Side-by-side confusion matrices, saved through an explicit figure handle.
    confusion_fig, axes = plt.subplots(1, 2, figsize=(15, 5))
    panels = (
        (axes[0], val_confusion, 'Validation Confusion Matrix'),
        (axes[1], test_confusion, 'Test Confusion Matrix'),
    )
    for ax, matrix, title in panels:
        sns.heatmap(matrix, annot=True, fmt='d', cmap='Blues', ax=ax)
        ax.set_title(title)
        ax.set_xlabel('Predicted')
        ax.set_ylabel('True')

    confusion_matrices_path = os.path.join(output_dir, 'confusion_matrices.png')
    confusion_fig.savefig(confusion_matrices_path)
    plt.close(confusion_fig)

    # Long-form table with one row per (split, metric). A wide-form frame would
    # make barplot draw one bar per metric column, aggregated over the
    # validation and test rows, so the two splits would not be compared.
    comparison = pd.DataFrame({
        'Split': ['Validation'] * 3 + ['Test'] * 3,
        'Metric': ['Precision', 'Recall', 'F1-Score'] * 2,
        'Score': [
            val_precision, val_recall, val_f1,
            test_precision, test_recall, test_f1,
        ],
    })

    comparison_fig, comparison_ax = plt.subplots(figsize=(10, 5))
    sns.barplot(data=comparison, x='Metric', y='Score', hue='Split', ax=comparison_ax)
    comparison_ax.set_title('Metrics Comparison')
    comparison_ax.set_ylabel('Score')

    metrics_comparison_path = os.path.join(output_dir, 'metrics_comparison.png')
    comparison_fig.savefig(metrics_comparison_path)
    plt.close(comparison_fig)

    # Raw probabilities and labels, kept so ROC curves can be drawn later.
    np.savetxt(os.path.join(output_dir, 'val_probs.csv'), np.array(val_probs), delimiter=',')
    np.savetxt(os.path.join(output_dir, 'val_true.csv'), np.array(val_true), delimiter=',')
    np.savetxt(os.path.join(output_dir, 'test_probs.csv'), np.array(test_probs), delimiter=',')
    np.savetxt(os.path.join(output_dir, 'test_true.csv'), np.array(test_true), delimiter=',')

In [42]:
def train_model(model_name, num_classes, experiment_name, learning_rate):
    """Train, test and report one `GenericClassifier` experiment.

    Fits the model on the notebook-level `train_dataloader` /
    `val_dataloader`, tests it on `test_dataloader`, prints the elapsed time
    and passes the collected labels, predictions and probabilities to
    `save_metrics`. TensorBoard logs go to ``logs/<experiment_name>``.

    Args:
        model_name: Backbone key understood by `GenericClassifier`.
        num_classes: Number of output classes.
        experiment_name: Name used for the logger and the metrics directory.
        learning_rate: Learning rate forwarded to the model.
    """
    started_at = time.time()

    classifier = GenericClassifier(
        model_name=model_name,
        num_classes=num_classes,
        learning_rate=learning_rate,
    )

    # Both callbacks watch validation accuracy (higher is better); training
    # stops after 3 epochs without improvement.
    training_callbacks = [
        ModelCheckpoint(monitor='val_acc', mode='max'),
        EarlyStopping(monitor='val_acc', patience=3, mode='max'),
    ]

    trainer = pl.Trainer(
        max_epochs=10,
        accelerator='auto',  # let Lightning handle device selection
        logger=TensorBoardLogger("logs", name=experiment_name),
        callbacks=training_callbacks,
    )

    trainer.fit(model=classifier, train_dataloaders=train_dataloader, val_dataloaders=val_dataloader)

    # NOTE(review): the test run uses the weights the model holds when fit()
    # returns; the checkpoint tracked by ModelCheckpoint is never reloaded.
    # Confirm whether evaluating the best checkpoint was intended.
    trainer.test(classifier, test_dataloader)

    # The measured time covers model construction, fitting and testing.
    time_taken = time.time() - started_at

    print(f"Time taken: {time_taken} seconds.")

    save_metrics(
        classifier.val_true, classifier.val_preds,
        classifier.test_true, classifier.test_preds,
        classifier.val_probs, classifier.test_probs,
        experiment_name,
        time_taken,
    )
    

In [43]:
def plot_auc_roc_curves(experiment_names):
    """Plot the test-set ROC curve of every experiment on a single figure.

    For each name, ``test_probs.csv`` and ``test_true.csv`` are read from
    ``downloads/<name>/``; the second probability column is used as the score
    of the positive class (binary classification). The figure is saved as
    ``roc_curve_comparison.png`` in the directory of the last experiment in
    the list, and then shown.

    Args:
        experiment_names: Iterable of experiment directory names.

    Raises:
        ValueError: if `experiment_names` is empty.
    """
    experiment_names = list(experiment_names)
    if not experiment_names:
        # Without any experiment there is nothing to plot and nowhere to save.
        raise ValueError("experiment_names must contain at least one experiment.")

    plt.figure(figsize=(10, 8))

    for experiment_name in experiment_names:
        output_dir = os.path.join('downloads', experiment_name)

        # Only the test files are needed. ndmin keeps the 2-D / 1-D shapes
        # even when a file holds a single sample.
        test_probs = np.loadtxt(os.path.join(output_dir, 'test_probs.csv'), delimiter=',', ndmin=2)
        test_true = np.loadtxt(os.path.join(output_dir, 'test_true.csv'), delimiter=',', ndmin=1)

        # Compute ROC curve and AUC
        fpr, tpr, _ = roc_curve(test_true, test_probs[:, 1])  # Assuming binary classification
        roc_auc = auc(fpr, tpr)

        plt.plot(fpr, tpr, label=f'{experiment_name} (AUC = {roc_auc:.2f})')

    plt.plot([0, 1], [0, 1], 'k--')  # chance-level diagonal
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve for Different Models')
    plt.legend(loc='lower right')

    # Name the target directory explicitly instead of relying on the loop
    # variable still being bound after the loop.
    last_experiment = experiment_names[-1]
    plt.savefig(f'downloads/{last_experiment}/roc_curve_comparison.png')
    plt.show()

# Models


# VGG

In [44]:
class VGGClassifier(pl.LightningModule):
    """Pretrained VGG16 with a frozen backbone and a new, trainable final layer.

    Validation and test outputs (class probabilities, predicted labels, true
    labels) are accumulated in the `val_*` / `test_*` lists, which are cleared
    at the start of each validation/test epoch.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Pretrained VGG16 backbone.
        self.vgg16 = models.vgg16(pretrained=True)

        # Disable gradients everywhere, then re-enable them on the last
        # classifier layer.
        self.vgg16.requires_grad_(False)
        self.vgg16.classifier[6].requires_grad_(True)

        # Replace the last classifier layer with one sized for `num_classes`.
        head_inputs = self.vgg16.classifier[6].in_features
        self.vgg16.classifier[6] = nn.Linear(head_inputs, num_classes)

        # Accumulators for predictions, probabilities and true labels.
        self.val_preds, self.val_probs, self.val_true = [], [], []
        self.test_preds, self.test_probs, self.test_true = [], [], []

    def forward(self, x):
        """Return the VGG16 logits for a batch of images."""
        return self.vgg16(x)

    def configure_optimizers(self):
        """Adam (lr=1e-4) with a StepLR schedule (step_size=2, gamma=0.1)."""
        adam = torch.optim.Adam(self.parameters(), lr=1e-4)
        return {
            'optimizer': adam,
            'lr_scheduler': StepLR(adam, step_size=2, gamma=0.1),
            'monitor': 'val_loss',
        }

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss of one training batch."""
        inputs, targets = batch
        loss = F.cross_entropy(self(inputs), targets)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def _score_batch(self, batch, probs_out, preds_out, true_out, loss_name, acc_name):
        """Evaluate one batch, append its outputs to the given lists, log its metrics."""
        inputs, targets = batch
        logits = self(inputs)
        loss = F.cross_entropy(logits, targets)
        predicted = logits.argmax(dim=1)                               # most likely class per sample
        accuracy = (predicted == targets).sum().item() / len(targets)  # fraction of correct predictions

        # Keep probabilities, predictions and labels so metrics can be
        # computed outside Lightning.
        probs_out.extend(logits.softmax(dim=1).detach().cpu().numpy())
        preds_out.extend(predicted.detach().cpu().numpy())
        true_out.extend(targets.detach().cpu().numpy())

        self.log(loss_name, loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log(acc_name, accuracy, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Evaluate one validation batch; logs 'val_loss' and 'val_acc'."""
        return self._score_batch(
            batch, self.val_probs, self.val_preds, self.val_true, 'val_loss', 'val_acc'
        )

    def test_step(self, batch, batch_idx):
        """Evaluate one test batch; logs 'test_loss' and 'test_acc'."""
        return self._score_batch(
            batch, self.test_probs, self.test_preds, self.test_true, 'test_loss', 'test_acc'
        )

    def on_validation_epoch_start(self):
        """Clear the validation accumulators."""
        self.val_preds, self.val_probs, self.val_true = [], [], []

    def on_test_epoch_start(self):
        """Clear the test accumulators."""
        self.test_preds, self.test_probs, self.test_true = [], [], []


# ResNet

In [45]:
class ResNetClassifier(pl.LightningModule):
    """Pretrained ResNet18 with a frozen backbone and a new, trainable `fc` layer.

    Validation and test outputs (class probabilities, predicted labels, true
    labels) are accumulated in the `val_*` / `test_*` lists, which are cleared
    at the start of each validation/test epoch.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Pretrained ResNet18 backbone.
        self.resnet = models.resnet18(pretrained=True)

        # Disable gradients everywhere, then re-enable them on the final layer.
        self.resnet.requires_grad_(False)
        self.resnet.fc.requires_grad_(True)

        # Replace the final layer with one sized for `num_classes`.
        head_inputs = self.resnet.fc.in_features
        self.resnet.fc = nn.Linear(head_inputs, num_classes)

        # Accumulators for predictions, probabilities and true labels.
        self.val_preds, self.val_probs, self.val_true = [], [], []
        self.test_preds, self.test_probs, self.test_true = [], [], []

    def forward(self, x):
        """Return the ResNet18 logits for a batch of images."""
        return self.resnet(x)

    def configure_optimizers(self):
        """Adam (lr=1e-3) with a StepLR schedule (step_size=5, gamma=0.1)."""
        adam = torch.optim.Adam(self.parameters(), lr=1e-3)
        return {
            'optimizer': adam,
            'lr_scheduler': StepLR(adam, step_size=5, gamma=0.1),
            'monitor': 'val_loss',
        }

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss of one training batch."""
        inputs, targets = batch
        loss = F.cross_entropy(self(inputs), targets)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def _score_batch(self, batch, probs_out, preds_out, true_out, loss_name, acc_name):
        """Evaluate one batch, append its outputs to the given lists, log its metrics."""
        inputs, targets = batch
        logits = self(inputs)
        loss = F.cross_entropy(logits, targets)
        predicted = logits.argmax(dim=1)                               # most likely class per sample
        accuracy = (predicted == targets).sum().item() / len(targets)  # fraction of correct predictions

        # Keep probabilities, predictions and labels so metrics can be
        # computed outside Lightning.
        probs_out.extend(logits.softmax(dim=1).detach().cpu().numpy())
        preds_out.extend(predicted.detach().cpu().numpy())
        true_out.extend(targets.detach().cpu().numpy())

        self.log(loss_name, loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log(acc_name, accuracy, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Evaluate one validation batch; logs 'val_loss' and 'val_acc'."""
        return self._score_batch(
            batch, self.val_probs, self.val_preds, self.val_true, 'val_loss', 'val_acc'
        )

    def test_step(self, batch, batch_idx):
        """Evaluate one test batch; logs 'test_loss' and 'test_acc'."""
        return self._score_batch(
            batch, self.test_probs, self.test_preds, self.test_true, 'test_loss', 'test_acc'
        )

    def on_validation_epoch_start(self):
        """Clear the validation accumulators."""
        self.val_preds, self.val_probs, self.val_true = [], [], []

    def on_test_epoch_start(self):
        """Clear the test accumulators."""
        self.test_preds, self.test_probs, self.test_true = [], [], []

# AlexNet

In [46]:
class AlexNetClassifier(pl.LightningModule):
    """Pretrained AlexNet with a frozen backbone and a new, trainable final layer.

    Validation and test outputs (class probabilities, predicted labels, true
    labels) are accumulated in the `val_*` / `test_*` lists, which are cleared
    at the start of each validation/test epoch.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Pretrained AlexNet backbone.
        self.alexnet = models.alexnet(pretrained=True)

        # Disable gradients everywhere, then re-enable them on the last
        # classifier layer.
        self.alexnet.requires_grad_(False)
        self.alexnet.classifier[6].requires_grad_(True)

        # Replace the last classifier layer with one sized for `num_classes`.
        head_inputs = self.alexnet.classifier[6].in_features
        self.alexnet.classifier[6] = nn.Linear(head_inputs, num_classes)

        # Accumulators for predictions, probabilities and true labels.
        self.val_preds, self.val_probs, self.val_true = [], [], []
        self.test_preds, self.test_probs, self.test_true = [], [], []

    def forward(self, x):
        """Return the AlexNet logits for a batch of images."""
        return self.alexnet(x)

    def configure_optimizers(self):
        """Adam (lr=1e-3) with a StepLR schedule (step_size=5, gamma=0.1)."""
        adam = torch.optim.Adam(self.parameters(), lr=1e-3)
        return {
            'optimizer': adam,
            'lr_scheduler': StepLR(adam, step_size=5, gamma=0.1),
            'monitor': 'val_loss',
        }

    def training_step(self, batch, batch_idx):
        """Compute and log the cross-entropy loss of one training batch."""
        inputs, targets = batch
        loss = F.cross_entropy(self(inputs), targets)
        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def _score_batch(self, batch, probs_out, preds_out, true_out, loss_name, acc_name):
        """Evaluate one batch, append its outputs to the given lists, log its metrics."""
        inputs, targets = batch
        logits = self(inputs)
        loss = F.cross_entropy(logits, targets)
        predicted = logits.argmax(dim=1)                               # most likely class per sample
        accuracy = (predicted == targets).sum().item() / len(targets)  # fraction of correct predictions

        # Keep probabilities, predictions and labels so metrics can be
        # computed outside Lightning.
        probs_out.extend(logits.softmax(dim=1).detach().cpu().numpy())
        preds_out.extend(predicted.detach().cpu().numpy())
        true_out.extend(targets.detach().cpu().numpy())

        self.log(loss_name, loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log(acc_name, accuracy, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def validation_step(self, batch, batch_idx):
        """Evaluate one validation batch; logs 'val_loss' and 'val_acc'."""
        return self._score_batch(
            batch, self.val_probs, self.val_preds, self.val_true, 'val_loss', 'val_acc'
        )

    def test_step(self, batch, batch_idx):
        """Evaluate one test batch; logs 'test_loss' and 'test_acc'."""
        return self._score_batch(
            batch, self.test_probs, self.test_preds, self.test_true, 'test_loss', 'test_acc'
        )

    def on_validation_epoch_start(self):
        """Clear the validation accumulators."""
        self.val_preds, self.val_probs, self.val_true = [], [], []

    def on_test_epoch_start(self):
        """Clear the test accumulators."""
        self.test_preds, self.test_probs, self.test_true = [], [], []



# EfficientNet

In [47]:
class EfficientNetClassifier(pl.LightningModule):
    """Lightning module wrapping a pre-trained EfficientNet-B0.

    Every backbone parameter whose name does not contain ``'_fc'`` is frozen.
    During validation and testing the per-sample probabilities, predicted labels
    and true labels are accumulated in the ``val_*`` / ``test_*`` lists so that
    metrics can be computed outside Lightning.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Pre-trained backbone; num_classes is forwarded to the loader.
        backbone = EfficientNet.from_pretrained('efficientnet-b0', num_classes=num_classes)

        # Disable gradients for everything except parameters named '*_fc*'.
        for param_name, weight in backbone.named_parameters():
            if '_fc' in param_name:
                continue
            weight.requires_grad = False
        self.efficientnet = backbone

        # Accumulators for predictions, probabilities and true labels.
        self.val_preds, self.val_probs, self.val_true = [], [], []
        self.test_preds, self.test_probs, self.test_true = [], [], []

    def forward(self, x):
        """Return the backbone's output for ``x``."""
        return self.efficientnet(x)

    def configure_optimizers(self):
        """Adam (lr=1e-3) with a StepLR schedule (step_size=5, gamma=0.1)."""
        adam = torch.optim.Adam(self.parameters(), lr=1e-3)
        return {
            'optimizer': adam,
            'lr_scheduler': StepLR(adam, step_size=5, gamma=0.1),
            'monitor': 'val_loss',
        }

    def training_step(self, batch, batch_idx):
        """Compute, log (per step and per epoch) and return the cross-entropy loss."""
        inputs, targets = batch
        train_loss = F.cross_entropy(self(inputs), targets)
        self.log('train_loss', train_loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        return train_loss

    def _score_batch(self, batch):
        """Run the model on ``batch`` and return (loss, accuracy, probs, preds, labels).

        The last three items are NumPy arrays detached from the graph and moved to CPU.
        """
        inputs, targets = batch
        logits = self(inputs)
        loss = F.cross_entropy(logits, targets)
        predicted = logits.argmax(dim=1)
        accuracy = (predicted == targets).sum().item() / len(targets)
        probabilities = torch.softmax(logits, dim=1).detach().cpu().numpy()
        return (
            loss,
            accuracy,
            probabilities,
            predicted.detach().cpu().numpy(),
            targets.detach().cpu().numpy(),
        )

    def validation_step(self, batch, batch_idx):
        """Score a validation batch, store its outputs, log epoch-level loss/accuracy."""
        loss, accuracy, probabilities, predicted, labels = self._score_batch(batch)

        self.val_probs.extend(probabilities)
        self.val_preds.extend(predicted)
        self.val_true.extend(labels)

        self.log('val_loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log('val_acc', accuracy, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def test_step(self, batch, batch_idx):
        """Score a test batch, store its outputs, log epoch-level loss/accuracy."""
        loss, accuracy, probabilities, predicted, labels = self._score_batch(batch)

        self.test_probs.extend(probabilities)
        self.test_preds.extend(predicted)
        self.test_true.extend(labels)

        self.log('test_loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log('test_acc', accuracy, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        return loss

    def on_validation_epoch_start(self):
        """Start each validation epoch with empty accumulators."""
        self.val_preds, self.val_probs, self.val_true = [], [], []

    def on_test_epoch_start(self):
        """Start each test epoch with empty accumulators."""
        self.test_preds, self.test_probs, self.test_true = [], [], []

# Inception

In [48]:
class InceptionClassifier(pl.LightningModule):
    """Lightning module wrapping a pre-trained torchvision Inception v3.

    The final ``fc`` layer is replaced by a new ``nn.Linear`` with ``num_classes``
    outputs. During validation and testing the per-sample probabilities, predicted
    labels and true labels are accumulated in the ``val_*`` / ``test_*`` lists so
    that metrics can be computed outside Lightning.

    Args:
        num_classes: Number of output classes of the replacement ``fc`` layer.
    """

    def __init__(self, num_classes):
        super().__init__()

        # Load a pre-trained Inception model
        self.inception = models.inception_v3(pretrained=True)

        # Freeze every parameter whose name does not contain "fc".
        # NOTE(review): this is a substring match, so any other parameter with
        # "fc" in its name (e.g. an auxiliary head) also stays trainable — confirm
        # that is intended.
        for name, param in self.inception.named_parameters():
            if "fc" not in name:
                param.requires_grad = False

        # Replace the classifier layer for the specified number of classes
        in_features = self.inception.fc.in_features
        self.inception.fc = nn.Linear(in_features, num_classes)

        # Initialize lists to store predictions, probabilities, and true labels
        self.val_preds = []
        self.val_probs = []
        self.val_true = []
        self.test_preds = []
        self.test_probs = []
        self.test_true = []

    def forward(self, x):
        """Return the wrapped Inception model's output for ``x``."""
        return self.inception(x)

    def configure_optimizers(self):
        """Adam (lr=1e-3) with a StepLR schedule (step_size=5, gamma=0.1)."""
        optimizer = torch.optim.Adam(self.parameters(), lr=1e-3)

        # Add a scheduler
        scheduler = StepLR(optimizer, step_size=5, gamma=0.1)

        return {
                'optimizer': optimizer,
                'lr_scheduler': scheduler,
                'monitor': 'val_loss'
        }

    def training_step(self, batch, batch_idx):
        """Compute, log (per step and per epoch) and return the cross-entropy loss."""
        x, y = batch                                # batch is a tuple of input data (x) and true labels (y).
        # Unlike validation/test, the forward output is read through `.logits` here.
        # NOTE(review): presumably because torchvision's Inception v3 returns a named
        # tuple (main + auxiliary logits) in training mode — confirm.
        logits = self(x).logits
        loss = F.cross_entropy(logits, y)           # cross-entropy between predicted logits and true labels.

        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        return loss

    def validation_step(self, batch, batch_idx):
        """Score a validation batch, store its outputs, log epoch-level loss/accuracy."""
        x, y = batch                                # batch is a tuple of input data (x) and true labels (y).
        logits = self(x)                            # forward pass to obtain the predicted logits.
        loss = F.cross_entropy(logits, y)           # cross-entropy between predicted logits and true labels.
        preds = torch.argmax(logits, dim=1)         # predicted label = argmax over the class dimension.
        acc = torch.sum(preds == y).item() / len(y) # fraction of predictions that match the true labels.

        # Store probabilities, predictions and true labels to compute metrics outside Lightning
        self.val_probs.extend(torch.softmax(logits, dim=1).detach().cpu().numpy())  # Store probabilities
        self.val_preds.extend(preds.detach().cpu().numpy())                        # Store predicted labels
        self.val_true.extend(y.detach().cpu().numpy())

        self.log('val_loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log('val_acc', acc, on_step=False, on_epoch=True, prog_bar=True, logger=True)

        return loss

    def test_step(self, batch, batch_idx):
        """Score a test batch, store its outputs, log epoch-level loss/accuracy."""
        x, y = batch                                # batch is a tuple of input data (x) and true labels (y).
        logits = self(x)                            # forward pass to obtain the predicted logits.
        loss = F.cross_entropy(logits, y)           # cross-entropy between predicted logits and true labels.
        preds = torch.argmax(logits, dim=1)         # predicted label = argmax over the class dimension.
        acc = torch.sum(preds == y).item() / len(y) # fraction of predictions that match the true labels.

        # Store probabilities, predictions and true labels to compute metrics outside Lightning
        self.test_probs.extend(torch.softmax(logits, dim=1).detach().cpu().numpy())  # Store probabilities
        # Previously omitted: without this, test_preds stayed empty after testing
        # even though it is initialised and reset alongside the other test lists.
        self.test_preds.extend(preds.detach().cpu().numpy())                        # Store predicted labels
        self.test_true.extend(y.detach().cpu().numpy())

        # Same logging flags as validation_step so test metrics are reported consistently.
        self.log('test_loss', loss, on_step=False, on_epoch=True, prog_bar=True, logger=True)
        self.log('test_acc', acc, on_step=False, on_epoch=True, prog_bar=True, logger=True)

        return loss

    def on_validation_epoch_start(self):
        """Start each validation epoch with empty accumulators."""
        self.val_preds = []
        self.val_probs = []
        self.val_true = []

    def on_test_epoch_start(self):
        """Start each test epoch with empty accumulators."""
        self.test_preds = []
        self.test_probs = []
        self.test_true = []


In [49]:
# Run 'vgg_1e-3': per the cell output, this trains, tests and prints timing plus
# validation/test confusion matrices, precision, recall and F1.
# NOTE(review): train_model is defined elsewhere; the third argument is presumably
# the run name and the fourth (1e-3) the learning rate — confirm.
train_model('vgg', num_classes, 'vgg_1e-3', 1e-3)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name  | Type | Params
-------------------------------
0 | model | VGG  | 134 M 
-------------------------------
8.2 K     Trainable params
134 M     Non-trainable params
134 M     Total params
537.075   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 3: 100%|██████████| 124/124 [01:49<00:00,  1.14it/s, v_num=7, train_loss_step=0.223, val_loss=0.348, val_acc=0.827, train_loss_epoch=0.383]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 16/16 [00:11<00:00,  1.37it/s]


Time taken: 451.2139472961426 seconds.
Validation Confusion Matrix:
 [[787  18]
 [153  29]]
Test Confusion Matrix:
 [[783  12]
 [150  43]]
Validation Precision:  0.727127659574468
Test Precision:  0.8105232388190589
Validation Recall:  0.5684902054467271
Test Recall:  0.6038517939192491
Validation F1-Score:  0.5776404199146657
Test F1-Score:  0.6265120967741935


In [50]:
# Run 'vgg_1e-4': per the cell output, this trains, tests and prints timing plus
# validation/test confusion matrices, precision, recall and F1.
# NOTE(review): train_model is defined elsewhere; the third argument is presumably
# the run name and the fourth (1e-4) the learning rate — confirm.
train_model('vgg', num_classes, 'vgg_1e-4', 1e-4)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name  | Type | Params
-------------------------------
0 | model | VGG  | 134 M 
-------------------------------
8.2 K     Trainable params
134 M     Non-trainable params
134 M     Total params
537.075   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 8: 100%|██████████| 124/124 [01:51<00:00,  1.12it/s, v_num=2, train_loss_step=0.433, val_loss=0.363, val_acc=0.832, train_loss_epoch=0.388]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 16/16 [00:12<00:00,  1.31it/s]


Time taken: 1008.1684770584106 seconds.
Validation Confusion Matrix:
 [[763  42]
 [124  58]]
Test Confusion Matrix:
 [[758  37]
 [134  59]]
Validation Precision:  0.7201014656144307
Test Precision:  0.7321795590433483
Validation Recall:  0.6332537028189202
Test Recall:  0.6295793006810702
Validation F1-Score:  0.6566193853427896
Test F1-Score:  0.6534705656731817


In [51]:
# Run 'vgg_1e-5': per the cell output, this trains, tests and prints timing plus
# validation/test confusion matrices, precision, recall and F1.
# NOTE(review): train_model is defined elsewhere; the third argument is presumably
# the run name and the fourth (1e-5) the learning rate — confirm.
train_model('vgg', num_classes, 'vgg_1e-5', 1e-5)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name  | Type | Params
-------------------------------
0 | model | VGG  | 134 M 
-------------------------------
8.2 K     Trainable params
134 M     Non-trainable params
134 M     Total params
537.075   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 3: 100%|██████████| 124/124 [01:48<00:00,  1.14it/s, v_num=2, train_loss_step=0.591, val_loss=0.470, val_acc=0.816, train_loss_epoch=0.495]


LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Testing DataLoader 0: 100%|██████████| 16/16 [00:12<00:00,  1.32it/s]


Time taken: 454.5381474494934 seconds.
Validation Confusion Matrix:
 [[805   0]
 [182   0]]
Test Confusion Matrix:
 [[795   0]
 [193   0]]
Validation Precision:  0.4078014184397163
Test Precision:  0.4023279352226721
Validation Recall:  0.5
Test Recall:  0.5
Validation F1-Score:  0.44921875
Test F1-Score:  0.44587773415591697


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [52]:
# Run 'resnet_1e-3': per the cell output, this trains, tests and prints timing plus
# validation/test confusion matrices, precision, recall and F1.
# NOTE(review): train_model is defined elsewhere; the third argument is presumably
# the run name and the fourth (1e-3) the learning rate — confirm.
train_model('resnet', num_classes, 'resnet_1e-3', 1e-3)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name  | Type   | Params
---------------------------------
0 | model | ResNet | 11.2 M
---------------------------------
1.0 K     Trainable params
11.2 M    Non-trainable params
11.2 M    Total params
44.710    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 9: 100%|██████████| 124/124 [01:15<00:00,  1.65it/s, v_num=2, train_loss_step=0.356, val_loss=0.297, val_acc=0.867, train_loss_epoch=0.313]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 124/124 [01:15<00:00,  1.65it/s, v_num=2, train_loss_step=0.356, val_loss=0.297, val_acc=0.867, train_loss_epoch=0.313]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.



Testing DataLoader 0: 100%|██████████| 16/16 [00:08<00:00,  1.96it/s]


Time taken: 756.6147818565369 seconds.
Validation Confusion Matrix:
 [[763  42]
 [ 89  93]]
Test Confusion Matrix:
 [[767  28]
 [103  90]]
Validation Precision:  0.7922143974960876
Test Precision:  0.8221605299045393
Validation Recall:  0.7294075489727663
Test Recall:  0.7155505588685762
Validation F1-Score:  0.7538461245571317
Test F1-Score:  0.7500497281847764


In [53]:
# Run 'resnet_1e-4': per the cell output, this trains, tests and prints timing plus
# validation/test confusion matrices, precision, recall and F1.
# NOTE(review): train_model is defined elsewhere; the third argument is presumably
# the run name and the fourth (1e-4) the learning rate — confirm.
train_model('resnet', num_classes, 'resnet_1e-4', 1e-4)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name  | Type   | Params
---------------------------------
0 | model | ResNet | 11.2 M
---------------------------------
1.0 K     Trainable params
11.2 M    Non-trainable params
11.2 M    Total params
44.710    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 7: 100%|██████████| 124/124 [01:15<00:00,  1.64it/s, v_num=1, train_loss_step=0.263, val_loss=0.338, val_acc=0.857, train_loss_epoch=0.363]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.



Testing DataLoader 0: 100%|██████████| 16/16 [00:08<00:00,  1.96it/s]


Time taken: 611.7367250919342 seconds.
Validation Confusion Matrix:
 [[773  32]
 [109  73]]
Test Confusion Matrix:
 [[757  38]
 [124  69]]
Validation Precision:  0.7858276643990929
Test Precision:  0.7520553321947234
Validation Recall:  0.680673674151935
Test Recall:  0.6548571056147554
Validation F1-Score:  0.7125652406494427
Test F1-Score:  0.681670644391408


In [54]:
# Run 'resnet_1e-5': per the cell output, this trains, tests and prints timing plus
# validation/test confusion matrices, precision, recall and F1.
# NOTE(review): train_model is defined elsewhere; the third argument is presumably
# the run name and the fourth (1e-5) the learning rate — confirm.
train_model('resnet', num_classes, 'resnet_1e-5', 1e-5)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name  | Type   | Params
---------------------------------
0 | model | ResNet | 11.2 M
---------------------------------
1.0 K     Trainable params
11.2 M    Non-trainable params
11.2 M    Total params
44.710    Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 5: 100%|██████████| 124/124 [01:15<00:00,  1.64it/s, v_num=1, train_loss_step=0.427, val_loss=0.467, val_acc=0.815, train_loss_epoch=0.474]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.



Testing DataLoader 0: 100%|██████████| 16/16 [00:07<00:00,  2.00it/s]


Time taken: 459.59381556510925 seconds.
Validation Confusion Matrix:
 [[802   3]
 [180   2]]
Test Confusion Matrix:
 [[794   1]
 [193   0]]
Validation Precision:  0.6083503054989816
Test Precision:  0.4022289766970618
Validation Recall:  0.5036311514572385
Test Recall:  0.49937106918238994
Validation F1-Score:  0.45949205342207083
Test F1-Score:  0.4455667789001122


In [55]:
# Run 'alexnet_1e-3': per the cell output, this trains, tests and prints timing plus
# validation/test confusion matrices, precision, recall and F1.
# NOTE(review): train_model is defined elsewhere; the third argument is presumably
# the run name and the fourth (1e-3) the learning rate — confirm.
train_model('alexnet', num_classes, 'alexnet_1e-3', 1e-3)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name  | Type    | Params
----------------------------------
0 | model | AlexNet | 57.0 M
----------------------------------
8.2 K     Trainable params
57.0 M    Non-trainable params
57.0 M    Total params
228.048   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 5: 100%|██████████| 124/124 [01:08<00:00,  1.80it/s, v_num=1, train_loss_step=0.371, val_loss=0.315, val_acc=0.853, train_loss_epoch=0.360]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.



Testing DataLoader 0: 100%|██████████| 16/16 [00:07<00:00,  2.13it/s]


Time taken: 427.7330732345581 seconds.
Validation Confusion Matrix:
 [[761  44]
 [101  81]]
Test Confusion Matrix:
 [[755  40]
 [107  86]]
Validation Precision:  0.7654153132250581
Test Precision:  0.7792048760726256
Validation Recall:  0.6951982799808887
Test Recall:  0.6976406947567374
Validation F1-Score:  0.7203523464688171
Test F1-Score:  0.7252352043103922


In [56]:
# Run 'alexnet_1e-4': per the cell output, this trains, tests and prints timing plus
# validation/test confusion matrices, precision, recall and F1.
# NOTE(review): train_model is defined elsewhere; the third argument is presumably
# the run name and the fourth (1e-4) the learning rate — confirm.
train_model('alexnet', num_classes, 'alexnet_1e-4', 1e-4)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name  | Type    | Params
----------------------------------
0 | model | AlexNet | 57.0 M
----------------------------------
8.2 K     Trainable params
57.0 M    Non-trainable params
57.0 M    Total params
228.048   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 9: 100%|██████████| 124/124 [01:07<00:00,  1.83it/s, v_num=1, train_loss_step=0.221, val_loss=0.350, val_acc=0.838, train_loss_epoch=0.377]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 124/124 [01:08<00:00,  1.82it/s, v_num=1, train_loss_step=0.221, val_loss=0.350, val_acc=0.838, train_loss_epoch=0.377]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.



Testing DataLoader 0: 100%|██████████| 16/16 [00:07<00:00,  2.25it/s]


Time taken: 692.4681696891785 seconds.
Validation Confusion Matrix:
 [[761  44]
 [116  66]]
Test Confusion Matrix:
 [[757  38]
 [117  76]]
Validation Precision:  0.7338654503990878
Test Precision:  0.7663996948893974
Validation Recall:  0.6539894887720975
Test Recall:  0.6729918206406622
Validation F1-Score:  0.678464971576564
Test F1-Score:  0.7011220122447466


In [57]:
# Run 'alexnet_1e-5': per the cell output, this trains, tests and prints timing plus
# validation/test confusion matrices, precision, recall and F1.
# NOTE(review): train_model is defined elsewhere; the third argument is presumably
# the run name and the fourth (1e-5) the learning rate — confirm.
train_model('alexnet', num_classes, 'alexnet_1e-5', 1e-5)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name  | Type    | Params
----------------------------------
0 | model | AlexNet | 57.0 M
----------------------------------
8.2 K     Trainable params
57.0 M    Non-trainable params
57.0 M    Total params
228.048   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 9: 100%|██████████| 124/124 [01:07<00:00,  1.82it/s, v_num=1, train_loss_step=0.632, val_loss=0.417, val_acc=0.828, train_loss_epoch=0.436]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 124/124 [01:08<00:00,  1.81it/s, v_num=1, train_loss_step=0.632, val_loss=0.417, val_acc=0.828, train_loss_epoch=0.436]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.



Testing DataLoader 0: 100%|██████████| 16/16 [00:07<00:00,  2.18it/s]


Time taken: 688.7078845500946 seconds.
Validation Confusion Matrix:
 [[801   4]
 [166  16]]
Test Confusion Matrix:
 [[792   3]
 [177  16]]
Validation Precision:  0.8141675284384695
Test Precision:  0.8297213622291022
Validation Recall:  0.5414715719063545
Test Recall:  0.5395639847492424
Validation F1-Score:  0.5312395235008828
Test F1-Score:  0.5244512899499423


In [58]:
# Train and evaluate the 'efficientnet' architecture; the output below reports
# validation/test confusion matrices, precision, recall and F1.
# NOTE(review): the third argument looks like the run name used for the log
# folder and the fourth (1e-3) is presumably the learning rate — confirm
# against train_model's signature.
train_model('efficientnet', num_classes, 'efficientnet_1e-3', 1e-3)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name  | Type         | Params
---------------------------------------
0 | model | EfficientNet | 4.0 M 
---------------------------------------
2.6 K     Trainable params
4.0 M     Non-trainable params
4.0 M     Total params
16.040    Total estimated model params size (MB)


Loaded pretrained weights for efficientnet-b0
Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 9: 100%|██████████| 124/124 [01:24<00:00,  1.46it/s, v_num=1, train_loss_step=0.229, val_loss=0.319, val_acc=0.864, train_loss_epoch=0.325]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 124/124 [01:24<00:00,  1.46it/s, v_num=1, train_loss_step=0.229, val_loss=0.319, val_acc=0.864, train_loss_epoch=0.325]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.



Testing DataLoader 0: 100%|██████████| 16/16 [00:08<00:00,  1.80it/s]


Time taken: 868.5242214202881 seconds.
Validation Confusion Matrix:
 [[755  50]
 [ 84  98]]
Test Confusion Matrix:
 [[738  57]
 [ 85 108]]
Validation Precision:  0.7810214863254196
Test Precision:  0.7756323870540152
Validation Recall:  0.7381748686096512
Test Recall:  0.743943689510216
Validation F1-Score:  0.7562154390621545
Test F1-Score:  0.7577946426721727


In [59]:
# Same 'efficientnet' experiment with the last argument lowered to 1e-4.
# NOTE(review): the third argument looks like the run name used for the log
# folder and the fourth is presumably the learning rate — confirm against
# train_model's signature.
train_model('efficientnet', num_classes, 'efficientnet_1e-4', 1e-4)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name  | Type         | Params
---------------------------------------
0 | model | EfficientNet | 4.0 M 
---------------------------------------
2.6 K     Trainable params
4.0 M     Non-trainable params
4.0 M     Total params
16.040    Total estimated model params size (MB)


Loaded pretrained weights for efficientnet-b0
Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 9: 100%|██████████| 124/124 [01:25<00:00,  1.44it/s, v_num=1, train_loss_step=0.579, val_loss=0.382, val_acc=0.834, train_loss_epoch=0.408]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 124/124 [01:25<00:00,  1.44it/s, v_num=1, train_loss_step=0.579, val_loss=0.382, val_acc=0.834, train_loss_epoch=0.408]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.



Testing DataLoader 0: 100%|██████████| 16/16 [00:08<00:00,  1.82it/s]


Time taken: 871.3595395088196 seconds.
Validation Confusion Matrix:
 [[746  59]
 [105  77]]
Test Confusion Matrix:
 [[734  61]
 [125  68]]
Validation Precision:  0.7213961083845994
Test Precision:  0.6908068693541255
Validation Recall:  0.6748924988055423
Test Recall:  0.6378010232345944
Validation F1-Score:  0.6926214565673139
Test F1-Score:  0.6549527965331551


In [60]:
# Same 'efficientnet' experiment with the last argument lowered to 1e-5.
# The captured output for this run ends at epoch 4 rather than epoch 9.
# NOTE(review): the third argument looks like the run name used for the log
# folder and the fourth is presumably the learning rate — confirm against
# train_model's signature.
train_model('efficientnet', num_classes, 'efficientnet_1e-5', 1e-5)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name  | Type         | Params
---------------------------------------
0 | model | EfficientNet | 4.0 M 
---------------------------------------
2.6 K     Trainable params
4.0 M     Non-trainable params
4.0 M     Total params
16.040    Total estimated model params size (MB)


Loaded pretrained weights for efficientnet-b0
Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 4: 100%|██████████| 124/124 [01:26<00:00,  1.43it/s, v_num=1, train_loss_step=0.575, val_loss=0.622, val_acc=0.709, train_loss_epoch=0.559]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.



Testing DataLoader 0: 100%|██████████| 16/16 [00:08<00:00,  1.80it/s]


Time taken: 442.7758388519287 seconds.
Validation Confusion Matrix:
 [[681 124]
 [163  19]]
Test Confusion Matrix:
 [[663 132]
 [163  30]]
Validation Precision:  0.4698695853909124
Test Precision:  0.4939243117209219
Validation Recall:  0.4751791686574295
Test Recall:  0.4947013393293577
Validation F1-Score:  0.4714391006204226
Test F1-Score:  0.49351382818812944


In [61]:
# Train and evaluate the 'inception' architecture (reported as Inception3 in
# the model summary below).
# NOTE(review): the third argument looks like the run name used for the log
# folder and the fourth (1e-3) is presumably the learning rate — confirm
# against train_model's signature.
train_model('inception', num_classes, 'inception_1e-3', 1e-3)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name  | Type       | Params
-------------------------------------
0 | model | Inception3 | 25.1 M
-------------------------------------
773 K     Trainable params
24.3 M    Non-trainable params
25.1 M    Total params
100.465   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 9: 100%|██████████| 124/124 [01:23<00:00,  1.48it/s, v_num=9, train_loss_step=0.476, val_loss=0.327, val_acc=0.860, train_loss_epoch=0.371]

`Trainer.fit` stopped: `max_epochs=10` reached.


Epoch 9: 100%|██████████| 124/124 [01:24<00:00,  1.47it/s, v_num=9, train_loss_step=0.476, val_loss=0.327, val_acc=0.860, train_loss_epoch=0.371]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.



Testing DataLoader 0: 100%|██████████| 16/16 [00:08<00:00,  1.87it/s]


Time taken: 858.2375724315643 seconds.
Validation Confusion Matrix:
 [[784  21]
 [117  65]]
Test Confusion Matrix:
 [[770  25]
 [130  63]]
Validation Precision:  0.812979118808559
Test Precision:  0.7857323232323232
Validation Recall:  0.665527950310559
Test Recall:  0.6474891647929091
Validation F1-Score:  0.7020918269146647
Test F1-Score:  0.678476574391921


In [62]:
# Same 'inception' experiment with the last argument lowered to 1e-4.
# The captured output for this run ends at epoch 7 rather than epoch 9.
# NOTE(review): the third argument looks like the run name used for the log
# folder and the fourth is presumably the learning rate — confirm against
# train_model's signature.
train_model('inception', num_classes, 'inception_1e-4', 1e-4)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name  | Type       | Params
-------------------------------------
0 | model | Inception3 | 25.1 M
-------------------------------------
773 K     Trainable params
24.3 M    Non-trainable params
25.1 M    Total params
100.465   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 7: 100%|██████████| 124/124 [01:23<00:00,  1.48it/s, v_num=1, train_loss_step=0.374, val_loss=0.378, val_acc=0.829, train_loss_epoch=0.408]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.



Testing DataLoader 0: 100%|██████████| 16/16 [00:08<00:00,  1.85it/s]


Time taken: 689.7017207145691 seconds.
Validation Confusion Matrix:
 [[791  14]
 [155  27]]
Test Confusion Matrix:
 [[779  16]
 [166  27]]
Validation Precision:  0.747344402619502
Test Precision:  0.7261228005414052
Validation Recall:  0.5654801720019111
Test Recall:  0.5598852934467364
Validation F1-Score:  0.5728180949771174
Test F1-Score:  0.5621079290863044


In [63]:
# Same 'inception' experiment with the last argument lowered to 1e-5.
# The run name now follows the '<model>_<value>' underscore convention used by
# every other call in this notebook; it was previously 'inception-1e-5', which
# created a separate 'logs/inception-1e-5' folder. Logs from that earlier run
# stay under the old folder name until moved or deleted.
# NOTE(review): the fourth argument is presumably the learning rate — confirm
# against train_model's signature.
train_model('inception', num_classes, 'inception_1e-5', 1e-5)

Trainer will use only 1 of 2 GPUs because it is running inside an interactive / notebook environment. You may try to set `Trainer(devices=2)` but please note that multi-GPU inside interactive / notebook environments is considered experimental and unstable. Your mileage may vary.
GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
Missing logger folder: logs/inception-1e-5
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]

  | Name  | Type       | Params
-------------------------------------
0 | model | Inception3 | 25.1 M
-------------------------------------
773 K     Trainable params
24.3 M    Non-trainable params
25.1 M    Total params
100.465   Total estimated model params size (MB)


Sanity Checking: |          | 0/? [00:00<?, ?it/s]

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'val_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


                                                                           

/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'train_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.


Epoch 3: 100%|██████████| 124/124 [01:24<00:00,  1.46it/s, v_num=0, train_loss_step=0.464, val_loss=0.492, val_acc=0.816, train_loss_epoch=0.515]

LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0,1]
/home/ashiley/miniconda3/envs/env/lib/python3.8/site-packages/pytorch_lightning/trainer/connectors/data_connector.py:441: The 'test_dataloader' does not have many workers which may be a bottleneck. Consider increasing the value of the `num_workers` argument` to `num_workers=11` in the `DataLoader` to improve performance.



Testing DataLoader 0: 100%|██████████| 16/16 [00:09<00:00,  1.78it/s]


Time taken: 350.7298357486725 seconds.
Validation Confusion Matrix:
 [[805   0]
 [182   0]]
Test Confusion Matrix:
 [[795   0]
 [193   0]]
Validation Precision:  0.4078014184397163
Test Precision:  0.4023279352226721
Validation Recall:  0.5
Test Recall:  0.5
Validation F1-Score:  0.44921875
Test F1-Score:  0.44587773415591697


  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
