In [1]:
import os
import torch
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from torch import nn
from tqdm import tqdm
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report

In [2]:
import torch

# Select the compute device: CUDA when a GPU is available, otherwise the CPU.
# Nothing is moved here; models and tensors are sent to `device` later.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
device

device(type='cuda')

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import models

def load_resnet50(num_classes):
    """Build a torchvision ResNet-50 whose final layer outputs ``num_classes`` logits.

    The network is created with the ``ResNet50_Weights.DEFAULT`` weights and its
    ``fc`` layer is replaced by a freshly initialised ``nn.Linear``.

    Args:
        num_classes: Number of output units for the new ``fc`` layer.

    Returns:
        The modified ResNet-50 module.
    """
    net = models.resnet50(weights='ResNet50_Weights.DEFAULT')
    # Keep the input width of the existing head, change only the output width.
    net.fc = nn.Linear(net.fc.in_features, num_classes)
    return net

def load_efficientnet_b0(num_classes):
    """Build a torchvision EfficientNet-B0 with a ``num_classes``-way output layer.

    The network is created with the ``EfficientNet_B0_Weights.DEFAULT`` weights
    and ``classifier[1]`` is replaced by a freshly initialised ``nn.Linear``.

    Args:
        num_classes: Number of output units for the new ``classifier[1]`` layer.

    Returns:
        The modified EfficientNet-B0 module.
    """
    net = models.efficientnet_b0(weights='EfficientNet_B0_Weights.DEFAULT')
    # Keep the input width of the existing head, change only the output width.
    head_in = net.classifier[1].in_features
    net.classifier[1] = nn.Linear(head_in, num_classes)
    return net

def load_mobilenet_v2(num_classes):
    """Build a torchvision MobileNetV2 with a ``num_classes``-way output layer.

    The network is created with the ``MobileNet_V2_Weights.DEFAULT`` weights and
    ``classifier[1]`` is replaced by a freshly initialised ``nn.Linear``.

    Args:
        num_classes: Number of output units for the new ``classifier[1]`` layer.

    Returns:
        The modified MobileNetV2 module.
    """
    net = models.mobilenet_v2(weights='MobileNet_V2_Weights.DEFAULT')
    # Keep the input width of the existing head, change only the output width.
    head_in = net.classifier[1].in_features
    net.classifier[1] = nn.Linear(head_in, num_classes)
    return net

def load_mobilenet_v3_large(num_classes):
    """Build a torchvision MobileNetV3-Large with a ``num_classes``-way output layer.

    The network is created with the ``MobileNet_V3_Large_Weights.DEFAULT``
    weights and ``classifier[3]`` is replaced by a freshly initialised
    ``nn.Linear``.

    Args:
        num_classes: Number of output units for the new ``classifier[3]`` layer.

    Returns:
        The modified MobileNetV3-Large module.
    """
    net = models.mobilenet_v3_large(weights='MobileNet_V3_Large_Weights.DEFAULT')
    # Keep the input width of the existing head, change only the output width.
    head_in = net.classifier[3].in_features
    net.classifier[3] = nn.Linear(head_in, num_classes)
    return net

def load_mobilenet_v3_small(num_classes):
    """Build a torchvision MobileNetV3-Small with a ``num_classes``-way output layer.

    The network is created with the ``MobileNet_V3_Small_Weights.DEFAULT``
    weights and ``classifier[3]`` is replaced by a freshly initialised
    ``nn.Linear``.

    Args:
        num_classes: Number of output units for the new ``classifier[3]`` layer.

    Returns:
        The modified MobileNetV3-Small module.
    """
    net = models.mobilenet_v3_small(weights='MobileNet_V3_Small_Weights.DEFAULT')
    # Keep the input width of the existing head, change only the output width.
    head_in = net.classifier[3].in_features
    net.classifier[3] = nn.Linear(head_in, num_classes)
    return net

def load_efficientnet_b1(num_classes):
    """Build a torchvision EfficientNet-B1 with a ``num_classes``-way output layer.

    The network is created with the ``EfficientNet_B1_Weights.DEFAULT`` weights
    and ``classifier[1]`` is replaced by a freshly initialised ``nn.Linear``.

    Args:
        num_classes: Number of output units for the new ``classifier[1]`` layer.

    Returns:
        The modified EfficientNet-B1 module.
    """
    net = models.efficientnet_b1(weights='EfficientNet_B1_Weights.DEFAULT')
    # Keep the input width of the existing head, change only the output width.
    head_in = net.classifier[1].in_features
    net.classifier[1] = nn.Linear(head_in, num_classes)
    return net

In [4]:
# Create test dataloader function
def create_test_dataloader(data_dir, batch_size, num_workers=4):
    """Create a non-shuffled ``DataLoader`` over the ``test`` sub-folder of ``data_dir``.

    Images are read with ``datasets.ImageFolder``, resized to 244x244, converted
    to tensors and normalised with the mean/std values given below.

    Args:
        data_dir: Directory that contains a ``test`` sub-directory.
        batch_size: Number of samples per batch.
        num_workers: Worker process count passed to ``DataLoader``.

    Returns:
        A ``DataLoader`` iterating the test ``ImageFolder`` in a fixed order.
    """
    test_root = os.path.join(data_dir, 'test')

    # NOTE(review): the resize target is 244, not the more common 224 --
    # confirm it matches the preprocessing used when the checkpoints were trained.
    preprocessing = transforms.Compose([
        transforms.Resize((244, 244)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
    ])

    return DataLoader(
        datasets.ImageFolder(test_root, transform=preprocessing),
        batch_size=batch_size,
        shuffle=False,  # keep a stable sample order for evaluation
        num_workers=num_workers,
    )

In [5]:
# Function to load the model
def load_model(load_func, checkpoint_path, num_classes):
    """Instantiate a model via ``load_func`` and restore its weights from a checkpoint.

    Args:
        load_func: Callable accepting ``num_classes=`` and returning a module.
        checkpoint_path: Path of a file that ``torch.load`` turns into a state dict.
        num_classes: Forwarded to ``load_func``.

    Returns:
        The module with the checkpoint loaded, placed on the notebook-level
        ``device`` and switched to eval mode.
    """
    net = load_func(num_classes=num_classes)

    # NOTE(review): torch.load unpickles the file -- only use trusted checkpoints.
    state_dict = torch.load(checkpoint_path, map_location=device)
    net.load_state_dict(state_dict)

    # Module.to() and Module.eval() both return the module itself.
    return net.to(device).eval()

In [6]:
# Function to evaluate the model and generate metrics
def evaluate_model(model, test_loader, criterion):
    """Run ``model`` over ``test_loader`` and collect loss, accuracy and predictions.

    Both the loss and the accuracy are averaged over the number of samples that
    were actually iterated, so the two metrics stay consistent even when the
    loader does not visit every element of its dataset (custom sampler,
    ``drop_last=True``).

    Args:
        model: Network to evaluate; it is put into eval mode here.
        test_loader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Loss callable invoked as ``criterion(outputs, labels)``. Each
            batch value is weighted by the batch size, so the reported loss is a
            per-sample mean only if the criterion returns a batch mean.

    Returns:
        Tuple ``(test_loss, test_acc, conf_matrix, all_labels, all_preds)``:
        mean loss, accuracy as a fraction, the scikit-learn confusion matrix,
        and the lists of true and predicted labels in iteration order.

    Raises:
        ValueError: If ``test_loader`` yields no samples.
    """
    model.eval()
    loss_sum = 0.0
    correct = 0
    total = 0
    all_preds = []
    all_labels = []

    # Inference only: no autograd bookkeeping needed.
    with torch.no_grad():
        for inputs, labels in tqdm(test_loader, desc='Testing'):
            inputs, labels = inputs.to(device), labels.to(device)

            outputs = model(inputs)
            loss = criterion(outputs, labels)

            # Weight by batch size so a smaller final batch is not over-counted.
            loss_sum += loss.item() * inputs.size(0)

            # Predicted class = index of the largest output along dim 1.
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == labels).sum().item()
            total += labels.size(0)

            # Collect predictions and true labels for further analysis
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())

    if total == 0:
        # Previously surfaced as a bare ZeroDivisionError.
        raise ValueError("test_loader yielded no samples; nothing to evaluate")

    # Normalise by the samples actually seen rather than len(test_loader.dataset).
    test_loss = loss_sum / total
    test_acc = correct / total

    conf_matrix = confusion_matrix(all_labels, all_preds)

    return test_loss, test_acc, conf_matrix, all_labels, all_preds

In [7]:
from sklearn.metrics import classification_report
import scikitplot as skplt
import matplotlib.pyplot as plt

# Updated function to save confusion matrix using scikit-plot
def save_confusion_matrix(all_labels, all_preds, checkpoint_name, output_dir="confusion_matrices"):
    """Plot a confusion matrix with scikit-plot and save it as a PNG.

    The image is written to
    ``<output_dir>/<checkpoint_name>_confusion_matrix.png`` at 300 dpi.

    A single figure is created and handed to scikit-plot through ``ax=``.
    Opening a figure with ``plt.figure`` and then letting scikit-plot create its
    own left the first, empty figure open after ``plt.close()``, so the notebook
    displayed an empty "Figure ... with 0 Axes" on every call.

    Args:
        all_labels: True class labels.
        all_preds: Predicted class labels, aligned with ``all_labels``.
        checkpoint_name: Used in the plot title and in the output file name.
        output_dir: Directory for the image; created if it does not exist.
    """
    os.makedirs(output_dir, exist_ok=True)

    fig, ax = plt.subplots(figsize=(10, 8))
    try:
        # Draw onto our own axes so no second figure is created.
        skplt.metrics.plot_confusion_matrix(all_labels, all_preds, ax=ax, cmap="Blues")
        ax.set_title(f'Confusion Matrix for {checkpoint_name}')

        image_path = os.path.join(output_dir, f'{checkpoint_name}_confusion_matrix.png')
        fig.savefig(image_path, dpi=300)  # Save with high resolution
    finally:
        # Close this figure explicitly, even if plotting or saving failed.
        plt.close(fig)

In [8]:
# Updated function to evaluate a single model using a checkpoint
def evaluate_single_checkpoint(model_function, checkpoint_path, num_classes, data_dir, batch_size, criterion):
    """Evaluate one checkpoint on the test split and print its metrics.

    Builds the test loader, restores the model, runs the evaluation, saves the
    confusion-matrix image (named after the checkpoint file) and prints the
    loss, the accuracy and scikit-learn's classification report.

    Args:
        model_function: Loader callable handed to ``load_model``; its
            ``__name__`` is printed.
        checkpoint_path: Path of the checkpoint file to restore.
        num_classes: Number of output classes for the model head.
        data_dir: Dataset root passed to ``create_test_dataloader``.
        batch_size: Batch size for the test loader.
        criterion: Loss callable passed to ``evaluate_model``.

    Returns:
        None. Results are printed and the plot is written to disk.
    """
    loader = create_test_dataloader(data_dir, batch_size)
    label_names = loader.dataset.classes

    print(f"Evaluating model: {model_function.__name__}")
    print(f"Checkpoint: {checkpoint_path}")

    net = load_model(model_function, checkpoint_path, num_classes)

    # The confusion matrix returned here is unused; the plot is rebuilt from
    # the raw labels and predictions below.
    test_loss, test_acc, _, y_true, y_pred = evaluate_model(net, loader, criterion)

    plot_name = os.path.basename(checkpoint_path)
    save_confusion_matrix(y_true, y_pred, plot_name)

    print(f"Test Loss: {test_loss:.4f}, Test Accuracy: {test_acc*100:.2f}%")

    report_text = classification_report(y_true, y_pred, target_names=label_names)
    print("Classification Report:\n", report_text)

## resnet50_10ep

In [9]:
if __name__ == "__main__":
    # Score this ResNet-50 checkpoint on the test split with cross-entropy loss.
    criterion = nn.CrossEntropyLoss()
    model_function = load_resnet50
    evaluate_single_checkpoint(
        model_function=model_function,
        criterion=criterion,
        checkpoint_path="Checkpoints_best/resnet50_epoch_6.pth",
        data_dir="Dataset/",
        num_classes=4,
        batch_size=32,
    )

Evaluating model: load_resnet50
Checkpoint: Checkpoints_best/resnet50_epoch_6.pth


  return F.conv2d(input, weight, bias, self.stride,
Testing: 100%|█████████████████████████████████████████████████████████████████████████| 22/22 [00:05<00:00,  4.22it/s]


Test Loss: 0.1008, Test Accuracy: 97.30%
Classification Report:
               precision    recall  f1-score   support

      glioma       0.93      0.99      0.96       163
  meningioma       0.99      0.90      0.94       165
     notumor       0.99      1.00      1.00       200
   pituitary       0.98      0.99      0.99       176

    accuracy                           0.97       704
   macro avg       0.97      0.97      0.97       704
weighted avg       0.97      0.97      0.97       704



<Figure size 1000x800 with 0 Axes>

## resnet50_20ep

In [10]:
if __name__ == "__main__":
    # Score this ResNet-50 checkpoint on the test split with cross-entropy loss.
    criterion = nn.CrossEntropyLoss()
    model_function = load_resnet50
    evaluate_single_checkpoint(
        model_function=model_function,
        criterion=criterion,
        checkpoint_path="Checkpoints_best/resnet50_20ep_epoch_16.pth",
        data_dir="Dataset/",
        num_classes=4,
        batch_size=32,
    )

Evaluating model: load_resnet50
Checkpoint: Checkpoints_best/resnet50_20ep_epoch_16.pth


Testing: 100%|█████████████████████████████████████████████████████████████████████████| 22/22 [00:04<00:00,  4.49it/s]


Test Loss: 0.0583, Test Accuracy: 97.73%
Classification Report:
               precision    recall  f1-score   support

      glioma       0.99      0.98      0.98       163
  meningioma       0.95      0.96      0.95       165
     notumor       0.98      1.00      0.99       200
   pituitary       0.99      0.97      0.98       176

    accuracy                           0.98       704
   macro avg       0.98      0.98      0.98       704
weighted avg       0.98      0.98      0.98       704



<Figure size 1000x800 with 0 Axes>

## efficientnet_b0_20ep

In [11]:
if __name__ == "__main__":
    # Score this EfficientNet-B0 checkpoint on the test split with cross-entropy loss.
    criterion = nn.CrossEntropyLoss()
    model_function = load_efficientnet_b0
    evaluate_single_checkpoint(
        model_function=model_function,
        criterion=criterion,
        checkpoint_path="Checkpoints_best/efficientnet_b0_20ep_epoch_7.pth",
        data_dir="Dataset/",
        num_classes=4,
        batch_size=32,
    )

Evaluating model: load_efficientnet_b0
Checkpoint: Checkpoints_best/efficientnet_b0_20ep_epoch_7.pth


Testing: 100%|█████████████████████████████████████████████████████████████████████████| 22/22 [00:04<00:00,  5.16it/s]


Test Loss: 0.0359, Test Accuracy: 98.86%
Classification Report:
               precision    recall  f1-score   support

      glioma       0.98      0.99      0.98       163
  meningioma       0.99      0.96      0.98       165
     notumor       1.00      1.00      1.00       200
   pituitary       0.99      0.99      0.99       176

    accuracy                           0.99       704
   macro avg       0.99      0.99      0.99       704
weighted avg       0.99      0.99      0.99       704



<Figure size 1000x800 with 0 Axes>

## efficientnet_b0_10ep

In [12]:
if __name__ == "__main__":
    # Score this EfficientNet-B0 checkpoint on the test split with cross-entropy loss.
    criterion = nn.CrossEntropyLoss()
    model_function = load_efficientnet_b0
    evaluate_single_checkpoint(
        model_function=model_function,
        criterion=criterion,
        checkpoint_path="Checkpoints_best/efficientnet_b0_epoch_2.pth",
        data_dir="Dataset/",
        num_classes=4,
        batch_size=32,
    )

Evaluating model: load_efficientnet_b0
Checkpoint: Checkpoints_best/efficientnet_b0_epoch_2.pth


Testing: 100%|█████████████████████████████████████████████████████████████████████████| 22/22 [00:04<00:00,  5.35it/s]


Test Loss: 0.0503, Test Accuracy: 98.72%
Classification Report:
               precision    recall  f1-score   support

      glioma       0.99      0.99      0.99       163
  meningioma       0.97      0.98      0.97       165
     notumor       1.00      1.00      1.00       200
   pituitary       0.98      0.98      0.98       176

    accuracy                           0.99       704
   macro avg       0.99      0.99      0.99       704
weighted avg       0.99      0.99      0.99       704



<Figure size 1000x800 with 0 Axes>

## efficientnet_b1_20ep

In [13]:
if __name__ == "__main__":
    # Score this EfficientNet-B1 checkpoint on the test split with cross-entropy loss.
    criterion = nn.CrossEntropyLoss()
    model_function = load_efficientnet_b1
    evaluate_single_checkpoint(
        model_function=model_function,
        criterion=criterion,
        checkpoint_path="Checkpoints_best/efficientnet_b1_20ep_epoch_13.pth",
        data_dir="Dataset/",
        num_classes=4,
        batch_size=32,
    )

Evaluating model: load_efficientnet_b1
Checkpoint: Checkpoints_best/efficientnet_b1_20ep_epoch_13.pth


Testing: 100%|█████████████████████████████████████████████████████████████████████████| 22/22 [00:04<00:00,  5.04it/s]


Test Loss: 0.0264, Test Accuracy: 99.43%
Classification Report:
               precision    recall  f1-score   support

      glioma       0.99      0.99      0.99       163
  meningioma       0.98      0.99      0.99       165
     notumor       1.00      1.00      1.00       200
   pituitary       1.00      0.99      1.00       176

    accuracy                           0.99       704
   macro avg       0.99      0.99      0.99       704
weighted avg       0.99      0.99      0.99       704



<Figure size 1000x800 with 0 Axes>

## efficientnet_b1_10ep

In [14]:
if __name__ == "__main__":
    # Score this EfficientNet-B1 checkpoint on the test split with cross-entropy loss.
    criterion = nn.CrossEntropyLoss()
    model_function = load_efficientnet_b1
    evaluate_single_checkpoint(
        model_function=model_function,
        criterion=criterion,
        checkpoint_path="Checkpoints_best/efficientnet_b1_epoch_9.pth",
        data_dir="Dataset/",
        num_classes=4,
        batch_size=32,
    )

Evaluating model: load_efficientnet_b1
Checkpoint: Checkpoints_best/efficientnet_b1_epoch_9.pth


Testing: 100%|█████████████████████████████████████████████████████████████████████████| 22/22 [00:04<00:00,  4.95it/s]


Test Loss: 0.0373, Test Accuracy: 98.72%
Classification Report:
               precision    recall  f1-score   support

      glioma       0.98      0.99      0.99       163
  meningioma       0.97      0.98      0.97       165
     notumor       1.00      1.00      1.00       200
   pituitary       0.99      0.98      0.99       176

    accuracy                           0.99       704
   macro avg       0.99      0.99      0.99       704
weighted avg       0.99      0.99      0.99       704



<Figure size 1000x800 with 0 Axes>

## mobilenet_v2_10ep

In [15]:
if __name__ == "__main__":
    # Score this MobileNetV2 checkpoint on the test split with cross-entropy loss.
    criterion = nn.CrossEntropyLoss()
    model_function = load_mobilenet_v2
    evaluate_single_checkpoint(
        model_function=model_function,
        criterion=criterion,
        checkpoint_path="Checkpoints_best/mobilenet_v2_epoch_3.pth",
        data_dir="Dataset/",
        num_classes=4,
        batch_size=32,
    )

Evaluating model: load_mobilenet_v2
Checkpoint: Checkpoints_best/mobilenet_v2_epoch_3.pth


Testing: 100%|█████████████████████████████████████████████████████████████████████████| 22/22 [00:03<00:00,  5.59it/s]


Test Loss: 0.0764, Test Accuracy: 97.16%
Classification Report:
               precision    recall  f1-score   support

      glioma       0.97      0.98      0.97       163
  meningioma       0.94      0.94      0.94       165
     notumor       0.99      1.00      1.00       200
   pituitary       0.98      0.97      0.97       176

    accuracy                           0.97       704
   macro avg       0.97      0.97      0.97       704
weighted avg       0.97      0.97      0.97       704



<Figure size 1000x800 with 0 Axes>

## mobilenet_v2_20ep

In [16]:
if __name__ == "__main__":
    # Score this MobileNetV2 checkpoint on the test split with cross-entropy loss.
    criterion = nn.CrossEntropyLoss()
    model_function = load_mobilenet_v2
    evaluate_single_checkpoint(
        model_function=model_function,
        criterion=criterion,
        checkpoint_path="Checkpoints_best/mobilenet_v2_20ep_epoch_16.pth",
        data_dir="Dataset/",
        num_classes=4,
        batch_size=32,
    )

Evaluating model: load_mobilenet_v2
Checkpoint: Checkpoints_best/mobilenet_v2_20ep_epoch_16.pth


Testing: 100%|█████████████████████████████████████████████████████████████████████████| 22/22 [00:04<00:00,  5.47it/s]


Test Loss: 0.0410, Test Accuracy: 98.44%
Classification Report:
               precision    recall  f1-score   support

      glioma       0.99      0.98      0.98       163
  meningioma       0.97      0.96      0.97       165
     notumor       0.99      1.00      1.00       200
   pituitary       0.98      0.99      0.99       176

    accuracy                           0.98       704
   macro avg       0.98      0.98      0.98       704
weighted avg       0.98      0.98      0.98       704



<Figure size 1000x800 with 0 Axes>

## mobilenet_v3_large_20ep

In [17]:
if __name__ == "__main__":
    # Score this MobileNetV3-Large checkpoint on the test split with cross-entropy loss.
    criterion = nn.CrossEntropyLoss()
    model_function = load_mobilenet_v3_large
    evaluate_single_checkpoint(
        model_function=model_function,
        criterion=criterion,
        checkpoint_path="Checkpoints_best/mobilenet_v3_large_20ep_epoch_10.pth",
        data_dir="Dataset/",
        num_classes=4,
        batch_size=32,
    )

Evaluating model: load_mobilenet_v3_large
Checkpoint: Checkpoints_best/mobilenet_v3_large_20ep_epoch_10.pth


Testing: 100%|█████████████████████████████████████████████████████████████████████████| 22/22 [00:03<00:00,  5.97it/s]


Test Loss: 0.0500, Test Accuracy: 98.86%
Classification Report:
               precision    recall  f1-score   support

      glioma       1.00      0.96      0.98       163
  meningioma       0.96      0.99      0.98       165
     notumor       1.00      0.99      1.00       200
   pituitary       0.99      1.00      1.00       176

    accuracy                           0.99       704
   macro avg       0.99      0.99      0.99       704
weighted avg       0.99      0.99      0.99       704



<Figure size 1000x800 with 0 Axes>

## mobilenet_v3_large_10ep

In [18]:
if __name__ == "__main__":
    # Score this MobileNetV3-Large checkpoint on the test split with cross-entropy loss.
    criterion = nn.CrossEntropyLoss()
    model_function = load_mobilenet_v3_large
    evaluate_single_checkpoint(
        model_function=model_function,
        criterion=criterion,
        checkpoint_path="Checkpoints_best/mobilenet_v3_large_epoch_5.pth",
        data_dir="Dataset/",
        num_classes=4,
        batch_size=32,
    )

Evaluating model: load_mobilenet_v3_large
Checkpoint: Checkpoints_best/mobilenet_v3_large_epoch_5.pth


Testing: 100%|█████████████████████████████████████████████████████████████████████████| 22/22 [00:03<00:00,  5.86it/s]


Test Loss: 0.1104, Test Accuracy: 96.73%
Classification Report:
               precision    recall  f1-score   support

      glioma       0.95      0.99      0.97       163
  meningioma       0.98      0.89      0.93       165
     notumor       0.98      0.99      0.99       200
   pituitary       0.96      0.99      0.97       176

    accuracy                           0.97       704
   macro avg       0.97      0.97      0.97       704
weighted avg       0.97      0.97      0.97       704



<Figure size 1000x800 with 0 Axes>

## mobilenet_v3_small_20ep

In [19]:
if __name__ == "__main__":
    # Score this MobileNetV3-Small checkpoint on the test split with cross-entropy loss.
    criterion = nn.CrossEntropyLoss()
    model_function = load_mobilenet_v3_small
    evaluate_single_checkpoint(
        model_function=model_function,
        criterion=criterion,
        checkpoint_path="Checkpoints_best/mobilenet_v3_small_20ep_epoch_13.pth",
        data_dir="Dataset/",
        num_classes=4,
        batch_size=32,
    )

Evaluating model: load_mobilenet_v3_small
Checkpoint: Checkpoints_best/mobilenet_v3_small_20ep_epoch_13.pth


Testing: 100%|█████████████████████████████████████████████████████████████████████████| 22/22 [00:03<00:00,  5.93it/s]


Test Loss: 0.0506, Test Accuracy: 98.15%
Classification Report:
               precision    recall  f1-score   support

      glioma       0.98      0.96      0.97       163
  meningioma       0.96      0.98      0.97       165
     notumor       0.99      1.00      0.99       200
   pituitary       1.00      0.98      0.99       176

    accuracy                           0.98       704
   macro avg       0.98      0.98      0.98       704
weighted avg       0.98      0.98      0.98       704



<Figure size 1000x800 with 0 Axes>

## mobilenet_v3_small_10ep

In [20]:
if __name__ == "__main__":
    # Score this MobileNetV3-Small checkpoint on the test split with cross-entropy loss.
    criterion = nn.CrossEntropyLoss()
    model_function = load_mobilenet_v3_small
    evaluate_single_checkpoint(
        model_function=model_function,
        criterion=criterion,
        checkpoint_path="Checkpoints_best/mobilenet_v3_small_epoch_9.pth",
        data_dir="Dataset/",
        num_classes=4,
        batch_size=32,
    )

Evaluating model: load_mobilenet_v3_small
Checkpoint: Checkpoints_best/mobilenet_v3_small_epoch_9.pth


Testing: 100%|█████████████████████████████████████████████████████████████████████████| 22/22 [00:03<00:00,  5.84it/s]


Test Loss: 0.0757, Test Accuracy: 97.59%
Classification Report:
               precision    recall  f1-score   support

      glioma       0.99      0.97      0.98       163
  meningioma       0.95      0.95      0.95       165
     notumor       0.97      0.99      0.98       200
   pituitary       0.99      0.98      0.99       176

    accuracy                           0.98       704
   macro avg       0.98      0.97      0.98       704
weighted avg       0.98      0.98      0.98       704



<Figure size 1000x800 with 0 Axes>