# Pneumonia Classification using Chest X-Ray Images

This notebook implements:
- Custom CNN for pneumonia detection
- Transfer Learning with ResNet50
- Explainable AI (GradCAM) for model interpretation
- Model comparison and visualization

In [None]:
#import necessary libraries
import os
import glob
import numpy as np
import pandas as pd
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torchvision.models as models
import torchvision.transforms as transforms
from torch.utils.data import Dataset, DataLoader
from torch.optim.lr_scheduler import ReduceLROnPlateau
from torchmetrics import Accuracy, Precision, Recall, AUROC
from torch.utils.tensorboard import SummaryWriter
from sklearn.model_selection import train_test_split, KFold
from PIL import Image
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import confusion_matrix, roc_curve, auc, classification_report
import random
import multiprocessing
import cv2

## Utility Functions

Setting up utility functions for reproducibility and device selection

In [None]:
#set seed for reproducibility
def set_seed(seed=42):
    """Set seed for reproducibility"""
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    if torch.cuda.is_available():
        torch.cuda.manual_seed(seed)
        torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

#define device function
def get_device():
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print('Device:', device)

    if device.type == 'cuda':
        print('Number of GPUs:', torch.cuda.device_count())
        print('GPU Name:', torch.cuda.get_device_name(0))
    else:
        print('No GPU available, using CPU.')

    print('PyTorch Version:', torch.__version__)
    return device

## Configuration Settings

Define constants and settings for the model

In [None]:
#define constants
BATCH_SIZE = 32
IMAGE_SIZE = (150, 150)
EPOCHS = 20
LEARNING_RATE = 1e-4
WEIGHT_DECAY = 1e-5
NUM_WORKERS = 0  
EARLY_STOPPING_PATIENCE = 5
CHECKPOINT_DIR = "checkpoints"

#set up data paths
DATA_PATH = r'E:\neat\data set kaggle pnemonia\chest_xray'

## Dataset Implementation

Custom dataset class for chest X-ray images

In [None]:
#create datasets and dataloaders
class ChestXRayDataset(Dataset):
    def __init__(self, filenames, transform=None):
        self.filenames = filenames
        self.transform = transform

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        file_path = self.filenames[idx]
        label = 1 if "PNEUMONIA" in file_path else 0

        #load and process image
        img = Image.open(file_path).convert('RGB')

        if self.transform:
            img = self.transform(img)

        return img, torch.tensor(label, dtype=torch.float32)

## Model Architecture Components

Define building blocks for the CNN model

In [None]:
#define the CNN building blocks
def conv_block(in_channels, out_channels):
    return nn.Sequential(
        nn.Conv2d(in_channels=in_channels, out_channels=out_channels, kernel_size=3, padding=1),
        nn.BatchNorm2d(out_channels), # Apply BatchNorm before activation
        nn.ReLU(inplace=True),
        nn.Conv2d(in_channels=out_channels, out_channels=out_channels, kernel_size=3, padding=1),
        nn.BatchNorm2d(out_channels),
        nn.ReLU(inplace=True),
        nn.MaxPool2d(kernel_size=2, stride=2)
    )

#fixed dense block function
def dense_block(in_features, out_features, dropout_rate):
    return nn.Sequential(
        nn.Linear(in_features, out_features),
        nn.ReLU(inplace=True),
        nn.Dropout(dropout_rate)
    )

## Custom CNN Model

Implementation of the custom CNN architecture for pneumonia classification

In [None]:
#custom CNN model
class PneumoniaCNN(nn.Module):
    def __init__(self, image_size=(150, 150), num_classes=1):
        super(PneumoniaCNN, self).__init__()

        self.conv1 = conv_block(3, 32)
        self.conv2 = conv_block(32, 64)
        self.conv3 = conv_block(64, 128)
        self.conv4 = conv_block(128, 256)
        self.conv5 = conv_block(256, 512)

        self.dropout1 = nn.Dropout(0.3)
        self.dropout2 = nn.Dropout(0.5)

        self.flatten = nn.Flatten()

        #compute the flattened size
        flat_size = self._get_flattened_size((3, image_size[0], image_size[1]))

        self.dense1 = dense_block(flat_size, 512, 0.5)
        self.dense2 = dense_block(512, 128, 0.3)
        self.fc_out = nn.Linear(128, num_classes)
        self.sigmoid = nn.Sigmoid()

    def _get_flattened_size(self, shape):
        with torch.no_grad():
            dummy_input = torch.zeros(1, *shape)
            dummy_output = self.conv5(self.dropout1(self.conv4(self.conv3(self.conv2(self.conv1(dummy_input))))))
            return dummy_output.view(1, -1).size(1)

    def forward(self, x):
        x = self.conv1(x)
        x = self.conv2(x)
        x = self.conv3(x)
        x = self.conv4(x)
        x = self.dropout1(x)
        x = self.conv5(x)
        x = self.dropout2(x)
        x = self.flatten(x)
        x = self.dense1(x)
        x = self.dense2(x)
        x = self.fc_out(x)
        x = self.sigmoid(x)
        return x

## Loss Function and Early Stopping

Implementing Focal Loss for class imbalance and Early Stopping for training

In [None]:
#define Focal Loss for better handling of class imbalance
class FocalLoss(nn.Module):
    def __init__(self, alpha=0.25, gamma=2.0, reduction='mean'):
        super(FocalLoss, self).__init__()
        self.alpha = alpha
        self.gamma = gamma
        self.reduction = reduction
        self.bce = nn.BCELoss(reduction='none')

    def forward(self, inputs, targets):
        BCE_loss = self.bce(inputs, targets)
        pt = torch.exp(-BCE_loss)
        F_loss = self.alpha * (1-pt)**self.gamma * BCE_loss

        if self.reduction == 'mean':
            return torch.mean(F_loss)
        elif self.reduction == 'sum':
            return torch.sum(F_loss)
        else:
            return F_loss

#early stopping class
class EarlyStopping:
    def __init__(self, patience=5, verbose=True, delta=0, path='checkpoint.pt'):
        self.patience = patience
        self.verbose = verbose
        self.delta = delta
        self.path = path
        self.best_score = None
        self.early_stop = False
        self.val_loss_min = float('inf')
        self.counter = 0

    def __call__(self, val_loss, model):
        score = -val_loss

        if self.best_score is None:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
        elif score < self.best_score + self.delta:
            self.counter += 1
            if self.verbose:
                print(f'EarlyStopping counter: {self.counter} out of {self.patience}')
            if self.counter >= self.patience:
                self.early_stop = True
        else:
            self.best_score = score
            self.save_checkpoint(val_loss, model)
            self.counter = 0

    def save_checkpoint(self, val_loss, model):
        if self.verbose:
            print(f'Validation loss decreased ({self.val_loss_min:.6f} --> {val_loss:.6f}). Saving model...')
        torch.save(model.state_dict(), self.path)
        self.val_loss_min = val_loss

## Training and Evaluation Functions

Functions for training epochs and model evaluation

In [None]:
#training and evaluation functions
def train_epoch(model, dataloader, optimizer, criterion, class_weights, device, grad_clip=1.0):
    model.train()
    running_loss = 0.0

    #initialize metrics
    accuracy_metric = Accuracy(task="binary").to(device)
    precision_metric = Precision(task="binary").to(device)
    recall_metric = Recall(task="binary").to(device)
    auroc_metric = AUROC(task="binary").to(device)

    all_preds = []
    all_labels = []

    for images, labels in dataloader:
        images = images.to(device)
        labels = labels.to(device)

        #zero the parameter gradients
        optimizer.zero_grad()

        #forward pass
        outputs = model(images).squeeze()

        #apply class weights if using BCELoss
        if isinstance(criterion, nn.BCELoss):
            batch_weights = torch.where(labels == 1, class_weights[1], class_weights[0])
            loss = criterion(outputs, labels)
            loss = (loss * batch_weights).mean()
        else:
            #for focalLoss we don't need to apply weights separately
            loss = criterion(outputs, labels)

        #backward pass
        loss.backward()

        #gradient clipping to prevent exploding gradients
        nn.utils.clip_grad_norm_(model.parameters(), grad_clip)

        #optimizer step
        optimizer.step()

        #statistics
        running_loss += loss.item()

        #convert outputs to predictions
        preds = (outputs > 0.5).float()

        #update metrics
        accuracy_metric.update(preds, labels)
        precision_metric.update(preds, labels)
        recall_metric.update(preds, labels)

        #collect predictions and labels for AUROC
        all_preds.append(outputs.detach())
        all_labels.append(labels)

    #compute all metrics
    all_preds = torch.cat(all_preds)
    all_labels = torch.cat(all_labels)
    auroc_metric.update(all_preds, all_labels)

    metrics = {
        'loss': running_loss / len(dataloader),
        'accuracy': accuracy_metric.compute().item(),
        'precision': precision_metric.compute().item(),
        'recall': recall_metric.compute().item(),
        'auroc': auroc_metric.compute().item()
    }

    #reset metrics
    accuracy_metric.reset()
    precision_metric.reset()
    recall_metric.reset()
    auroc_metric.reset()

    return metrics

def evaluate(model, dataloader, criterion, device):
    model.eval()
    running_loss = 0.0

    #initialize metrics
    accuracy_metric = Accuracy(task="binary").to(device)
    precision_metric = Precision(task="binary").to(device)
    recall_metric = Recall(task="binary").to(device)
    auroc_metric = AUROC(task="binary").to(device)

    all_preds = []
    all_labels = []
    all_outputs = []

    with torch.no_grad():
        for images, labels in dataloader:
            images = images.to(device)
            labels = labels.to(device)

            #forward pass
            outputs = model(images).squeeze()

            #loss
            loss = criterion(outputs, labels)
            running_loss += loss.item()

            #convert outputs to predictions
            preds = (outputs > 0.5).float()

            #update metrics
            accuracy_metric.update(preds, labels)
            precision_metric.update(preds, labels)
            recall_metric.update(preds, labels)

            #collect predictions and labels for AUROC
            all_outputs.append(outputs.detach().cpu())
            all_preds.append(preds.cpu())
            all_labels.append(labels.cpu())

    #compute all metrics
    all_outputs = torch.cat(all_outputs)
    all_preds = torch.cat(all_preds)
    all_labels = torch.cat(all_labels)

    auroc_metric.update(all_outputs, all_labels)

    metrics = {
        'loss': running_loss / len(dataloader),
        'accuracy': accuracy_metric.compute().item(),
        'precision': precision_metric.compute().item(),
        'recall': recall_metric.compute().item(),
        'auroc': auroc_metric.compute().item(),
        'predictions': all_preds,
        'labels': all_labels,
        'outputs': all_outputs
    }

    #reset metrics
    accuracy_metric.reset()
    precision_metric.reset()
    recall_metric.reset()
    auroc_metric.reset()

    return metrics

## GradCAM Implementation

Gradient-weighted Class Activation Mapping for model interpretability

In [None]:
#simple GradCAM implementation
def simple_gradcam(model, input_tensor, target_layer, device):
    """
    Simple GradCAM implementation without external libraries
    Args:
        model: PyTorch model
        input_tensor: Input tensor (already properly shaped)
        target_layer: Layer to visualize
        device: Device to run on
    """
    #ensure input is on the correct device
    input_tensor = input_tensor.to(device)
    input_tensor.requires_grad_(True)

    #set model to eval
    model.eval()

    #register hooks to get gradients and activations
    activations = []
    gradients = []

    def forward_hook(module, input, output):
        activations.append(output.detach())

    def backward_hook(module, grad_input, grad_output):
        gradients.append(grad_output[0].detach())

    handle_forward = target_layer.register_forward_hook(forward_hook)
    handle_backward = target_layer.register_backward_hook(backward_hook)

    #forward pass
    model.zero_grad()
    output = model(input_tensor)

    #get target 
    target = torch.ones(output.shape).to(device)

    
    output.backward(gradient=target)

    handle_forward.remove()
    handle_backward.remove()

    #check if we have gradients and activations
    if not gradients or not activations:
        print("No gradients or activations captured")
        return np.zeros((input_tensor.shape[2], input_tensor.shape[3]))

    #calculate weights
    pooled_gradients = torch.mean(gradients[0], dim=[0, 2, 3])

    #weight the channels by corresponding gradients
    activation = activations[0]

    #create a weighted combination of the activation maps
    weighted_activation = torch.zeros_like(activation)
    for i in range(pooled_gradients.shape[0]):
        weighted_activation[:, i, :, :] = activation[:, i, :, :] * pooled_gradients[i]

    #average the channels of the activations
    heatmap = torch.mean(weighted_activation, dim=1).squeeze()

    #ReLU on top of the heatmap
    heatmap = F.relu(heatmap)

    #Normalize
    heatmap = heatmap / (torch.max(heatmap) + 1e-10)

    return heatmap.cpu().numpy()

def visualize_simple_gradcam(model, dataloader, device, save_path, num_images=5):
    """save GradCAM visualizations using simple implementation"""
    model.eval()
    images_processed = 0
    os.makedirs(save_path, exist_ok=True)

    #determine target layer based on model type
    if hasattr(model, 'conv5'):
        target_layer = model.conv5[0]  
    else:  
        target_layer = model.layer4[-1].conv2  

    for inputs, labels in dataloader:
        batch_size = inputs.size(0)

        for j in range(batch_size):
            if images_processed >= num_images:
                return

            #process single image
            input_tensor = inputs[j:j+1].to(device) 

            try:
                #get prediction
                with torch.no_grad():
                    output = model(input_tensor)
                    pred = (output > 0.5).float().item()

                #get heatmap
                heatmap = simple_gradcam(model, input_tensor, target_layer, device)

                if heatmap.shape != (input_tensor.shape[2], input_tensor.shape[3]):
                    heatmap = cv2.resize(heatmap, (input_tensor.shape[3], input_tensor.shape[2]))

                #get original image for visualization
                img = inputs[j].permute(1, 2, 0).numpy()
                #denormalize
                mean = np.array([0.485, 0.456, 0.406])
                std = np.array([0.229, 0.224, 0.225])
                img = std * img + mean
                img = np.clip(img, 0, 1)

                #create visualization
                plt.figure(figsize=(12, 4))

                plt.subplot(1, 3, 1)
                plt.imshow(img)
                plt.title('Original Image')
                plt.axis('off')

                plt.subplot(1, 3, 2)
                plt.imshow(heatmap, cmap='jet')
                plt.title('GradCAM Heatmap')
                plt.axis('off')

                plt.subplot(1, 3, 3)
                plt.imshow(img)
                plt.imshow(heatmap, alpha=0.5, cmap='jet')
                plt.title(f'Overlay (Pred: {pred:.0f}, True: {labels[j].item():.0f})')
                plt.axis('off')

                plt.tight_layout()
                plt.savefig(os.path.join(save_path, f'gradcam_{images_processed}.png'))
                plt.close()

                images_processed += 1

            except Exception as e:
                print(f"Error processing image {j}: {e}")
                continue

## Visualization Functions

Functions for plotting learning curves and model comparisons

In [None]:
#visualization functions
def plot_learning_curves(train_metrics, val_metrics, save_path):
    """Plot learning curves from training metrics"""
    plt.figure(figsize=(20, 15))

    metrics = ['loss', 'accuracy', 'precision', 'recall', 'auroc']
    titles = ['Loss', 'Accuracy', 'Precision', 'Recall', 'AUROC']

    for i, (metric, title) in enumerate(zip(metrics, titles)):
        plt.subplot(3, 2, i+1)
        plt.plot(train_metrics[metric], label=f'Train {title}')
        plt.plot(val_metrics[metric], label=f'Validation {title}')
        plt.title(f'Training and Validation {title}')
        plt.xlabel('Epoch')
        plt.ylabel(title)
        plt.legend()
        plt.grid(True)

    plt.tight_layout()
    plt.savefig(save_path)
    plt.close()

def compare_models(custom_metrics, transfer_metrics, save_path):
    """Create comparison charts for model performance"""
    metrics = ['accuracy', 'precision', 'recall', 'auroc']

    #bar chart comparison
    plt.figure(figsize=(12, 6))
    x = np.arange(len(metrics))
    width = 0.35

    custom_values = [custom_metrics[m] for m in metrics]
    transfer_values = [transfer_metrics[m] for m in metrics]

    plt.bar(x - width/2, custom_values, width, label='Custom CNN')
    plt.bar(x + width/2, transfer_values, width, label='Transfer Learning (ResNet50)')

    plt.xlabel('Metrics')
    plt.ylabel('Score')
    plt.title('Model Performance Comparison')
    plt.xticks(x, metrics)
    plt.legend()
    plt.ylim(0, 1.0)

    #add value labels
    for i, v in enumerate(custom_values):
        plt.text(i - width/2, v + 0.02, f'{v:.3f}', ha='center')

    for i, v in enumerate(transfer_values):
        plt.text(i + width/2, v + 0.02, f'{v:.3f}', ha='center')

    plt.tight_layout()
    plt.savefig(save_path)
    plt.close()

## Main Training and Evaluation Code

The main function for training and evaluating models

In [None]:
def main():
    device = get_device()

    set_seed()

    #create checkpoint directory
    os.makedirs(CHECKPOINT_DIR, exist_ok=True)

    #load data paths
    train_filenames = glob.glob(os.path.join(DATA_PATH, 'train/*/*'))
    val_filenames = glob.glob(os.path.join(DATA_PATH, 'val/*/*'))
    test_filenames = glob.glob(os.path.join(DATA_PATH, 'test/*/*'))

    #combine train and val for better splitting
    all_train_filenames = train_filenames + val_filenames
    train_filenames, val_filenames = train_test_split(
        all_train_filenames,
        test_size=0.2,
        random_state=42,
        stratify=[1 if "PNEUMONIA" in f else 0 for f in all_train_filenames]
    )

    #count class distribution
    COUNT_NORMAL_TRAIN = len([f for f in train_filenames if "NORMAL" in f])
    COUNT_PNEUMONIA_TRAIN = len([f for f in train_filenames if "PNEUMONIA" in f])
    TRAIN_IMG_COUNT = len(train_filenames)
    VAL_IMG_COUNT = len(val_filenames)
    TEST_IMG_COUNT = len(test_filenames)

    print(f"Training images: {TRAIN_IMG_COUNT} (Normal: {COUNT_NORMAL_TRAIN}, Pneumonia: {COUNT_PNEUMONIA_TRAIN})")
    print(f"Validation images: {VAL_IMG_COUNT}")
    print(f"Test images: {TEST_IMG_COUNT}")

    #calculate class weights for handling imbalance
    weight_for_0 = (1 / COUNT_NORMAL_TRAIN) * (TRAIN_IMG_COUNT) / 2.0
    weight_for_1 = (1 / COUNT_PNEUMONIA_TRAIN) * (TRAIN_IMG_COUNT) / 2.0
    class_weights = torch.tensor([weight_for_0, weight_for_1], dtype=torch.float32).to(device)
    print(f'Weight for class 0 (Normal): {weight_for_0:.2f}')
    print(f'Weight for class 1 (Pneumonia): {weight_for_1:.2f}')

    #data augmentation
    train_transform = transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(10),
        transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)),
        transforms.ColorJitter(brightness=0.2, contrast=0.2),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    val_transform = transforms.Compose([
        transforms.Resize(IMAGE_SIZE),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])

    #create datasets
    train_ds = ChestXRayDataset(train_filenames, transform=train_transform)
    val_ds = ChestXRayDataset(val_filenames, transform=val_transform)
    test_ds = ChestXRayDataset(test_filenames, transform=val_transform)

    #create dataloaders
    train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
    val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
    test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

    #initialize the model, loss, and optimizer
    model = PneumoniaCNN(image_size=IMAGE_SIZE).to(device)
    criterion = FocalLoss(alpha=0.25, gamma=2.0)  # Use Focal Loss
    optimizer = optim.Adam(model.parameters(), lr=LEARNING_RATE, weight_decay=WEIGHT_DECAY)
    scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.1, patience=3, verbose=True)

    #set up TensorBoard for logging
    writer = SummaryWriter(log_dir='logs')

    #get a batch to verify shapes
    for images, labels in train_loader:
        print(f"Batch image shape: {images.shape}")
        print(f"Batch label shape: {labels.shape}")
        break

    #initialize tracking metrics
    train_history = {'loss': [], 'accuracy': [], 'precision': [], 'recall': [], 'auroc': []}
    val_history = {'loss': [], 'accuracy': [], 'precision': [], 'recall': [], 'auroc': []}

    #main training loop with Early Stopping
    early_stopping = EarlyStopping(patience=EARLY_STOPPING_PATIENCE, verbose=True, path=os.path.join(CHECKPOINT_DIR, 'best_model.pt'))

    print(f"Starting training for {EPOCHS} epochs...")
    for epoch in range(EPOCHS):
        #train
        train_metrics = train_epoch(model, train_loader, optimizer, criterion, class_weights, device)

        #evaluate
        val_metrics = evaluate(model, val_loader, criterion, device)

        #store metrics history
        for metric in train_metrics:
            if metric in train_history:
                train_history[metric].append(train_metrics[metric])
                val_history[metric].append(val_metrics[metric])

        scheduler.step(val_metrics['loss'])

        #print statistics
        print(f"Epoch [{epoch+1}/{EPOCHS}]")
        print(f"  Train: Loss={train_metrics['loss']:.4f}, Acc={train_metrics['accuracy']:.4f}, Prec={train_metrics['precision']:.4f}, Rec={train_metrics['recall']:.4f}, AUROC={train_metrics['auroc']:.4f}")
        print(f"  Val:   Loss={val_metrics['loss']:.4f}, Acc={val_metrics['accuracy']:.4f}, Prec={val_metrics['precision']:.4f}, Rec={val_metrics['recall']:.4f}, AUROC={val_metrics['auroc']:.4f}")

        #log to TensorBoard
        writer.add_scalars('Loss', {'train': train_metrics['loss'], 'val': val_metrics['loss']}, epoch)
        writer.add_scalars('Accuracy', {'train': train_metrics['accuracy'], 'val': val_metrics['accuracy']}, epoch)
        writer.add_scalars('Precision', {'train': train_metrics['precision'], 'val': val_metrics['precision']}, epoch)
        writer.add_scalars('Recall', {'train': train_metrics['recall'], 'val': val_metrics['recall']}, epoch)

        #early stopping
        early_stopping(val_metrics['loss'], model)
        if early_stopping.early_stop:
            print("Early stopping triggered!")
            break

    #plot learning curves
    plot_learning_curves(train_history, val_history, os.path.join(CHECKPOINT_DIR, 'learning_curves.png'))

    #load the best model and evaluate on test set
    print("Loading best model and evaluating on test set...")
    model.load_state_dict(torch.load(os.path.join(CHECKPOINT_DIR, 'best_model.pt')))
    test_metrics = evaluate(model, test_loader, criterion, device)

    print("\nCustom CNN Test Results:")
    print(f"  Loss: {test_metrics['loss']:.4f}")
    print(f"  Accuracy: {test_metrics['accuracy']:.4f}")
    print(f"  Precision: {test_metrics['precision']:.4f}")
    print(f"  Recall: {test_metrics['recall']:.4f}")
    print(f"  AUROC: {test_metrics['auroc']:.4f}")

    #generate classification report for custom model
    custom_report = classification_report(
        test_metrics['labels'].numpy(),
        test_metrics['predictions'].numpy(),
        target_names=['Normal', 'Pneumonia'],
        output_dict=True
    )

    #print detailed class-wise results for Custom CNN
    print("\nCustom CNN Class-wise Results:")
    print(f"Normal class:")
    print(f"  Precision: {custom_report['Normal']['precision']:.4f}")
    print(f"  Recall: {custom_report['Normal']['recall']:.4f}")
    print(f"  F1-Score: {custom_report['Normal']['f1-score']:.4f}")
    print(f"  Support: {int(custom_report['Normal']['support'])}")

    print(f"Pneumonia class:")
    print(f"  Precision: {custom_report['Pneumonia']['precision']:.4f}")
    print(f"  Recall: {custom_report['Pneumonia']['recall']:.4f}")
    print(f"  F1-Score: {custom_report['Pneumonia']['f1-score']:.4f}")
    print(f"  Support: {int(custom_report['Pneumonia']['support'])}")

    #visualizations and Analysis for Custom CNN
    print("\nGenerating visualizations...")
    
    #confusion Matrix
    print("Creating confusion matrix...")
    cm = confusion_matrix(test_metrics['labels'].numpy(), test_metrics['predictions'].numpy())
    plt.figure(figsize=(8, 6))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=['Normal', 'Pneumonia'], yticklabels=['Normal', 'Pneumonia'])
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix - Custom CNN')
    plt.savefig(os.path.join(CHECKPOINT_DIR, 'custom_confusion_matrix.png'))
    plt.close()

    #ROC Curve
    print("Creating ROC curve...")
    fpr, tpr, _ = roc_curve(test_metrics['labels'].numpy(), test_metrics['outputs'].numpy())
    roc_auc = auc(fpr, tpr)
    plt.figure(figsize=(8, 6))
    plt.plot(fpr, tpr, color='darkorange', lw=2, label=f'ROC curve (area = {roc_auc:.2f})')
    plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curve - Custom CNN')
    plt.legend(loc="lower right")
    plt.savefig(os.path.join(CHECKPOINT_DIR, 'custom_roc_curve.png'))
    plt.close()

    #GradCAM visualizations for model interpretability
    print("Generating GradCAM visualizations...")
    gradcam_dir = os.path.join(CHECKPOINT_DIR, 'gradcam_custom')
    os.makedirs(gradcam_dir, exist_ok=True)
    visualize_simple_gradcam(model, test_loader, device, gradcam_dir, num_images=5)

    #store metrics for comparison
    custom_results = {
        'accuracy': test_metrics['accuracy'],
        'precision': test_metrics['precision'],
        'recall': test_metrics['recall'],
        'auroc': test_metrics['auroc']
    }
    
    print(f"\nCustom CNN evaluation complete. Results saved to: {CHECKPOINT_DIR}")
    
    #return important variables that might be needed for further analysis
    return model, test_loader, device, custom_results, custom_report, test_metrics, class_weights

## Run the Training Pipeline

Execute the main function with proper multiprocessing support

In [28]:
if __name__ == '__main__':
    #this is crucial for windows multiprocessing
    multiprocessing.freeze_support()
    
    #run main function and capture returned variables
    model, test_loader, device, custom_results, custom_report, test_metrics, class_weights = main()
    
    print("\n\n" + "="*50)
    print("Starting transfer learning model training and evaluation...")
    
    print("Custom CNN fpr and tpr are available for comparison with transfer learning results.")
    
    print("\nExecution complete!")

Device: cuda
Number of GPUs: 1
GPU Name: NVIDIA GeForce RTX 3050 Laptop GPU
PyTorch Version: 2.5.1
Training images: 4185 (Normal: 1079, Pneumonia: 3106)
Validation images: 1047
Test images: 624
Weight for class 0 (Normal): 1.94
Weight for class 1 (Pneumonia): 0.67




Batch image shape: torch.Size([32, 3, 150, 150])
Batch label shape: torch.Size([32])
Starting training for 20 epochs...
Epoch [1/20]
  Train: Loss=0.0231, Acc=0.8370, Prec=0.8734, Rec=0.9127, AUROC=0.8917
  Val:   Loss=0.0119, Acc=0.9398, Prec=0.9648, Rec=0.9537, AUROC=0.9815
Validation loss decreased (inf --> 0.011896). Saving model...
Epoch [2/20]
  Train: Loss=0.0140, Acc=0.9142, Prec=0.9415, Rec=0.9430, AUROC=0.9642
  Val:   Loss=0.0189, Acc=0.8730, Prec=0.9969, Rec=0.8314, AUROC=0.9908
EarlyStopping counter: 1 out of 5
Epoch [3/20]
  Train: Loss=0.0093, Acc=0.9470, Prec=0.9661, Rec=0.9623, AUROC=0.9843
  Val:   Loss=0.0065, Acc=0.9561, Prec=0.9574, Rec=0.9846, AUROC=0.9936
Validation loss decreased (0.011896 --> 0.006508). Saving model...
Epoch [4/20]
  Train: Loss=0.0088, Acc=0.9524, Prec=0.9666, Rec=0.9694, AUROC=0.9859
  Val:   Loss=0.0076, Acc=0.9589, Prec=0.9973, Rec=0.9472, AUROC=0.9962
EarlyStopping counter: 1 out of 5
Epoch [5/20]
  Train: Loss=0.0071, Acc=0.9615, Prec=0.9

  model.load_state_dict(torch.load(os.path.join(CHECKPOINT_DIR, 'best_model.pt')))



Custom CNN Test Results:
  Loss: 0.0537
  Accuracy: 0.7804
  Precision: 0.7410
  Recall: 0.9974
  AUROC: 0.9480

Custom CNN Class-wise Results:
Normal class:
  Precision: 0.9899
  Recall: 0.4188
  F1-Score: 0.5886
  Support: 234
Pneumonia class:
  Precision: 0.7410
  Recall: 0.9974
  F1-Score: 0.8503
  Support: 390

Generating visualizations...
Creating confusion matrix...
Creating ROC curve...
Generating GradCAM visualizations...


  self._maybe_warn_non_full_backward_hook(args, result, grad_fn)



Custom CNN evaluation complete. Results saved to: checkpoints


Starting transfer learning model training and evaluation...
Custom CNN fpr and tpr are available for comparison with transfer learning results.

Execution complete!


## Transfer Learning with ResNet50

Training a model using transfer learning with ResNet50

In [None]:
#define device before using it
import torch
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")


#recreate data loaders
DATA_PATH = r'E:\neat\data set kaggle pnemonia\chest_xray' 
BATCH_SIZE = 32
IMAGE_SIZE = (150, 150)
EPOCHS = 20
LEARNING_RATE = 1e-4
WEIGHT_DECAY = 1e-5
NUM_WORKERS = 0
EARLY_STOPPING_PATIENCE = 5
CHECKPOINT_DIR = "checkpoints"
os.makedirs(CHECKPOINT_DIR, exist_ok=True)

train_filenames = glob.glob(os.path.join(DATA_PATH, 'train/*/*'))
val_filenames = glob.glob(os.path.join(DATA_PATH, 'val/*/*'))
test_filenames = glob.glob(os.path.join(DATA_PATH, 'test/*/*'))

#count class distribution for class weights
COUNT_NORMAL_TRAIN = len([f for f in train_filenames if "NORMAL" in f])
COUNT_PNEUMONIA_TRAIN = len([f for f in train_filenames if "PNEUMONIA" in f])
TRAIN_IMG_COUNT = len(train_filenames)

#calculate class weights
weight_for_0 = (1 / COUNT_NORMAL_TRAIN) * (TRAIN_IMG_COUNT) / 2.0
weight_for_1 = (1 / COUNT_PNEUMONIA_TRAIN) * (TRAIN_IMG_COUNT) / 2.0
class_weights = torch.tensor([weight_for_0, weight_for_1], dtype=torch.float32).to(device)

#create transforms
val_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

train_transform = transforms.Compose([
    transforms.Resize(IMAGE_SIZE),
    transforms.RandomHorizontalFlip(),
    transforms.RandomRotation(10),
    transforms.RandomAffine(degrees=0, translate=(0.05, 0.05)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
])

#create datasets
class ChestXRayDataset(Dataset):
    def __init__(self, filenames, transform=None):
        self.filenames = filenames
        self.transform = transform

    def __len__(self):
        return len(self.filenames)

    def __getitem__(self, idx):
        file_path = self.filenames[idx]
        label = 1 if "PNEUMONIA" in file_path else 0

        #load and process image
        img = Image.open(file_path).convert('RGB')

        if self.transform:
            img = self.transform(img)

        return img, torch.tensor(label, dtype=torch.float32)

#create dataset instances
train_ds = ChestXRayDataset(train_filenames, transform=train_transform)
val_ds = ChestXRayDataset(val_filenames, transform=val_transform)
test_ds = ChestXRayDataset(test_filenames, transform=val_transform)

#create dataloaders
train_loader = DataLoader(train_ds, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
val_loader = DataLoader(val_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)
test_loader = DataLoader(test_ds, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)

#transfer learning model
print("\nTraining Transfer Learning Model...")

#initialize model
pretrained_model = models.resnet50(pretrained=True)
num_ftrs = pretrained_model.fc.in_features

#modify the final layer for binary classification
pretrained_model.fc = nn.Sequential(
    nn.Linear(num_ftrs, 512),
    nn.BatchNorm1d(512),
    nn.ReLU(inplace=True),
    nn.Dropout(0.4),
    nn.Linear(512, 128),
    nn.BatchNorm1d(128),
    nn.ReLU(inplace=True),
    nn.Dropout(0.2),
    nn.Linear(128, 1),
    nn.Sigmoid()
)

pretrained_model = pretrained_model.to(device)

#freeze early layers
for param in list(pretrained_model.parameters())[:-36]:  # Freeze all except the last few layers
    param.requires_grad = False

#optimizer and loss for transfer learning
transfer_criterion = FocalLoss(alpha=0.25, gamma=2.0)
transfer_optimizer = optim.Adam(
    filter(lambda p: p.requires_grad, pretrained_model.parameters()),
    lr=LEARNING_RATE,
    weight_decay=WEIGHT_DECAY
)
transfer_scheduler = ReduceLROnPlateau(
    transfer_optimizer,
    mode='min',
    factor=0.1,
    patience=3,
    verbose=True
)

#early stopping for transfer learning
transfer_early_stopping = EarlyStopping(
    patience=EARLY_STOPPING_PATIENCE,
    verbose=True,
    path=os.path.join(CHECKPOINT_DIR, 'best_transfer_model.pt')
)

#initialize history for transfer learning
transfer_train_history = {'loss': [], 'accuracy': [], 'precision': [], 'recall': [], 'auroc': []}
transfer_val_history = {'loss': [], 'accuracy': [], 'precision': [], 'recall': [], 'auroc': []}

#train the transfer learning model
for epoch in range(EPOCHS):
    #train
    train_metrics = train_epoch(
        pretrained_model,
        train_loader,
        transfer_optimizer,
        transfer_criterion,
        class_weights,
        device
    )

    #evaluate
    val_metrics = evaluate(pretrained_model, val_loader, transfer_criterion, device)

    #store metrics history
    for metric in train_metrics:
        if metric in transfer_train_history:
            transfer_train_history[metric].append(train_metrics[metric])
            transfer_val_history[metric].append(val_metrics[metric])

    #scheduler step
    transfer_scheduler.step(val_metrics['loss'])

    #print statistics
    print(f"Epoch [{epoch+1}/{EPOCHS}]")
    print(f"  Train: Loss={train_metrics['loss']:.4f}, Acc={train_metrics['accuracy']:.4f}, Prec={train_metrics['precision']:.4f}, Rec={train_metrics['recall']:.4f}, AUROC={train_metrics['auroc']:.4f}")
    print(f"  Val:   Loss={val_metrics['loss']:.4f}, Acc={val_metrics['accuracy']:.4f}, Prec={val_metrics['precision']:.4f}, Rec={val_metrics['recall']:.4f}, AUROC={val_metrics['auroc']:.4f}")

    #early stopping
    transfer_early_stopping(val_metrics['loss'], pretrained_model)
    if transfer_early_stopping.early_stop:
        print("Early stopping triggered!")
        break

#plot learning curves for transfer learning
plot_learning_curves(
    transfer_train_history,
    transfer_val_history,
    os.path.join(CHECKPOINT_DIR, 'transfer_learning_curves.png')
)

#load best transfer model and evaluate
pretrained_model.load_state_dict(torch.load(os.path.join(CHECKPOINT_DIR, 'best_transfer_model.pt')))
transfer_test_metrics = evaluate(pretrained_model, test_loader, transfer_criterion, device)

print("Transfer Learning Test Results:")
print(f"  Loss: {transfer_test_metrics['loss']:.4f}")
print(f"  Accuracy: {transfer_test_metrics['accuracy']:.4f}")
print(f"  Precision: {transfer_test_metrics['precision']:.4f}")
print(f"  Recall: {transfer_test_metrics['recall']:.4f}")
print(f"  AUROC: {transfer_test_metrics['auroc']:.4f}")

#visualizations and Analysis for Transfer Learning
#Confusion Matrix
transfer_cm = confusion_matrix(
    transfer_test_metrics['labels'].numpy(),
    transfer_test_metrics['predictions'].numpy()
)
plt.figure(figsize=(8, 6))
sns.heatmap(
    transfer_cm,
    annot=True,
    fmt='d',
    cmap='Blues',
    xticklabels=['Normal', 'Pneumonia'],
    yticklabels=['Normal', 'Pneumonia']
)
plt.xlabel('Predicted')
plt.ylabel('True')
plt.title('Confusion Matrix - Transfer Learning (ResNet50)')
plt.savefig(os.path.join(CHECKPOINT_DIR, 'transfer_confusion_matrix.png'))
plt.close()

#ROC Curve
transfer_fpr, transfer_tpr, _ = roc_curve(
    transfer_test_metrics['labels'].numpy(),
    transfer_test_metrics['outputs'].numpy()
)
transfer_roc_auc = auc(transfer_fpr, transfer_tpr)
plt.figure(figsize=(8, 6))
plt.plot(
    transfer_fpr,
    transfer_tpr,
    color='darkorange',
    lw=2,
    label=f'ROC curve (area = {transfer_roc_auc:.2f})'
)
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curve - Transfer Learning (ResNet50)')
plt.legend(loc="lower right")
plt.savefig(os.path.join(CHECKPOINT_DIR, 'transfer_roc_curve.png'))
plt.close()

#GradCAM Visualizations for Transfer Learning
transfer_gradcam_dir = os.path.join(CHECKPOINT_DIR, 'gradcam_transfer')
os.makedirs(transfer_gradcam_dir, exist_ok=True)
visualize_simple_gradcam(pretrained_model, test_loader, device, transfer_gradcam_dir, num_images=5)

#Classification Report
transfer_report = classification_report(
    transfer_test_metrics['labels'].numpy(),
    transfer_test_metrics['predictions'].numpy(),
    target_names=['Normal', 'Pneumonia'],
    output_dict=True
)

#store metrics for comparison
transfer_results = {
    'accuracy': transfer_test_metrics['accuracy'],
    'precision': transfer_test_metrics['precision'],
    'recall': transfer_test_metrics['recall'],
    'auroc': transfer_test_metrics['auroc']
}

Using device: cuda

Training Transfer Learning Model...




Epoch [1/20]
  Train: Loss=0.0141, Acc=0.9262, Prec=0.9597, Rec=0.9401, AUROC=0.9727
  Val:   Loss=0.0378, Acc=0.7500, Prec=0.6667, Rec=1.0000, AUROC=0.8750
Validation loss decreased (inf --> 0.037812). Saving model...
Epoch [2/20]
  Train: Loss=0.0096, Acc=0.9530, Prec=0.9659, Rec=0.9711, AUROC=0.9870
  Val:   Loss=0.0351, Acc=0.7500, Prec=0.6667, Rec=1.0000, AUROC=0.9531
Validation loss decreased (0.037812 --> 0.035098). Saving model...
Epoch [3/20]
  Train: Loss=0.0072, Acc=0.9628, Prec=0.9716, Rec=0.9786, AUROC=0.9910
  Val:   Loss=0.0456, Acc=0.6250, Prec=0.5714, Rec=1.0000, AUROC=1.0000
EarlyStopping counter: 1 out of 5
Epoch [4/20]
  Train: Loss=0.0064, Acc=0.9688, Prec=0.9779, Rec=0.9801, AUROC=0.9930
  Val:   Loss=0.0400, Acc=0.6875, Prec=0.6154, Rec=1.0000, AUROC=0.9688
EarlyStopping counter: 2 out of 5
Epoch [5/20]
  Train: Loss=0.0058, Acc=0.9726, Prec=0.9797, Rec=0.9835, AUROC=0.9943
  Val:   Loss=0.0259, Acc=0.8125, Prec=0.7273, Rec=1.0000, AUROC=1.0000
Validation loss de

  pretrained_model.load_state_dict(torch.load(os.path.join(CHECKPOINT_DIR, 'best_transfer_model.pt')))


Transfer Learning Test Results:
  Loss: 0.0295
  Accuracy: 0.8766
  Precision: 0.8410
  Recall: 0.9897
  AUROC: 0.9451


  self._maybe_warn_non_full_backward_hook(args, result, grad_fn)


## Model Comparison and Results

Comparing the performance of custom CNN and transfer learning models

In [None]:
#define constants and paths
CHECKPOINT_DIR = "checkpoints"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

#load the custom model
print("Loading best custom model...")
custom_model = PneumoniaCNN().to(device)
try:
    custom_model.load_state_dict(torch.load(os.path.join(CHECKPOINT_DIR, 'best_model.pt')))
    print("Custom model loaded successfully.")
except Exception as e:
    print(f"Error loading custom model: {e}")
    print("Please run the main() function first to train the custom model.")
    import sys
    sys.exit()

try:
    pretrained_model
    transfer_test_metrics
    transfer_report
except NameError:
    print("Transfer learning model not found.")
    print("Please run the transfer learning cell first.")
    import sys
    sys.exit()

#evaluate the custom model on the test set
print("Evaluating custom model on test set...")
criterion = FocalLoss(alpha=0.25, gamma=2.0)
custom_test_metrics = evaluate(custom_model, test_loader, criterion, device)

#print overall custom CNN results
print("\nCustom CNN Test Results:")
print(f"  Loss: {custom_test_metrics['loss']:.4f}")
print(f"  Accuracy: {custom_test_metrics['accuracy']:.4f}")
print(f"  Precision: {custom_test_metrics['precision']:.4f}")
print(f"  Recall: {custom_test_metrics['recall']:.4f}")
print(f"  AUROC: {custom_test_metrics['auroc']:.4f}")

#generate classification report for custom model
custom_report = classification_report(
    custom_test_metrics['labels'].numpy(),
    custom_test_metrics['predictions'].numpy(),
    target_names=['Normal', 'Pneumonia'],
    output_dict=True
)

#print detailed class-wise results for Custom CNN
print("\nCustom CNN Class-wise Results:")
print(f"Normal class:")
print(f"  Precision: {custom_report['Normal']['precision']:.4f}")
print(f"  Recall: {custom_report['Normal']['recall']:.4f}")
print(f"  F1-Score: {custom_report['Normal']['f1-score']:.4f}")
print(f"  Support: {int(custom_report['Normal']['support'])}")

print(f"Pneumonia class:")
print(f"  Precision: {custom_report['Pneumonia']['precision']:.4f}")
print(f"  Recall: {custom_report['Pneumonia']['recall']:.4f}")
print(f"  F1-Score: {custom_report['Pneumonia']['f1-score']:.4f}")
print(f"  Support: {int(custom_report['Pneumonia']['support'])}")

#get ROC curve data for custom model
fpr, tpr, _ = roc_curve(
    custom_test_metrics['labels'].numpy(),
    custom_test_metrics['outputs'].numpy()
)
roc_auc = auc(fpr, tpr)

#store custom model results
custom_results = {
    'accuracy': custom_test_metrics['accuracy'],
    'precision': custom_test_metrics['precision'],
    'recall': custom_test_metrics['recall'],
    'auroc': custom_test_metrics['auroc']
}

#print transfer learning results for comparison
print("\nTransfer Learning Test Results:")
print(f"  Loss: {transfer_test_metrics['loss']:.4f}")
print(f"  Accuracy: {transfer_test_metrics['accuracy']:.4f}")
print(f"  Precision: {transfer_test_metrics['precision']:.4f}")
print(f"  Recall: {transfer_test_metrics['recall']:.4f}")
print(f"  AUROC: {transfer_test_metrics['auroc']:.4f}")

#print detailed class-wise results for Transfer Learning
print("\nTransfer Learning Class-wise Results:")
print(f"Normal class:")
print(f"  Precision: {transfer_report['Normal']['precision']:.4f}")
print(f"  Recall: {transfer_report['Normal']['recall']:.4f}")
print(f"  F1-Score: {transfer_report['Normal']['f1-score']:.4f}")
print(f"  Support: {int(transfer_report['Normal']['support'])}")

print(f"Pneumonia class:")
print(f"  Precision: {transfer_report['Pneumonia']['precision']:.4f}")
print(f"  Recall: {transfer_report['Pneumonia']['recall']:.4f}")
print(f"  F1-Score: {transfer_report['Pneumonia']['f1-score']:.4f}")
print(f"  Support: {int(transfer_report['Pneumonia']['support'])}")

#compare both models
print("\nGenerating comparison visualizations...")
compare_models(
    custom_results,
    transfer_results,
    os.path.join(CHECKPOINT_DIR, 'model_comparison.png')
)

#create a comparison table
comparison_df = pd.DataFrame({
    'Metric': ['Accuracy', 'Precision', 'Recall', 'AUROC'],
    'Custom CNN': [custom_results[m] for m in ['accuracy', 'precision', 'recall', 'auroc']],
    'Transfer Learning (ResNet50)': [transfer_results[m] for m in ['accuracy', 'precision', 'recall', 'auroc']]
})

print("\nModel Comparison:")
print(comparison_df)
comparison_df.to_csv(os.path.join(CHECKPOINT_DIR, 'model_comparison.csv'), index=False)

#combined ROC curves
plt.figure(figsize=(10, 8))
plt.plot(
    fpr,
    tpr,
    color='blue',
    lw=2,
    label=f'Custom CNN (AUC = {roc_auc:.3f})'
)
plt.plot(
    transfer_fpr,
    transfer_tpr,
    color='red',
    lw=2,
    label=f'Transfer Learning (AUC = {transfer_roc_auc:.3f})'
)
plt.plot([0, 1], [0, 1], color='navy', lw=2, linestyle='--')
plt.xlim([0.0, 1.0])
plt.ylim([0.0, 1.05])
plt.xlabel('False Positive Rate')
plt.ylabel('True Positive Rate')
plt.title('ROC Curves Comparison')
plt.legend(loc="lower right")
plt.grid(True, linestyle='--', alpha=0.7)
plt.savefig(os.path.join(CHECKPOINT_DIR, 'combined_roc_curves.png'))
plt.close()

#create a detailed class-wise performance comparison
class_comparison = {
    'Model': ['Custom CNN', 'Custom CNN', 'Transfer Learning', 'Transfer Learning'],
    'Class': ['Normal', 'Pneumonia', 'Normal', 'Pneumonia'],
    'Precision': [
        custom_report['Normal']['precision'],
        custom_report['Pneumonia']['precision'],
        transfer_report['Normal']['precision'],
        transfer_report['Pneumonia']['precision']
    ],
    'Recall': [
        custom_report['Normal']['recall'],
        custom_report['Pneumonia']['recall'],
        transfer_report['Normal']['recall'],
        transfer_report['Pneumonia']['recall']
    ],
    'F1-Score': [
        custom_report['Normal']['f1-score'],
        custom_report['Pneumonia']['f1-score'],
        transfer_report['Normal']['f1-score'],
        transfer_report['Pneumonia']['f1-score']
    ]
}

class_comparison_df = pd.DataFrame(class_comparison)
print("\nClass-wise Performance Comparison:")
print(class_comparison_df)
class_comparison_df.to_csv(os.path.join(CHECKPOINT_DIR, 'class_performance_comparison.csv'), index=False)

print("\nTraining complete! Models and visualizations saved to:", CHECKPOINT_DIR)

Using device: cuda
Loading best custom model...
Custom model loaded successfully.
Evaluating custom model on test set...


  custom_model.load_state_dict(torch.load(os.path.join(CHECKPOINT_DIR, 'best_model.pt')))



Custom CNN Test Results:
  Loss: 0.0537
  Accuracy: 0.7804
  Precision: 0.7410
  Recall: 0.9974
  AUROC: 0.9480

Custom CNN Class-wise Results:
Normal class:
  Precision: 0.9899
  Recall: 0.4188
  F1-Score: 0.5886
  Support: 234
Pneumonia class:
  Precision: 0.7410
  Recall: 0.9974
  F1-Score: 0.8503
  Support: 390

Transfer Learning Test Results:
  Loss: 0.0295
  Accuracy: 0.8766
  Precision: 0.8410
  Recall: 0.9897
  AUROC: 0.9451

Transfer Learning Class-wise Results:
Normal class:
  Precision: 0.9758
  Recall: 0.6880
  F1-Score: 0.8070
  Support: 234
Pneumonia class:
  Precision: 0.8410
  Recall: 0.9897
  F1-Score: 0.9093
  Support: 390

Generating comparison visualizations...

Model Comparison:
      Metric  Custom CNN  Transfer Learning (ResNet50)
0   Accuracy    0.780449                      0.876603
1  Precision    0.740952                      0.840959
2     Recall    0.997436                      0.989744
3      AUROC    0.948006                      0.945080

Class-wise Per

Due to the limits on google collab I had to run the models locally using my gpu in vscode so there was a problem in showing the plots and visualizations(explainable AI) so they are not shown in the outputs of the notebook but the insights have been mentioned in the document of the project.