In [1]:
import gdown
import zipfile

url = 'https://drive.google.com/uc?id=1h3WmTxwDAKy2eRw0SCXcNVFdreuquRQN'
output = 'new_data.zip'
gdown.download(url, output, quiet=False)

# Unzip the new_data.zip file
with zipfile.ZipFile("new_data.zip", "r") as zip_ref:
    zip_ref.extractall("dataset")

Downloading...
From (original): https://drive.google.com/uc?id=1h3WmTxwDAKy2eRw0SCXcNVFdreuquRQN
From (redirected): https://drive.google.com/uc?id=1h3WmTxwDAKy2eRw0SCXcNVFdreuquRQN&confirm=t&uuid=c468cc47-851d-484a-828c-ee87f8a2b79f
To: /content/new_data.zip
100%|██████████| 1.23G/1.23G [00:21<00:00, 58.4MB/s]


In [2]:
## Import libraries
import os

import cv2
import numpy as np
import pandas as pd
import torch
import torch.nn.functional as F
from PIL import Image
from skimage.measure import label, regionprops
from sklearn.metrics import (
    accuracy_score,
)
from torch import nn, optim
from imblearn.over_sampling import RandomOverSampler
from torch.utils.data import DataLoader, Subset
from torchvision import datasets, transforms, models
from tqdm import tqdm
from collections import Counter


def resize_image(image, target_size=(224, 224)):
    """Resizes an input image to the specified target size.

    Args:
        image (PIL Image or Torch Tensor): Input image to be resized.
        target_size (tuple): Desired output size (height, width).

    Returns:
        PIL Image: Resized image.

    """
    transform = transforms.Resize(target_size)
    if isinstance(image, torch.Tensor):
        image = transforms.ToPILImage()(image)
    return transform(image)


def histogram_equalization(image):
    """Performs histogram equalization on a grayscale image and returns a tensor.

    Args:
        image (PIL Image or Torch Tensor): Input grayscale image.

    Returns:
        torch.Tensor: Equalized image as a tensor.

    """
    # Convert the image to a NumPy array if it's a PIL Image
    if isinstance(image, Image.Image):
        image = np.array(image.convert("L"))  # Convert to grayscale
    elif isinstance(image, torch.Tensor):
        image = image.numpy()

    # Ensure the image is single-channel and np.uint8
    if image.ndim == 3 and image.shape[0] == 1:  # If shape is (1, H, W)
        image = image.squeeze(0)  # Remove the extra channel dimension
    elif image.ndim == 3 and image.shape[-1] == 1:  # If shape is (H, W, 1)
        image = image.squeeze(-1)

    if image.dtype != np.uint8:
        image = image.astype(np.uint8)

    equalized_image = cv2.equalizeHist(image)
    equalized_pil_image = Image.fromarray(equalized_image)
    # equalized_tensor = torch.from_numpy(equalized_image).float().unsqueeze(0)  # Add channel dimension for grayscale
    return equalized_pil_image


def gaussian_blur(image, kernel_size=(5, 5), sigma=0):
    """Applies Gaussian blur to a grayscale image.

    Args:
        image (PIL Image or Torch Tensor): Input grayscale image.
        kernel_size (tuple): Size of the Gaussian kernel.
        sigma (float): Standard deviation for Gaussian kernel.
                       If 0, it will be calculated based on the kernel size.

    Returns:
        torch.Tensor: Blurred image as a tensor.

    """  # noqa: D401
    # Convert to NumPy array if the image is a PIL Image
    if isinstance(image, Image.Image):
        image = np.array(image)
    elif isinstance(image, torch.Tensor):
        # Convert to NumPy if the input is a tensor
        image = image.numpy()

    # If the image has a channel dimension (1, H, W), squeeze it to (H, W)
    if image.ndim == 3 and image.shape[0] == 1:
        image = np.squeeze(image, axis=0)

    blurred_image = cv2.GaussianBlur(image, kernel_size, sigma)
    blurred_tensor = (
        torch.from_numpy(blurred_image).float().unsqueeze(0)
    )  # Add channel dimension for grayscale

    return blurred_tensor


def bilateral_filter(image, diameter=5, sigma_color=75, sigma_space=75):
    """Applies a bilateral filter to a grayscale image.

    Args:
        image (PIL Image, NumPy array, or Torch Tensor): Input grayscale image.
        diameter (int): Diameter of each pixel neighborhood used in the filter.
        sigma_color (float): Filter sigma in the color space.
        sigma_space (float): Filter sigma in the coordinate space.

    Returns:
        torch.Tensor: Filtered image as a tensor.

    """  # noqa: D401
    # Convert to NumPy array if the image is a PIL Image
    if isinstance(image, Image.Image):
        image = np.array(image)
    elif isinstance(image, torch.Tensor):
        image = image.numpy()

    # If the image has a channel dimension (1, H, W), squeeze it to (H, W)
    if image.ndim == 3 and image.shape[0] == 1:
        image = np.squeeze(image, axis=0)

    if image.dtype != np.uint8:
        image = (255 * (image - image.min()) / (image.max() - image.min())).astype(
            np.uint8
        )

    # Apply bilateral filter using OpenCV
    filtered_image = cv2.bilateralFilter(image, diameter, sigma_color, sigma_space)

    # Convert back to a PyTorch tensor
    filtered_tensor = (
        torch.from_numpy(filtered_image).float().unsqueeze(0)
    )  # Add back channel dimension for grayscale

    return filtered_tensor


def adaptive_masking(image, closing_kernel_size=(5, 5)):
    """Applies adaptive masking by removing the diaphragm from a grayscale image.

    Args:
        image (PIL Image, NumPy array, or Torch Tensor): Input grayscale image.
        closing_kernel_size (tuple): Size of the structuring element for morphological closing.

    Returns:
        torch.Tensor: Image with diaphragm removed as a tensor.

    """
    # Convert to NumPy array if the image is a PIL Image
    if isinstance(image, Image.Image):
        image = np.array(image.convert("L"))  # Ensure grayscale
    elif isinstance(image, torch.Tensor):
        # Convert to NumPy if the input is a tensor
        image = image.numpy()

    # If the image has a channel dimension (1, H, W), squeeze it to (H, W)
    if image.ndim == 3 and image.shape[0] == 1:
        image = np.squeeze(image, axis=0)

    # Step 1: Find max and min intensity values
    min_intensity = np.min(image)
    max_intensity = np.max(image)

    # Step 2: Calculate threshold using the formula: threshold = min + 0.9 * (max - min)
    threshold_value = min_intensity + 0.9 * (max_intensity - min_intensity)

    # Step 3: Apply binary thresholding
    _, binary_mask = cv2.threshold(image, threshold_value, 255, cv2.THRESH_BINARY)

    # Step 4: Label connected regions and keep only the largest region
    labeled_mask = label(binary_mask)
    regions = regionprops(labeled_mask)
    if not regions:
        print("No regions found in the binary mask.")
        return torch.from_numpy(image).float().unsqueeze(0)

    # Identify the largest connected region
    largest_region = max(regions, key=lambda r: r.area)

    # Create a mask with only the largest region filled
    diaphragm_mask = np.zeros_like(binary_mask, dtype=np.uint8)
    diaphragm_mask[labeled_mask == largest_region.label] = 255

    # Step 5: Fill any holes in the diaphragm region
    diaphragm_mask = cv2.morphologyEx(
        diaphragm_mask, cv2.MORPH_CLOSE, np.ones((3, 3), np.uint8)
    )

    # Step 6: Apply morphological closing to smooth mask (remove small holes)
    kernel = np.ones(closing_kernel_size, np.uint8)
    diaphragm_mask = cv2.morphologyEx(diaphragm_mask, cv2.MORPH_CLOSE, kernel)

    # Step 7: Bitwise operation to remove diaphragm from the source image
    result_image = cv2.bitwise_and(image, image, mask=cv2.bitwise_not(diaphragm_mask))

    equalized_pil_image = Image.fromarray(result_image)

    return equalized_pil_image

In [3]:
preprocess_types = {
    "baseline": [resize_image],
    "histogram_equalization": [resize_image, histogram_equalization],
    "gaussian_blur": [resize_image, histogram_equalization, gaussian_blur],
    "bilateral_filer": [resize_image, histogram_equalization, bilateral_filter],
    "adaptive_masking": [resize_image, adaptive_masking],
    "adaptive_masking_equalized": [
        resize_image,
        adaptive_masking,
        histogram_equalization,
    ],
    "adaptive_masking_gaussian": [
        resize_image,
        adaptive_masking,
        histogram_equalization,
        gaussian_blur,
    ],
    "adaptive_masking_bilateral": [
        resize_image,
        adaptive_masking,
        histogram_equalization,
        bilateral_filter,
    ],
}

In [4]:
def train_model(
    model, train_loader, val_loader, criterion, optimizer, device, num_epochs=10
):
    """Trains and validates a model for a specified number of epochs.

    Parameters
    ----------
        model: PyTorch model
        train_loader: DataLoader for training data
        val_loader: DataLoader for validation data
        criterion: Loss function
        optimizer: Optimizer
        device: Device to train on ('cuda' or 'cpu')
        num_epochs: Number of epochs

    Returns
    -------
        history: Dictionary containing training and validation loss and accuracy

    """
    model.to(device)
    history = {
        "train_loss": [],
        "train_acc": [],
        "val_loss": [],
        "val_acc": [],
    }

    for epoch in range(num_epochs):
        print(f"Epoch {epoch + 1}/{num_epochs}")

        # Training phase
        model.train()
        running_loss = 0.0
        correct = 0
        total = 0

        for inputs, labels in tqdm(train_loader, desc="Training"):
            inputs, labels = inputs.to(device), labels.to(device)

            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()
            _, predictions = torch.max(outputs, 1)
            correct += (predictions == labels).sum().item()
            total += labels.size(0)

        train_loss = running_loss / len(train_loader)
        train_acc = correct / total

        # Validation phase
        model.eval()
        running_loss = 0.0
        correct = 0
        total = 0

        with torch.no_grad():
            for inputs, labels in tqdm(val_loader, desc="Validation"):
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                loss = criterion(outputs, labels)

                running_loss += loss.item()
                _, predictions = torch.max(outputs, 1)
                correct += (predictions == labels).sum().item()
                total += labels.size(0)

        val_loss = running_loss / len(val_loader)
        val_acc = correct / total

        # Logging
        print(f"Train Loss: {train_loss:.4f} - Train Acc: {train_acc:.4f}")
        print(f"Validation Loss: {val_loss:.4f} - Validation Acc: {val_acc:.4f}")
        print()

        history["train_loss"].append(train_loss)
        history["train_acc"].append(train_acc)
        history["val_loss"].append(val_loss)
        history["val_acc"].append(val_acc)

    return history


def test_model(model, test_loader, device):
    """Tests a model on a test set.

    Parameters
    ----------
        model: PyTorch model
        test_loader: DataLoader for test data
        device: Device to test on ('cuda' or 'cpu')

    Returns
    -------
        y_true: True labels
        y_pred: Predicted labels

    """
    model.eval()
    y_true = []
    y_pred = []

    with torch.no_grad():
        for inputs, labels in test_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            outputs = model(inputs)
            _, predictions = torch.max(outputs, 1)
            y_true += labels.tolist()
            y_pred += predictions.tolist()

        # compute accuracy
        accuracy = accuracy_score(y_true, y_pred)
    return accuracy

In [5]:
## Model pipeline
def model_pipelines(
    model,
    model_name,
    preprocess=None,
    root="dataset/new_data",
    save_path="models_pretrained",
):
    # Result table
    results = np.array([["Preprocess", "Test Accuracy"]])
    os.makedirs(save_path, exist_ok=True)

    # Loop through the preprocess_types
    for key, value in preprocess_types.items():
        functions = preprocess_types[key]
        if preprocess is not None and key not in preprocess:
            continue
        print(f"\n===== {key} =====")
        transform = transforms.Compose(
            functions
            + [
                transforms.Lambda(
                    lambda x: x.convert("L") if isinstance(x, Image.Image) else x
                ),  # convert to grayscale
                transforms.Lambda(
                    lambda x: x
                    if isinstance(x, torch.Tensor)
                    else transforms.ToTensor()(x)
                ),  # convert to tensor (To ensure torch.Size([1, 224, 224]))
                transforms.Lambda(
                    lambda x: x.repeat(3, 1, 1) if x.shape[0] == 1 else x
                ),  # Convert single channel to RGB (3 channels)
            ]
        )

        train_data = datasets.ImageFolder(root=f"{root}/train", transform=transform)
        test_data = datasets.ImageFolder(root=f"{root}/test", transform=transform)
        val_data = datasets.ImageFolder(root=f"{root}/val", transform=transform)

        # Apply oversampling using imblearn
        targets = [sample[1] for sample in train_data.imgs]  # Extract labels
        sampler = RandomOverSampler(random_state=42)
        indices = list(range(len(targets)))
        resampled_indices, _ = sampler.fit_resample(np.array(indices).reshape(-1, 1), targets)
        resampled_indices = resampled_indices.flatten()
        resampled_dataset = Subset(train_data, resampled_indices)
        print(f"Original class distribution: {Counter(targets)}")
        print(f"Resampled class distribution: {Counter([train_data.imgs[i][1] for i in resampled_indices])}")


        train_loader = DataLoader(train_data, batch_size=32, shuffle=True)
        test_loader = DataLoader(test_data, batch_size=32, shuffle=False)
        val_loader = DataLoader(val_data, batch_size=32, shuffle=False)

        # Initialize the model
        device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
        model = model.to(device)

        # Define the loss function and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.001)

        # Train the model
        history = train_model(
            model, train_loader, val_loader, criterion, optimizer, device, num_epochs=20
        )

        # Evaluate the model
        accuracy = test_model(model, test_loader, device)
        torch.save(model, f"{save_path}/{model_name}_{key}.pth")
        results = np.append(results, [[key, accuracy]], axis=0)
        print(f"Test Accuracy: {accuracy}")
        print("\n")

    return results

In [6]:
## Define the CNN model
class PneumoniaCNN(nn.Module):
    def __init__(self):
        super(PneumoniaCNN, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(32, 32, kernel_size=3, padding=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout1 = nn.Dropout(0.25)

        self.conv3 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.conv4 = nn.Conv2d(64, 64, kernel_size=3, padding=1)
        self.dropout2 = nn.Dropout(0.25)

        self.conv5 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(128, 128, kernel_size=3, padding=1)
        self.dropout3 = nn.Dropout(0.4)

        self.fc1 = nn.Linear(128 * 28 * 28, 512)
        self.dropout4 = nn.Dropout(0.5)
        self.fc2 = nn.Linear(512, 3)  # Three classes: NORMAL, BACTERIA, VIRUS

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        x = self.dropout1(x)

        x = F.relu(self.conv3(x))
        x = F.relu(self.conv4(x))
        x = self.pool(x)
        x = self.dropout2(x)

        x = F.relu(self.conv5(x))
        x = F.relu(self.conv6(x))
        x = self.pool(x)
        x = self.dropout3(x)

        x = x.view(-1, 128 * 28 * 28)  # Flatten the tensor
        x = F.relu(self.fc1(x))
        x = self.dropout4(x)
        x = self.fc2(x)
        return x


In [7]:
# Use the oversampled dataset
root="dataset/new_data"
# DenseNet161 with adaptive_masking_bilateral
print("\n=========================\nDenseNet161 with adaptive_masking_bilateral\n=========================")
model = models.densenet161(pretrained=True)
results = model_pipelines(model, "DenseNet161", root=root, preprocess=["adaptive_masking_bilateral"])
print(results)

# EfficientNetB1 with adaptive_masking_gaussian
print("\n=========================\nEfficientNetB1 with adaptive_masking_gaussian\n=========================")
model = models.efficientnet_b1(pretrained=True)
results = model_pipelines(model, "EfficientNetB1", root=root, preprocess=["adaptive_masking_gaussian"])
print(results)

# ResNet50 with adaptive_masking_equalized
print("\n=========================\nResNet50 with adaptive_masking_equalized\n=========================")
model = models.resnet50(pretrained=True)
results = model_pipelines(model, "ResNet50", root=root, preprocess=["adaptive_masking_equalized"])
print(results)

# VGG16 with adaptive_masking
print("\n=========================\nVGG16 with gaussian_blur\n=========================")
model = models.vgg16(pretrained=True)
results = model_pipelines(model, "VGG16", root=root, preprocess=["gaussian_blur"])
print(results)


DenseNet161 with adaptive_masking_bilateral


Downloading: "https://download.pytorch.org/models/densenet161-8d451a50.pth" to /root/.cache/torch/hub/checkpoints/densenet161-8d451a50.pth
100%|██████████| 110M/110M [00:00<00:00, 137MB/s]



===== adaptive_masking_bilateral =====
Original class distribution: Counter({0: 2338, 1: 1149, 2: 1145})
Resampled class distribution: Counter({0: 2338, 1: 2338, 2: 2338})
Epoch 1/20


Training: 100%|██████████| 145/145 [02:59<00:00,  1.24s/it]
Validation: 100%|██████████| 19/19 [00:15<00:00,  1.22it/s]


Train Loss: 0.7176 - Train Acc: 0.7478
Validation Loss: 0.7238 - Validation Acc: 0.7083

Epoch 2/20


Training: 100%|██████████| 145/145 [02:59<00:00,  1.24s/it]
Validation: 100%|██████████| 19/19 [00:15<00:00,  1.19it/s]


Train Loss: 0.4602 - Train Acc: 0.7964
Validation Loss: 0.4850 - Validation Acc: 0.7767

Epoch 3/20


Training: 100%|██████████| 145/145 [02:57<00:00,  1.23s/it]
Validation: 100%|██████████| 19/19 [00:15<00:00,  1.22it/s]


Train Loss: 0.4380 - Train Acc: 0.8068
Validation Loss: 0.4094 - Validation Acc: 0.8400

Epoch 4/20


Training: 100%|██████████| 145/145 [02:57<00:00,  1.22s/it]
Validation: 100%|██████████| 19/19 [00:15<00:00,  1.19it/s]


Train Loss: 0.4108 - Train Acc: 0.8184
Validation Loss: 0.4155 - Validation Acc: 0.8150

Epoch 5/20


Training: 100%|██████████| 145/145 [02:57<00:00,  1.22s/it]
Validation: 100%|██████████| 19/19 [00:15<00:00,  1.22it/s]


Train Loss: 0.3857 - Train Acc: 0.8275
Validation Loss: 0.4837 - Validation Acc: 0.7733

Epoch 6/20


Training: 100%|██████████| 145/145 [02:57<00:00,  1.23s/it]
Validation: 100%|██████████| 19/19 [00:15<00:00,  1.21it/s]


Train Loss: 0.3575 - Train Acc: 0.8394
Validation Loss: 0.4385 - Validation Acc: 0.8067

Epoch 7/20


Training: 100%|██████████| 145/145 [02:57<00:00,  1.22s/it]
Validation: 100%|██████████| 19/19 [00:15<00:00,  1.22it/s]


Train Loss: 0.3537 - Train Acc: 0.8426
Validation Loss: 0.6010 - Validation Acc: 0.7117

Epoch 8/20


Training: 100%|██████████| 145/145 [02:57<00:00,  1.23s/it]
Validation: 100%|██████████| 19/19 [00:15<00:00,  1.22it/s]


Train Loss: 0.3341 - Train Acc: 0.8538
Validation Loss: 0.4544 - Validation Acc: 0.8050

Epoch 9/20


Training: 100%|██████████| 145/145 [02:58<00:00,  1.23s/it]
Validation: 100%|██████████| 19/19 [00:15<00:00,  1.20it/s]


Train Loss: 0.3083 - Train Acc: 0.8666
Validation Loss: 0.4485 - Validation Acc: 0.8017

Epoch 10/20


Training: 100%|██████████| 145/145 [02:58<00:00,  1.23s/it]
Validation: 100%|██████████| 19/19 [00:15<00:00,  1.20it/s]


Train Loss: 0.2948 - Train Acc: 0.8715
Validation Loss: 0.5470 - Validation Acc: 0.7667

Epoch 11/20


Training: 100%|██████████| 145/145 [02:58<00:00,  1.23s/it]
Validation: 100%|██████████| 19/19 [00:15<00:00,  1.25it/s]


Train Loss: 0.2887 - Train Acc: 0.8748
Validation Loss: 0.7104 - Validation Acc: 0.7450

Epoch 12/20


Training: 100%|██████████| 145/145 [03:00<00:00,  1.25s/it]
Validation: 100%|██████████| 19/19 [00:15<00:00,  1.25it/s]


Train Loss: 0.2327 - Train Acc: 0.9026
Validation Loss: 0.5279 - Validation Acc: 0.8150

Epoch 13/20


Training: 100%|██████████| 145/145 [02:59<00:00,  1.24s/it]
Validation: 100%|██████████| 19/19 [00:15<00:00,  1.24it/s]


Train Loss: 0.2196 - Train Acc: 0.9106
Validation Loss: 0.5830 - Validation Acc: 0.8000

Epoch 14/20


Training: 100%|██████████| 145/145 [02:58<00:00,  1.23s/it]
Validation: 100%|██████████| 19/19 [00:15<00:00,  1.25it/s]


Train Loss: 0.2027 - Train Acc: 0.9175
Validation Loss: 0.8639 - Validation Acc: 0.7500

Epoch 15/20


Training: 100%|██████████| 145/145 [02:57<00:00,  1.22s/it]
Validation: 100%|██████████| 19/19 [00:15<00:00,  1.26it/s]


Train Loss: 0.1655 - Train Acc: 0.9298
Validation Loss: 0.6550 - Validation Acc: 0.8017

Epoch 16/20


Training: 100%|██████████| 145/145 [02:57<00:00,  1.22s/it]
Validation: 100%|██████████| 19/19 [00:15<00:00,  1.24it/s]


Train Loss: 0.1527 - Train Acc: 0.9404
Validation Loss: 0.9439 - Validation Acc: 0.7550

Epoch 17/20


Training: 100%|██████████| 145/145 [02:57<00:00,  1.23s/it]
Validation: 100%|██████████| 19/19 [00:15<00:00,  1.26it/s]


Train Loss: 0.1552 - Train Acc: 0.9404
Validation Loss: 0.5879 - Validation Acc: 0.8217

Epoch 18/20


Training: 100%|██████████| 145/145 [02:57<00:00,  1.22s/it]
Validation: 100%|██████████| 19/19 [00:14<00:00,  1.27it/s]


Train Loss: 0.1112 - Train Acc: 0.9553
Validation Loss: 0.6696 - Validation Acc: 0.8000

Epoch 19/20


Training: 100%|██████████| 145/145 [02:56<00:00,  1.22s/it]
Validation: 100%|██████████| 19/19 [00:15<00:00,  1.24it/s]


Train Loss: 0.1200 - Train Acc: 0.9560
Validation Loss: 0.7652 - Validation Acc: 0.8133

Epoch 20/20


Training: 100%|██████████| 145/145 [02:55<00:00,  1.21s/it]
Validation: 100%|██████████| 19/19 [00:15<00:00,  1.19it/s]


Train Loss: 0.0768 - Train Acc: 0.9709
Validation Loss: 0.7281 - Validation Acc: 0.8150

Test Accuracy: 0.6666666666666666


[['Preprocess' 'Test Accuracy']
 ['adaptive_masking_bilateral' '0.6666666666666666']]

EfficientNetB1 with adaptive_masking_gaussian


Downloading: "https://download.pytorch.org/models/efficientnet_b1_rwightman-bac287d4.pth" to /root/.cache/torch/hub/checkpoints/efficientnet_b1_rwightman-bac287d4.pth
100%|██████████| 30.1M/30.1M [00:00<00:00, 144MB/s]



===== adaptive_masking_gaussian =====
Original class distribution: Counter({0: 2338, 1: 1149, 2: 1145})
Resampled class distribution: Counter({0: 2338, 1: 2338, 2: 2338})
Epoch 1/20


Training: 100%|██████████| 145/145 [01:48<00:00,  1.33it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.55it/s]


Train Loss: 0.7120 - Train Acc: 0.7638
Validation Loss: 0.5805 - Validation Acc: 0.7383

Epoch 2/20


Training: 100%|██████████| 145/145 [01:50<00:00,  1.32it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.57it/s]


Train Loss: 0.4016 - Train Acc: 0.8232
Validation Loss: 0.3907 - Validation Acc: 0.8467

Epoch 3/20


Training: 100%|██████████| 145/145 [01:48<00:00,  1.34it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.54it/s]


Train Loss: 0.3425 - Train Acc: 0.8506
Validation Loss: 0.3831 - Validation Acc: 0.8250

Epoch 4/20


Training: 100%|██████████| 145/145 [01:50<00:00,  1.32it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.57it/s]


Train Loss: 0.2671 - Train Acc: 0.8912
Validation Loss: 0.4514 - Validation Acc: 0.8233

Epoch 5/20


Training: 100%|██████████| 145/145 [01:47<00:00,  1.34it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.57it/s]


Train Loss: 0.2186 - Train Acc: 0.9152
Validation Loss: 0.7490 - Validation Acc: 0.8383

Epoch 6/20


Training: 100%|██████████| 145/145 [01:49<00:00,  1.33it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.58it/s]


Train Loss: 0.1416 - Train Acc: 0.9424
Validation Loss: 0.8818 - Validation Acc: 0.7983

Epoch 7/20


Training: 100%|██████████| 145/145 [01:48<00:00,  1.34it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.56it/s]


Train Loss: 0.1484 - Train Acc: 0.9419
Validation Loss: 0.4970 - Validation Acc: 0.8150

Epoch 8/20


Training: 100%|██████████| 145/145 [01:49<00:00,  1.33it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.56it/s]


Train Loss: 0.0958 - Train Acc: 0.9665
Validation Loss: 0.8090 - Validation Acc: 0.8317

Epoch 9/20


Training: 100%|██████████| 145/145 [01:48<00:00,  1.34it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.57it/s]


Train Loss: 0.0906 - Train Acc: 0.9665
Validation Loss: 0.9284 - Validation Acc: 0.8133

Epoch 10/20


Training: 100%|██████████| 145/145 [01:50<00:00,  1.32it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.56it/s]


Train Loss: 0.0656 - Train Acc: 0.9771
Validation Loss: 0.6885 - Validation Acc: 0.8217

Epoch 11/20


Training: 100%|██████████| 145/145 [01:48<00:00,  1.34it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.56it/s]


Train Loss: 0.0488 - Train Acc: 0.9829
Validation Loss: 0.8338 - Validation Acc: 0.8317

Epoch 12/20


Training: 100%|██████████| 145/145 [01:49<00:00,  1.33it/s]
Validation: 100%|██████████| 19/19 [00:11<00:00,  1.58it/s]


Train Loss: 0.0593 - Train Acc: 0.9801
Validation Loss: 0.8826 - Validation Acc: 0.8250

Epoch 13/20


Training: 100%|██████████| 145/145 [01:49<00:00,  1.32it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.55it/s]


Train Loss: 0.0593 - Train Acc: 0.9788
Validation Loss: 1.1421 - Validation Acc: 0.7733

Epoch 14/20


Training: 100%|██████████| 145/145 [01:49<00:00,  1.32it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.55it/s]


Train Loss: 0.0755 - Train Acc: 0.9741
Validation Loss: 0.8479 - Validation Acc: 0.7850

Epoch 15/20


Training: 100%|██████████| 145/145 [01:48<00:00,  1.34it/s]
Validation: 100%|██████████| 19/19 [00:13<00:00,  1.41it/s]


Train Loss: 0.0412 - Train Acc: 0.9864
Validation Loss: 1.1022 - Validation Acc: 0.8200

Epoch 16/20


Training: 100%|██████████| 145/145 [01:48<00:00,  1.33it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.55it/s]


Train Loss: 0.0410 - Train Acc: 0.9866
Validation Loss: 0.7684 - Validation Acc: 0.8367

Epoch 17/20


Training: 100%|██████████| 145/145 [01:49<00:00,  1.33it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.57it/s]


Train Loss: 0.0302 - Train Acc: 0.9909
Validation Loss: 0.7842 - Validation Acc: 0.8383

Epoch 18/20


Training: 100%|██████████| 145/145 [01:48<00:00,  1.33it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.56it/s]


Train Loss: 0.0258 - Train Acc: 0.9907
Validation Loss: 0.8989 - Validation Acc: 0.8300

Epoch 19/20


Training: 100%|██████████| 145/145 [01:49<00:00,  1.33it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.55it/s]


Train Loss: 0.0360 - Train Acc: 0.9875
Validation Loss: 1.1395 - Validation Acc: 0.8033

Epoch 20/20


Training: 100%|██████████| 145/145 [01:48<00:00,  1.33it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.56it/s]


Train Loss: 0.0466 - Train Acc: 0.9829
Validation Loss: 0.8611 - Validation Acc: 0.8067

Test Accuracy: 0.6314102564102564


[['Preprocess' 'Test Accuracy']
 ['adaptive_masking_gaussian' '0.6314102564102564']]

ResNet50 with adaptive_masking_equalized


Downloading: "https://download.pytorch.org/models/resnet50-0676ba61.pth" to /root/.cache/torch/hub/checkpoints/resnet50-0676ba61.pth
100%|██████████| 97.8M/97.8M [00:00<00:00, 166MB/s]



===== adaptive_masking_equalized =====
Original class distribution: Counter({0: 2338, 1: 1149, 2: 1145})
Resampled class distribution: Counter({0: 2338, 1: 2338, 2: 2338})
Epoch 1/20


Training: 100%|██████████| 145/145 [02:02<00:00,  1.19it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.48it/s]


Train Loss: 0.6785 - Train Acc: 0.7563
Validation Loss: 0.4481 - Validation Acc: 0.8033

Epoch 2/20


Training: 100%|██████████| 145/145 [02:03<00:00,  1.18it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.50it/s]


Train Loss: 0.4434 - Train Acc: 0.8061
Validation Loss: 0.4320 - Validation Acc: 0.8083

Epoch 3/20


Training: 100%|██████████| 145/145 [02:02<00:00,  1.18it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.52it/s]


Train Loss: 0.3963 - Train Acc: 0.8232
Validation Loss: 0.4887 - Validation Acc: 0.8050

Epoch 4/20


Training: 100%|██████████| 145/145 [02:03<00:00,  1.17it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.47it/s]


Train Loss: 0.3767 - Train Acc: 0.8310
Validation Loss: 0.5757 - Validation Acc: 0.7200

Epoch 5/20


Training: 100%|██████████| 145/145 [02:02<00:00,  1.18it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.51it/s]


Train Loss: 0.3653 - Train Acc: 0.8418
Validation Loss: 0.7879 - Validation Acc: 0.7233

Epoch 6/20


Training: 100%|██████████| 145/145 [02:03<00:00,  1.17it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.48it/s]


Train Loss: 0.3309 - Train Acc: 0.8577
Validation Loss: 0.7697 - Validation Acc: 0.6783

Epoch 7/20


Training: 100%|██████████| 145/145 [02:03<00:00,  1.18it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.50it/s]


Train Loss: 0.2951 - Train Acc: 0.8756
Validation Loss: 0.5005 - Validation Acc: 0.7567

Epoch 8/20


Training: 100%|██████████| 145/145 [02:02<00:00,  1.18it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.48it/s]


Train Loss: 0.2737 - Train Acc: 0.8828
Validation Loss: 0.4774 - Validation Acc: 0.8000

Epoch 9/20


Training: 100%|██████████| 145/145 [02:04<00:00,  1.17it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.50it/s]


Train Loss: 0.2040 - Train Acc: 0.9147
Validation Loss: 0.6111 - Validation Acc: 0.7917

Epoch 10/20


Training: 100%|██████████| 145/145 [02:04<00:00,  1.17it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.51it/s]


Train Loss: 0.1837 - Train Acc: 0.9251
Validation Loss: 0.6877 - Validation Acc: 0.7300

Epoch 11/20


Training: 100%|██████████| 145/145 [02:01<00:00,  1.19it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.48it/s]


Train Loss: 0.1360 - Train Acc: 0.9467
Validation Loss: 0.9113 - Validation Acc: 0.7217

Epoch 12/20


Training: 100%|██████████| 145/145 [02:03<00:00,  1.17it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.51it/s]


Train Loss: 0.1572 - Train Acc: 0.9421
Validation Loss: 0.8239 - Validation Acc: 0.7450

Epoch 13/20


Training: 100%|██████████| 145/145 [02:03<00:00,  1.18it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.49it/s]


Train Loss: 0.0910 - Train Acc: 0.9676
Validation Loss: 0.9337 - Validation Acc: 0.7683

Epoch 14/20


Training: 100%|██████████| 145/145 [02:02<00:00,  1.19it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.47it/s]


Train Loss: 0.1091 - Train Acc: 0.9579
Validation Loss: 0.9501 - Validation Acc: 0.7567

Epoch 15/20


Training: 100%|██████████| 145/145 [02:03<00:00,  1.17it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.49it/s]


Train Loss: 0.0709 - Train Acc: 0.9732
Validation Loss: 0.9570 - Validation Acc: 0.7567

Epoch 16/20


Training: 100%|██████████| 145/145 [02:03<00:00,  1.17it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.51it/s]


Train Loss: 0.0869 - Train Acc: 0.9659
Validation Loss: 1.4846 - Validation Acc: 0.7383

Epoch 17/20


Training: 100%|██████████| 145/145 [02:02<00:00,  1.18it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.50it/s]


Train Loss: 0.0812 - Train Acc: 0.9693
Validation Loss: 0.9017 - Validation Acc: 0.7883

Epoch 18/20


Training: 100%|██████████| 145/145 [02:04<00:00,  1.17it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.51it/s]


Train Loss: 0.0615 - Train Acc: 0.9775
Validation Loss: 1.0793 - Validation Acc: 0.7733

Epoch 19/20


Training: 100%|██████████| 145/145 [02:03<00:00,  1.17it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.47it/s]


Train Loss: 0.0246 - Train Acc: 0.9911
Validation Loss: 1.3243 - Validation Acc: 0.7767

Epoch 20/20


Training: 100%|██████████| 145/145 [02:02<00:00,  1.18it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.51it/s]


Train Loss: 0.0568 - Train Acc: 0.9793
Validation Loss: 0.9499 - Validation Acc: 0.8000

Test Accuracy: 0.6923076923076923


[['Preprocess' 'Test Accuracy']
 ['adaptive_masking_equalized' '0.6923076923076923']]

VGG16 with gaussian_blur


Downloading: "https://download.pytorch.org/models/vgg16-397923af.pth" to /root/.cache/torch/hub/checkpoints/vgg16-397923af.pth
100%|██████████| 528M/528M [00:08<00:00, 68.1MB/s]



===== gaussian_blur =====
Original class distribution: Counter({0: 2338, 1: 1149, 2: 1145})
Resampled class distribution: Counter({0: 2338, 1: 2338, 2: 2338})
Epoch 1/20


Training: 100%|██████████| 145/145 [02:18<00:00,  1.04it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.47it/s]


Train Loss: 4.1146 - Train Acc: 0.5367
Validation Loss: 0.8193 - Validation Acc: 0.6383

Epoch 2/20


Training: 100%|██████████| 145/145 [02:18<00:00,  1.05it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.49it/s]


Train Loss: 0.6649 - Train Acc: 0.7165
Validation Loss: 0.6672 - Validation Acc: 0.6117

Epoch 3/20


Training: 100%|██████████| 145/145 [02:18<00:00,  1.05it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.48it/s]


Train Loss: 0.7723 - Train Acc: 0.6949
Validation Loss: 1.3261 - Validation Acc: 0.3333

Epoch 4/20


Training: 100%|██████████| 145/145 [02:16<00:00,  1.07it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.47it/s]


Train Loss: 2.5429 - Train Acc: 0.4985
Validation Loss: 1.1937 - Validation Acc: 0.3333

Epoch 5/20


Training: 100%|██████████| 145/145 [02:18<00:00,  1.05it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.47it/s]


Train Loss: 1.0491 - Train Acc: 0.5043
Validation Loss: 1.1887 - Validation Acc: 0.3333

Epoch 6/20


Training: 100%|██████████| 145/145 [02:17<00:00,  1.06it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.48it/s]


Train Loss: 1.0439 - Train Acc: 0.5043
Validation Loss: 1.1713 - Validation Acc: 0.3333

Epoch 7/20


Training: 100%|██████████| 145/145 [02:17<00:00,  1.06it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.51it/s]


Train Loss: 1.0406 - Train Acc: 0.5047
Validation Loss: 1.1636 - Validation Acc: 0.3333

Epoch 8/20


Training: 100%|██████████| 145/145 [02:14<00:00,  1.07it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.50it/s]


Train Loss: 1.0409 - Train Acc: 0.5047
Validation Loss: 1.1451 - Validation Acc: 0.3333

Epoch 9/20


Training: 100%|██████████| 145/145 [02:16<00:00,  1.06it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.55it/s]


Train Loss: 1.0405 - Train Acc: 0.5047
Validation Loss: 1.1481 - Validation Acc: 0.3333

Epoch 10/20


Training: 100%|██████████| 145/145 [02:16<00:00,  1.06it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.50it/s]


Train Loss: 1.0430 - Train Acc: 0.5047
Validation Loss: 1.2021 - Validation Acc: 0.3333

Epoch 11/20


Training: 100%|██████████| 145/145 [02:16<00:00,  1.07it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.53it/s]


Train Loss: 1.0420 - Train Acc: 0.5047
Validation Loss: 1.1947 - Validation Acc: 0.3333

Epoch 12/20


Training: 100%|██████████| 145/145 [02:16<00:00,  1.06it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.47it/s]


Train Loss: 1.0410 - Train Acc: 0.5047
Validation Loss: 1.1445 - Validation Acc: 0.3333

Epoch 13/20


Training: 100%|██████████| 145/145 [02:17<00:00,  1.05it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.49it/s]


Train Loss: 1.0406 - Train Acc: 0.5047
Validation Loss: 1.1500 - Validation Acc: 0.3333

Epoch 14/20


Training: 100%|██████████| 145/145 [02:19<00:00,  1.04it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.47it/s]


Train Loss: 1.0392 - Train Acc: 0.5047
Validation Loss: 1.1587 - Validation Acc: 0.3333

Epoch 15/20


Training: 100%|██████████| 145/145 [02:17<00:00,  1.05it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.49it/s]


Train Loss: 1.0403 - Train Acc: 0.5047
Validation Loss: 1.1572 - Validation Acc: 0.3333

Epoch 16/20


Training: 100%|██████████| 145/145 [02:18<00:00,  1.05it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.47it/s]


Train Loss: 1.0413 - Train Acc: 0.5047
Validation Loss: 1.1534 - Validation Acc: 0.3333

Epoch 17/20


Training: 100%|██████████| 145/145 [02:17<00:00,  1.06it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.50it/s]


Train Loss: 1.0401 - Train Acc: 0.5047
Validation Loss: 1.1280 - Validation Acc: 0.3333

Epoch 18/20


Training: 100%|██████████| 145/145 [02:17<00:00,  1.05it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.48it/s]


Train Loss: 1.0413 - Train Acc: 0.5047
Validation Loss: 1.1260 - Validation Acc: 0.3333

Epoch 19/20


Training: 100%|██████████| 145/145 [02:18<00:00,  1.05it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.47it/s]


Train Loss: 1.0423 - Train Acc: 0.5047
Validation Loss: 1.1822 - Validation Acc: 0.3333

Epoch 20/20


Training: 100%|██████████| 145/145 [02:17<00:00,  1.05it/s]
Validation: 100%|██████████| 19/19 [00:12<00:00,  1.51it/s]


Train Loss: 1.0418 - Train Acc: 0.5047
Validation Loss: 1.1466 - Validation Acc: 0.3333

Test Accuracy: 0.38782051282051283


[['Preprocess' 'Test Accuracy']
 ['gaussian_blur' '0.38782051282051283']]


In [None]:
# Data for the comparison table of the four models with their preprocess techniques and test accuracies
model_comparison_data = {
    "Model & Preprocess": [
        "DenseNet161 with adaptive_masking_bilateral",
        "EfficientNetB1 with adaptive_masking_gaussian",
        "ResNet50 with adaptive_masking_equalized",
        "VGG16 with gaussian_blur"
    ],
    "Test Accuracy": [
        "0.6666666666666666",
        "0.6314102564102564",
        "0.6923076923076923",
        "0.38782051282051283"
    ]
}

# Creating the comparison DataFrame
model_comparison_df = pd.DataFrame(model_comparison_data)
model_comparison_df["Test Accuracy"] = model_comparison_df["Test Accuracy"].astype(float) * 100
model_comparison_df.to_csv("results/2_oversampling_model_comparison.csv", index=False)

model_comparison_df = model_comparison_df.round(2)
model_comparison_df

Unnamed: 0,Model & Preprocess,Test Accuracy
0,DenseNet161 with adaptive_masking_bilateral,66.67
1,EfficientNetB1 with adaptive_masking_gaussian,63.14
2,ResNet50 with adaptive_masking_equalized,69.23
3,VGG16 with gaussian_blur,38.78
