# Library Imports

In [None]:
import os
import glob
import numpy as np
import torch
import matplotlib.pyplot as plt

# Data import

In [None]:
# Root of the data set and the labelled training folder inside it.
main_path = os.path.join('data', 'intel-mobileodt-cervical-cancer-screening')
data_path = os.path.join(main_path, 'train', 'train')

# One glob result (a list of file paths) per entry of the training folder,
# flattened into a single 1-D array of paths.
images = [
    glob.glob(os.path.join(data_path, entry, "*.*"))
    for entry in os.listdir(data_path)
]
train_paths = np.hstack(images)

# Additional data: three extra folders, scanned exactly like the one above.
extra_1 = os.path.join(main_path, 'additional_Type_1_v2')
extra_2 = os.path.join(main_path, 'additional_Type_2_v2')
extra_3 = os.path.join(main_path, 'additional_Type_3_v2')
images1, images2, images3 = (
    [glob.glob(os.path.join(root, entry, "*.*")) for entry in os.listdir(root)]
    for root in (extra_1, extra_2, extra_3)
)

# Append the extra paths to the training paths, in folder order 1, 2, 3.
for _extra_group in (images1, images2, images3):
    train_paths = np.append(train_paths, np.hstack(_extra_group))

In [None]:
print(f'In this train set we have got a total of {len(train_paths)}')

# Notebook-wide settings.
N_EPOCHS, BATCH_SIZE = 10, 32
OUTPUT_PATH = './'

# Detect and define the device: use CUDA when it is available, else the CPU.
# A plain CPU handle is kept alongside it.
cpu = torch.device('cpu')
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')
print(device)

# Image examples

In [None]:
fig = plt.figure(figsize=(8, 8), dpi=80)
rows, columns = 1, 3

# (panel title, class folder, file name) of each sample, left to right.
_samples = (
    ("Type1", 'Type_1', '0.jpg'),
    ("Type2", 'Type_2', '1.jpg'),
    ("Type3", 'Type_3', '3.jpg'),
)

_loaded = []
for _slot, (_title, _folder, _file) in enumerate(_samples, start=1):
    # Read the image, then draw it in its own axis-free panel.
    _img = plt.imread(os.path.join(data_path, _folder, _file))
    fig.add_subplot(rows, columns, _slot)
    plt.title(_title)
    plt.axis('off')
    plt.imshow(_img)
    _loaded.append(_img)

# Keep the three images available under their original names.
img_type1, img_type2, img_type3 = _loaded

# Library Imports

In [None]:
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, Dataset
from torch import nn, optim
from PIL import Image

# Define Transformations

In [None]:
# Preprocessing applied to every image: resize to 224x224, convert to a
# tensor, then normalise the three channels with a fixed mean and std.
_channel_mean = [0.485, 0.456, 0.406]
_channel_std = [0.229, 0.224, 0.225]
_steps = [
    transforms.Resize(size=(224, 224)),
    transforms.ToTensor(),
    transforms.Normalize(mean=_channel_mean, std=_channel_std),
]
transform = transforms.Compose(_steps)

# Dataset Class

In [None]:
# Custom dataset class
class CervicalCancerDataset(Dataset):
    """Dataset that loads images from a list of file paths.

    The label of a sample is derived from the name of the directory that
    contains the image: the text after the last underscore is parsed as an
    integer and made zero-based (e.g. a ``Type_1`` folder gives label ``0``).
    """

    def __init__(self, img_paths, transform=None):
        """Store the image paths and the optional per-image transform.

        Args:
            img_paths: Indexable collection of image file paths.
            transform: Optional callable applied to each loaded RGB image.
        """
        self.img_paths = img_paths
        self.transform = transform

    def __len__(self):
        """Return the number of image paths held by the dataset."""
        return len(self.img_paths)

    def __getitem__(self, idx):
        """Load one sample.

        Returns:
            ``(image, label)``, where ``image`` is the RGB image (after
            ``transform`` when one was given) and ``label`` is the zero-based
            class index, or ``(None, None)`` when the file cannot be read.

        Raises:
            ValueError: If the parent directory name does not end in
                ``_<integer>``.
        """
        img_path = self.img_paths[idx]
        try:
            # The context manager releases the underlying file handle right
            # away; convert() returns an independent in-memory copy.
            with Image.open(img_path) as raw_image:
                image = raw_image.convert('RGB')
        except IOError:
            print(f"Could not read image: {img_path}. Possibly corrupted file.")
            # NOTE(review): a DataLoader using the default collate function
            # presumably cannot batch ``None`` samples, so callers relying on
            # this should filter unreadable files beforehand - confirm.
            return None, None
        label = int(os.path.basename(os.path.dirname(img_path)).split('_')[-1]) - 1

        if self.transform:
            image = self.transform(image)

        return image, label

# Real Image Validation

In [None]:
def validate_image(image_path):
    """Return True when the file at *image_path* can be decoded as RGB.

    The image is fully decoded (via ``convert('RGB')``), so truncated or
    corrupted files are detected too; any ``IOError`` raised while opening
    or decoding yields ``False``.
    """
    try:
        # Close the file handle as soon as the check is done; this function
        # is called once per file, so handles must not be left open.
        with Image.open(image_path) as candidate:
            candidate.convert('RGB')
    except IOError:
        return False
    return True

# Keep only the paths whose image can actually be opened and decoded.
train_paths = list(filter(validate_image, train_paths))

# Dataset Creation

In [None]:
# Build the dataset over the validated image paths, applying the shared
# preprocessing transform to every image.
dataset = CervicalCancerDataset(img_paths=train_paths, transform=transform)

# Create Train and Test Dataloaders

In [None]:
from torch.utils.data import random_split

# Split the dataset into training and test sets: 80% of the samples are used
# for training and the remaining 20% are held out for testing.
train_size = int(0.8 * len(dataset))
test_size = len(dataset) - train_size
train_dataset, test_dataset = random_split(dataset, [train_size, test_size])

# Create data loaders for the training and test sets. Both use the
# notebook-wide BATCH_SIZE constant instead of repeating the literal value;
# only the training loader shuffles.
train_dataloader = DataLoader(train_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, shuffle=False)

# CUDA (GPU) or CPU

In [None]:
# Pick the first CUDA device when one is available, otherwise run on the CPU.
_device_name = "cuda:0" if torch.cuda.is_available() else "cpu"
device = torch.device(_device_name)

# Model Initialization (RESNET used)

In [None]:
# Initialize the model with pretrained weights.
# Newer torchvision releases deprecate the boolean `pretrained` flag in favour
# of `weights`; IMAGENET1K_V1 is the checkpoint `pretrained=True` used to
# select. Older releases without the weights enum keep the original call.
if hasattr(models, "ResNet50_Weights"):
    model = models.resnet50(weights=models.ResNet50_Weights.IMAGENET1K_V1)
else:
    model = models.resnet50(pretrained=True)

# Final Layer Creation

In [None]:
# Replace the classification head so the network outputs one score per class
# of this dataset (3 outputs, assuming there are 3 types of cervix). The new
# layer takes the same number of input features as the layer it replaces.
num_ftrs = model.fc.in_features
model.fc = nn.Linear(in_features=num_ftrs, out_features=3)

# Move the Model to the Chosen Device

In [None]:
# Place the model on the selected device.
model = model.to(device=device)

# Define the loss function and optimizer

In [None]:
# Loss: cross-entropy over the class scores.
criterion = nn.CrossEntropyLoss()

# Optimizer: SGD with momentum over all parameters of the model.
_trainable_params = model.parameters()
optimizer = optim.SGD(_trainable_params, momentum=0.9, lr=0.001)

# FGSM Attack Code

In [None]:
# FGSM attack code
def fgsm_attack(image, epsilon, data_grad):
    """Return *image* shifted by ``epsilon`` along the sign of *data_grad*.

    Every element moves by ``+epsilon``, ``-epsilon`` or ``0`` according to
    the sign of the matching gradient element, and the result is clamped to
    the range [0, 1].
    """
    step = epsilon * torch.sign(data_grad)
    return (image + step).clamp(min=0, max=1)

# Original Dataset Training and Adversarial Attack

In [None]:
# Training and adversarial attack (FGSM).
# Each batch is used for one ordinary training step; the input gradient of
# that same step is reused to build FGSM examples, which are only classified
# for monitoring and never trained on.
num_epochs = 10  # define the number of epochs you want
epsilon = 0.3  # Adjust as necessary
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    running_corrects = 0
    adversarial_corrects = 0
    samples_seen = 0  # training samples actually processed this epoch

    for inputs, labels in train_dataloader:
        if inputs is None or labels is None:  # Skip corrupted files
            continue

        # The inputs need a gradient so the attack can use d(loss)/d(input).
        inputs = inputs.to(device).requires_grad_(True)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

        loss.backward()

        # Gradient of the loss with respect to the input, cut from autograd.
        data_grad = inputs.grad.detach()

        # Call FGSM Attack on a detached copy of the batch.
        adversarial_data = fgsm_attack(inputs.detach(), epsilon, data_grad)

        # Re-classify the perturbed images. This is monitoring only, so no
        # autograd graph is recorded for it.
        with torch.no_grad():
            adversarial_outputs = model(adversarial_data)
        _, adversarial_preds = torch.max(adversarial_outputs, 1)

        optimizer.step()

        batch_count = inputs.size(0)
        samples_seen += batch_count
        running_loss += loss.item() * batch_count
        running_corrects += (preds == labels).sum().item()
        adversarial_corrects += (adversarial_preds == labels).sum().item()

    # Average over the samples this loop processed. len(dataset) would also
    # count the held-out test split and so understate every metric.
    denominator = max(samples_seen, 1)
    epoch_loss = running_loss / denominator
    epoch_acc = running_corrects / denominator
    adversarial_acc = adversarial_corrects / denominator

    print('Epoch {}/{} Loss: {:.4f} Acc: {:.4f} Adversarial Acc: {:.4f}'.format(epoch + 1, num_epochs, epoch_loss, epoch_acc, adversarial_acc))

print('Training complete')

# Adversarial Training

In [None]:
# Adversarial Training
# For every batch, a first forward/backward pass on the clean inputs is used
# only to obtain the input gradient for FGSM; the parameter update itself is
# computed from the loss on the resulting adversarial batch.
num_epochs = 10  # define the number of epochs you want
epsilon = 0.3  # Adjust as necessary
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    running_corrects = 0
    samples_seen = 0  # training samples actually processed this epoch

    for inputs, labels in train_dataloader:
        if inputs is None or labels is None:  # Skip corrupted files
            continue

        # The inputs need a gradient so the attack can use d(loss)/d(input).
        inputs = inputs.to(device).requires_grad_(True)
        labels = labels.to(device)

        optimizer.zero_grad()

        # Generate adversarial data from the clean-input gradient.
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        data_grad = inputs.grad.detach()
        # Detach the batch so the training backward pass below stops at the
        # adversarial images instead of flowing back into the clean inputs.
        adversarial_data = fgsm_attack(inputs.detach(), epsilon, data_grad)

        # Train on adversarial data. Parameter gradients from the clean pass
        # are cleared first so that only the adversarial loss drives the step.
        optimizer.zero_grad()
        adversarial_outputs = model(adversarial_data)
        _, adversarial_preds = torch.max(adversarial_outputs, 1)
        adversarial_loss = criterion(adversarial_outputs, labels)
        adversarial_loss.backward()
        optimizer.step()

        batch_count = inputs.size(0)
        samples_seen += batch_count
        running_loss += adversarial_loss.item() * batch_count
        running_corrects += (adversarial_preds == labels).sum().item()

    # Average over the samples this loop processed. len(dataset) would also
    # count the held-out test split and so understate every metric.
    denominator = max(samples_seen, 1)
    epoch_loss = running_loss / denominator
    epoch_acc = running_corrects / denominator

    print('Epoch {}/{} Loss: {:.4f} Acc: {:.4f}'.format(epoch + 1, num_epochs, epoch_loss, epoch_acc))

print('Training complete')

In [None]:
# Random Perturbation Attack
def random_perturbation_attack(image, epsilon, data_grad=None):
    """Return *image* plus uniform noise drawn from [-epsilon, epsilon).

    Args:
        image: Tensor to perturb.
        epsilon: Half-width of the uniform noise interval.
        data_grad: Ignored. Accepted only so this attack can be called with
            the same ``(image, epsilon, data_grad)`` arguments as
            ``fgsm_attack``.

    Returns:
        The perturbed tensor, clamped to the range [0, 1].
    """
    # Allocate the noise directly on the image's device. Integer images get
    # floating-point noise so that fractional perturbations are representable.
    noise_dtype = image.dtype if image.is_floating_point() else torch.get_default_dtype()
    perturbation = torch.empty(
        image.shape, dtype=noise_dtype, device=image.device
    ).uniform_(-epsilon, epsilon)
    # Clip the perturbed image to maintain the [0, 1] range.
    return torch.clamp(image + perturbation, 0, 1)

In [None]:
# Training and adversarial attack (random uniform perturbation).
# Each batch is used for one ordinary training step; a randomly perturbed copy
# of the batch is only classified for monitoring and never trained on.
num_epochs = 10  # define the number of epochs you want
epsilon = 0.3  # Adjust as necessary

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    running_corrects = 0
    adversarial_corrects = 0
    samples_seen = 0  # training samples actually processed this epoch

    for inputs, labels in train_dataloader:
        if inputs is None or labels is None:  # Skip corrupted files
            continue

        # This attack does not use gradients, so the inputs need none.
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

        loss.backward()

        # Call Random Perturbation Attack
        adversarial_data = random_perturbation_attack(inputs, epsilon)

        # Re-classify the perturbed images. This is monitoring only, so no
        # autograd graph is recorded for it.
        with torch.no_grad():
            adversarial_outputs = model(adversarial_data)
        _, adversarial_preds = torch.max(adversarial_outputs, 1)

        optimizer.step()

        batch_count = inputs.size(0)
        samples_seen += batch_count
        running_loss += loss.item() * batch_count
        running_corrects += (preds == labels).sum().item()
        adversarial_corrects += (adversarial_preds == labels).sum().item()

    # Average over the samples this loop processed. len(dataset) would also
    # count the held-out test split and so understate every metric.
    denominator = max(samples_seen, 1)
    epoch_loss = running_loss / denominator
    epoch_acc = running_corrects / denominator
    adversarial_acc = adversarial_corrects / denominator

    print('Epoch {}/{} Loss: {:.4f} Acc: {:.4f} Adversarial Acc: {:.4f}'.format(epoch + 1, num_epochs, epoch_loss, epoch_acc, adversarial_acc))

print('Training complete')

In [None]:
# Gaussian Noise Attack
def gaussian_noise_attack(image, epsilon, data_grad=None):
    """Return *image* plus zero-mean Gaussian noise scaled by *epsilon*.

    Args:
        image: Tensor to perturb.
        epsilon: Multiplier applied to standard-normal noise.
        data_grad: Ignored. Accepted only so this attack can be called with
            the same ``(image, epsilon, data_grad)`` arguments as
            ``fgsm_attack``.

    Returns:
        The perturbed tensor, clamped to the range [0, 1].
    """
    # Standard-normal noise with the image's shape, dtype and device.
    noise = torch.randn_like(image) * epsilon
    # Clip the perturbed image to maintain the [0, 1] range.
    return torch.clamp(image + noise, 0, 1)

In [None]:
# Training and adversarial attack (Gaussian noise).
# Each batch is used for one ordinary training step; a noisy copy of the batch
# is only classified for monitoring and never trained on.
num_epochs = 10  # define the number of epochs you want
epsilon = 0.3  # Adjust as necessary

for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    running_corrects = 0
    adversarial_corrects = 0
    samples_seen = 0  # training samples actually processed this epoch

    for inputs, labels in train_dataloader:
        if inputs is None or labels is None:  # Skip corrupted files
            continue

        # This attack does not use gradients, so the inputs need none.
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

        loss.backward()

        # Call Gaussian Noise Attack
        adversarial_data = gaussian_noise_attack(inputs, epsilon)

        # Re-classify the perturbed images. This is monitoring only, so no
        # autograd graph is recorded for it.
        with torch.no_grad():
            adversarial_outputs = model(adversarial_data)
        _, adversarial_preds = torch.max(adversarial_outputs, 1)

        optimizer.step()

        batch_count = inputs.size(0)
        samples_seen += batch_count
        running_loss += loss.item() * batch_count
        running_corrects += (preds == labels).sum().item()
        adversarial_corrects += (adversarial_preds == labels).sum().item()

    # Average over the samples this loop processed. len(dataset) would also
    # count the held-out test split and so understate every metric.
    denominator = max(samples_seen, 1)
    epoch_loss = running_loss / denominator
    epoch_acc = running_corrects / denominator
    adversarial_acc = adversarial_corrects / denominator

    print('Epoch {}/{} Loss: {:.4f} Acc: {:.4f} Adversarial Acc: {:.4f}'.format(epoch + 1, num_epochs, epoch_loss, epoch_acc, adversarial_acc))

print('Training complete')

In [None]:
# BIM attack code
def bim_attack(model, loss, images, labels, epsilon, alpha, num_iter):
    """Iteratively perturb *images* along the sign of the loss gradient.

    Each of the ``num_iter`` iterations moves the current images by ``alpha``
    in the direction of the sign of the loss gradient, limits the total
    deviation from the starting images to ``[-epsilon, epsilon]`` per
    element, and clamps the result to the range [0, 1].

    The attack leaves its arguments untouched: the caller's ``images`` tensor
    is neither modified nor has its ``requires_grad`` flag changed, and no
    ``.grad`` attribute (of the images or of the model parameters) is
    written, so it is safe to call between ``backward()`` and an optimizer
    step.

    Args:
        model: Callable mapping a batch of images to class scores.
        loss: Callable ``loss(outputs, labels)`` returning a scalar tensor.
        images: Batch of starting images.
        labels: Target labels passed to ``loss``.
        epsilon: Maximum per-element deviation from the starting images.
        alpha: Step size of one iteration.
        num_iter: Number of iterations.

    Returns:
        A new tensor of perturbed images that does not require grad.
    """
    original_images = images.detach().clone()
    adversarial = original_images.clone()
    for _ in range(num_iter):
        adversarial.requires_grad_(True)
        cost = loss(model(adversarial), labels)
        # autograd.grad returns the input gradient directly and, unlike
        # backward(), does not accumulate into any tensor's .grad.
        (input_grad,) = torch.autograd.grad(cost, adversarial)

        stepped = adversarial.detach() + alpha * input_grad.sign()
        # Project back into the epsilon-ball around the starting images,
        # then into the valid [0, 1] range.
        eta = torch.clamp(stepped - original_images, min=-epsilon, max=epsilon)
        adversarial = torch.clamp(original_images + eta, min=0, max=1).detach()

    return adversarial

In [None]:
# Training and adversarial attack (BIM).
# Each batch is used for one ordinary training step; BIM examples built from
# the batch are only classified for monitoring and never trained on.
num_epochs = 10  # define the number of epochs you want
epsilon = 0.3  # Adjust as necessary
alpha = 0.01  # step size for BIM attack
num_iter = 10  # number of iterations for BIM attack
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    running_corrects = 0
    adversarial_corrects = 0
    samples_seen = 0  # training samples actually processed this epoch

    for inputs, labels in train_dataloader:
        if inputs is None or labels is None:  # Skip corrupted files
            continue

        # The attack computes its own input gradients, so the training
        # forward pass does not need gradients on the inputs.
        inputs = inputs.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)
        loss = criterion(outputs, labels)

        loss.backward()

        # Apply the update before the attack runs. The attack performs its
        # own forward/backward passes through the model; stepping first and
        # clearing gradients afterwards guarantees that none of that work
        # leaks into the training update.
        optimizer.step()

        # Call BIM Attack on a private copy of the batch.
        adversarial_data = bim_attack(model, criterion, inputs.detach().clone(), labels, epsilon, alpha, num_iter)

        # Re-classify the perturbed images. This is monitoring only, so no
        # autograd graph is recorded for it.
        with torch.no_grad():
            adversarial_outputs = model(adversarial_data)
        _, adversarial_preds = torch.max(adversarial_outputs, 1)

        # Drop any parameter gradients left behind by the attack.
        optimizer.zero_grad()

        batch_count = inputs.size(0)
        samples_seen += batch_count
        running_loss += loss.item() * batch_count
        running_corrects += (preds == labels).sum().item()
        adversarial_corrects += (adversarial_preds == labels).sum().item()

    # Average over the samples this loop processed. len(dataset) would also
    # count the held-out test split and so understate every metric.
    denominator = max(samples_seen, 1)
    epoch_loss = running_loss / denominator
    epoch_acc = running_corrects / denominator
    adversarial_acc = adversarial_corrects / denominator

    print('Epoch {}/{} Loss: {:.4f} Acc: {:.4f} Adversarial Acc: {:.4f}'.format(epoch + 1, num_epochs, epoch_loss, epoch_acc, adversarial_acc))

print('Training complete')

# Testing on Original and Adversarial Examples

In [None]:
def _run_attack(attack_type, available, legacy_args):
    """Call *attack_type* with the arguments its signature asks for.

    When every parameter of the attack is a keyword-passable name found in
    *available*, the attack is called with exactly those keyword arguments.
    Otherwise it is called positionally with *legacy_args*.
    """
    import inspect

    try:
        parameters = inspect.signature(attack_type).parameters
    except (TypeError, ValueError):
        # No introspectable signature: use the positional convention.
        return attack_type(*legacy_args)

    keyword_kinds = (
        inspect.Parameter.POSITIONAL_OR_KEYWORD,
        inspect.Parameter.KEYWORD_ONLY,
    )
    by_keyword = bool(parameters) and all(
        name in available and param.kind in keyword_kinds
        for name, param in parameters.items()
    )
    if by_keyword:
        return attack_type(**{name: available[name] for name in parameters})
    return attack_type(*legacy_args)


def test(model, attack_type, dataloader, epsilon, alpha=0.01, num_iter=10):
    """Measure accuracy on clean inputs and on inputs perturbed by an attack.

    The attack is called with whichever of these arguments its signature
    names: ``image``/``images`` (the batch), ``epsilon``, ``data_grad`` (loss
    gradient with respect to the batch), ``model``, ``loss`` (the global
    ``criterion``), ``labels``, ``alpha`` and ``num_iter``. Attacks with other
    parameter names are called positionally as
    ``attack_type(inputs, epsilon, data_grad)``.

    Args:
        model: Network to evaluate; it is switched to eval mode.
        attack_type: Callable producing the adversarial batch.
        dataloader: Yields ``(inputs, labels)`` batches.
        epsilon: Perturbation budget handed to the attack.
        alpha: Step size handed to attacks that accept ``alpha``.
        num_iter: Iteration count handed to attacks that accept ``num_iter``.

    Returns:
        ``(acc, adversarial_acc)``: fraction of ``dataloader.dataset``
        classified correctly before and after the attack.
    """
    model.eval()

    # Accuracy counter for original and adversarial examples
    correct = 0
    adversarial_correct = 0

    for inputs, labels in dataloader:
        if inputs is None or labels is None:  # Skip corrupted files
            continue

        # The inputs need a gradient so gradient-based attacks can use it.
        inputs = inputs.to(device).requires_grad_(True)
        labels = labels.to(device)

        outputs = model(inputs)
        _, preds = torch.max(outputs, 1)

        correct += (preds == labels).sum().item()

        # Gradient of the loss with respect to the inputs.
        loss = criterion(outputs, labels)
        model.zero_grad()
        loss.backward()
        data_grad = inputs.grad.detach()

        # Generate adversarial examples from a private copy of the batch so
        # an attack cannot disturb the tensors used above.
        clean_inputs = inputs.detach().clone()
        available = {
            "image": clean_inputs,
            "images": clean_inputs,
            "epsilon": epsilon,
            "data_grad": data_grad,
            "model": model,
            "loss": criterion,
            "labels": labels,
            "alpha": alpha,
            "num_iter": num_iter,
        }
        adversarial_data = _run_attack(
            attack_type, available, (clean_inputs, epsilon, data_grad)
        )

        # Classify the adversarial examples; no gradients are needed here.
        with torch.no_grad():
            adversarial_outputs = model(adversarial_data)
        _, adversarial_preds = torch.max(adversarial_outputs, 1)

        adversarial_correct += (adversarial_preds == labels).sum().item()

    # Calculate final accuracy for original and adversarial examples
    total = len(dataloader.dataset)
    acc = correct / total if total else 0.0
    adversarial_acc = adversarial_correct / total if total else 0.0

    print(f"Epsilon: {epsilon}\tTest Accuracy = {acc}\tAdversarial Accuracy = {adversarial_acc}")

    return acc, adversarial_acc

# Use the test function

In [None]:
# Evaluate the model against each attack in turn, always on the test loader
# and with the same perturbation budget.
for _attack in (fgsm_attack, random_perturbation_attack, gaussian_noise_attack, bim_attack):
    test(model, _attack, test_dataloader, epsilon=0.3)