<a href="https://colab.research.google.com/github/AndreYang333/ExplainableAI/blob/main/adversarialpatch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Install required libraies
!pip install torch torchvision numpy matplotlib




In [None]:
# Import required modules
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
from torchvision import models, transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader
import numpy as np
import matplotlib.pyplot as plt
from PIL import Image
import os
import random
from tqdm import tqdm
import torchvision.transforms.functional as F
from sklearn.model_selection import train_test_split

In [None]:
# Load the pretrained ResNet34 model (ImageNet-1k weights)
_resnet34 = models.resnet34(weights=models.ResNet34_Weights.IMAGENET1K_V1)

# The ImageNet-pretrained torchvision weights expect inputs normalized with these
# per-channel statistics, whereas the data pipeline in this notebook only applies
# ToTensor() and therefore yields raw [0, 1] pixel values. Performing the
# normalization inside the model keeps images and the adversarial patch in plain
# pixel space while every forward pass still sees correctly scaled inputs.
IMAGENET_MEAN = (0.485, 0.456, 0.406)
IMAGENET_STD = (0.229, 0.224, 0.225)
model = nn.Sequential(transforms.Normalize(IMAGENET_MEAN, IMAGENET_STD), _resnet34)

# Only the adversarial patch is handed to the optimizer, so the network weights
# never need gradients. Freezing them avoids computing (and silently accumulating)
# weight gradients on every backward pass; gradients still flow to the input.
model.requires_grad_(False)

model.eval()  # Set model to evaluation mode


Downloading: "https://download.pytorch.org/models/resnet34-b627a593.pth" to /root/.cache/torch/hub/checkpoints/resnet34-b627a593.pth
100%|██████████| 83.3M/83.3M [00:00<00:00, 108MB/s]


ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [None]:

# Define Transformation: convert each PIL image to a float tensor
transform = transforms.Compose([transforms.ToTensor()])

# Load the official STL10 train and test splits
trainset = torchvision.datasets.STL10(root='./data', split='train', download=True, transform=transform)
testset = torchvision.datasets.STL10(root='./data', split='test', download=True, transform=transform)

# Convert test dataset to a list of (image, label) samples for splitting
test_data = list(testset)

# Split test data: move 75% of the test samples to the train set and keep 25% for
# testing. A fixed random_state makes the split identical on every
# "Restart & Run All", so losses and success rates are comparable between runs.
SPLIT_SEED = 42
train_data_from_test, remaining_test_data = train_test_split(
    test_data, test_size=0.25, shuffle=True, random_state=SPLIT_SEED
)

# Combine trainset with additional training data from testset
train_data_combined = list(trainset) + train_data_from_test

# Build the loaders once, directly from the final train/test partitions
trainloader = DataLoader(train_data_combined, batch_size=64, shuffle=True)
testloader = DataLoader(remaining_test_data, batch_size=64, shuffle=False)

# print shape of images and size of dataset
images, labels = next(iter(trainloader))
print(f"New Train Image batch shape: {images.shape}")
print(f"New Train dataset size: {len(train_data_combined)}")

images, labels = next(iter(testloader))
print(f"New Test Image batch shape: {images.shape}")
print(f"New Test dataset size: {len(remaining_test_data)}")


Downloading http://ai.stanford.edu/~acoates/stl10/stl10_binary.tar.gz to ./data/stl10_binary.tar.gz


100%|██████████| 2640397119/2640397119 [01:45<00:00, 24939174.57it/s]


Extracting ./data/stl10_binary.tar.gz to ./data
Files already downloaded and verified
New Train Image batch shape: torch.Size([64, 3, 96, 96])
New Train dataset size: 11000
New Test Image batch shape: torch.Size([64, 3, 96, 96])
New Test dataset size: 2000


In [None]:
# Read the class-name list: every line of the file becomes one stripped entry
with open('Assignment_data/imagenet_classes.txt') as f:
    class_names = [entry.strip() for entry in f]

# Select a target class and look up its position in the class list
target_class_name = 'banana'
target_class_index = class_names.index(target_class_name)
print(f"Target Class Index for '{target_class_name}': {target_class_index}")

Target Class Index for 'banana': 954


In [None]:
# Apply patch to the image at a random location with random rotation
def apply_patch(image, patch, rotate_image=True):
    """
    Applies the adversarial patch to the image at a random location with random rotation.

    Args:
        image: Tensor of shape (C, H, W). It is never modified in place.
        patch: Tensor of shape (C, h, w) with h <= H and w <= W. Gradients flow
            back to it through the returned tensor.
        rotate_image: If True (default, the original behaviour) the image is
            rotated by the same random angle as the patch. Pass False to rotate
            only the patch and leave the image untouched.

    Returns:
        A new (C, H, W) tensor containing the (optionally rotated) image with the
        rotated patch blended in at a uniformly random position.

    Raises:
        ValueError: If the patch is larger than the image in either dimension.
    """
    _, img_height, img_width = image.shape
    patch_height, patch_width = patch.shape[1], patch.shape[2]

    if patch_height > img_height or patch_width > img_width:
        raise ValueError(
            f"Patch of size {patch_height}x{patch_width} does not fit into "
            f"image of size {img_height}x{img_width}"
        )

    # Random rotation angle between 0 and 360 degrees
    rotation_angle = np.random.uniform(0, 360)

    # F.rotate returns a new tensor; clone explicitly when the image is not rotated
    # so the caller's tensor is never written to.
    if rotate_image:
        patched_image = F.rotate(image, rotation_angle)
    else:
        patched_image = image.clone()

    # Rotation keeps the output size and fills the uncovered corners with zeros.
    # Rotating an all-ones mask alongside the patch marks which pixels really
    # belong to the patch, so the empty corners are not pasted as black pixels.
    rotated_patch = F.rotate(patch, rotation_angle)
    patch_mask = F.rotate(torch.ones_like(patch), rotation_angle)

    # Random top-left corner. randint's upper bound is exclusive, hence the +1 so
    # that the last valid position (patch flush with the border) can be drawn and
    # a patch exactly as large as the image is accepted.
    x_pos = np.random.randint(0, img_width - patch_width + 1)
    y_pos = np.random.randint(0, img_height - patch_height + 1)

    # Blend: patch pixels where the mask is 1, original image pixels elsewhere.
    # The region is cloned so the in-place write below cannot alias its own input.
    region = patched_image[:, y_pos:y_pos + patch_height, x_pos:x_pos + patch_width].clone()
    patched_image[:, y_pos:y_pos + patch_height, x_pos:x_pos + patch_width] = (
        patch_mask * rotated_patch + (1 - patch_mask) * region
    )

    return patched_image

In [None]:
patch_size = (3, 10, 10)

# Start the patch from uniform random noise; it is a leaf tensor that tracks gradients
adversarial_patch = torch.rand(patch_size, requires_grad=True)

# Keep the pixel values inside [0, 1] without recording the operation in autograd
with torch.no_grad():
    adversarial_patch.clamp_(0, 1)

# Report the patch dimensions
print(f"Adversarial patch shape: {adversarial_patch.shape}")

Adversarial patch shape: torch.Size([3, 10, 10])


In [None]:
# Cross-entropy loss on the model outputs
criterion = nn.CrossEntropyLoss()

# Adam over the patch tensor only, learning rate 0.01 (no weight decay is configured)
optimizer = optim.Adam(params=[adversarial_patch], lr=1e-2)




In [None]:
# Initialize lists to store training, test loss values, and parameters
train_loss_values = []
test_loss_values = []
saved_params = []  # One snapshot dict per epoch: epoch number, model parameters, patch

num_epochs = 15

# The optimizer only holds the adversarial patch, so the model parameters never
# change during this loop. Snapshot them once (detached from autograd) and reuse
# the same snapshot in every epoch record instead of cloning all weights per epoch.
model_params = {name: param.detach().clone() for name, param in model.named_parameters()}


def _targeted_loss(image_batch):
    """Patch every image of the batch and return the loss towards the target class."""
    patched_images = torch.stack([apply_patch(img, adversarial_patch) for img in image_batch])

    # Forward pass
    outputs = model(patched_images)

    # Create target labels (all as the target class)
    target_labels = torch.full((image_batch.size(0),), target_class_index, dtype=torch.long)

    return criterion(outputs, target_labels)


# Training loop
for epoch in range(num_epochs):
    epoch_loss = 0.0
    batch_count = 0

    # Training phase: only the patch is trained. The network must stay in
    # evaluation mode, otherwise BatchNorm would normalize with batch statistics
    # and overwrite the pretrained running statistics with those of patched images.
    model.eval()
    with tqdm(trainloader, unit="batch") as tepoch:
        for images, _ in tepoch:
            tepoch.set_description(f"Epoch {epoch+1}/{num_epochs}")

            # Compute loss of the patched batch towards the target class
            loss = _targeted_loss(images)

            # Backward pass and optimization
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Clamp patch values to [0,1]
            adversarial_patch.data.clamp_(0, 1)

            # Accumulate loss for the current batch
            epoch_loss += loss.item()
            batch_count += 1

            # Display the loss for the current batch
            tepoch.set_postfix(loss=loss.item())

    # Calculate average training loss for the epoch
    avg_train_loss = epoch_loss / batch_count
    train_loss_values.append(avg_train_loss)

    print(f"Epoch [{epoch+1}/{num_epochs}], Average Training Loss: {avg_train_loss:.4f}")

    # Save model parameters and adversarial_patch for each epoch.
    # The patch is detached so the snapshot does not keep autograd history alive.
    saved_params.append({
        'epoch': epoch + 1,
        'model_params': model_params,
        'adversarial_patch': adversarial_patch.detach().clone()
    })

    # Evaluation phase (Test loss calculation)
    model.eval()  # Set the model to evaluation mode
    test_loss = 0.0
    test_batch_count = 0

    # Turn off gradient computation during evaluation
    with torch.no_grad():
        with tqdm(testloader, unit="batch") as tepoch:
            for images, _ in tepoch:
                tepoch.set_description(f"Evaluating Test Set (Epoch {epoch+1})")

                # Compute test loss of the patched batch
                loss = _targeted_loss(images)

                # Accumulate the test loss
                test_loss += loss.item()
                test_batch_count += 1

                tepoch.set_postfix(test_loss=loss.item())

    # Calculate average test loss for the epoch
    avg_test_loss = test_loss / test_batch_count
    test_loss_values.append(avg_test_loss)

    print(f"Epoch [{epoch+1}/{num_epochs}], Average Test Loss: {avg_test_loss:.4f}")

# Plot training and test loss over epochs (x-axis uses 1-based epoch numbers,
# matching the numbering printed above)
plt.plot(range(1, len(train_loss_values) + 1), train_loss_values, label='Training Loss', marker='o')
plt.plot(range(1, len(test_loss_values) + 1), test_loss_values, label='Test Loss', marker='x')
plt.title('Loss over Epochs')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.grid(True)
plt.legend()
plt.show()



Epoch 1/15:  28%|██▊       | 48/172 [05:20<13:48,  6.68s/batch, loss=11.7]

In [None]:


# Evaluate the patch: share of patched test images that the model assigns to the
# target class. Every image of every batch is scored (not just the first image
# of each batch), so the rate is computed over the whole test loader.
model.eval()
success = 0
total = 0

# Inference only: no autograd graph is needed
with torch.no_grad():
    for images, labels in testloader:
        # Apply the patch to each test image in the batch
        patched_images = torch.stack([apply_patch(img, adversarial_patch) for img in images])

        # Forward pass; the predicted class is the index of the largest logit
        outputs = model(patched_images)
        predicted = outputs.argmax(dim=1)

        total += images.size(0)
        success += (predicted == target_class_index).sum().item()

if total:
    print(f"Attack Success Rate: {100 * success / total:.2f}%")
else:
    # Avoid a ZeroDivisionError when the loader yields no batches
    print("Attack Success Rate: n/a (no test images)")


In [None]:
# Convert patch tensor to PIL Image for visualization
def tensor_to_pil(tensor):
    """Return a PIL image built from `tensor`, detached from autograd and moved to the CPU."""
    to_pil = transforms.ToPILImage()
    return to_pil(tensor.detach().cpu())

# Render the current patch on its own figure, without axis ticks
patch_image = tensor_to_pil(adversarial_patch)
fig, ax = plt.subplots()
ax.imshow(patch_image)
ax.set_title("Adversarial Patch")
ax.axis('off')
plt.show()


In [None]:
# Display a sample patched image: first image of the first test batch
sample_image, _ = next(iter(testloader))
patched_sample = apply_patch(sample_image[0], adversarial_patch).unsqueeze(0)

# Get model prediction. This is inference only, so make sure the model is in
# evaluation mode and skip building an autograd graph through the patch.
model.eval()
with torch.no_grad():
    outputs = model(patched_sample)
_, predicted = outputs.max(1)
predicted_class = class_names[predicted.item()]

# Display the image
patched_sample_image = tensor_to_pil(patched_sample[0])
plt.imshow(patched_sample_image)
plt.title(f"Predicted Class: {predicted_class}")
plt.axis('off')
plt.show()
