In [1]:
import torch
import shutil
import numpy as np
import pandas as pd
import torch.nn as nn
import torch.optim as optim
from torchvision.datasets import MNIST
from torchvision import datasets, transforms
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader, TensorDataset, ConcatDataset

In [2]:
# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# Define the transform
transform = transforms.Compose([transforms.ToTensor()])

In [4]:
# Load the training dataset
trainset = MNIST(root='./data', train=True, download=True, transform=transform)

# Load the test dataset
testset = MNIST(root='./data', train=False, download=True, transform=transform)

# Combine the trainset and testset
mnist_dataset = ConcatDataset([trainset, testset])
data_loader = DataLoader(mnist_dataset, batch_size=64, shuffle=True)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1006)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1006)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1006)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
<urlopen error [SSL: CERTIFICATE_VERIFY_FAILED] certificate verify failed: certificate has expired (_ssl.c:1006)>

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100.0%

Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw






In [5]:
# Function to perform PGD attack on GPU
def pgd_attack(model, images, labels, epsilon=0.5, alpha=0.04, iters=10):
    images, labels = images.to(device), labels.to(device)
    perturbed_images = images.clone().detach().requires_grad_(True).to(device)
    
    for _ in range(iters):
        outputs = model(perturbed_images)
        loss = nn.CrossEntropyLoss()(outputs, labels)
        loss.backward()
        
        # Perturbation step
        with torch.no_grad():
            perturbed_images = perturbed_images + alpha * perturbed_images.grad.sign()
            eta = torch.clamp(perturbed_images - images, min=-epsilon, max=epsilon)
            perturbed_images = torch.clamp(images + eta, min=0, max=1).detach_()
        
        perturbed_images.requires_grad = True  # Re-enable gradients for next iteration
    
    return perturbed_images

In [6]:
# Create a simple model to compute gradients (dummy model for PGD perturbation)
class SimpleCNN(nn.Module):
    def __init__(self):
        super(SimpleCNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.fc1 = nn.Linear(10 * 24 * 24, 10)  # Adjusted to match MNIST feature dimensions

    def forward(self, x):
        x = torch.relu(self.conv1(x))
        x = x.view(x.size(0), -1)  # Reshape to (batch_size, 10 * 24 * 24)
        x = self.fc1(x)
        return x

# Initialize model and move to GPU
model = SimpleCNN().to(device)

In [7]:
# Perturb the images in the dataset
perturbed_features = []
original_labels = []

for images, labels in data_loader:
    # Perform PGD attack on the batch of images
    perturbed_images = pgd_attack(model, images, labels)
    
    # Append perturbed features and original labels
    perturbed_features.append(perturbed_images.cpu())  # Move back to CPU for storage
    original_labels.append(labels.cpu())

# Combine all batches into a single dataset
perturbed_features = torch.cat(perturbed_features)
original_labels = torch.cat(original_labels)

In [8]:
# Create new dataset with perturbed images and original labels
new_dataset = TensorDataset(perturbed_features, original_labels)

# DataLoader for the new perturbed dataset
new_data_loader = DataLoader(new_dataset, batch_size=64, shuffle=True)

In [9]:
# Extract features and labels from new_dataset
features = perturbed_features  # Input features (images)
labels = original_labels  # Target labels

# Detach the tensors from the computation graph before converting to NumPy arrays
features_np = features.detach().numpy()  # Convert to NumPy arrays after detaching
labels_np = labels.detach().numpy()  # Same for labels

In [10]:
# Perform the train-test split (80% train, 20% test, for example)
X_train, X_test, y_train, y_test = train_test_split(features_np, labels_np, test_size=0.2, random_state=42)

# Convert back to PyTorch tensors
X_train_tensor = torch.tensor(X_train)
X_test_tensor = torch.tensor(X_test)
y_train_tensor = torch.tensor(y_train)
y_test_tensor = torch.tensor(y_test)

In [11]:
# Create TensorDataset for train and test sets
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)

# Create DataLoader for train and test sets
trainloader = DataLoader(train_dataset, batch_size=64, shuffle=True)
testloader = DataLoader(test_dataset, batch_size=64, shuffle=False)

In [12]:
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=5, stride=1, padding=2)
        self.bn1 = nn.BatchNorm2d(32)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=5, stride=1, padding=2)
        self.bn2 = nn.BatchNorm2d(64)

        self.fc1 = nn.Linear(64 * 7 * 7, 1024)
        self.fc2 = nn.Linear(1024, 10)
        self.dropout = nn.Dropout(0.5)

    def forward(self, x):
        x = self.pool(torch.relu(self.bn1(self.conv1(x))))

        x = self.pool(torch.relu(self.bn2(self.conv2(x))))

        x = x.view(-1, 64 * 7 * 7)

        x = torch.relu(self.fc1(x))
        x = self.dropout(x)

        # Output Layer
        x = self.fc2(x)
        return x

In [13]:
# Hyperparameters
learning_rate = 0.01
num_epochs = 10
batch_size = 32

In [14]:
model = CNNModel().to(device)
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
criterion = nn.CrossEntropyLoss()

In [15]:
for epoch in range(num_epochs):
    model.train()
    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        output = model(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

    print(f'Epoch [{epoch+1}/{num_epochs}], Loss: {loss.item():.4f}')

Epoch [1/10], Loss: 0.3153
Epoch [2/10], Loss: 0.1797
Epoch [3/10], Loss: 0.0134
Epoch [4/10], Loss: 0.0009
Epoch [5/10], Loss: 0.0000
Epoch [6/10], Loss: 0.0000
Epoch [7/10], Loss: 0.0000
Epoch [8/10], Loss: 0.0000
Epoch [9/10], Loss: 0.0000
Epoch [10/10], Loss: 0.0000


In [16]:
model.eval()
correct = 0
total = 0

with torch.no_grad():
    for images, labels in trainloader:
        images, labels = images.to(device), labels.to(device)
        output = model(images)
        _, predicted = torch.max(output.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f'Accuracy on Perturbed Dataset: {100 * correct / total:.2f}%')

Accuracy on Perturbed Dataset: 100.00%


In [17]:
# Clean up the dataset directory
shutil.rmtree('./data')  # This will delete the 'data' directory and its contents
print("Dataset deleted successfully.")

Dataset deleted successfully.
