In [4]:
# Setup TensorFlow environment
from utils import setup_tensorflow_environment

setup_tensorflow_environment()

Available devices: [PhysicalDevice(name='/physical_device:CPU:0', device_type='CPU'), PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [5]:
import tensorflow as tf
import tensorflow_datasets as tfds
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.applications import ResNet50
from tensorflow.keras.applications.resnet50 import preprocess_input, decode_predictions
from tensorflow.keras import losses, optimizers, metrics

# Constants
IMG_SIZE = 224
BATCH_SIZE = 32
AUTOTUNE = tf.data.AUTOTUNE
EPOCHS = 3

### 1. Load dataset (using Imagenette as a stand-in for ImageNet)
def preprocess(example):
    image = tf.image.resize(example['image'], (IMG_SIZE, IMG_SIZE))
    image = preprocess_input(image)
    label = example['label']
    return image, label

train_ds, val_ds = tfds.load('imagenette/320px-v2', split=['train', 'validation'], as_supervised=False)
train_ds = train_ds.map(preprocess).shuffle(1024).batch(BATCH_SIZE).prefetch(AUTOTUNE)
val_ds = val_ds.map(preprocess).batch(BATCH_SIZE).prefetch(AUTOTUNE)


In [6]:

### 2. Load pretrained ResNet-50
base_model = ResNet50(weights='imagenet', include_top=True)

# Evaluate baseline accuracy
def evaluate(model, dataset):
    acc = metrics.SparseCategoricalAccuracy()
    for x_batch, y_batch in dataset:
        logits = model(x_batch, training=False)
        acc.update_state(y_batch, logits)
    return acc.result().numpy()

print("Evaluating baseline ResNet-50 on validation set...")
baseline_acc = evaluate(base_model, val_ds)
print(f"Baseline Accuracy: {baseline_acc:.4f}")


Evaluating baseline ResNet-50 on validation set...
Baseline Accuracy: 0.0920


2025-06-01 09:07:25.989767: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


In [7]:

### 3. PGD Attack Implementation
def pgd_attack(model, images, labels, eps=8/255, alpha=2/255, iters=10):
    adv_images = tf.identity(images)
    for i in range(iters):
        with tf.GradientTape() as tape:
            tape.watch(adv_images)
            predictions = model(adv_images, training=False)
            loss = losses.sparse_categorical_crossentropy(labels, predictions)

        grad = tape.gradient(loss, adv_images)
        adv_images = adv_images + alpha * tf.sign(grad)
        adv_images = tf.clip_by_value(adv_images, images - eps, images + eps)
        adv_images = tf.clip_by_value(adv_images, -1, 1)
    return adv_images

# Evaluate under PGD attack
def evaluate_under_attack(model, dataset):
    acc = metrics.SparseCategoricalAccuracy()
    for x_batch, y_batch in dataset:
        x_adv = pgd_attack(model, x_batch, y_batch)
        logits = model(x_adv, training=False)
        acc.update_state(y_batch, logits)
    return acc.result().numpy()

print("Evaluating model under PGD attack...")
adversarial_acc = evaluate_under_attack(base_model, val_ds)
print(f"Accuracy under PGD Attack: {adversarial_acc:.4f}")


Evaluating model under PGD attack...
Accuracy under PGD Attack: 0.0000


In [8]:
### 4. Adversarial Training
def create_adversarial_model():
    base = ResNet50(weights=None, classes=10)
    base.compile(
        optimizer=optimizers.Adam(),
        loss=losses.SparseCategoricalCrossentropy(from_logits=True),
        metrics=['accuracy']
    )
    return base

adv_model = create_adversarial_model()

# Custom training loop for adversarial training
@tf.function
def train_step(model, images, labels):
    adv_images = pgd_attack(model, images, labels)
    with tf.GradientTape() as tape:
        predictions = model(adv_images, training=True)
        loss = losses.sparse_categorical_crossentropy(labels, predictions)
        loss = tf.reduce_mean(loss)
    grads = tape.gradient(loss, model.trainable_variables)
    model.optimizer.apply_gradients(zip(grads, model.trainable_variables))
    return loss

print("Starting adversarial training...")
for epoch in range(EPOCHS):
    print(f"Epoch {epoch+1}/{EPOCHS}")
    for step, (x_batch, y_batch) in enumerate(train_ds):
        loss_value = train_step(adv_model, x_batch, y_batch)
        if step % 100 == 0:
            print(f"Step {step}, Loss: {loss_value:.4f}")

print("Evaluating adversarially trained model...")
clean_acc = evaluate(adv_model, val_ds)
adv_acc = evaluate_under_attack(adv_model, val_ds)
print(f"Adversarially Trained Model Accuracy (Clean): {clean_acc:.4f}")
print(f"Adversarially Trained Model Accuracy (PGD): {adv_acc:.4f}")


Starting adversarial training...
Epoch 1/3


2025-06-01 09:24:05.288157: I tensorflow/core/grappler/optimizers/custom_graph_optimizer_registry.cc:117] Plugin optimizer for device_type GPU is enabled.


Step 0, Loss: 3.2246
Step 100, Loss: 2.5332
Step 200, Loss: 1.9688


2025-06-01 09:48:56.275423: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence


Epoch 2/3
Step 0, Loss: 1.1709
Step 100, Loss: 1.2559
Step 200, Loss: 1.4053
Epoch 3/3
Step 0, Loss: 1.5605
Step 100, Loss: 1.5459
Step 200, Loss: 1.3496
Evaluating adversarially trained model...
Adversarially Trained Model Accuracy (Clean): 0.0071
Adversarially Trained Model Accuracy (PGD): 0.6084


2025-06-01 10:30:16.687788: I tensorflow/core/framework/local_rendezvous.cc:407] Local rendezvous is aborting with status: OUT_OF_RANGE: End of sequence
