## Setup

In [21]:
DATA_PATH = "/content/drive/MyDrive/IDL Image Generation/data"
#OUTPUT_PATH = "/content/drive/MyDrive/IDL Image Generation/images/classic_pgd_outputs"
OUTPUT_PATH = "/content/drive/MyDrive/IDL Image Generation/images/attn_pgd_outputs"
MODEL_PATH = "./CLIP_discriminator.pt"
GENERATORS = [
    "dalle",
    "openjourney",
    "stable_diff",
    "openjourney_v4",
    "titan"
]
BATCH_SIZE = 2

EXPERIMENT_MODE = "attn" # "pgd" or "patch" or "attn"

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
!pip install torchattacks --quiet
!pip install transformers --quiet


In [22]:
%cd "/content/drive/MyDrive/IDL Image Generation"

/content/drive/.shortcut-targets-by-id/1SUnyLWY7LvpxPNxyFvip9Ae4S39ePRqJ/IDL Image Generation


In [23]:
import os
import ast
import csv

import numpy as np
import pandas as pd

from PIL import Image

import torch
from torch.utils.data import Dataset, DataLoader
from transformers import CLIPProcessor, CLIPModel

from AttentionPatchExtractor import AttentionPatchExtractor
from CLIP_patch_pgd import CLIPPatchPGDAttack

from sklearn.svm import SVC
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score,
    precision_score,
    recall_score,
    f1_score,
    roc_auc_score,
    average_precision_score,
)

from torchattacks import PGD
from torchattacks.attack import Attack

import torchvision

from tqdm import tqdm

os.makedirs(OUTPUT_PATH, exist_ok=True)

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
DEVICE

'cuda'

## Discriminator

In [24]:
class CLIPSVMDiscriminator(torch.nn.Module):
    """def __init__(self, model_name="openai/clip-vit-base-patch32", device=None):
        self.device = (
            device
            if device is not None
            else torch.device("cuda" if torch.cuda.is_available() else "cpu")
        )
        print("Running on:", self.device)
        self.model = CLIPModel.from_pretrained(model_name)
        self.processor = CLIPProcessor.from_pretrained(model_name)
        self.model.to(self.device)
        self.model.eval()
        self.svm = SVC(kernel="linear", C=1.0, probability=True)
        self.svm_trained = False"""
    def __init__(self, model_name="openai/clip-vit-base-patch32", device=None):
        super().__init__()
        self.device = (
            device
            if device is not None
            else torch.device("cuda" if torch.cuda.is_available() else "cpu")
        )
        print("Running on:", self.device)
        self.model = CLIPModel.from_pretrained(model_name)
        self.processor = CLIPProcessor.from_pretrained(model_name)
        self.model.to(self.device)
        self.model.eval()
        self.svm = SVC(kernel="linear", C=1.0, probability=True)
        self.svm_trained = False
    def run_clip(self, imgs):
        # inputs = self.processor(images=imgs, return_tensors="pt").to(self.device)
        with torch.no_grad():
            outputs = self.model.vision_model(imgs)
            image_features = outputs.last_hidden_state[:, 0, :]
            image_features = image_features / image_features.norm(dim=-1, keepdim=True)
            return image_features.squeeze().cpu().numpy()

    def train_svm(self, X_train, y_train):
        self.svm.fit(X_train, y_train)
        self.svm_trained = True
        train_accuracy = self.svm.score(X_train, y_train)
        print(f"Training accuracy for discriminator: {train_accuracy:.4f}")
        return self.svm

    def predict_from_embeddings(self, embeddings):
        preds = self.svm.predict(embeddings)
        probs = self.svm.predict_proba(embeddings)[:, 1]
        return preds, probs

    def evaluate(self, X_test, y_test):
        model = self.svm
        y_pred = model.predict(X_test)
        y_pred_proba = model.predict_proba(X_test)[:, 1]
        accuracy = accuracy_score(y_test, y_pred)
        precision = precision_score(y_test, y_pred, average="weighted")
        recall = recall_score(y_test, y_pred, average="weighted")
        f1 = f1_score(y_test, y_pred, average="weighted")
        auc = roc_auc_score(y_test, y_pred_proba)
        ap_per_class = []
        for class_label in np.unique(y_test):
            y_test_binary = (y_test == class_label).astype(int)
            ap = average_precision_score(y_test_binary, y_pred_proba)
            ap_per_class.append(ap)
        map_score = np.mean(ap_per_class)
        print(f"Accuracy: {accuracy:.4f}")
        print(f"Precision: {precision:.4f}")
        print(f"Recall: {recall:.4f}")
        print(f"F1 Score: {f1:.4f}")
        print(f"AUC: {auc:.4f}")
        print(f"mAP: {map_score:.4f}")
        return {
            "accuracy": accuracy,
            "precision": precision,
            "recall": recall,
            "f1": f1,
            "auc": auc,
            "map": map_score,
        }

In [25]:
model: CLIPSVMDiscriminator = torch.load(
    MODEL_PATH, weights_only=False, map_location=DEVICE
)

In [29]:
# model.model

## Dataloader

In [26]:
class ArtEmbeddingDataset(Dataset):
    def __init__(self, ai_only=False):
        self.transform = torchvision.transforms.Resize((224, 224))
        self.image_info = {}
        for directory in GENERATORS:
            if not os.path.exists(os.path.join(DATA_PATH, directory)):
                print(f"{directory} does not exist. Skipping.")
                continue
            print(f"{directory} has {len(os.listdir(os.path.join(DATA_PATH, directory)))} images.")
            for filepath in tqdm(os.listdir(os.path.join(DATA_PATH, directory)), desc="Loading "+ directory):
                full_path = os.path.join(DATA_PATH, directory, filepath)
                if full_path.endswith(".png") or full_path.endswith(".jpg"):
                    id_idx = filepath.rfind('_') + 1
                    id = filepath[id_idx:-4]
                    label = 1 if directory == "real" else 0
                    self.image_info[full_path] = {
                        "generator": directory,
                        "label": label, # 0 = fake, 1 = real
                        # "id": id,
                        # "data": torchvision.io.read_image(full_path),
                    }
        self.paths = list(self.image_info.keys())

    def preprocess_image(self, image_path):
        # Load the image to torch
        image = torchvision.io.read_image(image_path)
        # image = self.transform(image)
        # image = image.float() / 255.0  # Normalize to [0, 1]
        return image

    def __len__(self):
        return len(self.image_info)

    def __getitem__(self, idx):
        filepath = self.paths[idx]
        label = self.image_info[filepath]["label"]
        image = torchvision.io.read_image(filepath) #.convert("RGB")
        # image = self.transform(image)
        # image = image.float() / 255.0

        sample = {"filepath": filepath, "data": image, "label": label}

        return sample

In [27]:
dataset = ArtEmbeddingDataset(ai_only=True)
len(dataset)

dalle has 435 images.


Loading dalle: 100%|██████████| 435/435 [00:00<00:00, 320597.83it/s]


openjourney has 3376 images.


Loading openjourney: 100%|██████████| 3376/3376 [00:00<00:00, 371321.40it/s]


stable_diff has 3345 images.


Loading stable_diff: 100%|██████████| 3345/3345 [00:00<00:00, 415554.38it/s]


openjourney_v4 has 3465 images.


Loading openjourney_v4: 100%|██████████| 3465/3465 [00:00<00:00, 434477.23it/s]


titan has 2058 images.


Loading titan: 100%|██████████| 2058/2058 [00:00<00:00, 407048.84it/s]


12679

In [28]:
TRANSFORM = torchvision.transforms.Resize((224, 224))

def collate_fn(batch):
    images = [item["data"] for item in batch]
    labels = [item["label"] for item in batch]
    paths = [item["filepath"] for item in batch]
    labels = torch.tensor(labels)

    images = torch.stack(images)
    images = images.float() / 255.0  # Normalize to [0, 1]
    images = TRANSFORM(images)

    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    return images, labels, paths

In [29]:
dataloader = DataLoader(
    dataset=dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    collate_fn=collate_fn,
)

## Attacks

In [30]:
import torchvision.utils as vutils


class CLIPPGDAttack(PGD):
    def __init__(self, model, svm, eps=8 / 255, alpha=2 / 255, steps=10, random_start=True ):
        super().__init__(model, eps, alpha, steps, random_start)
        device = self.device
        self.svm_weights = torch.FloatTensor(svm.coef_[0]).to(device)
        self.svm_bias = torch.tensor(svm.intercept_[0],dtype=torch.float32, device=device)
        # self.register_buffer('svm_weights', self.svm_weights)
        # self.register_buffer('svm_bias', self.svm_bias)


    def get_logits(self, inputs):
        if self._normalization_applied is False:
            inputs = self.normalize(inputs)

        # Get image features from the vision model
        vision_outputs = self.model.vision_model(inputs)
        image_features = vision_outputs.last_hidden_state[:, 0, :]

        return image_features

    def svm_boundary_loss(self, clip_embedding):
        # Distance to decision boundary (negative = wrong side)
        if self.svm_weights is None or self.svm_bias is None:
            raise ValueError("SVM weights and bias not set. Call set_svm_params() first.")

        distance = torch.matmul(clip_embedding, self.svm_weights) + self.svm_bias
        # Loss is higher when distance is positive (correct classification)
        return -distance  # Maximize to cross boundary

    def forward(self, images, labels):
        """
        Override forward method to use custom loss function
        """
        images = images.clone().detach().to(self.device)

        adv_images = images.clone().detach()

        if self.random_start:
            # Starting at a uniformly random point
            adv_images = adv_images + torch.empty_like(adv_images).uniform_(-self.eps, self.eps)
            adv_images = torch.clamp(adv_images, min=0, max=1).detach()

        for _ in range(self.steps):
            adv_images.requires_grad = True

            # Forward pass
            outputs = self.get_logits(adv_images)

            # Calculate loss using our custom SVM boundary loss
            loss = self.svm_boundary_loss(outputs).mean()
            # print(f"Loss: {loss.item():.6f}")

            # Backward pass
            grad = torch.autograd.grad(loss, adv_images,
                                      retain_graph=False, create_graph=False)[0]

            adv_images = adv_images.detach() - self.alpha * grad.sign()
            delta = torch.clamp(adv_images - images, min=-self.eps, max=self.eps)
            adv_images = torch.clamp(images + delta, min=0, max=1).detach()

        return adv_images


In [31]:
"""if EXPERIMENT_MODE == "pgd":
    attack = CLIPPGDAttack(model.model, model.svm, eps=8 / 255, alpha=2 / 255, steps=10, random_start=True)
elif EXPERIMENT_MODE == "patch":
    attack = CLIPPatchPGDAttack(model.model, model.svm, eps=8 / 255, alpha=2 / 255, steps=10, patch_selection_strategy= 'grad', random_start=True)
elif EXPERIMENT_MODE == "attn":
    attack = CLIPPatchPGDAttack(model.model, model.svm, eps=8 / 255, alpha=2 / 255, steps=10, patch_selection_strategy= 'attention', random_start=True)
else:
    raise ValueError(f"Invalid experiment mode:", EXPERIMENT_MODE)"""
if EXPERIMENT_MODE == "pgd":
    attack = CLIPPGDAttack(model.model, model.svm, eps=8 / 255, alpha=2 / 255, steps=20, patch_size=32, random_start=True)
elif EXPERIMENT_MODE == "patch":
    attack = CLIPPatchPGDAttack(model.model, model.svm, eps=.3, alpha=2 / 255, steps=20, patch_size=32, num_patches = 5, patch_selection_strategy= 'grad', random_start=True)
elif EXPERIMENT_MODE == "attn":
    attack = CLIPPatchPGDAttack(model.model, model.svm, eps=.3, alpha=2 / 255, steps=20, patch_size=32, num_patches = 5, patch_selection_strategy= 'attention', random_start=True)
else:
    raise ValueError(f"Invalid experiment mode:", EXPERIMENT_MODE)

## Generate Adversarial Images

In [32]:
for i, (images, labels, paths) in enumerate(tqdm(dataloader)):
    images = images.to(DEVICE)
    labels = labels.to(DEVICE)

    # Generate adversarial examples
    adv_images = attack(images, labels)
    # print(len(adv_images))
    # print(len(images))

    # Save the adversarial images
    for j in range(len(images)):
        image_path = paths[j]
        basename = os.path.basename(image_path)
        adv_image_path = os.path.join(OUTPUT_PATH, f"adv_{EXPERIMENT_MODE}_{basename}")
        torchvision.utils.save_image(adv_images[j], adv_image_path)

100%|██████████| 6340/6340 [1:14:23<00:00,  1.42it/s]


## Test Attack Effectiveness

In [33]:
###FOR ATTN PGD
# Replace the Test Attack Effectiveness section with this batch processing approach

# Define batch size for testing
TEST_BATCH_SIZE = 16  # Adjust based on your GPU memory

# Create lists to store all embeddings and labels
all_clip_embeddings = []
all_labels = []

# Process adversarial images in batches
adv_images = []
adv_labels = []
for file in tqdm(os.listdir(OUTPUT_PATH), desc="Loading generated images"):
    adv_images.append(torchvision.io.read_image(os.path.join(OUTPUT_PATH, file)))
    adv_labels.append(0)

# Process in batches
for i in range(0, len(adv_images), TEST_BATCH_SIZE):
    batch_images = adv_images[i:i+TEST_BATCH_SIZE]
    batch_labels = adv_labels[i:i+TEST_BATCH_SIZE]

    # Stack and preprocess
    batch_images = torch.stack(batch_images).to(DEVICE)
    batch_images = TRANSFORM(batch_images)
    batch_images = batch_images.float() / 255.0

    # Get embeddings
    with torch.no_grad():
        batch_embeddings = model.run_clip(batch_images)

    # Store results (move to CPU to free GPU memory)
    all_clip_embeddings.append(batch_embeddings)
    all_labels.extend(batch_labels)

    # Clear cache
    torch.cuda.empty_cache()

    # Move processed images off memory
    del batch_images
    torch.cuda.empty_cache()

# Process real images in batches
real_images = []
real_labels = []
for file in tqdm(os.listdir(os.path.join(DATA_PATH, "real")), desc="Loading real images"):
    real_images.append(torchvision.io.read_image(os.path.join(DATA_PATH, "real", file)))
    real_labels.append(1)

# Process in batches
for i in range(0, len(real_images), TEST_BATCH_SIZE):
    batch_images = real_images[i:i+TEST_BATCH_SIZE]
    batch_labels = real_labels[i:i+TEST_BATCH_SIZE]

    # Stack and preprocess
    batch_images = torch.stack(batch_images).to(DEVICE)
    batch_images = TRANSFORM(batch_images)
    batch_images = batch_images.float() / 255.0

    # Get embeddings
    with torch.no_grad():
        batch_embeddings = model.run_clip(batch_images)

    # Store results (move to CPU to free GPU memory)
    all_clip_embeddings.append(batch_embeddings)
    all_labels.extend(batch_labels)

    # Clear cache
    torch.cuda.empty_cache()

    # Move processed images off memory
    del batch_images
    torch.cuda.empty_cache()

# Combine all embeddings and evaluate
clip_embeddings = np.vstack(all_clip_embeddings)
print(clip_embeddings.shape)

# Evaluate
model.evaluate(clip_embeddings, all_labels)

Loading generated images: 100%|██████████| 12679/12679 [02:37<00:00, 80.68it/s] 
Loading real images: 100%|██████████| 3633/3633 [00:38<00:00, 94.69it/s] 


(16312, 768)
Accuracy: 0.4342
Precision: 0.7338
Recall: 0.4342
F1 Score: 0.4518
AUC: 0.6351
mAP: 0.5116


{'accuracy': 0.4341589014222658,
 'precision': 0.7338212883171638,
 'recall': 0.4341589014222658,
 'f1': 0.45181728329871035,
 'auc': np.float64(0.6351374548233675),
 'map': np.float64(0.511593211857198)}

In [13]:
###FOR CLASSIC PGD
# Replace the Test Attack Effectiveness section with this batch processing approach

# Define batch size for testing
TEST_BATCH_SIZE = 16  # Adjust based on your GPU memory

# Create lists to store all embeddings and labels
all_clip_embeddings = []
all_labels = []

# Process adversarial images in batches
adv_images = []
adv_labels = []
for file in tqdm(os.listdir(OUTPUT_PATH), desc="Loading generated images"):
    adv_images.append(torchvision.io.read_image(os.path.join(OUTPUT_PATH, file)))
    adv_labels.append(0)

# Process in batches
for i in range(0, len(adv_images), TEST_BATCH_SIZE):
    batch_images = adv_images[i:i+TEST_BATCH_SIZE]
    batch_labels = adv_labels[i:i+TEST_BATCH_SIZE]

    # Stack and preprocess
    batch_images = torch.stack(batch_images).to(DEVICE)
    batch_images = TRANSFORM(batch_images)
    batch_images = batch_images.float() / 255.0

    # Get embeddings
    with torch.no_grad():
        batch_embeddings = model.run_clip(batch_images)

    # Store results (move to CPU to free GPU memory)
    all_clip_embeddings.append(batch_embeddings)
    all_labels.extend(batch_labels)

    # Clear cache
    torch.cuda.empty_cache()

    # Move processed images off memory
    del batch_images
    torch.cuda.empty_cache()

# Process real images in batches
real_images = []
real_labels = []
for file in tqdm(os.listdir(os.path.join(DATA_PATH, "real")), desc="Loading real images"):
    real_images.append(torchvision.io.read_image(os.path.join(DATA_PATH, "real", file)))
    real_labels.append(1)

# Process in batches
for i in range(0, len(real_images), TEST_BATCH_SIZE):
    batch_images = real_images[i:i+TEST_BATCH_SIZE]
    batch_labels = real_labels[i:i+TEST_BATCH_SIZE]

    # Stack and preprocess
    batch_images = torch.stack(batch_images).to(DEVICE)
    batch_images = TRANSFORM(batch_images)
    batch_images = batch_images.float() / 255.0

    # Get embeddings
    with torch.no_grad():
        batch_embeddings = model.run_clip(batch_images)

    # Store results (move to CPU to free GPU memory)
    all_clip_embeddings.append(batch_embeddings)
    all_labels.extend(batch_labels)

    # Clear cache
    torch.cuda.empty_cache()

    # Move processed images off memory
    del batch_images
    torch.cuda.empty_cache()

# Combine all embeddings and evaluate
clip_embeddings = np.vstack(all_clip_embeddings)
print(clip_embeddings.shape)

# Evaluate
model.evaluate(clip_embeddings, all_labels)

Loading generated images: 100%|██████████| 12679/12679 [02:06<00:00, 100.44it/s]
Loading real images: 100%|██████████| 3633/3633 [00:36<00:00, 100.37it/s]


(16312, 768)
Accuracy: 0.1854
Precision: 0.0429
Recall: 0.1854
F1 Score: 0.0697
AUC: 0.0110
mAP: 0.5588


{'accuracy': 0.18544629720451203,
 'precision': 0.04290157907182834,
 'recall': 0.18544629720451203,
 'f1': 0.0696826185803374,
 'auc': np.float64(0.011045418921169952),
 'map': np.float64(0.5587876268380603)}

In [14]:
'''images = []
labels = []
for file in tqdm(os.listdir(OUTPUT_PATH), desc="Loading generated images"):
    images.append(torchvision.io.read_image(os.path.join(OUTPUT_PATH, file)))
    labels.append(0)
for file in tqdm(os.listdir(os.path.join(DATA_PATH, "real")), desc="Loading real images"):
    images.append(TRANSFORM(torchvision.io.read_image(os.path.join(DATA_PATH, "real", file))))
    labels.append(1)


images = torch.stack(images).to(DEVICE)
images = images.float() / 255.0  # Normalize to [0, 1]

clip_embeddings = model.run_clip(images)
print(clip_embeddings.shape)

model.evaluate(clip_embeddings, labels)'''

'images = []\nlabels = []\nfor file in tqdm(os.listdir(OUTPUT_PATH), desc="Loading generated images"):\n    images.append(torchvision.io.read_image(os.path.join(OUTPUT_PATH, file)))\n    labels.append(0)\nfor file in tqdm(os.listdir(os.path.join(DATA_PATH, "real")), desc="Loading real images"):\n    images.append(TRANSFORM(torchvision.io.read_image(os.path.join(DATA_PATH, "real", file))))\n    labels.append(1)\n\n\nimages = torch.stack(images).to(DEVICE)\nimages = images.float() / 255.0  # Normalize to [0, 1]\n\nclip_embeddings = model.run_clip(images)\nprint(clip_embeddings.shape)\n\nmodel.evaluate(clip_embeddings, labels)'

In [34]:
images = []
labels = []
adv_files = os.listdir(OUTPUT_PATH)
for file in tqdm(adv_files, desc="Loading generated images"):
    images.append(torchvision.io.read_image(os.path.join(OUTPUT_PATH, file)))
    labels.append(0)
for file in tqdm(os.listdir(os.path.join(DATA_PATH, "real")), desc="Loading real images"):
    images.append(TRANSFORM(torchvision.io.read_image(os.path.join(DATA_PATH, "real", file))))
    labels.append(1)

images = torch.stack(images).to(DEVICE)
images = images.float() / 255.0  # Normalize to [0, 1]

Loading generated images: 100%|██████████| 12679/12679 [02:02<00:00, 103.52it/s]
Loading real images: 100%|██████████| 3633/3633 [00:41<00:00, 86.74it/s] 


In [35]:
attn_file_paths = os.listdir(OUTPUT_PATH) + os.listdir(os.path.join(DATA_PATH, "real"))
ids = [(f.split('.jpg')[0]).split('_')[-1] for f in attn_file_paths]

generator = []
for f in attn_file_paths:
  try:
    x = (f.split('.jpg')[0]).split('_')[2]
    generator.append(x)
  except:
    generator.append('real')

In [36]:
clip_batch_size = 2048
all_embeddings = []

for i in range(0, len(images), clip_batch_size):
  if i + clip_batch_size > len(images):
    images_batch = images[i:]
  else:
    images_batch = images[i:i+clip_batch_size]

  clip_embeddings = model.run_clip(images_batch)
  all_embeddings.extend(clip_embeddings)

preds, _ = model.predict_from_embeddings(all_embeddings)

In [37]:
import pandas as pd

df = pd.DataFrame({
    'id': ids,
    'generator': generator,
    'embedding': all_embeddings,
    'label': labels,
    'prediction': preds,
    'attack_type': ['grad'] * len(preds)
})

df.to_csv('./attn_pgd_clip_embeddings.csv', index=False)