In [5]:
import torch
import torchvision.transforms as T
from torchvision.models import resnet50
from PIL import Image
import clip
import numpy as np
from sklearn.manifold import TSNE
import matplotlib.pyplot as plt


device = "cuda" if torch.cuda.is_available() else "cpu"

# Load models
import ssl
ssl._create_default_https_context = ssl._create_unverified_context

clip_model, preprocess_clip = clip.load("ViT-B/32", device=device)

def generate_cloak(image_pil, epsilon=0.003):
    # Resize and convert to tensor
    transform_to_tensor = T.Compose([
        T.Resize((224, 224)),      # Required for CLIP
        T.ToTensor(),
    ])
    image_tensor = transform_to_tensor(image_pil).unsqueeze(0).to(device)
    
    # Clone and normalize with grad enabled
    image_clip = image_tensor.clone().detach().requires_grad_(True)
    normalize_clip = T.Normalize(mean=(0.4815, 0.4578, 0.4082), std=(0.2686, 0.2613, 0.2758))
    image_clip_norm = normalize_clip(image_clip)

    # Text prompts
    text_prompt = clip.tokenize(["a painting of a cat", "an abstract landscape"]).to(device)
    text_features = clip_model.encode_text(text_prompt).mean(dim=0, keepdim=True)

    # CLIP forward and loss
    image_features = clip_model.encode_image(image_clip_norm)
    clip_loss = -torch.cosine_similarity(image_features, text_features).mean()
    clip_loss.backward()

    # Cosine similarity of the original image and text (before perturbation)
    original_image_features = clip_model.encode_image(image_clip_norm)
    original_image_features = original_image_features / original_image_features.norm(dim=-1, keepdim=True)
    text_features = text_features / text_features.norm(dim=-1, keepdim=True)
    original_cosine_similarity_score = torch.cosine_similarity(original_image_features, text_features).item()
    print(f"Original Cosine Similarity Score: {original_cosine_similarity_score}")

    # FGSM perturbation
    perturbed = image_clip + epsilon * image_clip.grad.data.sign()
    perturbed = torch.clamp(perturbed, 0, 1)

    # Cosine similarity of the perturbed image and text
    perturbed_image_features = clip_model.encode_image(normalize_clip(perturbed))
    perturbed_image_features = perturbed_image_features / perturbed_image_features.norm(dim=-1, keepdim=True)
    cosine_similarity_score = torch.cosine_similarity(perturbed_image_features, original_image_features).item()
    print(f"Perturbed Cosine Similarity Score: {cosine_similarity_score}")
    
    embeddings = [perturbed_image_features, original_image_features]
    embeddings = torch.cat([perturbed_image_features, original_image_features], dim=0)
    reduced = TSNE(n_components=2).fit_transform(embeddings.detach().cpu().numpy())
    plt.scatter(reduced[:, 0], reduced[:, 1])
    # Convert back to PIL
    cloaked_img = T.ToPILImage()(perturbed.squeeze().detach().cpu())
    return cloaked_img




In [6]:
from PIL import Image
import os

# Import the generate_cloak function from the module (assuming it's in cloak.py)
# from cloak import generate_cloak

# Paste the generate_cloak function code here if not using as a module.

def cloak_and_save(input_path, output_path):
    # Load input image
    image = Image.open(input_path).convert("RGB")
    
    # Cloak the image
    cloaked_image = generate_cloak(image, 0.001)
    
    # Save the cloaked image
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    cloaked_image.save(output_path)

    print(f"Cloaked image saved to {output_path}")

# === Example ===
if __name__ == "__main__":
    input_art_path = "image.png"      # Your input image path
    output_cloak_path = "cloaked_output/art_cloaked.jpg"  # Output path

    cloak_and_save(input_art_path, output_cloak_path)


Original Cosine Similarity Score: 0.188808873295784
Perturbed Cosine Similarity Score: 0.9323042631149292


ValueError: perplexity must be less than n_samples

In [None]:

# embeddings: [original_embedding, cloaked_embedding, ...reference_embeddings]

