# Extract CLIP Weights for JAX

Convert PyTorch CLIP visual-encoder weights (ViT-B/32 and, when available, ViT-L/14) and pre-computed text embeddings to NumPy arrays for use in JAX.

## 1. Install Dependencies

In [None]:
%pip install -q torch torchvision

%pip install -q git+https://github.com/openai/CLIP.git

%pip install -q numpy pillow

## 2. Mount Google Drive

In [None]:
import os
from google.colab import drive

# Attach Google Drive at /content/drive; the folders below are created under it.
drive.mount('/content/drive')

# Output folders used by this project, all under one root on Drive.
_HOPE_ROOT = '/content/drive/MyDrive/hope-models'
folders = [f'{_HOPE_ROOT}/{_sub}' for _sub in ('checkpoints', 'exports')]

# exist_ok=True keeps a re-run of this cell from failing on existing folders.
for target_dir in folders:
    os.makedirs(target_dir, exist_ok=True)

print("Drive mounted")

## 3. Import libraries

In [None]:
import clip
import torch

import numpy as np
import pickle

## 4. Load CLIP Models

Load ViT-B/32 (base) and, if it can be loaded, ViT-L/14 (large). Both models are switched to eval mode and their parameters are frozen.

In [None]:
# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Device: {device}")

# --- Base model (required) ---------------------------------------------------
print("\nLoading CLIP ViT-B/32...")
clip_base, preprocess_base = clip.load("ViT-B/32", device=device)
clip_base.eval()
print("ViT-B/32 loaded")

# --- Large model (optional) --------------------------------------------------
# Any failure while loading is reported and recorded in has_large so the rest
# of the notebook can skip the large model instead of stopping here.
print("\nLoading CLIP ViT-L/14...")
has_large = False
try:
    clip_large, preprocess_large = clip.load("ViT-L/14", device=device)
    clip_large.eval()
    print("ViT-L/14 loaded")
    has_large = True
except Exception as e:
    print(f"ViT-L/14 not available: {e}")

# Freeze every parameter of each loaded model; this notebook only exports
# weights and runs the text encoder under no_grad.
_loaded_models = [clip_base] + ([clip_large] if has_large else [])
for _model in _loaded_models:
    _model.requires_grad_(False)

## 5. Extract Weights

Convert the visual encoders' PyTorch parameters to NumPy arrays.

In [None]:
def extract_weights(model, name):
    """Collect a model's named parameters as NumPy arrays.

    Args:
        model: Object whose ``named_parameters()`` yields
            ``(parameter_name, tensor)`` pairs, e.g. a ``torch.nn.Module``.
        name: Label used only in the printed progress message.

    Returns:
        dict mapping each parameter name to that parameter's values as a
        NumPy array.
    """
    # .numpy() needs a CPU tensor that is not tracked by autograd, hence the
    # detach() and cpu() calls before the conversion.
    exported = {
        key: tensor.detach().cpu().numpy()
        for key, tensor in model.named_parameters()
    }
    print(f"Extracted {len(exported)} layers from {name}")
    return exported

# Only the visual encoder (the model's .visual submodule) is exported.
base_weights = extract_weights(clip_base.visual, "ViT-B/32")

# The large encoder is exported only when ViT-L/14 was loaded; otherwise the
# slot is kept as None.
large_weights = extract_weights(clip_large.visual, "ViT-L/14") if has_large else None

## 6. CLIP Constants

Normalization parameters and prompts from original Hope

In [None]:
# Per-channel normalization constants and the square input resolution.
CLIP_MEAN = np.array([0.48145466, 0.4578275, 0.40821073], dtype=np.float32)
CLIP_STD = np.array([0.26862954, 0.26130258, 0.27577711], dtype=np.float32)
CLIP_INPUT_SIZE = 224

# --- Noise algorithm prompts -------------------------------------------------
CHAOS_PROMPTS = [
    "completely destroyed corrupted visual data",
    "incomprehensible chaotic noise patterns",
    "severely distorted unrecognizable imagery",
    "broken fragmented visual information",
    "extreme digital corruption and artifacts",
    "meaningless random pixel arrangements",
    "visual chaos with no coherent structure",
    "utterly corrupted incomprehensible forms",
]

NORMAL_PROMPTS = [
    "a clear image",
    "a recognizable picture",
    "coherent visual content",
]

# --- Glaze algorithm prompts -------------------------------------------------
GLAZE_STYLE_PRESETS = {
    'Abstract': "abstract expressionist painting with chaotic brushstrokes",
    'Impressionist': "soft impressionist artwork with gentle light",
    'Cubist': "geometric cubist painting with fragmented forms",
    'Sketch': "rough pencil sketch with loose lines",
    'Watercolor': "delicate watercolor painting with flowing colors",
}

GLAZE_SOURCE_PROMPT = "realistic photograph with natural lighting"

# --- Nightshade algorithm prompts --------------------------------------------
NIGHTSHADE_TARGET_PROMPTS = {
    'Dog': "a photo of a dog",
    'Cat': "a photo of a cat",
    'Car': "a photo of a car",
    'Landscape': "a landscape photograph",
    'Person': "a photo of a person",
    'Building': "a photo of a building",
    'Food': "a photo of food",
    'Abstract': "abstract digital art",
}

NIGHTSHADE_GENERIC_PROMPT = "a clear photograph"

# Summary report: one entry per output line; an empty entry produces the blank
# line that separates the sections.
_summary_lines = [
    f"Input size: {CLIP_INPUT_SIZE}x{CLIP_INPUT_SIZE}",
    f"Mean: {CLIP_MEAN}",
    f"Std: {CLIP_STD}",
    "",
    "Noise algorithm:",
    f"  Chaos prompts: {len(CHAOS_PROMPTS)}",
    f"  Normal prompts: {len(NORMAL_PROMPTS)}",
    "",
    "Glaze algorithm:",
    f"  Style presets: {list(GLAZE_STYLE_PRESETS.keys())}",
    f"  Source prompt: '{GLAZE_SOURCE_PROMPT}'",
    "",
    "Nightshade algorithm:",
    f"  Target prompts: {list(NIGHTSHADE_TARGET_PROMPTS.keys())}",
    f"  Generic prompt: '{NIGHTSHADE_GENERIC_PROMPT}'",
]
print("\n".join(_summary_lines))

## 7. Extract Text Embeddings

Pre-compute normalized text embeddings for the Noise (chaos and normal), Glaze, and Nightshade prompts.

In [None]:
def _encode_normalized(model, tokens):
    """Encode tokenized prompts and scale each embedding to unit norm.

    Args:
        model: CLIP model providing ``encode_text``.
        tokens: Result of ``clip.tokenize(...)`` already moved to ``device``.

    Returns:
        Tensor of text features, each divided by its norm along the last
        dimension. Computed under ``torch.no_grad()``.
    """
    with torch.no_grad():
        features = model.encode_text(tokens)
        return features / features.norm(dim=-1, keepdim=True)


def _embed_prompt(model, prompt):
    """Return the normalized embedding of one prompt string as a NumPy array.

    The prompt is tokenized as a single-item batch, and the first (only) row
    of the result is returned.
    """
    tokens = clip.tokenize([prompt]).to(device)
    return _encode_normalized(model, tokens).cpu().numpy()[0]


def _embed_prompt_table(model, prompts, verbose=False):
    """Embed every prompt of a ``{label: prompt}`` dict, one prompt at a time.

    Args:
        model: CLIP model providing ``encode_text``.
        prompts: Mapping from label to prompt text.
        verbose: When true, print each label with its embedding shape as soon
            as it has been computed.

    Returns:
        dict mapping each label to its embedding (see ``_embed_prompt``).
    """
    table = {}
    for label, prompt in prompts.items():
        table[label] = _embed_prompt(model, prompt)
        if verbose:
            print(f"  {label}: {table[label].shape}")
    return table


# --- Noise algorithm ---------------------------------------------------------
print("Extracting Noise algorithm embeddings...")

# Tokenize once; the same tokens are fed to both models.
chaos_tokens = clip.tokenize(CHAOS_PROMPTS).to(device)
normal_tokens = clip.tokenize(NORMAL_PROMPTS).to(device)

# These four stay torch tensors: the save cell calls .cpu().numpy() on them.
chaos_base = _encode_normalized(clip_base, chaos_tokens)
normal_base = _encode_normalized(clip_base, normal_tokens)

if has_large:
    chaos_large = _encode_normalized(clip_large, chaos_tokens)
    normal_large = _encode_normalized(clip_large, normal_tokens)
else:
    chaos_large = None
    normal_large = None

print(f"  Chaos embeddings (base): {chaos_base.shape}")
print(f"  Normal embeddings (base): {normal_base.shape}")
if has_large:
    print(f"  Chaos embeddings (large): {chaos_large.shape}")
    print(f"  Normal embeddings (large): {normal_large.shape}")

# --- Glaze algorithm ---------------------------------------------------------
print("\nExtracting Glaze algorithm embeddings...")

glaze_style_embeddings_base = _embed_prompt_table(
    clip_base, GLAZE_STYLE_PRESETS, verbose=True
)
glaze_source_emb_base = _embed_prompt(clip_base, GLAZE_SOURCE_PROMPT)
print(f"  Source (realistic): {glaze_source_emb_base.shape}")

if has_large:
    glaze_style_embeddings_large = _embed_prompt_table(clip_large, GLAZE_STYLE_PRESETS)
    glaze_source_emb_large = _embed_prompt(clip_large, GLAZE_SOURCE_PROMPT)
else:
    glaze_style_embeddings_large = None
    glaze_source_emb_large = None

# --- Nightshade algorithm ----------------------------------------------------
print("\nExtracting Nightshade algorithm embeddings...")

nightshade_target_embeddings_base = _embed_prompt_table(
    clip_base, NIGHTSHADE_TARGET_PROMPTS, verbose=True
)
nightshade_generic_emb_base = _embed_prompt(clip_base, NIGHTSHADE_GENERIC_PROMPT)
print(f"  Generic: {nightshade_generic_emb_base.shape}")

if has_large:
    nightshade_target_embeddings_large = _embed_prompt_table(
        clip_large, NIGHTSHADE_TARGET_PROMPTS
    )
    nightshade_generic_emb_large = _embed_prompt(clip_large, NIGHTSHADE_GENERIC_PROMPT)
else:
    nightshade_target_embeddings_large = None
    nightshade_generic_emb_large = None

print("\nAll embeddings extracted successfully!")

## 8. Save to Drive

In [None]:
# Destination of the exported bundle on Google Drive.
save_path = '/content/drive/MyDrive/hope-models/checkpoints/clip_data.pkl'

# Everything exported by this notebook, gathered into one dict. Entries for
# the large model are None when ViT-L/14 was not loaded.
data = dict(
    # Visual encoder weights
    base_weights=base_weights,
    large_weights=large_weights,

    # Preprocessing constants
    clip_mean=CLIP_MEAN,
    clip_std=CLIP_STD,
    clip_input_size=CLIP_INPUT_SIZE,

    # Noise algorithm (embeddings are still torch tensors at this point)
    chaos_embeddings_base=chaos_base.cpu().numpy(),
    normal_embeddings_base=normal_base.cpu().numpy(),
    chaos_embeddings_large=chaos_large.cpu().numpy() if has_large else None,
    normal_embeddings_large=normal_large.cpu().numpy() if has_large else None,
    chaos_prompts=CHAOS_PROMPTS,
    normal_prompts=NORMAL_PROMPTS,

    # Glaze algorithm
    glaze_style_embeddings_base=glaze_style_embeddings_base,
    glaze_style_embeddings_large=glaze_style_embeddings_large,
    glaze_source_emb_base=glaze_source_emb_base,
    glaze_source_emb_large=glaze_source_emb_large,
    glaze_style_presets=GLAZE_STYLE_PRESETS,
    glaze_source_prompt=GLAZE_SOURCE_PROMPT,

    # Nightshade algorithm
    nightshade_target_embeddings_base=nightshade_target_embeddings_base,
    nightshade_target_embeddings_large=nightshade_target_embeddings_large,
    nightshade_generic_emb_base=nightshade_generic_emb_base,
    nightshade_generic_emb_large=nightshade_generic_emb_large,
    nightshade_target_prompts=NIGHTSHADE_TARGET_PROMPTS,
    nightshade_generic_prompt=NIGHTSHADE_GENERIC_PROMPT,
)

with open(save_path, 'wb') as out_file:
    pickle.dump(data, out_file)

# Report the size of the written file in MiB (1 << 20 bytes).
file_size = os.path.getsize(save_path) / (1 << 20)
print(f"Saved to: {save_path}")
print(f"Size: {file_size:.2f} MB")

## 9. Verify Data

In [None]:
def _describe_entry(key, value):
    """Return the report line(s) for one entry of the loaded bundle.

    The format depends on the value: None, NumPy array (shape), dict (count,
    plus one indented line per array when every value is an array), list
    (count), string (quoted), or anything else (printed as-is).
    """
    if value is None:
        return [f"  {key}: None"]
    if isinstance(value, np.ndarray):
        return [f"  {key}: shape {value.shape}"]
    if isinstance(value, dict):
        if not all(isinstance(item, np.ndarray) for item in value.values()):
            return [f"  {key}: {len(value)} items"]
        lines = [f"  {key}: {len(value)} embeddings"]
        lines.extend(f"    - {label}: {arr.shape}" for label, arr in value.items())
        return lines
    if isinstance(value, list):
        return [f"  {key}: {len(value)} items"]
    if isinstance(value, str):
        return [f"  {key}: '{value}'"]
    return [f"  {key}: {value}"]


# Read back the file written by the save step to confirm it round-trips.
with open(save_path, 'rb') as in_file:
    loaded = pickle.load(in_file)

print("Loaded successfully\n")
print("=" * 60)

# Report layout: section title -> keys listed under it, in display order.
categories = {
    'Model Weights': [
        'base_weights',
        'large_weights',
    ],
    'CLIP Constants': [
        'clip_mean',
        'clip_std',
        'clip_input_size',
    ],
    'Noise Algorithm': [
        'chaos_embeddings_base',
        'normal_embeddings_base',
        'chaos_embeddings_large',
        'normal_embeddings_large',
        'chaos_prompts',
        'normal_prompts',
    ],
    'Glaze Algorithm': [
        'glaze_style_embeddings_base',
        'glaze_style_embeddings_large',
        'glaze_source_emb_base',
        'glaze_source_emb_large',
        'glaze_style_presets',
        'glaze_source_prompt',
    ],
    'Nightshade Algorithm': [
        'nightshade_target_embeddings_base',
        'nightshade_target_embeddings_large',
        'nightshade_generic_emb_base',
        'nightshade_generic_emb_large',
        'nightshade_target_prompts',
        'nightshade_generic_prompt',
    ],
}

for category, keys in categories.items():
    print(f"\n{category}:")
    for key in keys:
        # Keys missing from the file are skipped silently.
        if key in loaded:
            for line in _describe_entry(key, loaded[key]):
                print(line)

print("\n" + "=" * 60)
print("Verification complete!")

## Complete

**Extracted:**
- ViT-B/32 visual encoder weights
- ViT-L/14 visual encoder weights (if available)

**Noise Algorithm:**
- Chaos embeddings (8 prompts)
- Normal embeddings (3 prompts)

**Glaze Algorithm:**
- Style embeddings (5 presets: Abstract, Impressionist, Cubist, Sketch, Watercolor)
- Source style embedding (realistic photograph)

**Nightshade Algorithm:**
- Target embeddings (8 targets: Dog, Cat, Car, Landscape, Person, Building, Food, Abstract)
- Generic embedding (clear photograph)

**Saved:** `/content/drive/MyDrive/hope-models/checkpoints/clip_data.pkl`

**Next:** Run `2_noise_algorithm.ipynb`, `3_glaze_algorithm.ipynb`, or `4_nightshade_algorithm.ipynb`