In [1]:
# --- 0. Setup ---
!pip install diffusers accelerate transformers safetensors torch torchvision scikit-learn




In [2]:
!pip install --upgrade diffusers transformers torch
!pip install xformers

Collecting transformers
  Downloading transformers-4.56.2-py3-none-any.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.1/40.1 kB[0m [31m3.8 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.56.2-py3-none-any.whl (11.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m11.6/11.6 MB[0m [31m127.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.56.1
    Uninstalling transformers-4.56.1:
      Successfully uninstalled transformers-4.56.1
Successfully installed transformers-4.56.2
Collecting xformers
  Downloading xformers-0.0.32.post2-cp39-abi3-manylinux_2_28_x86_64.whl.metadata (1.1 kB)
Downloading xformers-0.0.32.post2-cp39-abi3-manylinux_2_28_x86_64.whl (117.2 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m117.2/117.2 MB[0m [31m20.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collecte

In [4]:
#torch.cuda.empty_cache()
!pip install accelerate



In [None]:
# --- 0. Setup ---
#!pip install diffusers accelerate transformers safetensors torch torchvision scikit-learn imageio -q

import torch
import numpy as np
import matplotlib.pyplot as plt
from diffusers import StableDiffusionPipeline
from sklearn.decomposition import TruncatedSVD
import imageio

# --- 1. Device and RNG ---
device = "cuda"
generator = torch.Generator(device=device).manual_seed(1234)
print("Using device:", device)

torch.cuda.empty_cache()

# --- 2. Load SSD-1B pipeline ---
# SSD-1B is a distilled SDXL checkpoint, so it needs the SDXL pipeline class
# (two text encoders plus pooled-text / time-id conditioning on the UNet).
# Imported here so this cell stays runnable on its own.
from diffusers import StableDiffusionXLPipeline

pipe = StableDiffusionXLPipeline.from_pretrained(
    "segmind/SSD-1B",
    torch_dtype=torch.float16
).to(device)

pipe.enable_vae_tiling()
vae = pipe.vae

# --- 3. Encode prompt ---
prompt = "a surreal city of glass towers at sunset"
# encode_prompt returns a 4-tuple; without classifier-free guidance only the
# positive token-level and pooled embeddings are used.
prompt_embeds, _, pooled_embeds, _ = pipe.encode_prompt(
    prompt=prompt,
    device=device,
    num_images_per_prompt=1,
    do_classifier_free_guidance=False
)
print(f"prompt_embeds shape: {prompt_embeds.shape}")

# --- 4. Generate anchor latent ---
num_inference_steps = 30
ANCHOR_STEP = 20   # denoising step whose latent is kept as the anchor
LATENT_SIZE = 64   # latent height/width; pixel size is LATENT_SIZE * vae_scale_factor

# SDXL micro-conditioning: (original_size, crop_top_left, target_size),
# derived from the latent size so it matches the image actually generated.
image_size = LATENT_SIZE * pipe.vae_scale_factor
time_ids = torch.tensor(
    [[image_size, image_size, 0, 0, image_size, image_size]],
    device=device,
    dtype=prompt_embeds.dtype
)

latents = torch.randn(
    (1, pipe.unet.config.in_channels, LATENT_SIZE, LATENT_SIZE),
    device=device,
    dtype=torch.float16,
    generator=generator
)

pipe.scheduler.set_timesteps(num_inference_steps, device=device)
# The scheduler expects the starting noise scaled by its initial sigma
# (available only after set_timesteps).
latents = latents * pipe.scheduler.init_noise_sigma
z_anchor = None

# Inference only: outside no_grad each step keeps its autograd graph alive
# through `latents`, so memory grows with every iteration.
with torch.no_grad():
    for i, t in enumerate(pipe.scheduler.timesteps):
        latent_model_input = pipe.scheduler.scale_model_input(latents, t)
        noise_pred = pipe.unet(
            latent_model_input,
            t,
            encoder_hidden_states=prompt_embeds,
            added_cond_kwargs={
                "text_embeds": pooled_embeds,
                "time_ids": time_ids
            }
        ).sample
        step_out = pipe.scheduler.step(noise_pred, t, latents)
        latents = step_out.prev_sample if hasattr(step_out, "prev_sample") else step_out

        if i == ANCHOR_STEP:  # mid-step anchor
            z_anchor = latents.clone().detach()
            break

# Fewer steps than ANCHOR_STEP: fall back to the final latent.
if z_anchor is None:
    z_anchor = latents.clone().detach()

# --- 5. Proximal probes ---
def make_probes(z, n_probes=32, eps=0.05, generator=None):
    """Return ``n_probes`` perturbed copies of ``z`` at a fixed relative distance.

    Each probe is ``z`` plus Gaussian noise rescaled so that its norm equals
    ``eps * ||z||``.

    Args:
        z: Anchor tensor; probes are concatenated along dim 0.
        n_probes: Number of probes to draw.
        eps: Perturbation radius as a fraction of ``||z||``.
        generator: Optional ``torch.Generator`` living on ``z``'s device, for
            reproducible probes. ``None`` uses the global RNG.

    Returns:
        Tensor with ``n_probes * z.shape[0]`` rows, in ``z``'s dtype and device.
    """
    if n_probes <= 0:
        # torch.cat rejects an empty list; hand back an empty batch instead.
        return z.new_empty((0,) + tuple(z.shape[1:]))

    # Norms are taken in float32: in float16 the 1e-12 guard rounds to zero.
    z32 = z.float()
    radius = eps * z32.norm()
    probes = []
    for _ in range(n_probes):
        noise = torch.randn(z32.shape, generator=generator, device=z.device, dtype=torch.float32)
        noise = noise / noise.norm().clamp_min(1e-12) * radius
        probes.append((z32 + noise).to(z.dtype))
    return torch.cat(probes, dim=0)

z_probes = make_probes(z_anchor, n_probes=32, eps=0.05)

# --- 6. Decode probes ---
# Decode a few probes at a time and move each result to the CPU right away;
# a single 32-image VAE batch needs far more activation memory.
# NOTE(review): the SDXL VAE is reported to overflow in fp16 for some inputs —
# if images come out black/NaN, upcast the VAE; the cast below follows
# whatever dtype the VAE currently has.
DECODE_CHUNK = 4
with torch.no_grad():
    decoded = torch.cat([
        vae.decode((chunk / vae.config.scaling_factor).to(vae.dtype)).sample.float().cpu()
        for chunk in z_probes.split(DECODE_CHUNK, dim=0)
    ], dim=0)

imgs = (decoded.clamp(-1, 1) + 1) / 2
X = imgs.flatten(start_dim=1).numpy()

# --- 7. SVD resonance analysis ---
# TruncatedSVD does not centre its input, so subtract the mean image first;
# otherwise the leading component mostly describes the content shared by all
# probes instead of how they differ from each other.
X_centered = X - X.mean(axis=0, keepdims=True)
svd = TruncatedSVD(n_components=10, random_state=1234)  # seeded: the default solver is randomized
svd.fit(X_centered)
singular_vectors = svd.components_
print("Explained variance ratios:", svd.explained_variance_ratio_)

# --- 8. Animate along singular directions ---
def traverse_singular(z_start, direction_flat, n_frames=12, step_size=0.05, generator=None):
    """Decode a straight-line walk through latent space into image frames.

    Args:
        z_start: Starting latent (batch of one).
        direction_flat: 1-D array describing the mode to follow. If it has
            exactly ``z_start.numel()`` entries it is reshaped and used as the
            latent step direction. Otherwise only its norm is used and the
            direction itself is drawn at random.
        n_frames: Number of steps, one decoded frame per step.
        step_size: Multiplier applied to the direction at every step.
        generator: Optional ``torch.Generator`` on ``z_start``'s device that
            seeds the random fallback direction.

    Returns:
        List of ``n_frames`` uint8 arrays, the first image of each decode
        moved to channel-last layout.

    NOTE(review): the SVD in this notebook is fitted on decoded pixels, so its
    components do not have latent size and take the random branch — the "big"
    and "small" walks then differ only by the random draw. Pass a latent-sized
    direction to follow an actual mode.
    """
    z = z_start.clone().detach()
    direction = np.asarray(direction_flat, dtype=np.float32).ravel()

    if direction.size == z.numel():
        latent_dir = torch.from_numpy(direction).reshape(z.shape).to(z.device)
    else:
        magnitude = float(np.linalg.norm(direction))
        # Normalise in float32: in float16 the 1e-12 guard rounds to zero.
        latent_dir = torch.randn(z.shape, generator=generator, device=z.device, dtype=torch.float32)
        latent_dir = latent_dir / latent_dir.norm().clamp_min(1e-12) * magnitude
    latent_dir = latent_dir.to(z.dtype)

    frames = []
    with torch.no_grad():
        for _ in range(n_frames):
            z = z + step_size * latent_dir
            # Cast to the VAE's own dtype so an upcast VAE keeps working.
            out = vae.decode((z / vae.config.scaling_factor).to(vae.dtype)).sample
            img = (out.clamp(-1, 1) + 1) / 2
            frames.append((img[0].permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8))
    return frames

# First and last rows of the fitted component matrix.
big_mode, small_mode = singular_vectors[0], singular_vectors[-1]

# One traversal per mode, evaluated in order: big first, then small.
frames_big, frames_small = (
    traverse_singular(z_anchor, mode, n_frames=12, step_size=0.05)
    for mode in (big_mode, small_mode)
)

# --- 9. Display first frames ---
def show_frames(frames, title):
    """Plot up to the first six frames side by side under a shared title."""
    fig = plt.figure(figsize=(12, 4))
    for slot, frame in enumerate(frames[:6], start=1):
        axis = fig.add_subplot(1, 6, slot)
        axis.imshow(frame)
        axis.axis("off")
    fig.suptitle(title, fontsize=14)
    plt.show()

for frames, title in (
    (frames_big, "BIG singular mode"),
    (frames_small, "SMALL singular mode"),
):
    show_frames(frames, title)

# --- 10. Export GIFs ---
# NOTE(review): recent imageio releases may read GIF `duration` in
# milliseconds rather than seconds — confirm 0.2 gives the intended pacing.
for gif_path, frames in (
    ("animation_big.gif", frames_big),
    ("animation_small.gif", frames_small),
):
    imageio.mimsave(gif_path, frames, duration=0.2)

print("GIFs saved: animation_big.gif, animation_small.gif")


In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from diffusers import StableDiffusionXLPipeline
from sklearn.decomposition import TruncatedSVD
import imageio
import os

# --- 1. Device and RNG ---
device = "cuda"
generator = torch.Generator(device=device).manual_seed(1234)
print("Using device:", device)

# Clear GPU memory
torch.cuda.empty_cache()

# --- 2. Load SDXL pipeline ---
# Load directly in fp16 and leave device placement to the offload hooks:
# moving the whole pipeline to the GPU first works against model CPU offload.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16
)

# Enable memory optimizations
pipe.enable_model_cpu_offload()  # Primary memory fix
pipe.enable_vae_tiling()
pipe.enable_xformers_memory_efficient_attention()  # Requires the xformers package
vae = pipe.vae
print("U-Net config:", pipe.unet.config)

# --- 3. Encode prompt ---
prompt = "a surreal city of glass towers at sunset"
prompt_embeds, _, pooled_embeds, _ = pipe.encode_prompt(
    prompt=prompt,
    device=device,
    num_images_per_prompt=1,
    do_classifier_free_guidance=False
)
print(f"prompt_embeds shape: {prompt_embeds.shape}")
print(f"pooled_embeds shape: {pooled_embeds.shape}")

# --- 4. Generate anchor latent ---
num_inference_steps = 30
ANCHOR_STEP = 20   # denoising step whose latent is kept as the anchor
LATENT_SIZE = 64   # Reduced resolution; pixel size is LATENT_SIZE * vae_scale_factor

# SDXL micro-conditioning: (original_size, crop_top_left, target_size),
# derived from the latent size so it matches the image actually generated.
image_size = LATENT_SIZE * pipe.vae_scale_factor
time_ids = torch.tensor(
    [[image_size, image_size, 0, 0, image_size, image_size]],
    device=device,
    dtype=torch.float16
)
print(f"time_ids shape: {time_ids.shape}")

latents = torch.randn(
    (1, pipe.unet.config.in_channels, LATENT_SIZE, LATENT_SIZE),
    device=device,
    dtype=torch.float16,
    generator=generator
)

pipe.scheduler.set_timesteps(num_inference_steps, device=device)
# The scheduler expects the starting noise scaled by its initial sigma
# (available only after set_timesteps).
latents = latents * pipe.scheduler.init_noise_sigma
z_anchor = None

# Inference only: outside no_grad each step keeps its autograd graph alive
# through `latents`, so memory grows with every iteration.
with torch.no_grad():
    for i, t in enumerate(pipe.scheduler.timesteps):
        latent_model_input = pipe.scheduler.scale_model_input(latents, t)
        print(f"Step {i}, latent_model_input shape: {latent_model_input.shape}")
        noise_pred = pipe.unet(
            latent_model_input,
            t,
            encoder_hidden_states=prompt_embeds,
            # The UNet embeds time_ids itself and concatenates the result with
            # text_embeds, so text_embeds must be the pooled embedding alone.
            added_cond_kwargs={
                "text_embeds": pooled_embeds,
                "time_ids": time_ids
            }
        ).sample
        step_out = pipe.scheduler.step(noise_pred, t, latents)
        latents = step_out.prev_sample if hasattr(step_out, "prev_sample") else step_out

        if i == ANCHOR_STEP:
            z_anchor = latents.clone().detach()
            break

# Fewer steps than ANCHOR_STEP: fall back to the final latent.
if z_anchor is None:
    z_anchor = latents.clone().detach()

# --- 5. Proximal probes ---
def make_probes(z, n_probes=32, eps=0.05, generator=None):
    """Return ``n_probes`` perturbed copies of ``z`` at a fixed relative distance.

    Each probe is ``z`` plus Gaussian noise rescaled so that its norm equals
    ``eps * ||z||``.

    Args:
        z: Anchor tensor; probes are concatenated along dim 0.
        n_probes: Number of probes to draw.
        eps: Perturbation radius as a fraction of ``||z||``.
        generator: Optional ``torch.Generator`` living on ``z``'s device, for
            reproducible probes. ``None`` uses the global RNG.

    Returns:
        Tensor with ``n_probes * z.shape[0]`` rows, in ``z``'s dtype and device.
    """
    if n_probes <= 0:
        # torch.cat rejects an empty list; hand back an empty batch instead.
        return z.new_empty((0,) + tuple(z.shape[1:]))

    # Norms are taken in float32: in float16 the 1e-12 guard rounds to zero.
    z32 = z.float()
    radius = eps * z32.norm()
    probes = []
    for _ in range(n_probes):
        noise = torch.randn(z32.shape, generator=generator, device=z.device, dtype=torch.float32)
        noise = noise / noise.norm().clamp_min(1e-12) * radius
        probes.append((z32 + noise).to(z.dtype))
    return torch.cat(probes, dim=0)

z_probes = make_probes(z_anchor, n_probes=32, eps=0.05)

# --- 6. Decode probes ---
# Decode a few probes at a time and move each result to the CPU right away;
# a single 32-image VAE batch needs far more activation memory.
# NOTE(review): the SDXL VAE is reported to overflow in fp16 for some inputs —
# if images come out black/NaN, upcast the VAE; the cast below follows
# whatever dtype the VAE currently has.
DECODE_CHUNK = 4
with torch.no_grad():
    decoded = torch.cat([
        vae.decode((chunk / vae.config.scaling_factor).to(vae.dtype)).sample.float().cpu()
        for chunk in z_probes.split(DECODE_CHUNK, dim=0)
    ], dim=0)

imgs = (decoded.clamp(-1, 1) + 1) / 2
X = imgs.flatten(start_dim=1).numpy()

# --- 7. SVD resonance analysis ---
# TruncatedSVD does not centre its input, so subtract the mean image first;
# otherwise the leading component mostly describes the content shared by all
# probes instead of how they differ from each other.
X_centered = X - X.mean(axis=0, keepdims=True)
svd = TruncatedSVD(n_components=10, random_state=1234)  # seeded: the default solver is randomized
svd.fit(X_centered)
singular_vectors = svd.components_
print("Explained variance ratios:", svd.explained_variance_ratio_)

# --- 8. Animate along singular directions ---
def traverse_singular(z_start, direction_flat, n_frames=12, step_size=0.05, generator=None):
    """Decode a straight-line walk through latent space into image frames.

    Args:
        z_start: Starting latent (batch of one).
        direction_flat: 1-D array describing the mode to follow. If it has
            exactly ``z_start.numel()`` entries it is reshaped and used as the
            latent step direction. Otherwise only its norm is used and the
            direction itself is drawn at random.
        n_frames: Number of steps, one decoded frame per step.
        step_size: Multiplier applied to the direction at every step.
        generator: Optional ``torch.Generator`` on ``z_start``'s device that
            seeds the random fallback direction.

    Returns:
        List of ``n_frames`` uint8 arrays, the first image of each decode
        moved to channel-last layout.

    NOTE(review): the SVD in this notebook is fitted on decoded pixels, so its
    components do not have latent size and take the random branch — the "big"
    and "small" walks then differ only by the random draw. Pass a latent-sized
    direction to follow an actual mode.
    """
    z = z_start.clone().detach()
    direction = np.asarray(direction_flat, dtype=np.float32).ravel()

    if direction.size == z.numel():
        latent_dir = torch.from_numpy(direction).reshape(z.shape).to(z.device)
    else:
        magnitude = float(np.linalg.norm(direction))
        # Normalise in float32: in float16 the 1e-12 guard rounds to zero.
        latent_dir = torch.randn(z.shape, generator=generator, device=z.device, dtype=torch.float32)
        latent_dir = latent_dir / latent_dir.norm().clamp_min(1e-12) * magnitude
    latent_dir = latent_dir.to(z.dtype)

    frames = []
    with torch.no_grad():
        for _ in range(n_frames):
            z = z + step_size * latent_dir
            # Cast to the VAE's own dtype so an upcast VAE keeps working.
            out = vae.decode((z / vae.config.scaling_factor).to(vae.dtype)).sample
            img = (out.clamp(-1, 1) + 1) / 2
            frames.append((img[0].permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8))
    return frames

# First and last rows of the fitted component matrix.
big_mode, small_mode = singular_vectors[0], singular_vectors[-1]

# One traversal per mode, evaluated in order: big first, then small.
frames_big, frames_small = (
    traverse_singular(z_anchor, mode, n_frames=12, step_size=0.05)
    for mode in (big_mode, small_mode)
)

# --- 9. Display first frames ---
def show_frames(frames, title):
    """Plot up to the first six frames side by side under a shared title."""
    fig = plt.figure(figsize=(12, 4))
    for slot, frame in enumerate(frames[:6], start=1):
        axis = fig.add_subplot(1, 6, slot)
        axis.imshow(frame)
        axis.axis("off")
    fig.suptitle(title, fontsize=14)
    plt.show()

for frames, title in (
    (frames_big, "BIG singular mode"),
    (frames_small, "SMALL singular mode"),
):
    show_frames(frames, title)

# --- 10. Export GIFs ---
# NOTE(review): recent imageio releases may read GIF `duration` in
# milliseconds rather than seconds — confirm 0.2 gives the intended pacing.
for gif_path, frames in (
    ("animation_big.gif", frames_big),
    ("animation_small.gif", frames_small),
):
    imageio.mimsave(gif_path, frames, duration=0.2)

print("GIFs saved: animation_big.gif, animation_small.gif")

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from diffusers import StableDiffusionXLPipeline
from sklearn.decomposition import TruncatedSVD
import imageio
import os

# Set PyTorch memory configuration
# NOTE(review): presumably only honoured if set before CUDA is first
# initialised in this kernel — confirm, or restart the kernel before running.
os.environ["PYTORCH_CUDA_ALLOC_CONF"] = "expandable_segments:True"

# --- 1. Device and RNG ---
device = "cuda"
generator = torch.Generator(device=device).manual_seed(1234)
print("Using device:", device)

# Clear GPU memory
torch.cuda.empty_cache()

# --- 2. Load SDXL pipeline ---
# Load directly in fp16 and leave device placement to the offload hooks:
# moving the whole pipeline to the GPU first works against model CPU offload.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16
)

# Enable memory optimizations
pipe.enable_model_cpu_offload()  # Primary memory fix
pipe.enable_vae_tiling()
pipe.enable_xformers_memory_efficient_attention()  # Requires the xformers package
vae = pipe.vae
print("U-Net config:", pipe.unet.config)

# --- 3. Encode prompt ---
prompt = "a surreal city of glass towers at sunset"
prompt_embeds, _, pooled_embeds, _ = pipe.encode_prompt(
    prompt=prompt,
    device=device,
    num_images_per_prompt=1,
    do_classifier_free_guidance=False
)
print(f"prompt_embeds shape: {prompt_embeds.shape}")
print(f"pooled_embeds shape: {pooled_embeds.shape}")

# --- 4. Generate anchor latent ---
num_inference_steps = 30
ANCHOR_STEP = 20   # denoising step whose latent is kept as the anchor
LATENT_SIZE = 64   # Reduced resolution; pixel size is LATENT_SIZE * vae_scale_factor

# SDXL micro-conditioning: (original_size, crop_top_left, target_size),
# derived from the latent size so it matches the image actually generated.
image_size = LATENT_SIZE * pipe.vae_scale_factor
time_ids = torch.tensor(
    [[image_size, image_size, 0, 0, image_size, image_size]],
    device=device,
    dtype=torch.float16
)
print(f"time_ids shape: {time_ids.shape}")

latents = torch.randn(
    (1, pipe.unet.config.in_channels, LATENT_SIZE, LATENT_SIZE),
    device=device,
    dtype=torch.float16,
    generator=generator
)

pipe.scheduler.set_timesteps(num_inference_steps, device=device)
# The scheduler expects the starting noise scaled by its initial sigma
# (available only after set_timesteps).
latents = latents * pipe.scheduler.init_noise_sigma
z_anchor = None

# Inference only: outside no_grad each step keeps its autograd graph alive
# through `latents`, so memory grows with every iteration.
with torch.no_grad():
    for i, t in enumerate(pipe.scheduler.timesteps):
        latent_model_input = pipe.scheduler.scale_model_input(latents, t)
        print(f"Step {i}, latent_model_input shape: {latent_model_input.shape}")
        noise_pred = pipe.unet(
            latent_model_input,
            t,
            encoder_hidden_states=prompt_embeds,
            added_cond_kwargs={
                "text_embeds": pooled_embeds,
                "time_ids": time_ids
            }
        ).sample
        step_out = pipe.scheduler.step(noise_pred, t, latents)
        latents = step_out.prev_sample if hasattr(step_out, "prev_sample") else step_out

        if i == ANCHOR_STEP:
            z_anchor = latents.clone().detach()
            break

# Fewer steps than ANCHOR_STEP: fall back to the final latent.
if z_anchor is None:
    z_anchor = latents.clone().detach()

# --- 5. Proximal probes ---
def make_probes(z, n_probes=32, eps=0.05, generator=None):
    """Return ``n_probes`` perturbed copies of ``z`` at a fixed relative distance.

    Each probe is ``z`` plus Gaussian noise rescaled so that its norm equals
    ``eps * ||z||``.

    Args:
        z: Anchor tensor; probes are concatenated along dim 0.
        n_probes: Number of probes to draw.
        eps: Perturbation radius as a fraction of ``||z||``.
        generator: Optional ``torch.Generator`` living on ``z``'s device, for
            reproducible probes. ``None`` uses the global RNG.

    Returns:
        Tensor with ``n_probes * z.shape[0]`` rows, in ``z``'s dtype and device.
    """
    if n_probes <= 0:
        # torch.cat rejects an empty list; hand back an empty batch instead.
        return z.new_empty((0,) + tuple(z.shape[1:]))

    # Norms are taken in float32: in float16 the 1e-12 guard rounds to zero.
    z32 = z.float()
    radius = eps * z32.norm()
    probes = []
    for _ in range(n_probes):
        noise = torch.randn(z32.shape, generator=generator, device=z.device, dtype=torch.float32)
        noise = noise / noise.norm().clamp_min(1e-12) * radius
        probes.append((z32 + noise).to(z.dtype))
    return torch.cat(probes, dim=0)

z_probes = make_probes(z_anchor, n_probes=32, eps=0.05)

# --- 6. Decode probes ---
# Decode a few probes at a time and move each result to the CPU right away;
# a single 32-image VAE batch needs far more activation memory.
# NOTE(review): the SDXL VAE is reported to overflow in fp16 for some inputs —
# if images come out black/NaN, upcast the VAE; the cast below follows
# whatever dtype the VAE currently has.
DECODE_CHUNK = 4
with torch.no_grad():
    decoded = torch.cat([
        vae.decode((chunk / vae.config.scaling_factor).to(vae.dtype)).sample.float().cpu()
        for chunk in z_probes.split(DECODE_CHUNK, dim=0)
    ], dim=0)

imgs = (decoded.clamp(-1, 1) + 1) / 2
X = imgs.flatten(start_dim=1).numpy()

# --- 7. SVD resonance analysis ---
# TruncatedSVD does not centre its input, so subtract the mean image first;
# otherwise the leading component mostly describes the content shared by all
# probes instead of how they differ from each other.
X_centered = X - X.mean(axis=0, keepdims=True)
svd = TruncatedSVD(n_components=10, random_state=1234)  # seeded: the default solver is randomized
svd.fit(X_centered)
singular_vectors = svd.components_
print("Explained variance ratios:", svd.explained_variance_ratio_)

# --- 8. Animate along singular directions ---
def traverse_singular(z_start, direction_flat, n_frames=12, step_size=0.05, generator=None):
    """Decode a straight-line walk through latent space into image frames.

    Args:
        z_start: Starting latent (batch of one).
        direction_flat: 1-D array describing the mode to follow. If it has
            exactly ``z_start.numel()`` entries it is reshaped and used as the
            latent step direction. Otherwise only its norm is used and the
            direction itself is drawn at random.
        n_frames: Number of steps, one decoded frame per step.
        step_size: Multiplier applied to the direction at every step.
        generator: Optional ``torch.Generator`` on ``z_start``'s device that
            seeds the random fallback direction.

    Returns:
        List of ``n_frames`` uint8 arrays, the first image of each decode
        moved to channel-last layout.

    NOTE(review): the SVD in this notebook is fitted on decoded pixels, so its
    components do not have latent size and take the random branch — the "big"
    and "small" walks then differ only by the random draw. Pass a latent-sized
    direction to follow an actual mode.
    """
    z = z_start.clone().detach()
    direction = np.asarray(direction_flat, dtype=np.float32).ravel()

    if direction.size == z.numel():
        latent_dir = torch.from_numpy(direction).reshape(z.shape).to(z.device)
    else:
        magnitude = float(np.linalg.norm(direction))
        # Normalise in float32: in float16 the 1e-12 guard rounds to zero.
        latent_dir = torch.randn(z.shape, generator=generator, device=z.device, dtype=torch.float32)
        latent_dir = latent_dir / latent_dir.norm().clamp_min(1e-12) * magnitude
    latent_dir = latent_dir.to(z.dtype)

    frames = []
    with torch.no_grad():
        for _ in range(n_frames):
            z = z + step_size * latent_dir
            # Cast to the VAE's own dtype so an upcast VAE keeps working.
            out = vae.decode((z / vae.config.scaling_factor).to(vae.dtype)).sample
            img = (out.clamp(-1, 1) + 1) / 2
            frames.append((img[0].permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8))
    return frames

# First and last rows of the fitted component matrix.
big_mode, small_mode = singular_vectors[0], singular_vectors[-1]

# One traversal per mode, evaluated in order: big first, then small.
frames_big, frames_small = (
    traverse_singular(z_anchor, mode, n_frames=12, step_size=0.05)
    for mode in (big_mode, small_mode)
)

# --- 9. Display first frames ---
def show_frames(frames, title):
    """Plot up to the first six frames side by side under a shared title."""
    fig = plt.figure(figsize=(12, 4))
    for slot, frame in enumerate(frames[:6], start=1):
        axis = fig.add_subplot(1, 6, slot)
        axis.imshow(frame)
        axis.axis("off")
    fig.suptitle(title, fontsize=14)
    plt.show()

for frames, title in (
    (frames_big, "BIG singular mode"),
    (frames_small, "SMALL singular mode"),
):
    show_frames(frames, title)

# --- 10. Export GIFs ---
# NOTE(review): recent imageio releases may read GIF `duration` in
# milliseconds rather than seconds — confirm 0.2 gives the intended pacing.
for gif_path, frames in (
    ("animation_big.gif", frames_big),
    ("animation_small.gif", frames_small),
):
    imageio.mimsave(gif_path, frames, duration=0.2)

print("GIFs saved: animation_big.gif, animation_small.gif")

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from diffusers import StableDiffusionXLPipeline
from sklearn.decomposition import TruncatedSVD
import imageio
torch.cuda.empty_cache()
# --- 1. Device and RNG ---
device = "cuda"
generator = torch.Generator(device=device).manual_seed(1234)
print("Using device:", device)

# --- 2. Load SDXL pipeline ---
# Load the weights directly in fp16 instead of materialising fp32 first.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16
).to(device)

pipe.enable_vae_tiling()
vae = pipe.vae
print("U-Net config:", pipe.unet.config)

# --- 3. Encode prompt ---
prompt = "a surreal city of glass towers at sunset"

# Use pipe.encode_prompt to handle both text encoders
prompt_embeds, _, pooled_embeds, _ = pipe.encode_prompt(
    prompt=prompt,
    device=device,
    num_images_per_prompt=1,
    do_classifier_free_guidance=False
)

# Ensure shapes are correct
print(f"prompt_embeds shape: {prompt_embeds.shape}")  # Expected: (1, 77, 2048)
print(f"pooled_embeds shape: {pooled_embeds.shape}")  # Expected: (1, 1280)
# Time IDs for 1024x1024
time_ids = torch.tensor([[1024, 1024, 0, 0, 1024, 1024]], device=device, dtype=torch.float16)
print(f"time_ids shape: {time_ids.shape}")

# --- 4. Generate anchor latent ---
num_inference_steps = 30
ANCHOR_STEP = 20  # denoising step whose latent is kept as the anchor
latents = torch.randn(
    (1, pipe.unet.config.in_channels, 128, 128),
    device=device,
    dtype=torch.float16,
    generator=generator
)

pipe.scheduler.set_timesteps(num_inference_steps, device=device)
# The scheduler expects the starting noise scaled by its initial sigma
# (available only after set_timesteps).
latents = latents * pipe.scheduler.init_noise_sigma
z_anchor = None

# Inference only: outside no_grad each step keeps its autograd graph alive
# through `latents`, so memory grows with every iteration.
with torch.no_grad():
    for i, t in enumerate(pipe.scheduler.timesteps):
        latent_model_input = pipe.scheduler.scale_model_input(latents, t)
        print(f"Step {i}, latent_model_input shape: {latent_model_input.shape}")
        noise_pred = pipe.unet(
            latent_model_input,
            t,
            encoder_hidden_states=prompt_embeds,
            added_cond_kwargs={
                "text_embeds": pooled_embeds,
                "time_ids": time_ids
            }
        ).sample
        step_out = pipe.scheduler.step(noise_pred, t, latents)
        latents = step_out.prev_sample if hasattr(step_out, "prev_sample") else step_out

        if i == ANCHOR_STEP:
            z_anchor = latents.clone().detach()
            break

# Fewer steps than ANCHOR_STEP: fall back to the final latent.
if z_anchor is None:
    z_anchor = latents.clone().detach()

# --- 5. Proximal probes ---
def make_probes(z, n_probes=32, eps=0.05, generator=None):
    """Return ``n_probes`` perturbed copies of ``z`` at a fixed relative distance.

    Each probe is ``z`` plus Gaussian noise rescaled so that its norm equals
    ``eps * ||z||``.

    Args:
        z: Anchor tensor; probes are concatenated along dim 0.
        n_probes: Number of probes to draw.
        eps: Perturbation radius as a fraction of ``||z||``.
        generator: Optional ``torch.Generator`` living on ``z``'s device, for
            reproducible probes. ``None`` uses the global RNG.

    Returns:
        Tensor with ``n_probes * z.shape[0]`` rows, in ``z``'s dtype and device.
    """
    if n_probes <= 0:
        # torch.cat rejects an empty list; hand back an empty batch instead.
        return z.new_empty((0,) + tuple(z.shape[1:]))

    # Norms are taken in float32: in float16 the 1e-12 guard rounds to zero.
    z32 = z.float()
    radius = eps * z32.norm()
    probes = []
    for _ in range(n_probes):
        noise = torch.randn(z32.shape, generator=generator, device=z.device, dtype=torch.float32)
        noise = noise / noise.norm().clamp_min(1e-12) * radius
        probes.append((z32 + noise).to(z.dtype))
    return torch.cat(probes, dim=0)

z_probes = make_probes(z_anchor, n_probes=32, eps=0.05)

# --- 6. Decode probes ---
# Decode a few probes at a time and move each result to the CPU right away;
# a single 32-image VAE batch needs far more activation memory.
# NOTE(review): the SDXL VAE is reported to overflow in fp16 for some inputs —
# if images come out black/NaN, upcast the VAE; the cast below follows
# whatever dtype the VAE currently has.
DECODE_CHUNK = 4
with torch.no_grad():
    decoded = torch.cat([
        vae.decode((chunk / vae.config.scaling_factor).to(vae.dtype)).sample.float().cpu()
        for chunk in z_probes.split(DECODE_CHUNK, dim=0)
    ], dim=0)

imgs = (decoded.clamp(-1, 1) + 1) / 2
X = imgs.flatten(start_dim=1).numpy()

# --- 7. SVD resonance analysis ---
# TruncatedSVD does not centre its input, so subtract the mean image first;
# otherwise the leading component mostly describes the content shared by all
# probes instead of how they differ from each other.
X_centered = X - X.mean(axis=0, keepdims=True)
svd = TruncatedSVD(n_components=10, random_state=1234)  # seeded: the default solver is randomized
svd.fit(X_centered)
singular_vectors = svd.components_
print("Explained variance ratios:", svd.explained_variance_ratio_)

# --- 8. Animate along singular directions ---
def traverse_singular(z_start, direction_flat, n_frames=12, step_size=0.05, generator=None):
    """Decode a straight-line walk through latent space into image frames.

    Args:
        z_start: Starting latent (batch of one).
        direction_flat: 1-D array describing the mode to follow. If it has
            exactly ``z_start.numel()`` entries it is reshaped and used as the
            latent step direction. Otherwise only its norm is used and the
            direction itself is drawn at random.
        n_frames: Number of steps, one decoded frame per step.
        step_size: Multiplier applied to the direction at every step.
        generator: Optional ``torch.Generator`` on ``z_start``'s device that
            seeds the random fallback direction.

    Returns:
        List of ``n_frames`` uint8 arrays, the first image of each decode
        moved to channel-last layout.

    NOTE(review): the SVD in this notebook is fitted on decoded pixels, so its
    components do not have latent size and take the random branch — the "big"
    and "small" walks then differ only by the random draw. Pass a latent-sized
    direction to follow an actual mode.
    """
    z = z_start.clone().detach()
    direction = np.asarray(direction_flat, dtype=np.float32).ravel()

    if direction.size == z.numel():
        latent_dir = torch.from_numpy(direction).reshape(z.shape).to(z.device)
    else:
        magnitude = float(np.linalg.norm(direction))
        # Normalise in float32: in float16 the 1e-12 guard rounds to zero.
        latent_dir = torch.randn(z.shape, generator=generator, device=z.device, dtype=torch.float32)
        latent_dir = latent_dir / latent_dir.norm().clamp_min(1e-12) * magnitude
    latent_dir = latent_dir.to(z.dtype)

    frames = []
    with torch.no_grad():
        for _ in range(n_frames):
            z = z + step_size * latent_dir
            # Cast to the VAE's own dtype so an upcast VAE keeps working.
            out = vae.decode((z / vae.config.scaling_factor).to(vae.dtype)).sample
            img = (out.clamp(-1, 1) + 1) / 2
            frames.append((img[0].permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8))
    return frames

# First and last rows of the fitted component matrix.
big_mode, small_mode = singular_vectors[0], singular_vectors[-1]

# One traversal per mode, evaluated in order: big first, then small.
frames_big, frames_small = (
    traverse_singular(z_anchor, mode, n_frames=12, step_size=0.05)
    for mode in (big_mode, small_mode)
)

# --- 9. Display first frames ---
def show_frames(frames, title):
    """Plot up to the first six frames side by side under a shared title."""
    fig = plt.figure(figsize=(12, 4))
    for slot, frame in enumerate(frames[:6], start=1):
        axis = fig.add_subplot(1, 6, slot)
        axis.imshow(frame)
        axis.axis("off")
    fig.suptitle(title, fontsize=14)
    plt.show()

for frames, title in (
    (frames_big, "BIG singular mode"),
    (frames_small, "SMALL singular mode"),
):
    show_frames(frames, title)

# --- 10. Export GIFs ---
# NOTE(review): recent imageio releases may read GIF `duration` in
# milliseconds rather than seconds — confirm 0.2 gives the intended pacing.
for gif_path, frames in (
    ("animation_big.gif", frames_big),
    ("animation_small.gif", frames_small),
):
    imageio.mimsave(gif_path, frames, duration=0.2)

print("GIFs saved: animation_big.gif, animation_small.gif")

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from diffusers import StableDiffusionXLPipeline
from sklearn.decomposition import TruncatedSVD
import imageio

# --- 1. Device and RNG ---
device = "cuda"
generator = torch.Generator(device=device).manual_seed(1234)
print("Using device:", device)

# --- 2. Load SDXL pipeline ---
# Load the weights directly in fp16 instead of materialising fp32 first.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    torch_dtype=torch.float16
    # variant="fp16"  # Try without variant to rule out issues
).to(device)

pipe.enable_vae_tiling()
vae = pipe.vae
print("U-Net config:", pipe.unet.config)  # Debug U-Net configuration

# --- 3. Encode prompt ---
prompt = "a surreal city of glass towers at sunset"
# The pooled embedding for the UNet's text/time conditioning is the third
# value returned by encode_prompt; the first text encoder's pooler_output has
# a different width and is not what the UNet was trained on.
prompt_embeds, _, pooled_embeds, _ = pipe.encode_prompt(
    prompt=prompt,
    device=device,
    num_images_per_prompt=1,
    do_classifier_free_guidance=False
)

# Debug shapes
print(f"prompt_embeds[0] shape: {prompt_embeds[0].shape}")
print(f"pooled_embeds shape: {pooled_embeds.shape}")

# Time IDs for 1024x1024
time_ids = torch.tensor([[1024, 1024, 0, 0, 1024, 1024]], device=device, dtype=torch.float16)
print(f"time_ids shape: {time_ids.shape}")

# --- 4. Generate anchor latent ---
num_inference_steps = 30
ANCHOR_STEP = 20  # denoising step whose latent is kept as the anchor
latents = torch.randn(
    (1, pipe.unet.config.in_channels, 128, 128),  # 1024x1024
    device=device,
    dtype=torch.float16,
    generator=generator
)

pipe.scheduler.set_timesteps(num_inference_steps, device=device)
# The scheduler expects the starting noise scaled by its initial sigma
# (available only after set_timesteps).
latents = latents * pipe.scheduler.init_noise_sigma
z_anchor = None

# Add hook to debug U-Net layers
def hook_fn(module, input, output):
    """Print the shape of the first positional input of a hooked module."""
    if input:  # modules called with keyword arguments only have no positional input
        print(f"Module: {module.__class__.__name__}, Input shape: {input[0].shape}")

# Keep the handles so the hooks can be detached again.
hook_handles = [
    module.register_forward_hook(hook_fn)
    for name, module in pipe.unet.named_modules()
    if isinstance(module, (torch.nn.Linear, torch.nn.Conv2d))
]

# Inference only: outside no_grad each step keeps its autograd graph alive
# through `latents`, so memory grows with every iteration.
with torch.no_grad():
    for i, t in enumerate(pipe.scheduler.timesteps):
        latent_model_input = pipe.scheduler.scale_model_input(latents, t)
        print(f"Step {i}, latent_model_input shape: {latent_model_input.shape}")
        noise_pred = pipe.unet(
            latent_model_input,
            t,
            # Full (batch, tokens, dim) tensor: indexing [0] drops the batch axis.
            encoder_hidden_states=prompt_embeds,
            added_cond_kwargs={
                "text_embeds": pooled_embeds,
                "time_ids": time_ids
            }
        ).sample
        # One traced forward pass is enough; detach the debug hooks so later
        # steps do not reprint every layer.
        while hook_handles:
            hook_handles.pop().remove()
        step_out = pipe.scheduler.step(noise_pred, t, latents)
        latents = step_out.prev_sample if hasattr(step_out, "prev_sample") else step_out

        if i == ANCHOR_STEP:
            z_anchor = latents.clone().detach()
            break

# Fewer steps than ANCHOR_STEP: fall back to the final latent.
if z_anchor is None:
    z_anchor = latents.clone().detach()

# --- 5. Proximal probes ---
def make_probes(z, n_probes=32, eps=0.05, generator=None):
    """Return ``n_probes`` perturbed copies of ``z`` at a fixed relative distance.

    Each probe is ``z`` plus Gaussian noise rescaled so that its norm equals
    ``eps * ||z||``.

    Args:
        z: Anchor tensor; probes are concatenated along dim 0.
        n_probes: Number of probes to draw.
        eps: Perturbation radius as a fraction of ``||z||``.
        generator: Optional ``torch.Generator`` living on ``z``'s device, for
            reproducible probes. ``None`` uses the global RNG.

    Returns:
        Tensor with ``n_probes * z.shape[0]`` rows, in ``z``'s dtype and device.
    """
    if n_probes <= 0:
        # torch.cat rejects an empty list; hand back an empty batch instead.
        return z.new_empty((0,) + tuple(z.shape[1:]))

    # Norms are taken in float32: in float16 the 1e-12 guard rounds to zero.
    z32 = z.float()
    radius = eps * z32.norm()
    probes = []
    for _ in range(n_probes):
        noise = torch.randn(z32.shape, generator=generator, device=z.device, dtype=torch.float32)
        noise = noise / noise.norm().clamp_min(1e-12) * radius
        probes.append((z32 + noise).to(z.dtype))
    return torch.cat(probes, dim=0)

z_probes = make_probes(z_anchor, n_probes=32, eps=0.05)

# --- 6. Decode probes ---
# Decode a few probes at a time and move each result to the CPU right away;
# a single 32-image VAE batch needs far more activation memory.
# NOTE(review): the SDXL VAE is reported to overflow in fp16 for some inputs —
# if images come out black/NaN, upcast the VAE; the cast below follows
# whatever dtype the VAE currently has.
DECODE_CHUNK = 4
with torch.no_grad():
    decoded = torch.cat([
        vae.decode((chunk / vae.config.scaling_factor).to(vae.dtype)).sample.float().cpu()
        for chunk in z_probes.split(DECODE_CHUNK, dim=0)
    ], dim=0)

imgs = (decoded.clamp(-1, 1) + 1) / 2
X = imgs.flatten(start_dim=1).numpy()

# --- 7. SVD resonance analysis ---
# TruncatedSVD does not centre its input, so subtract the mean image first;
# otherwise the leading component mostly describes the content shared by all
# probes instead of how they differ from each other.
X_centered = X - X.mean(axis=0, keepdims=True)
svd = TruncatedSVD(n_components=10, random_state=1234)  # seeded: the default solver is randomized
svd.fit(X_centered)
singular_vectors = svd.components_
print("Explained variance ratios:", svd.explained_variance_ratio_)

# --- 8. Animate along singular directions ---
def traverse_singular(z_start, direction_flat, n_frames=12, step_size=0.05, generator=None):
    """Decode a straight-line walk through latent space into image frames.

    Args:
        z_start: Starting latent (batch of one).
        direction_flat: 1-D array describing the mode to follow. If it has
            exactly ``z_start.numel()`` entries it is reshaped and used as the
            latent step direction. Otherwise only its norm is used and the
            direction itself is drawn at random.
        n_frames: Number of steps, one decoded frame per step.
        step_size: Multiplier applied to the direction at every step.
        generator: Optional ``torch.Generator`` on ``z_start``'s device that
            seeds the random fallback direction.

    Returns:
        List of ``n_frames`` uint8 arrays, the first image of each decode
        moved to channel-last layout.

    NOTE(review): the SVD in this notebook is fitted on decoded pixels, so its
    components do not have latent size and take the random branch — the "big"
    and "small" walks then differ only by the random draw. Pass a latent-sized
    direction to follow an actual mode.
    """
    z = z_start.clone().detach()
    direction = np.asarray(direction_flat, dtype=np.float32).ravel()

    if direction.size == z.numel():
        latent_dir = torch.from_numpy(direction).reshape(z.shape).to(z.device)
    else:
        magnitude = float(np.linalg.norm(direction))
        # Normalise in float32: in float16 the 1e-12 guard rounds to zero.
        latent_dir = torch.randn(z.shape, generator=generator, device=z.device, dtype=torch.float32)
        latent_dir = latent_dir / latent_dir.norm().clamp_min(1e-12) * magnitude
    latent_dir = latent_dir.to(z.dtype)

    frames = []
    with torch.no_grad():
        for _ in range(n_frames):
            z = z + step_size * latent_dir
            # Cast to the VAE's own dtype so an upcast VAE keeps working.
            out = vae.decode((z / vae.config.scaling_factor).to(vae.dtype)).sample
            img = (out.clamp(-1, 1) + 1) / 2
            frames.append((img[0].permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8))
    return frames

# First and last rows of the fitted component matrix.
big_mode, small_mode = singular_vectors[0], singular_vectors[-1]

# One traversal per mode, evaluated in order: big first, then small.
frames_big, frames_small = (
    traverse_singular(z_anchor, mode, n_frames=12, step_size=0.05)
    for mode in (big_mode, small_mode)
)

# --- 9. Display first frames ---
def show_frames(frames, title):
    """Plot up to the first six frames side by side under a shared title."""
    fig = plt.figure(figsize=(12, 4))
    for slot, frame in enumerate(frames[:6], start=1):
        axis = fig.add_subplot(1, 6, slot)
        axis.imshow(frame)
        axis.axis("off")
    fig.suptitle(title, fontsize=14)
    plt.show()

for frames, title in (
    (frames_big, "BIG singular mode"),
    (frames_small, "SMALL singular mode"),
):
    show_frames(frames, title)

# --- 10. Export GIFs ---
# NOTE(review): recent imageio releases may read GIF `duration` in
# milliseconds rather than seconds — confirm 0.2 gives the intended pacing.
for gif_path, frames in (
    ("animation_big.gif", frames_big),
    ("animation_small.gif", frames_small),
):
    imageio.mimsave(gif_path, frames, duration=0.2)

print("GIFs saved: animation_big.gif, animation_small.gif")

In [None]:
import torch
import numpy as np
import matplotlib.pyplot as plt
from diffusers import StableDiffusionXLPipeline
from sklearn.decomposition import TruncatedSVD
import imageio

# --- 1. Device and RNG ---
device = "cuda"
generator = torch.Generator(device=device).manual_seed(1234)
print("Using device:", device)

# --- 2. Load SDXL pipeline ---
# torch_dtype keeps the fp16 checkpoint in fp16 while loading.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    variant="fp16",
    torch_dtype=torch.float16
).to(device)

pipe.enable_vae_tiling()
vae = pipe.vae

# --- 3. Encode prompt ---
prompt = "a surreal city of glass towers at sunset"

# Token-level embeddings (cross-attention) and pooled embeddings (UNet
# 'text_time' conditioning) both come from encode_prompt. The first text
# encoder's pooler_output has a different width and is not what the UNet
# was trained on.
prompt_embeds, _, pooled_embeds, _ = pipe.encode_prompt(
    prompt=prompt,
    device=device,
    num_images_per_prompt=1,
    do_classifier_free_guidance=False
)

# Time IDs (required by SDXL UNet)
time_ids = torch.tensor([[1024, 1024, 0, 0, 1024, 1024]], device=device, dtype=torch.float16)  # Adjusted for 1024x1024

# --- 4. Generate anchor latent ---
num_inference_steps = 30
ANCHOR_STEP = 20  # denoising step whose latent is kept as the anchor
latents = torch.randn(
    (1, pipe.unet.config.in_channels, 128, 128),  # Corrected for 1024x1024
    device=device,
    dtype=torch.float16,
    generator=generator
)

pipe.scheduler.set_timesteps(num_inference_steps, device=device)
# The scheduler expects the starting noise scaled by its initial sigma
# (available only after set_timesteps).
latents = latents * pipe.scheduler.init_noise_sigma
z_anchor = None

# Inference only: outside no_grad each step keeps its autograd graph alive
# through `latents`, so memory grows with every iteration.
with torch.no_grad():
    for i, t in enumerate(pipe.scheduler.timesteps):
        latent_model_input = pipe.scheduler.scale_model_input(latents, t)
        # Debug: Print shape to verify
        print(f"Step {i}, latent_model_input shape: {latent_model_input.shape}")
        noise_pred = pipe.unet(
            latent_model_input,
            t,
            # Full (batch, tokens, dim) tensor: indexing [0] drops the batch axis.
            encoder_hidden_states=prompt_embeds,
            added_cond_kwargs={
                "text_embeds": pooled_embeds,
                "time_ids": time_ids
            }
        ).sample
        step_out = pipe.scheduler.step(noise_pred, t, latents)
        latents = step_out.prev_sample if hasattr(step_out, "prev_sample") else step_out

        if i == ANCHOR_STEP:  # mid-step anchor
            z_anchor = latents.clone().detach()
            break

# Fewer steps than ANCHOR_STEP: fall back to the final latent.
if z_anchor is None:
    z_anchor = latents.clone().detach()

# --- 5. Proximal probes ---
def make_probes(z, n_probes=32, eps=0.05, generator=None):
    """Return ``n_probes`` perturbed copies of ``z`` at a fixed relative distance.

    Each probe is ``z`` plus Gaussian noise rescaled so that its norm equals
    ``eps * ||z||``.

    Args:
        z: Anchor tensor; probes are concatenated along dim 0.
        n_probes: Number of probes to draw.
        eps: Perturbation radius as a fraction of ``||z||``.
        generator: Optional ``torch.Generator`` living on ``z``'s device, for
            reproducible probes. ``None`` uses the global RNG.

    Returns:
        Tensor with ``n_probes * z.shape[0]`` rows, in ``z``'s dtype and device.
    """
    if n_probes <= 0:
        # torch.cat rejects an empty list; hand back an empty batch instead.
        return z.new_empty((0,) + tuple(z.shape[1:]))

    # Norms are taken in float32: in float16 the 1e-12 guard rounds to zero.
    z32 = z.float()
    radius = eps * z32.norm()
    probes = []
    for _ in range(n_probes):
        noise = torch.randn(z32.shape, generator=generator, device=z.device, dtype=torch.float32)
        noise = noise / noise.norm().clamp_min(1e-12) * radius
        probes.append((z32 + noise).to(z.dtype))
    return torch.cat(probes, dim=0)

z_probes = make_probes(z_anchor, n_probes=32, eps=0.05)

# --- 6. Decode probes ---
# Decode a few probes at a time and move each result to the CPU right away;
# a single 32-image VAE batch needs far more activation memory.
# NOTE(review): the SDXL VAE is reported to overflow in fp16 for some inputs —
# if images come out black/NaN, upcast the VAE; the cast below follows
# whatever dtype the VAE currently has.
DECODE_CHUNK = 4
with torch.no_grad():
    decoded = torch.cat([
        vae.decode((chunk / vae.config.scaling_factor).to(vae.dtype)).sample.float().cpu()
        for chunk in z_probes.split(DECODE_CHUNK, dim=0)
    ], dim=0)

imgs = (decoded.clamp(-1, 1) + 1) / 2
X = imgs.flatten(start_dim=1).numpy()

# --- 7. SVD resonance analysis ---
# TruncatedSVD does not centre its input, so subtract the mean image first;
# otherwise the leading component mostly describes the content shared by all
# probes instead of how they differ from each other.
X_centered = X - X.mean(axis=0, keepdims=True)
svd = TruncatedSVD(n_components=10, random_state=1234)  # seeded: the default solver is randomized
svd.fit(X_centered)
singular_vectors = svd.components_
print("Explained variance ratios:", svd.explained_variance_ratio_)

# --- 8. Animate along singular directions ---
def traverse_singular(z_start, direction_flat, n_frames=12, step_size=0.05, generator=None):
    """Decode a straight-line walk through latent space into image frames.

    Args:
        z_start: Starting latent (batch of one).
        direction_flat: 1-D array describing the mode to follow. If it has
            exactly ``z_start.numel()`` entries it is reshaped and used as the
            latent step direction. Otherwise only its norm is used and the
            direction itself is drawn at random.
        n_frames: Number of steps, one decoded frame per step.
        step_size: Multiplier applied to the direction at every step.
        generator: Optional ``torch.Generator`` on ``z_start``'s device that
            seeds the random fallback direction.

    Returns:
        List of ``n_frames`` uint8 arrays, the first image of each decode
        moved to channel-last layout.

    NOTE(review): the SVD in this notebook is fitted on decoded pixels, so its
    components do not have latent size and take the random branch — the "big"
    and "small" walks then differ only by the random draw. Pass a latent-sized
    direction to follow an actual mode.
    """
    z = z_start.clone().detach()
    direction = np.asarray(direction_flat, dtype=np.float32).ravel()

    if direction.size == z.numel():
        latent_dir = torch.from_numpy(direction).reshape(z.shape).to(z.device)
    else:
        magnitude = float(np.linalg.norm(direction))
        # Normalise in float32: in float16 the 1e-12 guard rounds to zero.
        latent_dir = torch.randn(z.shape, generator=generator, device=z.device, dtype=torch.float32)
        latent_dir = latent_dir / latent_dir.norm().clamp_min(1e-12) * magnitude
    latent_dir = latent_dir.to(z.dtype)

    frames = []
    with torch.no_grad():
        for _ in range(n_frames):
            z = z + step_size * latent_dir
            # Cast to the VAE's own dtype so an upcast VAE keeps working.
            out = vae.decode((z / vae.config.scaling_factor).to(vae.dtype)).sample
            img = (out.clamp(-1, 1) + 1) / 2
            frames.append((img[0].permute(1, 2, 0).cpu().numpy() * 255).astype(np.uint8))
    return frames

# First and last rows of the fitted component matrix.
big_mode, small_mode = singular_vectors[0], singular_vectors[-1]

# One traversal per mode, evaluated in order: big first, then small.
frames_big, frames_small = (
    traverse_singular(z_anchor, mode, n_frames=12, step_size=0.05)
    for mode in (big_mode, small_mode)
)

# --- 9. Display first frames ---
def show_frames(frames, title):
    """Plot up to the first six frames side by side under a shared title."""
    fig = plt.figure(figsize=(12,4))
    for slot, frame in enumerate(frames[:6], start=1):
        axis = fig.add_subplot(1, 6, slot)
        axis.imshow(frame)
        axis.axis("off")
    fig.suptitle(title, fontsize=14)
    plt.show()

for frames, title in (
    (frames_big, "BIG singular mode"),
    (frames_small, "SMALL singular mode"),
):
    show_frames(frames, title)

# --- 10. Export GIFs ---
# NOTE(review): recent imageio releases may read GIF `duration` in
# milliseconds rather than seconds — confirm 0.2 gives the intended pacing.
for gif_path, frames in (
    ("animation_big.gif", frames_big),
    ("animation_small.gif", frames_small),
):
    imageio.mimsave(gif_path, frames, duration=0.2)

print("GIFs saved: animation_big.gif, animation_small.gif")

In [None]:
# --- 0. Setup ---
#!pip install diffusers accelerate transformers safetensors torch torchvision scikit-learn imageio -q

import torch
import numpy as np
import matplotlib.pyplot as plt
from diffusers import StableDiffusionXLPipeline
from sklearn.decomposition import TruncatedSVD
import imageio

# --- 1. Device and RNG ---
device = "cuda"  # assume GPU is always available
generator = torch.Generator(device=device).manual_seed(1234)
print("Using device:", device)

# --- 2. Load SDXL pipeline ---
# torch_dtype keeps the fp16 checkpoint in fp16 while loading.
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    variant="fp16",
    torch_dtype=torch.float16
).to(device)

pipe.enable_vae_tiling()
vae = pipe.vae

# --- 3. Encode prompt ---
prompt = "a surreal city of glass towers at sunset"

# encode_prompt returns a 4-tuple: token-level embeddings (cross-attention)
# come first, pooled embeddings (UNet 'text_time' conditioning) third. The
# first text encoder's pooler_output has a different width and is not what
# the UNet was trained on.
prompt_embeds, _, pooled_embeds, _ = pipe.encode_prompt(
    prompt=prompt,
    device=device,
    num_images_per_prompt=1,
    do_classifier_free_guidance=False
)

# --- 4. Generate anchor latent ---
num_inference_steps = 30
ANCHOR_STEP = 20   # denoising step whose latent is kept as the anchor
LATENT_SIZE = 64   # latent height/width; pixel size is LATENT_SIZE * vae_scale_factor

# Time IDs (required by SDXL UNet): (original_size, crop_top_left,
# target_size). All-zero ids describe a zero-sized image, so derive them from
# the latent size instead.
image_size = LATENT_SIZE * pipe.vae_scale_factor
time_ids = torch.tensor(
    [[image_size, image_size, 0, 0, image_size, image_size]],
    device=device,
    dtype=torch.float16
)

latents = torch.randn(
    (1, pipe.unet.config.in_channels, LATENT_SIZE, LATENT_SIZE),
    device=device,
    dtype=torch.float16,
    generator=generator
)

pipe.scheduler.set_timesteps(num_inference_steps, device=device)
# The scheduler expects the starting noise scaled by its initial sigma
# (available only after set_timesteps).
latents = latents * pipe.scheduler.init_noise_sigma
z_anchor = None

# Inference only: outside no_grad each step keeps its autograd graph alive
# through `latents`, so memory grows with every iteration.
with torch.no_grad():
    for i, t in enumerate(pipe.scheduler.timesteps):
        latent_model_input = pipe.scheduler.scale_model_input(latents, t)
        noise_pred = pipe.unet(
            latent_model_input,
            t,
            encoder_hidden_states=prompt_embeds,
            added_cond_kwargs={
                "text_embeds": pooled_embeds,
                "time_ids": time_ids
            }
        ).sample
        step_out = pipe.scheduler.step(noise_pred, t, latents)
        latents = step_out.prev_sample if hasattr(step_out, "prev_sample") else step_out

        if i == ANCHOR_STEP:  # mid-step anchor
            z_anchor = latents.clone().detach()
            break

# Fewer steps than ANCHOR_STEP: fall back to the final latent.
if z_anchor is None:
    z_anchor = latents.clone().detach()

# --- 5. Proximal probes ---
def make_probes(z, n_probes=32, eps=0.05, generator=None):
    """Return ``n_probes`` perturbed copies of ``z`` at a fixed relative distance.

    Each probe is ``z`` plus Gaussian noise rescaled so that its norm equals
    ``eps * ||z||``.

    Args:
        z: Anchor tensor; probes are concatenated along dim 0.
        n_probes: Number of probes to draw.
        eps: Perturbation radius as a fraction of ``||z||``.
        generator: Optional ``torch.Generator`` living on ``z``'s device, for
            reproducible probes. ``None`` uses the global RNG.

    Returns:
        Tensor with ``n_probes * z.shape[0]`` rows, in ``z``'s dtype and device.
    """
    if n_probes <= 0:
        # torch.cat rejects an empty list; hand back an empty batch instead.
        return z.new_empty((0,) + tuple(z.shape[1:]))

    # Norms are taken in float32: in float16 the 1e-12 guard rounds to zero.
    z32 = z.float()
    radius = eps * z32.norm()
    probes = []
    for _ in range(n_probes):
        noise = torch.randn(z32.shape, generator=generator, device=z.device, dtype=torch.float32)
        noise = noise / noise.norm().clamp_min(1e-12) * radius
        probes.append((z32 + noise).to(z.dtype))
    return torch.cat(probes, dim=0)

z_probes = make_probes(z_anchor, n_probes=32, eps=0.05)

# --- 6. Decode probes ---
# Decode a few probes at a time and move each result to the CPU right away;
# a single 32-image VAE batch needs far more activation memory.
# NOTE(review): the SDXL VAE is reported to overflow in fp16 for some inputs —
# if images come out black/NaN, upcast the VAE; the cast below follows
# whatever dtype the VAE currently has.
DECODE_CHUNK = 4
with torch.no_grad():
    decoded = torch.cat([
        vae.decode((chunk / vae.config.scaling_factor).to(vae.dtype)).sample.float().cpu()
        for chunk in z_probes.split(DECODE_CHUNK, dim=0)
    ], dim=0)

imgs = (decoded.clamp(-1, 1) + 1) / 2
X = imgs.flatten(start_dim=1).numpy()

# --- 7. SVD resonance analysis ---
# TruncatedSVD does not centre its input, so subtract the mean image first;
# otherwise the leading component mostly describes the content shared by all
# probes instead of how they differ from each other.
X_centered = X - X.mean(axis=0, keepdims=True)
svd = TruncatedSVD(n_components=10, random_state=1234)  # seeded: the default solver is randomized
svd.fit(X_centered)
singular_vectors = svd.components_
print("Explained variance ratios:", svd.explained_variance_ratio_)

# --- 8. Animate along singular directions ---
def traverse_singular(z_start, direction_flat, n_frames=12, step_size=0.05,
                      latent_dir=None, generator=None):
    """Decode a straight-line walk through latent space starting at ``z_start``.

    ``direction_flat`` lives in image space and cannot be added to a latent.
    Unless ``latent_dir`` is supplied, the walk follows a *random* latent
    direction; only the norm of ``direction_flat`` is used, to scale the step.

    Args:
        z_start: Starting latent.
        direction_flat: Flattened image-space singular vector (array-like).
        n_frames: Number of steps; one frame is decoded per step.
        step_size: Multiplier applied to the direction at every step.
        latent_dir: Optional tensor shaped like ``z_start`` to walk along instead
            of the random direction. It is rescaled to ``||direction_flat||``.
        generator: Optional ``torch.Generator`` on ``z_start``'s device for a
            reproducible random direction.

    Returns:
        List of ``n_frames`` uint8 arrays, channel-last, for the first batch item.
    """
    z0 = z_start.clone().detach().float()
    target_norm = float(np.linalg.norm(direction_flat))
    if latent_dir is None:
        latent_dir = torch.randn(
            z0.shape, generator=generator, device=z0.device, dtype=torch.float32
        )
    # float32 keeps the 1e-12 guard meaningful and avoids float16 drift.
    latent_dir = latent_dir.to(device=z0.device, dtype=torch.float32)
    latent_dir = latent_dir / (latent_dir.norm() + 1e-12) * target_norm
    frames = []
    for k in range(1, n_frames + 1):
        # Offset from the start each time instead of accumulating rounding error.
        z = z0 + k * step_size * latent_dir
        with torch.no_grad():
            out = vae.decode((z / vae.config.scaling_factor).to(vae.dtype)).sample
        img = (out.float().clamp(-1, 1) + 1) / 2
        frames.append((img[0].permute(1, 2, 0).cpu().numpy() * 255).round().astype(np.uint8))
    return frames

# First and last rows of the fitted basis. Only 10 components were kept, so the
# "small" mode is the weakest retained component, not the weakest overall.
big_mode = singular_vectors[0]
small_mode = singular_vectors[-1]

# Decode a 12-step latent walk from the anchor for each mode.
frames_big = traverse_singular(z_anchor, big_mode, n_frames=12, step_size=0.05)
frames_small = traverse_singular(z_anchor, small_mode, n_frames=12, step_size=0.05)

# --- 9. Display first frames ---
def show_frames(frames, title):
    """Show up to the first six frames side by side under a shared title."""
    fig = plt.figure(figsize=(12, 4))
    for col, frame in enumerate(frames[:6], start=1):
        ax = fig.add_subplot(1, 6, col)
        ax.imshow(frame)
        ax.axis("off")
    fig.suptitle(title, fontsize=14)
    plt.show()

# Preview the leading frames of each walk inline.
show_frames(frames_big, "BIG singular mode")
show_frames(frames_small, "SMALL singular mode")

# --- 10. Export GIFs ---
# NOTE(review): whether `duration` is read as seconds or milliseconds depends on
# the installed imageio version/plugin — confirm the playback speed is as intended.
imageio.mimsave("animation_big.gif", frames_big, duration=0.2)
imageio.mimsave("animation_small.gif", frames_small, duration=0.2)

print("GIFs saved: animation_big.gif, animation_small.gif")


In [None]:
# --- 0. Setup ---
!pip install diffusers accelerate transformers safetensors torch torchvision scikit-learn imageio -q

import torch
import numpy as np
import matplotlib.pyplot as plt
from diffusers import StableDiffusionXLPipeline
from sklearn.decomposition import TruncatedSVD
import imageio

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

# Seeded RNG on the compute device so the starting noise is reproducible.
generator = torch.Generator(device=device).manual_seed(1234)

# --- 1. Load SDXL pipeline ---
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    variant="fp16"
).to(device, dtype=torch.float16)

pipe.enable_vae_tiling()
vae = pipe.vae

# --- 2. Encode prompt ---
prompt = "a surreal city of glass towers at sunset"

# encode_prompt returns a 4-tuple (prompt_embeds, negative_prompt_embeds,
# pooled_prompt_embeds, negative_pooled_prompt_embeds). It runs both SDXL text
# encoders, so the per-token and pooled embeddings have the widths the UNet
# expects; the first CLIP encoder alone does not.
with torch.no_grad():
    prompt_embeds, _, pooled_embeds, _ = pipe.encode_prompt(
        prompt=prompt,
        device=device,
        num_images_per_prompt=1,
        do_classifier_free_guidance=False,
    )

# Size/crop conditioning required by the SDXL UNet, laid out as
# (orig_h, orig_w, crop_top, crop_left, target_h, target_w).
latent_size = 64
image_size = latent_size * pipe.vae_scale_factor
add_time_ids = torch.tensor(
    [[image_size, image_size, 0, 0, image_size, image_size]],
    device=device,
    dtype=prompt_embeds.dtype,
)

# --- 3. Generate anchor latent ---
num_inference_steps = 30
pipe.scheduler.set_timesteps(num_inference_steps, device=device)

latents = torch.randn(
    (1, pipe.unet.config.in_channels, latent_size, latent_size),
    device=device,
    dtype=torch.float16,
    generator=generator
)
# Schedulers expect the starting noise at their own initial sigma, not unit variance.
latents = latents * pipe.scheduler.init_noise_sigma

z_anchor = None

# Without no_grad the autograd graph would span every UNet call of the loop.
with torch.no_grad():
    for i, t in enumerate(pipe.scheduler.timesteps):
        latent_model_input = pipe.scheduler.scale_model_input(latents, t)
        noise_pred = pipe.unet(
            latent_model_input,
            t,
            encoder_hidden_states=prompt_embeds,
            added_cond_kwargs={
                "text_embeds": pooled_embeds,
                "time_ids": add_time_ids,
            }
        ).sample
        step_out = pipe.scheduler.step(noise_pred, t, latents)
        latents = step_out.prev_sample if hasattr(step_out, "prev_sample") else step_out

        if i == 20:  # mid-step anchor
            z_anchor = latents.clone().detach()
            break

# Fallback when the schedule has fewer than 21 steps: use the final latent.
if z_anchor is None:
    z_anchor = latents.clone().detach()

# --- 4. Proximal probes ---
def make_probes(z, n_probes=32, eps=0.05, generator=None):
    """Sample latents on a small sphere around ``z``.

    Each probe is ``z`` plus Gaussian noise rescaled so that the noise norm
    equals ``eps * ||z||``.

    Args:
        z: Anchor latent; probes are concatenated along dim 0.
        n_probes: Number of perturbed copies to draw.
        eps: Perturbation radius relative to the norm of ``z``.
        generator: Optional ``torch.Generator`` living on ``z``'s device; pass a
            seeded one to make the probes reproducible.

    Returns:
        Tensor of probes concatenated along dim 0, in ``z``'s dtype and device.
    """
    # Norm arithmetic is done in float32: in float16 the 1e-12 guard rounds to 0.
    z32 = z.float()
    radius = eps * (z32.norm() + 1e-12)
    probes = []
    for _ in range(n_probes):
        noise = torch.randn(
            z32.shape, generator=generator, device=z32.device, dtype=torch.float32
        )
        noise = noise / (noise.norm() + 1e-12) * radius
        probes.append((z32 + noise).to(z.dtype))
    return torch.cat(probes, dim=0)

z_probes = make_probes(z_anchor, n_probes=32, eps=0.05)

# --- 5. Decode probes ---
# Decode a few probes at a time and move each result to the CPU straight away, so
# the full-resolution activations of all probes never coexist in GPU memory.
with torch.no_grad():
    decoded = torch.cat([
        vae.decode((chunk / vae.config.scaling_factor).to(vae.dtype)).sample.float().cpu()
        for chunk in z_probes.split(4, dim=0)
    ], dim=0)

# Map decoder output from [-1, 1] to [0, 1] and flatten each image to one row.
imgs = (decoded.clamp(-1, 1) + 1) / 2
X = imgs.flatten(start_dim=1).numpy()

# --- 6. SVD resonance analysis ---
# TruncatedSVD does not centre its input. Without removing the mean image the
# leading component mostly describes the anchor image itself instead of how the
# probes differ from one another.
X_centered = X - X.mean(axis=0, keepdims=True)
# Fixed random_state: the default solver is randomized.
svd = TruncatedSVD(n_components=10, random_state=0)
svd.fit(X_centered)
singular_vectors = svd.components_
print("Explained variance ratios:", svd.explained_variance_ratio_)

# --- 7. Animate along singular directions ---
def traverse_singular(z_start, direction_flat, n_frames=12, step_size=0.05,
                      latent_dir=None, generator=None):
    """Decode a straight-line walk through latent space starting at ``z_start``.

    ``direction_flat`` lives in image space and cannot be added to a latent.
    Unless ``latent_dir`` is supplied, the walk follows a *random* latent
    direction; only the norm of ``direction_flat`` is used, to scale the step.

    Args:
        z_start: Starting latent.
        direction_flat: Flattened image-space singular vector (array-like).
        n_frames: Number of steps; one frame is decoded per step.
        step_size: Multiplier applied to the direction at every step.
        latent_dir: Optional tensor shaped like ``z_start`` to walk along instead
            of the random direction. It is rescaled to ``||direction_flat||``.
        generator: Optional ``torch.Generator`` on ``z_start``'s device for a
            reproducible random direction.

    Returns:
        List of ``n_frames`` uint8 arrays, channel-last, for the first batch item.
    """
    z0 = z_start.clone().detach().float()
    target_norm = float(np.linalg.norm(direction_flat))
    if latent_dir is None:
        latent_dir = torch.randn(
            z0.shape, generator=generator, device=z0.device, dtype=torch.float32
        )
    # float32 keeps the 1e-12 guard meaningful and avoids float16 drift.
    latent_dir = latent_dir.to(device=z0.device, dtype=torch.float32)
    latent_dir = latent_dir / (latent_dir.norm() + 1e-12) * target_norm
    frames = []
    for k in range(1, n_frames + 1):
        # Offset from the start each time instead of accumulating rounding error.
        z = z0 + k * step_size * latent_dir
        with torch.no_grad():
            out = vae.decode((z / vae.config.scaling_factor).to(vae.dtype)).sample
        img = (out.float().clamp(-1, 1) + 1) / 2
        frames.append((img[0].permute(1, 2, 0).cpu().numpy() * 255).round().astype(np.uint8))
    return frames

# First and last rows of the fitted basis. Only 10 components were kept, so the
# "small" mode is the weakest retained component, not the weakest overall.
big_mode = singular_vectors[0]
small_mode = singular_vectors[-1]

# Decode a 12-step latent walk from the anchor for each mode.
frames_big = traverse_singular(z_anchor, big_mode, n_frames=12, step_size=0.05)
frames_small = traverse_singular(z_anchor, small_mode, n_frames=12, step_size=0.05)

# --- 8. Display first frames ---
def show_frames(frames, title):
    """Show up to the first six frames side by side under a shared title."""
    fig = plt.figure(figsize=(12, 4))
    for col, frame in enumerate(frames[:6], start=1):
        ax = fig.add_subplot(1, 6, col)
        ax.imshow(frame)
        ax.axis("off")
    fig.suptitle(title, fontsize=14)
    plt.show()

# Preview the leading frames of each walk inline.
show_frames(frames_big, "BIG singular mode")
show_frames(frames_small, "SMALL singular mode")

# --- 9. Export GIFs ---
# NOTE(review): whether `duration` is read as seconds or milliseconds depends on
# the installed imageio version/plugin — confirm the playback speed is as intended.
imageio.mimsave("animation_big.gif", frames_big, duration=0.2)
imageio.mimsave("animation_small.gif", frames_small, duration=0.2)

print("GIFs saved: animation_big.gif, animation_small.gif")


In [None]:
# --- 0. Setup ---
!pip install diffusers accelerate transformers safetensors torch torchvision scikit-learn imageio -q

import torch
import numpy as np
import matplotlib.pyplot as plt
from diffusers import StableDiffusionXLPipeline
from sklearn.decomposition import TruncatedSVD
import imageio

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

# RNG: seeded on the compute device so the starting noise is reproducible.
generator = torch.Generator(device=device).manual_seed(1234)

# --- 1. Load SDXL pipeline ---
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    variant="fp16"
).to(device, dtype=torch.float16)

pipe.enable_vae_tiling()
vae = pipe.vae

# --- 2. Encode prompt via pipeline ---
prompt = "a surreal city of glass towers at sunset"

# encode_prompt returns a 4-tuple (prompt_embeds, negative_prompt_embeds,
# pooled_prompt_embeds, negative_pooled_prompt_embeds), not a single tensor.
with torch.no_grad():
    prompt_embeds, _, pooled_embeds, _ = pipe.encode_prompt(
        prompt=prompt,
        device=device,
        num_images_per_prompt=1,
        do_classifier_free_guidance=False,
    )

# The SDXL UNet also needs pooled text embeddings and size/crop ids, laid out as
# (orig_h, orig_w, crop_top, crop_left, target_h, target_w).
latent_size = 64
image_size = latent_size * pipe.vae_scale_factor
add_time_ids = torch.tensor(
    [[image_size, image_size, 0, 0, image_size, image_size]],
    device=device,
    dtype=prompt_embeds.dtype,
)

# --- 3. Generate anchor latent ---
num_inference_steps = 30
pipe.scheduler.set_timesteps(num_inference_steps, device=device)

latents = torch.randn(
    (1, pipe.unet.config.in_channels, latent_size, latent_size),
    device=device,
    dtype=torch.float16,
    generator=generator
)
# Schedulers expect the starting noise at their own initial sigma, not unit variance.
latents = latents * pipe.scheduler.init_noise_sigma

z_anchor = None

# Without no_grad the autograd graph would span every UNet call of the loop.
with torch.no_grad():
    for i, t in enumerate(pipe.scheduler.timesteps):
        latent_model_input = pipe.scheduler.scale_model_input(latents, t)
        noise_pred = pipe.unet(
            latent_model_input,
            t,
            encoder_hidden_states=prompt_embeds,
            added_cond_kwargs={
                "text_embeds": pooled_embeds,
                "time_ids": add_time_ids,
            }
        ).sample
        step_out = pipe.scheduler.step(noise_pred, t, latents)
        latents = step_out.prev_sample if hasattr(step_out, "prev_sample") else step_out

        if i == 20:  # mid-step anchor
            z_anchor = latents.clone().detach()
            break

# Fallback when the schedule has fewer than 21 steps: use the final latent.
if z_anchor is None:
    z_anchor = latents.clone().detach()

# --- 4. Proximal probes ---
def make_probes(z, n_probes=32, eps=0.05, generator=None):
    """Sample latents on a small sphere around ``z``.

    Each probe is ``z`` plus Gaussian noise rescaled so that the noise norm
    equals ``eps * ||z||``.

    Args:
        z: Anchor latent; probes are concatenated along dim 0.
        n_probes: Number of perturbed copies to draw.
        eps: Perturbation radius relative to the norm of ``z``.
        generator: Optional ``torch.Generator`` living on ``z``'s device; pass a
            seeded one to make the probes reproducible.

    Returns:
        Tensor of probes concatenated along dim 0, in ``z``'s dtype and device.
    """
    # Norm arithmetic is done in float32: in float16 the 1e-12 guard rounds to 0.
    z32 = z.float()
    radius = eps * (z32.norm() + 1e-12)
    probes = []
    for _ in range(n_probes):
        noise = torch.randn(
            z32.shape, generator=generator, device=z32.device, dtype=torch.float32
        )
        noise = noise / (noise.norm() + 1e-12) * radius
        probes.append((z32 + noise).to(z.dtype))
    return torch.cat(probes, dim=0)

z_probes = make_probes(z_anchor, n_probes=32, eps=0.05)

# --- 5. Decode probes ---
# Decode a few probes at a time and move each result to the CPU straight away, so
# the full-resolution activations of all probes never coexist in GPU memory.
with torch.no_grad():
    decoded = torch.cat([
        vae.decode((chunk / vae.config.scaling_factor).to(vae.dtype)).sample.float().cpu()
        for chunk in z_probes.split(4, dim=0)
    ], dim=0)

# Map decoder output from [-1, 1] to [0, 1] and flatten each image to one row.
imgs = (decoded.clamp(-1, 1) + 1) / 2
X = imgs.flatten(start_dim=1).numpy()

# --- 6. SVD resonance analysis ---
# TruncatedSVD does not centre its input. Without removing the mean image the
# leading component mostly describes the anchor image itself instead of how the
# probes differ from one another.
X_centered = X - X.mean(axis=0, keepdims=True)
# Fixed random_state: the default solver is randomized.
svd = TruncatedSVD(n_components=10, random_state=0)
svd.fit(X_centered)
singular_vectors = svd.components_
print("Explained variance ratios:", svd.explained_variance_ratio_)

# --- 7. Animate along singular directions ---
def traverse_singular(z_start, direction_flat, n_frames=12, step_size=0.05,
                      latent_dir=None, generator=None):
    """Decode a straight-line walk through latent space starting at ``z_start``.

    ``direction_flat`` lives in image space and cannot be added to a latent.
    Unless ``latent_dir`` is supplied, the walk follows a *random* latent
    direction; only the norm of ``direction_flat`` is used, to scale the step.

    Args:
        z_start: Starting latent.
        direction_flat: Flattened image-space singular vector (array-like).
        n_frames: Number of steps; one frame is decoded per step.
        step_size: Multiplier applied to the direction at every step.
        latent_dir: Optional tensor shaped like ``z_start`` to walk along instead
            of the random direction. It is rescaled to ``||direction_flat||``.
        generator: Optional ``torch.Generator`` on ``z_start``'s device for a
            reproducible random direction.

    Returns:
        List of ``n_frames`` uint8 arrays, channel-last, for the first batch item.
    """
    z0 = z_start.clone().detach().float()
    target_norm = float(np.linalg.norm(direction_flat))
    if latent_dir is None:
        latent_dir = torch.randn(
            z0.shape, generator=generator, device=z0.device, dtype=torch.float32
        )
    # float32 keeps the 1e-12 guard meaningful and avoids float16 drift.
    latent_dir = latent_dir.to(device=z0.device, dtype=torch.float32)
    latent_dir = latent_dir / (latent_dir.norm() + 1e-12) * target_norm
    frames = []
    for k in range(1, n_frames + 1):
        # Offset from the start each time instead of accumulating rounding error.
        z = z0 + k * step_size * latent_dir
        with torch.no_grad():
            out = vae.decode((z / vae.config.scaling_factor).to(vae.dtype)).sample
        img = (out.float().clamp(-1, 1) + 1) / 2
        frames.append((img[0].permute(1, 2, 0).cpu().numpy() * 255).round().astype(np.uint8))
    return frames

# First and last rows of the fitted basis. Only 10 components were kept, so the
# "small" mode is the weakest retained component, not the weakest overall.
big_mode = singular_vectors[0]
small_mode = singular_vectors[-1]

# Decode a 12-step latent walk from the anchor for each mode.
frames_big = traverse_singular(z_anchor, big_mode, n_frames=12, step_size=0.05)
frames_small = traverse_singular(z_anchor, small_mode, n_frames=12, step_size=0.05)

# --- 8. Display first frames ---
def show_frames(frames, title):
    """Show up to the first six frames side by side under a shared title."""
    fig = plt.figure(figsize=(12, 4))
    for col, frame in enumerate(frames[:6], start=1):
        ax = fig.add_subplot(1, 6, col)
        ax.imshow(frame)
        ax.axis("off")
    fig.suptitle(title, fontsize=14)
    plt.show()

# Preview the leading frames of each walk inline.
show_frames(frames_big, "BIG singular mode")
show_frames(frames_small, "SMALL singular mode")

# --- 9. Export GIFs ---
# NOTE(review): whether `duration` is read as seconds or milliseconds depends on
# the installed imageio version/plugin — confirm the playback speed is as intended.
imageio.mimsave("animation_big.gif", frames_big, duration=0.2)
imageio.mimsave("animation_small.gif", frames_small, duration=0.2)

print("GIFs saved: animation_big.gif, animation_small.gif")


In [None]:
# --- 0. Setup ---
!pip install diffusers accelerate transformers safetensors torch torchvision scikit-learn imageio

import torch
import numpy as np
import matplotlib.pyplot as plt
from diffusers import StableDiffusionXLPipeline
from sklearn.decomposition import TruncatedSVD
import imageio

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

# Device-aware RNG, seeded so the starting noise is reproducible.
generator = torch.Generator(device=device).manual_seed(1234)

# --- 1. Load SDXL pipeline ---
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    variant="fp16"
).to(device, dtype=torch.float16)

pipe.enable_vae_tiling()
vae = pipe.vae

# --- 2. Prompt embeddings ---
prompt = "a surreal city of glass towers at sunset"

# SDXL conditions on two text encoders. Encoding with pipe.text_encoder alone
# yields embeddings that are too narrow for the UNet, so let the pipeline build
# them. encode_prompt returns (prompt_embeds, negative_prompt_embeds,
# pooled_prompt_embeds, negative_pooled_prompt_embeds).
with torch.no_grad():
    prompt_embeds, _, pooled_embeds, _ = pipe.encode_prompt(
        prompt=prompt,
        device=device,
        num_images_per_prompt=1,
        do_classifier_free_guidance=False,
    )

# Conditioning kwargs. Time ids are laid out as
# (orig_h, orig_w, crop_top, crop_left, target_h, target_w); all-zero ids would
# describe a 0x0 image.
latent_size = 64
image_size = latent_size * pipe.vae_scale_factor
add_text_embeds = pooled_embeds
add_time_ids = torch.tensor(
    [[image_size, image_size, 0, 0, image_size, image_size]],
    device=device,
    dtype=prompt_embeds.dtype,
)

# --- 3. Generate anchor latent ---
num_inference_steps = 30
pipe.scheduler.set_timesteps(num_inference_steps, device=device)

latents = torch.randn(
    (1, pipe.unet.config.in_channels, latent_size, latent_size),
    device=device,
    dtype=torch.float16,
    generator=generator
)
# Schedulers expect the starting noise at their own initial sigma, not unit variance.
latents = latents * pipe.scheduler.init_noise_sigma

z_anchor = None

# Without no_grad the autograd graph would span every UNet call of the loop.
with torch.no_grad():
    for i, t in enumerate(pipe.scheduler.timesteps):
        latent_model_input = pipe.scheduler.scale_model_input(latents, t)
        noise_pred = pipe.unet(
            latent_model_input,
            t,
            encoder_hidden_states=prompt_embeds,
            added_cond_kwargs={
                "text_embeds": add_text_embeds,
                "time_ids": add_time_ids,
            }
        ).sample
        step_out = pipe.scheduler.step(noise_pred, t, latents)
        latents = step_out.prev_sample if hasattr(step_out, "prev_sample") else step_out

        if i == 20:  # mid-step anchor
            z_anchor = latents.clone().detach()
            break

# Fallback when the schedule has fewer than 21 steps: use the final latent.
if z_anchor is None:
    z_anchor = latents.clone().detach()

# --- 4. Proximal probes ---
def make_probes(z, n_probes=32, eps=0.05, generator=None):
    """Sample latents on a small sphere around ``z``.

    Each probe is ``z`` plus Gaussian noise rescaled so that the noise norm
    equals ``eps * ||z||``.

    Args:
        z: Anchor latent; probes are concatenated along dim 0.
        n_probes: Number of perturbed copies to draw.
        eps: Perturbation radius relative to the norm of ``z``.
        generator: Optional ``torch.Generator`` living on ``z``'s device; pass a
            seeded one to make the probes reproducible.

    Returns:
        Tensor of probes concatenated along dim 0, in ``z``'s dtype and device.
    """
    # Norm arithmetic is done in float32: in float16 the 1e-12 guard rounds to 0.
    z32 = z.float()
    radius = eps * (z32.norm() + 1e-12)
    probes = []
    for _ in range(n_probes):
        noise = torch.randn(
            z32.shape, generator=generator, device=z32.device, dtype=torch.float32
        )
        noise = noise / (noise.norm() + 1e-12) * radius
        probes.append((z32 + noise).to(z.dtype))
    return torch.cat(probes, dim=0)

z_probes = make_probes(z_anchor, n_probes=32, eps=0.05)

# --- 5. Decode probes ---
# Decode a few probes at a time and move each result to the CPU straight away, so
# the full-resolution activations of all probes never coexist in GPU memory.
with torch.no_grad():
    decoded = torch.cat([
        vae.decode((chunk / vae.config.scaling_factor).to(vae.dtype)).sample.float().cpu()
        for chunk in z_probes.split(4, dim=0)
    ], dim=0)

# Map decoder output from [-1, 1] to [0, 1] and flatten each image to one row.
imgs = (decoded.clamp(-1, 1) + 1) / 2
X = imgs.flatten(start_dim=1).numpy()

# --- 6. SVD resonance analysis ---
# TruncatedSVD does not centre its input. Without removing the mean image the
# leading component mostly describes the anchor image itself instead of how the
# probes differ from one another.
X_centered = X - X.mean(axis=0, keepdims=True)
# Fixed random_state: the default solver is randomized.
svd = TruncatedSVD(n_components=10, random_state=0)
svd.fit(X_centered)
singular_vectors = svd.components_
print("Explained variance ratios:", svd.explained_variance_ratio_)

# --- 7. Animate along singular directions ---
def traverse_singular(z_start, direction_flat, n_frames=12, step_size=0.05,
                      latent_dir=None, generator=None):
    """Decode a straight-line walk through latent space starting at ``z_start``.

    ``direction_flat`` lives in image space and cannot be added to a latent.
    Unless ``latent_dir`` is supplied, the walk follows a *random* latent
    direction; only the norm of ``direction_flat`` is used, to scale the step.

    Args:
        z_start: Starting latent.
        direction_flat: Flattened image-space singular vector (array-like).
        n_frames: Number of steps; one frame is decoded per step.
        step_size: Multiplier applied to the direction at every step.
        latent_dir: Optional tensor shaped like ``z_start`` to walk along instead
            of the random direction. It is rescaled to ``||direction_flat||``.
        generator: Optional ``torch.Generator`` on ``z_start``'s device for a
            reproducible random direction.

    Returns:
        List of ``n_frames`` uint8 arrays, channel-last, for the first batch item.
    """
    z0 = z_start.clone().detach().float()
    target_norm = float(np.linalg.norm(direction_flat))
    if latent_dir is None:
        latent_dir = torch.randn(
            z0.shape, generator=generator, device=z0.device, dtype=torch.float32
        )
    # float32 keeps the 1e-12 guard meaningful and avoids float16 drift.
    latent_dir = latent_dir.to(device=z0.device, dtype=torch.float32)
    latent_dir = latent_dir / (latent_dir.norm() + 1e-12) * target_norm
    frames = []
    for k in range(1, n_frames + 1):
        # Offset from the start each time instead of accumulating rounding error.
        z = z0 + k * step_size * latent_dir
        with torch.no_grad():
            out = vae.decode((z / vae.config.scaling_factor).to(vae.dtype)).sample
        img = (out.float().clamp(-1, 1) + 1) / 2
        frames.append((img[0].permute(1, 2, 0).cpu().numpy() * 255).round().astype(np.uint8))
    return frames

# First and last rows of the fitted basis. Only 10 components were kept, so the
# "small" mode is the weakest retained component, not the weakest overall.
big_mode = singular_vectors[0]
small_mode = singular_vectors[-1]

# Decode a 12-step latent walk from the anchor for each mode.
frames_big = traverse_singular(z_anchor, big_mode, n_frames=12, step_size=0.05)
frames_small = traverse_singular(z_anchor, small_mode, n_frames=12, step_size=0.05)

# --- 8. Display first frames ---
def show_frames(frames, title):
    """Show up to the first six frames side by side under a shared title."""
    fig = plt.figure(figsize=(12, 4))
    for col, frame in enumerate(frames[:6], start=1):
        ax = fig.add_subplot(1, 6, col)
        ax.imshow(frame)
        ax.axis("off")
    fig.suptitle(title, fontsize=14)
    plt.show()

# Preview the leading frames of each walk inline.
show_frames(frames_big, "BIG singular mode")
show_frames(frames_small, "SMALL singular mode")

# --- 9. Export GIFs ---
# NOTE(review): whether `duration` is read as seconds or milliseconds depends on
# the installed imageio version/plugin — confirm the playback speed is as intended.
imageio.mimsave("animation_big.gif", frames_big, duration=0.2)
imageio.mimsave("animation_small.gif", frames_small, duration=0.2)

print("GIFs saved: animation_big.gif, animation_small.gif")


In [None]:
# --- 0. Setup ---

import torch
import numpy as np
import matplotlib.pyplot as plt
from diffusers import StableDiffusionXLPipeline
from sklearn.decomposition import TruncatedSVD

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

# Device-aware RNG, seeded so the starting noise is reproducible.
generator = torch.Generator(device=device).manual_seed(1234)

# --- 1. Load SDXL pipeline ---
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    variant="fp16"
).to(device, dtype=torch.float16)

pipe.enable_vae_tiling()
vae = pipe.vae

# --- 2. Prompt embeddings ---
prompt = "a surreal city of glass towers at sunset"

# SDXL conditions on two text encoders. Encoding with pipe.text_encoder alone
# yields embeddings that are too narrow for the UNet, so let the pipeline build
# them. encode_prompt returns (prompt_embeds, negative_prompt_embeds,
# pooled_prompt_embeds, negative_pooled_prompt_embeds).
with torch.no_grad():
    prompt_embeds, _, pooled_embeds, _ = pipe.encode_prompt(
        prompt=prompt,
        device=device,
        num_images_per_prompt=1,
        do_classifier_free_guidance=False,
    )

# Conditioning kwargs for UNet. Time ids are laid out as
# (orig_h, orig_w, crop_top, crop_left, target_h, target_w); all-zero ids would
# describe a 0x0 image.
latent_size = 64
image_size = latent_size * pipe.vae_scale_factor
add_text_embeds = pooled_embeds
add_time_ids = torch.tensor(
    [[image_size, image_size, 0, 0, image_size, image_size]],
    device=device,
    dtype=prompt_embeds.dtype,
)

# --- 3. Generate anchor latent from prompt ---
num_inference_steps = 30
pipe.scheduler.set_timesteps(num_inference_steps, device=device)

# Create the noise in the embeddings' dtype; default float32 latents would not
# match the half-precision UNet weights.
latents = torch.randn(
    (1, pipe.unet.config.in_channels, latent_size, latent_size),
    device=device,
    dtype=prompt_embeds.dtype,
    generator=generator
)
# Schedulers expect the starting noise at their own initial sigma, not unit variance.
latents = latents * pipe.scheduler.init_noise_sigma

z_anchor = None

# Without no_grad the autograd graph would span every UNet call of the loop.
with torch.no_grad():
    for i, t in enumerate(pipe.scheduler.timesteps):
        latent_model_input = pipe.scheduler.scale_model_input(latents, t)
        noise_pred = pipe.unet(
            latent_model_input,
            t,
            encoder_hidden_states=prompt_embeds,
            added_cond_kwargs={
                "text_embeds": add_text_embeds,
                "time_ids": add_time_ids,
            }
        ).sample
        step_out = pipe.scheduler.step(noise_pred, t, latents)
        latents = step_out.prev_sample if hasattr(step_out, "prev_sample") else step_out

        if i == 20:  # mid-step anchor
            z_anchor = latents.clone().detach()
            break

# Fallback when the schedule has fewer than 21 steps: use the final latent.
if z_anchor is None:
    z_anchor = latents.clone().detach()

# --- 4. Proximal probes ---
def make_probes(z, n_probes=32, eps=0.05, generator=None):
    """Sample latents on a small sphere around ``z``.

    Each probe is ``z`` plus Gaussian noise rescaled so that the noise norm
    equals ``eps * ||z||``.

    Args:
        z: Anchor latent; probes are concatenated along dim 0.
        n_probes: Number of perturbed copies to draw.
        eps: Perturbation radius relative to the norm of ``z``.
        generator: Optional ``torch.Generator`` living on ``z``'s device; pass a
            seeded one to make the probes reproducible.

    Returns:
        Tensor of probes concatenated along dim 0, in ``z``'s dtype and device.
    """
    # Norm arithmetic is done in float32: for half-precision latents the 1e-12
    # guard would otherwise round to 0.
    z32 = z.float()
    radius = eps * (z32.norm() + 1e-12)
    probes = []
    for _ in range(n_probes):
        noise = torch.randn(
            z32.shape, generator=generator, device=z32.device, dtype=torch.float32
        )
        noise = noise / (noise.norm() + 1e-12) * radius
        probes.append((z32 + noise).to(z.dtype))
    return torch.cat(probes, dim=0)

z_probes = make_probes(z_anchor, n_probes=32, eps=0.05)

# --- 5. Decode probes ---
# Cast to the VAE's dtype before decoding, decode a few probes at a time and move
# each result to the CPU straight away so the full-resolution activations of all
# probes never coexist in GPU memory.
with torch.no_grad():
    decoded = torch.cat([
        vae.decode((chunk / vae.config.scaling_factor).to(vae.dtype)).sample.float().cpu()
        for chunk in z_probes.split(4, dim=0)
    ], dim=0)

# Map decoder output from [-1, 1] to [0, 1] and flatten each image to one row.
imgs = (decoded.clamp(-1, 1) + 1) / 2
X = imgs.flatten(start_dim=1).numpy()

# --- 6. SVD resonance analysis ---
# TruncatedSVD does not centre its input. Without removing the mean image the
# leading component mostly describes the anchor image itself instead of how the
# probes differ from one another.
X_centered = X - X.mean(axis=0, keepdims=True)
# Fixed random_state: the default solver is randomized.
svd = TruncatedSVD(n_components=10, random_state=0)
svd.fit(X_centered)
singular_vectors = svd.components_
print("Explained variance ratios:", svd.explained_variance_ratio_)

# --- 7. Animate by stepping along singular directions ---
def traverse_singular(z_start, direction_flat, n_frames=12, step_size=0.05,
                      latent_dir=None, generator=None):
    """Decode a straight-line walk through latent space starting at ``z_start``.

    ``direction_flat`` lives in image space and cannot be added to a latent.
    Unless ``latent_dir`` is supplied, the walk follows a *random* latent
    direction; only the norm of ``direction_flat`` is used, to scale the step.

    Args:
        z_start: Starting latent.
        direction_flat: Flattened image-space singular vector (array-like).
        n_frames: Number of steps; one frame is decoded per step.
        step_size: Multiplier applied to the direction at every step.
        latent_dir: Optional tensor shaped like ``z_start`` to walk along instead
            of the random direction. It is rescaled to ``||direction_flat||``.
        generator: Optional ``torch.Generator`` on ``z_start``'s device for a
            reproducible random direction.

    Returns:
        List of ``n_frames`` float32 arrays with values in [0, 1], channel-last,
        for the first batch item.
    """
    z0 = z_start.clone().detach().float()
    target_norm = float(np.linalg.norm(direction_flat))
    if latent_dir is None:
        latent_dir = torch.randn(
            z0.shape, generator=generator, device=z0.device, dtype=torch.float32
        )
    # float32 keeps the 1e-12 guard meaningful and avoids half-precision drift.
    latent_dir = latent_dir.to(device=z0.device, dtype=torch.float32)
    latent_dir = latent_dir / (latent_dir.norm() + 1e-12) * target_norm
    frames = []
    for k in range(1, n_frames + 1):
        # Offset from the start each time instead of accumulating rounding error.
        z = z0 + k * step_size * latent_dir
        with torch.no_grad():
            # The decoder input must match the VAE's own dtype.
            out = vae.decode((z / vae.config.scaling_factor).to(vae.dtype)).sample
        img = (out.float().clamp(-1, 1) + 1) / 2
        frames.append(img[0].permute(1, 2, 0).cpu().numpy())
    return frames

# First and last rows of the fitted basis. Only 10 components were kept, so the
# "small" mode is the weakest retained component, not the weakest overall.
big_mode = singular_vectors[0]
small_mode = singular_vectors[-1]

# Decode a 12-step latent walk from the anchor for each mode.
frames_big = traverse_singular(z_anchor, big_mode, n_frames=12, step_size=0.05)
frames_small = traverse_singular(z_anchor, small_mode, n_frames=12, step_size=0.05)

# --- 8. Display frames ---
def show_frames(frames, title):
    """Show up to the first six frames side by side under a shared title."""
    fig = plt.figure(figsize=(12, 4))
    for col, frame in enumerate(frames[:6], start=1):
        ax = fig.add_subplot(1, 6, col)
        ax.imshow(frame)
        ax.axis("off")
    fig.suptitle(title, fontsize=14)
    plt.show()

# Preview the leading frames of each latent walk inline.
show_frames(frames_big, "Animation along BIG singular mode")
show_frames(frames_small, "Animation along SMALL singular mode")


In [None]:
# --- 0. Setup ---
#!pip install diffusers accelerate transformers safetensors torch torchvision scikit-learn -q

import torch
import numpy as np
import matplotlib.pyplot as plt
from diffusers import StableDiffusionXLPipeline
from sklearn.decomposition import TruncatedSVD

device = "cuda" if torch.cuda.is_available() else "cpu"
print("Using device:", device)

# Device-aware RNG, seeded so the starting noise is reproducible.
generator = torch.Generator(device=device).manual_seed(1234)

# --- 1. Load SDXL pipeline ---
pipe = StableDiffusionXLPipeline.from_pretrained(
    "stabilityai/stable-diffusion-xl-base-1.0",
    variant="fp16"
).to(device, dtype=torch.float16)

pipe.enable_vae_tiling()
vae = pipe.vae

# --- 2. Prompt embeddings ---
prompt = "a surreal city of glass towers at sunset"

# SDXL encode_prompt returns a 4-tuple (prompt_embeds, negative_prompt_embeds,
# pooled_prompt_embeds, negative_pooled_prompt_embeds), so the pooled embedding
# is available directly. It runs both text encoders; re-encoding with
# pipe.text_encoder alone yields embeddings that are too narrow for the UNet.
with torch.no_grad():
    prompt_embeds, _, pooled_embeds, _ = pipe.encode_prompt(
        prompt=prompt,
        device=device,
        num_images_per_prompt=1,
        do_classifier_free_guidance=False,
    )

# --- 3. Conditioning kwargs ---
# Time ids are laid out as (orig_h, orig_w, crop_top, crop_left, target_h,
# target_w); all-zero ids would describe a 0x0 image.
latent_size = 64
image_size = latent_size * pipe.vae_scale_factor
add_text_embeds = pooled_embeds
add_time_ids = torch.tensor(
    [[image_size, image_size, 0, 0, image_size, image_size]],
    device=device,
    dtype=prompt_embeds.dtype,
)

# --- 3b. Generate anchor latent from prompt ---
num_inference_steps = 30
pipe.scheduler.set_timesteps(num_inference_steps, device=device)

# Create the noise in the embeddings' dtype; default float32 latents would not
# match the half-precision UNet weights.
latents = torch.randn(
    (1, pipe.unet.config.in_channels, latent_size, latent_size),
    device=device,
    dtype=prompt_embeds.dtype,
    generator=generator
)
# Schedulers expect the starting noise at their own initial sigma, not unit variance.
latents = latents * pipe.scheduler.init_noise_sigma

z_anchor = None

# Without no_grad the autograd graph would span every UNet call of the loop.
with torch.no_grad():
    for i, t in enumerate(pipe.scheduler.timesteps):
        latent_model_input = pipe.scheduler.scale_model_input(latents, t)
        noise_pred = pipe.unet(
            latent_model_input,
            t,
            encoder_hidden_states=prompt_embeds,
            added_cond_kwargs={
                "text_embeds": add_text_embeds,
                "time_ids": add_time_ids,
            }
        ).sample
        step_out = pipe.scheduler.step(noise_pred, t, latents)
        latents = step_out.prev_sample if hasattr(step_out, "prev_sample") else step_out

        if i == 20:  # mid-step anchor
            z_anchor = latents.clone().detach()
            break

# Fallback when the schedule has fewer than 21 steps: use the final latent.
if z_anchor is None:
    z_anchor = latents.clone().detach()

# --- 4. Proximal probes ---
def make_probes(z, n_probes=32, eps=0.05, generator=None):
    """Sample latents on a small sphere around ``z``.

    Each probe is ``z`` plus Gaussian noise rescaled so that the noise norm
    equals ``eps * ||z||``.

    Args:
        z: Anchor latent; probes are concatenated along dim 0.
        n_probes: Number of perturbed copies to draw.
        eps: Perturbation radius relative to the norm of ``z``.
        generator: Optional ``torch.Generator`` living on ``z``'s device; pass a
            seeded one to make the probes reproducible.

    Returns:
        Tensor of probes concatenated along dim 0, in ``z``'s dtype and device.
    """
    # Norm arithmetic is done in float32: for half-precision latents the 1e-12
    # guard would otherwise round to 0.
    z32 = z.float()
    radius = eps * (z32.norm() + 1e-12)
    probes = []
    for _ in range(n_probes):
        noise = torch.randn(
            z32.shape, generator=generator, device=z32.device, dtype=torch.float32
        )
        noise = noise / (noise.norm() + 1e-12) * radius
        probes.append((z32 + noise).to(z.dtype))
    return torch.cat(probes, dim=0)

z_probes = make_probes(z_anchor, n_probes=32, eps=0.05)

# --- 5. Decode probes ---
# Cast to the VAE's dtype before decoding, decode a few probes at a time and move
# each result to the CPU straight away so the full-resolution activations of all
# probes never coexist in GPU memory.
with torch.no_grad():
    decoded = torch.cat([
        vae.decode((chunk / vae.config.scaling_factor).to(vae.dtype)).sample.float().cpu()
        for chunk in z_probes.split(4, dim=0)
    ], dim=0)

# Map decoder output from [-1, 1] to [0, 1] and flatten each image to one row.
imgs = (decoded.clamp(-1, 1) + 1) / 2
X = imgs.flatten(start_dim=1).numpy()

# --- 6. SVD resonance analysis ---
# TruncatedSVD does not centre its input. Without removing the mean image the
# leading component mostly describes the anchor image itself instead of how the
# probes differ from one another.
X_centered = X - X.mean(axis=0, keepdims=True)
# Fixed random_state: the default solver is randomized.
svd = TruncatedSVD(n_components=10, random_state=0)
svd.fit(X_centered)
singular_vectors = svd.components_
print("Explained variance ratios:", svd.explained_variance_ratio_)

# --- 7. Animate by stepping along singular directions ---
def traverse_singular(z_start, direction_flat, n_frames=12, step_size=0.05,
                      latent_dir=None, generator=None):
    """Decode a straight-line walk through latent space starting at ``z_start``.

    ``direction_flat`` lives in image space and cannot be added to a latent.
    Unless ``latent_dir`` is supplied, the walk follows a *random* latent
    direction; only the norm of ``direction_flat`` is used, to scale the step.

    Args:
        z_start: Starting latent.
        direction_flat: Flattened image-space singular vector (array-like).
        n_frames: Number of steps; one frame is decoded per step.
        step_size: Multiplier applied to the direction at every step.
        latent_dir: Optional tensor shaped like ``z_start`` to walk along instead
            of the random direction. It is rescaled to ``||direction_flat||``.
        generator: Optional ``torch.Generator`` on ``z_start``'s device for a
            reproducible random direction.

    Returns:
        List of ``n_frames`` float32 arrays with values in [0, 1], channel-last,
        for the first batch item.
    """
    z0 = z_start.clone().detach().float()
    target_norm = float(np.linalg.norm(direction_flat))
    if latent_dir is None:
        latent_dir = torch.randn(
            z0.shape, generator=generator, device=z0.device, dtype=torch.float32
        )
    # float32 keeps the 1e-12 guard meaningful and avoids half-precision drift.
    latent_dir = latent_dir.to(device=z0.device, dtype=torch.float32)
    latent_dir = latent_dir / (latent_dir.norm() + 1e-12) * target_norm
    frames = []
    for k in range(1, n_frames + 1):
        # Offset from the start each time instead of accumulating rounding error.
        z = z0 + k * step_size * latent_dir
        with torch.no_grad():
            # The decoder input must match the VAE's own dtype.
            out = vae.decode((z / vae.config.scaling_factor).to(vae.dtype)).sample
        img = (out.float().clamp(-1, 1) + 1) / 2
        frames.append(img[0].permute(1, 2, 0).cpu().numpy())
    return frames

# First and last rows of the fitted basis. Only 10 components were kept, so the
# "small" mode is the weakest retained component, not the weakest overall.
big_mode = singular_vectors[0]
small_mode = singular_vectors[-1]

# Decode a 12-step latent walk from the anchor for each mode.
frames_big = traverse_singular(z_anchor, big_mode, n_frames=12, step_size=0.05)
frames_small = traverse_singular(z_anchor, small_mode, n_frames=12, step_size=0.05)

# --- 8. Display frames ---
def show_frames(frames, title):
    """Show up to the first six frames side by side under a shared title."""
    fig = plt.figure(figsize=(12, 4))
    for col, frame in enumerate(frames[:6], start=1):
        ax = fig.add_subplot(1, 6, col)
        ax.imshow(frame)
        ax.axis("off")
    fig.suptitle(title, fontsize=14)
    plt.show()

# Preview the leading frames of each latent walk inline.
show_frames(frames_big, "Animation along BIG singular mode")
show_frames(frames_small, "Animation along SMALL singular mode")
