# Anamorphic Illusion (Perspective Projection)
This notebook implements an Anamorphic Illusion generation pipeline. 
The goal is to create an image that looks like one thing from a top-down view (View A), but reveals a different 3D object when viewed from a specific grazing angle (View B).
Inspired by Hitchcock's famous "Vertigo" effect, we use perspective warping to simulate the change in viewpoint.

### Concept
- **View A (Top-down)**: The original image, printed on a flat surface. Example prompt: "A distinct texture of wood" (can be changed to any texture).
- **View B (Perspective)**: The image seen from a low angle, simulated by a perspective transform. Example prompt: "A standing 3D Coca-Cola can" (can be changed to any 3D object).

The optimization process modifies the pixels of View A so that both views satisfy their respective text prompts.

### Run Instructions
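1.  Use Kaggle and choose GPU P100 as the accelerator.
2.  Run the first cell block (environment setup).
3.  Restart the kernel (session) so the newly installed packages are loaded.
4.  Run the rest of the cells sequentially; do not re-run the first cell.
5.  You can change the prompts for the two views in the "Prompts & Setup" cell.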

In [None]:
# @title 1. Setup Environment 
import os
import shutil

# install a numpy version that is compatible with other dependencies
!pip install "numpy==1.26.4" 

# force reinstallation and clear up any existing directory
repo_name = "Diffusion-Illusions"
if os.path.exists(repo_name):
    print(f"Removing existing {repo_name} directory...")
    shutil.rmtree(repo_name)

print(f"Cloning repository into {repo_name}...")
# clone to child file
!git clone -b master https://github.com/RyannDaGreat/Diffusion-Illusions {repo_name}

# change into the repo directory
os.chdir(repo_name)
print(f"Current working directory changed to: {os.getcwd()}")
with open("requirements.txt", "r") as f:
    lines = f.readlines()
with open("requirements.txt", "w") as f:
    for line in lines:
        if "matplotlib" in line:
            f.write("matplotlib\n") 
        elif "numpy" in line:
            continue # skip numpy for it's already installed
        else:
            f.write(line)

# install the rest
!pip install -r requirements.txt
!pip install rp --upgrade

print("\n Installation complete!")
print("⚠️ Important: Please now click 'Runtime' -> 'Restart session' in the menu!")
print("⚠️ After restarting, you do not need to run this cell again. Just run the subsequent code.")

In [None]:
# @title Load Stable Diffusion and Libraries
import numpy as np
import rp
import torch
import torch.nn as nn
import torch.nn.functional as F# @title Load Stable Diffusion and Libraries
import sys
import os

# Make the cloned repository importable: if the checkout is visible from the
# current working directory, append its absolute path to sys.path.
# (The printed message reads: "Added Diffusion-Illusions to the Python path".)
_repo_path = os.path.abspath("Diffusion-Illusions")
if os.path.exists(_repo_path):
    sys.path.append(_repo_path)
    print("已将 Diffusion-Illusions 加入 Python 路径")

import numpy as np
import rp
import torch
import torch.nn as nn
import torch.nn.functional as F
import cv2
import source.stable_diffusion as sd
from easydict import EasyDict
from source.learnable_textures import LearnableImageFourier
from source.stable_diffusion_labels import NegativeLabel
from itertools import chain
import time

# Create the Stable Diffusion wrapper only once per session: re-running this
# cell while `s` already exists skips the construction entirely.
if 's' not in globals():
    gpu = 'cuda:0'
    model_name = "CompVis/stable-diffusion-v1-4"
    s = sd.StableDiffusion(gpu, model_name)
    device = s.device

print("Model Loaded!")

In [None]:
# @title Perspective Warp Function

def get_perspective_warp_grid(height, width, device, top_scale=1.0, bot_scale=0.35):
    """
    Generates a sampling grid for perspective transformation (Anamorphic Illusion).

    The corners of the output image, (-1, -1), (1, -1), (1, 1), (-1, 1) in
    normalized coordinates, are mapped onto a symmetric trapezoid in the input
    image: its top edge spans [-top_scale, top_scale] at y = -1 and its bottom
    edge spans [-bot_scale, bot_scale] at y = 1. Every output pixel is pushed
    through the resulting homography to find where it samples the input.

    Args:
        height: Number of rows of the output grid.
        width: Number of columns of the output grid.
        device: Torch device on which the grid is created.
        top_scale: Half-width of the sampled region at the top (far end).
            > 1.0 means we sample a wider area than the input image.
        bot_scale: Half-width of the sampled region at the bottom (near end).

    Returns:
        Tensor of shape (1, height, width, 2) whose last axis holds the
        (x, y) input coordinates for each output pixel, i.e. the layout that
        ``torch.nn.functional.grid_sample`` takes as its ``grid`` argument.

    Raises:
        ValueError: If ``top_scale`` and ``bot_scale`` are not both non-zero
            with the same sign.
    """
    # A zero scale collapses one edge of the trapezoid and opposite signs make
    # it self-intersect. In both cases the homography is degenerate and the
    # perspective divide below hits zero, which would silently yield inf/NaN
    # sampling coordinates. `not (... > 0)` also rejects NaN inputs.
    if not (top_scale * bot_scale > 0):
        raise ValueError(
            "top_scale and bot_scale must be non-zero and share the same sign, "
            f"got top_scale={top_scale!r}, bot_scale={bot_scale!r}"
        )

    # The 4 corners of the output (the full normalized square).
    dst_points = np.float32([
        [-1, -1], [1, -1], [1, 1], [-1, 1]
    ])
    # The 4 corners of the input region each output corner samples from.
    src_points = np.float32([
        [-top_scale, -1], [top_scale, -1], [bot_scale, 1], [-bot_scale, 1]
    ])

    # Homography taking OUTPUT coordinates to INPUT coordinates; a sampling
    # grid needs this direction, hence (dst, src) as the argument order.
    M = cv2.getPerspectiveTransform(dst_points, src_points)
    M_tensor = torch.from_numpy(M).to(device).float()

    # Normalized coordinates of every output pixel; 'ij' indexing keeps the
    # row (y) axis first so the grid is laid out as (height, width).
    y_range = torch.linspace(-1, 1, height, device=device)
    x_range = torch.linspace(-1, 1, width, device=device)
    uy, ux = torch.meshgrid(y_range, x_range, indexing='ij')

    ones = torch.ones_like(ux)
    grid_homogeneous = torch.stack((ux, uy, ones), dim=-1)  # to homogeneous coords

    # Points are stored as row vectors, so right-multiplying by M^T applies M.
    grid_transformed = torch.matmul(grid_homogeneous, M_tensor.T)

    # Perspective divide: convert back from homogeneous coordinates.
    w = grid_transformed[..., 2]
    grid_x = grid_transformed[..., 0] / w
    grid_y = grid_transformed[..., 1] / w

    # (height, width, 2) -> (1, height, width, 2)
    grid = torch.stack((grid_x, grid_y), dim=-1).unsqueeze(0)

    return grid

print("Perspective Warp Function Defined.")

In [None]:
# @title Prompts & Setup
# Text prompts.
# View A is the flat image itself; View B is that same image after the
# perspective warp. The negative prompt is shared by both views.
prompt_flat = "A dark polished wood grain texture, top down view, high resolution"
prompt_perspective = "A carved wooden chess rook standing up, 3d render, photorealistic"
negative_prompt = "blurry, low quality, distortion, ugly, text, watermark, bad anatomy"

# Side length used for both the learnable image and the warp grid.
IMAGE_SIZE = 512

print(f"Flat Prompt: {prompt_flat}")
print(f"Perspective Prompt: {prompt_perspective}")

# One label per view, each pairing its prompt with the shared negative prompt.
label_flat = NegativeLabel(prompt_flat, negative_prompt)
label_persp = NegativeLabel(prompt_perspective, negative_prompt)


def learnable_image_maker():
    """Create a new Fourier-feature learnable image on the model's device."""
    return LearnableImageFourier(
        height=IMAGE_SIZE,
        width=IMAGE_SIZE,
        num_features=256,
        hidden_dim=256,
        scale=20,
    ).to(s.device)


# The single image being optimized; View B is derived from it by warping.
image_flat_param = learnable_image_maker()

# Optimizer over the learnable image's parameters.
params = chain(image_flat_param.parameters())
optim = torch.optim.SGD(params, lr=1e-4)

# Sampling grid for View B. top : bot is roughly 3 : 1, which creates a
# strong perspective effect.
warp_grid = get_perspective_warp_grid(
    height=IMAGE_SIZE,
    width=IMAGE_SIZE,
    device=s.device,
    top_scale=1.0,
    bot_scale=0.35,
)

In [None]:
# @title Main Optimization Loop

# Imported once here instead of on every displayed iteration inside the loop.
from IPython.display import clear_output

NUM_ITER = 4000
DISPLAY_INTERVAL = 200


def _render_views():
    """Render both views of the current learnable image.

    Returns:
        Tuple ``(flat_img, perspective_img)``: the output of
        ``image_flat_param()`` (View A) and that image resampled through
        ``warp_grid`` (View B). Both have the batch dimension removed.
    """
    flat = image_flat_param()  # (C, H, W)
    # grid_sample takes a batched input, so add a batch axis for the call
    # and drop it again afterwards.
    persp = F.grid_sample(
        flat.unsqueeze(0),
        warp_grid,
        align_corners=True,
        padding_mode='border',
    ).squeeze(0)
    return flat, persp


display_eta = rp.eta(NUM_ITER, title='Status: ')

# Describe the views using the configured prompts so the message cannot go
# stale when the prompts are edited.
print(
    'Starting training... '
    f'Left: View A ("{prompt_flat}"), Right: View B ("{prompt_perspective}")'
)

try:
    for iter_num in range(NUM_ITER):
        display_eta(iter_num)

        # 1-2. Base image (View A) and its perspective warp (View B).
        flat_img, perspective_img = _render_views()

        # 3. One guidance step per view.
        # NOTE(review): there is no explicit backward() in this cell, so
        # s.train_step presumably accumulates gradients into the learnable
        # image's parameters — confirm against source.stable_diffusion.
        # Loss A: the flat image should match the flat prompt.
        s.train_step(
            label_flat.embedding,
            flat_img.unsqueeze(0),
            noise_coef=0.1,
            guidance_scale=50,
        )

        # Loss B: the warped image should match the perspective prompt.
        s.train_step(
            label_persp.embedding,
            perspective_img.unsqueeze(0),
            noise_coef=0.1,
            guidance_scale=80,
        )

        # 4. Periodic side-by-side preview (left: View A, right: View B).
        if iter_num % DISPLAY_INTERVAL == 0:
            with torch.no_grad():
                clear_output(wait=True)

                # Convert to numpy for display
                disp_flat = rp.as_numpy_image(flat_img)
                disp_persp = rp.as_numpy_image(perspective_img)

                # Tile and Display
                combined = rp.tiled_images([disp_flat, disp_persp])
                rp.display_image(combined)
                print(f"Iteration: {iter_num}")

        # 5. Apply the accumulated gradients, then reset them.
        optim.step()
        optim.zero_grad()

except KeyboardInterrupt:
    # Allow a manual stop; whatever has been learned so far is shown below.
    print('Stopped by user.')

# Final result: re-render from the latest parameters. No gradients are needed
# for a preview, so skip building the autograd graph.
with torch.no_grad():
    flat_img, perspective_img = _render_views()
disp_flat = rp.as_numpy_image(flat_img)
disp_persp = rp.as_numpy_image(perspective_img)

print("Final Result:")
rp.display_image(rp.tiled_images([disp_flat, disp_persp]))

In [None]:
# @title Save Images
import cv2

# Output files: the flat image (View A) and the perspective illusion (View B).
file_name_flat = "anamorphic_wood.png"
file_name_persp = "anamorphic_chess.png"

# Write both preview images produced by the optimization cell.
for _image, _path in ((disp_flat, file_name_flat), (disp_persp, file_name_persp)):
    rp.save_image(_image, _path)

print(f"Saved to {file_name_flat} and {file_name_persp}")