In [None]:
# ==========================================
# SD LATENT WALK WITH ARTISTIC MODELS
# ==========================================

import random
import datetime
import torch
import numpy as np
import imageio
from tqdm.auto import tqdm
from PIL import Image
import matplotlib.pyplot as plt
from diffusers import StableDiffusionPipeline, DDIMScheduler

In [None]:
# STEP 0: Choose Your Artistic Model
# Set MODEL_CHOICE to one of the keys of ARTISTIC_MODELS.

# Display name -> Hugging Face repo id. Keeping both in one table means the
# name and the id are always switched together.
ARTISTIC_MODELS = {
    # Best all-around artistic model
    "DreamShaper": "Lykon/DreamShaper",
    # Photorealistic painting style
    "Deliberate": "XpucT/Deliberate",
    # Surreal, dreamy art
    "Dreamlike": "dreamlike-art/dreamlike-diffusion-1.0",
    # Fantasy/2.5D style
    "ReV-Animated": "stablediffusionapi/rev-animated",
    # Midjourney-style art
    "OpenJourney": "prompthero/openjourney",
}

MODEL_CHOICE = "Dreamlike"

# Fail here, with the list of valid names, rather than later during download.
if MODEL_CHOICE not in ARTISTIC_MODELS:
    raise ValueError(
        f"Unknown MODEL_CHOICE {MODEL_CHOICE!r}; pick one of {sorted(ARTISTIC_MODELS)}"
    )

MODEL_NAME = MODEL_CHOICE
MODEL_ID = ARTISTIC_MODELS[MODEL_CHOICE]

print(f"Model: {MODEL_NAME}")
print(f"Model ID: {MODEL_ID}")

In [None]:

# Load model
# Use the GPU with half precision when one is available; otherwise fall back
# to CPU with float32, since the hard-coded "cuda" target would raise there.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
TORCH_DTYPE = torch.float16 if DEVICE == "cuda" else torch.float32

print("Loading model...")
# The scheduler is loaded from the model repo's "scheduler" subfolder and
# handed to the pipeline so sampling uses DDIM.
scheduler = DDIMScheduler.from_pretrained(MODEL_ID, subfolder="scheduler")
pipe = StableDiffusionPipeline.from_pretrained(
    MODEL_ID,
    scheduler=scheduler,
    torch_dtype=TORCH_DTYPE,
    safety_checker=None,
    requires_safety_checker=False
).to(DEVICE)
print("Model loaded!")

# SLERP
def slerp(val, low, high, DOT_THRESHOLD=0.9995):
    """
    Spherical linear interpolation between two batched tensors.

    Each batch item is flattened to a vector, the angle between the two
    vectors is measured, and the inputs are blended along that arc. When the
    vectors are nearly (anti-)parallel, i.e. |cos(angle)| > DOT_THRESHOLD,
    plain linear interpolation is used for that item instead.

    Args:
        val: interpolation position; 0 returns ``low``, 1 returns ``high``.
        low, high: tensors of identical shape ``(batch, ...)``. Typically
            ``(batch, channels, height, width)`` latents, but any rank >= 2
            is accepted.
        DOT_THRESHOLD: |cosine| above which the linear fallback is used.

    Returns:
        Tensor with the same shape as ``low``.
    """
    batch_size = low.shape[0]
    # One weight per batch item, broadcast over every remaining dimension.
    weight_shape = (batch_size,) + (1,) * (low.dim() - 1)

    # float16 rounds the 1e-8 guard to zero, which turns zero-norm inputs into
    # NaN. Measure the angle in float32 for low-precision inputs instead.
    low_precision = (torch.float16, torch.bfloat16)
    compute_dtype = torch.float32 if low.dtype in low_precision else low.dtype
    low_flat = low.reshape(batch_size, -1).to(compute_dtype)
    high_flat = high.reshape(batch_size, -1).to(compute_dtype)

    eps = 1e-8
    low_unit = low_flat / (torch.norm(low_flat, dim=1, keepdim=True) + eps)
    high_unit = high_flat / (torch.norm(high_flat, dim=1, keepdim=True) + eps)

    # Cosine of the angle between the two vectors, one value per batch item.
    dot = (low_unit * high_unit).sum(1)
    theta = torch.acos(torch.clamp(dot, -1.0, 1.0))
    sin_theta = torch.sin(theta)

    # Slerp weights: sin((1 - t) * theta) / sin(theta) and sin(t * theta) / sin(theta).
    w0 = torch.sin(theta * (1 - val)) / (sin_theta + eps)
    w1 = torch.sin(theta * val) / (sin_theta + eps)

    # Cast back so the blend itself runs in the dtype of the inputs.
    w0 = w0.to(low.dtype).view(weight_shape)
    w1 = w1.to(low.dtype).view(weight_shape)
    spherical = w0 * low + w1 * high

    # Nearly parallel vectors make sin(theta) ~ 0, so the weights above are
    # unreliable there; those batch items take the linear blend.
    linear = (1 - val) * low + val * high
    use_linear = (dot.abs() > DOT_THRESHOLD).view(weight_shape)

    return torch.where(use_linear, linear, spherical)

# Simple linear interpolation (fallback)
def lerp(val, low, high):
    """Blend linearly from ``low`` (at val=0) towards ``high`` (at val=1)."""
    delta = high - low
    return low + delta * val

# ==========================================
# MAIN VIDEO GENERATION FUNCTION
# ==========================================

def generate_latent_walk_video(
    prompt="a futuristic cityscape at sunset, cyberpunk style, highly detailed, 8k",
    negative_prompt="blurry, low quality, distorted, ugly",
    num_frames=60,
    fps=12,
    height=512,
    width=512,
    guidance_scale=7.5,
    num_inference_steps=20,
    walk_type="circular_noise",
    seed_start=42,
    seed_end=12345,
    output_path="latent_walk.mp4",
    reverse_loop=True
):
    """
    Render a video by walking through the initial-noise space of the pipeline.

    Two noise tensors are drawn from ``seed_start`` and ``seed_end``. For each
    frame an intermediate noise tensor is built, denoised with
    classifier-free guidance under a fixed prompt, decoded by the VAE and
    appended to the video. Relies on the module-level ``pipe`` and ``slerp``.

    Args:
        prompt: text prompt shared by every frame.
        negative_prompt: text for the unconditional branch; empty/None uses "".
        num_frames: number of frames to render before optional mirroring.
        fps: frame rate of the written video.
        height, width: output size in pixels; latents are 1/8 of this.
        guidance_scale: classifier-free guidance weight.
        num_inference_steps: scheduler steps per frame.
        walk_type: "circular_noise" (cos/sin mix of the two noises, one full
            turn) or "linear_noise" (slerp from the first to the second noise).
        seed_start, seed_end: seeds of the two noise tensors.
        output_path: destination video file.
        reverse_loop: for non-circular walks, append the frames reversed so
            the video plays forward then backward.

    Returns:
        (frames, output_path): list of HxWx3 uint8 arrays and the path written.

    Raises:
        ValueError: if ``walk_type`` is not one of the supported values.
    """
    supported_walks = ("circular_noise", "linear_noise")
    if walk_type not in supported_walks:
        raise ValueError(
            f"Unknown walk_type {walk_type!r}; expected one of {supported_walks}"
        )

    device = pipe.device
    latents_shape = (1, 4, height // 8, width // 8)

    print(f"\nGenerating {num_frames} frames for: '{prompt}'")
    print(f"Walk type: {walk_type}")

    def _token_ids(text):
        # Same settings for both prompts; truncation keeps an over-long
        # negative prompt at the length the text encoder accepts.
        tokens = pipe.tokenizer(
            [text],
            padding="max_length",
            max_length=pipe.tokenizer.model_max_length,
            truncation=True,
            return_tensors="pt"
        )
        return tokens.input_ids.to(device)

    # Encode text prompt: [unconditional, conditional] for guidance.
    with torch.no_grad():
        text_embeddings = pipe.text_encoder(_token_ids(prompt))[0]
        uncond_embeddings = pipe.text_encoder(_token_ids(negative_prompt or ""))[0]
        text_embeddings_base = torch.cat([uncond_embeddings, text_embeddings])

    # Latents must match the dtype of the embeddings fed to the UNet, so
    # follow the loaded pipeline instead of assuming float16.
    dtype = text_embeddings_base.dtype

    def _seeded_noise(seed):
        # A private generator leaves the global RNG state untouched.
        # NOTE(review): expected to match the tensors produced by the former
        # torch.manual_seed + randn path; confirm if exact parity with older
        # renders matters.
        generator = torch.Generator(device=device).manual_seed(seed)
        return torch.randn(latents_shape, generator=generator, device=device, dtype=dtype)

    # Setup noise interpolation
    noise_start = _seeded_noise(seed_start)
    noise_end = _seeded_noise(seed_end)

    if walk_type == "circular_noise":
        # Stop one step short of 2*pi: including it would render the first
        # frame twice and cause a visible hitch when the video loops.
        step = 2 * np.pi / max(num_frames, 1)
        angles = torch.arange(num_frames, device=device, dtype=torch.float32) * step
    else:
        interpolation_weights = torch.linspace(0, 1, num_frames, device=device)

    # The VAE latent scale; 0.18215 is the fallback if the config lacks it.
    vae_scale = getattr(pipe.vae.config, "scaling_factor", 0.18215)

    frames = []

    # Generate frames
    for i in tqdm(range(num_frames), desc="Generating frames"):

        if walk_type == "circular_noise":
            current_noise = torch.cos(angles[i]) * noise_start + torch.sin(angles[i]) * noise_end
        else:
            weight = interpolation_weights[i].item()  # Get scalar value
            current_noise = slerp(weight, noise_start, noise_end)
        current_noise = current_noise.to(dtype)

        with torch.no_grad():
            # Reset the scheduler for every frame so each denoising run
            # starts from a clean state.
            pipe.scheduler.set_timesteps(num_inference_steps)
            latents = current_noise * pipe.scheduler.init_noise_sigma

            # Denoising loop
            for timestep in pipe.scheduler.timesteps:
                # Duplicate the latents to evaluate both guidance branches
                # in one UNet call.
                latent_expanded = torch.cat([latents] * 2)
                latent_expanded = pipe.scheduler.scale_model_input(latent_expanded, timestep)

                noise_pred = pipe.unet(
                    latent_expanded,
                    timestep,
                    encoder_hidden_states=text_embeddings_base
                ).sample

                noise_uncond, noise_text = noise_pred.chunk(2)
                noise_pred = noise_uncond + guidance_scale * (noise_text - noise_uncond)
                latents = pipe.scheduler.step(noise_pred, timestep, latents).prev_sample

            # Decode to image
            image = pipe.vae.decode(latents / vae_scale).sample

            # Map [-1, 1] to [0, 1]; go through float32 so conversion to
            # numpy works for any pipeline dtype.
            image = (image / 2 + 0.5).clamp(0, 1)
            image = image.float().cpu().permute(0, 2, 3, 1).numpy()[0]
            frames.append((image * 255).astype(np.uint8))

    # Create seamless loop
    if reverse_loop and walk_type != "circular_noise":
        print("Creating reverse loop...")
        frames = frames + frames[::-1]

    # Save video; the context manager closes the file even if a frame fails.
    print(f"\nSaving video to {output_path}...")
    with imageio.get_writer(output_path, fps=fps, codec='libx264', quality=8) as writer:
        for frame in frames:
            writer.append_data(frame)

    print(f"✓ Done! Duration: {len(frames)/fps:.1f}s, Frames: {len(frames)}")
    return frames, output_path

# ==========================================
# RUN EXAMPLES BELOW
# ==========================================


In [None]:
"""
FRAME INTERPOLATION WITH RIFE
Original file is located at https://colab.research.google.com/github/hzwer/ECCV2022-RIFE/blob/main/Colab_demo.ipynb
"""

!git clone https://github.com/hzwer/arXiv2020-RIFE
!pip install git+https://github.com/rk-exxec/scikit-video.git@numpy_deprecation
!mkdir /content/arXiv2020-RIFE/train_log
%cd /content/arXiv2020-RIFE/train_log
!gdown --id 1APIzVeI-4ZZCEuIRE1m6WYfSCaOsi_7_
!7z e RIFE_trained_model_v3.6.zip
%cd /content/

In [None]:
#============================
# PROMPT
#============================

def prompt_to_slug(text, max_words=5):
    """
    Build a filename-safe slug from the first ``max_words`` words of ``text``.

    Only letters, digits, '-' and '_' are kept in each word, so punctuation
    common in prompts (quotes, parentheses, slashes, colons) never reaches a
    file path or a shell command. Words that end up empty are dropped.
    Returns "prompt" when nothing usable remains.
    """
    cleaned_words = []
    for word in text.split()[:max_words]:
        kept = "".join(ch for ch in word if ch.isalnum() or ch in "-_")
        if kept:
            cleaned_words.append(kept)
    return "_".join(cleaned_words) or "prompt"


prompt="Silhouettes of angelic beings, totem animals, broad rough strokes , energy misty, breathtaking, oil painting style, artistic, aesthetic modern art, hyper-realism, trending on artstation"

# Output name: HHMM timestamp + slug of the first prompt words.
current_time = datetime.datetime.now().strftime("%H%M")
outfile = current_time + "_" + prompt_to_slug(prompt) + ".mp4"

# Sample seeds, printed for reference.
seed1 = random.randint(1, 99)
seed2 = random.randint(8888, 9999)

print(f"Prompt: {prompt}")
print(outfile)
print(seed1)
print(seed2)

In [None]:
# PREVIEW
# Generate a single frame using the existing prompt
# We set num_frames=1 to get just one image
#===================================================

# Draw the seeds first and print them, so a preview that looks good can be
# reproduced by passing the same values again.
preview_seed_start = random.randint(1, 9999)
preview_seed_end = random.randint(1, 9999)
print(f"Preview seeds: start={preview_seed_start}, end={preview_seed_end}")

single_frame_data, _ = generate_latent_walk_video(
    prompt=prompt,
    walk_type="linear_noise", # linear_noise or circular_noise, doesn't matter for 1 frame
    seed_start=preview_seed_start,
    seed_end=preview_seed_end,
    num_frames=1,
    fps=1, # Doesn't matter for a single image
    width=960,
    height=480,
    num_inference_steps=15,
    output_path="single_image.mp4", # A dummy output path, as we only need the image data
    reverse_loop=False # Not relevant for a single frame
)

# Display the generated image
fig, ax = plt.subplots(figsize=(12, 12))
ax.imshow(single_frame_data[0])
ax.axis('off')
ax.set_title(outfile)
plt.show()

In [None]:

# Example : Linear Walk
# ====================================

print("\n" + "=" * 50)
print("EXAMPLE 2: Linear Walk")
print("=" * 50)

# Draw the seeds first and print them, so this exact walk can be rendered
# again (for example at a higher frame count) by reusing the values.
linear_seed_start = random.randint(1, 99)
linear_seed_end = random.randint(8000, 9999)
print(f"Linear walk seeds: start={linear_seed_start}, end={linear_seed_end}")

frames2, video2 = generate_latent_walk_video(
    prompt=prompt,
    walk_type="linear_noise",
    seed_start=linear_seed_start,
    seed_end=linear_seed_end,
    num_frames=60,
    fps=10,
    width=960,
    height=480,
    num_inference_steps=15,
    output_path=outfile,
    reverse_loop=True
)

In [None]:
# RIFE 4X FRAMES
# ======================================

%cd /content/arXiv2020-RIFE
vidfile = "../" + outfile
!python3 inference_video.py --exp=2 --video={vidfile}
%cd /content/

In [None]:

# Example : Circular Walk (Seamless Loop)
# ========================================

print("=" * 50)
print("EXAMPLE : Circular Walk")
print("=" * 50)

# Write to its own file: reusing `outfile` would overwrite the video
# produced by the linear walk.
circular_outfile = "circular_" + outfile

frames1, video1 = generate_latent_walk_video(
    prompt=prompt,
    walk_type="circular_noise",
    num_frames=20,
    fps=10,
    num_inference_steps=15,
    width=960,
    height=480,
    output_path=circular_outfile
)


In [None]:
# FIND EXAMPLES OF SD PROMPTS ON THESE SITES

| Site           | URL                                      | Best For                                  |
| -------------- | ---------------------------------------- | ----------------------------------------- |
| **PromptHero** | [prompthero.com](https://prompthero.com) | Curated artistic prompts with examples    |
| **Lexica.art** | [lexica.art](https://lexica.art)         | Search 10M+ generated images with prompts |
| **Civitai**    | [civitai.com](https://civitai.com)       | Model-specific prompts and workflows      |
| **OpenArt**    | [openart.ai](https://openart.ai)         | Prompt search + prompt book library       |
| **ArtHub.ai**  | [arthub.ai](https://arthub.ai)           | Community prompts with style tags         |
