In [None]:
# Imports and paths setup
import sys
import rp
import torch
import numpy as np
from einops import rearrange

# Resolve the repository layout from the git top-level so the notebook works
# regardless of the directory it is launched from.
top_dir = rp.get_git_toplevel()
ltx_dir = rp.path_join(top_dir, 'LTX2')
nfs_models_dir = rp.path_join(ltx_dir, 'models')

# Make `download_models` importable. The membership check keeps this cell
# idempotent: re-running it no longer appends a duplicate entry to sys.path.
if nfs_models_dir not in sys.path:
    sys.path.append(nfs_models_dir)

from download_models import local_download_dir, download_from_web
# All model files used below are resolved relative to this directory.
models_dir = local_download_dir

# LTX Pipeline imports
from ltx_core.loader import LTXV_LORA_COMFY_RENAMING_MAP, LoraPathStrengthAndSDOps
from ltx_pipelines.ti2vid_two_stages import TI2VidTwoStagesPipeline
from ltx_pipelines.utils.media_io import encode_video
from ltx_pipelines.utils.constants import AUDIO_SAMPLE_RATE

# Model paths
def _in_models_dir(filename):
    """Return the path of `filename` inside `models_dir`."""
    return rp.path_join(models_dir, filename)

checkpoint_path = _in_models_dir("ltx-2-19b-dev.safetensors")
distilled_lora_path = _in_models_dir("ltx-2-19b-distilled-lora-resized_dynamic_fro095_avg_rank_242_bf16.safetensors")
spatial_upsampler_path = _in_models_dir("ltx-2-spatial-upscaler-x2-1.0.safetensors")
# Passed to the pipeline as `gemma_root`; presumably the Gemma files sit
# directly in the download directory — confirm against download_models.
gemma_root = models_dir

# Output directory
output_dir = rp.path_join(top_dir, "outputs")
rp.make_directory(output_dir)

In [None]:
# Setup
# Flag read by show_video() to decide whether inline display is attempted.
IN_NOTEBOOK = rp.running_in_jupyter_notebook()
# Torch device picked by rp.
# NOTE(review): the exact semantics of prefer_used/reserve are defined by rp — confirm
# whether reserve=True allocates GPU memory at this point.
DEVICE = rp.select_torch_device(prefer_used=True, reserve=True)
# NOTE(review): DEVICE and DTYPE are not referenced by any cell visible in this
# notebook section; verify they are used elsewhere before relying on them.
DTYPE = torch.bfloat16
# Imported from download_models; presumably fetches the model files into
# local_download_dir — confirm. Runs on every execution of this cell.
download_from_web()

In [None]:
# Helpers
def show_video(video):
    """Display `video` inline when running inside Jupyter; otherwise do nothing.

    Args:
        video: The video object handed to `rp.display_video` unchanged.

    Returns:
        None in every case.
    """
    if not IN_NOTEBOOK:
        # Outside a notebook there is nowhere to render the preview.
        return
    rp.display_video(video)

def save_video_with_audio(video_tensor, audio_tensor, path, fps=25):
    """Encode a video together with its audio track to `path` via `encode_video`.

    Args:
        video_tensor: Video data forwarded as `video=`.
        audio_tensor: Audio data forwarded as `audio=`.
        path: Destination forwarded as `output_path=`.
        fps: Frame rate; converted with `int()`, so fractional values are
            truncated (e.g. 25.0 -> 25).

    Returns:
        None.
    """
    whole_fps = int(fps)
    encode_kwargs = {
        "video": video_tensor,
        "fps": whole_fps,
        "audio": audio_tensor,
        # Sample rate comes from the pipeline constants, not from the caller.
        "audio_sample_rate": AUDIO_SAMPLE_RATE,
        "output_path": path,
        # Always 1 here. NOTE(review): presumably means the video is passed as
        # a single chunk — confirm against encode_video.
        "video_chunks_number": 1,
    }
    encode_video(**encode_kwargs)

In [None]:
# Create Pipeline (run once)
# Second positional field of LoraPathStrengthAndSDOps; presumably the LoRA
# strength — confirm against ltx_core.loader.
DISTILLED_LORA_STRENGTH = 0.6

_distilled_entry = LoraPathStrengthAndSDOps(
    distilled_lora_path,
    DISTILLED_LORA_STRENGTH,
    LTXV_LORA_COMFY_RENAMING_MAP,
)
distilled_lora = [_distilled_entry]

# Collect the constructor arguments first so they are easy to inspect or tweak.
_pipeline_kwargs = dict(
    checkpoint_path=checkpoint_path,
    distilled_lora=distilled_lora,
    spatial_upsampler_path=spatial_upsampler_path,
    gemma_root=gemma_root,
    loras=[],  # no extra LoRAs beyond the distilled one
)
pipeline = TI2VidTwoStagesPipeline(**_pipeline_kwargs)

In [None]:
# Generate T2V
# Two-stage: 1280x768 -> Stage1 at 640x384 -> Upscale to 1280x768
# Dimensions must be divisible by 64

# Alternative prompt; pass it as `prompt=` below to use it instead of cookie_prompt.
drone_prompt = "Drone shot, helicopter flying fast through a narrow rocky canyon, sun-kissed day, clear turquoise water below, white foam waves, motion blur, sharp focus"
cookie_prompt = 'PROMPT:  INT. OVEN – DAY. Static camera from inside the oven, looking outward through the slightly fogged glass door. Warm golden light glows around freshly baked cookies. The baker’s face fills the frame, eyes wide with focus, his breath fogging the glass as he leans in. Subtle reflections move across the glass as steam rises. Baker (whispering dramatically): “Today… I achieve perfection.” He leans even closer, nose nearly touching the glass. “Golden edges. Soft center. The gods themselves will smell these cookies and weep.” Baker: “Wait—” (beat) “Did I… forget the chocolate chips?” Cut to side view — coworker pops into frame, chewing casually. Coworker (mouth full): “Nope. You forgot the sugar.” Quick zoom back to the baker’s horrified face, pressed against the oven door, as cookies deflate behind the glass. Steam drifts upward in slow motion. pixar style acting and timing'
negative_prompt = "worst quality, inconsistent motion, blurry, jittery, distorted, watermarks, low quality, artifacts, morphing, warping, flicker, text, logo"

height, width, num_frames, frame_rate = 768, 1280, 121, 25.0

# Fail fast on the divisibility rule stated above, before starting the
# expensive 40-step generation.
if height % 64 != 0 or width % 64 != 0:
    raise ValueError(
        f"height and width must both be divisible by 64, got {width}x{height}"
    )

ltx_video, ltx_audio = pipeline(
    prompt=cookie_prompt,
    negative_prompt=negative_prompt,
    seed=42,  # fixed seed so repeated runs use the same random state
    height=height,
    width=width,
    num_frames=num_frames,
    frame_rate=frame_rate,
    num_inference_steps=40,
    cfg_guidance_scale=4.0,
    images=[],  # no conditioning images: pure text-to-video
)

# ltx_video is consumed as an iterable of chunks.
# NOTE(review): presumably chunks are produced lazily, which is why the
# iteration happens under inference_mode — confirm against the pipeline.
with torch.inference_mode():
    video_chunks = list(ltx_video)
    if not video_chunks:
        # Previously this surfaced as a bare IndexError on video_chunks[0].
        raise RuntimeError("pipeline returned no video chunks")
    # Skip torch.cat for a single chunk to avoid an unnecessary copy.
    video_tensor = torch.cat(video_chunks, dim=0) if len(video_chunks) > 1 else video_chunks[0]

# The output file name is fixed, so each run overwrites the previous result.
video_path = rp.path_join(output_dir, "generated_video.mp4")
save_video_with_audio(video_tensor, ltx_audio, video_path, frame_rate)
# NumPy copy used by the preview cell.
video = rp.as_numpy_array(video_tensor)

In [None]:
# Preview the generated clip inline; show_video() does nothing outside Jupyter.
show_video(video)