In [1]:
import torch
from diffusers import WanPipeline
from diffusers.utils import export_to_video, load_video
from vibt.wan import load_vibt_weight, encode_video
from vibt.scheduler import ViBTScheduler

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Identifier of the base checkpoint handed to `WanPipeline.from_pretrained`.
base_model_id = "Wan-AI/Wan2.1-T2V-1.3B-Diffusers"

# Instantiate the pipeline with bfloat16 weights, then move it to the CUDA device.
pipe = WanPipeline.from_pretrained(base_model_id, torch_dtype=torch.bfloat16)
pipe = pipe.to("cuda")

In [None]:
# Load the ViBT video-stylization weight file into the pipeline's transformer.
# Arguments after the transformer: source "Yuanshi/ViBT" and the weight file path
# (presumably a Hub repo id and a path inside it -- confirm against vibt.wan).
load_vibt_weight(pipe.transformer, "Yuanshi/ViBT", "video/video_stylization.safetensors")

# Replace the pipeline's scheduler with a ViBTScheduler built from the current one.
pipe.scheduler = ViBTScheduler.from_scheduler(pipe.scheduler)

In [None]:
# Read the recommended style prompts: one list entry per line of the text file.
# The encoding is given explicitly so decoding does not depend on the platform's
# locale default (non-ASCII prompt text would otherwise break on some systems).
with open("../assets/recommended_styles/video_stylization.txt", "r", encoding="utf-8") as f:
    recommended_styles = f.read().splitlines()

In [None]:
# Style prompt for this run: the first entry of the recommended list.
prompt = recommended_styles[0]

# Load the source clip and pass it through `encode_video`; the result is fed to
# the pipeline as its starting latents below.
source_video = load_video("../assets/video_00000000.mp4")
source_latents = encode_video(pipe, source_video)

# Configure the scheduler for this run (fixed seed for repeatability).
pipe.scheduler.set_parameters(seed=42, shift_gamma=5.0, noise_scale=1.0)

# Run the pipeline from the source latents and keep the first element of `.frames`.
output = pipe(
    latents=source_latents,
    prompt=prompt,
    guidance_scale=1.5,
    num_inference_steps=28,
).frames[0]

# Save the result as a 15 fps video file.
export_to_video(output, "output1.mp4", fps=15)

In [None]:
# Style prompt for this run: the second entry of the recommended list.
prompt = recommended_styles[1]

# Load the source clip and pass it through `encode_video`; the result is fed to
# the pipeline as its starting latents below.
source_video = load_video("../assets/video_00001275.mp4")
source_latents = encode_video(pipe, source_video)

# Configure the scheduler for this run (fixed seed for repeatability).
pipe.scheduler.set_parameters(seed=42, shift_gamma=5.0, noise_scale=1.0)

# Run the pipeline from the source latents and keep the first element of `.frames`.
output = pipe(
    latents=source_latents,
    prompt=prompt,
    guidance_scale=1.5,
    num_inference_steps=28,
).frames[0]

# Save the result as a 15 fps video file.
export_to_video(output, "output2.mp4", fps=15)