In [None]:
import torch
import os
import psutil
from PIL import Image
from diffusers import AnimateDiffPipeline, MotionAdapter, EulerDiscreteScheduler
from diffusers.utils import export_to_gif
from huggingface_hub import hf_hub_download
from safetensors.torch import load_file
from torchvision import transforms

# Everything below runs on the GPU in half precision.
device = "cuda"
dtype = torch.float16

# Load the reference picture, force three RGB channels and scale it to
# 768x480 -- the same width/height that is later requested from the pipeline.
image_path = "img.png"
image = Image.open(image_path)
image = image.convert("RGB")
image = image.resize((768, 480))

# Single-step preprocessing chain: PIL image -> tensor.
transform = transforms.Compose([transforms.ToTensor()])

# Prepend a batch axis, move to the GPU, then cast to float16.
# NOTE(review): nothing else in the visible cell reads `image_tensor`.
image_tensor = torch.unsqueeze(transform(image), dim=0).to(device).half()

# Hugging Face identifiers: the distilled motion-adapter repo and the base
# text-to-image checkpoint the pipeline is built from.
repo = "ByteDance/AnimateDiff-Lightning"
base = "emilianJR/epiCRealism"

# Number of denoising steps; it also selects which checkpoint file is fetched.
step = 8
ckpt = f"animatediff_lightning_{step}step_diffusers.safetensors"

# Text prompt, one sentence per literal (adjacent literals are concatenated).
# NOTE(review): the captured output of this cell reports 88 tokens against
# CLIP's 77-token limit, so the tail of the last sentence is truncated.
prompt = (
    "Donald Trump dances on a golf field with fluid, relaxed movements. "
    "He shifts his weight from side to side, occasionally lifting his arms in a gentle sway. "
    "His steps are light and rhythmic, moving naturally with the beat, his body fluidly rotating as he turns and spins. "
    "His movements are confident yet unforced, creating a sense of ease and enjoyment. "
    "The camera remains stationary, capturing his smooth dance flow from a medium distance."
)

# Build an empty motion adapter on the target device/dtype, then fill it with
# the checkpoint weights downloaded from the Hub.
adapter = MotionAdapter().to(device, dtype)
ckpt_path = hf_hub_download(repo, ckpt)
adapter.load_state_dict(load_file(ckpt_path, device=device))

# Assemble the pipeline from the base checkpoint plus the motion adapter.
pipe = AnimateDiffPipeline.from_pretrained(
    base,
    motion_adapter=adapter,
    torch_dtype=dtype,
).to(device)

# Swap in an Euler scheduler derived from the pipeline's existing scheduler
# config, overriding the timestep spacing and beta schedule.
pipe.scheduler = EulerDiscreteScheduler.from_config(
    pipe.scheduler.config,
    timestep_spacing="trailing",
    beta_schedule="linear",
)

# Generate the clip from the text prompt alone.
# The earlier `image=image_path` keyword was removed: it is not a call argument
# of AnimateDiffPipeline, so the reference image never influenced the result
# (the keyword is either silently ignored or rejected with a TypeError,
# depending on the installed diffusers release).
# NOTE(review): conditioning on the picture needs a different mechanism
# (e.g. the pipeline's `ip_adapter_image` input) -- confirm the intent.
output = pipe(
    prompt=prompt,
    guidance_scale=1.0,
    # Same `step` value that selected the checkpoint file.
    num_inference_steps=step,
    width=768,
    height=480,
)

# Write the frames of the first generated video as an animated GIF.
export_to_gif(output.frames[0], "trump-dancing.gif")

# WARNING(review): this force-kills EVERY other process whose name contains
# "python" -- including the Jupyter server and unrelated jobs owned by the same
# user. Confirm this is really intended before running the cell.
current_pid = os.getpid()
for proc in psutil.process_iter(attrs=['pid', 'name']):
    # psutil reports None for a name it could not read; treat that as no match
    # instead of crashing on `.lower()`.
    proc_name = proc.info['name'] or ""
    if "python" not in proc_name.lower() or proc.info['pid'] == current_pid:
        continue
    try:
        # Process.kill() raises psutil exceptions (os.kill raised OSError
        # subclasses that the handler below never caught), so one
        # inaccessible process no longer aborts the whole loop.
        proc.kill()
    except (psutil.NoSuchProcess, psutil.AccessDenied):
        continue

Loading pipeline components...: 100%|██████████| 6/6 [00:01<00:00,  4.83it/s]
Token indices sequence length is longer than the specified maximum sequence length for this model (88 > 77). Running this sequence through the model will result in indexing errors
The following part of your input was truncated because CLIP can only handle sequences up to 77 tokens: [', capturing his smooth dance flow from a medium distance.']
100%|██████████| 8/8 [00:04<00:00,  1.70it/s]
