In [None]:
# Install required libraries
!pip install --upgrade diffusers transformers torch accelerate imageio-ffmpeg

import torch
import gc
import imageio
import numpy as np
from diffusers import DiffusionPipeline, StableDiffusionPipeline
from PIL import Image
from IPython.display import display, HTML
from base64 import b64encode
import os

# Release memory left over from earlier runs before loading any model.
torch.cuda.empty_cache()
gc.collect()

# Generation settings shared by the text-to-video attempt and the fallback.
text_input = "A beautiful sunset over the mountains with vibrant colors"
output_file = "generated_video.mp4"
num_frames = 24  # 3 seconds at 8 FPS
fps = 8

# Step 1: Try HotShot-XL for text-to-video on CPU.
#
# `video_written` records whether this step produced `output_file`. The
# fallback is run *after* the try statement rather than inside the `except`
# handler: while a handler is active, the exception and its traceback keep the
# frames of the failed call (and whatever they reference) alive, which would
# overlap with loading the second model.
video_written = False
pipe = None
try:
    pipe = DiffusionPipeline.from_pretrained(
        "hotshotco/hotshot-xl",
        torch_dtype=torch.float32,  # Use float32 for CPU
        use_safetensors=True,
    )

    # NOTE(review): gradient checkpointing trades compute for memory during
    # backpropagation; it presumably has no effect on this inference-only
    # call -- confirm before relying on it to reduce memory use.
    pipe.unet.enable_gradient_checkpointing()

    video_frames = pipe(
        prompt=text_input,
        num_frames=num_frames,
        height=256,
        width=256,
        num_inference_steps=8,
        guidance_scale=7.5,
        negative_prompt="blur, low quality",
        generator=torch.Generator().manual_seed(42),
        fps=fps,
    ).frames[0]

    # Drop the model before encoding so its weights are not held during export.
    pipe = None
    gc.collect()

    imageio.mimwrite(output_file, video_frames, fps=fps)
    video_written = True
except Exception as e:
    # Any failure in this step (loading, generation or export) is non-fatal:
    # report it and let the static-video fallback below take over.
    # Strip a trailing period so the message does not read "..".
    reason = str(e).rstrip(".")
    print(f"HotShot-XL failed: {reason}. Falling back to static video with Stable Diffusion.")
finally:
    # Runs on success and on failure, so a pipeline that loaded but then
    # failed is released before the fallback model is loaded.
    del pipe
    gc.collect()

if not video_written:
    # Step 2: Fallback to Stable Diffusion v1-5 for a single image on CPU.
    pipe = StableDiffusionPipeline.from_pretrained(
        "runwayml/stable-diffusion-v1-5",
        torch_dtype=torch.float32,  # Use float32 for CPU
        use_safetensors=True,
    )

    # NOTE(review): see the note on gradient checkpointing in step 1.
    pipe.unet.enable_gradient_checkpointing()

    # The pipeline's safety checker replaces flagged images with a black
    # frame, which would yield an all-black video. Try a few fixed seeds
    # (starting with the original 42, so runs stay reproducible) and keep the
    # first image that is not flagged.
    fallback_seeds = (42, 43, 44)
    for seed in fallback_seeds:
        result = pipe(
            prompt=text_input,
            height=256,
            width=256,
            num_inference_steps=8,
            guidance_scale=7.5,
            negative_prompt="blur, low quality",
            generator=torch.Generator().manual_seed(seed),
        )
        image = result.images[0]
        # `nsfw_content_detected` is None when no safety checker is attached.
        flagged = result.nsfw_content_detected
        if not (flagged and flagged[0]):
            break
        print(f"Seed {seed}: the safety checker replaced the image with a black frame.")
    else:
        print("Every fallback seed was flagged; the video will show a black frame.")

    # Keep a copy of the still image on disk, then release the model.
    image.save("start_image.png")
    del pipe
    gc.collect()

    # Create a static video by repeating the still image for every frame.
    # The array is taken from the in-memory image (no need to re-read the
    # PNG), and the context manager closes the writer even if a write fails.
    image_np = np.array(image)
    with imageio.get_writer(output_file, fps=fps) as writer:
        for _ in range(num_frames):
            writer.append_data(image_np)

# Step 3: Display video inline in Colab
def display_video_from_file(video_file):
    """Show an MP4 file inline by embedding it in an HTML ``<video>`` tag.

    The file at ``video_file`` is read in binary mode, base64-encoded and
    placed in a ``data:video/mp4`` URI, so the rendered HTML carries the
    video bytes itself rather than a link to the file.

    Args:
        video_file: Path of the video file to read.
    """
    with open(video_file, "rb") as handle:
        encoded = b64encode(handle.read()).decode()

    # Assembled piece by piece; the whitespace matches the layout of the
    # rendered markup exactly.
    markup = (
        "\n"
        '    <video width="640" height="480" controls>\n'
        f'        <source src="data:video/mp4;base64,{encoded}" type="video/mp4">\n'
        "    </video>\n"
        "    "
    )
    display(HTML(markup))

# Embed the video written to `output_file` in this cell's output.
display_video_from_file(output_file)

Collecting diffusers
  Downloading diffusers-0.33.1-py3-none-any.whl.metadata (19 kB)
Collecting transformers
  Downloading transformers-4.51.3-py3-none-any.whl.metadata (38 kB)
Collecting accelerate
  Downloading accelerate-1.6.0-py3-none-any.whl.metadata (19 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model_index.json:   0%|          | 0.00/579 [00:00<?, ?B/s]

HotShot-XL failed: unet/hotshot_xl.py as defined in `model_index.json` does not exist in hotshotco/hotshot-xl and is not a module in 'diffusers/pipelines'.. Falling back to static video with Stable Diffusion.


model_index.json:   0%|          | 0.00/541 [00:00<?, ?B/s]

Fetching 15 files:   0%|          | 0/15 [00:00<?, ?it/s]

preprocessor_config.json:   0%|          | 0.00/342 [00:00<?, ?B/s]

scheduler_config.json:   0%|          | 0.00/308 [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/525k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/472 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/492M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/4.72k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/806 [00:00<?, ?B/s]

vocab.json:   0%|          | 0.00/1.06M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/743 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.44G [00:00<?, ?B/s]

config.json:   0%|          | 0.00/547 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/617 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/335M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/8 [00:00<?, ?it/s]

Potential NSFW content was detected in one or more images. A black image will be returned instead. Try again with a different prompt and/or seed.
