In [None]:
!pip install diffusers

Collecting diffusers
  Downloading diffusers-0.30.3-py3-none-any.whl.metadata (18 kB)
Downloading diffusers-0.30.3-py3-none-any.whl (2.7 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.7/2.7 MB[0m [31m20.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: diffusers
Successfully installed diffusers-0.30.3


In [None]:
import torch
from diffusers import DiffusionPipeline, DPMSolverMultistepScheduler
from diffusers.utils import export_to_video
from PIL import Image
import numpy as np
import os
from IPython.display import display, Video  # Import to display video in Colab

# Tunable settings for this run, gathered in one place.
MODEL_ID = "damo-vilab/text-to-video-ms-1.7b"
NUM_INFERENCE_STEPS = 25
SEED = 42  # fixed so that Restart & Run All regenerates the same clip

# Load the text-to-video model (half-precision "fp16" weight variant)
pipe = DiffusionPipeline.from_pretrained(MODEL_ID, torch_dtype=torch.float16, variant="fp16")
# Swap in a DPM-Solver multistep scheduler built from the pipeline's own scheduler config
pipe.scheduler = DPMSolverMultistepScheduler.from_config(pipe.scheduler.config)
# Per the diffusers docs this keeps sub-models on the CPU until they are needed,
# trading speed for lower GPU memory use.
pipe.enable_model_cpu_offload()

# Define your prompt
prompt = "Batman walking in the beach"

# Seed the sampler explicitly; without a generator every run draws different
# noise and the output cannot be reproduced. A CPU generator is used so the
# seed behaves the same regardless of which accelerator is attached.
generator = torch.Generator(device="cpu").manual_seed(SEED)

# Generate the video frames
video_frames = pipe(
    prompt,
    num_inference_steps=NUM_INFERENCE_STEPS,
    generator=generator,
).frames

def _to_rgb_image(frame_array):
    """Convert a single frame array into an RGB ``PIL.Image``.

    Args:
        frame_array: Array for one frame. Assumed to hold floats scaled to
            [0, 1] (the ``* 255`` scaling below relies on that) -- TODO confirm
            against the pipeline's output type.

    Returns:
        The frame as a PIL image in ``"RGB"`` mode.
    """
    # Clip before the uint8 cast: any value outside [0, 1] would otherwise
    # wrap around when cast and show up as speckle noise.
    pixels = (np.clip(frame_array, 0.0, 1.0) * 255).astype(np.uint8)
    image = Image.fromarray(pixels)
    return image if image.mode == "RGB" else image.convert("RGB")


# Ensure each frame has 3 channels (RGB)
processed_frames = []
for i, frame in enumerate(video_frames):
    print(f"Original frame {i} shape: {frame.shape}")

    if frame.ndim == 4:
        # A 4-D entry is a stack of frames along axis 0; unpack it frame by frame.
        processed_frames.extend(_to_rgb_image(single_frame) for single_frame in frame)
    elif frame.ndim == 3:
        # A 3-D entry is already a single frame.
        processed_frames.append(_to_rgb_image(frame))
    else:
        raise ValueError(f"Unexpected frame shape {frame.shape}")

# Where the clip goes: <save_directory>/<filename>
filename = "batman.mp4"
save_directory = "videos"  # Replace with your desired directory path
save_path = os.path.join(save_directory, filename)

# The target folder may be missing on a fresh runtime, so create it up front
# (exist_ok=True makes this a no-op when it is already there).
os.makedirs(save_directory, exist_ok=True)

# Write the processed frames out as a video file at save_path and report
# the value export_to_video hands back.
video_path = export_to_video(
    processed_frames,
    output_video_path=save_path,
)
print(f"Exported video at {video_path}")

# Show the result inline; embed=True stores the video data in the notebook
# output instead of linking to the file.
display(Video(video_path, embed=True))

Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]



  0%|          | 0/25 [00:00<?, ?it/s]

Original frame 0 shape: (16, 256, 256, 3)


  self.pid = _posixsubprocess.fork_exec(


Exported video at videos/batman.mp4


Here are some GPU-related reasons that could cause issues when running your code:

1. **Insufficient GPU Memory**: The model is large, and if your GPU has limited VRAM, it might run out of memory during video generation.

2. **Float16 Precision Issues**: Older GPUs may not support `torch.float16`, leading to errors or slower performance.

3. **Slow CPU Offloading**: Using `pipe.enable_model_cpu_offload()` could slow things down if the CPU is not fast enough or data transfer between CPU and GPU is slow.

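4. **Excessive Frame Processing**: The per-frame conversion loop (NumPy array → PIL image) runs on the CPU, not the GPU, so it does not consume VRAM; however, it adds CPU time and host memory use that grow with the number of frames.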

5. **Inefficient Parallelization**: Text-to-video generation may not fully utilize the GPU’s parallel processing, causing slowdowns.

These factors could limit the performance when running the code.