In [1]:
!pip install diffusers



In [2]:
!pip install transformers



In [3]:
!pip install accelerate



In [4]:
!pip install safetensors



In [5]:
from diffusers import StableDiffusionPipeline
import torch
import cv2
import os

# Load the Stable Diffusion v1.5 text-to-image pipeline (Colab-specific config):
# half-precision weights read from safetensors files, then placed on the GPU.
pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5",
    use_safetensors=True,
    torch_dtype=torch.float16,
)
pipe = pipe.to("cuda")

def generate_frame(prompt, frame_number, output_path="/content/frames"):
    """Render one image for ``prompt`` and store it as a numbered PNG.

    The module-level ``pipe`` is called with a CUDA generator seeded with
    ``42 + frame_number``, so the same frame index always uses the same seed.
    The result is written to ``{output_path}/frame_{frame_number:04d}.png``;
    ``output_path`` is created when it does not exist yet.

    Args:
        prompt: Text prompt passed to the pipeline.
        frame_number: Index of the frame; used for the seed and the file name.
        output_path: Directory that receives the PNG file.
    """
    os.makedirs(output_path, exist_ok=True)

    # Per-frame seed: fixed base offset plus the frame index.
    rng = torch.Generator("cuda")
    rng.manual_seed(42 + frame_number)

    result = pipe(
        prompt,
        generator=rng,
        height=512,
        width=768,
        num_inference_steps=25,
    )
    frame_image = result.images[0]

    target_file = f"{output_path}/frame_{frame_number:04d}.png"
    frame_image.save(target_file)

def create_video(output_path="generated_video.mp4", frames_dir="/content/frames", fps=24.0):
    """Assemble the PNG frames found in ``frames_dir`` into an MP4 video.

    Frames are written in sorted file-name order and the video dimensions are
    taken from the first frame.

    Args:
        output_path: Destination video file.
        frames_dir: Directory holding the ``.png`` frames. The default matches
            the default output directory of ``generate_frame``.
        fps: Frame rate passed to the video writer.

    Returns:
        ``output_path``.

    Raises:
        FileNotFoundError: If ``frames_dir`` contains no ``.png`` files.
        RuntimeError: If a frame cannot be decoded or the writer cannot be
            opened for ``output_path``.
    """
    frame_files = sorted(f for f in os.listdir(frames_dir) if f.endswith('.png'))
    if not frame_files:
        # Fail with a clear message instead of an IndexError on frame_files[0].
        raise FileNotFoundError(f"No .png frames found in {frames_dir}")

    def _load(name):
        # cv2.imread signals failure by returning None rather than raising.
        frame = cv2.imread(os.path.join(frames_dir, name))
        if frame is None:
            raise RuntimeError(f"Could not read frame: {os.path.join(frames_dir, name)}")
        return frame

    first_frame = _load(frame_files[0])
    height, width = first_frame.shape[:2]

    fourcc = cv2.VideoWriter_fourcc(*'mp4v')
    video = cv2.VideoWriter(output_path, fourcc, fps, (width, height))
    try:
        if not video.isOpened():
            raise RuntimeError(f"Could not open video writer for: {output_path}")

        # The first frame is already decoded; reuse it instead of reading it twice.
        video.write(first_frame)
        for name in frame_files[1:]:
            video.write(_load(name))
    finally:
        # Always release the writer so the file handle is not leaked on errors.
        video.release()

    return output_path

# Run generation: render each frame for the prompt, then stitch the frames
# into a video file and report where it was written.
prompt = "A fox wearing glasses reading a book under a magical tree"  # Replace with your prompt
num_frames = 24  # For 1 second video (create_video writes at 24 fps by default)

for i in range(num_frames):
    generate_frame(prompt=prompt, frame_number=i)

video_path = create_video()
print(f"Video created: {video_path}")


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

  0%|          | 0/25 [00:00<?, ?it/s]

Video created: generated_video.mp4


In [6]:
from diffusers import StableDiffusionPipeline, StableVideoDiffusionPipeline
import torch
import tempfile
from PIL import Image
import imageio
import os
import cv2

# Load video diffusion model
model_id = "stabilityai/stable-video-diffusion-img2vid"

# NOTE: this rebinds the global `pipe`. generate_frame() (defined in an earlier
# cell) reads `pipe`, so that cell must be re-run before generate_frame() is
# used again after this one.
pipe = StableVideoDiffusionPipeline.from_pretrained(
    model_id,
    torch_dtype=torch.float16,
    variant="fp16"
)

# Do not call .to("cuda") on this pipeline: model CPU offloading manages device
# placement itself, moving each sub-model to the GPU only while it is running.
# Uploading the whole pipeline first just costs time and peak GPU memory.
pipe.enable_model_cpu_offload()

# Load image generation pipeline (kept fully on the GPU; used once for the base image)
img_pipe = StableDiffusionPipeline.from_pretrained(
    "runwayml/stable-diffusion-v1-5", torch_dtype=torch.float16
).to("cuda")

# Prompt from user
prompt = input("Enter your text prompt: ")  # Example: "A panda playing in the forest"

# Step 1: Generate image from text prompt
# Seed the base image as well; otherwise only the video step is seeded and the
# overall result changes on every run even for the same prompt.
print("Generating base image...")
image = img_pipe(prompt, generator=torch.Generator("cuda").manual_seed(42)).images[0]

# Step 2: Generate video frames from image
# NOTE(review): the base image is produced at img_pipe's default size; confirm
# it matches the input size the video pipeline expects.
print("Generating video frames...")
video_frames = pipe(image, decode_chunk_size=8, generator=torch.manual_seed(42)).frames[0]

# Step 3: Save frames as MP4 video (~10 seconds)
# The frame rate is derived from the frame count so that playback always lasts
# about 10 seconds, whatever number of frames the pipeline returned.
output_video_path = "text_to_video_output.mp4"
fps = len(video_frames) / 10  # Stretch to 10 seconds

print(f"Saving video at {fps:.2f} FPS...")
imageio.mimwrite(output_video_path, video_frames, fps=fps, codec='libx264', quality=8)

print(f"✅ Video saved as: {output_video_path}")


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

Loading pipeline components...:   0%|          | 0/7 [00:00<?, ?it/s]

Enter your text prompt: Floating islands in the sky with waterfalls pouring into the clouds
Generating base image...


  0%|          | 0/50 [00:00<?, ?it/s]

Generating video frames...


  0%|          | 0/25 [00:00<?, ?it/s]

Saving video at 1.40 FPS...
✅ Video saved as: text_to_video_output.mp4
