# 🎬 Mochi Video Generator - Colab Edition

Generate videos using Genmo's Mochi model on Google Colab.

**Requirements:**
- Google Colab Pro/Pro+ recommended (for an A100 GPU with 40GB VRAM; Colab's V100 has only 16GB)
- Free Colab T4 (15GB) may work with aggressive memory optimization

⚠️ **First, set your runtime to GPU:** Runtime → Change runtime type → GPU (A100 recommended)

In [None]:
# Check GPU: print the NVIDIA driver / GPU status table for this runtime.
# If the command is not found or reports no devices, switch the runtime type
# to GPU before running the cells that follow.
!nvidia-smi

In [None]:
# Install dependencies
!pip install -q torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121
!pip install -q diffusers transformers accelerate sentencepiece protobuf gradio hf_transfer

In [None]:
import os
# Enable fast downloads: opt in to the hf_transfer download backend.
# Set before the Hugging Face imports that follow so the flag is already in
# the environment when those libraries load.
os.environ["HF_HUB_ENABLE_HF_TRANSFER"] = "1"

import torch
from diffusers import MochiPipeline
from diffusers.utils import export_to_video

# Check GPU: fail early with an actionable message instead of a low-level CUDA
# error when the runtime has no GPU attached.
if not torch.cuda.is_available():
    raise RuntimeError(
        "No CUDA GPU detected. In Colab: Runtime → Change runtime type → GPU, "
        "then re-run this cell."
    )

# Query the device properties once and reuse them for the VRAM line.
gpu_props = torch.cuda.get_device_properties(0)
print(f"🚀 GPU: {torch.cuda.get_device_name(0)}")
print(f"   VRAM: {gpu_props.total_memory / 1024**3:.1f} GB")

In [None]:
# Load the Mochi pipeline (bf16 weights) and pick a memory strategy for this GPU
print("📦 Loading Mochi pipeline (this will download ~46GB on first run)...")

pipe = MochiPipeline.from_pretrained(
    "genmo/mochi-1-preview",
    torch_dtype=torch.bfloat16,
    variant="bf16",
)

# Memory optimization based on available VRAM.
# torch reports usable memory in GiB, which is below a card's nominal size
# (a "40GB" A100 shows ~39.6, "24GB" cards ~22-23.7, a T4 ~14.7), so the
# cut-offs sit under the nominal figures rather than on them.
vram_gb = torch.cuda.get_device_properties(0).total_memory / 1024**3

if vram_gb >= 40:  # 48GB/80GB-class cards (e.g. A100 80GB)
    pipe = pipe.to("cuda")
    print("✅ Full GPU mode (A100)")
elif vram_gb >= 38:  # 40GB-class cards (A100 40GB)
    pipe = pipe.to("cuda")
    pipe.enable_vae_tiling()
    print("✅ GPU mode with VAE tiling")
elif vram_gb >= 21:  # 24GB/32GB-class cards (L4, A10G, V100 32GB)
    # The full bf16 pipeline (10B-parameter transformer plus the T5 text
    # encoder) is too large to keep resident here, so whole components take
    # turns on the GPU; the diffusers Mochi docs quote ~22GB for this setup.
    pipe.enable_model_cpu_offload()
    pipe.enable_vae_tiling()
    pipe.enable_vae_slicing()
    print("✅ CPU offload mode (lower VRAM GPU)")
else:  # T4 (15GB) / 16GB cards and smaller
    # Model-level offload still needs the entire transformer on the GPU at
    # once, which does not fit here; offload submodule by submodule instead
    # (much slower, but the lowest VRAM footprint diffusers offers).
    pipe.enable_sequential_cpu_offload()
    pipe.enable_vae_tiling()
    pipe.enable_vae_slicing()
    print("✅ Sequential CPU offload mode (low VRAM GPU, slow)")

print("🎬 Ready to generate videos!")

In [None]:
# Generate a video from a single text prompt and write it to mochi_output.mp4
prompt = "A horse galloping through a Tamil village, cinematic lighting, natural motion"

# Status messages (emoji restored from mis-encoded bytes)
print(f"🎬 Generating video for: {prompt}")
print("⏳ This may take 2-10 minutes depending on GPU...")

# inference_mode turns off autograd tracking for the sampling run
with torch.inference_mode():
    frames = pipe(
        prompt,
        num_inference_steps=28,
        guidance_scale=3.5,
    ).frames[0]  # first entry of the returned batch of videos

# Save video
video_path = export_to_video(frames, "mochi_output.mp4", fps=30)
print(f"✅ Video saved to: {video_path}")

In [None]:
# Embed the rendered clip in the notebook output as an inline HTML5 player
from base64 import b64encode
from IPython.display import HTML

# Read the MP4 and base64-encode it so it can travel inside a data: URI
with open("mochi_output.mp4", "rb") as video_file:
    raw_bytes = video_file.read()
video_data = b64encode(raw_bytes).decode()

# Keep this expression last in the cell so the notebook renders the player
HTML(f'''
<video width="640" height="480" controls>
  <source src="data:video/mp4;base64,{video_data}" type="video/mp4">
</video>
''')

In [None]:
# Download the video: ask Colab to send mochi_output.mp4 to the browser.
# The google.colab package is only importable inside a Colab runtime, so this
# cell is expected to fail in other Jupyter environments.
from google.colab import files
files.download("mochi_output.mp4")

## 🎨 Interactive Mode (Optional)
Run the cell below to launch a Gradio interface.

In [None]:
import gradio as gr

def generate_video(prompt, steps=28, guidance=3.5):
    """Generate a clip for ``prompt`` and return the path of the saved MP4.

    Args:
        prompt: Text description of the video to generate.
        steps: Number of inference steps; cast to ``int`` before use because
            the UI control may deliver a float.
        guidance: Guidance scale forwarded to the pipeline.

    Returns:
        Path of the video file written by ``export_to_video`` (no output
        path is supplied, so the helper chooses the location).

    Raises:
        gr.Error: If the prompt is empty or only whitespace.
    """
    # Reject blank prompts up front rather than spending minutes of GPU time
    # on a sample with no text conditioning.
    if not prompt or not prompt.strip():
        raise gr.Error("Please enter a prompt describing the video.")

    with torch.inference_mode():
        frames = pipe(prompt, num_inference_steps=int(steps), guidance_scale=guidance).frames[0]
    return export_to_video(frames, fps=30)

# Build the UI: a prompt box and two sliders feed generate_video, and the
# returned file path is shown in a video player.
demo = gr.Interface(
    fn=generate_video,
    inputs=[
        gr.Textbox(label="Prompt", placeholder="Describe your video..."),
        gr.Slider(10, 50, value=28, step=1, label="Inference Steps"),
        gr.Slider(1.0, 10.0, value=3.5, step=0.5, label="Guidance Scale"),
    ],
    outputs=gr.Video(label="Generated Video"),
    title="🎬 Mochi Video Generator",  # emoji restored from mis-encoded bytes
)

# share=True requests a public share link for the app; debug=True keeps the
# cell attached to the server so errors show up in the cell output.
demo.launch(share=True, debug=True)