In [None]:
!pip install --upgrade opencv-python transformers

Collecting transformers
  Downloading transformers-4.44.0-py3-none-any.whl.metadata (43 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m43.7/43.7 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
Downloading transformers-4.44.0-py3-none-any.whl (9.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m9.5/9.5 MB[0m [31m56.3 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.42.4
    Uninstalling transformers-4.42.4:
      Successfully uninstalled transformers-4.42.4
Successfully installed transformers-4.44.0


In [None]:
!pip install -U git+https://github.com/huggingface/diffusers.git
!pip install -U git+https://github.com/huggingface/accelerate.git

Collecting git+https://github.com/huggingface/diffusers.git
  Cloning https://github.com/huggingface/diffusers.git to /tmp/pip-req-build-13vj2813
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/diffusers.git /tmp/pip-req-build-13vj2813
  Resolved https://github.com/huggingface/diffusers.git to commit 0c1e63bd11a5746db8933111a962854fa9b36582
  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Building wheels for collected packages: diffusers
  Building wheel for diffusers (pyproject.toml) ... [?25l[?25hdone
  Created wheel for diffusers: filename=diffusers-0.31.0.dev0-py3-none-any.whl size=2658778 sha256=44dbbcaa79e15e86d02fa14a75e8d400f231a2f99a311c8223dfee68448fdb8e
  Stored in directory: /tmp/pip-ephem-wheel-cache-aqq5zkyl/wheels/4d/b7/a8/6f9549ceec5daad78675b857ac57d697c387062506520a7b50
Successfully built diffusers
Installing

Collecting git+https://github.com/huggingface/accelerate.git
  Cloning https://github.com/huggingface/accelerate.git to /tmp/pip-req-build-qqq_g10k
  Running command git clone --filter=blob:none --quiet https://github.com/huggingface/accelerate.git /tmp/pip-req-build-qqq_g10k
  Resolved https://github.com/huggingface/accelerate.git to commit a452327e8e04b20779882dc491e00de602d554cb
  Installing build dependencies ... [?25l[?25hcanceled[31mERROR: Operation cancelled by user[0m[31m
[0m

In [None]:
!pip install gradio

Collecting gradio
  Downloading gradio-4.41.0-py3-none-any.whl.metadata (15 kB)
Collecting aiofiles<24.0,>=22.0 (from gradio)
  Downloading aiofiles-23.2.1-py3-none-any.whl.metadata (9.7 kB)
Collecting fastapi (from gradio)
  Downloading fastapi-0.112.0-py3-none-any.whl.metadata (27 kB)
Collecting ffmpy (from gradio)
  Downloading ffmpy-0.4.0-py3-none-any.whl.metadata (2.9 kB)
Collecting gradio-client==1.3.0 (from gradio)
  Downloading gradio_client-1.3.0-py3-none-any.whl.metadata (7.1 kB)
Collecting httpx>=0.24.1 (from gradio)
  Downloading httpx-0.27.0-py3-none-any.whl.metadata (7.2 kB)
Collecting orjson~=3.0 (from gradio)
  Downloading orjson-3.10.7-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (50 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m50.4/50.4 kB[0m [31m3.7 MB/s[0m eta [36m0:00:00[0m
Collecting pydub (from gradio)
  Downloading pydub-0.25.1-py2.py3-none-any.whl.metadata (1.4 kB)
Collecting python-multipart>=0.0.9 (from gradi

In [None]:
import gc
import torch
import gradio as gr
from diffusers import CogVideoXPipeline, CogVideoXDDIMScheduler
from diffusers.utils import export_to_video

def reset_memory():
    """Run a garbage-collection pass, empty the CUDA cache, and reset the CUDA memory statistics."""
    cleanup_steps = (
        gc.collect,
        torch.cuda.empty_cache,
        torch.cuda.reset_accumulated_memory_stats,
        torch.cuda.reset_peak_memory_stats,
    )
    # Same four calls, in the same order, as a simple sequence of steps.
    for step in cleanup_steps:
        step()

def print_memory():
    """Print current allocated, peak allocated, and peak reserved CUDA memory.

    Each value is divided by 1024**3 and rounded to two decimals before printing.
    """
    bytes_per_unit = 1024**3
    # Query all three counters first, then print them in a fixed order.
    readings = {
        "memory": torch.cuda.memory_allocated(),
        "max_memory": torch.cuda.max_memory_allocated(),
        "max_reserved": torch.cuda.max_memory_reserved(),
    }
    for label, n_bytes in readings.items():
        print(f"{label}={round(n_bytes / bytes_per_unit, 2)!r} GB")

# Load the CogVideoX-2b pipeline with float16 weights.
pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=torch.float16)
# Replace the scheduler with a DDIM scheduler built from the existing scheduler config,
# overriding the timestep spacing to "trailing".
pipe.scheduler = CogVideoXDDIMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")
pipe.enable_sequential_cpu_offload()
# Some diffusers builds ship AutoencoderKLCogVideoX without `enable_tiling`; calling it
# unconditionally raised AttributeError there. Enable tiling only when it is available.
if hasattr(pipe.vae, "enable_tiling"):
    pipe.vae.enable_tiling()
else:
    print("VAE tiling is not available in this diffusers version; continuing without it.")

def generate_video(prompt, num_frames, progress=gr.Progress(track_tqdm=True)):
    """Generate a video for ``prompt`` and return the path of the exported MP4 file.

    Args:
        prompt: Text prompt passed to the pipeline.
        num_frames: Number of frames to generate.
        progress: Gradio progress tracker. It is not called directly; declaring it
            with ``track_tqdm=True`` asks Gradio to mirror tqdm progress bars in the UI.

    Returns:
        The path the video was written to ("output_video.mp4").
    """
    reset_memory()

    # `torch.cuda.amp.autocast()` is deprecated; this is the equivalent explicit form.
    with torch.autocast(device_type="cuda", dtype=torch.float16):
        # No `callback=` argument: the pipeline's per-step hook is `callback_on_step_end`,
        # so the old keyword was rejected, and progress is already reported through tqdm.
        video = pipe(
            prompt=prompt,
            num_frames=num_frames,
            guidance_scale=6,
            num_inference_steps=30,
            generator=torch.Generator().manual_seed(42),  # fixed seed
        ).frames[0]

    print_memory()
    output_path = "output_video.mp4"
    export_to_video(video, output_path, fps=8)
    return output_path

# Markdown shown at the top of the page (text kept verbatim, including its indentation).
APP_DESCRIPTION = """
        # CogVideoX Video Generation

        Enter a detailed prompt to generate a video. CogVideoX works best with large, descriptive prompts.
        Avoid short or simple prompts for better results.

        Note: Video generation might take a few minutes. Please be patient.
        """

# Build the UI: description, a row with the prompt and frame-count inputs,
# then the button and the video output.
with gr.Blocks() as demo:
    gr.Markdown(APP_DESCRIPTION)

    with gr.Row():
        prompt_input = gr.Textbox(lines=5, label="Enter your detailed video prompt")
        num_frames = gr.Dropdown(label="Number of frames", choices=[24, 48], value=24)

    generate_button = gr.Button("Generate Video")
    video_output = gr.Video(label="Generated Video")

    # Clicking the button calls generate_video with both inputs; its return value feeds the video output.
    generate_button.click(fn=generate_video, inputs=[prompt_input, num_frames], outputs=video_output)

# Start the app with the same launch options as before.
demo.launch(share=True, debug=True)

The cache for model files in Transformers v4.22.0 has been updated. Migrating your old cache. This is a one-time only operation. You can interrupt this and resume the migration later on by calling `transformers.utils.move_cache()`.


0it [00:00, ?it/s]

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


model_index.json:   0%|          | 0.00/411 [00:00<?, ?B/s]

Fetching 14 files:   0%|          | 0/14 [00:00<?, ?it/s]

scheduler/scheduler_config.json:   0%|          | 0.00/482 [00:00<?, ?B/s]

(…)ext_encoder/model.safetensors.index.json:   0%|          | 0.00/19.9k [00:00<?, ?B/s]

tokenizer/added_tokens.json:   0%|          | 0.00/2.59k [00:00<?, ?B/s]

text_encoder/config.json:   0%|          | 0.00/799 [00:00<?, ?B/s]

tokenizer/special_tokens_map.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

model-00002-of-00002.safetensors:   0%|          | 0.00/4.53G [00:00<?, ?B/s]

model-00001-of-00002.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer/tokenizer_config.json:   0%|          | 0.00/20.6k [00:00<?, ?B/s]

vae/config.json:   0%|          | 0.00/856 [00:00<?, ?B/s]

transformer/config.json:   0%|          | 0.00/713 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/3.39G [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/431M [00:00<?, ?B/s]

Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The config attributes {'mid_block_add_attention': True} were passed to AutoencoderKLCogVideoX, but are not expected and will be ignored. Please verify your config.json configuration file.


AttributeError: 'AutoencoderKLCogVideoX' object has no attribute 'enable_tiling'

## Optional: you do not have to run the cell below. It is standalone test code for generating a video without the Gradio UI ;)

In [None]:
import gc
import torch
from diffusers import CogVideoXPipeline, CogVideoXDDIMScheduler
from diffusers.utils import export_to_video

def reset_memory():
    """Run a garbage-collection pass, empty the CUDA cache, and reset the CUDA memory statistics."""
    cleanup_steps = (
        gc.collect,
        torch.cuda.empty_cache,
        torch.cuda.reset_accumulated_memory_stats,
        torch.cuda.reset_peak_memory_stats,
    )
    # Same four calls, in the same order, as a simple sequence of steps.
    for step in cleanup_steps:
        step()

def print_memory():
    """Print current allocated, peak allocated, and peak reserved CUDA memory.

    Each value is divided by 1024**3 and rounded to two decimals before printing.
    """
    bytes_per_unit = 1024**3
    # Query all three counters first, then print them in a fixed order.
    readings = {
        "memory": torch.cuda.memory_allocated(),
        "max_memory": torch.cuda.max_memory_allocated(),
        "max_reserved": torch.cuda.max_memory_reserved(),
    }
    for label, n_bytes in readings.items():
        print(f"{label}={round(n_bytes / bytes_per_unit, 2)!r} GB")

# Text prompt describing the scene to generate; it is passed to the pipeline call below.
# NOTE(review): the string ends with a tab character before the closing quote — confirm whether that is intended.
prompt = (
    "A cat walking through the forest sees a bear and gets scared and starts running through a muddy path that is along the middle of the forest , the path is in the middle and at both sides are trees, the bear is chasing the cat, we see the point of view of the cat as the camera is facing the cat and in the background we can see an angry bear chasing down the cat along the muddy path in the middle of the forest.	"
)

# Load the CogVideoX-2b pipeline with float16 weights.
pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-2b", torch_dtype=torch.float16)
# Replace the scheduler with a DDIM scheduler built from the existing scheduler config,
# overriding the timestep spacing to "trailing".
pipe.scheduler = CogVideoXDDIMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")

pipe.enable_sequential_cpu_offload()  # Changed from enable_model_cpu_offload
# Some diffusers builds ship AutoencoderKLCogVideoX without `enable_tiling`; calling it
# unconditionally raises AttributeError there. Enable tiling only when it is available.
if hasattr(pipe.vae, "enable_tiling"):
    pipe.vae.enable_tiling()
else:
    print("VAE tiling is not available in this diffusers version; continuing without it.")

reset_memory()

# Run generation under CUDA autocast (float16 mixed precision).
# `torch.cuda.amp.autocast()` is deprecated; this is the equivalent explicit form.
with torch.autocast(device_type="cuda", dtype=torch.float16):
    video = pipe(
        prompt=prompt,
        num_frames=48,  # not reduced: the earlier "Reduced from 48" comment contradicted this value
        guidance_scale=6,
        num_inference_steps=30,  # Reduced from 50
        generator=torch.Generator().manual_seed(42),  # fixed seed
    ).frames[0]

print_memory()
# Last expression of the cell: the cell output shows the value returned by export_to_video.
export_to_video(video, "output_tiling.mp4", fps=8)

The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

The config attributes {'mid_block_add_attention': True, 'sample_size': 256} were passed to AutoencoderKLCogVideoX, but are not expected and will be ignored. Please verify your config.json configuration file.


  0%|          | 0/30 [00:00<?, ?it/s]

memory=0.07 GB
max_memory=8.72 GB
max_reserved=10.48 GB


  self.pid = _posixsubprocess.fork_exec(


'output_tiling.mp4'