<a href="https://colab.research.google.com/github/KaifAhmad1/deepfake/blob/main/LMDeploy_prompt_to_video_generation_test.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
%pip install lmdeploy diffusers transformers torch accelerate optimum-quanto torchao --no-cache-dir --quiet

[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/87.2 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m87.2/87.2 kB[0m [31m128.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m147.5 MB/s[0m eta [36m0:00:00[0m
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m46.8/46.8 kB[0m [31m150.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m110.1/110.1 MB[0m [31m188.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m363.4/363.4 MB[0m [31m155.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m13.8/13.8 MB[0m [31m170.8 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m24.6/24.6 MB[0m [31m172.3 MB/s[0m eta

In [3]:
import os
import time
import torch
from lmdeploy import pipeline
from diffusers.utils import export_to_video
from pathlib import Path
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')
OUTPUT_DIR = Path("/content/drive/MyDrive/visiomarket_videos")
OUTPUT_DIR.mkdir(exist_ok=True)

# Install dependencies
print("Installing dependencies...")
os.system("pip install lmdeploy diffusers transformers torch accelerate optimum-quanto torchao --quiet")

# Detailed prompt
PROMPT = (
    "A pair of white sneakers rotates on a sleek black pedestal in a minimalist studio, "
    "illuminated by dynamic spotlights that shift colors from blue to pink. The camera orbits smoothly, "
    "emphasizing the sneakers’ design, with a modern and vibrant aesthetic."
)

# Model configuration
MODEL_ID = "Lightricks/LTX-Video"
QUANTIZE = 8  # FP8 quantization
TORCH_DTYPE = torch.float16
RESOLUTION = (1216, 704)

# Function to measure VRAM usage
def get_vram_usage():
    return torch.cuda.memory_allocated() / 1e9  # GB

Mounted at /content/drive
Installing dependencies...


In [5]:
# Generate video
def generate_video():
    print("Starting video generation for LTX-Video...")
    start_time = time.time()
    start_vram = get_vram_usage()

    try:
        # Load pipeline
        print("Loading LTX-Video pipeline...")
        pipe = pipeline(
            MODEL_ID,
            torch_dtype=TORCH_DTYPE,
            quantize=QUANTIZE,
            trust_remote_code=True
        )
        pipe.enable_model_cpu_offload()
        pipe.vae.enable_tiling()

        # Generate video
        print("Generating video...")
        video = pipe(
            prompt=PROMPT,
            num_frames=49,
            fps=24,
            guidance_scale=6.0,
            num_inference_steps=30,  # Reduced for speed
            generator=torch.Generator(device="cuda").manual_seed(42)
        ).frames[0]

        # Save video
        output_path = OUTPUT_DIR / "ltx_test.mp4"
        print(f"Saving video to {output_path}...")
        export_to_video(video, str(output_path), fps=24)

        # Metrics
        end_time = time.time()
        end_vram = get_vram_usage()
        inference_time = end_time - start_time
        vram_usage = end_vram - start_vram
        cost = 0.20  # Estimated cost per 5s clip

        # Qualitative notes
        notes = (
            "Expected: Smooth orbiting motion, vibrant spotlight colors, and clear sneaker design. "
            "Check for dynamic lighting transitions and minimalist studio aesthetic."
        )

        return {
            "model": "LTX-Video",
            "success": True,
            "inference_time": inference_time,
            "vram_usage": vram_usage,
            "output_path": str(output_path),
            "cost": cost,
            "notes": notes
        }
    except Exception as e:
        print(f"LTX-Video failed: {str(e)}")
        return {
            "model": "LTX-Video",
            "success": False,
            "error": str(e)
        }
    finally:
        print("Cleaning up VRAM...")
        torch.cuda.empty_cache()

In [6]:
# Run pipeline
if __name__ == "__main__":
    print("Running LTX-Video pipeline...")
    result = generate_video()

    print("\n=== Result ===")
    if result["success"]:
        print(
            f"Model: {result['model']}\n"
            f"Time: {result['inference_time']:.2f}s\n"
            f"VRAM: {result['vram_usage']:.2f}GB\n"
            f"Output: {result['output_path']}\n"
            f"Cost: ${result['cost']:.2f}\n"
            f"Notes: {result['notes']}"
        )
    else:
        print(f"Model: {result['model']}\nError: {result['error']}")

    print("Video saved to Google Drive: /content/drive/MyDrive/visiomarket_videos")

Running LTX-Video pipeline...
Starting video generation for LTX-Video...
Loading LTX-Video pipeline...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


Fetching 47 files:   0%|          | 0/47 [00:00<?, ?it/s]

ltx-video-2b-v0.9.5.safetensors:   0%|          | 0.00/6.34G [00:00<?, ?B/s]

ltx-video-2b-v0.9.1.safetensors:   0%|          | 0.00/5.72G [00:00<?, ?B/s]

ltx-video-2b-v0.9.safetensors:   0%|          | 0.00/9.37G [00:00<?, ?B/s]

.gitattributes:   0%|          | 0.00/2.74k [00:00<?, ?B/s]

ltx-video-2b-v0.9.5.license.txt:   0%|          | 0.00/16.1k [00:00<?, ?B/s]

ltx-video-2b-v0.9.license.txt:   0%|          | 0.00/16.0k [00:00<?, ?B/s]

README.md:   0%|          | 0.00/19.7k [00:00<?, ?B/s]

ltxv-2b-0.9.6-dev-04-25.safetensors:   0%|          | 0.00/6.34G [00:00<?, ?B/s]

(…)txv-2b-0.9.6-distilled-04-25.safetensors:   0%|          | 0.00/6.34G [00:00<?, ?B/s]

ltxv-2b-0.9.6-dev-04-25.license.txt:   0%|          | 0.00/14.4k [00:00<?, ?B/s]

ltx-video-2b-v0.9.1.license.txt:   0%|          | 0.00/16.0k [00:00<?, ?B/s]

(…)txv-2b-0.9.6-distilled-04-25.license.txt:   0%|          | 0.00/14.4k [00:00<?, ?B/s]

ltx-video_example_00001.gif:   0%|          | 0.00/7.96M [00:00<?, ?B/s]

ltx-video_example_00002.gif:   0%|          | 0.00/7.90M [00:00<?, ?B/s]

ltx-video_example_00003.gif:   0%|          | 0.00/4.43M [00:00<?, ?B/s]

ltx-video_example_00005.gif:   0%|          | 0.00/5.73M [00:00<?, ?B/s]

ltx-video_example_00004.gif:   0%|          | 0.00/6.71M [00:00<?, ?B/s]

ltx-video_example_00006.gif:   0%|          | 0.00/4.24M [00:00<?, ?B/s]

ltx-video_example_00008.gif:   0%|          | 0.00/6.23M [00:00<?, ?B/s]

ltx-video_example_00007.gif:   0%|          | 0.00/7.83M [00:00<?, ?B/s]

ltx-video_example_00009.gif:   0%|          | 0.00/6.39M [00:00<?, ?B/s]

ltx-video_example_00010.gif:   0%|          | 0.00/6.19M [00:00<?, ?B/s]

ltx-video_example_00011.gif:   0%|          | 0.00/5.35M [00:00<?, ?B/s]

ltx-video_example_00012.gif:   0%|          | 0.00/7.47M [00:00<?, ?B/s]

ltx-video_example_00013.gif:   0%|          | 0.00/9.02M [00:00<?, ?B/s]

ltx-video_example_00014.gif:   0%|          | 0.00/7.43M [00:00<?, ?B/s]

ltx-video_example_00015.gif:   0%|          | 0.00/6.56M [00:00<?, ?B/s]

ltx-video_example_00016.gif:   0%|          | 0.00/7.41M [00:00<?, ?B/s]

trailer.gif:   0%|          | 0.00/147M [00:00<?, ?B/s]

model_index.json:   0%|          | 0.00/412 [00:00<?, ?B/s]

scheduler_config.json:   0%|          | 0.00/418 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/781 [00:00<?, ?B/s]

model-00001-of-00004.safetensors:   0%|          | 0.00/4.99G [00:00<?, ?B/s]

model-00002-of-00004.safetensors:   0%|          | 0.00/5.00G [00:00<?, ?B/s]

model-00003-of-00004.safetensors:   0%|          | 0.00/4.87G [00:00<?, ?B/s]

model-00004-of-00004.safetensors:   0%|          | 0.00/4.19G [00:00<?, ?B/s]

model.safetensors.index.json:   0%|          | 0.00/19.9k [00:00<?, ?B/s]

added_tokens.json:   0%|          | 0.00/2.59k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/2.54k [00:00<?, ?B/s]

spiece.model:   0%|          | 0.00/792k [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/20.6k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/500 [00:00<?, ?B/s]

(…)pytorch_model-00001-of-00002.safetensors:   0%|          | 0.00/4.94G [00:00<?, ?B/s]

(…)pytorch_model-00002-of-00002.safetensors:   0%|          | 0.00/2.75G [00:00<?, ?B/s]

(…)ion_pytorch_model.safetensors.index.json:   0%|          | 0.00/72.1k [00:00<?, ?B/s]

config.json:   0%|          | 0.00/501 [00:00<?, ?B/s]

diffusion_pytorch_model.safetensors:   0%|          | 0.00/1.68G [00:00<?, ?B/s]

The argument `trust_remote_code` is to be used with Auto classes. It has no effect here and is ignored.


LTX-Video failed: Could not find model architecture from config: {'return_dict': True, 'output_hidden_states': False, 'output_attentions': False, 'torchscript': False, 'torch_dtype': None, 'use_bfloat16': False, 'tf_legacy_loss': False, 'pruned_heads': {}, 'tie_word_embeddings': True, 'chunk_size_feed_forward': 0, 'is_encoder_decoder': False, 'is_decoder': False, 'cross_attention_hidden_size': None, 'add_cross_attention': False, 'tie_encoder_decoder': False, 'max_length': 20, 'min_length': 0, 'do_sample': False, 'early_stopping': False, 'num_beams': 1, 'num_beam_groups': 1, 'diversity_penalty': 0.0, 'temperature': 1.0, 'top_k': 50, 'top_p': 1.0, 'typical_p': 1.0, 'repetition_penalty': 1.0, 'length_penalty': 1.0, 'no_repeat_ngram_size': 0, 'encoder_no_repeat_ngram_size': 0, 'bad_words_ids': None, 'num_return_sequences': 1, 'output_scores': False, 'return_dict_in_generate': False, 'forced_bos_token_id': None, 'forced_eos_token_id': None, 'remove_invalid_values': False, 'exponential_decay