In [None]:
!pip install --quiet --upgrade pip
!pip install --quiet diffusers transformers accelerate torch torchvision imageio imageio-ffmpeg
!pip install --quiet flask flask-cors
!pip install --quiet pyngrok
!pip install --quiet requests
!pip install --quiet numpy

from flask import Flask, request, jsonify, send_file
import base64
from io import BytesIO
from pyngrok import ngrok
from flask_cors import CORS
import torch
from diffusers import CogVideoXPipeline
from diffusers.utils import export_to_video
import gc
import os
import uuid
import time
import sys
import json
import logging
import warnings
import tempfile
import requests
from typing import Dict, Optional

# Suppress every warning category for the whole process.
warnings.filterwarnings('ignore')

# Logging for Angelo's T2V service: INFO and above goes both to a log file
# in the working directory and to stdout.
logging.basicConfig(
    format='%(asctime)s [%(levelname)s] %(message)s',
    level=logging.INFO,
    handlers=[
        logging.FileHandler("t2v_service.log"),
        logging.StreamHandler(sys.stdout),
    ],
)
logger = logging.getLogger(__name__)

print("🎬 Initializing CogVideoX T2V service for Angelo's Reel Generator...")

# Pick the compute device once; it is reported by the /status endpoint.
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
print(f"Using device: {device}")
print(f"GPU: {torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'CPU only'}")

# Module-level pipeline handle; stays None until initialize_pipeline() sets it.
pipe = None

def initialize_pipeline():
    """Load the CogVideoX-2B pipeline into the module-level ``pipe``.

    The call is idempotent: if ``pipe`` is already set it returns immediately.
    The model is loaded in float16 and configured with sequential CPU offload
    plus VAE slicing and tiling (optimizations for limited GPU memory). No
    test generation is performed.

    The global ``pipe`` is only assigned after loading *and* every
    optimization call has succeeded, so a failure part-way through never
    leaves a half-configured pipeline published (the ``/status`` endpoint
    reports "ready" whenever ``pipe`` is not None).

    Raises:
        Exception: Any error raised while loading or configuring the model is
            printed and re-raised unchanged.
    """
    global pipe

    if pipe is not None:
        print("✅ Pipeline already initialized")
        return

    print("⏳ Loading CogVideoX-2B model... (this may take several minutes)")
    start_time = time.time()

    try:
        # Build into a local first; publish to the global only once complete.
        loaded_pipe = CogVideoXPipeline.from_pretrained(
            "THUDM/CogVideoX-2b",
            torch_dtype=torch.float16,
        )

        # Enable optimizations for limited GPU memory
        loaded_pipe.enable_sequential_cpu_offload()
        loaded_pipe.vae.enable_slicing()
        loaded_pipe.vae.enable_tiling()

        pipe = loaded_pipe

        load_time = time.time() - start_time
        print(f"✅ CogVideoX model loaded successfully in {load_time:.2f}s")

        # Clear any residual memory and proceed (no test generation)
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()

    except Exception as e:
        print(f"❌ Failed to initialize pipeline: {e}")
        raise


def generate_video_from_prompt(prompt: str, request_id: str, **kwargs) -> str:
    """
    Generate a video from a text prompt using CogVideoX.

    Args:
        prompt: Text description of the video to generate.
        request_id: Short identifier used to tag log lines and to name the
            output file.
        **kwargs: Optional overrides:
            ``negative_prompt`` (str), ``num_inference_steps`` (default 30),
            ``num_frames`` (default 41), ``guidance_scale`` (default 6.5),
            ``seed`` (default 42, coerced to ``int``) and ``fps`` (default 8,
            used only when writing the file).

    Returns:
        Path to the generated video file, ``temp_video_<request_id>.mp4``
        relative to the current working directory. The caller owns the file
        and is responsible for deleting it.

    Raises:
        RuntimeError: If the pipeline has not been initialized, or if the
            video file does not exist after export.
        Exception: Any other error from generation or export is printed and
            re-raised. A partially written output file is removed first,
            because the caller never receives the path on failure.
    """
    if pipe is None:
        # Fail with a clear message instead of "'NoneType' object is not callable".
        raise RuntimeError(
            "CogVideoX pipeline is not initialized; call initialize_pipeline() first"
        )

    start_time = time.time()
    print(f"[T2V-{request_id}] Starting video generation")
    print(f"[T2V-{request_id}] Prompt: '{prompt}'")

    # Seeds arriving via JSON may be floats or numeric strings; manual_seed needs an int.
    seed = int(kwargs.get("seed", 42))
    fps = kwargs.get("fps", 8)

    # Set default parameters optimized for business content
    params = {
        "prompt": prompt,
        "negative_prompt": kwargs.get("negative_prompt", "blurry, low quality, distorted, text, watermark, blank, black, white"),
        "num_videos_per_prompt": 1,
        "num_inference_steps": kwargs.get("num_inference_steps", 30),  # Reduced for speed
        "num_frames": kwargs.get("num_frames", 41),  # ~5 seconds at 8fps
        "guidance_scale": kwargs.get("guidance_scale", 6.5),
        "generator": torch.Generator().manual_seed(seed),
    }

    print(f"[T2V-{request_id}] Generation parameters:")
    for key, value in params.items():
        if key != "generator":
            print(f"  {key}: {value}")

    # Defined before the try so the failure path can remove a partial file.
    output_path = f"temp_video_{request_id}.mp4"

    try:
        if torch.cuda.is_available():
            memory_before = torch.cuda.memory_allocated()
            print(f"[T2V-{request_id}] GPU memory before: {memory_before/1024**2:.2f} MB")

        # Generate video
        generation_start = time.time()
        print(f"[T2V-{request_id}] Starting generation (this may take 5-10 minutes on T4)...")

        video = pipe(**params).frames[0]

        generation_time = time.time() - generation_start
        print(f"[T2V-{request_id}] ✅ Video generation completed in {generation_time:.2f}s")

        # Save video to temporary file
        export_to_video(video, output_path, fps=fps)

        # Verify file was created and get size
        if not os.path.exists(output_path):
            raise RuntimeError("Video file was not created")
        file_size = os.path.getsize(output_path)
        print(f"[T2V-{request_id}] ✅ Video saved: {output_path} ({file_size/1024:.2f} KB)")

        if torch.cuda.is_available():
            memory_after = torch.cuda.memory_allocated()
            print(f"[T2V-{request_id}] GPU memory after: {memory_after/1024**2:.2f} MB")

        total_time = time.time() - start_time
        print(f"[T2V-{request_id}] ✓ Total processing time: {total_time:.2f}s")

        return output_path

    except Exception as e:
        print(f"[T2V-{request_id}] ❌ Generation failed: {str(e)}")
        # Best-effort removal of a partial file; the caller cannot clean it up
        # because it never receives the path when this function raises.
        if os.path.exists(output_path):
            try:
                os.remove(output_path)
            except OSError as cleanup_error:
                print(f"[T2V-{request_id}] Warning: Could not remove partial file: {cleanup_error}")
        raise
    finally:
        # Always clean up GPU memory
        if torch.cuda.is_available():
            torch.cuda.empty_cache()
        gc.collect()
        print(f"[T2V-{request_id}] Memory cleanup completed")

# Initialize the pipeline on startup. This runs eagerly at module/cell
# execution time, before the Flask app exists; initialize_pipeline() re-raises
# on failure, so none of the code below runs if the model cannot be loaded.
initialize_pipeline()

# Flask app for ngrok exposure. CORS(app) applies flask-cors with its default
# settings to every route (presumably allowing all origins — confirm before
# exposing this beyond a demo).
app = Flask(__name__)
CORS(app)

@app.route('/generate_video', methods=["POST"])
def generate_video():
    """Generate video from text prompt for Angelo's reel generation pipeline.

    Expects a JSON object body with a required, non-empty string ``prompt``
    and optional ``negative_prompt``, ``num_inference_steps``, ``num_frames``,
    ``guidance_scale`` and ``seed`` (null/absent values fall back to the
    generator's defaults).

    Returns:
        200 with a JSON object containing the base64-encoded MP4
        (``video_data``), ``prompt``, ``file_size_bytes``, ``file_size_kb``,
        ``processing_time_seconds`` and ``request_id``;
        400 with ``{"error": ...}`` when the body is not a JSON object or the
        prompt is missing/empty;
        500 with ``error``, ``request_id`` and ``processing_time_seconds``
        when generation or encoding fails.
    """
    request_id = str(uuid.uuid4())[:8]
    start_time = time.time()

    print(f"[T2V-SERVICE-{request_id}] Received video generation request")

    # silent=True yields None for a missing/malformed/non-JSON body instead of
    # raising, so every bad request gets the same JSON error shape.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        print(f"[T2V-SERVICE-{request_id}] ERROR: Request body is not a JSON object")
        return jsonify({"error": "Request body must be a JSON object"}), 400

    prompt = data.get('prompt')
    if not isinstance(prompt, str) or not prompt.strip():
        print(f"[T2V-SERVICE-{request_id}] ERROR: No prompt in request")
        return jsonify({"error": "No prompt provided"}), 400

    print(f"[T2V-SERVICE-{request_id}] Prompt: '{prompt}'")

    # Optional parameters; keys that are absent or null are dropped so the
    # generator's own defaults apply.
    optional_keys = (
        "negative_prompt",
        "num_inference_steps",
        "num_frames",
        "guidance_scale",
        "seed",
    )
    optional_params = {
        key: data[key] for key in optional_keys if data.get(key) is not None
    }

    # Pre-bound so the cleanup in `finally` can tell whether a file was produced.
    video_path = None

    try:
        # Generate video
        video_path = generate_video_from_prompt(prompt, request_id, **optional_params)

        # Read video file and encode to base64
        print(f"[T2V-SERVICE-{request_id}] Encoding video to base64...")
        with open(video_path, "rb") as video_file:
            video_bytes = video_file.read()
        video_base64 = base64.b64encode(video_bytes).decode('utf-8')

        # Get video metadata
        file_size = len(video_bytes)

        result = {
            "video_data": video_base64,
            "prompt": prompt,
            "file_size_bytes": file_size,
            "file_size_kb": round(file_size / 1024, 2),
            "processing_time_seconds": round(time.time() - start_time, 2),
            "request_id": request_id
        }

        print(f"[T2V-SERVICE-{request_id}] ✅ Video generation complete")
        print(f"[T2V-SERVICE-{request_id}] File size: {file_size/1024:.2f} KB")
        print(f"[T2V-SERVICE-{request_id}] Total time: {result['processing_time_seconds']}s")

        return jsonify(result)

    except Exception as e:
        # Top-level request boundary: report the failure as JSON rather than
        # letting the framework emit an HTML error page.
        print(f"[T2V-SERVICE-{request_id}] ❌ ERROR: {str(e)}")
        return jsonify({
            "error": str(e),
            "request_id": request_id,
            "processing_time_seconds": round(time.time() - start_time, 2)
        }), 500

    finally:
        # Cleanup temporary video file (best effort; never masks the response)
        try:
            if video_path is not None and os.path.exists(video_path):
                os.remove(video_path)
                print(f"[T2V-SERVICE-{request_id}] Temp video file cleaned up")
        except Exception as e:
            print(f"[T2V-SERVICE-{request_id}] Warning: Could not clean up temp file: {e}")

@app.route('/status', methods=["GET"])
def status():
    """Report whether the pipeline is loaded, plus device and GPU details."""
    has_gpu = torch.cuda.is_available()

    # The pipeline handle is None until initialize_pipeline() has set it.
    if pipe is None:
        service_state = "initializing"
    else:
        service_state = "ready"

    gpu_name = torch.cuda.get_device_name(0) if has_gpu else None

    payload = {
        "status": service_state,
        "device": device,
        "gpu_available": has_gpu,
        "gpu_name": gpu_name,
    }
    return jsonify(payload)

# Ngrok setup for external access
def create_t2v_service(port: int = 5001):
    """Create ngrok tunnel for Angelo's T2V service.

    The ngrok auth token is read from the ``NGROK_AUTHTOKEN`` environment
    variable rather than being hard-coded in source. When the variable is
    unset or blank, no token is written, so an already-configured ngrok
    installation is left untouched instead of being overwritten with an
    empty token.

    Args:
        port: Local port the Flask app will listen on and the tunnel will
            forward to. Defaults to 5001, matching the ``app.run`` call in
            the launch block.

    Returns:
        ``(app, public_url)`` on success, or ``(None, None)`` if the tunnel
        could not be created (the error is printed, not raised).
    """
    try:
        print("🌐 Setting up ngrok tunnel for T2V service...")
        auth_token = os.environ.get("NGROK_AUTHTOKEN", "").strip()
        if auth_token:
            ngrok.set_auth_token(auth_token)
        public_url = ngrok.connect(port).public_url

        print(f"✅ T2V service available at: {public_url}")
        print(f"   Generate endpoint: {public_url}/generate_video")
        print(f"   Status endpoint: {public_url}/status")
        print(f"   Use this URL in your T2V_NGROK_URL environment variable")

        return app, public_url

    except Exception as e:
        # Deliberate best-effort: the caller checks for (None, None).
        print(f"❌ Ngrok setup failed: {e}")
        return None, None

# Launch sequence: announce startup, then (only when run as a script or
# notebook cell) open the ngrok tunnel and serve the Flask app on port 5001.
print("🚀 Starting T2V service for Angelo's Reel Generator...")
if __name__ == '__main__':
    app, url = create_t2v_service()
    if not app:
        # Tunnel creation failed; create_t2v_service() already printed why.
        print("❌ Failed to start T2V service")
    else:
        print("🎬 T2V service is ready for reel generation!")
        print("⚠️  WARNING: T2V generation is SLOW on T4 GPU (5-10 minutes per video)")
        print("💡 Recommend using Pexels for production, T2V for unique content only")
        app.run(port=5001, debug=False)

🎬 Initializing CogVideoX T2V service for Angelo's Reel Generator...
Using device: cuda
GPU: Tesla T4
⏳ Loading CogVideoX-2B model... (this may take several minutes)


Loading pipeline components...:   0%|          | 0/5 [00:00<?, ?it/s]

Loading checkpoint shards:   0%|          | 0/2 [00:00<?, ?it/s]

✅ CogVideoX model loaded successfully in 13.65s
🚀 Starting T2V service for Angelo's Reel Generator...
🌐 Setting up ngrok tunnel for T2V service...
✅ T2V service available at: https://eda5b54c7ec4.ngrok-free.app
   Generate endpoint: https://eda5b54c7ec4.ngrok-free.app/generate_video
   Status endpoint: https://eda5b54c7ec4.ngrok-free.app/status
   Use this URL in your T2V_NGROK_URL environment variable
🎬 T2V service is ready for reel generation!
💡 Recommend using Pexels for production, T2V for unique content only
 * Serving Flask app '__main__'
 * Debug mode: off


 * Running on http://127.0.0.1:5001
INFO:werkzeug:[33mPress CTRL+C to quit[0m
INFO:werkzeug:127.0.0.1 - - [07/Nov/2025 14:57:34] "GET /status HTTP/1.1" 200 -


[T2V-SERVICE-2a43076f] Received video generation request
[T2V-SERVICE-2a43076f] Prompt: 'Professional businesswoman presenting financial charts to executive team in bright conference room'
[T2V-2a43076f] Starting video generation
[T2V-2a43076f] Prompt: 'Professional businesswoman presenting financial charts to executive team in bright conference room'
[T2V-2a43076f] Generation parameters:
  prompt: Professional businesswoman presenting financial charts to executive team in bright conference room
  negative_prompt: blurry, low quality, distorted, text, watermark, amateur, unprofessional
  num_videos_per_prompt: 1
  num_inference_steps: 20
  num_frames: 25
  guidance_scale: 6.5
[T2V-2a43076f] GPU memory before: 260.39 MB
[T2V-2a43076f] Starting generation (this may take 5-10 minutes on T4)...


  0%|          | 0/20 [00:00<?, ?it/s]

[T2V-2a43076f] ✅ Video generation completed in 247.52s
[T2V-2a43076f] ✅ Video saved: temp_video_2a43076f.mp4 (2.70 KB)
[T2V-2a43076f] GPU memory after: 268.52 MB
[T2V-2a43076f] ✓ Total processing time: 248.55s


INFO:werkzeug:127.0.0.1 - - [07/Nov/2025 15:01:54] "POST /generate_video HTTP/1.1" 200 -


[T2V-2a43076f] Memory cleanup completed
[T2V-SERVICE-2a43076f] Encoding video to base64...
[T2V-SERVICE-2a43076f] ✅ Video generation complete
[T2V-SERVICE-2a43076f] File size: 2.70 KB
[T2V-SERVICE-2a43076f] Total time: 248.94s
[T2V-SERVICE-2a43076f] Temp video file cleaned up
[T2V-SERVICE-caceea1c] Received video generation request
[T2V-SERVICE-caceea1c] Prompt: 'Two business executives shaking hands across modern glass meeting table'
[T2V-caceea1c] Starting video generation
[T2V-caceea1c] Prompt: 'Two business executives shaking hands across modern glass meeting table'
[T2V-caceea1c] Generation parameters:
  prompt: Two business executives shaking hands across modern glass meeting table
  negative_prompt: blurry, low quality, distorted, text, watermark, amateur, unprofessional
  num_videos_per_prompt: 1
  num_inference_steps: 20
  num_frames: 25
  guidance_scale: 6.5
[T2V-caceea1c] GPU memory before: 268.52 MB
[T2V-caceea1c] Starting generation (this may take 5-10 minutes on T4)...


  0%|          | 0/20 [00:00<?, ?it/s]

[T2V-caceea1c] ✅ Video generation completed in 229.98s
[T2V-caceea1c] ✅ Video saved: temp_video_caceea1c.mp4 (38.89 KB)
[T2V-caceea1c] GPU memory after: 268.52 MB
[T2V-caceea1c] ✓ Total processing time: 230.83s
[T2V-caceea1c] Memory cleanup completed
[T2V-SERVICE-caceea1c] Encoding video to base64...


INFO:werkzeug:127.0.0.1 - - [07/Nov/2025 15:05:46] "POST /generate_video HTTP/1.1" 200 -


[T2V-SERVICE-caceea1c] ✅ Video generation complete
[T2V-SERVICE-caceea1c] File size: 38.89 KB
[T2V-SERVICE-caceea1c] Total time: 231.31s
[T2V-SERVICE-caceea1c] Temp video file cleaned up
[T2V-SERVICE-fc17817b] Received video generation request
[T2V-SERVICE-fc17817b] Prompt: 'Corporate team collaborating on project around whiteboard in contemporary office'
[T2V-fc17817b] Starting video generation
[T2V-fc17817b] Prompt: 'Corporate team collaborating on project around whiteboard in contemporary office'
[T2V-fc17817b] Generation parameters:
  prompt: Corporate team collaborating on project around whiteboard in contemporary office
  negative_prompt: blurry, low quality, distorted, text, watermark, amateur, unprofessional
  num_videos_per_prompt: 1
  num_inference_steps: 20
  num_frames: 25
  guidance_scale: 6.5
[T2V-fc17817b] GPU memory before: 268.52 MB
[T2V-fc17817b] Starting generation (this may take 5-10 minutes on T4)...


  0%|          | 0/20 [00:00<?, ?it/s]

[T2V-fc17817b] ✅ Video generation completed in 230.42s
[T2V-fc17817b] ✅ Video saved: temp_video_fc17817b.mp4 (53.92 KB)
[T2V-fc17817b] GPU memory after: 268.52 MB
[T2V-fc17817b] ✓ Total processing time: 231.01s


INFO:werkzeug:127.0.0.1 - - [07/Nov/2025 15:09:39] "POST /generate_video HTTP/1.1" 200 -


[T2V-fc17817b] Memory cleanup completed
[T2V-SERVICE-fc17817b] Encoding video to base64...
[T2V-SERVICE-fc17817b] ✅ Video generation complete
[T2V-SERVICE-fc17817b] File size: 53.92 KB
[T2V-SERVICE-fc17817b] Total time: 231.4s
[T2V-SERVICE-fc17817b] Temp video file cleaned up
[T2V-SERVICE-81ce41c0] Received video generation request
[T2V-SERVICE-81ce41c0] Prompt: 'Business manager reviewing documents with client in professional consultation setting'
[T2V-81ce41c0] Starting video generation
[T2V-81ce41c0] Prompt: 'Business manager reviewing documents with client in professional consultation setting'
[T2V-81ce41c0] Generation parameters:
  prompt: Business manager reviewing documents with client in professional consultation setting
  negative_prompt: blurry, low quality, distorted, text, watermark, amateur, unprofessional
  num_videos_per_prompt: 1
  num_inference_steps: 20
  num_frames: 25
  guidance_scale: 6.5
[T2V-81ce41c0] GPU memory before: 268.52 MB
[T2V-81ce41c0] Starting generatio

  0%|          | 0/20 [00:00<?, ?it/s]

[T2V-81ce41c0] ✅ Video generation completed in 230.26s
[T2V-81ce41c0] ✅ Video saved: temp_video_81ce41c0.mp4 (33.17 KB)
[T2V-81ce41c0] GPU memory after: 268.52 MB
[T2V-81ce41c0] ✓ Total processing time: 230.85s


INFO:werkzeug:127.0.0.1 - - [07/Nov/2025 15:13:32] "POST /generate_video HTTP/1.1" 200 -


[T2V-81ce41c0] Memory cleanup completed
[T2V-SERVICE-81ce41c0] Encoding video to base64...
[T2V-SERVICE-81ce41c0] ✅ Video generation complete
[T2V-SERVICE-81ce41c0] File size: 33.17 KB
[T2V-SERVICE-81ce41c0] Total time: 231.25s
[T2V-SERVICE-81ce41c0] Temp video file cleaned up
[T2V-SERVICE-2579539b] Received video generation request
[T2V-SERVICE-2579539b] Prompt: 'Executive giving presentation using digital display in modern boardroom'
[T2V-2579539b] Starting video generation
[T2V-2579539b] Prompt: 'Executive giving presentation using digital display in modern boardroom'
[T2V-2579539b] Generation parameters:
  prompt: Executive giving presentation using digital display in modern boardroom
  negative_prompt: blurry, low quality, distorted, text, watermark, amateur, unprofessional
  num_videos_per_prompt: 1
  num_inference_steps: 20
  num_frames: 25
  guidance_scale: 6.5
[T2V-2579539b] GPU memory before: 268.52 MB
[T2V-2579539b] Starting generation (this may take 5-10 minutes on T4)...


  0%|          | 0/20 [00:00<?, ?it/s]

[T2V-2579539b] ✅ Video generation completed in 229.65s
[T2V-2579539b] ✅ Video saved: temp_video_2579539b.mp4 (57.05 KB)
[T2V-2579539b] GPU memory after: 268.52 MB
[T2V-2579539b] ✓ Total processing time: 230.27s


INFO:werkzeug:127.0.0.1 - - [07/Nov/2025 15:17:24] "POST /generate_video HTTP/1.1" 200 -


[T2V-2579539b] Memory cleanup completed
[T2V-SERVICE-2579539b] Encoding video to base64...
[T2V-SERVICE-2579539b] ✅ Video generation complete
[T2V-SERVICE-2579539b] File size: 57.05 KB
[T2V-SERVICE-2579539b] Total time: 230.66s
[T2V-SERVICE-2579539b] Temp video file cleaned up
[T2V-SERVICE-e22c4191] Received video generation request
[T2V-SERVICE-e22c4191] Prompt: 'Professional team celebrating successful deal with congratulations in office environment'
[T2V-e22c4191] Starting video generation
[T2V-e22c4191] Prompt: 'Professional team celebrating successful deal with congratulations in office environment'
[T2V-e22c4191] Generation parameters:
  prompt: Professional team celebrating successful deal with congratulations in office environment
  negative_prompt: blurry, low quality, distorted, text, watermark, amateur, unprofessional
  num_videos_per_prompt: 1
  num_inference_steps: 20
  num_frames: 25
  guidance_scale: 6.5
[T2V-e22c4191] GPU memory before: 268.52 MB
[T2V-e22c4191] Starting

  0%|          | 0/20 [00:00<?, ?it/s]

[T2V-e22c4191] ✅ Video generation completed in 229.95s
[T2V-e22c4191] ✅ Video saved: temp_video_e22c4191.mp4 (23.92 KB)
[T2V-e22c4191] GPU memory after: 268.52 MB
[T2V-e22c4191] ✓ Total processing time: 230.50s


INFO:werkzeug:127.0.0.1 - - [07/Nov/2025 15:21:16] "POST /generate_video HTTP/1.1" 200 -


[T2V-e22c4191] Memory cleanup completed
[T2V-SERVICE-e22c4191] Encoding video to base64...
[T2V-SERVICE-e22c4191] ✅ Video generation complete
[T2V-SERVICE-e22c4191] File size: 23.92 KB
[T2V-SERVICE-e22c4191] Total time: 230.91s
[T2V-SERVICE-e22c4191] Temp video file cleaned up
[T2V-SERVICE-04a88833] Received video generation request
[T2V-SERVICE-04a88833] Prompt: 'Business advisor explaining strategy to colleagues around conference table'
[T2V-04a88833] Starting video generation
[T2V-04a88833] Prompt: 'Business advisor explaining strategy to colleagues around conference table'
[T2V-04a88833] Generation parameters:
  prompt: Business advisor explaining strategy to colleagues around conference table
  negative_prompt: blurry, low quality, distorted, text, watermark, amateur, unprofessional
  num_videos_per_prompt: 1
  num_inference_steps: 20
  num_frames: 25
  guidance_scale: 6.5
[T2V-04a88833] GPU memory before: 268.52 MB
[T2V-04a88833] Starting generation (this may take 5-10 minutes on

  0%|          | 0/20 [00:00<?, ?it/s]

[T2V-04a88833] ✅ Video generation completed in 246.20s
[T2V-04a88833] ✅ Video saved: temp_video_04a88833.mp4 (87.47 KB)
[T2V-04a88833] GPU memory after: 268.52 MB
[T2V-04a88833] ✓ Total processing time: 246.89s


INFO:werkzeug:127.0.0.1 - - [07/Nov/2025 15:25:24] "POST /generate_video HTTP/1.1" 200 -


[T2V-04a88833] Memory cleanup completed
[T2V-SERVICE-04a88833] Encoding video to base64...
[T2V-SERVICE-04a88833] ✅ Video generation complete
[T2V-SERVICE-04a88833] File size: 87.47 KB
[T2V-SERVICE-04a88833] Total time: 247.3s
[T2V-SERVICE-04a88833] Temp video file cleaned up
[T2V-SERVICE-a155b561] Received video generation request
[T2V-SERVICE-a155b561] Prompt: 'Corporate professionals discussing plans in bright modern workspace'
[T2V-a155b561] Starting video generation
[T2V-a155b561] Prompt: 'Corporate professionals discussing plans in bright modern workspace'
[T2V-a155b561] Generation parameters:
  prompt: Corporate professionals discussing plans in bright modern workspace
  negative_prompt: blurry, low quality, distorted, text, watermark, amateur, unprofessional
  num_videos_per_prompt: 1
  num_inference_steps: 20
  num_frames: 25
  guidance_scale: 6.5
[T2V-a155b561] GPU memory before: 268.52 MB
[T2V-a155b561] Starting generation (this may take 5-10 minutes on T4)...


  0%|          | 0/20 [00:00<?, ?it/s]

[T2V-a155b561] ✅ Video generation completed in 245.50s
[T2V-a155b561] ✅ Video saved: temp_video_a155b561.mp4 (21.96 KB)
[T2V-a155b561] GPU memory after: 268.52 MB
[T2V-a155b561] ✓ Total processing time: 246.03s


INFO:werkzeug:127.0.0.1 - - [07/Nov/2025 15:29:32] "POST /generate_video HTTP/1.1" 200 -


[T2V-a155b561] Memory cleanup completed
[T2V-SERVICE-a155b561] Encoding video to base64...
[T2V-SERVICE-a155b561] ✅ Video generation complete
[T2V-SERVICE-a155b561] File size: 21.96 KB
[T2V-SERVICE-a155b561] Total time: 246.43s
[T2V-SERVICE-a155b561] Temp video file cleaned up
