# 🎬 UVG MAX - Premium Video Generator

**With Fish-Speech (OpenAudio S1) TTS**

### How It Works:
1. **Cell 1**: Install Fish-Speech + download model (~5 min)
2. **Cell 2**: Generate TTS audio for all scenes
3. **Cell 3**: Generate video with the audio

---

In [None]:
#@title üîß Cell 1: Install Fish-Speech (Run Once)
#@markdown This takes ~5 minutes. Downloads the OpenAudio S1-mini model.

import os
import sys

# Clone Fish-Speech
print('üì¶ Step 1/4: Cloning Fish-Speech...')
os.chdir('/content')
!rm -rf fish-speech
!git clone https://github.com/fishaudio/fish-speech.git
os.chdir('/content/fish-speech')

# Install dependencies
print('\nüì¶ Step 2/4: Installing dependencies...')
!pip install -q -e .

# Download model
print('\nüì• Step 3/4: Downloading OpenAudio S1-mini model (~2GB)...')
!pip install -q huggingface_hub[cli]
!huggingface-cli download fishaudio/openaudio-s1-mini --local-dir checkpoints/openaudio-s1-mini

# Clone UVG MAX
print('\nüì¶ Step 4/4: Cloning UVG MAX...')
os.chdir('/content')
!rm -rf uvg-max-engine
!git clone https://github.com/Shya-Bubu/uvg-max-engine.git

# Install UVG deps
!pip install -q python-dotenv requests tqdm Pillow numpy

# Set API keys
os.environ['PEXELS_KEY'] = '7QAyreSxu44EAJYIRfYXstHpvj1tof7v3Baj8tv8fvO4DV6l16I2FzlY'
os.environ['PIXABAY_KEY'] = '53451868-98a1100fbad21a5918d9610dd'

print('\n‚úÖ SETUP COMPLETE!')
print('\nüëâ Now run Cell 2 to generate TTS audio.')

In [None]:
#@title üéôÔ∏è Cell 2: Generate TTS Audio with Fish-Speech
#@markdown Generates audio for all scenes using OpenAudio S1-mini.

import os
import sys
os.chdir('/content/fish-speech')
sys.path.insert(0, '/content/fish-speech')

# Your script scenes
SCENES = [
    "In a world of endless distractions, focus is your superpower.",
    "Every great achievement started with a single moment of clarity.",
    "Small steps every day lead to massive results.",
    "Today, choose focus. Tomorrow, celebrate success."
]

# Create output directory
os.makedirs('/content/tts_audio', exist_ok=True)

print('üéôÔ∏è Generating TTS audio with Fish-Speech...')
print(f'   Scenes: {len(SCENES)}')

for i, text in enumerate(SCENES):
    print(f'\nüìù Scene {i+1}: {text[:40]}...')
    
    # Generate semantic tokens
    !python fish_speech/models/text2semantic/inference.py \
        --text "{text}" \
        --checkpoint-path checkpoints/openaudio-s1-mini \
        --output-path /content/tts_audio/scene_{i+1}_tokens.npy
    
    # Convert to audio
    !python fish_speech/models/dac/inference.py \
        -i /content/tts_audio/scene_{i+1}_tokens.npy \
        --checkpoint-path checkpoints/openaudio-s1-mini/codec.pth \
        -o /content/tts_audio/scene_{i+1}.wav
    
    print(f'   ‚úÖ Saved: /content/tts_audio/scene_{i+1}.wav')

# Free GPU memory
import gc
import torch
gc.collect()
torch.cuda.empty_cache()

print('\n‚úÖ All TTS audio generated!')
print('\nüëâ Run Cell 3 to generate the video.')

In [None]:
#@title üß™ Cell 2b: Test TTS (Quick test with one line)

import os
os.chdir('/content/fish-speech')

TEST_TEXT = "Hello world! This is a test of Fish Speech."

print('üéôÔ∏è Testing Fish-Speech TTS...')

# Generate
!python fish_speech/models/text2semantic/inference.py \
    --text "{TEST_TEXT}" \
    --checkpoint-path checkpoints/openaudio-s1-mini \
    --output-path /content/test_tokens.npy

!python fish_speech/models/dac/inference.py \
    -i /content/test_tokens.npy \
    --checkpoint-path checkpoints/openaudio-s1-mini/codec.pth \
    -o /content/test_audio.wav

# Play
from IPython.display import Audio
display(Audio('/content/test_audio.wav'))

In [None]:
#@title üé¨ Cell 3: Generate Video (Uses Pre-generated Audio)

# Builds the video script (one entry per scene, each pointing at a WAV file
# under /content/tts_audio) and passes it to uvg_core's run_from_json. The
# pipeline is only started when every audio file referenced by the script
# exists on disk.

import os
import sys
import glob

os.chdir('/content/uvg-max-engine')
sys.path.insert(0, '/content/uvg-max-engine')

# Video script with pre-generated audio paths
# NOTE(review): the "text" values here are shortened versions of the sentences
# spoken in Cell 2 - confirm how the pipeline uses this field.
SCRIPT = {
    "version": "2.1",
    "video_meta": {
        "title": "The Power of Focus",
        "orientation": "portrait",
        "resolution": {"width": 1080, "height": 1920}
    },
    "scenes": [
        {"scene_id": 1, "text": "In a world of distractions...", "search_keywords": "meditation peaceful", "audio_path": "/content/tts_audio/scene_1.wav"},
        {"scene_id": 2, "text": "Every great achievement...", "search_keywords": "mountain sunrise", "audio_path": "/content/tts_audio/scene_2.wav"},
        {"scene_id": 3, "text": "Small steps lead to results...", "search_keywords": "running fitness", "audio_path": "/content/tts_audio/scene_3.wav"},
        {"scene_id": 4, "text": "Choose focus, celebrate success.", "search_keywords": "celebration happy", "audio_path": "/content/tts_audio/scene_4.wav"}
    ]
}

# Check TTS audio exists
tts_files = sorted(glob.glob('/content/tts_audio/scene_*.wav'))

# Finding "some" WAV files is not enough: each scene names a specific file,
# so collect the ones that are actually missing.
missing_audio = [
    scene["audio_path"]
    for scene in SCRIPT["scenes"]
    if not os.path.exists(scene["audio_path"])
]

if not tts_files:
    print('‚ùå No TTS audio found! Run Cell 2 first.')
elif missing_audio:
    print(f'‚ùå Missing TTS audio for {len(missing_audio)} scene(s): {missing_audio}')
    print('   Re-run Cell 2 and check its output for errors.')
else:
    print(f'Found {len(tts_files)} TTS audio files')

    from uvg_core.uvg_pipeline import run_from_json

    print('üé¨ Generating video...')
    result = run_from_json(SCRIPT)

    if result.success:
        print(f'\n‚úÖ Video: {result.output_path}')
    else:
        print(f'\n‚ùå Failed: {result.errors}')

In [None]:
#@title üì∫ Cell 4: Play Video

import glob
import os
from IPython.display import Video

# Search the engine's output tree for MP4 files and embed the one with the
# greatest os.path.getctime value; report when there is nothing to show.
os.chdir('/content/uvg-max-engine')

mp4_paths = glob.glob('uvg_output/**/*.mp4', recursive=True)
if not mp4_paths:
    print('‚ùå No video found.')
else:
    newest = max(mp4_paths, key=os.path.getctime)
    print(f'üé¨ {newest}')
    display(Video(newest, embed=True, width=400))

In [None]:
#@title üì• Cell 5: Download

# Triggers a browser download (google.colab.files.download) for every MP4 in
# uvg_output/final, relative to the UVG MAX checkout.

from google.colab import files
import glob, os
os.chdir('/content/uvg-max-engine')

final_videos = glob.glob('uvg_output/final/*.mp4')

# Say so explicitly instead of finishing silently when there is nothing to
# download (mirrors the "no video" message of the playback cell).
if not final_videos:
    print('‚ùå No video found in uvg_output/final. Run Cell 3 first.')

for f in final_videos:
    print(f'üì• {f}')
    files.download(f)