## Cell 1 — Mount Drive & create folders (run first)

In [1]:
from google.colab import drive
drive.mount('/content/drive', force_remount=False)

import os

# Project root on Drive; every other location below hangs off it.
BASE = "/content/drive/MyDrive/hear2see"   # <-- edit if you want another Drive path
IN_AUDIO_DIR = os.path.join(BASE, "input_audio")
OUT_VIDEO_DIR = os.path.join(BASE, "output_videos")
FRAMES_DIR = os.path.join(BASE, "frames")
META_FILE = os.path.join(BASE, "metadata.jsonl")
LOG_DIR = os.path.join(BASE, "logs")

# Create the folder layout up front (META_FILE is a file, so it is not created here).
for p in (BASE, IN_AUDIO_DIR, OUT_VIDEO_DIR, FRAMES_DIR, LOG_DIR):
    os.makedirs(p, exist_ok=True)

# Echo the resolved locations so a wrong BASE is spotted immediately.
print("BASE:", BASE)
print("Input audio dir:", IN_AUDIO_DIR)
print("Output video dir:", OUT_VIDEO_DIR)
print("Frames dir:", FRAMES_DIR)
print("Metadata file:", META_FILE)
print("Logs:", LOG_DIR)

Mounted at /content/drive
BASE: /content/drive/MyDrive/hear2see
Input audio dir: /content/drive/MyDrive/hear2see/input_audio
Output video dir: /content/drive/MyDrive/hear2see/output_videos
Frames dir: /content/drive/MyDrive/hear2see/frames
Metadata file: /content/drive/MyDrive/hear2see/metadata.jsonl
Logs: /content/drive/MyDrive/hear2see/logs


## Cell 2 — Install required packages (run once)

In [2]:
# Minimal installs: vosk + gradio + ffmpeg (ffmpeg via apt)
!pip install -q vosk==0.3.48 gradio==4.44.1 >/dev/null
!apt-get -qq update && apt-get -qq install -y ffmpeg >/dev/null
print("‚úÖ Installed vosk + gradio + ffmpeg")

[31mERROR: Could not find a version that satisfies the requirement vosk==0.3.48 (from versions: 0.3.18, 0.3.21, 0.3.22, 0.3.27, 0.3.28, 0.3.29, 0.3.30, 0.3.31, 0.3.32, 0.3.37, 0.3.38, 0.3.40, 0.3.41, 0.3.42, 0.3.43, 0.3.44, 0.3.45)[0m[31m
[0m[31mERROR: No matching distribution found for vosk==0.3.48[0m[31m
[0mW: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)
‚úÖ Installed vosk + gradio + ffmpeg


## Cell 3 — Download official Vosk small model (v0.15)

In [3]:
import os
VOSK_SMALL_FOLDER = "/content/vosk-model-small-en-us-0.15"
if not os.path.isdir(VOSK_SMALL_FOLDER):
    print("Downloading Vosk small model (0.15, ~50MB)...")
    !wget -q -O /content/vosk-small.zip https://alphacephei.com/vosk/models/vosk-model-small-en-us-0.15.zip
    !unzip -q /content/vosk-small.zip -d /content
    !rm -f /content/vosk-small.zip
    if os.path.isdir(VOSK_SMALL_FOLDER):
        print("‚úÖ Vosk small downloaded to", VOSK_SMALL_FOLDER)
    else:
        raise RuntimeError("Vosk model download failed.")
else:
    print("Vosk small already present:", VOSK_SMALL_FOLDER)

Downloading Vosk small model (0.15, ~50MB)...
‚úÖ Vosk small downloaded to /content/vosk-model-small-en-us-0.15


## Cell 4 — Load Vosk model (official small)

In [4]:
! pip install vosk

Collecting vosk
  Downloading vosk-0.3.45-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl.metadata (1.8 kB)
Collecting srt (from vosk)
  Downloading srt-3.5.3.tar.gz (28 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Downloading vosk-0.3.45-py3-none-manylinux_2_12_x86_64.manylinux2010_x86_64.whl (7.2 MB)
[2K   [90m‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ[0m [32m7.2/7.2 MB[0m [31m48.9 MB/s[0m eta [36m0:00:00[0m
[?25hBuilding wheels for collected packages: srt
  Building wheel for srt (setup.py) ... [?25l[?25hdone
  Created wheel for srt: filename=srt-3.5.3-py3-none-any.whl size=22427 sha256=eb2a4a5b73303c4006ac7c5076f8ae0d5ab47d6cb912c21942206e8d503815fd
  Stored in directory: /root/.cache/pip/wheels/7e/75/5b/e1d5c3756631e4bda806f6cc9640153b39484bb6f7b0b8def3
Successfully built srt
Installing collected packages: srt, vosk
Successfully installed srt-3.5.3 vosk-0.3.45


In [5]:
import traceback, shutil, time, os
from vosk import Model, KaldiRecognizer

# Folder of the official small English model fetched by Cell 3.
VOSK_LOCAL = "/content/vosk-model-small-en-us-0.15"

# Stop early with an actionable message when the download cell was skipped.
if not os.path.isdir(VOSK_LOCAL):
    raise FileNotFoundError(f"Vosk model folder not found: {VOSK_LOCAL}. Run Cell 3 to download it.")

try:
    vosk_model = Model(VOSK_LOCAL)
except Exception:
    # Show the underlying failure, then abort with a short hint.
    print("‚ùå Failed to load official small model. Traceback:")
    traceback.print_exc()
    raise RuntimeError("Vosk model not loaded. Check files or rerun Cell 3.")
else:
    print("‚úÖ Loaded official Vosk small model from", VOSK_LOCAL)

‚úÖ Loaded official Vosk small model from /content/vosk-model-small-en-us-0.15


## Cell 5 — OPTIONAL: Load Stable Diffusion pipeline (only if you want to generate images here)

In [6]:
# This cell is optional and heavy. Only run if you have a fused SD model folder and GPU.
MODEL_DIR = "/content/drive/MyDrive/hear2see/hear2see_v1"  # <-- set to your fused SD folder path if you want in-notebook generation
pipe = None
if MODEL_DIR and os.path.exists(MODEL_DIR):
    print("Attempting to install SD libs and load pipeline (this will take a while). You must have GPU/ram.")
    !pip install -q diffusers==0.19.4 transformers accelerate safetensors >/dev/null
    import torch, traceback
    from diffusers import StableDiffusionPipeline
    device = "cuda" if torch.cuda.is_available() else "cpu"
    dtype = torch.float16 if device == "cuda" else torch.float32
    try:
        pipe = StableDiffusionPipeline.from_pretrained(MODEL_DIR, torch_dtype=dtype, safety_checker=None)
        pipe = pipe.to(device)
        try:
            pipe.enable_attention_slicing()
            pipe.enable_vae_slicing()
        except Exception:
            pass
        print("‚úÖ SD pipeline loaded on", device)
    except Exception:
        print("‚ùå Failed to load pipeline. Traceback:")
        traceback.print_exc()
        pipe = None
else:
    print("MODEL_DIR not set or not found. SD pipeline not loaded. If you want generation here, set MODEL_DIR and rerun this cell.")

Attempting to install SD libs and load pipeline (this will take a while). You must have GPU/ram.
[31mERROR: Could not find a version that satisfies the requirement diffusers==0.19.4 (from versions: 0.0.1, 0.0.2, 0.0.3, 0.0.4, 0.1.0, 0.1.1, 0.1.2, 0.1.3, 0.2.0, 0.2.1, 0.2.2, 0.2.3, 0.2.4, 0.3.0, 0.4.0, 0.4.1, 0.4.2, 0.5.0, 0.5.1, 0.6.0, 0.7.0, 0.7.1, 0.7.2, 0.8.0, 0.8.1, 0.9.0, 0.10.0, 0.10.1, 0.10.2, 0.11.0, 0.11.1, 0.12.0, 0.12.1, 0.13.0, 0.13.1, 0.14.0, 0.15.0, 0.15.1, 0.16.0, 0.16.1, 0.17.0, 0.17.1, 0.18.0, 0.18.1, 0.18.2, 0.19.0, 0.19.1, 0.19.2, 0.19.3, 0.20.0, 0.20.1, 0.20.2, 0.21.0, 0.21.1, 0.21.2, 0.21.3, 0.21.4, 0.22.0, 0.22.1, 0.22.2, 0.22.3, 0.23.0, 0.23.1, 0.24.0, 0.25.0, 0.25.1, 0.26.0, 0.26.1, 0.26.2, 0.26.3, 0.27.0, 0.27.1, 0.27.2, 0.28.0, 0.28.1, 0.28.2, 0.29.0, 0.29.1, 0.29.2, 0.30.0, 0.30.1, 0.30.2, 0.30.3, 0.31.0, 0.32.0, 0.32.1, 0.32.2, 0.33.0, 0.33.1, 0.34.0, 0.35.0, 0.35.1, 0.35.2)[0m[31m
[0m[31mERROR: No matching distribution found for diffusers==0.19.4[0m[

Loading pipeline components...:   0%|          | 0/6 [00:00<?, ?it/s]

`torch_dtype` is deprecated! Use `dtype` instead!
You have disabled the safety checker for <class 'diffusers.pipelines.stable_diffusion.pipeline_stable_diffusion.StableDiffusionPipeline'> by passing `safety_checker=None`. Ensure that you abide to the conditions of the Stable Diffusion license and do not expose unfiltered results in services or applications open to the public. Both the diffusers team and Hugging Face strongly recommend to keep the safety filter enabled in all public facing circumstances, disabling it only for use-cases that involve analyzing network behavior or auditing its results. For more information, please have a look at https://github.com/huggingface/diffusers/pull/254 .


‚úÖ SD pipeline loaded on cuda


## Cell 6 — Helpers: save audio, transcribe, build prompt, render & save video

In [7]:
import uuid, datetime, tempfile, subprocess, json, math, shutil
from pathlib import Path
import wave, os
import torch

def save_uploaded_audio(src_filepath: str) -> str:
    """Copy an uploaded/recorded audio file into IN_AUDIO_DIR under a unique name.

    The destination name is ``audio_<UTC timestamp>_<8 hex chars><ext>``; the source
    extension is kept (lower-cased) and ``.wav`` is used when the source has none.

    Args:
        src_filepath: Path of the file to archive.

    Returns:
        The path of the copy inside IN_AUDIO_DIR.

    Raises:
        ValueError: If no path is given (``None`` or an empty string).
        OSError: If the copy itself fails (propagated from ``shutil.copy2``).
    """
    if not src_filepath:
        raise ValueError("No audio file provided")
    ext = Path(src_filepath).suffix.lower() or ".wav"
    # Timezone-aware replacement for the deprecated datetime.utcnow(); same text output.
    ts = datetime.datetime.now(datetime.timezone.utc).strftime("%Y%m%dT%H%M%SZ")
    uid = uuid.uuid4().hex[:8]
    dest_name = f"audio_{ts}_{uid}{ext}"
    dest_path = os.path.join(IN_AUDIO_DIR, dest_name)
    shutil.copy2(src_filepath, dest_path)
    return dest_path

from vosk import KaldiRecognizer

def transcribe_vosk(audio_path: str, save_audio_flag=True) -> str:
    """Transcribe an audio file with the globally loaded Vosk model.

    The input is converted with ffmpeg to a temporary 16 kHz mono WAV placed next to the
    working file, streamed through a KaldiRecognizer, and the temporary WAV is removed
    afterwards (also when transcription fails).

    Args:
        audio_path: Path of the audio file to transcribe.
        save_audio_flag: When true, first archive the file via ``save_uploaded_audio``
            and transcribe the archived copy; if archiving fails the original is used.

    Returns:
        The recognised text, or ``"(no speech)"`` when nothing was recognised.

    Raises:
        RuntimeError: If ``vosk_model`` is not loaded, or ffmpeg fails to convert the file.
        ValueError: If ``audio_path`` is None.
    """
    if 'vosk_model' not in globals() or vosk_model is None:
        raise RuntimeError("vosk_model not loaded. Run the Vosk load cell (Cell 4)." )
    if audio_path is None:
        raise ValueError("No audio file provided")
    working = audio_path
    if save_audio_flag:
        try:
            working = save_uploaded_audio(audio_path)
        except Exception as exc:
            # Archiving is best effort: keep transcribing the original, but report it.
            print(f"Warning: could not archive audio ({exc}); transcribing the original file.")
            working = audio_path
    wav16 = working + ".vosk_16k.wav"
    fragments = []
    try:
        conv = subprocess.run(['ffmpeg','-y','-i',working,'-ar','16000','-ac','1',wav16],
                              stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        if conv.returncode != 0:
            # Surface the real cause instead of a confusing error from wave.open below.
            detail = conv.stderr.decode('utf-8', errors='replace').strip()
            raise RuntimeError(f"ffmpeg could not convert {working!r} to 16 kHz mono WAV: {detail[-500:]}")
        # The context manager closes the WAV handle even if recognition raises.
        with wave.open(wav16, 'rb') as wf:
            rec = KaldiRecognizer(vosk_model, wf.getframerate())
            rec.SetWords(True)
            while True:
                data = wf.readframes(4000)
                if len(data) == 0:
                    break
                # A true result means an utterance just ended; collect its text.
                if rec.AcceptWaveform(data):
                    fragments.append(json.loads(rec.Result()).get('text', ''))
            # Collect whatever text is still buffered after the last chunk.
            fragments.append(json.loads(rec.FinalResult()).get('text', ''))
    finally:
        # Always remove the temporary conversion; failing to delete it is not fatal.
        try:
            if os.path.exists(wav16):
                os.remove(wav16)
        except OSError:
            pass
    transcript = ' '.join(t for t in fragments if t).strip()
    return transcript or "(no speech)"

def build_prompt(transcript: str, style: str='cinematic') -> str:
    """Turn a transcript into an image prompt by appending style keywords.

    Args:
        transcript: Spoken text to use as the subject. ``None``, empty, or
            whitespace-only input falls back to ``'ambient scene'``.
        style: One of ``'cinematic'``, ``'realistic'``, ``'anime'``; any other value
            uses the cinematic keywords.

    Returns:
        ``"<subject>, <style keywords>"``.
    """
    # Strip before applying the fallback so a whitespace-only transcript does not
    # yield a prompt that starts with ", ".
    base = (transcript or '').strip() or 'ambient scene'
    styles = {
        'cinematic': 'cinematic lighting, depth of field, high detail, film grain',
        'realistic': 'photorealistic, natural colors, detailed textures',
        'anime': 'anime style, clean lines, vibrant colors, detailed background'
    }
    extras = styles.get(style, styles['cinematic'])
    return f"{base}, {extras}"

def make_unique_name(prefix='hear2see', ext='.mp4'):
    """Build a collision-resistant file name: ``<prefix>_<UTC timestamp>_<8 hex chars><ext>``.

    Args:
        prefix: Leading part of the name.
        ext: Extension appended verbatim (include the dot).

    Returns:
        The generated file name (no directory component).
    """
    # Timezone-aware replacement for the deprecated datetime.utcnow(); same text output.
    ts = datetime.datetime.now(datetime.timezone.utc).strftime('%Y%m%dT%H%M%SZ')
    uid = uuid.uuid4().hex[:8]
    return f"{prefix}_{ts}_{uid}{ext}"

def render_and_save(pipe_obj, prompt, saved_audio_path,
                    steps=20, frames=16, fps=6, cfg=7.0,
                    neg='text, watermark, logo, lowres, blurry, artifacts',
                    seed=1234, slow_mode=False, keep_frames=False):
    """Generate ``frames`` images from ``prompt``, encode them to MP4 and log metadata.

    Each frame is produced by calling ``pipe_obj`` with seed ``seed + i``; the PNGs are
    written to a temporary folder, encoded with ffmpeg (libx264) into OUT_VIDEO_DIR, and
    one JSON line describing the run is appended to META_FILE.

    Args:
        pipe_obj: Loaded image pipeline (called like a diffusers pipeline); must not be None.
        prompt: Text prompt for every frame.
        saved_audio_path: Path of the source audio, recorded in the metadata only.
        steps: Inference steps per frame.
        frames: Number of frames to generate (at least 1).
        fps: Requested frame rate (must be positive).
        cfg: Guidance scale.
        neg: Negative prompt.
        seed: Base seed; frame ``i`` uses ``seed + i``.
        slow_mode: When true the video is encoded at half the requested fps (minimum 1).
        keep_frames: When true the PNG frames are also copied into FRAMES_DIR.

    Returns:
        ``(video_path, meta)`` where ``meta`` is the dict written to META_FILE.

    Raises:
        RuntimeError: If ``pipe_obj`` is None.
        ValueError: If ``frames`` is below 1 or ``fps`` is not positive.
        subprocess.CalledProcessError: If ffmpeg fails to encode the video.
    """
    if pipe_obj is None:
        raise RuntimeError('Pipeline not loaded ‚Äî cannot render. Set MODEL_DIR and load pipeline (Cell 5) or run generation elsewhere.')
    # Reject settings that would only fail later inside ffmpeg, after the expensive part.
    if frames < 1:
        raise ValueError('frames must be at least 1')
    if fps <= 0:
        raise ValueError('fps must be positive')
    actual_fps = max(1, math.floor(fps/2)) if slow_mode else fps
    video_name = make_unique_name(prefix='hear2see', ext='.mp4')
    video_path = os.path.join(OUT_VIDEO_DIR, video_name)
    tmp = tempfile.mkdtemp(prefix='sd_frames_')
    try:
        for i in range(frames):
            try:
                gen = torch.Generator(device=pipe_obj.device).manual_seed(seed + i)
            except Exception:
                # Fall back to a default-device generator when one cannot be created
                # for the pipeline's device.
                gen = torch.Generator().manual_seed(seed + i)
            out = pipe_obj(prompt, negative_prompt=neg, guidance_scale=cfg, num_inference_steps=steps, generator=gen)
            img = out.images[0]
            fname = os.path.join(tmp, f'frame_{i:03d}.png')
            img.save(fname)
        subprocess.run([
            'ffmpeg','-y','-hide_banner','-loglevel','error',
            '-framerate', str(actual_fps),
            '-i', os.path.join(tmp,'frame_%03d.png'),
            '-c:v','libx264','-pix_fmt','yuv420p','-crf','18', video_path
        ], check=True)
        if keep_frames:
            base = os.path.splitext(video_name)[0]
            dst = os.path.join(FRAMES_DIR, base)
            os.makedirs(dst, exist_ok=True)
            for f in sorted(os.listdir(tmp)):
                shutil.copy2(os.path.join(tmp,f), os.path.join(dst,f))
    finally:
        # The temporary frames are removed whether or not rendering succeeded.
        shutil.rmtree(tmp, ignore_errors=True)

    # Timezone-aware replacement for the deprecated datetime.utcnow(); tzinfo is dropped
    # so the stored value keeps its "<naive ISO timestamp>Z" form.
    now_utc = datetime.datetime.now(datetime.timezone.utc).replace(tzinfo=None)
    meta = {
        'timestamp': now_utc.isoformat() + 'Z',
        'audio_path': saved_audio_path,
        'video_path': video_path,
        'prompt': prompt,
        'steps': steps,
        'frames': frames,
        'fps': fps,
        'actual_fps': actual_fps,  # rate actually encoded (halved in slow mode)
        'cfg': cfg,
        'seed': seed,
        'slow_mode': bool(slow_mode)
    }
    with open(META_FILE, 'a', encoding='utf-8') as mf:
        mf.write(json.dumps(meta, ensure_ascii=False) + '\n')
    return video_path, meta

## Cell 7 — Build Gradio UI (Transcribe -> Edit -> Generate; Slow Mode included)

In [8]:
import gradio as gr, os

def transcribe_only_ui(audio_file, style):
    """Gradio callback: archive the audio, transcribe it, and suggest a prompt.

    Returns a ``(transcript, suggested_prompt)`` pair. With no audio the transcript is
    ``"(no audio)"``; on any failure the transcript carries the error text. In both of
    those cases the suggested prompt is empty.
    """
    if audio_file is not None:
        try:
            stored_path = save_uploaded_audio(audio_file)
            # The file is archived above, so the transcriber must not copy it again.
            spoken_text = transcribe_vosk(stored_path, save_audio_flag=False)  # already saved
            return spoken_text, build_prompt(spoken_text, style)
        except Exception as e:
            return f"(transcribe error: {e})", ""
    return "(no audio)", ""

def generate_from_prompt_ui(prompt_text, steps, frames, fps, cfg, seed, slow_mode, audio_file):
    """Gradio callback: render a video for the edited prompt and return its path.

    Args:
        prompt_text: Prompt from the editable textbox; blank input produces no video.
        steps, frames, fps, cfg, seed, slow_mode: Generation settings from the UI,
            coerced to the numeric/bool types ``render_and_save`` expects.
        audio_file: Optional source audio; archived so its path lands in the metadata.

    Returns:
        Path of the generated MP4, or ``None`` when the prompt is blank.

    Raises:
        gr.Error: If generation fails, so Gradio shows the message to the user. The
            output component is a video, so an error string cannot be returned as its
            value (it would be treated as a file path).
    """
    if not prompt_text or not prompt_text.strip():
        return None
    saved_audio = ""
    if audio_file:
        try:
            saved_audio = save_uploaded_audio(audio_file)
        except Exception as exc:
            # Archiving is best effort; generation does not need the audio itself.
            print(f"Warning: could not archive audio ({exc}); continuing without it.")
            saved_audio = ""
    try:
        video_path, meta = render_and_save(pipe, prompt_text.strip(), saved_audio,
                                          steps=int(steps), frames=int(frames), fps=int(fps),
                                          cfg=float(cfg), seed=int(seed), slow_mode=bool(slow_mode),
                                          keep_frames=False)
    except Exception as e:
        raise gr.Error(f"generation error: {e}") from e
    print("Saved video:", video_path)
    return video_path

# Declarative UI definition. Components are laid out in creation order inside one row of
# three equally scaled columns: audio input + style, transcript + editable prompt, and
# generation settings + output video. Nothing is launched here.
with gr.Blocks(title='Hear2See ‚Äî Transcribe, Edit Prompt, Generate Video') as demo:
    gr.Markdown("## Hear2See ‚Äî Transcribe audio ‚Üí Edit prompt ‚Üí Generate video")
    with gr.Row():
        # Column 1: audio source and prompt style. type="filepath" means the callbacks
        # receive a path string rather than raw samples.
        with gr.Column(scale=1):
            audio = gr.Audio(sources=["microphone","upload"], type="filepath", label="üéôÔ∏è Speak or upload audio")
            style = gr.Radio(["cinematic","realistic","anime"], value="cinematic", label="Prompt style")
            trans_btn = gr.Button("üìù Transcribe", variant="secondary")
        # Column 2: read-only transcript next to the prompt the user may edit.
        with gr.Column(scale=1):
            transcript_o = gr.Textbox(label="Transcript (from audio)", interactive=False, lines=6)
            prompt_i = gr.Textbox(label="Editable Prompt", interactive=True, lines=6,
                                  placeholder="Click Transcribe to auto-fill, or type your own‚Ä¶")
        # Column 3: generation settings (collapsed by default), trigger button and result.
        with gr.Column(scale=1):
            with gr.Accordion("Advanced generation settings", open=False):
                steps = gr.Slider(10, 50, value=26, step=1, label="Steps")
                frames = gr.Slider(8, 48, value=30, step=1, label="Frames")
                fps = gr.Slider(2, 24, value=6, step=1, label="FPS (lower = slower)")
                cfg = gr.Slider(4.0, 12.0, value=7.0, step=0.5, label="CFG Scale")
                seed = gr.Slider(1, 9999, value=1234, step=1, label="Seed")
                slow_mode = gr.Checkbox(value=True, label="üéûÔ∏è Slow mode (half-speed)")
            gen_btn = gr.Button("üé¨ Generate Video", variant="primary")
            video_o = gr.Video(label="Output video")

    # Wiring: Transcribe fills both the transcript and the editable prompt; Generate reads
    # the (possibly edited) prompt plus the settings and shows the resulting video.
    trans_btn.click(fn=transcribe_only_ui, inputs=[audio, style], outputs=[transcript_o, prompt_i])
    gen_btn.click(fn=generate_from_prompt_ui, inputs=[prompt_i, steps, frames, fps, cfg, seed, slow_mode, audio], outputs=[video_o])

print("‚úÖ UI defined. Run the next cell (Cell 8) to launch the app.")

‚úÖ UI defined. Run the next cell (Cell 8) to launch the app.


## Cell 8 — Robust Gradio launcher (run last)

In [None]:
import os, socket, traceback, gradio as gr
from packaging import version

# Fail fast with an actionable message when the UI cell has not been run in this kernel.
if 'demo' not in globals():
    raise RuntimeError("UI variable `demo` not defined. Run the UI cell (Cell 7) first.")

def find_free_port(start=7860, end=7880):
    """Return the first TCP port in ``[start, end]`` that can be bound on all interfaces.

    Each candidate is probed by binding a throwaway socket (with SO_REUSEADDR set) to
    ``0.0.0.0``; the probe socket is always closed, including when the bind fails.

    Args:
        start: First port to try (inclusive).
        end: Last port to try (inclusive).

    Returns:
        The first bindable port, or ``None`` if none could be bound or the range is empty.
    """
    for p in range(start, end + 1):
        try:
            # The context manager releases the descriptor even when bind() raises.
            with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
                s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                s.bind(('0.0.0.0', p))
            return p
        except OSError:
            continue
    return None

# Choose the port: first free one in 7860-7880, else fall back to 7860. It is also
# exported through the environment variable set below.
port = find_free_port(7860, 7880) or 7860
os.environ['GRADIO_SERVER_PORT'] = str(port)
print("Using GRADIO_SERVER_PORT =", port)
print("Detected gradio:", gr.__version__)

# queue compatibility: try the bounded queue first, then the bare call, and carry on
# without a queue if both are rejected.
for queue_kwargs in ({"max_size": 8}, {}):
    try:
        demo.queue(**queue_kwargs)
        break
    except Exception:
        continue

launch_kwargs = {"share": True, "debug": True, "prevent_thread_lock": True}

# Colab is detected by whether its helper package can be imported.
try:
    import google.colab
except Exception:
    is_colab = False
else:
    is_colab = True

if is_colab:
    # In Colab, skip the inline iframe and browser pop-up and bind explicitly to the chosen port.
    launch_kwargs.update({
        "inline": False,
        "inbrowser": False,
        "server_name": "0.0.0.0",
        "server_port": port,
    })

print("Launching Gradio with kwargs:", launch_kwargs)
try:
    demo.launch(**launch_kwargs)
except Exception:
    print("‚ùå Exception launching Gradio ‚Äî full traceback:")
    traceback.print_exc()
    print("Hint: if port issues occur, restart runtime and re-run cells in order.")

Using GRADIO_SERVER_PORT = 7860
Detected gradio: 5.49.1
Launching Gradio with kwargs: {'share': True, 'debug': True, 'prevent_thread_lock': True, 'inline': False, 'inbrowser': False, 'server_name': '0.0.0.0', 'server_port': 7860}
Colab notebook detected. This cell will run indefinitely so that you can see errors and logs. To turn off, set debug=False in launch().
* Running on public URL: https://32181ed242007a311a.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)


  ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
  ts = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')


  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  'timestamp': datetime.datetime.utcnow().isoformat() + 'Z',


Saved video: /content/drive/MyDrive/hear2see/output_videos/hear2see_20251114T090931Z_363d7f69.mp4


  ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
  ts = datetime.datetime.utcnow().strftime("%Y%m%dT%H%M%SZ")
  ts = datetime.datetime.utcnow().strftime('%Y%m%dT%H%M%SZ')


  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  0%|          | 0/26 [00:00<?, ?it/s]

  'timestamp': datetime.datetime.utcnow().isoformat() + 'Z',


Saved video: /content/drive/MyDrive/hear2see/output_videos/hear2see_20251114T092955Z_bac859f4.mp4
