<a href="https://colab.research.google.com/github/PiratedVirus/Mad-Lab/blob/master/video_call.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# GPU optional
!nvidia-smi -L || echo "No NVIDIA GPU detected."

# System deps
!apt -yq update >/dev/null
!apt -yq install -y ffmpeg >/dev/null

# Python deps (no streamlit-webrtc / no aiortc / no av pin)
!pip -q install --upgrade pip wheel setuptools
!pip -q install streamlit==1.36.0 streamlit-mic-recorder==0.0.4 pycloudflared==0.2.0
!pip -q install faster-whisper==1.0.3 transformers==4.42.4 sentencepiece==0.2.0
!pip -q install soundfile==0.12.1 numpy==1.26.4

/bin/bash: line 1: nvidia-smi: command not found
No NVIDIA GPU detected.


W: Skipping acquire of configured file 'main/source/Sources' as repository 'https://r2u.stat.illinois.edu/ubuntu jammy InRelease' does not seem to provide it (sources.list entry misspelt?)




In [None]:
import os, pathlib, subprocess, sys, shutil

# Final install location; the Streamlit app expects the binary at BIN_DIR / "piper".
BIN_DIR = pathlib.Path("/content/piper_bin")
BIN_DIR.mkdir(parents=True, exist_ok=True)

ARCHIVE = pathlib.Path("/content/piper.tgz")
STAGING_DIR = pathlib.Path("/content/piper_extract")


def fetch(url: str, dest: pathlib.Path) -> bool:
    """Download `url` to `dest` with wget; return True only if a non-empty file arrived.

    The payload is written to a sibling ".part" file and renamed on success, so a
    failed download never leaves an empty or truncated file at `dest`
    (`wget -O` creates the output file even when the request fails).
    """
    part = dest.with_name(dest.name + ".part")
    rc = subprocess.run(
        ["wget", "-q", "--tries=3", "--timeout=20", "-O", str(part), url]
    ).returncode
    if rc != 0 or not part.exists() or part.stat().st_size == 0:
        part.unlink(missing_ok=True)
        return False
    part.replace(dest)
    return True


# Clean any broken partials
for p in ["/content/piper.tgz", "/content/piper_linux_x86_64.tar.gz", "/content/piper_1.2.0_linux_x86_64.tar.gz"]:
    if os.path.exists(p):
        os.remove(p)
shutil.rmtree(STAGING_DIR, ignore_errors=True)

# Release assets to try, newest first. The names previously listed here
# (v1.x "piper_linux_x86_64.tar.gz" etc.) all failed to download.
# NOTE(review): asset names below are taken from the rhasspy/piper releases page — confirm.
CANDIDATES = [
    "https://github.com/rhasspy/piper/releases/download/2023.11.14-2/piper_linux_x86_64.tar.gz",
    "https://github.com/rhasspy/piper/releases/download/v1.2.0/piper_amd64.tar.gz",
]

ok = False
for url in CANDIDATES:
    print("Trying:", url)
    if not fetch(url, ARCHIVE):
        continue
    # Validate tarball before extracting
    listing = subprocess.run(
        ["tar", "-tzf", str(ARCHIVE)],
        stdout=subprocess.DEVNULL,
        stderr=subprocess.DEVNULL,
    )
    if listing.returncode == 0:
        ok = True
        break

if not ok:
    print("⚠️ Could not download a valid Piper tarball. You can still use espeak-ng fallback.")
else:
    # Extract into a private staging directory so the search below cannot pick up
    # unrelated files (or a previous install) elsewhere under /content.
    STAGING_DIR.mkdir(parents=True, exist_ok=True)
    subprocess.run(["tar", "-xzf", str(ARCHIVE), "-C", str(STAGING_DIR)])

    # Find the 'piper' binary wherever it extracted
    found = None
    for root, dirs, files in os.walk(STAGING_DIR):
        candidate = pathlib.Path(root) / "piper"
        if "piper" in files and candidate.is_file():
            found = candidate
            break

    if found:
        # Install the binary's whole directory, not just the executable: the release
        # archive ships shared libraries and espeak-ng data next to the binary
        # (presumably resolved relative to it at run time — verify against the release).
        shutil.rmtree(BIN_DIR, ignore_errors=True)
        shutil.copytree(found.parent, BIN_DIR, symlinks=True)
        os.chmod(BIN_DIR / "piper", 0o755)
        print("✅ Piper binary ready at:", BIN_DIR / "piper")
    else:
        print("⚠️ Piper binary not found after extraction. Will rely on fallback.")

    shutil.rmtree(STAGING_DIR, ignore_errors=True)
    ARCHIVE.unlink(missing_ok=True)

# Voice (US English Lessac - high quality)
VOICE_DIR = pathlib.Path("/content/models/piper")
VOICE_DIR.mkdir(parents=True, exist_ok=True)
# NOTE(review): voices are hosted in the rhasspy/piper-voices Hugging Face repo — confirm path/tag.
VOICE_BASE_URL = "https://huggingface.co/rhasspy/piper-voices/resolve/v1.0.0/en/en_US/lessac/high"
VOICE_FILES = ("en_US-lessac-high.onnx", "en_US-lessac-high.onnx.json")

for name in VOICE_FILES:
    target = VOICE_DIR / name
    # A zero-byte file left by an earlier failed download counts as missing.
    if target.exists() and target.stat().st_size > 0:
        continue
    target.unlink(missing_ok=True)
    if not fetch(f"{VOICE_BASE_URL}/{name}", target):
        print("⚠️ Could not download voice file:", name)

voice_ok = all((VOICE_DIR / name).exists() and (VOICE_DIR / name).stat().st_size > 0
               for name in VOICE_FILES)
print("Voice exists:", voice_ok)

Trying: https://github.com/rhasspy/piper/releases/download/v1.2.0/piper_1.2.0_linux_x86_64.tar.gz
Trying: https://github.com/rhasspy/piper/releases/download/v1.2.0/piper_linux_x86_64.tar.gz
Trying: https://github.com/rhasspy/piper/releases/download/v1.1.0/piper_linux_x86_64.tar.gz
Trying: https://github.com/rhasspy/piper/releases/download/v1.0.0/piper_linux_x86_64.tar.gz
⚠️ Could not download a valid Piper tarball. You can still use espeak-ng fallback.
Voice exists: True


In [None]:
!apt -yq install espeak-ng >/dev/null
!espeak-ng --version | head -n 1



eSpeak NG text-to-speech: 1.50  Data at: /usr/lib/x86_64-linux-gnu/espeak-ng-data


In [None]:
%%writefile /content/app.py
import os, io, tempfile, subprocess
from typing import Optional, Tuple

import numpy as np
import soundfile as sf
import streamlit as st
from streamlit_mic_recorder import mic_recorder
from faster_whisper import WhisperModel

# Page chrome
st.set_page_config(
    page_title="Realtime Any→English Translator (POC)",
    layout="wide",
)
st.title("🌍 Realtime Any→English Translator (POC)")
st.caption("No templates • Whisper for LID+Translate • TTS: Piper/espeak-ng • No storage")

# Whisper settings: read from the environment, falling back to the defaults shown.
#   WHISPER_MODEL   -> "tiny" | "base" | "small"
#   WHISPER_COMPUTE -> "int8" (CPU) | "float16" (GPU)
WHISPER_MODEL_NAME = os.environ.get("WHISPER_MODEL", "base")
WHISPER_COMPUTE = os.environ.get("WHISPER_COMPUTE", "int8")

# Piper assets: the binary is optional, the voice model and its config sit together.
PIPER_BIN = "/content/piper_bin/piper"
PIPER_ONNX = "/content/models/piper/en_US-lessac-high.onnx"
PIPER_JSON = "/content/models/piper/en_US-lessac-high.onnx.json"

@st.cache_resource(show_spinner=True)
def load_whisper():
    """Construct the Whisper model from WHISPER_MODEL_NAME / WHISPER_COMPUTE.

    Decorated with st.cache_resource, so Streamlit builds it once and hands the
    same instance back on later script reruns.
    """
    model = WhisperModel(WHISPER_MODEL_NAME, compute_type=WHISPER_COMPUTE)
    return model

# Module-level handle used by whisper_once().
whisper = load_whisper()

def espeak_tts_to_wav_bytes(text: str) -> bytes:
    """Synthesize `text` with espeak-ng (en-us voice, 170 wpm) and return WAV bytes.

    espeak-ng writes to a temporary .wav file, which is read back and then
    deleted whether or not synthesis succeeded.

    Raises:
        subprocess.CalledProcessError: espeak-ng exited with a non-zero status.
        OSError: espeak-ng could not be started (e.g. it is not installed).
    """
    # Reserve a file name only; espeak-ng itself writes the audio to it.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f_out:
        out_wav = f_out.name
    try:
        # "--" ends option parsing, so text starting with "-" (e.g. "- Hello")
        # is spoken rather than rejected as an unknown command-line option.
        cmd = ["espeak-ng", "-v", "en-us", "-s", "170", "-w", out_wav, "--", text]
        subprocess.run(cmd, check=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        with open(out_wav, "rb") as f:
            return f.read()
    finally:
        try:
            os.remove(out_wav)
        except FileNotFoundError:
            pass

def piper_tts_to_wav_bytes(text: str) -> Optional[bytes]:
    """Synthesize `text` with the Piper binary and return WAV bytes, or None.

    Returns None whenever Piper cannot produce audio, so callers can fall back
    to another engine with `piper_tts_to_wav_bytes(t) or other(t)`:
      * the binary, voice model, or voice config is missing or empty,
      * the binary cannot be executed or exits with a non-zero status,
      * the produced file is empty.
    The text is fed to Piper on stdin; audio is written to a temporary .wav
    file that is always removed afterwards.
    """
    # Zero-byte files (e.g. left by a failed download) are treated as missing.
    required = (PIPER_BIN, PIPER_ONNX, PIPER_JSON)
    if not all(os.path.isfile(p) and os.path.getsize(p) > 0 for p in required):
        return None

    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f_out:
        out_wav = f_out.name
    try:
        cmd = [PIPER_BIN, "--model", PIPER_ONNX, "--config", PIPER_JSON, "--output_file", out_wav]
        subprocess.run(cmd, input=text.encode("utf-8"), check=True,
                       stdout=subprocess.PIPE, stderr=subprocess.PIPE)
        with open(out_wav, "rb") as f:
            data = f.read()
        return data or None
    except (subprocess.CalledProcessError, OSError):
        # Piper is optional: report "no audio" instead of raising.
        return None
    finally:
        try:
            os.remove(out_wav)
        except FileNotFoundError:
            pass

def whisper_once(wav_path: str, task: str, language: Optional[str]=None):
    """
    Run one Whisper pass over an audio file.

    task: "transcribe" (same language) or "translate" (to English).
    language=None => Whisper auto-detects (no hard-coded lists).
    Returns (text, detected_lang, lang_prob, avg_logprob), where avg_logprob is
    the mean of the per-segment avg_logprob values, or -99.0 when no segment
    reports one (including when there are no segments).
    """
    seg_iter, info = whisper.transcribe(
        wav_path,
        language=language,
        task=task,
        vad_filter=True,
        vad_parameters={"min_silence_duration_ms": 200},
        condition_on_previous_text=False,
        temperature=0.0,
        beam_size=5,
        best_of=5,
        compression_ratio_threshold=2.4,
        log_prob_threshold=-1.0,
        no_speech_threshold=0.6
    )
    # The segments come back as a one-shot iterator. Materialize it once so both
    # the text join and the log-prob average see every segment; iterating it
    # twice left the second pass empty and the score stuck at -99.0.
    segments = list(seg_iter)

    text = " ".join(s.text.strip() for s in segments).strip()
    lid  = getattr(info, "language", None)
    lidp = float(getattr(info, "language_probability", 0.0) or 0.0)

    logprobs = [
        value
        for value in (getattr(s, "avg_logprob", None) for s in segments)
        if value is not None
    ]
    lp = float(np.mean(logprobs)) if logprobs else -99.0
    return text, lid, lidp, lp

# UI
st.markdown("**Step 1 — Record a short clip (~3–4s helps for language detection):**")
audio = mic_recorder(start_prompt="Start recording", stop_prompt="Stop", just_once=False)

if audio and "bytes" in audio:
    clip_bytes = audio["bytes"]

    # Streamlit reruns this whole script on every interaction (including the
    # "Speak English" click). Keep the result for the current clip in the
    # session so Whisper only runs when a new recording arrives. This lives in
    # per-session memory only; nothing is written to disk.
    result = st.session_state.get("_last_result")
    if result is None or result["clip"] != clip_bytes:
        # Save the recorded bytes as a temp WAV for Whisper
        with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
            f.write(clip_bytes)
            wav_path = f.name
        try:
            # 1) Transcribe in original language (auto-detect)
            orig_text, lid, lid_prob, _ = whisper_once(wav_path, task="transcribe", language=None)
            # 2) Translate directly to English (auto)
            en_text, _, _, _ = whisper_once(wav_path, task="translate", language=None)
        finally:
            # Remove the temp file even when Whisper raises.
            try:
                os.remove(wav_path)
            except FileNotFoundError:
                pass
        result = {
            "clip": clip_bytes,
            "orig_text": orig_text,
            "lid": lid,
            "lid_prob": lid_prob,
            "en_text": en_text,
        }
        st.session_state["_last_result"] = result

    orig_text = result["orig_text"]
    lid = result["lid"]
    lid_prob = result["lid_prob"]
    en_text = result["en_text"]

    st.markdown(f"**Detected Language:** `{lid or 'unknown'}`  • confidence: `{lid_prob:.2f}`")
    st.markdown("**Original Text (auto):**")
    st.code(orig_text or "—")

    st.markdown("**English Translation (Whisper):**")
    st.code(en_text or "—")

    if en_text and st.button("Speak English"):
        # Prefer Piper; if it is unavailable or fails, fall back to espeak-ng.
        try:
            audio_bytes = piper_tts_to_wav_bytes(en_text)
        except (subprocess.CalledProcessError, OSError):
            audio_bytes = None
        if not audio_bytes:
            try:
                audio_bytes = espeak_tts_to_wav_bytes(en_text)
            except (subprocess.CalledProcessError, OSError) as exc:
                audio_bytes = None
                st.error(f"Text-to-speech failed: {exc}")
        if audio_bytes:
            st.audio(audio_bytes, format="audio/wav")
else:
    st.info("Click **Start recording**, speak, then click **Stop**.")

Overwriting /content/app.py


In [None]:
import os, socket, subprocess, sys, threading, time
from pycloudflared import try_cloudflare

# Good defaults for CPU runtimes
os.environ["WHISPER_MODEL"] = "base"   # use "small" for better accuracy if you can tolerate slower
os.environ["WHISPER_COMPUTE"] = "int8"

APP_PATH = "/content/app.py"
PORT = 8501
STARTUP_TIMEOUT_S = 60


def port_is_open(port: int, host: str = "127.0.0.1") -> bool:
    """Return True if something accepts TCP connections on host:port."""
    try:
        with socket.create_connection((host, port), timeout=1):
            return True
    except OSError:
        return False


def run_streamlit() -> subprocess.Popen:
    """Start the Streamlit app as a background child process and return its handle.

    The child inherits os.environ, so the WHISPER_* settings above reach the app.
    Launching via `sys.executable -m streamlit` uses the kernel's own interpreter.
    """
    return subprocess.Popen([
        sys.executable, "-m", "streamlit", "run", APP_PATH,
        "--server.port", str(PORT),
        "--server.headless", "true",
    ])


# Re-running this cell must not start a second server on the same port.
if not port_is_open(PORT):
    streamlit_proc = run_streamlit()
    deadline = time.monotonic() + STARTUP_TIMEOUT_S
    # Wait until the server actually listens instead of sleeping a fixed time.
    while not port_is_open(PORT):
        if streamlit_proc.poll() is not None:
            raise RuntimeError(f"Streamlit exited early with code {streamlit_proc.returncode}")
        if time.monotonic() > deadline:
            raise TimeoutError(f"Streamlit did not open port {PORT} within {STARTUP_TIMEOUT_S}s")
        time.sleep(0.5)

urls = try_cloudflare(port=PORT)
# Print only the tunnel address, not the whole Urls(...) tuple.
print("Public URL:", urls.tunnel)

 * Running on https://kijiji-thumbzilla-november-gibraltar.trycloudflare.com
 * Traffic stats available on http://127.0.0.1:20241/metrics
Public URL: Urls(tunnel='https://kijiji-thumbzilla-november-gibraltar.trycloudflare.com', metrics='http://127.0.0.1:20241/metrics', process=<Popen: returncode: None args: ['/usr/local/lib/python3.12/dist-packages/pyc...>)
