# VSCode Colab-Kernel Runner: CosyVoice WebSocket

This notebook is meant to be run from VSCode against a Colab kernel. It avoids hardcoded `/content/tts_ws` paths and works whether the repo root is the current directory, its `tts_ws` subdirectory, or a fresh clone under `WORK_ROOT`.

Notes:
- For exact reproducibility, set `GIT_REF` to a commit SHA.
- Uncommitted changes on your local machine are not visible to the remote kernel; commit and push them before running the clone cell.


In [None]:
import os
import pathlib
import textwrap

# Auto-detect a usable checkout relative to the kernel's working directory:
# either the directory itself is a git checkout holding the runner script, or
# the runner script sits in its tts_ws/ subdirectory.
cwd = pathlib.Path.cwd()
auto_repo_root = None
if (cwd / '.git').exists() and (cwd / 'colab_t4_cosyvoice.sh').exists():
    auto_repo_root = str(cwd)
elif (cwd / 'tts_ws' / 'colab_t4_cosyvoice.sh').exists():
    auto_repo_root = str(cwd / 'tts_ws')

# Every setting may be overridden by an environment variable of the same name.
REPO_URL = os.environ.get('REPO_URL', 'https://github.com/Aryan-Seth/sigiq_task.git')
GIT_REF = os.environ.get('GIT_REF', 'main')
# Prefer /content when it exists; otherwise use a scratch directory under /tmp.
WORK_ROOT = os.environ.get('WORK_ROOT', '/content' if os.path.isdir('/content') else '/tmp/colab_ws')
REPO_PARENT = os.environ.get('REPO_PARENT', f'{WORK_ROOT}/sigiq_takehome')
# An explicit REPO_ROOT wins, then the auto-detected checkout, then the clone target.
REPO_ROOT = os.environ.get('REPO_ROOT', auto_repo_root or REPO_PARENT)
COSYVOICE_REPO_DIR = os.environ.get('COSYVOICE_REPO_DIR', f'{WORK_ROOT}/CosyVoice')
COSYVOICE_MODEL_DIR = os.environ.get('COSYVOICE_MODEL_DIR', 'FunAudioLLM/CosyVoice2-0.5B')
COSYVOICE_MODE = os.environ.get('COSYVOICE_MODE', 'sft')
PORT = os.environ.get('PORT', '8000')
HOST = os.environ.get('HOST', '0.0.0.0')

# Write the resolved values back into the process environment; the %%bash
# cells in this notebook read these same variable names.
resolved_config = {
    'REPO_URL': REPO_URL,
    'GIT_REF': GIT_REF,
    'WORK_ROOT': WORK_ROOT,
    'REPO_PARENT': REPO_PARENT,
    'REPO_ROOT': REPO_ROOT,
    'COSYVOICE_REPO_DIR': COSYVOICE_REPO_DIR,
    'COSYVOICE_MODEL_DIR': COSYVOICE_MODEL_DIR,
    'COSYVOICE_MODE': COSYVOICE_MODE,
    'PORT': PORT,
    'HOST': HOST,
}
os.environ.update(resolved_config)

# Build the summary from the exported mapping so that every exported setting
# (including HOST) is shown and the two can never drift apart.
summary_lines = [f'cwd={cwd}', f'auto_repo_root={auto_repo_root}']
summary_lines.extend(f'{name}={value}' for name, value in resolved_config.items())
print('\n' + '\n'.join(summary_lines) + '\n')


In [None]:
%%bash
# Locate an existing checkout or clone a fresh one, switch it to GIT_REF, and
# print the commit and directory in use.
set -euxo pipefail

# Same defaults as the configuration cell, used when a variable is unset or empty.
REPO_URL="${REPO_URL:-https://github.com/Aryan-Seth/sigiq_task.git}"
GIT_REF="${GIT_REF:-main}"
WORK_ROOT="${WORK_ROOT:-/content}"
# A WORK_ROOT that does not exist is replaced by /tmp/colab_ws, created on demand.
[[ -d "$WORK_ROOT" ]] || WORK_ROOT="/tmp/colab_ws"
mkdir -p "$WORK_ROOT"
REPO_PARENT="${REPO_PARENT:-$WORK_ROOT/sigiq_takehome}"
REPO_ROOT="${REPO_ROOT:-$REPO_PARENT}"

# Informational only; the cell continues when nvidia-smi is missing or fails.
nvidia-smi || true

if [[ -d "$REPO_ROOT/.git" ]]; then
  # Best effort: fetch/checkout failures are ignored for a checkout found at REPO_ROOT.
  echo "Using existing git repo at REPO_ROOT=$REPO_ROOT"
  cd "$REPO_ROOT"
  git fetch --all --prune || true
  git checkout "$GIT_REF" || true
elif [[ -d "$REPO_PARENT/.git" ]]; then
  # A previous clone: here fetch and checkout must succeed or the cell fails.
  echo "Using existing git repo at REPO_PARENT=$REPO_PARENT"
  cd "$REPO_PARENT"
  git fetch --all --prune
  git checkout "$GIT_REF"
else
  # No checkout yet: remove whatever is at REPO_PARENT and clone from scratch.
  echo "Cloning $REPO_URL -> $REPO_PARENT"
  rm -rf "$REPO_PARENT"
  git clone "$REPO_URL" "$REPO_PARENT"
  cd "$REPO_PARENT"
  git checkout "$GIT_REF"
fi

# REPO_ROOT may point at a directory that does not exist. Resolve it to the
# directory holding the runner script, which may sit at the top of the checkout
# or in its tts_ws/ subdirectory. Testing for the file (rather than only for the
# REPO_PARENT directory) keeps the tts_ws/ case reachable.
if [[ ! -d "$REPO_ROOT" ]]; then
  if [[ -f "$REPO_PARENT/colab_t4_cosyvoice.sh" ]]; then
    REPO_ROOT="$REPO_PARENT"
  elif [[ -f "$REPO_PARENT/tts_ws/colab_t4_cosyvoice.sh" ]]; then
    REPO_ROOT="$REPO_PARENT/tts_ws"
  elif [[ -d "$REPO_PARENT" ]]; then
    # Runner script not found in either place; fall back to the checkout itself.
    REPO_ROOT="$REPO_PARENT"
  else
    echo "Could not find repo root at REPO_ROOT=$REPO_ROOT, $REPO_PARENT, or $REPO_PARENT/tts_ws" >&2
    exit 1
  fi
fi

echo "Repo HEAD: $(git -C "$REPO_ROOT" rev-parse --short HEAD)"
echo "Repo root: $REPO_ROOT"
# With pipefail, ls can be reported as failed (SIGPIPE) once head has read its
# 60 lines and closed the pipe; the listing is informational, so do not let
# that abort the cell.
ls -la "$REPO_ROOT" | head -n 60 || true


In [None]:
%%bash
# Install step: run the project's runner script in `install` mode from the repo root.
# What `install` does is defined in colab_t4_cosyvoice.sh, which is not shown here.
set -euxo pipefail

# Use WORK_ROOT from the environment when set; a directory that does not exist
# is replaced by /tmp/colab_ws.
WORK_ROOT="${WORK_ROOT:-/content}"
[[ -d "$WORK_ROOT" ]] || WORK_ROOT="/tmp/colab_ws"
REPO_ROOT="${REPO_ROOT:-$WORK_ROOT/sigiq_takehome}"
# If REPO_ROOT is not a directory, try the tts_ws/ subdirectory of the default clone location.
if [[ ! -d "$REPO_ROOT" ]]; then
  REPO_ROOT="$WORK_ROOT/sigiq_takehome/tts_ws"
fi
COSYVOICE_REPO_DIR="${COSYVOICE_REPO_DIR:-$WORK_ROOT/CosyVoice}"

# With `set -e`, a failing cd (no usable repo root) aborts the cell here.
cd "$REPO_ROOT"
# Pass the paths resolved above explicitly so the script sees this cell's values.
WORK_ROOT="$WORK_ROOT" REPO_ROOT="$REPO_ROOT" COSYVOICE_REPO_DIR="$COSYVOICE_REPO_DIR" bash colab_t4_cosyvoice.sh install


In [None]:
%%bash
# Start step: stop any previously started server, then run the project's runner
# script in `start` mode. What `start` does is defined in colab_t4_cosyvoice.sh,
# which is not shown here.
set -euxo pipefail
# Kill processes whose command line matches the uvicorn invocation; output is
# discarded and `|| true` keeps the cell going when pkill reports a non-zero status.
pkill -f "uvicorn app.server:app" >/dev/null 2>&1 || true

# Use WORK_ROOT from the environment when set; a directory that does not exist
# is replaced by /tmp/colab_ws.
WORK_ROOT="${WORK_ROOT:-/content}"
[[ -d "$WORK_ROOT" ]] || WORK_ROOT="/tmp/colab_ws"
REPO_ROOT="${REPO_ROOT:-$WORK_ROOT/sigiq_takehome}"
# If REPO_ROOT is not a directory, try the tts_ws/ subdirectory of the default clone location.
if [[ ! -d "$REPO_ROOT" ]]; then
  REPO_ROOT="$WORK_ROOT/sigiq_takehome/tts_ws"
fi
# Model and server settings, with the same defaults as the configuration cell.
COSYVOICE_REPO_DIR="${COSYVOICE_REPO_DIR:-$WORK_ROOT/CosyVoice}"
COSYVOICE_MODEL_DIR="${COSYVOICE_MODEL_DIR:-FunAudioLLM/CosyVoice2-0.5B}"
COSYVOICE_MODE="${COSYVOICE_MODE:-sft}"
PORT="${PORT:-8000}"
HOST="${HOST:-0.0.0.0}"

# With `set -e`, a failing cd (no usable repo root) aborts the cell here.
cd "$REPO_ROOT"
# All resolved settings are passed explicitly. TTS_PROFILE=1 is set only for this
# invocation; NOTE(review): presumably enables profiling in the server — confirm
# against the runner script.
WORK_ROOT="$WORK_ROOT" REPO_ROOT="$REPO_ROOT" COSYVOICE_REPO_DIR="$COSYVOICE_REPO_DIR" COSYVOICE_MODEL_DIR="$COSYVOICE_MODEL_DIR" COSYVOICE_MODE="$COSYVOICE_MODE" PORT="$PORT" HOST="$HOST" TTS_PROFILE=1 bash colab_t4_cosyvoice.sh start


In [None]:
import asyncio
import base64
import json
import os
import wave

import websockets

# Smoke-test settings, taken from the environment with standalone defaults:
# the server port as an integer and the path of the WAV file to write.
PORT = int(os.getenv('PORT', '8000'))
WORK_ROOT = os.getenv('WORK_ROOT', '/tmp')
OUT_WAV = os.path.join(WORK_ROOT, 'cosy_smoke.wav')

async def run_smoke() -> None:
    """Send one short utterance to the local TTS websocket and save the audio.

    Connects to ``ws://127.0.0.1:{PORT}/tts``, sends four JSON messages tagged
    with run id ``colab-smoke`` (a single space, the sentence, a flush, and a
    final empty message), then reads replies until the connection closes.
    A reply whose ``type`` is ``metrics`` is kept for the final report; any
    other reply contributes its base64 ``audio`` field, if present, to the
    output. The collected bytes are written to ``OUT_WAV`` and a short summary
    is printed.

    NOTE(review): the WAV header is hard-coded to mono, 16-bit, 44100 Hz;
    confirm this matches the PCM format the server emits.
    """
    pcm_parts = []
    metrics = None
    outgoing = (
        (' ', False),
        ('Hello from VSCode Colab-kernel CosyVoice websocket.', False),
        ('', True),
        ('', False),
    )
    async with websockets.connect(f'ws://127.0.0.1:{PORT}/tts', max_size=None) as ws:
        for text, flush in outgoing:
            await ws.send(json.dumps({'text': text, 'flush': flush, 'run_id': 'colab-smoke'}))
        try:
            while True:
                msg = await ws.recv()
                payload = json.loads(msg if isinstance(msg, str) else msg.decode('utf-8'))
                if payload.get('type') == 'metrics':
                    metrics = payload.get('metrics')
                    continue
                audio_b64 = payload.get('audio')
                # Decode only real, non-empty strings. Coercing with str() would
                # turn a JSON null into 'None', which is valid base64 and would
                # inject three junk bytes into the PCM stream.
                if isinstance(audio_b64, str) and audio_b64:
                    pcm_parts.append(base64.b64decode(audio_b64))
        except websockets.ConnectionClosed:
            # The closed connection is what ends the receive loop.
            pass

    pcm = b''.join(pcm_parts)
    # dirname is '' when OUT_WAV is a bare filename, and os.makedirs('') raises.
    out_dir = os.path.dirname(OUT_WAV)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)
    with wave.open(OUT_WAV, 'wb') as wf:
        wf.setnchannels(1)
        wf.setsampwidth(2)
        wf.setframerate(44100)
        wf.writeframes(pcm)

    print('audio bytes:', len(pcm))
    print('wrote:', OUT_WAV)
    print('server metrics:', metrics)

def _run_coroutine_blocking(coro_factory):
    """Run ``coro_factory()`` to completion and return its result.

    ``asyncio.run`` raises ``RuntimeError`` when the calling thread already
    has a running event loop, as happens in notebook kernels that execute
    cells on an event loop. In that case the coroutine is run on a fresh loop
    in a worker thread and this call blocks until it finishes; otherwise
    ``asyncio.run`` is used directly.
    """
    import concurrent.futures

    try:
        asyncio.get_running_loop()
    except RuntimeError:
        # No loop is running in this thread, so asyncio.run is safe.
        return asyncio.run(coro_factory())
    # The coroutine is created inside the worker so it belongs to that loop.
    with concurrent.futures.ThreadPoolExecutor(max_workers=1) as pool:
        return pool.submit(lambda: asyncio.run(coro_factory())).result()


_run_coroutine_blocking(run_smoke)


In [None]:
%%bash
# Benchmark step: run benchmark_ttft.py from the repo root against the server on
# 127.0.0.1:$PORT and write its JSON report under WORK_ROOT.
set -euxo pipefail

# Use WORK_ROOT from the environment when set; a directory that does not exist
# is replaced by /tmp/colab_ws.
WORK_ROOT="${WORK_ROOT:-/content}"
[[ -d "$WORK_ROOT" ]] || WORK_ROOT="/tmp/colab_ws"
REPO_ROOT="${REPO_ROOT:-$WORK_ROOT/sigiq_takehome}"
# If REPO_ROOT is not a directory, try the tts_ws/ subdirectory of the default clone location.
if [[ ! -d "$REPO_ROOT" ]]; then
  REPO_ROOT="$WORK_ROOT/sigiq_takehome/tts_ws"
fi
PORT="${PORT:-8000}"

# With `set -e`, a failing cd (no usable repo root) aborts the cell here.
cd "$REPO_ROOT"
# The flags are interpreted by benchmark_ttft.py, which is not shown here.
# NOTE(review): --no-start-server presumably makes the script use the server
# that is already running instead of launching one — confirm against the script.
python benchmark_ttft.py \
  --uri "ws://127.0.0.1:${PORT}/tts" \
  --backend cosyvoice \
  --no-start-server \
  --lengths 80,160,320 \
  --runs-per-length 2 \
  --chunk-mode ramp \
  --chunk-plan 4,8,32 \
  --delay 0.01 \
  --math-normalizer rule \
  --json-out "${WORK_ROOT}/ttft_cosyvoice_colab.json"

echo "Wrote ${WORK_ROOT}/ttft_cosyvoice_colab.json"


In [None]:
# Optional: public URL for UI testing
import os
import pathlib
import re
import subprocess
import time

# Expose the local server through a cloudflared tunnel and print the public
# *.trycloudflare.com URL once it shows up in the tunnel's log.
port = int(os.environ.get('PORT', '8000'))
work_root = pathlib.Path(os.environ.get('WORK_ROOT', '/tmp'))
work_root.mkdir(parents=True, exist_ok=True)
cf = pathlib.Path('/usr/local/bin/cloudflared')
log = work_root / 'cloudflared.log'

# A bare existence check is not enough: a failed download can leave an empty or
# partial file at the target path, which would then be reused on every rerun.
# Require a non-empty executable, and download via a staging file so the final
# path only ever holds a complete binary.
if not (cf.is_file() and cf.stat().st_size > 0 and os.access(cf, os.X_OK)):
    staging = cf.with_name(cf.name + '.download')
    try:
        subprocess.run(['wget', '-q', 'https://github.com/cloudflare/cloudflared/releases/latest/download/cloudflared-linux-amd64', '-O', str(staging)], check=True)
        subprocess.run(['chmod', '+x', str(staging)], check=True)
        staging.replace(cf)
    finally:
        # Only present when the download or chmod failed before the rename.
        if staging.exists():
            staging.unlink()

# Stop any tunnel left over from a previous run; a non-zero pkill status is ignored.
subprocess.run(['pkill', '-f', 'cloudflared tunnel'], check=False)
with log.open('w') as f:
    # The child keeps its own handle on the log file after this block closes ours.
    tunnel = subprocess.Popen([str(cf), 'tunnel', '--url', f'http://127.0.0.1:{port}'], stdout=f, stderr=subprocess.STDOUT)

# Poll the log for up to 40 * 0.5 s = 20 s until the public URL appears.
url = None
for _ in range(40):
    time.sleep(0.5)
    txt = log.read_text(errors='ignore') if log.exists() else ''
    m = re.search(r'https://[-a-z0-9]+\.trycloudflare\.com', txt)
    if m:
        url = m.group(0)
        break
    if tunnel.poll() is not None:
        # The tunnel process has exited without printing a URL; stop waiting.
        break

print('Cloudflare URL:', url)
print('If None, rerun this cell and inspect', log)
