Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 11 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,6 +41,17 @@ stackvox speak "Hi" --voice bf_emma # same, explicit subcommand
stackvox speak "save" --out a.wav # write wav instead of playing
stackvox welcome # multilingual welcome (6 languages)
stackvox voices # list all voice ids
echo "from a pipe" | stackvox # piped stdin works for speak/say
stackvox speak --file message.txt # read a whole file
```

Bash completion:

```bash
eval "$(stackvox completion bash)" # current shell
# or persist:
stackvox completion bash > ~/.stackvox-completion.bash
echo 'source ~/.stackvox-completion.bash' >> ~/.bashrc
```

Daemon mode (keeps the model resident so each subsequent call is instant):
Expand Down
80 changes: 78 additions & 2 deletions stackvox/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,58 @@ def _configure_logging() -> None:
)


# Names that main() accepts as an explicit subcommand. A bare first argument
# that is not in this set (and does not start with "-") is treated as text
# for `speak`.
SUBCOMMANDS = {"serve", "stop", "status", "say", "speak", "voices", "welcome", "completion"}

# Bash completion script, printed verbatim by the `completion bash` subcommand.
# Kept as a raw string so the backslashes in the script reach the shell
# untouched. The word list offered for the first argument mirrors SUBCOMMANDS;
# update both together when a subcommand is added or removed.
_BASH_COMPLETION = r"""# stackvox bash completion. Install with one of:
# eval "$(stackvox completion bash)" # current shell
# stackvox completion bash > ~/.stackvox-completion.bash && \
# echo 'source ~/.stackvox-completion.bash' >> ~/.bashrc
_stackvox_completion() {
local cur prev subcommand
cur="${COMP_WORDS[COMP_CWORD]}"
prev="${COMP_WORDS[COMP_CWORD-1]}"
subcommand="${COMP_WORDS[1]:-}"

if [[ ${COMP_CWORD} -eq 1 ]]; then
COMPREPLY=( $(compgen -W "speak say serve stop status voices welcome completion" -- "$cur") )
return 0
fi

case "$prev" in
--file|--out)
COMPREPLY=( $(compgen -f -- "$cur") )
return 0
;;
--speed)
COMPREPLY=( $(compgen -W "0.8 0.9 1.0 1.1 1.2 1.5" -- "$cur") )
return 0
;;
--lang)
COMPREPLY=( $(compgen -W "en-us en-gb fr-fr it hi pt-br es ja zh" -- "$cur") )
return 0
;;
esac

case "$subcommand" in
speak)
COMPREPLY=( $(compgen -W "--voice --speed --lang --file --out --help" -- "$cur") )
;;
say)
COMPREPLY=( $(compgen -W "--voice --speed --lang --file --fallback-say --help" -- "$cur") )
;;
serve)
COMPREPLY=( $(compgen -W "--voice --speed --lang --help" -- "$cur") )
;;
completion)
COMPREPLY=( $(compgen -W "bash" -- "$cur") )
;;
*)
COMPREPLY=( $(compgen -W "--help" -- "$cur") )
;;
esac
}
complete -F _stackvox_completion stackvox
"""

WELCOME_LINES = [
("bf_emma", "en-gb", "Welcome to stackvox."),
Expand Down Expand Up @@ -59,6 +110,9 @@ def _build_parser() -> argparse.ArgumentParser:
sub.add_parser("voices", help="List available voices")
sub.add_parser("welcome", help="Play a multilingual welcome message")

p_completion = sub.add_parser("completion", help="Print a shell completion script")
p_completion.add_argument("shell", choices=["bash"], help="Shell to generate completion for")

return parser


Expand All @@ -69,11 +123,20 @@ def _add_voice_args(parser: argparse.ArgumentParser) -> None:


def _read_text(args: argparse.Namespace) -> str | None:
"""Resolve the text to speak from --file, the positional, or piped stdin.

Precedence: --file > positional text > stdin (when piped, not a TTY).
"""
file: Path | None = getattr(args, "file", None)
if file is not None:
return file.read_text(encoding="utf-8")
text: str | None = getattr(args, "text", None)
return text
if text is not None:
return text
if not sys.stdin.isatty():
piped = sys.stdin.read()
return piped if piped.strip() else None
return None


def _cmd_speak(args: argparse.Namespace) -> int:
Expand Down Expand Up @@ -149,12 +212,24 @@ def _cmd_welcome(_: argparse.Namespace) -> int:
return 0


def _cmd_completion(args: argparse.Namespace) -> int:
    """Print the completion script for ``args.shell`` to stdout.

    Returns 0 after printing the bash script, or 1 (with a message on
    stderr) for any other shell name.
    """
    shell = args.shell
    if shell != "bash":
        # The parser restricts `shell` via `choices=`, so this is a
        # defensive fallback rather than an expected path.
        print(f"unsupported shell: {shell}", file=sys.stderr)
        return 1
    print(_BASH_COMPLETION)
    return 0


def main() -> int:
_configure_logging()
argv = sys.argv[1:]
# Back-compat: `stackvox "text"` with no subcommand → speak.
# Same shortcut for piped stdin: `echo hi | stackvox` → speak.
if argv and argv[0] not in SUBCOMMANDS and not argv[0].startswith("-"):
argv = ["speak", *argv]
elif not argv and not sys.stdin.isatty():
argv = ["speak"]

parser = _build_parser()
args = parser.parse_args(argv)
Expand All @@ -171,6 +246,7 @@ def main() -> int:
"status": _cmd_status,
"voices": _cmd_voices,
"welcome": _cmd_welcome,
"completion": _cmd_completion,
}
return handlers[args.cmd](args)

Expand Down
51 changes: 48 additions & 3 deletions stackvox/engine.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,8 @@
from __future__ import annotations

import logging
import sys
import threading
import urllib.request
from pathlib import Path

Expand All @@ -22,15 +24,48 @@
_VOICES_URL = "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files-v1.0/voices-v1.0.bin"


def _download_with_progress(url: str, dest: Path) -> None:
    """Stream a URL to a file, printing percentage updates to stderr.

    The ~340 MB Kokoro model takes long enough on first run that a silent
    download looks like a hang; this gives users feedback without pulling
    in tqdm. When the server does not report a size (``totalsize <= 0``)
    no progress line is printed.

    The data is written to a sibling ``<name>.part`` file and renamed onto
    ``dest`` only after the transfer finishes, so a failed or interrupted
    download never leaves a truncated file at ``dest``.

    Args:
        url: Source URL to fetch.
        dest: Final path of the downloaded file.

    Raises:
        OSError: On network or filesystem failure (``urllib.error.URLError``
            and ``ContentTooShortError`` are subclasses). The partial file
            is removed before the exception propagates.
    """
    last_pct = -1
    label = dest.name
    partial = dest.with_name(dest.name + ".part")

    def hook(blocks: int, blocksize: int, totalsize: int) -> None:
        nonlocal last_pct
        if totalsize <= 0:
            return
        # The final block usually overshoots the real size, hence the clamp.
        pct = min(100, blocks * blocksize * 100 // totalsize)
        if pct != last_pct:
            mb_total = totalsize / 1_000_000
            print(
                f"\r[stackvox] downloading {label} {pct:3d}% ({mb_total:.0f} MB)",
                end="",
                file=sys.stderr,
                flush=True,
            )
            last_pct = pct

    try:
        urllib.request.urlretrieve(url, partial, reporthook=hook)
        partial.replace(dest)
    except BaseException:
        # Also covers KeyboardInterrupt: never leave a half-written file.
        partial.unlink(missing_ok=True)
        raise
    finally:
        if last_pct >= 0:
            # The progress line was written with end="" and carriage
            # returns only, so terminate it (TTY or not) to keep later
            # output, including a traceback, on its own line.
            print("", file=sys.stderr, flush=True)


def _ensure_models(cache_dir: Path) -> tuple[Path, Path]:
    """Make sure the model and voices files are present in ``cache_dir``.

    Creates ``cache_dir`` if needed and downloads whichever of the two files
    does not exist yet. Files that already exist are left untouched.

    Args:
        cache_dir: Directory that holds (or will hold) the downloaded files.

    Returns:
        ``(model_path, voices_path)`` inside ``cache_dir``.
    """
    cache_dir.mkdir(parents=True, exist_ok=True)
    model_path = cache_dir / "kokoro-v1.0.onnx"
    voices_path = cache_dir / "voices-v1.0.bin"
    for path, url in [(model_path, _MODEL_URL), (voices_path, _VOICES_URL)]:
        if path.exists():
            continue
        logger.info("downloading %s...", path.name)
        # Single download per file, via the progress-reporting helper.
        _download_with_progress(url, path)
    return model_path, voices_path


Expand Down Expand Up @@ -134,12 +169,22 @@ def synth(line: dict) -> tuple[np.ndarray, int]:


_default: Stackvox | None = None
_default_lock = threading.Lock()


def _get_default() -> Stackvox:
    """Lazily build a module-level engine, safe under concurrent first calls.

    Without the lock, two threads racing to call speak() / synthesize() at
    process start could each instantiate Stackvox — meaning two 340 MB model
    loads. Double-checked locking keeps the fast path lock-free once
    initialised.

    Returns:
        The shared module-level ``Stackvox`` instance.
    """
    global _default
    if _default is None:
        with _default_lock:
            # Re-check under the lock: another thread may have finished
            # building the engine while this one was waiting.
            if _default is None:
                _default = Stackvox()
    return _default


Expand Down
50 changes: 50 additions & 0 deletions tests/test_cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,23 @@

from __future__ import annotations

import io

import pytest

from stackvox import cli


@pytest.fixture(autouse=True)
def _stdin_is_a_tty(mocker):
    """Make stdin report as a TTY for every test unless a test overrides it.

    This keeps the piped-stdin shortcut switched off by default. Tests that
    exercise the piped path opt in by patching isatty to False and providing
    a real stdin StringIO.
    """
    current_stdin = cli.sys.stdin
    mocker.patch.object(current_stdin, "isatty", return_value=True)


def test_bare_text_routes_to_speak(mocker):
speak = mocker.patch.object(cli, "_cmd_speak", return_value=0)
mocker.patch.object(cli.sys, "argv", ["stackvox", "hello world"])
Expand Down Expand Up @@ -39,3 +51,41 @@ def test_unknown_subcommand_treated_as_speak_text(mocker):
mocker.patch.object(cli.sys, "argv", ["stackvox", "hello"])
cli.main()
assert speak.call_args.args[0].text == "hello"


def test_piped_stdin_with_no_args_routes_to_speak(mocker):
    """With no arguments and piped stdin, main() dispatches to speak."""
    speak_handler = mocker.patch.object(cli, "_cmd_speak", return_value=0)
    mocker.patch.object(cli.sys, "argv", ["stackvox"])
    mocker.patch.object(cli.sys.stdin, "isatty", return_value=False)
    mocker.patch.object(cli.sys, "stdin", io.StringIO("hello from stdin\n"))

    exit_code = cli.main()

    assert exit_code == 0
    parsed_args = speak_handler.call_args.args[0]
    assert parsed_args.cmd == "speak"


def test_read_text_prefers_file_then_positional_then_stdin(mocker, tmp_path):
    """Check _read_text source order: --file, then positional, then stdin."""
    source_file = tmp_path / "src.txt"
    source_file.write_text("from-file", encoding="utf-8")

    # --file beats a positional given alongside it.
    both = mocker.MagicMock(file=source_file, text="from-positional")
    assert cli._read_text(both) == "from-file"

    # Without --file the positional is used.
    positional_only = mocker.MagicMock(file=None, text="from-positional")
    assert cli._read_text(positional_only) == "from-positional"

    # With neither, piped stdin is the last resort.
    mocker.patch.object(cli.sys.stdin, "isatty", return_value=False)
    mocker.patch.object(cli.sys, "stdin", io.StringIO("from-stdin"))
    neither = mocker.MagicMock(file=None, text=None)
    assert cli._read_text(neither) == "from-stdin"


def test_completion_bash_emits_complete_script(mocker, capsys):
    """`stackvox completion bash` exits 0 and prints the completion script."""
    mocker.patch.object(cli.sys, "argv", ["stackvox", "completion", "bash"])

    exit_code = cli.main()
    stdout = capsys.readouterr().out

    assert exit_code == 0
    # Both the function definition and its registration must be present.
    assert "_stackvox_completion()" in stdout
    assert "complete -F _stackvox_completion stackvox" in stdout