Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
17 changes: 16 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -65,7 +65,22 @@ stackvox completion bash > ~/.stackvox-completion.bash
echo 'source ~/.stackvox-completion.bash' >> ~/.bashrc
```

Daemon mode (keeps the model resident so each subsequent call is instant):
## Configuration

stackvox reads per-user defaults from a TOML file, so you don't need to repeat `--voice bf_emma --speed 1.1` on every invocation. Set values in `~/.config/stackvox/config.toml` (or `$XDG_CONFIG_HOME/stackvox/config.toml`, or wherever `STACKVOX_CONFIG` points):

```toml
[defaults]
voice = "bf_emma"
speed = 1.1
lang = "en-gb"
```

CLI flags always win over config values, and config values always win over the built-in defaults. A missing file is fine — built-ins apply. A malformed file logs a warning and is ignored.

## Daemon mode

Keeps the model resident so each subsequent call is instant:

```bash
stackvox serve # foreground; run with `nohup stackvox serve &` to background
Expand Down
6 changes: 4 additions & 2 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ dependencies = [
"soundfile>=0.12.1",
"sounddevice>=0.4.6",
"numpy>=1.24",
"tomli>=2.0;python_version<'3.11'",
]

[project.optional-dependencies]
Expand Down Expand Up @@ -109,8 +110,9 @@ no_implicit_optional = true
disallow_untyped_defs = true

[[tool.mypy.overrides]]
# Third-party deps without inline type stubs.
module = ["kokoro_onnx", "sounddevice", "soundfile"]
# Third-party deps without inline type stubs (or, for `tomli`, a 3.10-only
# fallback that's not installed on the typechecker's Python).
module = ["kokoro_onnx", "sounddevice", "soundfile", "tomli"]
ignore_missing_imports = true

[[tool.mypy.overrides]]
Expand Down
24 changes: 13 additions & 11 deletions stackvox/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,8 +9,8 @@

import soundfile as sf

from stackvox import daemon
from stackvox.engine import DEFAULT_LANG, DEFAULT_SPEED, DEFAULT_VOICE, Stackvox
from stackvox import config, daemon
from stackvox.engine import Stackvox


def _configure_logging() -> None:
Expand Down Expand Up @@ -103,26 +103,28 @@ def _configure_logging() -> None:
]


def _build_parser() -> argparse.ArgumentParser:
def _build_parser(defaults: config.Defaults | None = None) -> argparse.ArgumentParser:
if defaults is None:
defaults = config.Defaults()
parser = argparse.ArgumentParser(prog="stackvox", description="Kokoro-82M TTS")
sub = parser.add_subparsers(dest="cmd")

p_speak = sub.add_parser("speak", help="Synthesize and play in-process (loads model each run)")
_add_voice_args(p_speak)
_add_voice_args(p_speak, defaults)
p_speak.add_argument("text", nargs="?")
p_speak.add_argument("--file", type=Path)
p_speak.add_argument("--out", type=Path, help="Write wav instead of playing")

p_say = sub.add_parser("say", help="Send text to daemon (fast; fails if daemon not running)")
_add_voice_args(p_say)
_add_voice_args(p_say, defaults)
p_say.add_argument("text", nargs="?")
p_say.add_argument("--file", type=Path)
p_say.add_argument(
"--fallback-say", action="store_true", help="Shell out to macOS `say` if daemon unreachable"
)

p_serve = sub.add_parser("serve", help="Run the daemon in the foreground")
_add_voice_args(p_serve)
_add_voice_args(p_serve, defaults)

sub.add_parser("stop", help="Stop the running daemon")
sub.add_parser("status", help="Print daemon status")
Expand All @@ -146,10 +148,10 @@ def _build_parser() -> argparse.ArgumentParser:
return parser


def _add_voice_args(parser: argparse.ArgumentParser) -> None:
parser.add_argument("--voice", default=DEFAULT_VOICE)
parser.add_argument("--speed", type=float, default=DEFAULT_SPEED)
parser.add_argument("--lang", default=DEFAULT_LANG)
def _add_voice_args(parser: argparse.ArgumentParser, defaults: config.Defaults) -> None:
parser.add_argument("--voice", default=defaults.voice)
parser.add_argument("--speed", type=float, default=defaults.speed)
parser.add_argument("--lang", default=defaults.lang)


def _read_text(args: argparse.Namespace) -> str | None:
Expand Down Expand Up @@ -297,7 +299,7 @@ def main() -> int:
elif not argv and not sys.stdin.isatty():
argv = ["speak"]

parser = _build_parser()
parser = _build_parser(config.load_defaults())
args = parser.parse_args(argv)

if not args.cmd:
Expand Down
80 changes: 80 additions & 0 deletions stackvox/config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
"""User config file: per-user defaults for voice / speed / lang.

Lives at `$XDG_CONFIG_HOME/stackvox/config.toml` (falling back to
`~/.config/stackvox/config.toml`), or wherever `STACKVOX_CONFIG` points if set.
A missing file is fine — defaults from `stackvox.engine` apply. A malformed
file logs a warning and is otherwise ignored.

File format::

[defaults]
voice = "bf_emma"
speed = 1.1
lang = "en-gb"
"""

from __future__ import annotations

import logging
import os
import sys
from dataclasses import dataclass
from pathlib import Path

if sys.version_info >= (3, 11):
import tomllib
else: # pragma: no cover - covered by 3.10 CI
import tomli as tomllib

from stackvox.engine import DEFAULT_LANG, DEFAULT_SPEED, DEFAULT_VOICE

logger = logging.getLogger(__name__)


@dataclass(frozen=True)
class Defaults:
    """Resolved default values for synthesis parameters.

    Frozen dataclass; a bare `Defaults()` carries the `DEFAULT_*` constants
    imported from `stackvox.engine`.
    """

    # Voice name, e.g. "bf_emma" in the module docstring's sample config.
    voice: str = DEFAULT_VOICE
    # Speed value, e.g. 1.1 in the sample config.
    speed: float = DEFAULT_SPEED
    # Language code, e.g. "en-gb" in the sample config.
    lang: str = DEFAULT_LANG


def config_path() -> Path:
    """Return the location of the user config file.

    Lookup order:

    1. `STACKVOX_CONFIG`, when set to a non-empty value (used as the full
       file path after `~` expansion).
    2. `$XDG_CONFIG_HOME/stackvox/config.toml`, when `XDG_CONFIG_HOME` is set
       to a non-empty value.
    3. `~/.config/stackvox/config.toml`.
    """
    explicit = os.environ.get("STACKVOX_CONFIG")
    if explicit:
        return Path(explicit).expanduser()

    xdg_home = os.environ.get("XDG_CONFIG_HOME")
    if xdg_home:
        config_root = Path(xdg_home).expanduser()
    else:
        config_root = Path.home() / ".config"
    return config_root.joinpath("stackvox", "config.toml")


def _string_setting(section: dict[str, object], key: str, fallback: str, source: Path) -> str:
    """Return `section[key]` when it is a string; otherwise warn and return *fallback*."""
    if key not in section:
        return fallback
    value = section[key]
    if isinstance(value, str):
        return value
    logger.warning("config %s: `%s` must be a string; using built-in default", source, key)
    return fallback


def _float_setting(section: dict[str, object], key: str, fallback: float, source: Path) -> float:
    """Return `section[key]` as a float when it is numeric; otherwise warn and return *fallback*."""
    if key not in section:
        return fallback
    value = section[key]
    # bool is an int subclass, so `speed = true` would otherwise become 1.0.
    if isinstance(value, (int, float, str)) and not isinstance(value, bool):
        try:
            return float(value)
        except (ValueError, OverflowError):
            # Non-numeric string or an integer too large for a float.
            pass
    logger.warning("config %s: `%s` must be a number; using built-in default", source, key)
    return fallback


def load_defaults(path: Path | None = None) -> Defaults:
    """Read the config file and return resolved defaults.

    Never raises for a bad config: every failure mode degrades to the
    built-in values with a logged warning.

    Args:
        path: Config file to read. When omitted (or falsy), `config_path()`
            decides the location.

    Returns:
        A `Defaults` built as follows:

        * Missing file: built-in defaults, no warning.
        * Unreadable, non-UTF-8 or syntactically invalid file: warning,
          built-in defaults.
        * `defaults` present but not a table: warning, built-in defaults.
        * Otherwise each of `voice`, `speed`, `lang` is taken from the
          `[defaults]` table when present and of a usable type; a missing key
          silently keeps its built-in value, and a wrongly-typed one logs a
          warning and keeps its built-in value.
    """
    p = path or config_path()
    if not p.is_file():
        return Defaults()
    try:
        data = tomllib.loads(p.read_text(encoding="utf-8"))
    # UnicodeDecodeError is a ValueError, not an OSError, so it must be named
    # explicitly or a non-UTF-8 file would crash the caller.
    except (OSError, UnicodeDecodeError, tomllib.TOMLDecodeError) as exc:
        logger.warning("ignoring malformed stackvox config at %s: %s", p, exc)
        return Defaults()
    section = data.get("defaults", {})
    if not isinstance(section, dict):
        logger.warning("config %s: [defaults] must be a table; ignoring", p)
        return Defaults()
    return Defaults(
        voice=_string_setting(section, "voice", DEFAULT_VOICE, p),
        speed=_float_setting(section, "speed", DEFAULT_SPEED, p),
        lang=_string_setting(section, "lang", DEFAULT_LANG, p),
    )
110 changes: 110 additions & 0 deletions tests/test_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,110 @@
"""Config loader tests — pure file/env logic, no engine touched."""

from __future__ import annotations

import logging

from stackvox import config
from stackvox.engine import DEFAULT_LANG, DEFAULT_SPEED, DEFAULT_VOICE


class TestConfigPath:
    """Resolution order of `config.config_path()`: override, then XDG, then home."""

    def test_stackvox_config_env_takes_priority(self, monkeypatch, tmp_path):
        target = tmp_path / "elsewhere.toml"
        monkeypatch.setenv("STACKVOX_CONFIG", str(target))
        resolved = config.config_path()
        assert resolved == target

    def test_xdg_config_home_when_set(self, monkeypatch, tmp_path):
        xdg_root = tmp_path / "xdg"
        monkeypatch.delenv("STACKVOX_CONFIG", raising=False)
        monkeypatch.setenv("XDG_CONFIG_HOME", str(xdg_root))
        expected = xdg_root / "stackvox" / "config.toml"
        assert config.config_path() == expected

    def test_falls_back_to_home_dotconfig(self, monkeypatch):
        # With neither variable set, the path is rooted at ~/.config.
        for name in ("STACKVOX_CONFIG", "XDG_CONFIG_HOME"):
            monkeypatch.delenv(name, raising=False)
        from pathlib import Path

        expected = Path.home() / ".config" / "stackvox" / "config.toml"
        assert config.config_path() == expected


class TestLoadDefaults:
    """`config.load_defaults()` against missing, complete, partial and broken files."""

    @staticmethod
    def _write_config(directory, body):
        """Write *body* to `config.toml` inside *directory* and return the path."""
        target = directory / "config.toml"
        target.write_text(body, encoding="utf-8")
        return target

    def test_missing_file_returns_built_in_defaults(self, tmp_path):
        loaded = config.load_defaults(tmp_path / "absent.toml")
        assert loaded == config.Defaults()
        assert loaded.voice == DEFAULT_VOICE
        assert loaded.speed == DEFAULT_SPEED
        assert loaded.lang == DEFAULT_LANG

    def test_full_config_overrides_all_three(self, tmp_path):
        toml_text = '[defaults]\nvoice = "bf_emma"\nspeed = 1.25\nlang = "en-gb"\n'
        loaded = config.load_defaults(self._write_config(tmp_path, toml_text))
        assert loaded.voice == "bf_emma"
        assert loaded.speed == 1.25
        assert loaded.lang == "en-gb"

    def test_partial_config_keeps_built_in_for_missing_keys(self, tmp_path):
        cfg = self._write_config(tmp_path, '[defaults]\nvoice = "bf_emma"\n')
        loaded = config.load_defaults(cfg)
        assert loaded.voice == "bf_emma"
        # Keys absent from the file keep the engine defaults.
        assert loaded.speed == DEFAULT_SPEED
        assert loaded.lang == DEFAULT_LANG

    def test_empty_file_returns_built_in_defaults(self, tmp_path):
        cfg = self._write_config(tmp_path, "")
        loaded = config.load_defaults(cfg)
        assert loaded == config.Defaults()

    def test_malformed_toml_logs_warning_and_returns_defaults(self, tmp_path, caplog):
        cfg = self._write_config(tmp_path, "this is not valid = toml\n[defaults\nvoice =")
        with caplog.at_level(logging.WARNING, logger="stackvox.config"):
            loaded = config.load_defaults(cfg)
        assert loaded == config.Defaults()
        messages = [record.message for record in caplog.records]
        assert any("malformed stackvox config" in text for text in messages)

    def test_defaults_section_must_be_a_table(self, tmp_path, caplog):
        """A scalar `defaults` key parses as TOML but has the wrong shape: warn and ignore."""
        cfg = self._write_config(tmp_path, 'defaults = "not-a-table"\n')
        with caplog.at_level(logging.WARNING, logger="stackvox.config"):
            loaded = config.load_defaults(cfg)
        assert loaded == config.Defaults()
        messages = [record.message for record in caplog.records]
        assert any("must be a table" in text for text in messages)


class TestCLIPicksUpConfig:
    """End-to-end check that `cli.main()` feeds config values into argparse defaults."""

    def test_voice_default_comes_from_config(self, mocker, monkeypatch, tmp_path):
        cfg = tmp_path / "config.toml"
        cfg.write_text('[defaults]\nvoice = "bf_emma"\nspeed = 1.3\n', encoding="utf-8")
        monkeypatch.setenv("STACKVOX_CONFIG", str(cfg))

        from stackvox import cli

        # Stub the command handler so only argument parsing is exercised.
        fake_speak = mocker.patch.object(cli, "_cmd_speak", return_value=0)
        mocker.patch.object(cli.sys, "argv", ["stackvox", "speak", "hello"])
        mocker.patch.object(cli.sys.stdin, "isatty", return_value=True)

        exit_code = cli.main()
        assert exit_code == 0

        parsed = fake_speak.call_args.args[0]
        assert parsed.voice == "bf_emma"
        assert parsed.speed == 1.3
        # `lang` is absent from the file, so the built-in default applies.
        assert parsed.lang == DEFAULT_LANG

    def test_explicit_flag_overrides_config(self, mocker, monkeypatch, tmp_path):
        cfg = tmp_path / "config.toml"
        cfg.write_text('[defaults]\nvoice = "bf_emma"\n', encoding="utf-8")
        monkeypatch.setenv("STACKVOX_CONFIG", str(cfg))

        from stackvox import cli

        fake_speak = mocker.patch.object(cli, "_cmd_speak", return_value=0)
        mocker.patch.object(cli.sys, "argv", ["stackvox", "speak", "--voice", "af_sarah", "hello"])
        mocker.patch.object(cli.sys.stdin, "isatty", return_value=True)

        exit_code = cli.main()
        assert exit_code == 0

        parsed = fake_speak.call_args.args[0]
        assert parsed.voice == "af_sarah"