# local_tts_v2 — Quick Test
Hexagonal architecture smoke test: normalisation → synthesis → file output.

In [1]:
import sys
from pathlib import Path

def find_repo_root(start: Path) -> Path:
    for candidate in [start.resolve()] + list(start.resolve().parents):
        if (candidate / 'src' / 'tts_v2').exists():
            return candidate
    raise RuntimeError('Could not locate local_tts_v2 repo root')

REPO_ROOT = find_repo_root(Path.cwd())
if str(REPO_ROOT / 'src') not in sys.path:
    sys.path.insert(0, str(REPO_ROOT / 'src'))

OUTPUT_DIR = REPO_ROOT / 'outputs'
OUTPUT_DIR.mkdir(parents=True, exist_ok=True)

print(f'Repo root : {REPO_ROOT}')
print(f'Output dir: {OUTPUT_DIR}')

Repo root : /Users/s748779/tts/local_tts_v2
Output dir: /Users/s748779/tts/local_tts_v2/outputs


In [2]:
import logging
logging.basicConfig(level=logging.INFO, format='%(name)s | %(levelname)s | %(message)s')

# Domain + service
from tts_v2.domain.audio import SynthesisRequest
from tts_v2.domain.voice import list_personas
from tts_v2.service.tts_service import TTSService

# Adapters
from tts_v2.adapters.synthesizer.coqui_adapter import CoquiSynthesizerAdapter
from tts_v2.adapters.normalizer.bfsi_normalizer_adapter import BFSINormalizerAdapter
from tts_v2.adapters.audio_sink.file_sink_adapter import FileSinkAdapter
from tts_v2.adapters.audit.file_audit_adapter import FileAuditAdapter

print('✓ Imports successful')
print(f'Available personas: {list_personas()}')

✓ Imports successful
Available personas: ['professional_male', 'friendly_female', 'neutral_male', 'professional_female']


## Normalisation Smoke Tests

In [3]:
from tts_v2.adapters.normalizer.bfsi_normalizer_adapter import BFSINormalizerAdapter
norm = BFSINormalizerAdapter()

tests = [
    ('abbreviation', 'Please complete your KYC verification with NAB.'),
    ('money',        'Your account balance is $1,234.50.'),
    ('otp',          'Your OTP is 482913. Do not share it.'),
]

for label, raw in tests:
    result = norm.normalize(raw)
    print(f'[{label}]')
    print(f'  IN : {raw}')
    print(f'  OUT: {result}')
    print()

tts_v2.adapters.normalizer.bfsi_normalizer_adapter | INFO | [normalize] input: 'Please complete your KYC verification with NAB.'
tts_v2.text_normalization.abbreviation_handler | INFO | expand_abbreviations: 'KYC' → 'KNOW YOUR CUSTOMER'
tts_v2.text_normalization.abbreviation_handler | INFO | expand_abbreviations: 'NAB' → 'NATIONAL AUSTRALIA BANK'
tts_v2.text_normalization.abbreviation_handler | INFO | expand_abbreviations: done (input 47 chars → output 82 chars)
tts_v2.text_normalization.number_formatter | INFO | expand_numbers_in_text: no numeric tokens found
tts_v2.adapters.normalizer.bfsi_normalizer_adapter | INFO | [normalize] output: 'Please complete your KNOW YOUR CUSTOMER verification with NATIONAL AUSTRALIA BANK.'
tts_v2.adapters.normalizer.bfsi_normalizer_adapter | INFO | [normalize] input: 'Your account balance is $1,234.50.'
tts_v2.text_normalization.abbreviation_handler | INFO | expand_abbreviations: done (input 34 chars → output 34 chars)
tts_v2.text_normalization.number_fo

[abbreviation]
  IN : Please complete your KYC verification with NAB.
  OUT: Please complete your KNOW YOUR CUSTOMER verification with NATIONAL AUSTRALIA BANK.

[money]
  IN : Your account balance is $1,234.50.
  OUT: Your account balance is one thousand, two hundred and thirty-four dollars and fifty cents.

[otp]
  IN : Your OTP is 482913. Do not share it.
  OUT: Your ONE TIME PASSWORD is four eight two nine one three. Do not share it.



## Wire TTSService and Load Model

In [4]:
service = TTSService(
    synthesizer=CoquiSynthesizerAdapter(use_gpu=False),
    normalizer=BFSINormalizerAdapter(),
    audio_sink=FileSinkAdapter(),
    audit=FileAuditAdapter(str(OUTPUT_DIR / 'audit.jsonl')),
)
print('✓ TTSService ready')

tts_v2.shared.device_utils | INFO | Device: GPU disabled by caller, using CPU
tts_v2.adapters.synthesizer.coqui_adapter | INFO | Loading Coqui model 'tts_models/en/vctk/vits' (cpu load → move to cpu)
TTS.utils.manage | INFO | tts_models/en/vctk/vits is already downloaded.
TTS.tts.models | INFO | Using model: vits
TTS.utils.audio.processor | INFO | Setting up Audio Processor...
TTS.utils.audio.processor | INFO |  | sample_rate: 22050
TTS.utils.audio.processor | INFO |  | resample: False
TTS.utils.audio.processor | INFO |  | num_mels: 80
TTS.utils.audio.processor | INFO |  | log_func: np.log10
TTS.utils.audio.processor | INFO |  | min_level_db: 0
TTS.utils.audio.processor | INFO |  | frame_shift_ms: None
TTS.utils.audio.processor | INFO |  | frame_length_ms: None
TTS.utils.audio.processor | INFO |  | ref_level_db: None
TTS.utils.audio.processor | INFO |  | fft_size: 1024
TTS.utils.audio.processor | INFO |  | power: None
TTS.utils.audio.processor | INFO |  | preemphasis: 0.0
TTS.utils.aud

✓ TTSService ready


## Synthesise BFSI Scenarios

In [5]:
scenarios = [
    {
        'name'   : 'balance_inquiry',
        'text'   : 'Your account balance is $1,234.50 as of today.',
        'persona': 'professional_female',
    },
    {
        'name'   : 'otp_delivery',
        'text'   : 'Your OTP is 482913. This code expires in 5 minutes.',
        'persona': 'neutral_male',
    },
    {
        'name'   : 'kyc_notice',
        'text'   : 'We require KYC verification to continue. Please contact NAB.',
        'persona': 'professional_male',
    },
]

results = []
for s in scenarios:
    out_path = str(OUTPUT_DIR / f"{s['name']}.wav")
    result = service.speak(
        SynthesisRequest(
            text=s['text'],
            persona=s['persona'],
            output_path=out_path,
        )
    )
    results.append((s['name'], result))
    status = '✓' if result.success else '✗'
    print(f'{status} {s["name"]} → {result.output_path}')

tts_v2.adapters.normalizer.bfsi_normalizer_adapter | INFO | [normalize] input: 'Your account balance is $1,234.50 as of today.'
tts_v2.text_normalization.abbreviation_handler | INFO | expand_abbreviations: done (input 46 chars → output 46 chars)
tts_v2.text_normalization.number_formatter | INFO | format_money: AUD 1234.5 → whole=1234, cents=50
tts_v2.text_normalization.number_formatter | INFO | expand_numbers_in_text: 'Your account balance is $1,234.50 as of today.' → 'Your account balance is one thousand, two hundred and thirty-four dollars and fifty cents as of today.'
tts_v2.adapters.normalizer.bfsi_normalizer_adapter | INFO | [normalize] output: 'Your account balance is one thousand, two hundred and thirty-four dollars and fifty cents as of today.'
tts_v2.service.tts_service | INFO | [speak] normalised: 'Your account balance is $1,234.50 as of today.' → 'Your account balance is one thousand, two hundred and thirty-four dollars and fifty cents as of today.'
tts_v2.service.tts_servic

✓ balance_inquiry → /Users/s748779/tts/local_tts_v2/outputs/balance_inquiry.wav


TTS.utils.synthesizer | INFO | Processing time: 1.485
TTS.utils.synthesizer | INFO | Real-time factor: 0.263
tts_v2.adapters.synthesizer.coqui_adapter | INFO | [synthesize] produced 5.64s AudioChunk
tts_v2.service.tts_service | INFO | [speak] synthesised 5.64s for 91 chars
tts_v2.shared.audio_utils | INFO | Saved WAV → /Users/s748779/tts/local_tts_v2/outputs/otp_delivery.wav (5.64s @ 22050Hz)
tts_v2.adapters.audit.file_audit_adapter | INFO | [audit] {'ts': 1771815306.411, 'persona': 'neutral_male', 'speaker_id': 'p227', 'text_raw': 'Your OTP is 482913. This code expires in 5 minutes.', 'text_len': 91, 'duration_s': 5.644, 'elapsed_s': 1.504, 'rtf': 0.2665, 'output_path': '/Users/s748779/tts/local_tts_v2/outputs/otp_delivery.wav', 'metadata': {}}
tts_v2.service.tts_service | INFO | [speak] done | duration=5.64s | RTF=0.267
tts_v2.adapters.normalizer.bfsi_normalizer_adapter | INFO | [normalize] input: 'We require KYC verification to continue. Please contact NAB.'
tts_v2.text_normalizatio

✓ otp_delivery → /Users/s748779/tts/local_tts_v2/outputs/otp_delivery.wav


TTS.utils.synthesizer | INFO | Processing time: 1.902
TTS.utils.synthesizer | INFO | Real-time factor: 0.274
tts_v2.adapters.synthesizer.coqui_adapter | INFO | [synthesize] produced 6.93s AudioChunk
tts_v2.service.tts_service | INFO | [speak] synthesised 6.93s for 95 chars
tts_v2.shared.audio_utils | INFO | Saved WAV → /Users/s748779/tts/local_tts_v2/outputs/kyc_notice.wav (6.93s @ 22050Hz)
tts_v2.adapters.audit.file_audit_adapter | INFO | [audit] {'ts': 1771815308.359, 'persona': 'professional_male', 'speaker_id': 'p225', 'text_raw': 'We require KYC verification to continue. Please contact NAB.', 'text_len': 95, 'duration_s': 6.933, 'elapsed_s': 1.946, 'rtf': 0.2807, 'output_path': '/Users/s748779/tts/local_tts_v2/outputs/kyc_notice.wav', 'metadata': {}}
tts_v2.service.tts_service | INFO | [speak] done | duration=6.93s | RTF=0.281


✓ kyc_notice → /Users/s748779/tts/local_tts_v2/outputs/kyc_notice.wav


## Verify Output Files

In [6]:
import IPython.display as ipd

for name, result in results:
    p = Path(result.output_path)
    size_kb = p.stat().st_size / 1024 if p.exists() else 0
    print(f'{name}: {size_kb:.1f} KB — {"exists ✓" if p.exists() else "MISSING ✗"}')
    if p.exists():
        display(ipd.Audio(str(p)))

balance_inquiry: 225.1 KB — exists ✓


otp_delivery: 243.1 KB — exists ✓


kyc_notice: 298.6 KB — exists ✓
