# In [None]:
import os
import re
import argparse
import random
from TTS.api import TTS
from pydub import AudioSegment

MODEL_NAME = "tts_models/en/vctk/vits"
VOICES = ["p225", "p227", "p229"]

def read_markdown_file(file_path: str) -> str:
    """Load a markdown document from disk.

    Args:
        file_path: Location of the file to read.

    Returns:
        The complete file contents, decoded as UTF-8.
    """
    with open(file_path, mode="r", encoding="utf-8") as handle:
        contents = handle.read()
    return contents

def split_into_chunks(text: str, max_chars: int = 500):
    """Group the sentences of *text* into chunks of at most *max_chars* characters.

    Sentences are detected by whitespace following ``.``, ``!`` or ``?`` and
    are re-joined with a single space inside each chunk.

    Args:
        text: The text to split.
        max_chars: Upper bound on the length of each chunk, counting the
            spaces that join the sentences.

    Returns:
        A list of non-empty chunk strings. A single sentence longer than
        ``max_chars`` is never cut; it is returned as a chunk of its own.
        Empty or whitespace-only input yields an empty list.
    """
    sentences = re.split(r'(?<=[.!?])\s+', text)
    chunks = []
    buffer = []
    length = 0  # always equals len(" ".join(buffer))
    for s in sentences:
        # Blank fragments (empty input, trailing whitespace) carry no speech.
        if not s.strip():
            continue
        # Account for the space that will separate this sentence from the
        # previous one when the buffer is joined.
        projected = length + len(s) + (1 if buffer else 0)
        if buffer and projected > max_chars:
            chunks.append(" ".join(buffer))
            buffer = [s]
            length = len(s)
        else:
            # Either the sentence fits, or the buffer is empty and the
            # sentence is oversized; never flush an empty buffer.
            buffer.append(s)
            length = projected
    if buffer:
        chunks.append(" ".join(buffer))
    return chunks

def clean_tags(text: str) -> str:
    """Remove ``[voice ...]`` / ``[/voice]`` markers from *text*.

    Args:
        text: Text that may contain bracketed voice tags.

    Returns:
        The text with every such tag deleted and surrounding whitespace
        trimmed.
    """
    voice_tag = re.compile(r'\[/?voice[^\]]*\]')
    without_tags = voice_tag.sub('', text)
    return without_tags.strip()

def synthesize_chunks(chunks, output_path: str):
    """Synthesize each text chunk to speech and export the result as one WAV.

    Every chunk is stripped of voice tags, spoken with a speaker picked at
    random from ``VOICES``, and appended to a single audio track that is
    written to *output_path*.

    Args:
        chunks: Iterable-with-length of text chunks (e.g. a list of strings).
        output_path: Destination path of the combined WAV file.

    Returns:
        None. Progress and the final status are printed to stdout; nothing is
        written when no chunk produced audio.
    """
    # Imported locally because only this function needs it.
    import numpy as np

    tts = TTS(MODEL_NAME)
    if hasattr(tts, "speakers") and tts.speakers:
        print("Available speakers:", tts.speakers)
    sample_rate = tts.synthesizer.output_sample_rate
    combined = AudioSegment.silent(duration=0)
    total = len(chunks)
    for i, chunk in enumerate(chunks, start=1):
        # Strip tags first: a chunk made only of voice tags is empty once
        # cleaned and must not be sent to the synthesizer.
        clean_text = clean_tags(chunk)
        if not clean_text:
            continue
        voice = random.choice(VOICES)
        print(f"Synthesizing chunk {i}/{total} with {voice}")
        wav = tts.tts(text=clean_text, speaker=voice)
        # The synthesizer yields floating-point samples (possibly as a plain
        # list), whereas pydub interprets raw bytes as integer PCM. Convert
        # to 16-bit PCM, clipping to [-1, 1] so the scaling cannot overflow.
        samples = np.clip(np.asarray(wav, dtype=np.float32), -1.0, 1.0)
        pcm = (samples * 32767.0).astype(np.int16)
        segment = AudioSegment(
            pcm.tobytes(),
            frame_rate=sample_rate,
            sample_width=pcm.dtype.itemsize,
            channels=1,
        )
        combined += segment
    if len(combined) > 0:
        combined.export(output_path, format="wav")
        print(f"Audiobook created: {output_path}")
    else:
        print("No audio generated!")

if __name__ == "__main__":
    # Command-line entry point: read the markdown file, split it into
    # chunks and synthesize them into a single WAV file.
    cli = argparse.ArgumentParser(description="Convert markdown to audiobook")
    cli.add_argument("input", help="Input markdown file")
    cli.add_argument(
        "-o",
        "--output",
        default="audiobook.wav",
        help="Output WAV file",
    )
    options = cli.parse_args()
    synthesize_chunks(
        split_into_chunks(read_markdown_file(options.input)),
        options.output,
    )
