# TTS v4
- Trying to implement sentence tracking

1) Install

In [1]:
# Core TTS + I/O deps
# NOTE: lines starting with "!" are notebook shell escapes, not Python.
# NOTE(review): apart from the kokoro lower bound and the detectron2 tag below,
# nothing here is version-pinned, so a fresh runtime may resolve newer releases.
!pip -q install "kokoro>=0.9.4" soundfile misaki[en] pypdf ebooklib pydub

# --- NEW: Install libraries for advanced PDF extraction ---
# 1) unstructured.io and its layout-detection model deps
!pip -q install "unstructured[local-inference]"
# detectron2 is installed from source at the v0.6 tag.
!pip -q install "detectron2@git+https://github.com/facebookresearch/detectron2.git@v0.6#egg=detectron2"

# 2) Nougat (transformers) and its PDF-to-image deps
!pip -q install "transformers[torch]" "nougat-ocr"
# poppler-utils is a system package; presumably required by pdf2image — TODO confirm.
!apt-get -yqq install poppler-utils
!pip -q install pdf2image
# --- End of new installs ---

# MP3 encoder (pydub uses ffmpeg)
!apt-get -yqq install ffmpeg

# (Optional) Silence overly chatty logs
import logging

# Each pair is (logger name, lowest severity that is still let through).
for _logger_name, _min_level in (
    ("phonemizer", logging.ERROR),
    ("unstructured", logging.ERROR),
    ("transformers", logging.ERROR),
    ("pypdf", logging.CRITICAL),
):
    logging.getLogger(_logger_name).setLevel(_min_level)


[?25l     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/48.3 kB[0m [31m?[0m eta [36m-:--:--[0m[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m48.3/48.3 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
[?25h  Preparing metadata (setup.py) ... [?25l[?25hdone
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m323.9/323.9 kB[0m [31m16.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m41.0/41.0 kB[0m [31m3.2 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m3.6/3.6 MB[0m [31m70.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m10.1/10.1 MB[0m [31m90.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.6/61.6 kB[0m [31m5.1 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m163.5/163.5 kB[0m [31m16.6 MB/s[0m eta [36m0:00:

2) Config + delivery helper (download vs Drive) + device selection

In [2]:
# --- Toggle: False = download to device; True = save into Google Drive ---
SAVE_TO_DRIVE = False
DRIVE_DIR = "/content/drive/MyDrive/TTS/kokoro_outputs"  # used only if SAVE_TO_DRIVE=True

if SAVE_TO_DRIVE:
    # Mount Drive and create the output folder up front, so it already
    # exists by the time deliver() places files into DRIVE_DIR.
    from google.colab import drive
    drive.mount('/content/drive')
    import os
    os.makedirs(DRIVE_DIR, exist_ok=True)

import os, io, re, zipfile, time
from pathlib import Path
from google.colab import files

def deliver(path: str):
    """Save to Drive if enabled; otherwise trigger a browser download.

    Args:
        path: Path of an existing local file to hand over to the user.

    When SAVE_TO_DRIVE is true the file is moved into DRIVE_DIR under its
    original file name and the destination is printed; otherwise the file is
    passed to ``files.download``.
    """
    if SAVE_TO_DRIVE:
        import shutil  # local import keeps this fix self-contained

        dest = f"{DRIVE_DIR}/{Path(path).name}"
        # os.replace() is a plain rename and fails with "Invalid cross-device
        # link" when source and destination live on different filesystems,
        # which is typically the case for a mounted Drive folder.
        # shutil.move() falls back to copy + delete in that situation.
        shutil.move(path, dest)
        print("Saved to Drive:", dest)
    else:
        files.download(path)

# --- Device selection ---
# DEVICE_MODE: "auto" (default), "cuda", or "cpu"
DEVICE_MODE = "auto"

import torch
def _pick_device():
    """Resolve DEVICE_MODE to a concrete device string.

    An explicit "cuda" or "cpu" is returned unchanged (no availability check
    is made); any other value selects "cuda" when torch reports it as
    available and "cpu" otherwise.
    """
    explicitly_requested = DEVICE_MODE in ("cuda", "cpu")
    if explicitly_requested:
        return DEVICE_MODE
    if torch.cuda.is_available():
        return "cuda"
    return "cpu"

DEVICE = _pick_device()
print("Using device:", DEVICE)


Using device: cuda


3) Helpers (PDF/EPUB extraction & cleanup, plus sentence-timed synthesis)

In [3]:
# 3) Helpers (PDF/EPUB extract & cleanup, synth primitives) — SIMPLIFIED location tracking

import numpy as np
import soundfile as sf
import re, io, zipfile, torch
from pathlib import Path
from typing import List, Tuple, Dict, Union
from functools import lru_cache

from pypdf import PdfReader
from ebooklib import epub
from kokoro import KPipeline
from pydub import AudioSegment

# --- Imports for advanced PDF extraction ---
from unstructured.partition.auto import partition
from transformers import pipeline as hf_pipeline
from pdf2image import convert_from_bytes

# Sentence-ish split; keeps chunks small (avoids 510-phoneme truncation)
# Matches one of . ? ! followed by whitespace, or a run of two or more newlines.
# NOTE(review): this uncaptured variant is not referenced in the visible cells.
SPLIT_PATTERN = r"[.?!]\s+|[\n]{2,}"
# Same pattern, but CAPTURED, so we can keep punctuation/newlines on the sentence.
# With a capture group, re.split() returns the matched separators interleaved
# with the text chunks instead of discarding them.
SPLIT_PATTERN_CAP = r"([.?!]\s+|[\n]{2,})"


# --- PDF Extraction Method 1: Legacy (pypdf, page-by-page) ---
def _extract_text_pdf_legacy(file_like: io.BytesIO) -> List[Dict]:
    """Extract text page by page with pypdf.

    Args:
        file_like: Binary stream handed to ``PdfReader``.

    Returns:
        One element dict per page, in page order. Each has the page text
        (empty when extraction returns nothing or raises) and metadata with a
        1-based ``page_number`` and ``points`` set to None.
    """
    print("Using legacy (pypdf) text extraction (per-page).")

    def _text_of(page) -> str:
        # A page that cannot be extracted contributes empty text instead of
        # aborting the whole document.
        try:
            return page.extract_text() or ""
        except Exception:
            return ""

    return [
        {
            "text": _text_of(page),
            "metadata": {
                "page_number": page_no,
                "points": None,  # Legacy doesn't provide points
            },
        }
        for page_no, page in enumerate(PdfReader(file_like).pages, start=1)
    ]

# --- PDF Extraction Method 2: unstructured.io (Simplified) ---
def _extract_text_pdf_unstructured(file_like: io.BytesIO) -> List[Dict]:
    """Extract text elements from a PDF with ``unstructured``'s ``partition``.

    The 'hi_res' strategy is tried first; if it raises, the stream is rewound
    and 'fast' is tried. If both raise, a single placeholder element carrying
    an error message is returned rather than an exception.

    Args:
        file_like: Binary PDF stream.

    Returns:
        A list of ``{"text", "metadata"}`` dicts where metadata holds
        ``page_number`` and ``points``. Elements whose stripped text is empty
        are dropped; if nothing remains, a single placeholder element carrying
        a warning message is returned.
    """
    print("Unstructured: Parsing PDF with layout analysis (strategy='hi_res')...")
    try:
        raw_elements = partition(
            file=file_like,
            strategy="hi_res",
            content_type="application/pdf",
            include_page_breaks=True,
        )
        print(f"Unstructured 'hi_res' returned {len(raw_elements)} raw elements.")
    except Exception as hi_res_error:
        print(f"Unstructured 'hi_res' strategy failed: {hi_res_error}. Falling back to 'fast'.")
        try:
            # The failed attempt may have consumed part of the stream.
            file_like.seek(0)
            raw_elements = partition(
                file=file_like,
                strategy="fast",
                content_type="application/pdf",
                include_page_breaks=True,
            )
            print(f"Unstructured 'fast' returned {len(raw_elements)} raw elements.")
        except Exception as fast_error:
            print(f"Unstructured 'fast' strategy also failed: {fast_error}.")
            return [{"text": "Error: Unstructured parsing failed.", "metadata": {"page_number": 1, "points": None}}]

    print("\n--- Processing elements (checking for points) ---")

    kept = []
    page_in_effect = 1  # carried forward for elements that report no page number
    for raw in raw_elements:
        meta = raw.metadata.to_dict()

        reported_page = meta.get("page_number")
        if reported_page is not None:
            page_in_effect = reported_page

        # Only the corner points of the coordinates metadata are retained.
        coords = meta.get("coordinates")
        points = coords.get("points") if coords else None

        text = str(raw).strip()
        if not text:
            continue
        kept.append({
            "text": text,
            "metadata": {"page_number": page_in_effect, "points": points},
        })

    print("--- Finished processing elements ---")
    print(f"Unstructured: Found {len(kept)} text elements.")
    if kept:
        return kept
    return [{"text": "Warning: Unstructured found no text elements.", "metadata": {"page_number": 1, "points": None}}]


# --- PDF Extraction Method 3: Nougat (Hugging Face model) ---
@lru_cache(maxsize=1)
def get_nougat_pipeline(device=DEVICE):
    """Build the Hugging Face image-to-text pipeline for facebook/nougat-base.

    The result is memoized by ``lru_cache`` (one entry). The pipeline is
    placed on GPU index 0 only when ``device`` is "cuda" and torch reports
    CUDA as available; otherwise it is created with the library's default
    device.
    """
    print(f"Loading Nougat model to {device}... (this may take a moment on first run)")
    wants_gpu = device == "cuda" and torch.cuda.is_available()
    placement = {"device": 0} if wants_gpu else {}
    return hf_pipeline("image-to-text", model="facebook/nougat-base", **placement)

def _extract_text_pdf_nougat(file_like: io.BytesIO, device=DEVICE) -> List[Dict]:
    """Render each PDF page to an image and transcribe it with Nougat.

    Args:
        file_like: Binary PDF stream; it is read to the end.
        device: Device string forwarded to ``get_nougat_pipeline``.

    Returns:
        One element dict per rendered page (1-based ``page_number``,
        ``points`` None). A page that fails yields a placeholder text instead.
        If the PDF cannot be converted to images, a single placeholder element
        carrying an error message is returned.
    """
    nougat = get_nougat_pipeline(device=device)
    print("Nougat: Converting PDF to images...")
    raw_pdf = file_like.read()
    try:
        page_images = convert_from_bytes(raw_pdf)
    except Exception as conversion_error:
        print(f"Nougat: pdf2image conversion failed: {conversion_error}. Cannot process.")
        return [{"text": "Error: Could not convert PDF.", "metadata": {"page_number": 1, "points": None}}]

    results = []
    print(f"Nougat: Processing {len(page_images)} pages with model...")
    for page_no, image in enumerate(page_images, start=1):
        location = {"page_number": page_no, "points": None}
        try:
            generated = nougat(image)[0]['generated_text']
            results.append({"text": generated, "metadata": location})
        except Exception as page_error:
            # Keep going: one bad page should not discard the rest.
            print(f"Nougat: Error on page {page_no}: {page_error}")
            results.append({"text": f"\n[Error processing page {page_no}]\n", "metadata": location})
    print("Nougat: Processing complete.")
    return results

# --- Main PDF Extraction Dispatcher ---
def extract_text_from_pdf(file_like: io.BytesIO, method: str, device=DEVICE) -> List[Dict]:
    """Dispatch PDF extraction to the requested backend.

    Args:
        file_like: Binary PDF stream.
        method: "nougat", "legacy" or "unstructured"; any other value prints
            a warning and is treated as "unstructured".
        device: Device string, used only by the Nougat backend.

    Returns:
        The element list produced by the chosen backend. If Nougat raises,
        the stream is rewound and unstructured is used; if unstructured
        raises, the stream is rewound and the legacy extractor is used.
    """
    if method == "legacy":
        return _extract_text_pdf_legacy(file_like)

    if method == "nougat":
        try:
            return _extract_text_pdf_nougat(file_like, device=device)
        except Exception as nougat_error:
            print(f"CRITICAL: Nougat failed: {nougat_error}. Falling back to unstructured.")
            file_like.seek(0)
            return _extract_text_pdf_unstructured(file_like)

    if method != "unstructured":
        print(f"Warning: Unknown method '{method}'. Defaulting to 'unstructured'.")
    try:
        return _extract_text_pdf_unstructured(file_like)
    except Exception as unstructured_error:
        print(f"CRITICAL: Unstructured failed: {unstructured_error}. Falling back to legacy.")
        file_like.seek(0)
        return _extract_text_pdf_legacy(file_like)

# --- EPUB Extraction (Unchanged) ---
def _html_to_text(markup: str) -> str:
    """Reduce an (X)HTML document to plain text, keeping paragraph breaks."""
    from html import unescape  # local import: stdlib, not imported at file level

    text = re.sub(r"<(script|style).*?>.*?</\1>", " ", markup, flags=re.S | re.I)
    text = re.sub(r"<br\s*/?>", "\n", text, flags=re.I)
    text = re.sub(r"</p>|</div>|</h\d>", "\n\n", text, flags=re.I)
    text = re.sub(r"<[^>]+>", " ", text)
    # Decode entities only after the tags are gone, so an escaped "&lt;b&gt;"
    # in the prose is not mistaken for markup; then turn non-breaking spaces
    # into ordinary ones so they collapse like any other whitespace.
    text = unescape(text).replace("\xa0", " ")
    text = re.sub(r"[ \t]+", " ", text)
    return re.sub(r"\n{3,}", "\n\n", text).strip()


def extract_chapters_from_epub(file_like: io.BytesIO):
    """Read an EPUB and return its document items as plain-text chapters.

    Args:
        file_like: Binary EPUB stream passed to ``epub.read_epub``.

    Returns:
        A list of ``(title, text)`` tuples in the order the book yields its
        document items. The title is the item's file stem, unless the first
        text line starts with "chapter", "part" or "book" (case-insensitive),
        in which case the first 60 characters of that line are used. Items
        with no text are skipped. If no item yields text, all documents are
        merged into a single "Chapter 1" when that produces any text.
    """
    # ITEM_DOCUMENT is defined on the top-level ebooklib package rather than
    # on the ebooklib.epub module, so it has to be looked up there.
    import ebooklib

    bk = epub.read_epub(file_like)
    documents = [
        item
        for item in bk.get_items_of_type(ebooklib.ITEM_DOCUMENT)
        # Skip the navigation document; it is a table of contents, not prose.
        if not (isinstance(item, epub.EpubNav) or getattr(item, "is_nav", False))
    ]
    decoded = [item.get_content().decode("utf-8", errors="ignore") for item in documents]

    chapters = []
    for item, markup in zip(documents, decoded):
        text = _html_to_text(markup)
        if not text:
            continue
        title = Path(item.file_name).stem
        first = text.splitlines()[0]
        if re.match(r"(?i)\s*(chapter|part|book)\b[^\n]{0,80}", first):
            title = first[:60]
        chapters.append((title, text))

    if not chapters:
        # Last resort: treat the whole book as one chapter.
        merged = _html_to_text(" ".join(decoded))
        if merged:
            chapters = [("Chapter 1", merged)]
    return chapters

def safe_name(s: str) -> str:
    """Turn an arbitrary title into a file-name-friendly token.

    Every run of characters other than word characters and "-" becomes a
    single underscore, and leading/trailing underscores are stripped.
    Returns "chapter" when nothing is left.
    """
    collapsed = re.sub(r"[^\w\-]+", "_", s)
    trimmed = collapsed.strip("_")
    if trimmed:
        return trimmed
    return "chapter"

# --- Pipeline cache (Unchanged) ---
@lru_cache(maxsize=4)
def get_pipeline(lang_code='a', device=DEVICE):
    """Return a ``KPipeline`` for the given language code and device.

    Instances are memoized by ``lru_cache`` (up to four distinct argument
    combinations), so repeated calls with the same arguments reuse one
    pipeline object.
    """
    kokoro_pipeline = KPipeline(lang_code=lang_code, device=device)
    return kokoro_pipeline

def _synthesize_sentence(pipe: KPipeline, sentence: str, voice='af_heart', speed=1.0) -> np.ndarray:
    """Synthesize one sentence and return its audio as a single array.

    The pipeline is called with ``split_pattern=None``; the audio (third
    item) of every result it yields is concatenated along axis 0. An empty
    float32 array is returned when the pipeline yields nothing.
    """
    pieces = [
        audio
        for _graphemes, _phonemes, audio in pipe(sentence, voice=voice, speed=speed, split_pattern=None)
    ]
    if pieces:
        return np.concatenate(pieces, axis=0)
    return np.zeros((0,), dtype=np.float32)

def split_sentences_keep_delim(text: str, pattern: Union[str, None] = None) -> List[str]:
    """Split text into sentences, keeping the closing punctuation attached.

    Args:
        text: Text to split. Empty or None input yields an empty list.
        pattern: Optional separator regex; defaults to SPLIT_PATTERN_CAP.
            Only whole matches are used, so capture groups are irrelevant.

    Returns:
        The stripped, non-empty chunks in order. When the separator that ends
        a chunk contains non-whitespace (e.g. ". "), its stripped form is
        appended directly to the chunk ("Hello world."). Whitespace-only
        separators such as blank lines are dropped.
    """
    if not text:
        return []
    if pattern is None:
        pattern = SPLIT_PATTERN_CAP

    sentences = []
    cursor = 0
    for separator in re.finditer(pattern, text):
        body = text[cursor:separator.start()].strip()
        cursor = separator.end()
        if not body:
            continue
        # Punctuation goes straight onto the sentence, with no space in
        # between, so the manifest text reads like the source text.
        sentences.append(body + separator.group(0).strip())

    tail = text[cursor:].strip()
    if tail:
        sentences.append(tail)
    return sentences

# --- Synthesizer (Modified slightly to handle simplified metadata) ---
def synth_text_to_wav_and_manifest(
    text_or_elements: Union[str, List[Dict]],
    voice='af_heart',
    speed=1.0,
    lang_code='a',
    device=DEVICE) -> Tuple[bytes, Dict]:
    """Synthesize text sentence by sentence and build a timing manifest.

    Args:
        text_or_elements: Either a plain string (wrapped as one element on
            page 1 with no points) or a list of ``{"text", "metadata"}`` dicts.
        voice, speed: Forwarded to the per-sentence synthesizer.
        lang_code, device: Used to obtain the cached pipeline.

    Returns:
        ``(wav_bytes, manifest)``. The manifest has an empty ``audioUrl`` and
        a ``sentences`` list; each entry holds a running index ``i``,
        ``start``/``end`` in seconds rounded to milliseconds, the sentence
        ``text`` and the owning element's metadata as ``location``. With no
        audio at all, a tenth of a second of silence is written.
    """
    pipe = get_pipeline(lang_code=lang_code, device=device)
    sr = 24000  # assumed to match the rate of the audio the pipeline yields — TODO confirm

    if isinstance(text_or_elements, str):
        # Simplified metadata for string input
        elements = [{"text": text_or_elements, "metadata": {"page_number": 1, "points": None}}]
    else:
        elements = text_or_elements

    print(f"Synthesizing {len(elements)} text elements...")

    audio_parts = []
    timeline = []
    clock = 0.0  # start time of the next sentence, in seconds
    for element in elements:
        location = element.get("metadata", {})  # Contains page_number, points
        for sent in split_sentences_keep_delim(element.get("text", "")):
            if not sent:
                continue
            pcm = _synthesize_sentence(pipe, sent, voice=voice, speed=speed)
            finish = clock + pcm.shape[0] / sr
            timeline.append({
                "i": len(timeline),
                "start": round(clock, 3),
                "end": round(finish, 3),
                "text": sent.strip(),
                "location": location,
            })
            audio_parts.append(pcm)
            clock = finish

    if audio_parts:
        samples = np.concatenate(audio_parts, axis=0)
    else:
        samples = np.zeros((sr // 10,), dtype=np.float32)

    wav_buffer = io.BytesIO()
    sf.write(wav_buffer, samples, sr, format='WAV')
    return wav_buffer.getvalue(), {"audioUrl": "", "sentences": timeline}

def wav_to_mp3_bytes(wav_bytes: bytes, bitrate="128k") -> bytes:
    """Re-encode in-memory WAV data as MP3 via pydub and return the bytes.

    Args:
        wav_bytes: Complete WAV file contents.
        bitrate: Bitrate string passed to the exporter.
    """
    segment = AudioSegment.from_file(io.BytesIO(wav_bytes), format="wav")
    mp3_buffer = io.BytesIO()
    segment.export(mp3_buffer, format="mp3", bitrate=bitrate)
    mp3_buffer.seek(0)
    return mp3_buffer.read()


  m = re.match('([su]([0-9]{1,2})p?) \(([0-9]{1,2}) bit\)$', token)
  m2 = re.match('([su]([0-9]{1,2})p?)( \(default\))?$', token)
  elif re.match('(flt)p?( \(default\))?$', token):
  elif re.match('(dbl)p?( \(default\))?$', token):


4) High-level synthesis wrappers (now also write manifest.json)

In [4]:
# 4) High-level synthesis wrappers (string / PDF / EPUB) — with manifest

def synth_string(text: str,
                 voice="af_heart",
                 speed=1.0,
                 out_format="wav",          # "wav" or "mp3"
                 lang_code="a",
                 device=None,
                 basename="kokoro_text"):
    """Synthesize a plain string to an audio file plus a sentence manifest.

    Args:
        text: Text to read aloud.
        voice, speed, lang_code: Forwarded to the synthesizer.
        out_format: "mp3" (case-insensitive) writes MP3; anything else WAV.
        device: Device string; falls back to the global DEVICE when falsy.
        basename: File name stem used under /content.

    Returns:
        ``(audio_path, manifest_path)``. The manifest's ``audioUrl`` is set
        to the audio file's name.
    """
    import json

    device = device or DEVICE

    # Use the same location keys ("page_number", "points") that the PDF
    # extractors emit, so every manifest shares one location schema.
    elements = [{
        "text": text,
        "metadata": {"page_number": 1, "points": None, "source": "string"}
    }]

    wav_bytes, manifest = synth_text_to_wav_and_manifest(
        elements,
        voice=voice, speed=speed, lang_code=lang_code, device=device
    )

    out_base = f"/content/{basename}"

    if out_format.lower() == "mp3":
        audio_path = out_base + ".mp3"
        payload = wav_to_mp3_bytes(wav_bytes)
    else:
        audio_path = out_base + ".wav"
        payload = wav_bytes
    with open(audio_path, "wb") as f:
        f.write(payload)

    manifest_path = out_base + "_manifest.json"
    manifest["audioUrl"] = Path(audio_path).name
    with open(manifest_path, "w", encoding="utf-8") as f:
        json.dump(manifest, f, ensure_ascii=False, indent=2)

    return audio_path, manifest_path

def synth_pdf(file_path_or_bytes,
              voice="af_heart",
              speed=1.0,
              out_format="wav",           # "wav" or "mp3"
              lang_code="a",
              device=None,
              basename=None,
              extract_method="unstructured"):
    """Extract text from a PDF and synthesize it to one audio file + manifest.

    Args:
        file_path_or_bytes: A path (str or Path), raw PDF bytes, or a binary
            file-like object.
        voice, speed, lang_code: Forwarded to the synthesizer.
        out_format: "mp3" (case-insensitive) writes MP3; anything else WAV.
        device: Device string; falls back to the global DEVICE when falsy.
        basename: Output name stem; defaults to the input file's stem, or
            "document" when the input is not a path.
        extract_method: Passed to ``extract_text_from_pdf`` as ``method``.

    Returns:
        ``(audio_path, manifest_path)`` under /content, named
        ``<stem>_tts.<ext>`` and ``<stem>_tts_manifest.json``.
    """
    import json

    device = device or DEVICE
    if isinstance(file_path_or_bytes, (str, Path)):
        with open(file_path_or_bytes, "rb") as fh:
            pdf_bytes = io.BytesIO(fh.read())
        stem = Path(file_path_or_bytes).stem
    elif isinstance(file_path_or_bytes, (bytes, bytearray, memoryview)):
        # Raw bytes have no seek()/read(); wrap them so the extractors can
        # treat them like any other stream.
        pdf_bytes = io.BytesIO(bytes(file_path_or_bytes))
        stem = basename or "document"
    else:
        pdf_bytes = file_path_or_bytes
        stem = basename or "document"

    # extract_text_from_pdf returns a list of element dicts, which the
    # synthesizer consumes directly.
    elements = extract_text_from_pdf(pdf_bytes, method=extract_method, device=device)
    wav_bytes, manifest = synth_text_to_wav_and_manifest(
        elements,
        voice=voice, speed=speed, lang_code=lang_code, device=device
    )

    out_base = f"/content/{(basename or stem)}_tts"

    if out_format.lower() == "mp3":
        audio_path = out_base + ".mp3"
        payload = wav_to_mp3_bytes(wav_bytes)
    else:
        audio_path = out_base + ".wav"
        payload = wav_bytes
    with open(audio_path, "wb") as f:
        f.write(payload)

    manifest_path = out_base + "_manifest.json"
    manifest["audioUrl"] = Path(audio_path).name
    with open(manifest_path, "w", encoding="utf-8") as f:
        json.dump(manifest, f, ensure_ascii=False, indent=2)

    return audio_path, manifest_path

def synth_epub(file_path_or_bytes,
               voice="af_heart",
               speed=1.0,
               per_chapter_format="wav",  # "wav" or "mp3"
               lang_code="a",
               device=None,
               zip_name=None):
    """Synthesize every chapter of an EPUB into a ZIP of audio + manifests.

    Args:
        file_path_or_bytes: A path (str or Path), raw EPUB bytes, or a binary
            file-like object.
        voice, speed, lang_code: Forwarded to the synthesizer.
        per_chapter_format: "mp3" (case-insensitive) stores MP3 files;
            anything else stores WAV.
        device: Device string; falls back to the global DEVICE when falsy.
        zip_name: Archive name without extension; defaults to
            ``<stem>_chapters`` ("book_chapters" when the input is not a path).

    Returns:
        Path of the written ZIP under /content. For each chapter it contains
        ``NN_<title>.<ext>`` and ``NN_<title>_manifest.json``.

    Raises:
        AssertionError: If no chapters are detected in the EPUB.
    """
    import json

    device = device or DEVICE

    if isinstance(file_path_or_bytes, (str, Path)):
        with open(file_path_or_bytes, "rb") as fh:
            epub_bytes = io.BytesIO(fh.read())
        stem = Path(file_path_or_bytes).stem
    elif isinstance(file_path_or_bytes, (bytes, bytearray, memoryview)):
        # Raw bytes are wrapped so the reader receives a file-like object.
        epub_bytes = io.BytesIO(bytes(file_path_or_bytes))
        stem = "book"
    else:
        epub_bytes = file_path_or_bytes
        stem = "book"

    chapters = extract_chapters_from_epub(epub_bytes)
    assert chapters, "No chapters detected in EPUB."

    zip_buf = io.BytesIO()
    with zipfile.ZipFile(zip_buf, "w", zipfile.ZIP_DEFLATED) as zf:
        for idx, (title, body) in enumerate(chapters, 1):
            name = f"{idx:02d}_{safe_name(title)[:40]}"

            # Use the same location keys ("page_number", "points") that the
            # PDF extractors emit, plus chapter info, so every manifest
            # shares one location schema.
            chapter_elements = [{
                "text": body,
                "metadata": {
                    "chapter_index": idx,
                    "chapter_title": title,
                    "page_number": 1,  # Page number is relative to chapter
                    "points": None
                }
            }]

            wav_bytes, manifest = synth_text_to_wav_and_manifest(
                chapter_elements,
                voice=voice, speed=speed, lang_code=lang_code, device=device
            )

            if per_chapter_format.lower() == "mp3":
                audio_name = f"{name}.mp3"
                zf.writestr(audio_name, wav_to_mp3_bytes(wav_bytes))
            else:
                audio_name = f"{name}.wav"
                zf.writestr(audio_name, wav_bytes)

            manifest["audioUrl"] = audio_name
            zf.writestr(f"{name}_manifest.json", json.dumps(manifest, ensure_ascii=False, indent=2))

    zpath = f"/content/{zip_name or (stem + '_chapters')}.zip"
    with open(zpath, "wb") as f:
        f.write(zip_buf.getvalue())
    return zpath

## QUICK-CALL CELLS (updated to also deliver the manifest)

A) String → audio

In [None]:
# @title A) String → Audio (+ Manifest)
# @markdown ### Options (edit here)
VOICE = "af_heart"  # @param {type:"string"}
SPEED = 1.0          # @param {type:"number"}
FORMAT = "mp3"       # @param ["wav", "mp3"]
LANG = "a"           # @param {type:"string"}
DEVICE_OVERRIDE = "None"  # @param ["None", "cuda", "cpu"]
BASENAME = "kokoro_text"  # @param {type:"string"}

# @markdown **Text to read (edit below):**
TEXT = """Paste or type your text here.
It can be multiple paragraphs. Chapters aren't needed for this path.
"""

# ---- Run (no edits needed below) ----
# The form supplies the literal string "None"; map it to a real None so that
# synth_string falls back to the global DEVICE.
_dev = None if DEVICE_OVERRIDE == "None" else DEVICE_OVERRIDE
audio_path, manifest_path = synth_string(
    TEXT, voice=VOICE, speed=SPEED,
    out_format=FORMAT, lang_code=LANG,
    device=_dev, basename=BASENAME
)
# deliver() either downloads each file or moves it to Drive, depending on
# SAVE_TO_DRIVE.
deliver(audio_path)
deliver(manifest_path)
print("Done:", audio_path, manifest_path)


<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Done: /content/kokoro_text.mp3 /content/kokoro_text_manifest.json


B) Upload PDF → one audio file

In [6]:
# @title B) PDF → Audio (+ Manifest)
# @markdown ### Options (edit here)
VOICE = "af_heart"  # @param {type:"string"}
SPEED = 1.0          # @param {type:"number"}
FORMAT = "mp3"       # @param ["wav", "mp3"]
# --- NEW: Choose your PDF text extraction method ---
PDF_METHOD = "unstructured"  # @param ["unstructured", "nougat", "legacy"]
LANG = "a"           # @param {type:"string"}
DEVICE_OVERRIDE = "None"  # @param ["None", "cuda", "cpu"]

# @markdown **Upload a PDF when prompted.**
from google.colab import files
print("Upload a PDF…")
_uploaded = files.upload()
# An empty result (presumably what a cancelled dialog returns — TODO confirm)
# would otherwise surface as a bare StopIteration from next(); fail with a
# readable message instead.
if not _uploaded:
    raise RuntimeError("No PDF was uploaded; re-run this cell and choose a file.")
# Only the first uploaded file is used; its key is the saved file name.
_pdf_key = next(iter(_uploaded))

# ---- Run (no edits needed below) ----
# The form supplies the literal string "None"; map it to a real None so that
# synth_pdf falls back to the global DEVICE.
_dev = None if DEVICE_OVERRIDE == "None" else DEVICE_OVERRIDE
audio_path, manifest_path = synth_pdf(
    _pdf_key, voice=VOICE, speed=SPEED,
    out_format=FORMAT, lang_code=LANG,
    device=_dev,
    extract_method=PDF_METHOD
)
deliver(audio_path)
deliver(manifest_path)
print("Done:", audio_path, manifest_path)

Upload a PDF…


Saving Assignment_1.pdf to Assignment_1.pdf
Unstructured: Parsing PDF with layout analysis (strategy='hi_res')...
Unstructured 'hi_res' returned 154 raw elements.

--- Processing elements (checking for points) ---
--- Finished processing elements ---
Unstructured: Found 142 text elements.
Synthesizing 142 text elements...




<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

<IPython.core.display.Javascript object>

Done: /content/Assignment_1_tts.mp3 /content/Assignment_1_tts_manifest.json


In [None]:
# @title C) EPUB → ZIP (Per-Chapter Audio + Manifests)
# @markdown ### Options (edit here)
VOICE = "af_heart"      # @param {type:"string"}
SPEED = 1.0              # @param {type:"number"}
CHAPTER_FORMAT = "wav"   # @param ["wav", "mp3"]
LANG = "a"               # @param {type:"string"}
DEVICE_OVERRIDE = "None" # @param ["None", "cuda", "cpu"]
ZIP_NAME = ""            # @param {type:"string"}

# @markdown **Upload an EPUB when prompted.**
from google.colab import files
print("Upload an EPUB…")
_uploaded = files.upload()
# An empty result (presumably what a cancelled dialog returns — TODO confirm)
# would otherwise surface as a bare StopIteration from next(); fail with a
# readable message instead.
if not _uploaded:
    raise RuntimeError("No EPUB was uploaded; re-run this cell and choose a file.")
# Only the first uploaded file is used; its key is the saved file name.
_epub_key = next(iter(_uploaded))

# ---- Run (no edits needed below) ----
# The form supplies the literal string "None"; map it to a real None so that
# synth_epub falls back to the global DEVICE. An empty ZIP_NAME likewise
# becomes None so the default archive name is used.
_dev = None if DEVICE_OVERRIDE == "None" else DEVICE_OVERRIDE
zip_path = synth_epub(
    _epub_key, voice=VOICE, speed=SPEED,
    per_chapter_format=CHAPTER_FORMAT,
    lang_code=LANG, device=_dev,
    zip_name=(ZIP_NAME or None)
)
deliver(zip_path)
print("Done:", zip_path)
