<a href="https://colab.research.google.com/github/MarkTarry/Piper-TTS/blob/main/Training.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Mount Google Drive for datasets and outputs

In [None]:
# Mount Google Drive into the Colab filesystem at DRIVE_MOUNT_DIR.
from google.colab import drive
DRIVE_MOUNT_DIR = '/content/drive'
drive.mount(DRIVE_MOUNT_DIR)

# Base folder on the mounted Drive; the per-voice data directory is built
# underneath it in the paths/hyperparameters cell.
PIPER_DATA_DIR = f"{DRIVE_MOUNT_DIR}/MyDrive/Piper"

In [None]:
# Alternative voice configuration kept for reference (currently disabled):
# VOICE_NAME="jarvis"
# ESPEAK_LANGUAGE="en"
# IETF_BCP_47_LANGUAGE="en_GB"

# Active voice configuration.
# VOICE_NAME: names the per-voice data and cache directories and is passed to
#   training as --data.voice_name.
# ESPEAK_LANGUAGE: passed to training as --data.espeak_voice.
# IETF_BCP_47_LANGUAGE: used as the prefix of the backed-up checkpoint and
#   exported ONNX file names.
VOICE_NAME="majelbarret"
ESPEAK_LANGUAGE="en-us"
IETF_BCP_47_LANGUAGE="en_US"

Colab runtime + GPU check

In [None]:
# Check GPU
import torch, platform, sys
print("Python:", sys.version.split()[0])
print("PyTorch:", torch.__version__)
print("CUDA available:", torch.cuda.is_available())
!nvidia-smi

System packages (incl. eSpeak dev)

In [None]:
# Refresh the apt package index before installing anything.
!sudo apt-get update -y
# Install the native build toolchain (build-essential, cmake, ninja, pkg-config),
# eSpeak NG with its data and development headers, and ffmpeg.
!sudo apt-get install -y build-essential cmake ninja-build espeak-ng espeak-ng-data libespeak-ng-dev pkg-config ffmpeg
# Print the espeak-ng version as reported by pkg-config.
!pkg-config --modversion espeak-ng

Clone repo fresh

In [None]:
# Start from a clean checkout: delete any previous clone under /content and
# clone the repository again.
%cd /content
!rm -rf piper1-gpl
!git clone https://github.com/OHF-voice/piper1-gpl.git
# Make the repository root the working directory and print it.
%cd piper1-gpl
!pwd

Python deps (editable install, no venv in Colab)

In [None]:
!python3 -m pip install --upgrade pip setuptools wheel
!python3 -m pip install -e ".[train]"

Build the Cython extension used for alignment

In [None]:
# Work from the repository root, where the build script lives.
%cd /content/piper1-gpl
# Make the script executable, then run it.
!chmod +x ./build_monotonic_align.sh
!./build_monotonic_align.sh

Dev build (repo mode)

In [None]:
# Install/upgrade Python-side build tooling (scikit-build, cmake, ninja) along
# with pip, setuptools and wheel. Presumably required by the build_ext step in
# the following cell -- confirm against the repository's build docs.
!python3 -m pip install --upgrade pip setuptools wheel scikit-build cmake ninja

In [None]:
# Work from the repository root, where setup.py lives.
%cd /content/piper1-gpl
# Build the extension modules in place inside the checkout, with verbose output.
!python3 setup.py build_ext --inplace -v

Set paths and training hyperparams

In [None]:
from pathlib import Path
import os

# ==== CHANGE THESE ====
SAMPLE_RATE_HZ  = 22050
BATCH_SIZE      = 32       # drop to 8 or 4 if you OOM

# Per-voice dataset layout on Drive.
VOICE_DATA_DIR  = Path(f"{PIPER_DATA_DIR}/{VOICE_NAME}")
AUDIO_DIR       = VOICE_DATA_DIR.joinpath("wavs")
CSV_PATH        = VOICE_DATA_DIR.joinpath("metadata.csv")
CONFIG_PATH     = VOICE_DATA_DIR.joinpath("config.json")

# Resume from this voice's own latest.ckpt when it exists; otherwise fall back
# to the pretrained checkpoint URL.
CHECKPOINT_PATH = VOICE_DATA_DIR.joinpath("latest.ckpt")
CHECKPOINT_PATH = (
    CHECKPOINT_PATH
    if CHECKPOINT_PATH.exists()
    else "https://huggingface.co/datasets/rhasspy/piper-checkpoints/resolve/main/en/en_US/lessac/medium/epoch%3D2164-step%3D1355540.ckpt"
)

# The cache lives on the Colab VM's local disk, not on Drive.
CACHE_DIR       = Path(f"/content/piper_cache/{VOICE_NAME}")
CACHE_DIR.mkdir(parents=True, exist_ok=True)

# Summarise what was resolved so problems are visible before training starts.
for label, value in (
    ("CSV exists:", CSV_PATH.exists()),
    ("Audio dir exists:", AUDIO_DIR.exists()),
    ("Cache dir:", CACHE_DIR),
    ("Config will be written to:", CONFIG_PATH),
    ("Checkpoint path:", CHECKPOINT_PATH),
):
    print(label, value)

Quick sanity checks

In [None]:
import pandas as pd, io, os, textwrap

# Sanity-check the dataset: preview the metadata CSV and confirm that the first
# few audio files it references exist under AUDIO_DIR.
csv_path = str(CSV_PATH)
if os.path.exists(csv_path):
    try:
        # Pipe-delimited, two columns, no header row. dtype=str keeps the file
        # names verbatim; with dtype inference, numeric-looking names such as
        # "0001" are parsed as integers and the existence check below would
        # look for "1" and report false "missing" files.
        df = pd.read_csv(
            csv_path,
            sep="|",
            header=None,
            names=["audio","text"],
            dtype=str,
        )
        print(df.head())
        # Only the first five rows are checked; this is a smoke test, not a
        # full validation of the dataset.
        missing = [a for a in df["audio"].head(5) if not (AUDIO_DIR/str(a)).exists()]
        print("Missing among first 5:", missing)
    except Exception as e:
        # Best-effort check: report the problem instead of stopping the notebook.
        print("CSV read error:", e)
else:
    print("CSV not found at:", csv_path)

# Kick off training

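When no `latest.ckpt` exists yet, training resumes from the pretrained lessac checkpoint (epoch 2164), so a new model's first epoch is typically ~`epoch=2165`.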

In [None]:
!timeout 90m python3 -m piper.train fit \
  --data.voice_name "$VOICE_NAME" \
  --data.csv_path "$CSV_PATH" \
  --data.audio_dir "$AUDIO_DIR" \
  --model.sample_rate 22050 \
  --data.espeak_voice "$ESPEAK_LANGUAGE" \
  --data.cache_dir "$CACHE_DIR" \
  --data.config_path "$CONFIG_PATH" \
  --data.batch_size 8 \
  --ckpt_path "$CHECKPOINT_PATH"

Back up the latest checkpoint to Drive and export the model to ONNX

In [None]:
import os
import glob
import re

# Find the latest version directory
lightning_logs_dir = "/content/piper1-gpl/lightning_logs"
version_dirs = glob.glob(os.path.join(lightning_logs_dir, "version_*"))
if version_dirs:
    latest_version_dir = max(version_dirs, key=os.path.getmtime)
    print(f"Latest version directory found: {latest_version_dir}")

    # Find the first .ckpt file in the latest version directory
    ckpt_files = glob.glob(os.path.join(latest_version_dir, "checkpoints", "*.ckpt"))
    if ckpt_files:
        first_ckpt_file = min(ckpt_files, key=os.path.getctime)
        print(f"First checkpoint file found: {first_ckpt_file}")

        # Extract epoch and step from the filename
        match = re.search(r"epoch=(\d+)-step=(\d+)", os.path.basename(first_ckpt_file))
        if match:
            epoch = match.group(1)
            step = match.group(2)

            file_name = f"{IETF_BCP_47_LANGUAGE}_{VOICE_NAME}_epoch={epoch}-step={step}"

            !cp -v {first_ckpt_file} {VOICE_DATA_DIR}/{file_name}.ckpt
            !cp -v -f {first_ckpt_file} {VOICE_DATA_DIR}/latest.ckpt

            !python3 -m piper.train.export_onnx \
                --checkpoint {first_ckpt_file} \
                --output-file {VOICE_DATA_DIR}/{file_name}.onnx

            !cp -v {VOICE_DATA_DIR}/config.json {VOICE_DATA_DIR}/{file_name}.onnx.json
        else:
            print("Could not extract epoch and step from checkpoint filename.")
    else:
        print("No checkpoint files found in the latest version directory.")
else:
    print("No version directories found in lightning_logs.")