## Setup môi trường

In [None]:
# Install pinned PyTorch 2.8.0 with the matching torchvision 0.23.0 and
# torchaudio 2.8.0 builds, taking wheels from the CUDA 12.6 (cu126) PyTorch index.
!pip install torch==2.8.0 torchvision==0.23.0 torchaudio==2.8.0 --index-url https://download.pytorch.org/whl/cu126

In [None]:
# onnxscript is installed without a version pin.
# NOTE(review): presumably needed by the ONNX export step later in this notebook - confirm.
!pip install onnxscript

### Setup piper

In [None]:
# System packages: C/C++ build toolchain (build-essential, cmake, ninja-build),
# espeak-ng with its data and development headers, pkg-config and ffmpeg.
!sudo apt-get update -y
!sudo apt-get install -y build-essential cmake ninja-build espeak-ng espeak-ng-data libespeak-ng-dev pkg-config ffmpeg
# Print the espeak-ng version known to pkg-config as a quick install check.
!pkg-config --modversion espeak-ng

In [None]:
# Fresh clone of piper1-gpl into /content/piper1-gpl.
# The rm -rf removes any previous clone together with everything written inside it
# (for example lightning_logs/ checkpoints from an earlier training run in this session).
%cd /content
!rm -rf piper1-gpl
!git clone https://github.com/OHF-voice/piper1-gpl.git
%cd piper1-gpl
# Show the working directory so later relative commands can be checked against it.
!pwd

In [None]:
# Upgrade the packaging tools, then install the project in the current directory in
# editable mode with its "train" extra.
# Assumes the working directory is still /content/piper1-gpl from the clone cell.
!python3 -m pip install --upgrade pip setuptools wheel
!python3 -m pip install -e ".[train]"

In [None]:
# Make the repository's build_monotonic_align.sh executable and run it from the repo root.
%cd /content/piper1-gpl
!chmod +x ./build_monotonic_align.sh
!./build_monotonic_align.sh

In [None]:
# Python-side build tooling (scikit-build, cmake, ninja) used before the build_ext
# step in the next cell; none of these packages is version-pinned.
!python3 -m pip install --upgrade pip setuptools wheel scikit-build cmake ninja

In [None]:
# Build the project's extension modules in place (inside the source tree), with
# verbose output so build errors are visible in the cell log.
%cd /content/piper1-gpl
!python3 setup.py build_ext --inplace -v

## Load checkpoint

In [None]:
from google.colab import drive
# Mount Google Drive at /content/drive; the dataset and saved checkpoints used by
# later cells are read from paths under /content/drive/MyDrive.
drive.mount('/content/drive')


In [None]:
Nếu chưa có checkpoint sẵn thì tải về từ huggingface.
Em chỉ chạy cell này ở lần chạy đầu tiên để lấy bản pretrained có sẵn rồi finetune tiếp.
Các lần tiếp theo em bỏ qua để load checkpoint mới sau khi finetune.
Lý do em làm vậy là để chạy train trên nhiều account khác nhau để tận dụng tối đa thời lượng sử dụng GPU T4 miễn phí của Colab.

In [None]:
%cd /content
# Download the pretrained vi_VN vais1000 (medium) checkpoint directly to the path
# that the checkpoint-cleaning cell reads (/content/piper1-gpl/pretrained-model.ckpt),
# so a first run works without the Drive copy step.
!wget "https://huggingface.co/datasets/rhasspy/piper-checkpoints/resolve/main/vi/vi_VN/vais1000/medium/epoch%3D4769-step%3D919580.ckpt?download=true" -O "/content/piper1-gpl/pretrained-model.ckpt"
# The matching voice config is saved as /content/config.json.
!wget "https://huggingface.co/datasets/rhasspy/piper-checkpoints/resolve/main/vi/vi_VN/vais1000/medium/config.json?download=true" -O config.json


In [None]:
Sau mỗi lần finetune, em lưu checkpoint về drive để upload sang một account khác, rồi chạy dòng này để lấy checkpoint.

In [None]:
%cd /content
# Copy (not move) the latest fine-tuned checkpoint from Drive to the path the
# cleaning cell reads. Copying keeps the Drive backup intact and lets this cell be
# re-run after a kernel restart. Update the file name after each training session.
!cp "/content/drive/MyDrive/DoAn_TTS_Mobile/base_models/checkpoints/epoch=4793-step=37296.ckpt" "/content/piper1-gpl/pretrained-model.ckpt"


In [None]:
# Switch into the repository's src/ directory before importing from `piper`.
# NOTE(review): presumably so the package resolves from the source tree - confirm
# whether the editable install already makes this unnecessary.
%cd /content/piper1-gpl/src

In [None]:
from piper.train.vits.lightning import VitsModel

In [None]:
import torch
import inspect
import argparse
import logging
from pathlib import Path, PosixPath, WindowsPath

# Root logger setup: report INFO and above, formatted as "time - level - message".
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

def convert_paths_to_strings(data):
    """
    Recursively replace pathlib path objects with their string form.

    Dictionaries, lists and plain tuples are rebuilt with every nested value
    converted; any other value is returned unchanged. Dictionary keys are
    kept as they are (only values are visited).

    Args:
        data: Arbitrary, possibly nested object (e.g. a loaded checkpoint).

    Returns:
        The same structure with each ``PosixPath`` / ``WindowsPath`` replaced
        by ``str(path)``. Dict subclasses come back as plain ``dict`` and list
        subclasses as plain ``list``.
    """
    if isinstance(data, dict):
        return {key: convert_paths_to_strings(value) for key, value in data.items()}
    if isinstance(data, list):
        return [convert_paths_to_strings(item) for item in data]
    # Exact-type check on purpose: namedtuples and other tuple subclasses have
    # their own constructors, so they are passed through untouched.
    if type(data) is tuple:
        return tuple(convert_paths_to_strings(item) for item in data)
    if isinstance(data, (PosixPath, WindowsPath)):
        return str(data)
    return data


def clean_and_save_checkpoint(input_path: str, output_path: str):
    """
    Write a sanitized copy of a checkpoint file.

    The checkpoint at ``input_path`` is loaded on CPU, every pathlib path
    found by ``convert_paths_to_strings`` is turned into a string, and any
    entry of ``hyper_parameters`` whose name is not a parameter of
    ``VitsModel.__init__`` is deleted. The result is saved to ``output_path``,
    overwriting an existing file (a warning is logged in that case).

    Args:
        input_path: Location of the checkpoint to read.
        output_path: Location where the cleaned checkpoint is written.

    Returns:
        None. When ``input_path`` is not an existing file, an error is logged
        and the function returns without writing anything.
    """
    if not Path(input_path).is_file():
        logging.error(f"File not found: {input_path}")
        return

    if Path(output_path).exists():
        logging.warning(f"Output file already exists: {output_path}. It will be overwritten.")

    logging.info(f"Loading checkpoint from: {input_path}")
    # NOTE(review): weights_only=False performs a full unpickle, which can execute
    # arbitrary code - only run this on checkpoints from a trusted source.
    raw_state = torch.load(input_path, map_location="cpu", weights_only=False)

    # Pass 1: swap Path objects for plain strings.
    logging.info("Searching for and converting unsafe Path objects to strings...")
    cleaned_checkpoint = convert_paths_to_strings(raw_state)
    logging.info("Conversion complete.")

    # Pass 2: drop hyperparameters the model constructor does not declare.
    if "hyper_parameters" in cleaned_checkpoint:
        logging.info("Checking and removing outdated hyperparameters...")

        hparams = cleaned_checkpoint["hyper_parameters"]
        accepted = inspect.signature(VitsModel.__init__).parameters
        stale = sorted(set(hparams.keys()) - set(accepted.keys()))

        if not stale:
            logging.info("No outdated hyperparameters found.")
        else:
            logging.info("Found the following invalid parameters to remove:")
            for param in stale:
                logging.info(f" - {param}")
                del hparams[param]

    # Pass 3: persist the cleaned checkpoint.
    logging.info(f"Saving the fully cleaned checkpoint to: {output_path}")
    torch.save(cleaned_checkpoint, output_path)
    logging.info("Done! The new checkpoint file has been successfully created and is fully compatible.")


if __name__ == "__main__":
    # Clean the fetched checkpoint and write the result next to it.
    clean_and_save_checkpoint(
        input_path="/content/piper1-gpl/pretrained-model.ckpt",
        output_path="/content/piper1-gpl/pretrained-model-cleaned.ckpt",
    )

## Train

In [None]:
# Entry-point module of piper.train; it is patched in place below.
main_py_path = "/content/piper1-gpl/src/piper/train/__main__.py"

# Preamble that registers pathlib.PosixPath via torch.serialization.add_safe_globals.
prepend_code = "".join([
    "import torch\n",
    "import pathlib\n",
    "torch.serialization.add_safe_globals([pathlib.PosixPath])\n",
])

# Load the current file contents.
with open(main_py_path, "r") as f:
    original_content = f.read()

# Only patch once: if the exact preamble is already in the file, leave it alone.
if prepend_code in original_content:
    print("__main__.py already contains the necessary torch.serialization.add_safe_globals.")
else:
    modified_content = prepend_code + original_content
    with open(main_py_path, "w") as f:
        f.write(modified_content)
    print("Modified __main__.py to add torch.serialization.add_safe_globals.")


In [None]:
# Run training from the repository root; the export cell later reads checkpoints
# from /content/piper1-gpl/lightning_logs.
%cd /content/piper1-gpl

In [None]:
# Fine-tune with `piper.train fit`:
#   - voice name "nagiya", dataset (metadata.csv + wavs/) and config.json on Google Drive
#   - 22050 Hz sample rate, espeak voice "vi-vn-x-south", cache in /content/piper_cache
#   - batch size 4
#   - starts from the cleaned checkpoint produced by the checkpoint-cleaning cell
!python3 -m piper.train fit \
  --data.voice_name "nagiya" \
  --data.csv_path /content/drive/MyDrive/DoAn_TTS_Mobile/vieneu_dataset/metadata.csv \
  --data.audio_dir /content/drive/MyDrive/DoAn_TTS_Mobile/vieneu_dataset/wavs \
  --model.sample_rate 22050 \
  --data.espeak_voice "vi-vn-x-south" \
  --data.cache_dir /content/piper_cache \
  --data.config_path /content/drive/MyDrive/DoAn_TTS_Mobile/vieneu_dataset/config.json \
  --data.batch_size 4 \
  --ckpt_path "/content/piper1-gpl/pretrained-model-cleaned.ckpt"

## Export model

In [None]:
Nhớ chọn lại tên checkpoint mới khi đổi account.

In [None]:
# Export a training checkpoint to /content/model.onnx.
# The checkpoint path (version_0, epoch/step in the file name) is hard-coded and must
# be edited to match the file actually present under lightning_logs/ for this session.
!python3 -m piper.train.export_onnx \
  --checkpoint "/content/piper1-gpl/lightning_logs/version_0/checkpoints/epoch=4818-step=76146.ckpt" \
  --output-file "/content/model.onnx"

In [None]:
import shutil

# Place the dataset's config.json next to the exported model as model.onnx.json.
shutil.copyfile(
    src="/content/drive/MyDrive/DoAn_TTS_Mobile/vieneu_dataset/config.json",
    dst="/content/model.onnx.json",
)

## Inference

In [None]:
# Switch into the repository's src/ directory before importing `piper` for inference.
# NOTE(review): presumably so the package resolves from the source tree - confirm.
%cd /content/piper1-gpl/src

In [None]:
import wave
from piper import PiperVoice, SynthesisConfig

In [None]:
# Load the exported ONNX voice together with its JSON config, requesting CUDA.
voice = PiperVoice.load(
    model_path="/content/model.onnx",
    config_path="/content/model.onnx.json",
    use_cuda=True,
)

In [None]:
# Synthesis settings passed to voice.synthesize_wav in the next cell.
syn_config = SynthesisConfig(
    volume=1,  # NOTE(review): presumably 1 = unchanged loudness - confirm against piper docs
    length_scale=1.0,  # NOTE(review): presumably 1.0 = normal speaking rate - confirm
    noise_scale=1.0,  # more audio variation
    noise_w_scale=1.0,  # more speaking variation
    normalize_audio=False, # use raw audio from voice
)

In [None]:
# Synthesize one Vietnamese test sentence into /content/test.wav using syn_config.
with wave.open("/content/test.wav", "wb") as wav_file:
    voice.synthesize_wav(
        "Tôi tên Nagiya, hiện tại đang học tại trường Đại học Khoa học tự nhiên. Đây là giọng đọc sau khi được fine tune, Một hai ba bốn",
        wav_file,
        syn_config=syn_config,
    )

In [None]:
from IPython.display import Audio
# Last expression of the cell: renders an audio player for the synthesized file.
Audio("/content/test.wav")