# Voice Cloning App - Remote Training

Remote training for the Voice Cloning App.

**Please ensure you have this notebook enabled with GPU before running (Runtime->Change runtime type)**

Steps:
1. Export your dataset from the app & unzip
2. Create a folder called `Voice-Cloning` in your Google Drive
  1. Create a sub-folder called `datasets` and upload your dataset folder to it
  2. If using a non-English language: create a sub-folder called `alphabets` and upload your alphabet.txt file to it
3. Configure parameters below
4. Run this notebook one cell at a time
  - Connect to your Google Drive when prompted
  - Ensure you have selected the correct options before running the training cell

During training and once training is done you'll find your latest checkpoint in Google Drive within the folder `Voice-Cloning/checkpoints/dataset_name`.

This can be downloaded from your Google Drive and imported into the app under the "Import/Export" menu.

In [None]:
#@title Connect to google drive
from google.colab import drive
import os

# Mount Google Drive so datasets can be read from it and checkpoints written to it.
drive.mount('/content/drive')

# Root folder on Drive that holds everything this notebook reads and writes.
output_directory = "/content/drive/MyDrive/Voice-Cloning"
os.makedirs(output_directory, exist_ok=True)

# Create the datasets folder itself. Previously the root folder was created a
# second time here, so on a fresh Drive the os.listdir() call below raised
# FileNotFoundError before the explanatory assertion could be shown.
dataset_directory = os.path.join(output_directory, "datasets")
os.makedirs(dataset_directory, exist_ok=True)

checkpoint_directory = os.path.join(output_directory, "checkpoints")
os.makedirs(checkpoint_directory, exist_ok=True)

alphabet_directory = os.path.join(output_directory, "alphabets")
os.makedirs(alphabet_directory, exist_ok=True)

datasets = os.listdir(dataset_directory)
assert datasets, "No datasets found in 'Voice-Cloning/datasets'. Please export your dataset from the app, unzip and upload to this folder"

# Check datasets: every entry must be a folder containing metadata.csv and wavs.
# The loop variable is deliberately NOT called `dataset`: the Options cell binds
# that name to a Dropdown widget, and re-running this cell must not replace it
# with a plain string.
for dataset_name in datasets:
  dataset_path = os.path.join(dataset_directory, dataset_name)
  try:
    files = os.listdir(dataset_path)
  except NotADirectoryError:
    raise Exception(f"Dataset '{dataset_name}' is not a folder. Please ensure all datasets are folders containing your metadata.csv & wavs")
  assert "metadata.csv" in files, f"Dataset '{dataset_name}' is missing metadata.csv"
  assert "wavs" in files, f"Dataset '{dataset_name}' is missing wavs folder"

# Map each dataset that already has a checkpoint folder to the files inside it.
checkpoints = {
  dataset_name: os.listdir(os.path.join(checkpoint_directory, dataset_name))
  for dataset_name in datasets
  if os.path.isdir(os.path.join(checkpoint_directory, dataset_name))
}
# Alphabet files uploaded by the user (empty when only the default alphabet is used).
languages = os.listdir(alphabet_directory)

In [None]:
#@title Parameters
import torch
assert torch.cuda.is_available(),  "Please change Runtime type to GPU (Runtime->Change runtime type)"

# Clone the app
!pip install pysrt==1.1.2 pydub==0.24.1 webrtcvad==2.0.10 Unidecode==1.0.22 gdown
!git clone https://github.com/BenAAndrew/Voice-Cloning-App.git
%cd /content/Voice-Cloning-App/
!git checkout 0ad83edb6ca8251c1c3426989af0e31641fe8ec2
from training.train import train
from training import DEFAULT_ALPHABET
from training.utils import load_symbols

# Download pretrained model
import gdown
transfer_learning_path = "/content/drive/MyDrive/Voice-Cloning/pretrained.pt"
gdown.download('1c5ZTuT7J08wLUoVZ2KkUs_VdZuJ86ZqA', "./"+transfer_learning_path, quiet=False)

# Get settings
epochs = 1000 #@param {type:"slider", min:100, max:3500, step:100}
batch_size = 38 #@param {type:"slider", min:12, max:70, step:2}
checkpoint_frequency = 1000 #@param {type:"slider", min:250, max:2500, step:250}
backup_checkpoint_frequency = 10000 #@param {type:"slider", min:2500, max:25000, step:500}
validation_size = 0.2 #@param {type:"slider", min:0.05, max:0.2, step:0.025}
early_stopping = True #@param {type:"boolean"}

In [None]:
#@title Options
import ipywidgets as widgets
from IPython.display import display

# Form widgets
dataset = widgets.Dropdown(options=datasets, description='Dataset:')

# Initially lists the checkpoints of whichever dataset is selected by default.
checkpoint = widgets.Dropdown(
    options=checkpoints.get(dataset.value, []),
    description='Checkpoint:',
)

alphabet = widgets.Dropdown(options=languages, description='Language:')

# Created here but not displayed by this cell.
button = widgets.Button(description="Start training", button_style="success")


def on_change(change):
    """Refresh the checkpoint dropdown when the dataset selection changes.

    `change` is the notification dict delivered by the widget; only
    notifications of type 'change' for the 'value' trait are acted on.
    Datasets with no saved checkpoints yield an empty option list.
    """
    is_value_change = change['type'] == 'change' and change['name'] == 'value'
    if not is_value_change:
        return
    checkpoint.options = checkpoints.get(change['new'], [])


dataset.observe(on_change)

display(dataset, checkpoint)
# The language picker is only shown when alphabet files are available.
if languages:
  display(alphabet)

In [None]:
#@title Train

# Paths inside the selected dataset folder.
dataset_path = os.path.join(dataset_directory, dataset.value)
metadata = os.path.join(dataset_path, "metadata.csv")
wavs = os.path.join(dataset_path, "wavs")

# Per-dataset folder under the checkpoints directory, passed to train() as its output location.
output_directory = os.path.join(checkpoint_directory, dataset.value)

# Use the selected alphabet file when there is one, otherwise the app's default alphabet.
if alphabet.value:
    symbols = load_symbols(os.path.join(alphabet_directory, alphabet.value))
else:
    symbols = DEFAULT_ALPHABET

# Pass the selected checkpoint when there is one; otherwise pass None.
if checkpoint.value:
    checkpoint_path = os.path.join(output_directory, checkpoint.value)
else:
    checkpoint_path = None

train(
    # Data
    metadata_path=metadata,
    audio_directory=wavs,
    symbols=symbols,
    train_size=1-validation_size,
    # Model initialisation
    checkpoint_path=checkpoint_path,
    transfer_learning_path=transfer_learning_path,
    # Schedule
    epochs=epochs,
    batch_size=batch_size,
    early_stopping=early_stopping,
    multi_gpu=False,
    # Checkpointing
    output_directory=output_directory,
    iters_per_checkpoint=checkpoint_frequency,
    iters_per_backup_checkpoint=backup_checkpoint_frequency,
)