In [None]:
#@markdown # Check GPU type
#@markdown ### Factory reset runtime if you don't have the desired GPU.

#@markdown ---

#@markdown ## It is recommended to not use the K80

# Shell escape: run `nvidia-smi -L` and show its output in the cell so the
# user can see which GPU this runtime was given before going any further.
!nvidia-smi -L
#@markdown All GPUs work properly, but vary in speed. K80 and P4 are not recommended.

#@markdown ---

In [None]:
#@markdown # Anti-Disconnect for Google Colab
#@markdown ## Run this to stop it from disconnecting automatically (will disconnect after 6 - 12 hours, though.)

# Import the display helpers explicitly instead of relying on `import IPython`
# exposing the `IPython.display` submodule and on the kernel-injected `display`.
from IPython.display import Javascript, display

# JavaScript injected into the notebook page: once a minute, look up the
# toolbar "connect" button and click it. The lookup result is checked first so
# that a missing button (selector no longer matching) does not raise a
# TypeError in the browser console on every tick.
js_code = '''
function ClickConnect(){
  console.log("Working");
  const connectButton = document.querySelector("colab-toolbar-button#connect");
  if (connectButton) {
    connectButton.click();
  }
}
setInterval(ClickConnect, 60000);
'''
display(Javascript(js_code))

In [None]:
#@markdown ## Mount your Google Drive

# Google Drive authentication: mounting prompts for authorization.
from google.colab import drive

# Use the absolute mount point. The training cell writes checkpoints to
# /content/drive/MyDrive/..., so the mount must not depend on the kernel's
# current working directory (the relative 'drive' only matched while the
# working directory was /content).
drive.mount('/content/drive')

In [None]:
#@markdown # Download pretrained model and install tacotron 2

#make wavs folder
!mkdir '/content/wavs'

#get haruqa japanese pretrained model
!gdown https://drive.google.com/uc?id=1j986QrB1C-tY4GLq806xMBfMWVO3YKY8

#download tacotron 2
!git clone -q https://github.com/NVIDIA/tacotron2
!pip install unidecode
!pip install tensorflow==1.15

In [None]:
#@markdown # Optional: Unzip file to unpack wavs
#@markdown ### If you have a lot of wav files, then zip them all into one file locally on your system, then upload it and copy the path. Otherwise, you may just upload your wavs to /wavs.
#@markdown ---

zip_file_path = "/content/wavs.zip" #@param {type:"string"}
!unzip $zip_file_path -d '/content/wavs'

In [None]:
#@markdown This is for your training configuration (hparams.py)

%%writefile /content/tacotron2/hparams.py
import tensorflow as tf
from text import symbols

transcription='/content/tacotron2/filelists/transcription.txt'#@param {type:'string'}
batchsize=8#@param {type: 'integer'}

def create_hparams(hparams_string=None, verbose=False):
    """Build the hyperparameter set used for training.

    The defaults are grouped by topic below and handed to
    ``tf.contrib.training.HParams`` in the order they are listed.

    Args:
        hparams_string: optional string of overrides; when truthy it is
            logged and passed to ``hparams.parse``.
        verbose: when truthy, the final hyperparameter values are logged.

    Returns:
        The populated ``HParams`` object.
    """
    # Experiment parameters
    experiment_defaults = {
        'epochs': 5000,
        'iters_per_checkpoint': 100,
        'seed': 1234,
        'dynamic_loss_scaling': True,
        'fp16_run': False,
        'distributed_run': False,
        'dist_backend': "nccl",
        'dist_url': "tcp://localhost:54321",
        'cudnn_enabled': True,
        'cudnn_benchmark': False,
        'ignore_layers': ['embedding.weight'],
    }

    # Data parameters (the same filelist serves training and validation)
    data_defaults = {
        'load_mel_from_disk': False,
        'training_files': transcription,
        'validation_files': transcription,
        'text_cleaners': ['convert_to_ascii'],
    }

    # Audio parameters
    audio_defaults = {
        'max_wav_value': 32768.0,
        'sampling_rate': 22050,
        'filter_length': 1024,
        'hop_length': 256,
        'win_length': 1024,
        'n_mel_channels': 80,
        'mel_fmin': 0.0,
        'mel_fmax': 8000.0,
    }

    # Model parameters
    model_defaults = {
        'n_symbols': len(symbols),
        'symbols_embedding_dim': 512,

        # Encoder
        'encoder_kernel_size': 5,
        'encoder_n_convolutions': 3,
        'encoder_embedding_dim': 512,

        # Decoder
        'n_frames_per_step': 1,  # currently only 1 is supported
        'decoder_rnn_dim': 1024,
        'prenet_dim': 256,
        'max_decoder_steps': 1000,
        'gate_threshold': 0.5,
        'p_attention_dropout': 0.1,
        'p_decoder_dropout': 0.1,

        # Attention
        'attention_rnn_dim': 1024,
        'attention_dim': 128,

        # Location layer
        'attention_location_n_filters': 32,
        'attention_location_kernel_size': 31,

        # Mel post-processing network
        'postnet_embedding_dim': 512,
        'postnet_kernel_size': 5,
        'postnet_n_convolutions': 5,
    }

    # Optimization hyperparameters
    optimization_defaults = {
        'use_saved_learning_rate': False,
        'learning_rate': 1e-3,
        'weight_decay': 1e-6,
        'grad_clip_thresh': 1.0,
        'batch_size': batchsize,  # if you have the T4, set this to 14 or less
        'mask_padding': True,  # set model's padded outputs to padded values
    }

    # Merge the groups in declaration order and build the HParams object.
    all_defaults = {}
    for group in (experiment_defaults, data_defaults, audio_defaults,
                  model_defaults, optimization_defaults):
        all_defaults.update(group)
    hparams = tf.contrib.training.HParams(**all_defaults)

    # Apply any overrides supplied by the caller.
    if hparams_string:
        tf.logging.info('Parsing command line hparams: %s', hparams_string)
        hparams.parse(hparams_string)

    if verbose:
        tf.logging.info('Final parsed hparams: %s', hparams.values())

    return hparams

In [None]:
#@markdown # Launch TensorBoard

!rm -rf '/content/logs'
!mkdir '/content/logs'
import tensorflow as tf
import datetime
%reload_ext tensorboard
%tensorboard --logdir '/content/logs'

In [None]:
#@markdown # Begin training

#outputdirectory = "/content/drive/MyDrive/jsut_loanwords128" #@param{type:'string'}

!python /content/tacotron2/train.py \
--log_directory='/content/logs' -c '/content/FlatBaseModel_frontVoiceIsAkitoTenohira_20210418.pt' --warm_start \
--output_directory="/content/drive/MyDrive/jsut_loanwords128" #@param{type:'string'}