# Import packages   

# In [10]:
import torch
import voice_to_text as vtt
import Mel_creator as mc

from data_utils_2 import TextMelLoader, TextMelCollate
from torch.utils.data.distributed import DistributedSampler
from logger import Tacotron2Logger
from distributed import apply_gradient_allreduce
from loss_function import Tacotron2Loss
from torch.cuda.amp import autocast, GradScaler

from torch.utils.data import DataLoader
import torch.distributed as dist
import time
from unidecode import unidecode
import math
from numpy import finfo
import os
import argparse
from model_FV import Tacotron2
import tqdm
from shutil import copytree

# Functions

# In [9]:
def prepare_dataloaders(hparams):
    """Build the training DataLoader, the validation dataset and the collate function.

    Both datasets are created from the same audio/text settings in `hparams`;
    only the file-list key differs. The validation set is returned as a
    dataset (not a loader).

    Returns
    -------
    (train_loader, valset, collate_fn)
    """
    # Settings passed positionally to TextMelLoader after the file list.
    shared_keys = (
        'text_cleaners', 'max_wav_value', 'sampling_rate',
        'load_mel_from_disk', 'filter_length', 'hop_length', 'win_length',
        'n_mel_channels', 'mel_fmin', 'mel_fmax', 'seed',
    )

    def _dataset_for(files_key):
        return TextMelLoader(hparams[files_key],
                             *(hparams[key] for key in shared_keys))

    trainset = _dataset_for('training_files')
    valset = _dataset_for('validation_files')
    collate_fn = TextMelCollate(n_frames_per_step=1)

    # A DistributedSampler does its own shuffling, so DataLoader-level
    # shuffling is only enabled for single-process runs.
    distributed = hparams['distributed_run']
    train_sampler = DistributedSampler(trainset) if distributed else None
    shuffle = not distributed

    loader_options = dict(
        num_workers=1,
        shuffle=shuffle,
        sampler=train_sampler,
        batch_size=hparams['batch_size'],
        pin_memory=False,
        drop_last=True,
        collate_fn=collate_fn,
    )
    train_loader = DataLoader(trainset, **loader_options)
    return train_loader, valset, collate_fn


def reduce_tensor(tensor, n_gpus):
    """Return the sum of `tensor` across all processes divided by `n_gpus`.

    The input tensor is left untouched; the reduction runs on a clone.
    Requires an initialized torch.distributed process group.
    """
    rt = tensor.clone()
    # `dist.reduce_op` is the deprecated alias; `ReduceOp` is the supported enum.
    dist.all_reduce(rt, op=dist.ReduceOp.SUM)
    rt /= n_gpus
    return rt

def init_distributed(n_gpus, rank, group_name):
    """Select this process's CUDA device and join the distributed process group.

    Params
    ------
    n_gpus (int): world size passed to the process group
    rank (int): rank of this process; also used to pick the CUDA device
    group_name (string): forwarded to `init_process_group`
    """
    assert torch.cuda.is_available(), "Distributed mode requires CUDA."
    print("Initializing Distributed")

    # Wrap the rank around the visible device count so each process lands on
    # a valid GPU index.
    device_index = rank % torch.cuda.device_count()
    torch.cuda.set_device(device_index)

    # Backend and rendezvous address are fixed here.
    process_group_options = dict(
        backend="gloo",
        init_method="tcp://localhost:54321",
        world_size=n_gpus,
        rank=rank,
        group_name=group_name,
    )
    dist.init_process_group(**process_group_options)

    print("Done initializing distributed")

def prepare_directories_and_logger(output_directory, log_directory, rank):
    """Create the output directory and a Tacotron2Logger on rank 0.

    Every other rank does nothing and gets `None`.

    Params
    ------
    output_directory (string): created (mode 0o775) if it does not exist yet
    log_directory (string): joined onto `output_directory` for the logger
    rank (int): rank of the calling process
    """
    if rank != 0:
        return None

    if not os.path.isdir(output_directory):
        os.makedirs(output_directory)
        os.chmod(output_directory, 0o775)

    log_path = os.path.join(output_directory, log_directory)
    return Tacotron2Logger(log_path)


## Functions under construction
#########################



def load_model(hparams):
    """Instantiate Tacotron2 on the GPU and apply run-mode specific setup.

    Params
    ------
    hparams (dict): reads 'fp16_run' and 'distributed_run'

    Returns the model, wrapped by `apply_gradient_allreduce` when
    'distributed_run' is set.
    """
    model = Tacotron2().cuda()
    if hparams['fp16_run']:
        # The attention mask fill value must be representable in half
        # precision, so use the smallest float16 value. (The original
        # expression called `.min` on the string 'float16' and raised.)
        model.decoder.attention_layer.score_mask_value = finfo('float16').min

    if hparams['distributed_run']:
        model = apply_gradient_allreduce(model)

    return model


def warm_start_model(checkpoint_path, model, ignore_layers):
    """Initialise `model` from a checkpoint's weights, optionally skipping layers.

    Params
    ------
    checkpoint_path (string): file holding a dict with a 'state_dict' entry
    model: module whose parameters are overwritten in place
    ignore_layers (list): state-dict keys that keep the model's current values

    Returns the same `model` object.
    """
    assert os.path.isfile(checkpoint_path)
    print("Warm starting model from checkpoint '{}'".format(checkpoint_path))
    saved_weights = torch.load(checkpoint_path, map_location='cpu')['state_dict']

    if len(ignore_layers) > 0:
        # Start from the model's current weights and overlay every saved entry
        # that is not explicitly ignored.
        merged_weights = model.state_dict()
        merged_weights.update(
            (name, tensor) for name, tensor in saved_weights.items()
            if name not in ignore_layers
        )
        saved_weights = merged_weights

    model.load_state_dict(saved_weights)
    return model


def load_checkpoint(checkpoint_path, model, optimizer):
    """Restore model and optimizer state from a training checkpoint.

    The checkpoint is a dict with 'state_dict', 'optimizer', 'learning_rate'
    and 'iteration' entries (the layout written by `save_checkpoint`).

    Returns
    -------
    (model, optimizer, learning_rate, iteration)
    """
    assert os.path.isfile(checkpoint_path)
    print(f"Loading checkpoint '{checkpoint_path}'")

    snapshot = torch.load(checkpoint_path, map_location='cpu')
    model.load_state_dict(snapshot['state_dict'])
    optimizer.load_state_dict(snapshot['optimizer'])
    learning_rate = snapshot['learning_rate']
    iteration = snapshot['iteration']

    print(f"Loaded checkpoint '{checkpoint_path}' from iteration {iteration}")
    return model, optimizer, learning_rate, iteration


def save_checkpoint(model, optimizer, learning_rate, iteration, filepath):
    """Write model/optimizer state plus training progress to `filepath`.

    If a KeyboardInterrupt arrives while the file is being written, the save
    is attempted once more so the checkpoint on disk is complete; the
    interrupt itself is not re-raised.
    """
    def _write_snapshot():
        # Rebuilt on every call so a retry captures the state afresh.
        snapshot = {
            'iteration': iteration,
            'state_dict': model.state_dict(),
            'optimizer': optimizer.state_dict(),
            'learning_rate': learning_rate,
        }
        torch.save(snapshot, filepath)

    print(f"Saving model and optimizer state at iteration {iteration} to {filepath}")
    try:
        _write_snapshot()
    except KeyboardInterrupt:
        print("interrupt received while saving, waiting for save to complete.")
        _write_snapshot()
    print("Model Saved")






########################
##Validation process

def validate(model, criterion, valset, iteration, batch_size, n_gpus,
             collate_fn, logger, distributed_run, rank, epoch, start_eposh, learning_rate):
    """Compute the mean validation loss and report it on rank 0.

    Params
    ------
    model: module exposing `parse_batch(batch) -> (x, y)`; switched to eval
        mode for the pass and back to train mode afterwards
    criterion: callable `(y_pred, y) -> loss tensor`
    valset: dataset wrapped in a fresh DataLoader on every call
    iteration (int): global step, used for printing and logging
    batch_size (int): validation batch size
    n_gpus (int): divisor used when averaging the loss across processes
    collate_fn: collate function handed to the DataLoader
    logger: object with `log_validation(...)`; only used on rank 0
    distributed_run (bool): use a DistributedSampler and all-reduce the loss
    rank (int): rank of the calling process
    epoch (int): epoch number, printed only
    start_eposh (float): `time.perf_counter()` value taken at epoch start
    learning_rate (float): current learning rate, printed only

    If the loader yields no batches, validation is skipped (a notice is
    printed on rank 0) instead of failing on an undefined batch counter.
    """
    model.eval()
    total_loss = 0.0
    num_batches = 0
    y = y_pred = None
    with torch.no_grad():
        val_sampler = DistributedSampler(valset) if distributed_run else None
        val_loader = DataLoader(valset, sampler=val_sampler, num_workers=1,
                                shuffle=False, batch_size=batch_size,
                                pin_memory=False, collate_fn=collate_fn)

        for batch in val_loader:
            x, y = model.parse_batch(batch)
            y_pred = model(x)
            loss = criterion(y_pred, y)
            if distributed_run:
                total_loss += reduce_tensor(loss.data, n_gpus).item()
            else:
                total_loss += loss.item()
            num_batches += 1

    model.train()

    if num_batches == 0:
        # Nothing to average and no batch to hand to the logger.
        if rank == 0:
            print("Epoch: {} Validation skipped: validation loader produced no batches".format(epoch))
        return

    val_loss = total_loss / num_batches
    if rank == 0:
        print("Epoch: {} Validation loss {}: {:9f}  Time: {:.1f}m LR: {:.6f}".format(epoch, iteration, val_loss,(time.perf_counter()-start_eposh)/60, learning_rate))
        # The targets/predictions of the last validation batch are logged.
        logger.log_validation(val_loss, model, y, y_pred, iteration)
       
##Training process

def _scheduled_learning_rate(iteration, hparams):
    """Return the step's learning rate: 5e-4 until 'decay_start', then exponential decay with a floor."""
    learning_rate = 5e-4
    if iteration >= hparams['decay_start']:
        steps_into_decay = iteration - hparams['decay_start']
        learning_rate *= math.exp(-steps_into_decay / 8000)
    return max(hparams['min_learning_rate'], learning_rate)


def train(log_directory, checkpoint_path, warm_start, n_gpus,
          rank, group_name, hparams, log_directory2):
    """Training and validation logging results to tensorboard and stdout

    Params
    ------
    log_directory (string): directory copied to `log_directory2` after each epoch
    checkpoint_path (string): checkpoint to resume from and to overwrite after
        each epoch
    warm_start (bool): load only the weights (minus 'ignore_layers') instead
        of the full training state
    n_gpus (int): number of gpus
    rank (int): rank of current gpu
    group_name (string): forwarded to `init_distributed`
    hparams (dict): hyper-parameters and paths
    log_directory2 (string or None): optional destination for the log copy
    """
    if hparams['distributed_run']:
        init_distributed(n_gpus, rank, group_name)

    torch.manual_seed(hparams['seed'])
    torch.cuda.manual_seed(hparams['seed'])

    # load_model() already wraps the model for gradient all-reduce in
    # distributed runs, so it must not be wrapped a second time here.
    model = load_model(hparams)

    optimizer = torch.optim.Adam(model.parameters(), lr=hparams['learning_rate'],
                                 weight_decay=hparams['weight_decay'])

    use_amp = hparams['fp16_run']
    scaler = GradScaler() if use_amp else None

    criterion = Tacotron2Loss()

    # Gate logger creation on the same `rank` that gates its use below.
    logger = prepare_directories_and_logger(
        hparams['ouputh_checkpoint_path'], hparams['log_directory_1'], rank)

    train_loader, valset, collate_fn = prepare_dataloaders(hparams)

    # Load checkpoint if one exists
    iteration = 0
    epoch_offset = 0
    if checkpoint_path is not None and os.path.isfile(checkpoint_path):
        if warm_start:
            model = warm_start_model(
                checkpoint_path, model, hparams['ignore_layers'])
        else:
            # NOTE: the stored learning rate is not restored; the schedule
            # below derives the rate from the iteration counter on every step.
            model, optimizer, _, iteration = load_checkpoint(
                checkpoint_path, model, optimizer)
            iteration += 1  # next iteration is iteration + 1
            epoch_offset = max(0, int(iteration / len(train_loader)))
    else:
        # No checkpoint yet: warm start from a file named "pretrained_model"
        # in the working directory (it must already exist).
        model = warm_start_model("pretrained_model", model, hparams['ignore_layers'])

    grad_clip = hparams.get('grad_clip_thresh', 1.0)
    learning_rate = 0.0
    model.train()
    # ================ MAIN TRAINING LOOP! ===================
    # The file does `import tqdm`, so the progress bar class is `tqdm.tqdm`.
    for epoch in tqdm.tqdm(range(epoch_offset, hparams['epochs'])):
        print("\nStarting Epoch: {} Iteration: {}".format(epoch, iteration))
        start_eposh = time.perf_counter()
        for batch in tqdm.tqdm(train_loader, total=len(train_loader)):
            start = time.perf_counter()
            learning_rate = _scheduled_learning_rate(iteration, hparams)
            for param_group in optimizer.param_groups:
                param_group['lr'] = learning_rate

            model.zero_grad()
            x, y = model.parse_batch(batch)
            # Single forward pass; the loss is computed under autocast too.
            with autocast(enabled=use_amp):
                y_pred = model(x)
                loss = criterion(y_pred, y)

            if hparams['distributed_run']:
                reduced_loss = reduce_tensor(loss.data, n_gpus).item()
            else:
                reduced_loss = loss.item()

            if use_amp:
                scaler.scale(loss).backward()
                # Clip the real gradients, not the loss-scaled ones.
                scaler.unscale_(optimizer)
                grad_norm = float(torch.nn.utils.clip_grad_norm_(
                    model.parameters(), max_norm=grad_clip))
                is_overflow = not math.isfinite(grad_norm)
                # step() skips the optimizer update on inf/nan gradients;
                # update() must still run so the loss scale can back off.
                scaler.step(optimizer)
                scaler.update()
            else:
                loss.backward()
                grad_norm = float(torch.nn.utils.clip_grad_norm_(
                    model.parameters(), max_norm=grad_clip))
                is_overflow = False
                optimizer.step()

            if not is_overflow and rank == 0:
                duration = time.perf_counter() - start
                logger.log_training(
                    reduced_loss, grad_norm, learning_rate, duration, iteration)
                print("Batch {} loss {:.6f} Grad Norm {:.6f} Time {:.6f}".format(iteration, reduced_loss, grad_norm, duration), end='\r', flush=True)

            iteration += 1
        validate(model, criterion, valset, iteration,
                 hparams['batch_size'], n_gpus, collate_fn, logger,
                 hparams['distributed_run'], rank, epoch, start_eposh, learning_rate)
        save_checkpoint(model, optimizer, learning_rate, iteration, checkpoint_path)
        if log_directory2 is not None:
            # The destination exists from the second epoch on; without
            # dirs_exist_ok the copy would raise FileExistsError.
            copytree(log_directory, log_directory2, dirs_exist_ok=True)



## Training process   

### Hparams

# In [7]:
# Run configuration: filesystem locations, audio settings and training options.
# Paths are raw strings, so the Windows backslashes need no escaping.
hparams = {
    # ---- Locations ----
    'in_audio_path': r'C:\Users\derec\OneDrive\Documents\F-VOICE\audio_test',
    'out_audio_path': r'C:\Users\derec\OneDrive\Documents\F-VOICE',
    'whisper_model': 'base',
    'whipser_language': 'english',
    'out_audio_mels_path': r'C:\Users\derec\OneDrive\Documents\F-VOICE\mels',
    'ouputh_checkpoint_path': r'C:\Users\derec\OneDrive\Documents\F-VOICE\checkpoints',
    'log_directory_1': r'C:\Users\derec\OneDrive\Documents\F-VOICE\logs1',
    'log_directory_2': r'C:\Users\derec\OneDrive\Documents\F-VOICE\logs2',

    # ---- Data ----
    # NOTE(review): training and validation point at the same file list.
    'training_files': r'C:\Users\derec\OneDrive\Documents\F-VOICE\wavs_text.txt',
    'validation_files': r'C:\Users\derec\OneDrive\Documents\F-VOICE\wavs_text.txt',
    'n_gpus': 1,
    'rank': 0,
    'group_name': None,
    'text_cleaners': ['english_cleaners'],

    # ---- Audio ----
    'max_wav_value': 32768.0,
    'sampling_rate': 22050,
    'load_mel_from_disk': False,
    'filter_length': 1024,
    'hop_length': 256,
    'win_length': 1024,
    'n_mel_channels': 80,
    'mel_fmin': 0.0,
    'mel_fmax': 8000.0,
    'seed': 20,

    # ---- Optimization ----
    'use_saved_learning_rate': False,
    'learning_rate': 1e-3,
    'weight_decay': 1e-6,
    'grad_clip_thresh': 1.0,
    'batch_size': 64,
    'mask_padding': True,  # set model's padded outputs to padded values

    # ---- Experiment ----
    'epochs': 500,
    'iters_per_checkpoint': 1000,
    'dynamic_loss_scaling': True,
    'fp16_run': False,
    'distributed_run': False,
    'dist_backend': "gloo",
    'dist_url': "tcp://localhost:54321",
    'cudnn_enabled': True,
    'cudnn_benchmark': True,
    'ignore_layers': ['embedding.weight'],
    'decay_start': 15000,
    'min_learning_rate': 1e-5,
}


#### Step 1: Create text from audio

# In [None]:
# Transcription step: pass the input audio folder and the output folder to
# voice_to_text. The language comes from the configuration instead of being
# hard-coded a second time here.
vtt.voice_to_text(
    hparams['in_audio_path'],
    hparams['out_audio_path'],
    language=hparams['whipser_language'],
)

#### Step 2: Convert audio to mels

# In [None]:
# Mel step: hand the input audio folder and the mel output folder to
# mel_creator (presumably writes one mel tensor per audio file; see Mel_creator).
mc.mel_creator(
    hparams['in_audio_path'],
    hparams['out_audio_mels_path'],
)

# In [None]:
# Checkpoint file path: the configured checkpoint directory plus a fixed name.
checkpoint_path = "{}/{}".format(hparams['ouputh_checkpoint_path'], "Model 0 F-VOICE")