# Glow-TTS Training 02

## Google Drive

In [2]:
# Mount Google Drive at /content/drive. The dataset and the training output
# directory used by the cells below both live under this mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## Install Coqui TTS

In [2]:
# upgrade pip, install tts, and upgrade tensorflow
! pip install -U pip
! pip install TTS
! pip install -U tensorflow

Collecting pip
  Downloading pip-24.1-py3-none-any.whl (1.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.8/1.8 MB[0m [31m5.6 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: pip
  Attempting uninstall: pip
    Found existing installation: pip 23.1.2
    Uninstalling pip-23.1.2:
      Successfully uninstalled pip-23.1.2
Successfully installed pip-24.1
Collecting TTS
  Downloading TTS-0.22.0-cp310-cp310-manylinux1_x86_64.whl.metadata (21 kB)
Collecting scikit-learn>=1.3.0 (from TTS)
  Downloading scikit_learn-1.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (11 kB)
Collecting anyascii>=0.3.0 (from TTS)
  Downloading anyascii-0.3.2-py3-none-any.whl.metadata (1.5 kB)
Collecting pysbd>=0.3.4 (from TTS)
  Downloading pysbd-0.3.4-py3-none-any.whl.metadata (6.1 kB)
Collecting umap-learn>=0.5.1 (from TTS)
  Downloading umap_learn-0.5.6-py3-none-any.whl.metadata (21 kB)
Collecting pandas<2.0,>=1.4 (from TTS)
  Downloading pand

Collecting tensorflow
  Downloading tensorflow-2.16.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (4.3 kB)
Collecting h5py>=3.10.0 (from tensorflow)
  Downloading h5py-3.11.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.5 kB)
Collecting ml-dtypes~=0.3.1 (from tensorflow)
  Downloading ml_dtypes-0.3.2-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (20 kB)
Collecting tensorboard<2.17,>=2.16 (from tensorflow)
  Downloading tensorboard-2.16.2-py3-none-any.whl.metadata (1.6 kB)
Collecting keras>=3.0.0 (from tensorflow)
  Downloading keras-3.3.3-py3-none-any.whl.metadata (5.7 kB)
Collecting numpy<2.0.0,>=1.23.5 (from tensorflow)
  Downloading numpy-1.26.4-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (61 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m61.0/61.0 kB[0m [31m2.7 MB/s[0m eta [36m0:00:00[0m
Collecting namex (from keras>=3.0.0->tensorflow)
  Downloading namex-0.0.8-

## Imports

In [2]:
# import the necessary libraries
import os
import glob

from trainer import Trainer, TrainerArgs

from TTS.utils.audio import AudioProcessor
from TTS.tts.models.glow_tts import GlowTTS
from TTS.tts.datasets import load_tts_samples
from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.tts.configs.glow_tts_config import GlowTTSConfig
from TTS.tts.configs.shared_configs import BaseDatasetConfig

## Load Dataset

In [3]:
# Root of the dataset on Google Drive. Every other path in this cell is derived
# from it, so the location only has to be changed in one place.
dataset_path = "/content/drive/MyDrive/ljspeech-002"

# Training output directory (run folders, checkpoints, phoneme cache), kept
# under the dataset root on Google Drive.
output_path = os.path.join(dataset_path, "tts_train_dir")
# exist_ok=True keeps the cell safe to re-run and avoids a check-then-create race.
os.makedirs(output_path, exist_ok=True)

# Dataset configuration: the metadata file is parsed with the "ljspeech" formatter.
# NOTE(review): the folder name suggests a single speaker ("002") stored in
# LJSpeech layout - confirm against the actual data.
dataset_config = BaseDatasetConfig(
    formatter="ljspeech",
    meta_file_train=os.path.join(dataset_path, "metadata.csv"),
    path=dataset_path,
)

## Config

In [4]:
# Glow-TTS model and training configuration.
# Training is configured for 250 epochs with a training batch size of 32.
# A checkpoint is saved every 200 steps. The training log below reports 334
# training instances, i.e. 11 steps per epoch, so that is roughly one
# checkpoint every 18 epochs.
config = GlowTTSConfig(
    # data and output locations
    datasets=[dataset_config],
    output_path=output_path,
    phoneme_cache_path=os.path.join(output_path, "phoneme_cache"),
    # text processing
    text_cleaner="phoneme_cleaners",
    use_phonemes=True,
    phoneme_language="en-us",
    # batching and data loading
    batch_size=32,
    eval_batch_size=16,
    num_loader_workers=4,
    num_eval_loader_workers=4,
    # training schedule and checkpointing
    epochs=250,
    mixed_precision=True,
    save_step=200,
    # evaluation and logging
    run_eval=True,
    test_delay_epochs=-1,
    print_step=25,
    print_eval=False,
)

## Audio Processor

In [5]:
# Audio processor for feature extraction and audio input/output.
# It is built from the model config and handed to the model in a later cell.
ap = AudioProcessor.init_from_config(config)

 > Setting up Audio Processor...
 | > sample_rate:22050
 | > resample:False
 | > num_mels:80
 | > log_func:np.log10
 | > min_level_db:-100
 | > frame_shift_ms:None
 | > frame_length_ms:None
 | > ref_level_db:20
 | > fft_size:1024
 | > power:1.5
 | > preemphasis:0.0
 | > griffin_lim_iters:60
 | > signal_norm:True
 | > symmetric_norm:True
 | > mel_fmin:0
 | > mel_fmax:None
 | > pitch_fmin:1.0
 | > pitch_fmax:640.0
 | > spec_gain:20.0
 | > stft_pad_mode:reflect
 | > max_norm:4.0
 | > clip_norm:True
 | > do_trim_silence:True
 | > trim_db:45
 | > do_sound_norm:False
 | > do_amp_to_db_linear:True
 | > do_amp_to_db_mel:True
 | > do_rms_norm:False
 | > db_level:None
 | > stats_path:None
 | > base:10
 | > hop_length:256
 | > win_length:1024


## Tokenizer

In [6]:
# Tokenizer for converting text to sequences of token ids.
# init_from_config also returns a config object, which is rebound to `config`
# here; the cells below therefore use this returned config.
tokenizer, config = TTSTokenizer.init_from_config(config)

## Load Data Samples

In [7]:
# Load the samples described by dataset_config and split them into a training
# set and an evaluation set; the split sizes are taken from the model config.
# NOTE(review): the original comment described each sample as a list of
# [text, audio_file_path, speaker_name] - confirm the actual sample layout
# against the installed TTS version before relying on it.
train_samples, eval_samples = load_tts_samples(
    dataset_config,
    eval_split=True,
    eval_split_max_size=config.eval_split_max_size,
    eval_split_size=config.eval_split_size,
)

 | > Found 337 files in /content/drive/MyDrive/ljspeech-002


## Model Initialization

In [8]:
# Model initialization with configuration, audio processor, tokenizer, and no speaker manager.
model = GlowTTS(config, ap, tokenizer, speaker_manager=None)

# Checkpoint to resume from. An earlier run trained for 2400+ steps before the
# Colab runtime was disconnected, so training continues from its checkpoint
# instead of starting from scratch.
restore_path = os.path.join(
    output_path, "run-June-19-2024_05+47PM-0000000", "checkpoint_2400.pth"
)
# Fail early with a clear message if the checkpoint is not there (e.g. Drive
# not mounted or the run folder was renamed), before the trainer is built.
if not os.path.isfile(restore_path):
    raise FileNotFoundError(f"Checkpoint to restore not found: {restore_path}")

# Generic trainer API for training the model.
# As the log below shows, restoring starts a new run folder under output_path
# and the epoch counter starts again at 0, while the global step continues
# from the restored checkpoint.
trainer = Trainer(
    TrainerArgs(restore_path=restore_path),
    config,
    output_path,
    model=model,
    train_samples=train_samples,
    eval_samples=eval_samples,
)

 > Training Environment:
 | > Backend: Torch
 | > Mixed precision: True
 | > Precision: fp16
 | > Num. of CPUs: 2
 | > Num. of Torch Threads: 1
 | > Torch seed: 54321
 | > Torch CUDNN: True
 | > Torch CUDNN deterministic: False
 | > Torch CUDNN benchmark: False
 | > Torch TF32 MatMul: False
 > Start Tensorboard: tensorboard --logdir=/content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_01+45PM-0000000
 > Restoring from checkpoint_2400.pth ...
 > Restoring Model...
 > Restoring Optimizer...
 > Model restored from step 2400

 > Model has 28610257 parameters


## Training

In [None]:
# Start the training process. The log below shows a training pass followed by
# an evaluation pass for each epoch, and a BEST MODEL checkpoint line for some
# epochs.
trainer.fit()


[4m[1m > EPOCH: 0/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000




> DataLoader initialization
| > Tokenizer:
	| > add_blank: False
	| > use_eos_bos: False
	| > use_phonemes: True
	| > phonemizer:
		| > phoneme language: en-us
		| > phoneme backend: gruut
| > Number of instances : 334



[1m > TRAINING (2024-06-21 12:12:32) [0m


 | > Preprocessing samples
 | > Max text length: 179
 | > Min text length: 32
 | > Avg text length: 107.95808383233533
 | 
 | > Max audio length: 222643.0
 | > Min audio length: 33971.0
 | > Avg audio length: 146823.3113772455
 | > Num. instances discarded samples: 0
 | > Batch group size: 0.


  self.pid = os.fork()

[1m > EVALUATION [0m





> DataLoader initialization
| > Tokenizer:
	| > add_blank: False
	| > use_eos_bos: False
	| > use_phonemes: True
	| > phonemizer:
		| > phoneme language: en-us
		| > phoneme backend: gruut
| > Number of instances : 3
 | > Preprocessing samples
 | > Max text length: 148
 | > Min text length: 85
 | > Avg text length: 125.66666666666667
 | 
 | > Max audio length: 208051.0
 | > Min audio length: 103603.0
 | > Avg audio length: 168029.66666666666
 | > Num. instances discarded samples: 0
 | > Batch group size: 0.


  return F.conv1d(input, weight, bias, self.stride,


 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 2.6919105052948 [0m(+0)
     | > avg_loss: 0.1961331069469452 [0m(+0)
     | > avg_log_mle: -0.11461901664733887 [0m(+0)
     | > avg_loss_dur: 0.31075212359428406 [0m(+0)

 > BEST MODEL : /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000/best_model_2412.pth

[4m[1m > EPOCH: 1/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:14:11) [0m
  self.pid = os.fork()

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5195260047912598 [0m(-2.17238450050354)
     | > avg_loss:[92m 0.1761244237422943 [0m(-0.02000868320465088)
     | > avg_log_mle:[92m -0.11624085903167725 [0m(-0.001621842384338379)
     | > avg_loss_dur:[92m 0.29236528277397156 [0m(-0.0183868408203125)

 > BEST MODEL : /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000/best_model_2423.pth

[4m[1m > EPOCH: 2/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:14:50) [0m

[1m   --> TIME: 2024-06-21 12:15:05 -- STEP: 2/11 -- GLOBAL_STEP: 2425[0m
     | > loss: 0.14706763625144958  (0.13680413365364075)
     | > log_mle: -0.1611539125442505  (-0.15940940380096436)
     | > loss_dur: 0.3082215487957001  (0.2962135374546051)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(0.7353, device='cuda:0')  (tensor(0.7434, device='cuda:0'))
     | > 

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 1.2559545040130615 [0m(+0.7364284992218018)
     | > avg_loss:[91m 0.1828700304031372 [0m(+0.0067456066608428955)
     | > avg_log_mle:[92m -0.11753380298614502 [0m(-0.0012929439544677734)
     | > avg_loss_dur:[91m 0.3004038333892822 [0m(+0.008038550615310669)


[4m[1m > EPOCH: 3/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:15:30) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.551311731338501 [0m(-0.7046427726745605)
     | > avg_loss:[91m 0.18820402026176453 [0m(+0.005333989858627319)
     | > avg_log_mle:[92m -0.11852538585662842 [0m(-0.0009915828704833984)
     | > avg_loss_dur:[91m 0.30672940611839294 [0m(+0.006325572729110718)


[4m[1m > EPOCH: 4/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:16:05) [0m

[1m   --> TIME: 2024-06-21 12:16:21 -- STEP: 5/11 -- GLOBAL_STEP: 2450[0m
     | > loss: 0.18080687522888184  (0.15272245407104493)
     | > log_mle: -0.14653337001800537  (-0.15260822772979737)
     | > loss_dur: 0.3273402452468872  (0.3053306818008423)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(0.5357, device='cuda:0')  (tensor(0.5732, device='cuda:0'))
     | > current_lr: 1e-06 
     | > step_time: 1.1974  (2.357244873046875)
     | > loader_time: 0.0224  (0.039552259445

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.5545444488525391 [0m(+0.003232717514038086)
     | > avg_loss:[92m 0.18127459287643433 [0m(-0.0069294273853302)
     | > avg_log_mle:[92m -0.11919271945953369 [0m(-0.0006673336029052734)
     | > avg_loss_dur:[92m 0.300467312335968 [0m(-0.006262093782424927)


[4m[1m > EPOCH: 5/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:16:39) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.54925537109375 [0m(-0.0052890777587890625)
     | > avg_loss:[91m 0.18193325400352478 [0m(+0.0006586611270904541)
     | > avg_log_mle:[91m -0.11915743350982666 [0m(+3.528594970703125e-05)
     | > avg_loss_dur:[91m 0.30109068751335144 [0m(+0.0006233751773834229)


[4m[1m > EPOCH: 6/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:17:12) [0m

[1m   --> TIME: 2024-06-21 12:17:35 -- STEP: 8/11 -- GLOBAL_STEP: 2475[0m
     | > loss: 0.19276639819145203  (0.1658630073070526)
     | > log_mle: -0.15391266345977783  (-0.15204587578773499)
     | > loss_dur: 0.34667906165122986  (0.3179088830947876)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(1.0321, device='cuda:0')  (tensor(0.6242, device='cuda:0'))
     | > current_lr: 1.5e-06 
     | > step_time: 0.7252  (1.7648503482341766)
     | > loader_time: 0.0079  (0.02746

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.5760371685028076 [0m(+0.026781797409057617)
     | > avg_loss:[92m 0.18192172050476074 [0m(-1.1533498764038086e-05)
     | > avg_log_mle:[91m -0.11911225318908691 [0m(+4.5180320739746094e-05)
     | > avg_loss_dur:[92m 0.30103397369384766 [0m(-5.671381950378418e-05)


[4m[1m > EPOCH: 7/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:17:51) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5602304935455322 [0m(-0.01580667495727539)
     | > avg_loss:[91m 0.1823868751525879 [0m(+0.00046515464782714844)
     | > avg_log_mle:[92m -0.11918103694915771 [0m(-6.878376007080078e-05)
     | > avg_loss_dur:[91m 0.3015679121017456 [0m(+0.0005339384078979492)


[4m[1m > EPOCH: 8/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:18:25) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.558659553527832 [0m(-0.0015709400177001953)
     | > avg_loss:[92m 0.18176355957984924 [0m(-0.0006233155727386475)
     | > avg_log_mle:[92m -0.11920821666717529 [0m(-2.7179718017578125e-05)
     | > avg_loss_dur:[92m 0.30097177624702454 [0m(-0.0005961358547210693)


[4m[1m > EPOCH: 9/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:18:57) [0m

[1m   --> TIME: 2024-06-21 12:19:01 -- STEP: 0/11 -- GLOBAL_STEP: 2500[0m
     | > loss: 0.07789146900177002  (0.07789146900177002)
     | > log_mle: -0.1699584722518921  (-0.1699584722518921)
     | > loss_dur: 0.2478499412536621  (0.2478499412536621)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(0.6871, device='cuda:0')  (tensor(0.6871, device='cuda:0'))
     | > current_lr: 2.25e-06 
     | > step_time: 1.5467  (1.5467231273651123)
     | > loader_time: 2.5303  (2.5302

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.6014444828033447 [0m(+0.042784929275512695)
     | > avg_loss:[92m 0.17590966820716858 [0m(-0.005853891372680664)
     | > avg_log_mle:[92m -0.11932647228240967 [0m(-0.000118255615234375)
     | > avg_loss_dur:[92m 0.29523614048957825 [0m(-0.005735635757446289)

 > BEST MODEL : /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000/best_model_2511.pth

[4m[1m > EPOCH: 10/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:19:35) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5758326053619385 [0m(-0.02561187744140625)
     | > avg_loss:[91m 0.17651447653770447 [0m(+0.0006048083305358887)
     | > avg_log_mle:[91m -0.11931633949279785 [0m(+1.0132789611816406e-05)
     | > avg_loss_dur:[91m 0.2958308160305023 [0m(+0.0005946755409240723)


[4m[1m > EPOCH: 11/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:20:12) [0m

[1m   --> TIME: 2024-06-21 12:20:26 -- STEP: 3/11 -- GLOBAL_STEP: 2525[0m
     | > loss: 0.14482605457305908  (0.13968878984451294)
     | > log_mle: -0.14788389205932617  (-0.1559940973917643)
     | > loss_dur: 0.29270994663238525  (0.2956828872362773)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(1.1650, device='cuda:0')  (tensor(0.9243, device='cuda:0'))
     | > current_lr: 2.75e-06 
     | > step_time: 2.7049  (2.8809019724527993)
     | > loader_time: 0.0567  (0.040

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.585282564163208 [0m(+0.009449958801269531)
     | > avg_loss:[91m 0.17678967118263245 [0m(+0.0002751946449279785)
     | > avg_log_mle:[92m -0.11941039562225342 [0m(-9.40561294555664e-05)
     | > avg_loss_dur:[91m 0.29620006680488586 [0m(+0.0003692507743835449)


[4m[1m > EPOCH: 12/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:20:49) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.6069920063018799 [0m(+0.021709442138671875)
     | > avg_loss:[92m 0.17542937397956848 [0m(-0.0013602972030639648)
     | > avg_log_mle:[92m -0.11943626403808594 [0m(-2.586841583251953e-05)
     | > avg_loss_dur:[92m 0.2948656380176544 [0m(-0.0013344287872314453)

 > BEST MODEL : /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000/best_model_2544.pth

[4m[1m > EPOCH: 13/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:21:26) [0m

[1m   --> TIME: 2024-06-21 12:21:45 -- STEP: 6/11 -- GLOBAL_STEP: 2550[0m
     | > loss: 0.17748987674713135  (0.15564062694708505)
     | > log_mle: -0.14875197410583496  (-0.15243146816889444)
     | > loss_dur: 0.3262418508529663  (0.3080720951159795)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(0.5580, device='cuda:0')  (tensor(0.6288, device='cuda:0'

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5989959239959717 [0m(-0.007996082305908203)
     | > avg_loss:[91m 0.17647713422775269 [0m(+0.001047760248184204)
     | > avg_log_mle:[92m -0.11952710151672363 [0m(-9.083747863769531e-05)
     | > avg_loss_dur:[91m 0.2960042357444763 [0m(+0.0011385977268218994)


[4m[1m > EPOCH: 14/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:22:02) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5655233860015869 [0m(-0.033472537994384766)
     | > avg_loss:[92m 0.17548036575317383 [0m(-0.0009967684745788574)
     | > avg_log_mle:[92m -0.11955392360687256 [0m(-2.682209014892578e-05)
     | > avg_loss_dur:[92m 0.2950342893600464 [0m(-0.0009699463844299316)


[4m[1m > EPOCH: 15/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:22:36) [0m

[1m   --> TIME: 2024-06-21 12:22:58 -- STEP: 9/11 -- GLOBAL_STEP: 2575[0m
     | > loss: 0.18327438831329346  (0.16579846209949917)
     | > log_mle: -0.14632153511047363  (-0.1520196861690945)
     | > loss_dur: 0.3295959234237671  (0.31781814826859367)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(0.9008, device='cuda:0')  (tensor(0.8514, device='cuda:0'))
     | > current_lr: 3.7499999999999997e-06 
     | > step_time: 0.7882  (1.2025850613911946)
     | > loader_time: 

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 1.5689778327941895 [0m(+1.0034544467926025)
     | > avg_loss:[92m 0.17407622933387756 [0m(-0.0014041364192962646)
     | > avg_log_mle:[92m -0.1198737621307373 [0m(-0.0003198385238647461)
     | > avg_loss_dur:[92m 0.29394999146461487 [0m(-0.0010842978954315186)

 > BEST MODEL : /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000/best_model_2577.pth

[4m[1m > EPOCH: 16/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:23:28) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.7588098049163818 [0m(-0.8101680278778076)
     | > avg_loss:[92m 0.17267978191375732 [0m(-0.0013964474201202393)
     | > avg_log_mle:[92m -0.11995577812194824 [0m(-8.20159912109375e-05)
     | > avg_loss_dur:[92m 0.29263556003570557 [0m(-0.0013144314289093018)

 > BEST MODEL : /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000/best_model_2588.pth

[4m[1m > EPOCH: 17/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:24:11) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5687947273254395 [0m(-0.19001507759094238)
     | > avg_loss:[92m 0.171473890542984 [0m(-0.0012058913707733154)
     | > avg_log_mle:[92m -0.12027299404144287 [0m(-0.0003172159194946289)
     | > avg_loss_dur:[92m 0.2917468845844269 [0m(-0.0008886754512786865)

 > BEST MODEL : /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000/best_model_2599.pth

[4m[1m > EPOCH: 18/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:24:54) [0m

[1m   --> TIME: 2024-06-21 12:24:59 -- STEP: 1/11 -- GLOBAL_STEP: 2600[0m
     | > loss: 0.1235416829586029  (0.1235416829586029)
     | > log_mle: -0.15907800197601318  (-0.15907800197601318)
     | > loss_dur: 0.2826196849346161  (0.2826196849346161)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(1.0844, device='cuda:0')  (tensor(1.0844, device='cuda:0'))
  

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 1.5835490226745605 [0m(+1.014754295349121)
     | > avg_loss:[91m 0.171816885471344 [0m(+0.00034299492835998535)
     | > avg_log_mle:[92m -0.12036705017089844 [0m(-9.40561294555664e-05)
     | > avg_loss_dur:[91m 0.29218393564224243 [0m(+0.00043705105781555176)


[4m[1m > EPOCH: 19/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:25:35) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.6018016338348389 [0m(-0.9817473888397217)
     | > avg_loss:[92m 0.16694530844688416 [0m(-0.004871577024459839)
     | > avg_log_mle:[92m -0.12066507339477539 [0m(-0.0002980232238769531)
     | > avg_loss_dur:[92m 0.28761038184165955 [0m(-0.004573553800582886)

 > BEST MODEL : /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000/best_model_2621.pth

[4m[1m > EPOCH: 20/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:26:14) [0m

[1m   --> TIME: 2024-06-21 12:26:29 -- STEP: 4/11 -- GLOBAL_STEP: 2625[0m
     | > loss: 0.1694798767566681  (0.14297743886709213)
     | > log_mle: -0.15152478218078613  (-0.1559045910835266)
     | > loss_dur: 0.3210046589374542  (0.29888202995061874)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(1.0325, device='cuda:0')  (tensor(1.3998, device='cuda:0'))
 

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.8585488796234131 [0m(+0.2567472457885742)
     | > avg_loss:[91m 0.167738139629364 [0m(+0.0007928311824798584)
     | > avg_log_mle:[92m -0.12072980403900146 [0m(-6.473064422607422e-05)
     | > avg_loss_dur:[91m 0.2884679436683655 [0m(+0.0008575618267059326)


[4m[1m > EPOCH: 21/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:26:50) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5863780975341797 [0m(-0.2721707820892334)
     | > avg_loss:[91m 0.17357438802719116 [0m(+0.0058362483978271484)
     | > avg_log_mle:[91m -0.12067306041717529 [0m(+5.6743621826171875e-05)
     | > avg_loss_dur:[91m 0.29424744844436646 [0m(+0.0057795047760009766)


[4m[1m > EPOCH: 22/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:27:21) [0m

[1m   --> TIME: 2024-06-21 12:27:35 -- STEP: 7/11 -- GLOBAL_STEP: 2650[0m
     | > loss: 0.17587339878082275  (0.1557975709438324)
     | > log_mle: -0.15286004543304443  (-0.1535085780279977)
     | > loss_dur: 0.3287334442138672  (0.3093061489718301)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(0.9808, device='cuda:0')  (tensor(1.2589, device='cuda:0'))
     | > current_lr: 5.5e-06 
     | > step_time: 0.8374  (1.2723926135471888)
     | > loader_time: 0.0075  (0.017763

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5838649272918701 [0m(-0.0025131702423095703)
     | > avg_loss:[91m 0.17367014288902283 [0m(+9.575486183166504e-05)
     | > avg_log_mle:[92m -0.12076258659362793 [0m(-8.952617645263672e-05)
     | > avg_loss_dur:[91m 0.29443272948265076 [0m(+0.00018528103828430176)


[4m[1m > EPOCH: 23/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:27:53) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.599250078201294 [0m(+0.015385150909423828)
     | > avg_loss:[92m 0.17180365324020386 [0m(-0.0018664896488189697)
     | > avg_log_mle:[92m -0.12087225914001465 [0m(-0.00010967254638671875)
     | > avg_loss_dur:[92m 0.2926759123802185 [0m(-0.001756817102432251)


[4m[1m > EPOCH: 24/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:28:29) [0m

[1m   --> TIME: 2024-06-21 12:28:45 -- STEP: 10/11 -- GLOBAL_STEP: 2675[0m
     | > loss: 0.13428127765655518  (0.1608154684305191)
     | > log_mle: -0.1732633113861084  (-0.15543328523635863)
     | > loss_dur: 0.3075445890426636  (0.31624875366687777)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(1.6623, device='cuda:0')  (tensor(1.2505, device='cuda:0'))
     | > current_lr: 6e-06 
     | > step_time: 0.4942  (1.1066381692886353)
     | > loader_time: 0.0062  (0.0158175

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5846967697143555 [0m(-0.014553308486938477)
     | > avg_loss:[91m 0.17279785871505737 [0m(+0.0009942054748535156)
     | > avg_log_mle:[91m -0.12076115608215332 [0m(+0.00011110305786132812)
     | > avg_loss_dur:[91m 0.2935590147972107 [0m(+0.0008831024169921875)


[4m[1m > EPOCH: 25/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:29:01) [0m
  self.pid = os.fork()

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.6150026321411133 [0m(+0.030305862426757812)
     | > avg_loss:[92m 0.17173343896865845 [0m(-0.0010644197463989258)
     | > avg_log_mle:[92m -0.1212085485458374 [0m(-0.00044739246368408203)
     | > avg_loss_dur:[92m 0.29294198751449585 [0m(-0.0006170272827148438)


[4m[1m > EPOCH: 26/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:29:33) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5886335372924805 [0m(-0.026369094848632812)
     | > avg_loss:[91m 0.172525554895401 [0m(+0.0007921159267425537)
     | > avg_log_mle:[92m -0.12146377563476562 [0m(-0.00025522708892822266)
     | > avg_loss_dur:[91m 0.2939893305301666 [0m(+0.0010473430156707764)


[4m[1m > EPOCH: 27/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:30:04) [0m

[1m   --> TIME: 2024-06-21 12:30:11 -- STEP: 2/11 -- GLOBAL_STEP: 2700[0m
     | > loss: 0.149117112159729  (0.1382044404745102)
     | > log_mle: -0.16467297077178955  (-0.16285520792007446)
     | > loss_dur: 0.31379008293151855  (0.30105964839458466)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(1.1650, device='cuda:0')  (tensor(1.0651, device='cuda:0'))
     | > current_lr: 6.75e-06 
     | > step_time: 1.858  (1.5029699802398682)
     | > loader_time: 0.0162  (0.011566

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5863897800445557 [0m(-0.0022437572479248047)
     | > avg_loss:[92m 0.1718067228794098 [0m(-0.0007188320159912109)
     | > avg_log_mle:[92m -0.12153196334838867 [0m(-6.818771362304688e-05)
     | > avg_loss_dur:[92m 0.29333868622779846 [0m(-0.0006506443023681641)


[4m[1m > EPOCH: 28/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:30:35) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.5962893962860107 [0m(+0.009899616241455078)
     | > avg_loss:[92m 0.1717754602432251 [0m(-3.126263618469238e-05)
     | > avg_log_mle:[92m -0.12181103229522705 [0m(-0.0002790689468383789)
     | > avg_loss_dur:[91m 0.29358649253845215 [0m(+0.0002478063106536865)


[4m[1m > EPOCH: 29/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:31:06) [0m

[1m   --> TIME: 2024-06-21 12:31:18 -- STEP: 5/11 -- GLOBAL_STEP: 2725[0m
     | > loss: 0.16567206382751465  (0.1458720088005066)
     | > log_mle: -0.1496291160583496  (-0.15583863258361816)
     | > loss_dur: 0.31530117988586426  (0.30171064138412473)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(0.9568, device='cuda:0')  (tensor(1.0591, device='cuda:0'))
     | > current_lr: 7.25e-06 
     | > step_time: 0.79  (1.5413455486297607)
     | > loader_time: 0.0072  (0.02089

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.6029543876647949 [0m(+0.00666499137878418)
     | > avg_loss:[92m 0.17140227556228638 [0m(-0.0003731846809387207)
     | > avg_log_mle:[91m -0.12174010276794434 [0m(+7.092952728271484e-05)
     | > avg_loss_dur:[92m 0.2931423783302307 [0m(-0.00044411420822143555)


[4m[1m > EPOCH: 30/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:31:40) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5985190868377686 [0m(-0.004435300827026367)
     | > avg_loss:[92m 0.1709003746509552 [0m(-0.0005019009113311768)
     | > avg_log_mle:[92m -0.12207949161529541 [0m(-0.0003393888473510742)
     | > avg_loss_dur:[92m 0.2929798662662506 [0m(-0.00016251206398010254)


[4m[1m > EPOCH: 31/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:32:12) [0m

[1m   --> TIME: 2024-06-21 12:32:27 -- STEP: 8/11 -- GLOBAL_STEP: 2750[0m
     | > loss: 0.18160510063171387  (0.1572103574872017)
     | > log_mle: -0.15751898288726807  (-0.15566052496433258)
     | > loss_dur: 0.33912408351898193  (0.31287088245153427)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(1.0228, device='cuda:0')  (tensor(0.9861, device='cuda:0'))
     | > current_lr: 7.75e-06 
     | > step_time: 0.7884  (1.3169847428798676)
     | > loader_time: 0.0088  (0.01

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.6095497608184814 [0m(+0.01103067398071289)
     | > avg_loss:[92m 0.17034456133842468 [0m(-0.0005558133125305176)
     | > avg_log_mle:[92m -0.12262403964996338 [0m(-0.0005445480346679688)
     | > avg_loss_dur:[92m 0.29296860098838806 [0m(-1.1265277862548828e-05)


[4m[1m > EPOCH: 32/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:32:44) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5975315570831299 [0m(-0.012018203735351562)
     | > avg_loss:[92m 0.1699312925338745 [0m(-0.0004132688045501709)
     | > avg_log_mle:[92m -0.12284219264984131 [0m(-0.0002181529998779297)
     | > avg_loss_dur:[92m 0.2927734851837158 [0m(-0.0001951158046722412)


[4m[1m > EPOCH: 33/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:33:14) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5715744495391846 [0m(-0.025957107543945312)
     | > avg_loss:[91m 0.17008858919143677 [0m(+0.00015729665756225586)
     | > avg_log_mle:[92m -0.12310516834259033 [0m(-0.00026297569274902344)
     | > avg_loss_dur:[91m 0.2931937575340271 [0m(+0.0004202723503112793)


[4m[1m > EPOCH: 34/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:33:45) [0m

[1m   --> TIME: 2024-06-21 12:33:49 -- STEP: 0/11 -- GLOBAL_STEP: 2775[0m
     | > loss: 0.06086397171020508  (0.06086397171020508)
     | > log_mle: -0.17466247081756592  (-0.17466247081756592)
     | > loss_dur: 0.235526442527771  (0.235526442527771)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(1.4376, device='cuda:0')  (tensor(1.4376, device='cuda:0'))
     | > current_lr: 8.5e-06 
     | > step_time: 1.6946  (1.6946032047271729)
     | > loader_time: 2.814  (2.81402

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5704119205474854 [0m(-0.0011625289916992188)
     | > avg_loss:[91m 0.1735670566558838 [0m(+0.0034784674644470215)
     | > avg_log_mle:[92m -0.12336695194244385 [0m(-0.0002617835998535156)
     | > avg_loss_dur:[91m 0.29693400859832764 [0m(+0.003740251064300537)


[4m[1m > EPOCH: 35/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:34:16) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.6102068424224854 [0m(+0.039794921875)
     | > avg_loss:[92m 0.16845375299453735 [0m(-0.0051133036613464355)
     | > avg_log_mle:[91m -0.12330853939056396 [0m(+5.841255187988281e-05)
     | > avg_loss_dur:[92m 0.2917622923851013 [0m(-0.005171716213226318)


[4m[1m > EPOCH: 36/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:34:46) [0m

[1m   --> TIME: 2024-06-21 12:34:55 -- STEP: 3/11 -- GLOBAL_STEP: 2800[0m
     | > loss: 0.13212835788726807  (0.12688310941060385)
     | > log_mle: -0.15248560905456543  (-0.16029155254364014)
     | > loss_dur: 0.2846139669418335  (0.28717466195424396)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(1.4760, device='cuda:0')  (tensor(1.3395, device='cuda:0'))
     | > current_lr: 9e-06 
     | > step_time: 0.9633  (1.3981742858886719)
     | > loader_time: 0.0392  (0.026866912841

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.8953144550323486 [0m(+0.2851076126098633)
     | > avg_loss:[92m 0.166486918926239 [0m(-0.00196683406829834)
     | > avg_log_mle:[92m -0.12386035919189453 [0m(-0.0005518198013305664)
     | > avg_loss_dur:[92m 0.29034727811813354 [0m(-0.0014150142669677734)

 > BEST MODEL : /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000/best_model_2808.pth

[4m[1m > EPOCH: 37/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:35:38) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 1.3819870948791504 [0m(+0.48667263984680176)
     | > avg_loss:[91m 0.1727493703365326 [0m(+0.006262451410293579)
     | > avg_log_mle:[92m -0.12400054931640625 [0m(-0.00014019012451171875)
     | > avg_loss_dur:[91m 0.29674991965293884 [0m(+0.006402641534805298)


[4m[1m > EPOCH: 38/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:36:19) [0m

[1m   --> TIME: 2024-06-21 12:36:33 -- STEP: 6/11 -- GLOBAL_STEP: 2825[0m
     | > loss: 0.16390341520309448  (0.14655438562234244)
     | > log_mle: -0.1527320146560669  (-0.15697932243347168)
     | > loss_dur: 0.3166354298591614  (0.3035337080558141)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(1.2207, device='cuda:0')  (tensor(1.2090, device='cuda:0'))
     | > current_lr: 9.499999999999999e-06 
     | > step_time: 0.7615  (1.5381879011789958)
     | > loader_time: 0.00

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5984456539154053 [0m(-0.7835414409637451)
     | > avg_loss:[92m 0.17202001810073853 [0m(-0.0007293522357940674)
     | > avg_log_mle:[92m -0.1243445873260498 [0m(-0.0003440380096435547)
     | > avg_loss_dur:[92m 0.29636460542678833 [0m(-0.0003853142261505127)


[4m[1m > EPOCH: 39/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:36:51) [0m

[1m > EVALUATION [0m

  return F.conv1d(input, weight, bias, self.stride,


 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5950145721435547 [0m(-0.003431081771850586)
     | > avg_loss:[92m 0.16953271627426147 [0m(-0.0024873018264770508)
     | > avg_log_mle:[91m -0.12423050403594971 [0m(+0.00011408329010009766)
     | > avg_loss_dur:[92m 0.2937632203102112 [0m(-0.0026013851165771484)


[4m[1m > EPOCH: 40/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:37:21) [0m

[1m   --> TIME: 2024-06-21 12:37:35 -- STEP: 9/11 -- GLOBAL_STEP: 2850[0m
     | > loss: 0.1733393669128418  (0.15496810608439976)
     | > log_mle: -0.15116655826568604  (-0.15702560212877062)
     | > loss_dur: 0.32450592517852783  (0.31199370821317035)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(1.0893, device='cuda:0')  (tensor(1.3897, device='cuda:0'))
     | > current_lr: 9.999999999999999e-06 
     | > step_time: 0.9065  (1.1051199436187744)
     | > loader_time:

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5880303382873535 [0m(-0.006984233856201172)
     | > avg_loss:[92m 0.16406014561653137 [0m(-0.0054725706577301025)
     | > avg_log_mle:[92m -0.12461841106414795 [0m(-0.0003879070281982422)
     | > avg_loss_dur:[92m 0.2886785566806793 [0m(-0.00508466362953186)

 > BEST MODEL : /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000/best_model_2852.pth

[4m[1m > EPOCH: 41/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:38:00) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5873029232025146 [0m(-0.0007274150848388672)
     | > avg_loss:[91m 0.16845780611038208 [0m(+0.004397660493850708)
     | > avg_log_mle:[92m -0.12481105327606201 [0m(-0.0001926422119140625)
     | > avg_loss_dur:[91m 0.2932688593864441 [0m(+0.0045903027057647705)


[4m[1m > EPOCH: 42/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:38:38) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.6046364307403564 [0m(+0.017333507537841797)
     | > avg_loss:[92m 0.16627493500709534 [0m(-0.002182871103286743)
     | > avg_log_mle:[92m -0.12519359588623047 [0m(-0.00038254261016845703)
     | > avg_loss_dur:[92m 0.2914685308933258 [0m(-0.0018003284931182861)


[4m[1m > EPOCH: 43/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:39:12) [0m

[1m   --> TIME: 2024-06-21 12:39:21 -- STEP: 1/11 -- GLOBAL_STEP: 2875[0m
     | > loss: 0.12497055530548096  (0.12497055530548096)
     | > log_mle: -0.16495180130004883  (-0.16495180130004883)
     | > loss_dur: 0.2899223566055298  (0.2899223566055298)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(0.8837, device='cuda:0')  (tensor(0.8837, device='cuda:0'))
     | > current_lr: 1.075e-05 
     | > step_time: 2.5622  (2.5622148513793945)
     | > loader_time: 0.0501  (0.05

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5839526653289795 [0m(-0.020683765411376953)
     | > avg_loss:[92m 0.15328150987625122 [0m(-0.012993425130844116)
     | > avg_log_mle:[92m -0.1252816915512085 [0m(-8.809566497802734e-05)
     | > avg_loss_dur:[92m 0.2785632014274597 [0m(-0.012905329465866089)

 > BEST MODEL : /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000/best_model_2885.pth

[4m[1m > EPOCH: 44/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:39:56) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.6788909435272217 [0m(+0.09493827819824219)
     | > avg_loss:[91m 0.16550955176353455 [0m(+0.012228041887283325)
     | > avg_log_mle:[91m -0.12524652481079102 [0m(+3.516674041748047e-05)
     | > avg_loss_dur:[91m 0.29075607657432556 [0m(+0.012192875146865845)


[4m[1m > EPOCH: 45/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:40:33) [0m

[1m   --> TIME: 2024-06-21 12:40:49 -- STEP: 4/11 -- GLOBAL_STEP: 2900[0m
     | > loss: 0.15517306327819824  (0.13058659434318542)
     | > log_mle: -0.15714514255523682  (-0.1615830957889557)
     | > loss_dur: 0.31231820583343506  (0.2921696901321411)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(0.9945, device='cuda:0')  (tensor(1.0951, device='cuda:0'))
     | > current_lr: 1.125e-05 
     | > step_time: 2.422  (2.633490800857544)
     | > loader_time: 0.0509  (0.037681

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5901527404785156 [0m(-0.08873820304870605)
     | > avg_loss:[92m 0.1570512056350708 [0m(-0.008458346128463745)
     | > avg_log_mle:[92m -0.12554025650024414 [0m(-0.000293731689453125)
     | > avg_loss_dur:[92m 0.28259146213531494 [0m(-0.00816461443901062)


[4m[1m > EPOCH: 46/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:41:08) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5847156047821045 [0m(-0.005437135696411133)
     | > avg_loss:[91m 0.1640763282775879 [0m(+0.00702512264251709)
     | > avg_log_mle:[92m -0.1261235475540161 [0m(-0.0005832910537719727)
     | > avg_loss_dur:[91m 0.290199875831604 [0m(+0.0076084136962890625)


[4m[1m > EPOCH: 47/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:41:40) [0m

[1m   --> TIME: 2024-06-21 12:41:59 -- STEP: 7/11 -- GLOBAL_STEP: 2925[0m
     | > loss: 0.17001047730445862  (0.14375399265970504)
     | > log_mle: -0.1585557460784912  (-0.15941153253827775)
     | > loss_dur: 0.32856622338294983  (0.3031655251979828)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(1.2970, device='cuda:0')  (tensor(1.4922, device='cuda:0'))
     | > current_lr: 1.1750000000000001e-05 
     | > step_time: 0.8095  (1.6973294530596053)
     | > loader_time: 0.007

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.5909719467163086 [0m(+0.0062563419342041016)
     | > avg_loss:[91m 0.16652336716651917 [0m(+0.0024470388889312744)
     | > avg_log_mle:[92m -0.1266242265701294 [0m(-0.0005006790161132812)
     | > avg_loss_dur:[91m 0.29314759373664856 [0m(+0.0029477179050445557)


[4m[1m > EPOCH: 48/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:42:19) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.5919654369354248 [0m(+0.000993490219116211)
     | > avg_loss:[92m 0.15656760334968567 [0m(-0.009955763816833496)
     | > avg_log_mle:[92m -0.12671566009521484 [0m(-9.143352508544922e-05)
     | > avg_loss_dur:[92m 0.2832832634449005 [0m(-0.009864330291748047)


[4m[1m > EPOCH: 49/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:42:51) [0m

[1m   --> TIME: 2024-06-21 12:43:05 -- STEP: 10/11 -- GLOBAL_STEP: 2950[0m
     | > loss: 0.13460469245910645  (0.1475994884967804)
     | > log_mle: -0.17974352836608887  (-0.16169605255126954)
     | > loss_dur: 0.3143482208251953  (0.3092955410480499)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(2.8828, device='cuda:0')  (tensor(2.0470, device='cuda:0'))
     | > current_lr: 1.225e-05 
     | > step_time: 0.5407  (1.023856782913208)
     | > loader_time: 0.0055  (0.01472

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.5996286869049072 [0m(+0.007663249969482422)
     | > avg_loss:[92m 0.15513578057289124 [0m(-0.0014318227767944336)
     | > avg_log_mle:[92m -0.12682664394378662 [0m(-0.00011098384857177734)
     | > avg_loss_dur:[92m 0.28196242451667786 [0m(-0.0013208389282226562)


[4m[1m > EPOCH: 50/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:43:21) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 1.448659896850586 [0m(+0.8490312099456787)
     | > avg_loss:[92m 0.1539362668991089 [0m(-0.0011995136737823486)
     | > avg_log_mle:[92m -0.12759041786193848 [0m(-0.0007637739181518555)
     | > avg_loss_dur:[92m 0.28152668476104736 [0m(-0.00043573975563049316)


[4m[1m > EPOCH: 51/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:43:57) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5972075462341309 [0m(-0.8514523506164551)
     | > avg_loss:[91m 0.1605234444141388 [0m(+0.006587177515029907)
     | > avg_log_mle:[91m -0.12749290466308594 [0m(+9.751319885253906e-05)
     | > avg_loss_dur:[91m 0.28801634907722473 [0m(+0.006489664316177368)


[4m[1m > EPOCH: 52/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:44:30) [0m

[1m   --> TIME: 2024-06-21 12:44:38 -- STEP: 2/11 -- GLOBAL_STEP: 2975[0m
     | > loss: 0.1303912103176117  (0.12231773138046265)
     | > log_mle: -0.17087531089782715  (-0.16927069425582886)
     | > loss_dur: 0.30126652121543884  (0.2915884256362915)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(1.4004, device='cuda:0')  (tensor(1.4500, device='cuda:0'))
     | > current_lr: 1.3e-05 
     | > step_time: 1.7658  (1.704005241394043)
     | > loader_time: 0.0184  (0.021151304

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.6091225147247314 [0m(+0.011914968490600586)
     | > avg_loss:[91m 0.16153603792190552 [0m(+0.0010125935077667236)
     | > avg_log_mle:[92m -0.12815332412719727 [0m(-0.0006604194641113281)
     | > avg_loss_dur:[91m 0.2896893620491028 [0m(+0.0016730129718780518)


[4m[1m > EPOCH: 53/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:45:02) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.6088366508483887 [0m(-0.00028586387634277344)
     | > avg_loss:[92m 0.15584811568260193 [0m(-0.005687922239303589)
     | > avg_log_mle:[92m -0.12833261489868164 [0m(-0.000179290771484375)
     | > avg_loss_dur:[92m 0.28418073058128357 [0m(-0.005508631467819214)


[4m[1m > EPOCH: 54/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:45:34) [0m

[1m   --> TIME: 2024-06-21 12:45:46 -- STEP: 5/11 -- GLOBAL_STEP: 3000[0m
     | > loss: 0.15700796246528625  (0.13343065977096558)
     | > log_mle: -0.1568235158920288  (-0.16321072578430176)
     | > loss_dur: 0.31383147835731506  (0.2966413855552673)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(1.9887, device='cuda:0')  (tensor(1.8205, device='cuda:0'))
     | > current_lr: 1.35e-05 
     | > step_time: 0.9609  (1.6200244903564454)
     | > loader_time: 0.0258  (0.022

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 1.2179200649261475 [0m(+0.6090834140777588)
     | > avg_loss:[91m 0.16281500458717346 [0m(+0.006966888904571533)
     | > avg_log_mle:[92m -0.12905192375183105 [0m(-0.0007193088531494141)
     | > avg_loss_dur:[91m 0.2918669283390045 [0m(+0.007686197757720947)


[4m[1m > EPOCH: 55/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:46:18) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.7871158123016357 [0m(-0.4308042526245117)
     | > avg_loss:[92m 0.1609724462032318 [0m(-0.0018425583839416504)
     | > avg_log_mle:[92m -0.12911152839660645 [0m(-5.9604644775390625e-05)
     | > avg_loss_dur:[92m 0.29008397459983826 [0m(-0.0017829537391662598)


[4m[1m > EPOCH: 56/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:46:53) [0m

[1m   --> TIME: 2024-06-21 12:47:12 -- STEP: 8/11 -- GLOBAL_STEP: 3025[0m
     | > loss: 0.170511394739151  (0.13863785937428474)
     | > log_mle: -0.16486620903015137  (-0.16308285295963287)
     | > loss_dur: 0.33537760376930237  (0.3017207123339176)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(2.3576, device='cuda:0')  (tensor(2.3440, device='cuda:0'))
     | > current_lr: 1.4e-05 
     | > step_time: 0.6955  (1.3215722441673279)
     | > loader_time: 0.0073  (0.023901

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.6103425025939941 [0m(-0.1767733097076416)
     | > avg_loss:[92m 0.15275490283966064 [0m(-0.008217543363571167)
     | > avg_log_mle:[92m -0.1296004056930542 [0m(-0.0004888772964477539)
     | > avg_loss_dur:[92m 0.28235530853271484 [0m(-0.007728666067123413)

 > BEST MODEL : /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000/best_model_3028.pth

[4m[1m > EPOCH: 57/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:47:36) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.6073863506317139 [0m(-0.0029561519622802734)
     | > avg_loss:[91m 0.15534070134162903 [0m(+0.002585798501968384)
     | > avg_log_mle:[92m -0.13024687767028809 [0m(-0.0006464719772338867)
     | > avg_loss_dur:[91m 0.2855875790119171 [0m(+0.0032322704792022705)


[4m[1m > EPOCH: 58/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:48:13) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5886609554290771 [0m(-0.01872539520263672)
     | > avg_loss:[91m 0.16160470247268677 [0m(+0.006264001131057739)
     | > avg_log_mle:[92m -0.1303802728652954 [0m(-0.00013339519500732422)
     | > avg_loss_dur:[91m 0.2919849753379822 [0m(+0.0063973963260650635)


[4m[1m > EPOCH: 59/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:48:46) [0m

[1m   --> TIME: 2024-06-21 12:48:50 -- STEP: 0/11 -- GLOBAL_STEP: 3050[0m
     | > loss: 0.038430243730545044  (0.038430243730545044)
     | > log_mle: -0.18436706066131592  (-0.18436706066131592)
     | > loss_dur: 0.22279730439186096  (0.22279730439186096)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(2.4999, device='cuda:0')  (tensor(2.4999, device='cuda:0'))
     | > current_lr: 1.475e-05 
     | > step_time: 1.6608  (1.6607816219329834)
     | > loader_time: 2.6432  (2.

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.6112544536590576 [0m(+0.02259349822998047)
     | > avg_loss:[92m 0.15401580929756165 [0m(-0.007588893175125122)
     | > avg_log_mle:[92m -0.13043570518493652 [0m(-5.543231964111328e-05)
     | > avg_loss_dur:[92m 0.28445151448249817 [0m(-0.007533460855484009)


[4m[1m > EPOCH: 60/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:49:19) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.6759920120239258 [0m(+0.06473755836486816)
     | > avg_loss:[91m 0.15544578433036804 [0m(+0.0014299750328063965)
     | > avg_log_mle:[92m -0.13091576099395752 [0m(-0.0004800558090209961)
     | > avg_loss_dur:[91m 0.28636154532432556 [0m(+0.0019100308418273926)


[4m[1m > EPOCH: 61/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:49:53) [0m

[1m   --> TIME: 2024-06-21 12:50:02 -- STEP: 3/11 -- GLOBAL_STEP: 3075[0m
     | > loss: 0.11199221014976501  (0.10979432861010234)
     | > log_mle: -0.1605832576751709  (-0.1691890557607015)
     | > loss_dur: 0.2725754678249359  (0.27898338437080383)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(4.1132, device='cuda:0')  (tensor(3.3655, device='cuda:0'))
     | > current_lr: 1.525e-05 
     | > step_time: 1.1721  (1.5111398696899414)
     | > loader_time: 0.0226  (0.022

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.6159970760345459 [0m(-0.05999493598937988)
     | > avg_loss:[92m 0.15236154198646545 [0m(-0.003084242343902588)
     | > avg_log_mle:[91m -0.13088488578796387 [0m(+3.0875205993652344e-05)
     | > avg_loss_dur:[92m 0.2832464277744293 [0m(-0.0031151175498962402)

 > BEST MODEL : /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000/best_model_3083.pth

[4m[1m > EPOCH: 62/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:50:31) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5932226181030273 [0m(-0.022774457931518555)
     | > avg_loss:[91m 0.15600383281707764 [0m(+0.0036422908306121826)
     | > avg_log_mle:[92m -0.13172078132629395 [0m(-0.0008358955383300781)
     | > avg_loss_dur:[91m 0.2877246141433716 [0m(+0.004478186368942261)


[4m[1m > EPOCH: 63/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:51:09) [0m

[1m   --> TIME: 2024-06-21 12:51:25 -- STEP: 6/11 -- GLOBAL_STEP: 3100[0m
     | > loss: 0.1429820954799652  (0.12642507751782736)
     | > log_mle: -0.1612452268600464  (-0.16602780421574911)
     | > loss_dur: 0.3042273223400116  (0.2924528817335765)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(1.9432, device='cuda:0')  (tensor(2.2077, device='cuda:0'))
     | > current_lr: 1.575e-05 
     | > step_time: 0.6186  (1.727272590001424)
     | > loader_time: 0.0064  (0.030634

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.6115174293518066 [0m(+0.018294811248779297)
     | > avg_loss:[92m 0.14943498373031616 [0m(-0.006568849086761475)
     | > avg_log_mle:[92m -0.13227951526641846 [0m(-0.0005587339401245117)
     | > avg_loss_dur:[92m 0.2817144989967346 [0m(-0.006010115146636963)

 > BEST MODEL : /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000/best_model_3105.pth

[4m[1m > EPOCH: 64/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:51:45) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.9926092624664307 [0m(+0.381091833114624)
     | > avg_loss:[92m 0.14376598596572876 [0m(-0.005668997764587402)
     | > avg_log_mle:[91m -0.13215398788452148 [0m(+0.00012552738189697266)
     | > avg_loss_dur:[92m 0.27591997385025024 [0m(-0.005794525146484375)

 > BEST MODEL : /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000/best_model_3116.pth

[4m[1m > EPOCH: 65/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:52:30) [0m

[1m   --> TIME: 2024-06-21 12:52:51 -- STEP: 9/11 -- GLOBAL_STEP: 3125[0m
     | > loss: 0.14526012539863586  (0.1337890326976776)
     | > log_mle: -0.15924739837646484  (-0.16563679112328422)
     | > loss_dur: 0.3045075237751007  (0.29942582382096183)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(2.8147, device='cuda:0')  (tensor(2.9737, device='cuda:0'))


 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.5890250205993652 [0m(-0.40358424186706543)
     | > avg_loss:[91m 0.1442832350730896 [0m(+0.0005172491073608398)
     | > avg_log_mle:[92m -0.13222074508666992 [0m(-6.67572021484375e-05)
     | > avg_loss_dur:[91m 0.2765039801597595 [0m(+0.0005840063095092773)


[4m[1m > EPOCH: 66/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:53:09) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.650470495223999 [0m(+0.06144547462463379)
     | > avg_loss:[91m 0.1469622552394867 [0m(+0.0026790201663970947)
     | > avg_log_mle:[92m -0.13293921947479248 [0m(-0.0007184743881225586)
     | > avg_loss_dur:[91m 0.2799014747142792 [0m(+0.0033974945545196533)


[4m[1m > EPOCH: 67/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:53:43) [0m

[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.6241376399993896 [0m(-0.026332855224609375)
     | > avg_loss:[92m 0.13371491432189941 [0m(-0.01324734091758728)
     | > avg_log_mle:[92m -0.13323307037353516 [0m(-0.0002938508987426758)
     | > avg_loss_dur:[92m 0.26694798469543457 [0m(-0.012953490018844604)

 > BEST MODEL : /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000/best_model_3149.pth

[4m[1m > EPOCH: 68/250[0m
 --> /content/drive/MyDrive/ljspeech-002/tts_train_dir/run-June-21-2024_12+06PM-0000000

[1m > TRAINING (2024-06-21 12:54:21) [0m

[1m   --> TIME: 2024-06-21 12:54:30 -- STEP: 1/11 -- GLOBAL_STEP: 3150[0m
     | > loss: 0.09258395433425903  (0.09258395433425903)
     | > log_mle: -0.17499899864196777  (-0.17499899864196777)
     | > loss_dur: 0.2675829529762268  (0.2675829529762268)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(1.4471, device='cuda:0')  (tensor(1.4471, device='cuda:0'))

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.9279077053070068 [0m(+0.3037700653076172)
     | > avg_loss:[92m 0.13156896829605103 [0m(-0.0021459460258483887)
     | > avg_log_mle:[92m -0.13351547718048096 [0m(-0.0002824068069458008)
     | > avg_loss_dur:[92m 0.265084445476532 [0m(-0.0018635392189025879)

