# Train a GlowTTS model on the LJSpeech dataset: a quick tutorial
Ref: https://docs.coqui.ai/en/dev/tutorial_for_nervous_beginners.html

In [1]:
# Print the installed CUDA toolkit compiler (nvcc) version via a shell command.
# This only reports the toolkit build; it does not by itself show that a GPU is usable from Python.
!nvcc --version

nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2023 NVIDIA Corporation
Built on Wed_Feb__8_05:53:42_Coordinated_Universal_Time_2023
Cuda compilation tools, release 12.1, V12.1.66
Build cuda_12.1.r12.1/compiler.32415258_0


In [2]:
import torch
# Ask PyTorch whether a usable CUDA device is present; as the cell's last
# expression, the boolean result is shown as the cell output.
torch.cuda.is_available()

True

In [3]:
import os
# Default to GPU index 0, but respect a device selection already present in the environment.
# NOTE(review): CUDA reads this variable when it initialises, so confirm nothing earlier in the
# session (e.g. a torch.cuda query) has already initialised CUDA -- TODO verify.
if "CUDA_VISIBLE_DEVICES" not in os.environ:
    os.environ["CUDA_VISIBLE_DEVICES"] = "0"

In [4]:
import os
import re
from pathlib import Path

# Trainer: Where the ✨️ happens.
# TrainerArgs: Defines the set of arguments of the Trainer.
from trainer import Trainer, TrainerArgs

# GlowTTSConfig: all model related values for training, validating and testing.
from TTS.tts.configs.glow_tts_config import GlowTTSConfig

# BaseDatasetConfig: defines name, formatter and path of the dataset.
from TTS.tts.configs.shared_configs import BaseDatasetConfig
from TTS.tts.datasets import load_tts_samples
from TTS.tts.models.glow_tts import GlowTTS
from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.utils.audio import AudioProcessor

# Resolve the repository root by walking up from the working directory until a
# directory literally named "Cockatoo.AI" is found, so the notebook can be run from
# any sub-folder of the checkout. The dataset and the training outputs live under
# experiments/model_c inside that root.
def find_project_root(start, marker="Cockatoo.AI"):
    """Return the nearest ancestor of ``start`` (or ``start`` itself) named ``marker``.

    Args:
        start: Directory to begin the search from (``str`` or ``Path``).
        marker: Exact directory name that identifies the project root.

    Returns:
        The matching ``Path``, or ``None`` when no path component equals ``marker``.
    """
    start = Path(start)
    for candidate in (start, *start.parents):
        # Compare whole path components: a regex over the raw cwd string would also
        # accept look-alikes such as "Cockatoo.AI-backup" or "CockatooXAI".
        if candidate.name == marker:
            return candidate
    return None


project_dir = find_project_root(os.getcwd())
if project_dir is None:
    # Outside a checkout the lookup used to die with an opaque
    # "'NoneType' object is not subscriptable"; fall back to the cwd and say so.
    project_dir = Path(os.getcwd())
    print(f" [!] 'Cockatoo.AI' not found in {project_dir}; using the working directory as project root.")
data_path = project_dir / "experiments/model_c/data/ljspeech"
output_path = project_dir / "experiments/model_c/output"

In [5]:
# Fetch LJSpeech only when no entry named "LJSpeech-1.1" is found directly under data_path.
dataset_present = any(entry.name == "LJSpeech-1.1" for entry in data_path.glob("*"))
if not dataset_present:
    # Imported lazily: the downloader is only needed on the first run.
    from TTS.utils.downloaders import download_ljspeech

    download_ljspeech(data_path)

In [6]:
# --- Dataset -----------------------------------------------------------------
# LJSpeech is the training corpus; the dataset config records its formatter, its
# metadata file and where it lives on disk. (A plain dict handed to a custom
# formatter can be used instead.)
ljspeech_dir = data_path / "LJSpeech-1.1/"
dataset_config = BaseDatasetConfig(
    formatter="ljspeech",
    meta_file_train="metadata.csv",
    path=ljspeech_dir.as_posix(),
)
display(dataset_config)

# --- Training configuration --------------------------------------------------
# Model/training settings for GlowTTS (config classes inherit from BaseTTSConfig).
phoneme_cache_dir = output_path / "phoneme_cache"
config = GlowTTSConfig(
    run_description="🐸Coqui trainer run.",
    # Batching and data loading
    batch_size=32,
    eval_batch_size=16,
    num_loader_workers=4,
    num_eval_loader_workers=4,
    # Schedule
    run_eval=True,
    test_delay_epochs=-1,
    # NOTE: mini run for demo. A complete 1000 run will be uploaded separately.
    epochs=100,
    # Text front-end
    text_cleaner="phoneme_cleaners",
    use_phonemes=True,
    phoneme_language="en-us",
    phoneme_cache_path=phoneme_cache_dir.as_posix(),
    # Logging
    print_step=25,
    print_eval=False,
    # Backend
    cudnn_enable=True,
    mixed_precision=True,
    # Outputs and data
    output_path=output_path.as_posix(),
    datasets=[dataset_config],
)

BaseDatasetConfig(formatter='ljspeech', dataset_name='', path='c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/data/ljspeech/LJSpeech-1.1', meta_file_train='metadata.csv', ignored_speakers=None, language='', phonemizer='', meta_file_val='', meta_file_attn_mask='')

In [7]:
# --- Audio processor ---------------------------------------------------------
# Handles feature extraction and audio I/O, mainly for the dataloader and the
# training loggers.
ap = AudioProcessor.init_from_config(config)

# --- Tokenizer ---------------------------------------------------------------
# Turns text into sequences of token IDs. The call also hands back the config
# (default characters are written into it when none are defined), so rebind it.
tokenizer_and_config = TTSTokenizer.init_from_config(config)
tokenizer, config = tokenizer_and_config

# --- Model -------------------------------------------------------------------
# Built from the config (layer counts, embedding size, ...) plus the audio
# processor and tokenizer. A speaker manager is only used by multi-speaker
# models, hence None here.
model = GlowTTS(config, ap, tokenizer, speaker_manager=None)

# --- Data samples ------------------------------------------------------------
# Samples are described upstream as ```[text, audio_file_path, speaker_name]``` entries.
# A custom sample loader, or a custom formatter passed to `load_tts_samples`,
# can be used instead; see `TTS.tts.datasets.load_tts_samples` for details.
split_options = dict(
    eval_split=True,
    eval_split_max_size=config.eval_split_max_size,
    eval_split_size=config.eval_split_size,
)
train_samples, eval_samples = load_tts_samples(dataset_config, **split_options)

 > Setting up Audio Processor...
 | > sample_rate:22050
 | > resample:False
 | > num_mels:80
 | > log_func:np.log10
 | > min_level_db:-100
 | > frame_shift_ms:None
 | > frame_length_ms:None
 | > ref_level_db:20
 | > fft_size:1024
 | > power:1.5
 | > preemphasis:0.0
 | > griffin_lim_iters:60
 | > signal_norm:True
 | > symmetric_norm:True
 | > mel_fmin:0
 | > mel_fmax:None
 | > pitch_fmin:1.0
 | > pitch_fmax:640.0
 | > spec_gain:20.0
 | > stft_pad_mode:reflect
 | > max_norm:4.0
 | > clip_norm:True
 | > do_trim_silence:True
 | > trim_db:45
 | > do_sound_norm:False
 | > do_amp_to_db_linear:True
 | > do_amp_to_db_mel:True
 | > do_rms_norm:False
 | > db_level:None
 | > stats_path:None
 | > base:10
 | > hop_length:256
 | > win_length:1024
 | > Found 13100 files in C:\Users\wkaic\GitHub\Cockatoo.AI\experiments\model_c\data\ljspeech\LJSpeech-1.1


In [8]:
# --- Trainer -----------------------------------------------------------------
# Generic training API shared by the 🐸TTS models (mixed-precision training,
# distributed training, etc.). Default trainer arguments are used.
trainer_args = TrainerArgs()
trainer = Trainer(
    trainer_args,
    config,
    output_path.as_posix(),
    model=model,
    train_samples=train_samples,
    eval_samples=eval_samples,
)

# Lift-off 🚀: run the training loop.
trainer.fit()

 > Training Environment:
 | > Backend: Torch
 | > Mixed precision: True
 | > Precision: fp16
 | > Current device: 0
 | > Num. of GPUs: 1
 | > Num. of CPUs: 20
 | > Num. of Torch Threads: 12
 | > Torch seed: 54321
 | > Torch CUDNN: True
 | > Torch CUDNN deterministic: False
 | > Torch CUDNN benchmark: False
 | > Torch TF32 MatMul: False
 > Start Tensorboard: tensorboard --logdir=c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b
  from .autonotebook import tqdm as notebook_tqdm

 > Model has 28610257 parameters

[4m[1m > EPOCH: 0/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b




> DataLoader initialization
| > Tokenizer:
	| > add_blank: False
	| > use_eos_bos: False
	| > use_phonemes: True
	| > phonemizer:
		| > phoneme language: en-us
		| > phoneme backend: gruut
| > Number of instances : 12969



[1m > TRAINING (2024-04-13 09:45:19) [0m


 | > Preprocessing samples
 | > Max text length: 188
 | > Min text length: 13
 | > Avg text length: 100.90014650319993
 | 
 | > Max audio length: 222643.0
 | > Min audio length: 24499.0
 | > Avg audio length: 144984.29755570978
 | > Num. instances discarded samples: 0
 | > Batch group size: 0.



[1m   --> TIME: 2024-04-13 09:45:32 -- STEP: 0/406 -- GLOBAL_STEP: 0[0m
     | > current_lr: 2.5e-07 
     | > step_time: 1.1599  (1.1599225997924805)
     | > loader_time: 12.6947  (12.694692134857178)

 [!] `train_step()` retuned `None` outputs. Skipping training step.
 [!] `train_step()` retuned `None` outputs. Skipping training step.
 [!] `train_step()` retuned `None` outputs. Skipping training step.
 [!] `train_step()` retuned `None` outputs. Skipping training step.
 [!] `train_step()` retuned `None` outputs. Skipping training step.
 [!] `train_step()` retuned `None` outputs. Skipping training step.
 [!] `train_step()` retuned `None` outputs. Skipping training step.
 [!] `train_step()` retuned `None` outputs. Skipping training step.
 [!] `train_step()` retuned `None` outputs. Skipping training step.
 [!] `train_step()` retuned `None` outputs. Skipping training step.

[1m   --> TIME: 2024-04-13 09:45:40 -- STEP: 25/406 -- GLOBAL_STEP: 25[0m
     | > loss: 3.9253783226013184  (



> DataLoader initialization
| > Tokenizer:
	| > add_blank: False
	| > use_eos_bos: False
	| > use_phonemes: True
	| > phonemizer:
		| > phoneme language: en-us
		| > phoneme backend: gruut
| > Number of instances : 131
 | > Preprocessing samples
 | > Max text length: 174
 | > Min text length: 20
 | > Avg text length: 100.76335877862596
 | 
 | > Max audio length: 222643.0
 | > Min audio length: 34739.0
 | > Avg audio length: 144033.41221374046
 | > Num. instances discarded samples: 0
 | > Batch group size: 0.
 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.11286371946334839 [0m(+0)
     | > avg_loss: 3.748878389596939 [0m(+0)
     | > avg_log_mle: 0.7608828991651535 [0m(+0)
     | > avg_loss_dur: 2.9879954755306244 [0m(+0)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_406.pth

[4m[1m > EPOCH: 1/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 09:48:55) [0m

[1m   --> TIME: 2024-04-13 09:49:14 -- STEP: 19/406 -- GLOBAL_STEP: 425[0m
     | > loss: 3.658818006515503  (3.7185618249993575)
     | > log_mle: 0.7579686045646667  (0.7568229593728718)
     | > loss_dur: 2.9008493423461914  (2.961738862489399)
     | > amp_scaler: 16384.0  (16384.0)
     | > grad_norm: tensor(10.1546, device='cuda:0')  (tensor(10.1667, device='cuda:0'))
     | > current_lr: 2.5e-07 
     | > step_time: 0.204  (0.2180589751193398)
    

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.20228952169418335 [0m(+0.08942580223083496)
     | > avg_loss:[92m 3.4678222239017487 [0m(-0.28105616569519043)
     | > avg_log_mle:[92m 0.7502943426370621 [0m(-0.01058855652809143)
     | > avg_loss_dur:[92m 2.717527836561203 [0m(-0.2704676389694214)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_812.pth

[4m[1m > EPOCH: 2/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 09:51:57) [0m

[1m   --> TIME: 2024-04-13 09:52:13 -- STEP: 13/406 -- GLOBAL_STEP: 825[0m
     | > loss: 3.4133071899414062  (3.4696326622596154)
     | > log_mle: 0.7399927973747253  (0.7469633405025189)
     | > loss_dur: 2.673314332962036  (2.7226693080021787)
     | > amp_scaler: 16384.0  (16384.0)
     | > grad_norm: tensor(9.1708, device='cuda:0')  (tensor(9.0577, device='cu

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.0020756423473358154 [0m(-0.20021387934684753)
     | > avg_loss:[92m 3.0819119215011597 [0m(-0.385910302400589)
     | > avg_log_mle:[92m 0.71932003647089 [0m(-0.030974306166172028)
     | > avg_loss_dur:[92m 2.3625919222831726 [0m(-0.3549359142780304)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_1218.pth

[4m[1m > EPOCH: 3/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 09:55:10) [0m

[1m   --> TIME: 2024-04-13 09:55:26 -- STEP: 7/406 -- GLOBAL_STEP: 1225[0m
     | > loss: 3.143343448638916  (3.1900061539241245)
     | > log_mle: 0.7224079966545105  (0.7201393842697144)
     | > loss_dur: 2.4209353923797607  (2.4698667185647145)
     | > amp_scaler: 16384.0  (16384.0)
     | > grad_norm: tensor(6.7699, device='cuda:0')  (tensor(6.7923, device='c

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.20126044750213623 [0m(+0.19918480515480042)
     | > avg_loss:[92m 2.9254431426525116 [0m(-0.15646877884864807)
     | > avg_log_mle:[92m 0.6609473153948784 [0m(-0.05837272107601166)
     | > avg_loss_dur:[92m 2.2644958198070526 [0m(-0.09809610247612)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_1624.pth

[4m[1m > EPOCH: 4/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 09:58:07) [0m

[1m   --> TIME: 2024-04-13 09:58:18 -- STEP: 1/406 -- GLOBAL_STEP: 1625[0m
     | > loss: 3.135505199432373  (3.135505199432373)
     | > log_mle: 0.6670010685920715  (0.6670010685920715)
     | > loss_dur: 2.4685041904449463  (2.4685041904449463)
     | > amp_scaler: 16384.0  (16384.0)
     | > grad_norm: tensor(5.9373, device='cuda:0')  (tensor(5.9373, device='cud

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.17788788676261902 [0m(-0.023372560739517212)
     | > avg_loss:[92m 2.494863122701645 [0m(-0.4305800199508667)
     | > avg_log_mle:[92m 0.5842952206730843 [0m(-0.07665209472179413)
     | > avg_loss_dur:[92m 1.9105678647756577 [0m(-0.35392795503139496)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_2030.pth

[4m[1m > EPOCH: 5/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 10:00:55) [0m

[1m   --> TIME: 2024-04-13 10:01:11 -- STEP: 20/406 -- GLOBAL_STEP: 2050[0m
     | > loss: 2.476205348968506  (2.578032982349396)
     | > log_mle: 0.6075752973556519  (0.5998351693153381)
     | > loss_dur: 1.8686301708221436  (1.9781978011131287)
     | > amp_scaler: 16384.0  (16384.0)
     | > grad_norm: tensor(4.7467, device='cuda:0')  (tensor(4.8880, device='

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.20051038265228271 [0m(+0.022622495889663696)
     | > avg_loss:[92m 2.0453226566314697 [0m(-0.44954046607017517)
     | > avg_log_mle:[92m 0.5073534175753593 [0m(-0.07694180309772491)
     | > avg_loss_dur:[92m 1.5379692614078522 [0m(-0.3725986033678055)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_2436.pth

[4m[1m > EPOCH: 6/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 10:03:42) [0m

[1m   --> TIME: 2024-04-13 10:03:58 -- STEP: 14/406 -- GLOBAL_STEP: 2450[0m
     | > loss: 2.1241135597229004  (2.158552799906049)
     | > log_mle: 0.5144582986831665  (0.5275523364543914)
     | > loss_dur: 1.6096553802490234  (1.6310005017689295)
     | > amp_scaler: 16384.0  (16384.0)
     | > grad_norm: tensor(3.9714, device='cuda:0')  (tensor(4.0325, device

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.17879170179367065 [0m(-0.02171868085861206)
     | > avg_loss:[92m 1.7771641314029694 [0m(-0.26815852522850037)
     | > avg_log_mle:[92m 0.44783610478043556 [0m(-0.05951731279492378)
     | > avg_loss_dur:[92m 1.329328030347824 [0m(-0.20864123106002808)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_2842.pth

[4m[1m > EPOCH: 7/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 10:06:31) [0m

[1m   --> TIME: 2024-04-13 10:06:46 -- STEP: 8/406 -- GLOBAL_STEP: 2850[0m
     | > loss: 1.928223967552185  (1.9153958857059479)
     | > log_mle: 0.4690878689289093  (0.47548414021730423)
     | > loss_dur: 1.4591361284255981  (1.4399117678403854)
     | > amp_scaler: 16384.0  (16384.0)
     | > grad_norm: tensor(3.6160, device='cuda:0')  (tensor(3.5157, device

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.17993327975273132 [0m(+0.001141577959060669)
     | > avg_loss:[92m 1.580278530716896 [0m(-0.1968856006860733)
     | > avg_log_mle:[92m 0.40132780000567436 [0m(-0.0465083047747612)
     | > avg_loss_dur:[92m 1.1789507418870926 [0m(-0.1503772884607315)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_3248.pth

[4m[1m > EPOCH: 8/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 10:09:32) [0m

[1m   --> TIME: 2024-04-13 10:09:44 -- STEP: 2/406 -- GLOBAL_STEP: 3250[0m
     | > loss: 1.752464771270752  (1.7332292199134827)
     | > log_mle: 0.4321346580982208  (0.42723168432712555)
     | > loss_dur: 1.3203301429748535  (1.3059975504875183)
     | > amp_scaler: 16384.0  (16384.0)
     | > grad_norm: tensor(3.1996, device='cuda:0')  (tensor(3.1787, device='

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.184703528881073 [0m(+0.004770249128341675)
     | > avg_loss:[92m 1.4293521344661713 [0m(-0.1509263962507248)
     | > avg_log_mle:[92m 0.3571823835372925 [0m(-0.04414541646838188)
     | > avg_loss_dur:[92m 1.07216976583004 [0m(-0.10678097605705261)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_3654.pth

[4m[1m > EPOCH: 9/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 10:12:20) [0m

[1m   --> TIME: 2024-04-13 10:12:37 -- STEP: 21/406 -- GLOBAL_STEP: 3675[0m
     | > loss: 1.43070387840271  (1.50197514465877)
     | > log_mle: 0.3810341954231262  (0.3860941387358166)
     | > loss_dur: 1.049669623374939  (1.1158810115995863)
     | > amp_scaler: 16384.0  (16384.0)
     | > grad_norm: tensor(2.6254, device='cuda:0')  (tensor(2.7413, device='cuda:0

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.18242591619491577 [0m(-0.0022776126861572266)
     | > avg_loss:[92m 1.307359203696251 [0m(-0.12199293076992035)
     | > avg_log_mle:[92m 0.3150700777769089 [0m(-0.042112305760383606)
     | > avg_loss_dur:[92m 0.9922891184687614 [0m(-0.07988064736127853)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_4060.pth

[4m[1m > EPOCH: 10/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 10:15:09) [0m

[1m   --> TIME: 2024-04-13 10:15:25 -- STEP: 15/406 -- GLOBAL_STEP: 4075[0m
     | > loss: 1.3483855724334717  (1.3777251958847045)
     | > log_mle: 0.33522647619247437  (0.3441624959309896)
     | > loss_dur: 1.0131590366363525  (1.033562711874644)
     | > amp_scaler: 32768.0  (32768.0)
     | > grad_norm: tensor(2.4591, device='cuda:0')  (tensor(2.5087, de

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.18634572625160217 [0m(+0.003919810056686401)
     | > avg_loss:[92m 1.206510379910469 [0m(-0.10084882378578186)
     | > avg_log_mle:[92m 0.27733784168958664 [0m(-0.037732236087322235)
     | > avg_loss_dur:[92m 0.9291725233197212 [0m(-0.06311659514904022)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_4466.pth

[4m[1m > EPOCH: 11/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 10:17:58) [0m

[1m   --> TIME: 2024-04-13 10:18:12 -- STEP: 9/406 -- GLOBAL_STEP: 4475[0m
     | > loss: 1.29317045211792  (1.2893420457839966)
     | > log_mle: 0.3114311099052429  (0.31176621384090847)
     | > loss_dur: 0.9817392826080322  (0.9775758253203498)
     | > amp_scaler: 32768.0  (32768.0)
     | > grad_norm: tensor(2.2541, device='cuda:0')  (tensor(2.3157, devi

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.20252418518066406 [0m(+0.01617845892906189)
     | > avg_loss:[92m 1.1272041648626328 [0m(-0.0793062150478363)
     | > avg_log_mle:[92m 0.24411217868328094 [0m(-0.033225663006305695)
     | > avg_loss_dur:[92m 0.8830919787287712 [0m(-0.04608054459095001)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_4872.pth

[4m[1m > EPOCH: 12/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 10:20:46) [0m

[1m   --> TIME: 2024-04-13 10:21:00 -- STEP: 3/406 -- GLOBAL_STEP: 4875[0m
     | > loss: 1.2178019285202026  (1.2055139541625977)
     | > log_mle: 0.2718191146850586  (0.27088554700215656)
     | > loss_dur: 0.945982813835144  (0.934628427028656)
     | > amp_scaler: 32768.0  (32768.0)
     | > grad_norm: tensor(2.2061, device='cuda:0')  (tensor(2.2809, devic

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.19226107001304626 [0m(-0.010263115167617798)
     | > avg_loss:[92m 1.0530192106962204 [0m(-0.07418495416641235)
     | > avg_log_mle:[92m 0.21143564581871033 [0m(-0.03267653286457062)
     | > avg_loss_dur:[92m 0.8415835797786713 [0m(-0.041508398950099945)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_5278.pth

[4m[1m > EPOCH: 13/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 10:23:35) [0m

[1m   --> TIME: 2024-04-13 10:23:52 -- STEP: 22/406 -- GLOBAL_STEP: 5300[0m
     | > loss: 1.0409116744995117  (1.0833255865357139)
     | > log_mle: 0.23895204067230225  (0.23937041109258478)
     | > loss_dur: 0.8019596934318542  (0.843955170024525)
     | > amp_scaler: 32768.0  (32768.0)
     | > grad_norm: tensor(1.6522, device='cuda:0')  (tensor(1.8517, 

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.1813351809978485 [0m(-0.010925889015197754)
     | > avg_loss:[92m 0.97606460750103 [0m(-0.07695460319519043)
     | > avg_log_mle:[92m 0.17753081023693085 [0m(-0.03390483558177948)
     | > avg_loss_dur:[92m 0.7985337972640991 [0m(-0.043049782514572144)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_5684.pth

[4m[1m > EPOCH: 14/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 10:26:23) [0m

[1m   --> TIME: 2024-04-13 10:26:39 -- STEP: 16/406 -- GLOBAL_STEP: 5700[0m
     | > loss: 1.0228521823883057  (1.0138566829264164)
     | > log_mle: 0.1999063491821289  (0.20422392711043358)
     | > loss_dur: 0.8229458332061768  (0.8096327595412731)
     | > amp_scaler: 32768.0  (32768.0)
     | > grad_norm: tensor(1.7933, device='cuda:0')  (tensor(1.8025, dev

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.17534106969833374 [0m(-0.0059941112995147705)
     | > avg_loss:[92m 0.9021827951073647 [0m(-0.07388181239366531)
     | > avg_log_mle:[92m 0.14518257975578308 [0m(-0.032348230481147766)
     | > avg_loss_dur:[92m 0.7570002153515816 [0m(-0.04153358191251755)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_6090.pth

[4m[1m > EPOCH: 15/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 10:29:12) [0m

[1m   --> TIME: 2024-04-13 10:29:26 -- STEP: 10/406 -- GLOBAL_STEP: 6100[0m
     | > loss: 0.9265904426574707  (0.9458091020584106)
     | > log_mle: 0.17337220907211304  (0.1773269236087799)
     | > loss_dur: 0.7532182335853577  (0.7684821844100952)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(1.4759, device='cuda:0')  (tensor(1.6267,

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.17606216669082642 [0m(+0.0007210969924926758)
     | > avg_loss:[92m 0.8374649211764336 [0m(-0.06471787393093109)
     | > avg_log_mle:[92m 0.11294757574796677 [0m(-0.032235004007816315)
     | > avg_loss_dur:[92m 0.7245173454284668 [0m(-0.03248286992311478)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_6496.pth

[4m[1m > EPOCH: 16/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 10:32:01) [0m

[1m   --> TIME: 2024-04-13 10:32:14 -- STEP: 4/406 -- GLOBAL_STEP: 6500[0m
     | > loss: 0.9370136260986328  (0.912067100405693)
     | > log_mle: 0.1656697392463684  (0.14704649150371552)
     | > loss_dur: 0.7713438868522644  (0.7650206089019775)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(1.3416, device='cuda:0')  (tensor(1.5115, d

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.17612645030021667 [0m(+6.428360939025879e-05)
     | > avg_loss:[92m 0.7783215269446373 [0m(-0.059143394231796265)
     | > avg_log_mle:[92m 0.0825241208076477 [0m(-0.03042345494031906)
     | > avg_loss_dur:[92m 0.6957974061369896 [0m(-0.028719939291477203)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_6902.pth

[4m[1m > EPOCH: 17/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 10:35:05) [0m

[1m   --> TIME: 2024-04-13 10:35:22 -- STEP: 23/406 -- GLOBAL_STEP: 6925[0m
     | > loss: 0.8027536273002625  (0.8122631751972696)
     | > log_mle: 0.11451119184494019  (0.11430470580640047)
     | > loss_dur: 0.6882424354553223  (0.6979584693908691)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(1.9118, device='cuda:0')  (tensor(1.5591

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.17833268642425537 [0m(+0.0022062361240386963)
     | > avg_loss:[92m 0.7357967272400856 [0m(-0.0425247997045517)
     | > avg_log_mle:[92m 0.05406267195940018 [0m(-0.028461448848247528)
     | > avg_loss_dur:[92m 0.6817340552806854 [0m(-0.014063350856304169)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_7308.pth

[4m[1m > EPOCH: 18/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 10:37:54) [0m

[1m   --> TIME: 2024-04-13 10:38:10 -- STEP: 17/406 -- GLOBAL_STEP: 7325[0m
     | > loss: 0.7508141398429871  (0.7734610683777753)
     | > log_mle: 0.08674997091293335  (0.08568975855322446)
     | > loss_dur: 0.6640641689300537  (0.6877713098245508)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(1.1311, device='cuda:0')  (tensor(2.0122

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.18113067746162415 [0m(+0.0027979910373687744)
     | > avg_loss:[92m 0.705224871635437 [0m(-0.03057185560464859)
     | > avg_log_mle:[92m 0.029786966741085052 [0m(-0.024275705218315125)
     | > avg_loss_dur:[92m 0.675437904894352 [0m(-0.006296150386333466)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_7714.pth

[4m[1m > EPOCH: 19/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 10:40:44) [0m

[1m   --> TIME: 2024-04-13 10:40:59 -- STEP: 11/406 -- GLOBAL_STEP: 7725[0m
     | > loss: 0.7613791823387146  (0.7441825704141096)
     | > log_mle: 0.05811220407485962  (0.06304921887137672)
     | > loss_dur: 0.703266978263855  (0.6811333515427329)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(0.9579, device='cuda:0')  (tensor(1.2732,

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.18856436014175415 [0m(+0.007433682680130005)
     | > avg_loss:[92m 0.6613702476024628 [0m(-0.04385462403297424)
     | > avg_log_mle:[92m 0.005220986902713776 [0m(-0.024565979838371277)
     | > avg_loss_dur:[92m 0.656149260699749 [0m(-0.019288644194602966)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_8120.pth

[4m[1m > EPOCH: 20/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 10:43:34) [0m

[1m   --> TIME: 2024-04-13 10:43:48 -- STEP: 5/406 -- GLOBAL_STEP: 8125[0m
     | > loss: 0.6787331700325012  (0.7185565948486328)
     | > log_mle: 0.050607144832611084  (0.0427452564239502)
     | > loss_dur: 0.6281260251998901  (0.6758113384246827)
     | > amp_scaler: 65536.0  (65536.0)
     | > grad_norm: tensor(0.8593, device='cuda:0')  (tensor(1.4016,

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.18881464004516602 [0m(+0.00025027990341186523)
     | > avg_loss:[92m 0.6352912858128548 [0m(-0.026078961789608)
     | > avg_log_mle:[92m -0.017207391560077667 [0m(-0.022428378462791443)
     | > avg_loss_dur:[92m 0.6524986773729324 [0m(-0.003650583326816559)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_8526.pth

[4m[1m > EPOCH: 21/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 10:46:25) [0m

[1m   --> TIME: 2024-04-13 10:46:42 -- STEP: 24/406 -- GLOBAL_STEP: 8550[0m
     | > loss: 0.6495680809020996  (0.6636287818352381)
     | > log_mle: 0.008820712566375732  (0.016151736179987587)
     | > loss_dur: 0.6407473683357239  (0.6474770456552505)
     | > amp_scaler: 32768.0  (32768.0)
     | > grad_norm: tensor(3.4209, device='cuda:0')  (tensor(1.

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.18129336833953857 [0m(-0.007521271705627441)
     | > avg_loss:[92m 0.6110861077904701 [0m(-0.024205178022384644)
     | > avg_log_mle:[92m -0.03538750857114792 [0m(-0.01818011701107025)
     | > avg_loss_dur:[92m 0.646473616361618 [0m(-0.006025061011314392)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_8932.pth

[4m[1m > EPOCH: 22/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 10:49:15) [0m

[1m   --> TIME: 2024-04-13 10:49:31 -- STEP: 18/406 -- GLOBAL_STEP: 8950[0m
     | > loss: 0.6399877667427063  (0.6434744629595015)
     | > log_mle: 0.0024533867835998535  (-0.003680162959628635)
     | > loss_dur: 0.6375343799591064  (0.6471546259191301)
     | > amp_scaler: 32768.0  (32768.0)
     | > grad_norm: tensor(2.3280, device='cuda:0')  (tensor(2.

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.19126397371292114 [0m(+0.009970605373382568)
     | > avg_loss:[92m 0.5941742807626724 [0m(-0.0169118270277977)
     | > avg_log_mle:[92m -0.054101042449474335 [0m(-0.018713533878326416)
     | > avg_loss_dur:[91m 0.6482753232121468 [0m(+0.001801706850528717)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_9338.pth

[4m[1m > EPOCH: 23/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 10:52:06) [0m

[1m   --> TIME: 2024-04-13 10:52:21 -- STEP: 12/406 -- GLOBAL_STEP: 9350[0m
     | > loss: 0.600519061088562  (0.6204914549986522)
     | > log_mle: -0.027687430381774902  (-0.018846129377683003)
     | > loss_dur: 0.6282064914703369  (0.639337584376335)
     | > amp_scaler: 32768.0  (32768.0)
     | > grad_norm: tensor(3.2721, device='cuda:0')  (tensor(2.2

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.19109591841697693 [0m(-0.00016805529594421387)
     | > avg_loss:[92m 0.5730053186416626 [0m(-0.021168962121009827)
     | > avg_log_mle:[92m -0.06654092669487 [0m(-0.01243988424539566)
     | > avg_loss_dur:[92m 0.6395462453365326 [0m(-0.008729077875614166)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_9744.pth

[4m[1m > EPOCH: 24/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 10:54:57) [0m

[1m   --> TIME: 2024-04-13 10:55:10 -- STEP: 6/406 -- GLOBAL_STEP: 9750[0m
     | > loss: 0.6237442493438721  (0.616846909125646)
     | > log_mle: -0.03202944993972778  (-0.029060145219167072)
     | > loss_dur: 0.6557736992835999  (0.645907054344813)
     | > amp_scaler: 16384.0  (16384.0)
     | > grad_norm: tensor(1.5406, device='cuda:0')  (tensor(1.8788

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.18945106863975525 [0m(-0.0016448497772216797)
     | > avg_loss:[92m 0.5585506558418274 [0m(-0.014454662799835205)
     | > avg_log_mle:[92m -0.08041201531887054 [0m(-0.01387108862400055)
     | > avg_loss_dur:[92m 0.6389626711606979 [0m(-0.0005835741758346558)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_10150.pth

[4m[1m > EPOCH: 25/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 10:57:50) [0m

[1m   --> TIME: 2024-04-13 10:58:01 -- STEP: 0/406 -- GLOBAL_STEP: 10150[0m
     | > loss: 0.5912695527076721  (0.5912695527076721)
     | > log_mle: -0.03034365177154541  (-0.03034365177154541)
     | > loss_dur: 0.6216132044792175  (0.6216132044792175)
     | > amp_scaler: 16384.0  (16384.0)
     | > grad_norm: tensor(1.7884, device='cuda:0')  (tensor(

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.1832231879234314 [0m(-0.0062278807163238525)
     | > avg_loss:[92m 0.5367815941572189 [0m(-0.02176906168460846)
     | > avg_log_mle:[92m -0.0913231149315834 [0m(-0.01091109961271286)
     | > avg_loss_dur:[92m 0.6281047090888023 [0m(-0.0108579620718956)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_10556.pth

[4m[1m > EPOCH: 26/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 11:00:41) [0m

[1m   --> TIME: 2024-04-13 11:00:57 -- STEP: 19/406 -- GLOBAL_STEP: 10575[0m
     | > loss: 0.5600071549415588  (0.5633610392871656)
     | > log_mle: -0.0509684681892395  (-0.05950399762705753)
     | > loss_dur: 0.6109756231307983  (0.6228650369142231)
     | > amp_scaler: 16384.0  (16384.0)
     | > grad_norm: tensor(2.0675, device='cuda:0')  (tensor(2.9238

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.19594928622245789 [0m(+0.01272609829902649)
     | > avg_loss:[92m 0.5335754305124283 [0m(-0.0032061636447906494)
     | > avg_log_mle:[92m -0.10048963129520416 [0m(-0.009166516363620758)
     | > avg_loss_dur:[91m 0.6340650618076324 [0m(+0.005960352718830109)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_10962.pth

[4m[1m > EPOCH: 27/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 11:03:31) [0m

[1m   --> TIME: 2024-04-13 11:03:46 -- STEP: 13/406 -- GLOBAL_STEP: 10975[0m
     | > loss: 0.5365938544273376  (0.5396840022160456)
     | > log_mle: -0.0781177282333374  (-0.0689764481324416)
     | > loss_dur: 0.614711582660675  (0.6086604503484873)
     | > amp_scaler: 16384.0  (16384.0)
     | > grad_norm: tensor(1.7583, device='cuda:0')  (tensor(1.9

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.17969125509262085 [0m(-0.016258031129837036)
     | > avg_loss:[92m 0.5095981284976006 [0m(-0.02397730201482773)
     | > avg_log_mle:[92m -0.11061429977416992 [0m(-0.01012466847896576)
     | > avg_loss_dur:[92m 0.6202124282717705 [0m(-0.013852633535861969)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_11368.pth

[4m[1m > EPOCH: 28/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 11:06:22) [0m

[1m   --> TIME: 2024-04-13 11:06:36 -- STEP: 7/406 -- GLOBAL_STEP: 11375[0m
     | > loss: 0.5251403450965881  (0.5232947298458644)
     | > log_mle: -0.06913489103317261  (-0.0747761641229902)
     | > loss_dur: 0.5942752361297607  (0.5980708939688546)
     | > amp_scaler: 16384.0  (16384.0)
     | > grad_norm: tensor(2.5071, device='cuda:0')  (tensor(2.49

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.18831461668014526 [0m(+0.008623361587524414)
     | > avg_loss:[92m 0.4945725277066231 [0m(-0.015025600790977478)
     | > avg_log_mle:[92m -0.11776673793792725 [0m(-0.007152438163757324)
     | > avg_loss_dur:[92m 0.6123392656445503 [0m(-0.007873162627220154)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_11774.pth

[4m[1m > EPOCH: 29/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 11:09:12) [0m

[1m   --> TIME: 2024-04-13 11:09:24 -- STEP: 1/406 -- GLOBAL_STEP: 11775[0m
     | > loss: 0.5012784600257874  (0.5012784600257874)
     | > log_mle: -0.08468711376190186  (-0.08468711376190186)
     | > loss_dur: 0.5859655737876892  (0.5859655737876892)
     | > amp_scaler: 16384.0  (16384.0)
     | > grad_norm: tensor(4.9939, device='cuda:0')  (tensor(4

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.1890571415424347 [0m(+0.0007425248622894287)
     | > avg_loss:[92m 0.478062242269516 [0m(-0.016510285437107086)
     | > avg_log_mle:[92m -0.12471956014633179 [0m(-0.006952822208404541)
     | > avg_loss_dur:[92m 0.6027818024158478 [0m(-0.009557463228702545)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_12180.pth

[4m[1m > EPOCH: 30/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 11:12:03) [0m

[1m   --> TIME: 2024-04-13 11:12:19 -- STEP: 20/406 -- GLOBAL_STEP: 12200[0m
     | > loss: 0.48242872953414917  (0.4848208248615265)
     | > log_mle: -0.07988816499710083  (-0.09457372426986695)
     | > loss_dur: 0.56231689453125  (0.5793945491313934)
     | > amp_scaler: 16384.0  (16384.0)
     | > grad_norm: tensor(2.1644, device='cuda:0')  (tensor(2.

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.17428332567214966 [0m(-0.014773815870285034)
     | > avg_loss:[92m 0.4444078579545021 [0m(-0.033654384315013885)
     | > avg_log_mle:[92m -0.13462243974208832 [0m(-0.00990287959575653)
     | > avg_loss_dur:[92m 0.5790302976965904 [0m(-0.023751504719257355)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_12586.pth

[4m[1m > EPOCH: 31/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 11:14:53) [0m

[1m   --> TIME: 2024-04-13 11:15:08 -- STEP: 14/406 -- GLOBAL_STEP: 12600[0m
     | > loss: 0.4449005722999573  (0.4608378069741385)
     | > log_mle: -0.11840927600860596  (-0.10369018997464861)
     | > loss_dur: 0.5633098483085632  (0.5645279969487872)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(4.5934, device='cuda:0')  (tensor(2.5

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.18009895086288452 [0m(+0.005815625190734863)
     | > avg_loss:[92m 0.41839005053043365 [0m(-0.02601780742406845)
     | > avg_log_mle:[92m -0.14265939593315125 [0m(-0.008036956191062927)
     | > avg_loss_dur:[92m 0.5610494464635849 [0m(-0.017980851233005524)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_12992.pth

[4m[1m > EPOCH: 32/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 11:17:44) [0m

[1m   --> TIME: 2024-04-13 11:17:58 -- STEP: 8/406 -- GLOBAL_STEP: 13000[0m
     | > loss: 0.44554752111434937  (0.44742725789546967)
     | > log_mle: -0.1200869083404541  (-0.10619708895683289)
     | > loss_dur: 0.5656344294548035  (0.5536243468523026)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(2.6151, device='cuda:0')  (tensor(2.

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.1884550154209137 [0m(+0.008356064558029175)
     | > avg_loss:[92m 0.4019618481397629 [0m(-0.016428202390670776)
     | > avg_log_mle:[92m -0.14707529544830322 [0m(-0.0044158995151519775)
     | > avg_loss_dur:[92m 0.5490371435880661 [0m(-0.012012302875518799)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_13398.pth

[4m[1m > EPOCH: 33/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 11:20:34) [0m

[1m   --> TIME: 2024-04-13 11:20:46 -- STEP: 2/406 -- GLOBAL_STEP: 13400[0m
     | > loss: 0.4215177893638611  (0.4163089096546173)
     | > log_mle: -0.12446403503417969  (-0.11816555261611938)
     | > loss_dur: 0.5459818243980408  (0.5344744622707367)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(3.6612, device='cuda:0')  (tensor(3.0

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.17725899815559387 [0m(-0.011196017265319824)
     | > avg_loss:[92m 0.38440174981951714 [0m(-0.017560098320245743)
     | > avg_log_mle:[92m -0.15391188859939575 [0m(-0.006836593151092529)
     | > avg_loss_dur:[92m 0.5383136384189129 [0m(-0.010723505169153214)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_13804.pth

[4m[1m > EPOCH: 34/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 11:23:25) [0m

[1m   --> TIME: 2024-04-13 11:23:42 -- STEP: 21/406 -- GLOBAL_STEP: 13825[0m
     | > loss: 0.4082459807395935  (0.4041555864470346)
     | > log_mle: -0.12576937675476074  (-0.12016322499229795)
     | > loss_dur: 0.5340153574943542  (0.5243188114393327)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(4.4462, device='cuda:0')  (tensor(3

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.18699637055397034 [0m(+0.009737372398376465)
     | > avg_loss:[92m 0.3677310422062874 [0m(-0.01667070761322975)
     | > avg_log_mle:[92m -0.15802158415317535 [0m(-0.004109695553779602)
     | > avg_loss_dur:[92m 0.5257526263594627 [0m(-0.01256101205945015)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_14210.pth

[4m[1m > EPOCH: 35/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 11:26:16) [0m

[1m   --> TIME: 2024-04-13 11:26:31 -- STEP: 15/406 -- GLOBAL_STEP: 14225[0m
     | > loss: 0.35580071806907654  (0.38879772822062175)
     | > log_mle: -0.12814438343048096  (-0.12705492973327637)
     | > loss_dur: 0.4839451014995575  (0.515852657953898)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(7.3667, device='cuda:0')  (tensor(4.7

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.1747904121875763 [0m(-0.012205958366394043)
     | > avg_loss:[92m 0.34779704734683037 [0m(-0.019933994859457016)
     | > avg_log_mle:[92m -0.16540642082691193 [0m(-0.007384836673736572)
     | > avg_loss_dur:[92m 0.5132034681737423 [0m(-0.012549158185720444)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_14616.pth

[4m[1m > EPOCH: 36/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 11:29:07) [0m

[1m   --> TIME: 2024-04-13 11:29:22 -- STEP: 9/406 -- GLOBAL_STEP: 14625[0m
     | > loss: 0.372841477394104  (0.3732890751626756)
     | > log_mle: -0.1299206018447876  (-0.12907588481903076)
     | > loss_dur: 0.5027620792388916  (0.5023649599817064)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(12.1565, device='cuda:0')  (tensor(6.56

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.17180228233337402 [0m(-0.0029881298542022705)
     | > avg_loss:[92m 0.3366844691336155 [0m(-0.011112578213214874)
     | > avg_log_mle:[92m -0.16756191849708557 [0m(-0.002155497670173645)
     | > avg_loss_dur:[92m 0.5042463876307011 [0m(-0.00895708054304123)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_15022.pth

[4m[1m > EPOCH: 37/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 11:31:58) [0m

[1m   --> TIME: 2024-04-13 11:32:11 -- STEP: 3/406 -- GLOBAL_STEP: 15025[0m
     | > loss: 0.3690323829650879  (0.3446590006351471)
     | > log_mle: -0.13939177989959717  (-0.14019389947255453)
     | > loss_dur: 0.5084241628646851  (0.4848529001077016)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(3.4142, device='cuda:0')  (tensor(3.9

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.19202253222465515 [0m(+0.020220249891281128)
     | > avg_loss:[92m 0.32366209477186203 [0m(-0.013022374361753464)
     | > avg_log_mle:[92m -0.17109082639217377 [0m(-0.003528907895088196)
     | > avg_loss_dur:[92m 0.4947529211640358 [0m(-0.009493466466665268)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_15428.pth

[4m[1m > EPOCH: 38/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 11:34:48) [0m

[1m   --> TIME: 2024-04-13 11:35:05 -- STEP: 22/406 -- GLOBAL_STEP: 15450[0m
     | > loss: 0.3132335841655731  (0.34318235652013257)
     | > log_mle: -0.13724792003631592  (-0.140431669625369)
     | > loss_dur: 0.45048150420188904  (0.4836140261455016)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(6.3867, device='cuda:0')  (tensor(5

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.18440601229667664 [0m(-0.007616519927978516)
     | > avg_loss:[92m 0.30297691002488136 [0m(-0.020685184746980667)
     | > avg_log_mle:[92m -0.17891259491443634 [0m(-0.007821768522262573)
     | > avg_loss_dur:[92m 0.4818895049393177 [0m(-0.012863416224718094)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_15834.pth

[4m[1m > EPOCH: 39/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 11:37:49) [0m

[1m   --> TIME: 2024-04-13 11:38:05 -- STEP: 16/406 -- GLOBAL_STEP: 15850[0m
     | > loss: 0.3475199043750763  (0.32255532406270504)
     | > log_mle: -0.14825963973999023  (-0.14724311977624893)
     | > loss_dur: 0.49577954411506653  (0.46979844383895397)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(6.5346, device='cuda:0')  (tenso

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.17603397369384766 [0m(-0.00837203860282898)
     | > avg_loss:[92m 0.29021139815449715 [0m(-0.012765511870384216)
     | > avg_log_mle:[92m -0.18229281902313232 [0m(-0.003380224108695984)
     | > avg_loss_dur:[92m 0.47250421717762947 [0m(-0.009385287761688232)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_16240.pth

[4m[1m > EPOCH: 40/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 11:40:41) [0m

[1m   --> TIME: 2024-04-13 11:40:55 -- STEP: 10/406 -- GLOBAL_STEP: 16250[0m
     | > loss: 0.32367652654647827  (0.3091888129711151)
     | > log_mle: -0.14807534217834473  (-0.1491619348526001)
     | > loss_dur: 0.471751868724823  (0.4583507478237152)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(2.8627, device='cuda:0')  (tensor(4.

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.1927662193775177 [0m(+0.016732245683670044)
     | > avg_loss:[92m 0.27717389166355133 [0m(-0.013037506490945816)
     | > avg_log_mle:[92m -0.18653500080108643 [0m(-0.0042421817779541016)
     | > avg_loss_dur:[92m 0.46370889246463776 [0m(-0.008795324712991714)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_16646.pth

[4m[1m > EPOCH: 41/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 11:43:32) [0m

[1m   --> TIME: 2024-04-13 11:43:45 -- STEP: 4/406 -- GLOBAL_STEP: 16650[0m
     | > loss: 0.3341292440891266  (0.29914844781160355)
     | > log_mle: -0.13641464710235596  (-0.15432536602020264)
     | > loss_dur: 0.47054389119148254  (0.4534738138318062)
     | > amp_scaler: 16384.0  (16384.0)
     | > grad_norm: tensor(2.2151, device='cuda:0')  (tens

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.18960922956466675 [0m(-0.003156989812850952)
     | > avg_loss:[92m 0.2624487392604351 [0m(-0.014725152403116226)
     | > avg_log_mle:[92m -0.1940830647945404 [0m(-0.0075480639934539795)
     | > avg_loss_dur:[92m 0.4565318040549755 [0m(-0.007177088409662247)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_17052.pth

[4m[1m > EPOCH: 42/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 11:46:23) [0m

[1m   --> TIME: 2024-04-13 11:46:40 -- STEP: 23/406 -- GLOBAL_STEP: 17075[0m
     | > loss: 0.27186688780784607  (0.2834196971810383)
     | > log_mle: -0.1575636863708496  (-0.15839380285014276)
     | > loss_dur: 0.4294305741786957  (0.44181350003118103)
     | > amp_scaler: 16384.0  (16384.0)
     | > grad_norm: tensor(9.3469, device='cuda:0')  (tensor

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.17347580194473267 [0m(-0.016133427619934082)
     | > avg_loss:[92m 0.25235873460769653 [0m(-0.010090004652738571)
     | > avg_log_mle:[92m -0.19612805545330048 [0m(-0.002044990658760071)
     | > avg_loss_dur:[92m 0.448486790060997 [0m(-0.0080450139939785)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_17458.pth

[4m[1m > EPOCH: 43/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 11:49:14) [0m

[1m   --> TIME: 2024-04-13 11:49:30 -- STEP: 17/406 -- GLOBAL_STEP: 17475[0m
     | > loss: 0.28350356221199036  (0.2730693221092224)
     | > log_mle: -0.16236400604248047  (-0.1652516056509579)
     | > loss_dur: 0.4458675682544708  (0.4383209277601803)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(2.7046, device='cuda:0')  (tensor(4.89

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.1722007393836975 [0m(-0.0012750625610351562)
     | > avg_loss:[92m 0.24032413214445114 [0m(-0.012034602463245392)
     | > avg_log_mle:[92m -0.20073150098323822 [0m(-0.004603445529937744)
     | > avg_loss_dur:[92m 0.44105563312768936 [0m(-0.007431156933307648)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_17864.pth

[4m[1m > EPOCH: 44/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 11:52:06) [0m

[1m   --> TIME: 2024-04-13 11:52:20 -- STEP: 11/406 -- GLOBAL_STEP: 17875[0m
     | > loss: 0.25723791122436523  (0.2588644948872653)
     | > log_mle: -0.16912639141082764  (-0.16694768992337314)
     | > loss_dur: 0.42636430263519287  (0.4258121848106384)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(8.0172, device='cuda:0')  (tenso

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.19543996453285217 [0m(+0.023239225149154663)
     | > avg_loss:[92m 0.22452248260378838 [0m(-0.015801649540662766)
     | > avg_log_mle:[92m -0.20409215986728668 [0m(-0.003360658884048462)
     | > avg_loss_dur:[92m 0.42861464247107506 [0m(-0.012440990656614304)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_18270.pth

[4m[1m > EPOCH: 45/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 11:54:57) [0m

[1m   --> TIME: 2024-04-13 11:55:10 -- STEP: 5/406 -- GLOBAL_STEP: 18275[0m
     | > loss: 0.2642602026462555  (0.2565278947353363)
     | > log_mle: -0.1637510061264038  (-0.17074165344238282)
     | > loss_dur: 0.4280112087726593  (0.4272695481777191)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(6.5778, device='cuda:0')  (tensor(8.

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.18719696998596191 [0m(-0.008242994546890259)
     | > avg_loss:[92m 0.21298672258853912 [0m(-0.011535760015249252)
     | > avg_log_mle:[92m -0.20508040487766266 [0m(-0.0009882450103759766)
     | > avg_loss_dur:[92m 0.4180671274662018 [0m(-0.010547515004873276)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_18676.pth

[4m[1m > EPOCH: 46/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 11:57:48) [0m

[1m   --> TIME: 2024-04-13 11:58:05 -- STEP: 24/406 -- GLOBAL_STEP: 18700[0m
     | > loss: 0.235586017370224  (0.23394576956828436)
     | > log_mle: -0.181571364402771  (-0.17591597636540726)
     | > loss_dur: 0.417157381772995  (0.4098617459336917)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(16.9152, device='cuda:0')  (tensor(6.

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.17908942699432373 [0m(-0.008107542991638184)
     | > avg_loss:[92m 0.2064593993127346 [0m(-0.00652732327580452)
     | > avg_log_mle:[91m -0.2033461034297943 [0m(+0.0017343014478683472)
     | > avg_loss_dur:[92m 0.4098055027425289 [0m(-0.008261624723672867)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_19082.pth

[4m[1m > EPOCH: 47/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 12:00:39) [0m

[1m   --> TIME: 2024-04-13 12:00:55 -- STEP: 18/406 -- GLOBAL_STEP: 19100[0m
     | > loss: 0.2205985188484192  (0.21925365759266746)
     | > log_mle: -0.17137718200683594  (-0.18150768015119764)
     | > loss_dur: 0.3919757008552551  (0.40076133774386513)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(14.7545, device='cuda:0')  (tensor(

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.18638461828231812 [0m(+0.007295191287994385)
     | > avg_loss:[92m 0.18621448799967766 [0m(-0.020244911313056946)
     | > avg_log_mle:[92m -0.21192005276679993 [0m(-0.008573949337005615)
     | > avg_loss_dur:[92m 0.3981345407664776 [0m(-0.01167096197605133)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_19488.pth

[4m[1m > EPOCH: 48/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 12:03:31) [0m

[1m   --> TIME: 2024-04-13 12:03:46 -- STEP: 12/406 -- GLOBAL_STEP: 19500[0m
     | > loss: 0.2027052342891693  (0.20610842108726501)
     | > log_mle: -0.191023588180542  (-0.18439901868502298)
     | > loss_dur: 0.3937288224697113  (0.390507439772288)
     | > amp_scaler: 16384.0  (16384.0)
     | > grad_norm: tensor(10.6428, device='cuda:0')  (tensor(1

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.18013688921928406 [0m(-0.006247729063034058)
     | > avg_loss:[92m 0.17123359441757202 [0m(-0.014980893582105637)
     | > avg_log_mle:[92m -0.21310269832611084 [0m(-0.001182645559310913)
     | > avg_loss_dur:[92m 0.38433629274368286 [0m(-0.013798248022794724)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_19894.pth

[4m[1m > EPOCH: 49/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 12:06:22) [0m

[1m   --> TIME: 2024-04-13 12:06:36 -- STEP: 6/406 -- GLOBAL_STEP: 19900[0m
     | > loss: 0.1655806303024292  (0.1906689703464508)
     | > log_mle: -0.1856696605682373  (-0.18591143687566122)
     | > loss_dur: 0.3512502908706665  (0.376580407222112)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(8.7367, device='cuda:0')  (tensor(12.

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.1805020272731781 [0m(+0.00036513805389404297)
     | > avg_loss:[92m 0.14798471331596375 [0m(-0.023248881101608276)
     | > avg_log_mle:[92m -0.22224217653274536 [0m(-0.009139478206634521)
     | > avg_loss_dur:[92m 0.3702268898487091 [0m(-0.014109402894973755)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_20300.pth

[4m[1m > EPOCH: 50/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 12:09:16) [0m

[1m   --> TIME: 2024-04-13 12:09:28 -- STEP: 0/406 -- GLOBAL_STEP: 20300[0m
     | > loss: 0.1830604374408722  (0.1830604374408722)
     | > log_mle: -0.17955458164215088  (-0.17955458164215088)
     | > loss_dur: 0.36261501908302307  (0.36261501908302307)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(5.0577, device='cuda:0')  (tensor

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.1911570429801941 [0m(+0.010655015707015991)
     | > avg_loss:[92m 0.13855064287781715 [0m(-0.009434070438146591)
     | > avg_log_mle:[92m -0.22422802448272705 [0m(-0.0019858479499816895)
     | > avg_loss_dur:[92m 0.3627786673605442 [0m(-0.007448222488164902)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_20706.pth

[4m[1m > EPOCH: 51/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 12:12:08) [0m

[1m   --> TIME: 2024-04-13 12:12:24 -- STEP: 19/406 -- GLOBAL_STEP: 20725[0m
     | > loss: 0.19483941793441772  (0.16333269445519696)
     | > log_mle: -0.1802835464477539  (-0.1950667092674657)
     | > loss_dur: 0.37512296438217163  (0.35839940372266266)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(15.3311, device='cuda:0')  (tenso

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.18204399943351746 [0m(-0.009113043546676636)
     | > avg_loss:[92m 0.11631568521261215 [0m(-0.022234957665205002)
     | > avg_log_mle:[92m -0.23083896934986115 [0m(-0.006610944867134094)
     | > avg_loss_dur:[92m 0.3471546545624733 [0m(-0.015624012798070908)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_21112.pth

[4m[1m > EPOCH: 52/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 12:15:00) [0m

[1m   --> TIME: 2024-04-13 12:15:15 -- STEP: 13/406 -- GLOBAL_STEP: 21125[0m
     | > loss: 0.1285478174686432  (0.1423276410653041)
     | > log_mle: -0.20604944229125977  (-0.1987862862073458)
     | > loss_dur: 0.33459725975990295  (0.3411139272726499)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(18.2647, device='cuda:0')  (tensor(

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.19337674975395203 [0m(+0.01133275032043457)
     | > avg_loss:[92m 0.10345371440052986 [0m(-0.01286197081208229)
     | > avg_log_mle:[92m -0.2351304292678833 [0m(-0.004291459918022156)
     | > avg_loss_dur:[92m 0.33858414366841316 [0m(-0.008570510894060135)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_21518.pth

[4m[1m > EPOCH: 53/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 12:17:52) [0m

[1m   --> TIME: 2024-04-13 12:18:06 -- STEP: 7/406 -- GLOBAL_STEP: 21525[0m
     | > loss: 0.11566710472106934  (0.1182835144656045)
     | > log_mle: -0.19116508960723877  (-0.19795097623552596)
     | > loss_dur: 0.3068321943283081  (0.31623449070113047)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(39.1459, device='cuda:0')  (tensor(1

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.18477797508239746 [0m(-0.008598774671554565)
     | > avg_loss:[92m 0.08042037114501 [0m(-0.023033343255519867)
     | > avg_log_mle:[92m -0.23681031167507172 [0m(-0.0016798824071884155)
     | > avg_loss_dur:[92m 0.3172306828200817 [0m(-0.02135346084833145)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_21924.pth

[4m[1m > EPOCH: 54/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 12:20:44) [0m

[1m   --> TIME: 2024-04-13 12:20:55 -- STEP: 1/406 -- GLOBAL_STEP: 21925[0m
     | > loss: 0.0753680169582367  (0.0753680169582367)
     | > log_mle: -0.20502924919128418  (-0.20502924919128418)
     | > loss_dur: 0.2803972661495209  (0.2803972661495209)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(24.2684, device='cuda:0')  (tensor(24.2

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.17684122920036316 [0m(-0.007936745882034302)
     | > avg_loss:[92m 0.05845118686556816 [0m(-0.021969184279441833)
     | > avg_log_mle:[92m -0.24145832657814026 [0m(-0.0046480149030685425)
     | > avg_loss_dur:[92m 0.2999095134437084 [0m(-0.01732116937637329)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_22330.pth

[4m[1m > EPOCH: 55/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 12:23:35) [0m

[1m   --> TIME: 2024-04-13 12:23:52 -- STEP: 20/406 -- GLOBAL_STEP: 22350[0m
     | > loss: 0.1060950756072998  (0.08616143465042114)
     | > log_mle: -0.19201242923736572  (-0.20761603116989136)
     | > loss_dur: 0.2981075048446655  (0.2937774658203125)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(20.2721, device='cuda:0')  (tensor

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.17983800172805786 [0m(+0.002996772527694702)
     | > avg_loss:[92m 0.04348096251487732 [0m(-0.014970224350690842)
     | > avg_log_mle:[91m -0.24012744426727295 [0m(+0.0013308823108673096)
     | > avg_loss_dur:[92m 0.28360840678215027 [0m(-0.01630110666155815)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_22736.pth

[4m[1m > EPOCH: 56/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 12:26:27) [0m

[1m   --> TIME: 2024-04-13 12:26:42 -- STEP: 14/406 -- GLOBAL_STEP: 22750[0m
     | > loss: 0.06515821814537048  (0.06105044590575354)
     | > log_mle: -0.22460424900054932  (-0.21278507368905203)
     | > loss_dur: 0.2897624671459198  (0.27383551959480557)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(12.0947, device='cuda:0')  (ten

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.18564507365226746 [0m(+0.005807071924209595)
     | > avg_loss:[92m 0.030592259019613266 [0m(-0.012888703495264053)
     | > avg_log_mle:[92m -0.24151863157749176 [0m(-0.001391187310218811)
     | > avg_loss_dur:[92m 0.272110890597105 [0m(-0.011497516185045242)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_23142.pth

[4m[1m > EPOCH: 57/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 12:29:19) [0m

[1m   --> TIME: 2024-04-13 12:29:33 -- STEP: 8/406 -- GLOBAL_STEP: 23150[0m
     | > loss: 0.01937776803970337  (0.04480874352157116)
     | > log_mle: -0.23129689693450928  (-0.21459059417247772)
     | > loss_dur: 0.25067466497421265  (0.2593993376940489)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(21.9343, device='cuda:0')  (tenso

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.1721659004688263 [0m(-0.013479173183441162)
     | > avg_loss:[92m 0.0175472479313612 [0m(-0.013045011088252068)
     | > avg_log_mle:[92m -0.24782006442546844 [0m(-0.006301432847976685)
     | > avg_loss_dur:[92m 0.26536731235682964 [0m(-0.006743578240275383)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_23548.pth

[4m[1m > EPOCH: 58/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 12:32:11) [0m

[1m   --> TIME: 2024-04-13 12:32:22 -- STEP: 2/406 -- GLOBAL_STEP: 23550[0m
     | > loss: 0.014573410153388977  (0.018739238381385803)
     | > log_mle: -0.2335362434387207  (-0.22721105813980103)
     | > loss_dur: 0.24810965359210968  (0.24595029652118683)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(26.0183, device='cuda:0')  (tens

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.1879999041557312 [0m(+0.015834003686904907)
     | > avg_loss:[92m 0.007478184998035431 [0m(-0.010069062933325768)
     | > avg_log_mle:[92m -0.2490127980709076 [0m(-0.001192733645439148)
     | > avg_loss_dur:[92m 0.256490983068943 [0m(-0.00887632928788662)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_23954.pth

[4m[1m > EPOCH: 59/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 12:35:02) [0m

[1m   --> TIME: 2024-04-13 12:35:19 -- STEP: 21/406 -- GLOBAL_STEP: 23975[0m
     | > loss: 0.04427897930145264  (0.02771056266058059)
     | > log_mle: -0.22822439670562744  (-0.2222804625829061)
     | > loss_dur: 0.2725033760070801  (0.24999102524348668)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(24.4605, device='cuda:0')  (tensor(1

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.18581625819206238 [0m(-0.0021836459636688232)
     | > avg_loss:[92m -0.0033404696732759476 [0m(-0.010818654671311378)
     | > avg_log_mle:[92m -0.25297509133815765 [0m(-0.003962293267250061)
     | > avg_loss_dur:[92m 0.2496346216648817 [0m(-0.0068563614040613174)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_24360.pth

[4m[1m > EPOCH: 60/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 12:37:54) [0m

[1m   --> TIME: 2024-04-13 12:38:09 -- STEP: 15/406 -- GLOBAL_STEP: 24375[0m
     | > loss: 0.0209692120552063  (0.012327795227368673)
     | > log_mle: -0.22261297702789307  (-0.22519155343373617)
     | > loss_dur: 0.24358218908309937  (0.23751934866110483)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(11.9976, device='cuda:0') 

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.1938193142414093 [0m(+0.008003056049346924)
     | > avg_loss:[92m -0.015109213069081305 [0m(-0.011768743395805357)
     | > avg_log_mle:[92m -0.2609311193227768 [0m(-0.00795602798461914)
     | > avg_loss_dur:[92m 0.2458219062536955 [0m(-0.0038127154111862183)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_24766.pth

[4m[1m > EPOCH: 61/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 12:40:46) [0m

[1m   --> TIME: 2024-04-13 12:41:00 -- STEP: 9/406 -- GLOBAL_STEP: 24775[0m
     | > loss: 0.045720070600509644  (0.006685281793276469)
     | > log_mle: -0.22370994091033936  (-0.22729055086771646)
     | > loss_dur: 0.269430011510849  (0.23397583266099295)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(9.7263, device='cuda:0')  (tenso

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.1738453209400177 [0m(-0.0199739933013916)
     | > avg_loss:[92m -0.019521374255418777 [0m(-0.004412161186337473)
     | > avg_log_mle:[92m -0.2610504776239395 [0m(-0.00011935830116271973)
     | > avg_loss_dur:[92m 0.24152910336852074 [0m(-0.004292802885174751)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_25172.pth

[4m[1m > EPOCH: 62/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 12:43:37) [0m

[1m   --> TIME: 2024-04-13 12:43:51 -- STEP: 3/406 -- GLOBAL_STEP: 25175[0m
     | > loss: 0.0009274929761886597  (-0.015178442001342773)
     | > log_mle: -0.23420989513397217  (-0.23655712604522705)
     | > loss_dur: 0.23513738811016083  (0.22137868404388428)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(9.7500, device='cuda:0')  (

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.17594054341316223 [0m(+0.0020952224731445312)
     | > avg_loss:[92m -0.02994377538561821 [0m(-0.010422401130199432)
     | > avg_log_mle:[92m -0.2705373615026474 [0m(-0.009486883878707886)
     | > avg_loss_dur:[92m 0.2405935861170292 [0m(-0.0009355172514915466)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_25578.pth

[4m[1m > EPOCH: 63/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 12:46:30) [0m

[1m   --> TIME: 2024-04-13 12:46:46 -- STEP: 22/406 -- GLOBAL_STEP: 25600[0m
     | > loss: 0.002338990569114685  (-0.003333016552708366)
     | > log_mle: -0.22664880752563477  (-0.23257045854221692)
     | > loss_dur: 0.22898779809474945  (0.22923744198950854)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(5.6314, device='cuda:0')  

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.198157399892807 [0m(+0.022216856479644775)
     | > avg_loss:[92m -0.032673006877303123 [0m(-0.0027292314916849136)
     | > avg_log_mle:[91m -0.2665911316871643 [0m(+0.003946229815483093)
     | > avg_loss_dur:[92m 0.23391812480986118 [0m(-0.006675461307168007)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_25984.pth

[4m[1m > EPOCH: 64/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 12:49:22) [0m

[1m   --> TIME: 2024-04-13 12:49:37 -- STEP: 16/406 -- GLOBAL_STEP: 26000[0m
     | > loss: -0.0053776949644088745  (-0.015473752282559872)
     | > log_mle: -0.23734652996063232  (-0.2382848635315895)
     | > loss_dur: 0.23196883499622345  (0.22281111124902964)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(16.1379, device='cuda:0') 

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.18454048037528992 [0m(-0.01361691951751709)
     | > avg_loss:[92m -0.03428200073540211 [0m(-0.0016089938580989838)
     | > avg_log_mle:[91m -0.26516783237457275 [0m(+0.0014232993125915527)
     | > avg_loss_dur:[92m 0.23088583163917065 [0m(-0.0030322931706905365)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_26390.pth

[4m[1m > EPOCH: 65/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 12:52:14) [0m

[1m   --> TIME: 2024-04-13 12:52:28 -- STEP: 10/406 -- GLOBAL_STEP: 26400[0m
     | > loss: -0.028206154704093933  (-0.01857522428035736)
     | > log_mle: -0.23467957973480225  (-0.23652410507202148)
     | > loss_dur: 0.2064734250307083  (0.21794888079166413)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(19.3593, device='cuda:0')

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.1776193380355835 [0m(-0.006921142339706421)
     | > avg_loss:[92m -0.04295210354030132 [0m(-0.008670102804899216)
     | > avg_log_mle:[92m -0.27109451591968536 [0m(-0.00592668354511261)
     | > avg_loss_dur:[92m 0.22814241237938404 [0m(-0.002743419259786606)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_26796.pth

[4m[1m > EPOCH: 66/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 12:55:21) [0m

[1m   --> TIME: 2024-04-13 12:55:34 -- STEP: 4/406 -- GLOBAL_STEP: 26800[0m
     | > loss: -0.015420734882354736  (-0.03502733260393143)
     | > log_mle: -0.22614383697509766  (-0.2453635036945343)
     | > loss_dur: 0.21072310209274292  (0.21033617109060287)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(36.6357, device='cuda:0')  (te

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.1817692220211029 [0m(+0.004149883985519409)
     | > avg_loss:[92m -0.055771881714463234 [0m(-0.012819778174161911)
     | > avg_log_mle:[92m -0.2811603844165802 [0m(-0.010065868496894836)
     | > avg_loss_dur:[92m 0.22538850270211697 [0m(-0.0027539096772670746)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_27202.pth

[4m[1m > EPOCH: 67/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 12:58:14) [0m

[1m   --> TIME: 2024-04-13 12:58:31 -- STEP: 23/406 -- GLOBAL_STEP: 27225[0m
     | > loss: -0.0214831680059433  (-0.031915566843488945)
     | > log_mle: -0.24063241481781006  (-0.24410033226013184)
     | > loss_dur: 0.21914924681186676  (0.21218476541664288)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(30.1947, device='cuda:0')  

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.1983109712600708 [0m(+0.016541749238967896)
     | > avg_loss:[92m -0.057952841743826866 [0m(-0.002180960029363632)
     | > avg_log_mle:[91m -0.28026774525642395 [0m(+0.00089263916015625)
     | > avg_loss_dur:[92m 0.22231490351259708 [0m(-0.003073599189519882)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_27608.pth

[4m[1m > EPOCH: 68/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 13:01:06) [0m

[1m   --> TIME: 2024-04-13 13:01:22 -- STEP: 17/406 -- GLOBAL_STEP: 27625[0m
     | > loss: -0.04014827311038971  (-0.034802514840574825)
     | > log_mle: -0.24416136741638184  (-0.24496811978957234)
     | > loss_dur: 0.20401309430599213  (0.2101656049489975)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(14.4942, device='cuda:0')  (

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.18598219752311707 [0m(-0.012328773736953735)
     | > avg_loss:[92m -0.06295204907655716 [0m(-0.004999207332730293)
     | > avg_log_mle:[92m -0.2839347869157791 [0m(-0.0036670416593551636)
     | > avg_loss_dur:[92m 0.22098273783922195 [0m(-0.0013321656733751297)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_28014.pth

[4m[1m > EPOCH: 69/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 13:03:59) [0m

[1m   --> TIME: 2024-04-13 13:04:13 -- STEP: 11/406 -- GLOBAL_STEP: 28025[0m
     | > loss: -0.04398852586746216  (-0.04751195826313712)
     | > log_mle: -0.25206637382507324  (-0.25100709091533313)
     | > loss_dur: 0.20807784795761108  (0.20349513265219601)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(31.8099, device='cuda:0') 

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.1886301040649414 [0m(+0.002647906541824341)
     | > avg_loss:[91m -0.06256827339529991 [0m(+0.0003837756812572479)
     | > avg_log_mle:[91m -0.28291183710098267 [0m(+0.0010229498147964478)
     | > avg_loss_dur:[92m 0.22034356370568275 [0m(-0.0006391741335391998)


[4m[1m > EPOCH: 70/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 13:06:50) [0m

[1m   --> TIME: 2024-04-13 13:07:04 -- STEP: 5/406 -- GLOBAL_STEP: 28425[0m
     | > loss: -0.02737542986869812  (-0.051399043202400206)
     | > log_mle: -0.24485015869140625  (-0.251951789855957)
     | > loss_dur: 0.21747472882270813  (0.20055274665355682)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(20.7418, device='cuda:0')  (tensor(29.0485, device='cuda:0'))
     | > current_lr: 1.7500000000000002e-05 
     | > step_time: 0.183  (0.18641219139099122)
   

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.17852023243904114 [0m(-0.010109871625900269)
     | > avg_loss:[92m -0.0731719583272934 [0m(-0.010603684931993484)
     | > avg_log_mle:[92m -0.2908957600593567 [0m(-0.007983922958374023)
     | > avg_loss_dur:[92m 0.2177238017320633 [0m(-0.002619761973619461)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_28826.pth

[4m[1m > EPOCH: 71/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 13:09:42) [0m

[1m   --> TIME: 2024-04-13 13:09:59 -- STEP: 24/406 -- GLOBAL_STEP: 28850[0m
     | > loss: -0.06242029368877411  (-0.055056689927975334)
     | > log_mle: -0.2640724182128906  (-0.25614527861277264)
     | > loss_dur: 0.20165212452411652  (0.2010885886847973)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(28.1690, device='cuda:0')  (ten

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.17064052820205688 [0m(-0.007879704236984253)
     | > avg_loss:[92m -0.07505777105689049 [0m(-0.0018858127295970917)
     | > avg_log_mle:[92m -0.29106050729751587 [0m(-0.0001647472381591797)
     | > avg_loss_dur:[92m 0.21600273624062538 [0m(-0.001721065491437912)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_29232.pth

[4m[1m > EPOCH: 72/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 13:12:35) [0m

[1m   --> TIME: 2024-04-13 13:12:51 -- STEP: 18/406 -- GLOBAL_STEP: 29250[0m
     | > loss: -0.04501606523990631  (-0.0602532716261016)
     | > log_mle: -0.24890244007110596  (-0.2607316705915663)
     | > loss_dur: 0.20388637483119965  (0.2004783989654647)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(13.9438, device='cuda:0')  (

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.18448638916015625 [0m(+0.013845860958099365)
     | > avg_loss:[92m -0.08130923099815845 [0m(-0.006251459941267967)
     | > avg_log_mle:[92m -0.2940511554479599 [0m(-0.0029906481504440308)
     | > avg_loss_dur:[92m 0.21274192444980145 [0m(-0.0032608117908239365)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_29638.pth

[4m[1m > EPOCH: 73/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 13:15:27) [0m

[1m   --> TIME: 2024-04-13 13:15:42 -- STEP: 12/406 -- GLOBAL_STEP: 29650[0m
     | > loss: -0.03867892920970917  (-0.06548480192820232)
     | > log_mle: -0.2638723850250244  (-0.2575148642063141)
     | > loss_dur: 0.22519345581531525  (0.19203006227811178)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(23.5217, device='cuda:0')  (




[1m   --> TIME: 2024-04-13 13:17:51 -- STEP: 387/406 -- GLOBAL_STEP: 30025[0m
     | > loss: -0.04750543832778931  (-0.05566446829673856)
     | > log_mle: -0.28460693359375  (-0.28070734175600753)
     | > loss_dur: 0.2371014952659607  (0.2250428734592689)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(19.0776, device='cuda:0')  (tensor(30.5816, device='cuda:0'))
     | > current_lr: 1.825e-05 
     | > step_time: 0.462  (0.32202712509983267)
     | > loader_time: 0.005  (0.006963057727468722)


[1m > EVALUATION [0m



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.1710655391216278 [0m(-0.013420850038528442)
     | > avg_loss:[92m -0.08392482623457909 [0m(-0.0026155952364206314)
     | > avg_log_mle:[92m -0.2972467988729477 [0m(-0.003195643424987793)
     | > avg_loss_dur:[91m 0.2133219726383686 [0m(+0.0005800481885671616)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_30044.pth

[4m[1m > EPOCH: 74/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 13:18:24) [0m

[1m   --> TIME: 2024-04-13 13:18:38 -- STEP: 6/406 -- GLOBAL_STEP: 30050[0m
     | > loss: -0.09991756081581116  (-0.0738879144191742)
     | > log_mle: -0.25868237018585205  (-0.2627933621406555)
     | > loss_dur: 0.1587648093700409  (0.18890544772148132)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(12.2955, device='cuda:0')  (tens

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.19202426075935364 [0m(+0.02095872163772583)
     | > avg_loss:[92m -0.0918884389102459 [0m(-0.007963612675666809)
     | > avg_log_mle:[92m -0.30177097022533417 [0m(-0.004524171352386475)
     | > avg_loss_dur:[92m 0.20988253131508827 [0m(-0.0034394413232803345)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_30450.pth

[4m[1m > EPOCH: 75/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 13:21:17) [0m

[1m   --> TIME: 2024-04-13 13:21:28 -- STEP: 0/406 -- GLOBAL_STEP: 30450[0m
     | > loss: -0.052547380328178406  (-0.052547380328178406)
     | > log_mle: -0.265331506729126  (-0.265331506729126)
     | > loss_dur: 0.21278412640094757  (0.21278412640094757)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(3.9818, device='cuda:0')  (tens

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.17580384016036987 [0m(-0.016220420598983765)
     | > avg_loss:[92m -0.09487836621701717 [0m(-0.0029899273067712784)
     | > avg_log_mle:[92m -0.3024752140045166 [0m(-0.0007042437791824341)
     | > avg_loss_dur:[92m 0.20759684778749943 [0m(-0.0022856835275888443)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_30856.pth

[4m[1m > EPOCH: 76/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 13:24:09) [0m

[1m   --> TIME: 2024-04-13 13:24:25 -- STEP: 19/406 -- GLOBAL_STEP: 30875[0m
     | > loss: -0.07982505857944489  (-0.08339700494941912)
     | > log_mle: -0.25729966163635254  (-0.2711261322623805)
     | > loss_dur: 0.17747460305690765  (0.18772912731296137)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(27.3343, device='cuda:0') 

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.18766385316848755 [0m(+0.011860013008117676)
     | > avg_loss:[92m -0.10368075221776962 [0m(-0.008802386000752449)
     | > avg_log_mle:[92m -0.30891045928001404 [0m(-0.0064352452754974365)
     | > avg_loss_dur:[92m 0.20522970706224442 [0m(-0.0023671407252550125)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_31262.pth

[4m[1m > EPOCH: 77/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 13:27:01) [0m

[1m   --> TIME: 2024-04-13 13:27:16 -- STEP: 13/406 -- GLOBAL_STEP: 31275[0m
     | > loss: -0.08988465368747711  (-0.08758341119839595)
     | > log_mle: -0.2799309492111206  (-0.2732370083148663)
     | > loss_dur: 0.1900462955236435  (0.18565359711647034)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(34.4931, device='cuda:0')  (

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.18999451398849487 [0m(+0.0023306608200073242)
     | > avg_loss:[91m -0.10224419459700584 [0m(+0.0014365576207637787)
     | > avg_log_mle:[91m -0.30658046901226044 [0m(+0.002329990267753601)
     | > avg_loss_dur:[92m 0.2043362744152546 [0m(-0.0008934326469898224)


[4m[1m > EPOCH: 78/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 13:29:52) [0m

[1m   --> TIME: 2024-04-13 13:30:06 -- STEP: 7/406 -- GLOBAL_STEP: 31675[0m
     | > loss: -0.0861654132604599  (-0.09521354734897614)
     | > log_mle: -0.2651686668395996  (-0.27145230770111084)
     | > loss_dur: 0.1790032535791397  (0.1762387603521347)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(32.2093, device='cuda:0')  (tensor(31.5002, device='cuda:0'))
     | > current_lr: 1.95e-05 
     | > step_time: 0.171  (0.17614459991455078)
     | > loader_time

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.190518319606781 [0m(+0.0005238056182861328)
     | > avg_loss:[92m -0.10474017821252346 [0m(-0.0024959836155176163)
     | > avg_log_mle:[92m -0.31119823455810547 [0m(-0.004617765545845032)
     | > avg_loss_dur:[91m 0.206458056345582 [0m(+0.0021217819303274155)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_32074.pth

[4m[1m > EPOCH: 79/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 13:32:44) [0m

[1m   --> TIME: 2024-04-13 13:32:56 -- STEP: 1/406 -- GLOBAL_STEP: 32075[0m
     | > loss: -0.10279811918735504  (-0.10279811918735504)
     | > log_mle: -0.2765768766403198  (-0.2765768766403198)
     | > loss_dur: 0.17377875745296478  (0.17377875745296478)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(34.0525, device='cuda:0')  (ten

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.19949078559875488 [0m(+0.008972465991973877)
     | > avg_loss:[92m -0.11102832481265068 [0m(-0.00628814660012722)
     | > avg_log_mle:[92m -0.31260834634304047 [0m(-0.0014101117849349976)
     | > avg_loss_dur:[92m 0.20158002153038979 [0m(-0.004878034815192223)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_32480.pth

[4m[1m > EPOCH: 80/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 13:35:36) [0m

[1m   --> TIME: 2024-04-13 13:35:53 -- STEP: 20/406 -- GLOBAL_STEP: 32500[0m
     | > loss: -0.0695168673992157  (-0.09607001841068268)
     | > log_mle: -0.2621324062347412  (-0.2790830016136169)
     | > loss_dur: 0.1926155388355255  (0.18301298320293427)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(27.1922, device='cuda:0')  (ten

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.19416218996047974 [0m(-0.0053285956382751465)
     | > avg_loss:[92m -0.11385733634233475 [0m(-0.0028290115296840668)
     | > avg_log_mle:[92m -0.31709350645542145 [0m(-0.0044851601123809814)
     | > avg_loss_dur:[91m 0.2032361701130867 [0m(+0.0016561485826969147)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_32886.pth

[4m[1m > EPOCH: 81/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 13:38:28) [0m

[1m   --> TIME: 2024-04-13 13:38:43 -- STEP: 14/406 -- GLOBAL_STEP: 32900[0m
     | > loss: -0.09522302448749542  (-0.0974667061652456)
     | > log_mle: -0.29308414459228516  (-0.2796476568494524)
     | > loss_dur: 0.19786112010478973  (0.18218095068420684)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(28.6858, device='cuda:0') 

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.18128499388694763 [0m(-0.012877196073532104)
     | > avg_loss:[92m -0.12406663037836552 [0m(-0.01020929403603077)
     | > avg_log_mle:[92m -0.32242245972156525 [0m(-0.005328953266143799)
     | > avg_loss_dur:[92m 0.19835582934319973 [0m(-0.0048803407698869705)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_33292.pth

[4m[1m > EPOCH: 82/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 13:41:20) [0m

[1m   --> TIME: 2024-04-13 13:41:34 -- STEP: 8/406 -- GLOBAL_STEP: 33300[0m
     | > loss: -0.11372938752174377  (-0.1076347678899765)
     | > log_mle: -0.29279088973999023  (-0.28164950013160706)
     | > loss_dur: 0.17906150221824646  (0.17401473224163055)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(33.4714, device='cuda:0')  (t

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.18110224604606628 [0m(-0.00018274784088134766)
     | > avg_loss:[91m -0.12269887514412403 [0m(+0.0013677552342414856)
     | > avg_log_mle:[91m -0.3200826048851013 [0m(+0.0023398548364639282)
     | > avg_loss_dur:[92m 0.1973837297409773 [0m(-0.0009720996022224426)


[4m[1m > EPOCH: 83/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 13:44:11) [0m

[1m   --> TIME: 2024-04-13 13:44:23 -- STEP: 2/406 -- GLOBAL_STEP: 33700[0m
     | > loss: -0.12859010696411133  (-0.12011139839887619)
     | > log_mle: -0.3016188144683838  (-0.2944025993347168)
     | > loss_dur: 0.17302870750427246  (0.1742912009358406)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(36.5418, device='cuda:0')  (tensor(24.5265, device='cuda:0'))
     | > current_lr: 2.075e-05 
     | > step_time: 0.181  (0.18458569049835205)
     | > loader_t

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.19800826907157898 [0m(+0.016906023025512695)
     | > avg_loss:[91m -0.11951666697859764 [0m(+0.00318220816552639)
     | > avg_log_mle:[91m -0.3155578523874283 [0m(+0.004524752497673035)
     | > avg_loss_dur:[92m 0.19604118540883064 [0m(-0.0013425443321466446)


[4m[1m > EPOCH: 84/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 13:47:03) [0m

[1m   --> TIME: 2024-04-13 13:47:19 -- STEP: 21/406 -- GLOBAL_STEP: 34125[0m
     | > loss: -0.11671070754528046  (-0.10779169556640443)
     | > log_mle: -0.2940930128097534  (-0.2868859484082177)
     | > loss_dur: 0.17738230526447296  (0.1790942528418132)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(18.6591, device='cuda:0')  (tensor(24.6502, device='cuda:0'))
     | > current_lr: 2.1e-05 
     | > step_time: 0.195  (0.18272195543561662)
     | > loader_time: 

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.18690219521522522 [0m(-0.01110607385635376)
     | > avg_loss:[92m -0.12557853944599628 [0m(-0.0060618724673986435)
     | > avg_log_mle:[92m -0.32322148978710175 [0m(-0.007663637399673462)
     | > avg_loss_dur:[91m 0.19764295034110546 [0m(+0.0016017649322748184)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_34510.pth

[4m[1m > EPOCH: 85/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 13:49:55) [0m

[1m   --> TIME: 2024-04-13 13:50:10 -- STEP: 15/406 -- GLOBAL_STEP: 34525[0m
     | > loss: -0.10894618928432465  (-0.12128384709358216)
     | > log_mle: -0.28902173042297363  (-0.29362789789835614)
     | > loss_dur: 0.180075541138649  (0.17234405080477397)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(24.5354, device='cuda:0')  (

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.19453400373458862 [0m(+0.007631808519363403)
     | > avg_loss:[92m -0.13099676184356213 [0m(-0.005418222397565842)
     | > avg_log_mle:[92m -0.3262673616409302 [0m(-0.00304587185382843)
     | > avg_loss_dur:[92m 0.19527059979736805 [0m(-0.0023723505437374115)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_34916.pth

[4m[1m > EPOCH: 86/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 13:52:47) [0m

[1m   --> TIME: 2024-04-13 13:53:02 -- STEP: 9/406 -- GLOBAL_STEP: 34925[0m
     | > loss: -0.08774113655090332  (-0.12053436703152126)
     | > log_mle: -0.2875019311904907  (-0.2908284929063585)
     | > loss_dur: 0.1997607946395874  (0.17029412587483725)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(24.3828, device='cuda:0')  (tens

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.18254157900810242 [0m(-0.011992424726486206)
     | > avg_loss:[91m -0.1257335264235735 [0m(+0.005263235419988632)
     | > avg_log_mle:[91m -0.3255961984395981 [0m(+0.0006711632013320923)
     | > avg_loss_dur:[91m 0.1998626720160246 [0m(+0.00459207221865654)


[4m[1m > EPOCH: 87/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 13:55:39) [0m

[1m   --> TIME: 2024-04-13 13:55:52 -- STEP: 3/406 -- GLOBAL_STEP: 35325[0m
     | > loss: -0.1307639181613922  (-0.13640190660953522)
     | > log_mle: -0.2952014207839966  (-0.29897061983744305)
     | > loss_dur: 0.16443750262260437  (0.1625687132279078)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(48.6888, device='cuda:0')  (tensor(41.4199, device='cuda:0'))
     | > current_lr: 2.175e-05 
     | > step_time: 0.1824  (0.18280919392903647)
     | > loader_time: 

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.1789548695087433 [0m(-0.003586709499359131)
     | > avg_loss:[92m -0.13986727222800255 [0m(-0.014133745804429054)
     | > avg_log_mle:[92m -0.3324677050113678 [0m(-0.006871506571769714)
     | > avg_loss_dur:[92m 0.19260043278336525 [0m(-0.00726223923265934)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_35728.pth

[4m[1m > EPOCH: 88/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 13:58:31) [0m

[1m   --> TIME: 2024-04-13 13:58:48 -- STEP: 22/406 -- GLOBAL_STEP: 35750[0m
     | > loss: -0.10018154978752136  (-0.12184496088461443)
     | > log_mle: -0.2862575054168701  (-0.2964196367697282)
     | > loss_dur: 0.18607595562934875  (0.1745746758851138)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(36.2249, device='cuda:0')  (tenso

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.1912575364112854 [0m(+0.012302666902542114)
     | > avg_loss:[91m -0.1376494038850069 [0m(+0.0022178683429956436)
     | > avg_log_mle:[91m -0.3309934586286545 [0m(+0.0014742463827133179)
     | > avg_loss_dur:[91m 0.19334405474364758 [0m(+0.0007436219602823257)


[4m[1m > EPOCH: 89/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 14:01:23) [0m

[1m   --> TIME: 2024-04-13 14:01:38 -- STEP: 16/406 -- GLOBAL_STEP: 36150[0m
     | > loss: -0.11517442762851715  (-0.13185176998376846)
     | > log_mle: -0.30132436752319336  (-0.301162913441658)
     | > loss_dur: 0.1861499398946762  (0.16931114345788953)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(37.0188, device='cuda:0')  (tensor(31.7311, device='cuda:0'))
     | > current_lr: 2.2250000000000002e-05 
     | > step_time: 0.18  (0.17720268666744232)
     | 

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.1935117542743683 [0m(+0.0022542178630828857)
     | > avg_loss:[92m -0.14324595965445042 [0m(-0.005596555769443512)
     | > avg_log_mle:[92m -0.3353518545627594 [0m(-0.004358395934104919)
     | > avg_loss_dur:[92m 0.19210589490830898 [0m(-0.0012381598353385925)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_36540.pth

[4m[1m > EPOCH: 90/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 14:04:15) [0m

[1m   --> TIME: 2024-04-13 14:04:30 -- STEP: 10/406 -- GLOBAL_STEP: 36550[0m
     | > loss: -0.15342245995998383  (-0.13568832725286484)
     | > log_mle: -0.3047128915786743  (-0.30126922130584716)
     | > loss_dur: 0.1512904316186905  (0.16558089405298232)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(21.4102, device='cuda:0')  (t

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.1907961666584015 [0m(-0.002715587615966797)
     | > avg_loss:[92m -0.14656851813197136 [0m(-0.0033225584775209427)
     | > avg_log_mle:[92m -0.3355214148759842 [0m(-0.00016956031322479248)
     | > avg_loss_dur:[92m 0.18895289674401283 [0m(-0.00315299816429615)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_36946.pth

[4m[1m > EPOCH: 91/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 14:07:07) [0m

[1m   --> TIME: 2024-04-13 14:07:20 -- STEP: 4/406 -- GLOBAL_STEP: 36950[0m
     | > loss: -0.1193898469209671  (-0.14556143432855606)
     | > log_mle: -0.284521222114563  (-0.30351918935775757)
     | > loss_dur: 0.1651313751935959  (0.1579577550292015)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(20.5596, device='cuda:0')  (tenso

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.17799153923988342 [0m(-0.012804627418518066)
     | > avg_loss:[92m -0.14930416643619537 [0m(-0.0027356483042240143)
     | > avg_log_mle:[92m -0.3397820293903351 [0m(-0.004260614514350891)
     | > avg_loss_dur:[91m 0.1904778629541397 [0m(+0.0015249662101268768)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_37352.pth

[4m[1m > EPOCH: 92/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 14:10:00) [0m

[1m   --> TIME: 2024-04-13 14:10:17 -- STEP: 23/406 -- GLOBAL_STEP: 37375[0m
     | > loss: -0.13445231318473816  (-0.1345943840949432)
     | > log_mle: -0.2994053363800049  (-0.303108723267265)
     | > loss_dur: 0.16495302319526672  (0.1685143391723218)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(25.4525, device='cuda:0')  (tens

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.18690750002861023 [0m(+0.008915960788726807)
     | > avg_loss:[92m -0.1566541325300932 [0m(-0.0073499660938978195)
     | > avg_log_mle:[92m -0.3454207181930542 [0m(-0.005638688802719116)
     | > avg_loss_dur:[92m 0.188766585662961 [0m(-0.0017112772911787033)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_37758.pth

[4m[1m > EPOCH: 93/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 14:12:52) [0m

[1m   --> TIME: 2024-04-13 14:13:08 -- STEP: 17/406 -- GLOBAL_STEP: 37775[0m
     | > loss: -0.12249279022216797  (-0.13887528373914607)
     | > log_mle: -0.29859018325805664  (-0.30509814094094667)
     | > loss_dur: 0.17609739303588867  (0.16622285720180063)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(21.2042, device='cuda:0')  (t

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.16014933586120605 [0m(-0.026758164167404175)
     | > avg_loss:[91m -0.15122381784021854 [0m(+0.005430314689874649)
     | > avg_log_mle:[91m -0.3399885594844818 [0m(+0.005432158708572388)
     | > avg_loss_dur:[92m 0.18876474164426327 [0m(-1.8440186977386475e-06)


[4m[1m > EPOCH: 94/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 14:15:45) [0m

[1m   --> TIME: 2024-04-13 14:15:59 -- STEP: 11/406 -- GLOBAL_STEP: 38175[0m
     | > loss: -0.15878157317638397  (-0.15117324075915597)
     | > log_mle: -0.31081438064575195  (-0.30939821763472125)
     | > loss_dur: 0.15203280746936798  (0.15822497687556528)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(41.3495, device='cuda:0')  (tensor(30.9408, device='cuda:0'))
     | > current_lr: 2.3500000000000002e-05 
     | > step_time: 0.175  (0.17935141650113193)
  

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.1901036500930786 [0m(+0.02995431423187256)
     | > avg_loss:[92m -0.16158265806734562 [0m(-0.010358840227127075)
     | > avg_log_mle:[92m -0.3471989780664444 [0m(-0.0072104185819625854)
     | > avg_loss_dur:[92m 0.18561631999909878 [0m(-0.0031484216451644897)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_38570.pth

[4m[1m > EPOCH: 95/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 14:18:37) [0m

[1m   --> TIME: 2024-04-13 14:18:51 -- STEP: 5/406 -- GLOBAL_STEP: 38575[0m
     | > loss: -0.1272568255662918  (-0.14668910801410676)
     | > log_mle: -0.29500842094421387  (-0.30344243049621583)
     | > loss_dur: 0.16775159537792206  (0.15675332248210908)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(32.0467, device='cuda:0')  (te

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.16990989446640015 [0m(-0.020193755626678467)
     | > avg_loss:[92m -0.16169222630560398 [0m(-0.00010956823825836182)
     | > avg_log_mle:[92m -0.34737420082092285 [0m(-0.0001752227544784546)
     | > avg_loss_dur:[91m 0.18568197451531887 [0m(+6.565451622009277e-05)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_38976.pth

[4m[1m > EPOCH: 96/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 14:21:29) [0m

[1m   --> TIME: 2024-04-13 14:21:47 -- STEP: 24/406 -- GLOBAL_STEP: 39000[0m
     | > loss: -0.16502872109413147  (-0.14780206295351186)
     | > log_mle: -0.32185447216033936  (-0.3117801596721013)
     | > loss_dur: 0.15682575106620789  (0.16397809671858948)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(34.9061, device='cuda:0'

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.1976008415222168 [0m(+0.02769094705581665)
     | > avg_loss:[91m -0.14889066852629185 [0m(+0.012801557779312134)
     | > avg_log_mle:[91m -0.3397950679063797 [0m(+0.007579132914543152)
     | > avg_loss_dur:[91m 0.19090439938008785 [0m(+0.005222424864768982)


[4m[1m > EPOCH: 97/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 14:24:22) [0m

[1m   --> TIME: 2024-04-13 14:24:38 -- STEP: 18/406 -- GLOBAL_STEP: 39400[0m
     | > loss: -0.1297745704650879  (-0.15419714897871017)
     | > log_mle: -0.3041576147079468  (-0.31580517689387005)
     | > loss_dur: 0.1743830442428589  (0.16160802791515985)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(43.8614, device='cuda:0')  (tensor(31.0043, device='cuda:0'))
     | > current_lr: 2.425e-05 
     | > step_time: 0.19  (0.1802767382727729)
     | > loader_time: 0.

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.18087583780288696 [0m(-0.016725003719329834)
     | > avg_loss:[92m -0.16652203910052776 [0m(-0.017631370574235916)
     | > avg_log_mle:[92m -0.3516310602426529 [0m(-0.011835992336273193)
     | > avg_loss_dur:[92m 0.18510902114212513 [0m(-0.005795378237962723)

 > BEST MODEL : c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b\best_model_39788.pth

[4m[1m > EPOCH: 98/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 14:27:14) [0m

[1m   --> TIME: 2024-04-13 14:27:29 -- STEP: 12/406 -- GLOBAL_STEP: 39800[0m
     | > loss: -0.14463159441947937  (-0.1582875760893027)
     | > log_mle: -0.3181033134460449  (-0.3147835036118825)
     | > loss_dur: 0.17347171902656555  (0.15649592752257982)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(24.3347, device='cuda:0')  (ten




[1m   --> TIME: 2024-04-13 14:28:34 -- STEP: 237/406 -- GLOBAL_STEP: 40025[0m
     | > loss: -0.13221123814582825  (-0.13810097114949288)
     | > log_mle: -0.3471163511276245  (-0.3269686376998194)
     | > loss_dur: 0.21490511298179626  (0.18886766655032647)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(31.6172, device='cuda:0')  (tensor(34.1840, device='cuda:0'))
     | > current_lr: 2.45e-05 
     | > step_time: 0.353  (0.2665157086738554)
     | > loader_time: 0.004  (0.009036178830303719)


[1m   --> TIME: 2024-04-13 14:28:43 -- STEP: 262/406 -- GLOBAL_STEP: 40050[0m
     | > loss: -0.12780824303627014  (-0.13860100627172994)
     | > log_mle: -0.32798540592193604  (-0.3282430444964926)
     | > loss_dur: 0.2001771628856659  (0.18964203822476267)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(36.8432, device='cuda:0')  (tensor(35.3033, device='cuda:0'))
     | > current_lr: 2.45e-05 
     | > step_time: 0.3619  (0.27569705384378207)
     

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.17417970299720764 [0m(-0.006696134805679321)
     | > avg_loss:[91m -0.16577791422605515 [0m(+0.0007441248744726181)
     | > avg_log_mle:[92m -0.35437817871570587 [0m(-0.0027471184730529785)
     | > avg_loss_dur:[91m 0.18860026448965073 [0m(+0.0034912433475255966)


[4m[1m > EPOCH: 99/100[0m
 --> c:/Users/wkaic/GitHub/Cockatoo.AI/experiments/model_c/output\run-April-13-2024_09+45AM-046eb8b

[1m > TRAINING (2024-04-13 14:30:10) [0m

[1m   --> TIME: 2024-04-13 14:30:23 -- STEP: 6/406 -- GLOBAL_STEP: 40200[0m
     | > loss: -0.18063248693943024  (-0.16544577976067862)
     | > log_mle: -0.31555676460266113  (-0.3182462652524312)
     | > loss_dur: 0.1349242776632309  (0.15280048549175262)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(11.9370, device='cuda:0')  (tensor(24.6279, device='cuda:0'))
     | > current_lr: 2.475e-05 
     | > step_time: 0.171  (0.18100074927012125)
     | > loader_

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.17894962430000305 [0m(+0.00476992130279541)
     | > avg_loss:[91m -0.15775873884558678 [0m(+0.008019175380468369)
     | > avg_log_mle:[91m -0.34596972167491913 [0m(+0.008408457040786743)
     | > avg_loss_dur:[92m 0.18821098282933235 [0m(-0.00038928166031837463)

