# Bibliotecas

In [None]:
!pip install TTS==0.15.5

Collecting TTS==0.15.5
  Downloading TTS-0.15.5-cp310-cp310-manylinux1_x86_64.whl (762 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m762.2/762.2 kB[0m [31m10.5 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting cython==0.29.30 (from TTS==0.15.5)
  Downloading Cython-0.29.30-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.manylinux_2_24_x86_64.whl (1.9 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.9/1.9 MB[0m [31m66.9 MB/s[0m eta [36m0:00:00[0m
Collecting inflect==5.6.0 (from TTS==0.15.5)
  Downloading inflect-5.6.0-py3-none-any.whl (33 kB)
Collecting anyascii (from TTS==0.15.5)
  Downloading anyascii-0.3.2-py3-none-any.whl (289 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m289.9/289.9 kB[0m [31m32.4 MB/s[0m eta [36m0:00:00[0m
Collecting pysbd (from TTS==0.15.5)
  Downloading pysbd-0.3.4-py3-none-any.whl (71 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m71.1/71.1 kB[0m [31m10.3 MB/s[0

In [None]:
import os
import glob
import zipfile
from google.colab import drive
import pandas as pd
import warnings

from TTS.config.shared_configs import BaseAudioConfig, BaseDatasetConfig
from TTS.tts.datasets import load_tts_samples

from trainer import Trainer, TrainerArgs
from TTS.tts.models.glow_tts import GlowTTS
from TTS.tts.configs.glow_tts_config import GlowTTSConfig
from TTS.tts.utils.text.tokenizer import TTSTokenizer
from TTS.utils.audio import AudioProcessor

warnings.filterwarnings('ignore')

# Base de dados

In [None]:
# Drive para chave API do Kaggle
drive.mount('/content/drive', force_remount=True)
drive_path = '/content/drive/MyDrive/'
kaggle_json_file = 'kaggle.json'

# Download do dataset diretamente do Kaggle
!mkdir -p ~/.kaggle
!cp "{drive_path}/{kaggle_json_file}" ~/.kaggle/
!chmod 600 ~/.kaggle/kaggle.json
!kaggle datasets download -d mediatechlab/gneutralspeech

zip_file = 'gneutralspeech.zip'
with zipfile.ZipFile(zip_file, 'r') as zip_ref:
    zip_ref.extractall()
os.remove(zip_file)

Mounted at /content/drive
Downloading gneutralspeech.zip to /content
100% 4.75G/4.76G [00:47<00:00, 183MB/s]
100% 4.76G/4.76G [00:47<00:00, 108MB/s]


In [None]:
# Dataset location and the maximum number of utterances used for this run
DATASET_ROOT = "/content/voz_base_44kHz_16bit"
MAX_SAMPLES = 800

# Load the pipe-delimited metadata CSV (it has no header row).
# - dtype=str keeps every field as text, so file identifiers are never
#   reinterpreted as numbers.
# - on_bad_lines="skip" drops malformed rows; it replaces error_bad_lines=False,
#   which was deprecated in pandas 1.3 and removed in pandas 2.0.
df_voz = pd.read_csv(
    os.path.join(DATASET_ROOT, "metadata_voz_base_norm.csv"),
    delimiter="|",
    header=None,
    engine="python",
    dtype=str,
    on_bad_lines="skip",
)

# Keep only the first two columns (file name, transcript) and drop rows where
# either one is missing, so the literal text "nan" never becomes a transcript.
pairs = df_voz[[0, 1]].dropna()

# One "file name | transcript" string per row
metadata_voz_df = pairs[0] + " | " + pairs[1]

# Save the intermediate listing as a .txt file
with open("/content/metadata_voz.txt", "w", encoding="utf-8") as f:
    f.writelines(f"{line}\n" for line in metadata_voz_df)

# Base names (no directory, no extension) of the wav files present on disk.
# os.path.splitext removes only the final extension, so names that contain
# dots are not truncated.
files_path = [
    os.path.splitext(os.path.basename(wav_path))[0]
    for wav_path in glob.glob(os.path.join(DATASET_ROOT, "wavs", "*.wav"))
]
available_wavs = set(files_path)  # set => constant-time membership tests

# Build "file name|transcript" lines, keeping only entries whose audio exists
lines_list = []
for filename, txt in zip(pairs[0], pairs[1]):
    filename = filename.strip()
    txt = txt.strip()
    if filename in available_wavs:
        lines_list.append(f"{filename}|{txt}\n")

# Fail here with a clear message instead of writing an empty training file
assert lines_list, "no metadata row matches a wav file under " + DATASET_ROOT

# Write the corrected metadata file, limited to MAX_SAMPLES entries
with open("/content/metadata.txt", "w", encoding="utf-8") as new_metadata:
    new_metadata.writelines(lines_list[:MAX_SAMPLES])

In [None]:
# Dataset settings: the corrected metadata file is parsed with the 'thorsten'
# formatter, and `path` points at the extracted dataset folder.
dataset_config = BaseDatasetConfig(
    formatter='thorsten',
    meta_file_train="/content/metadata.txt",
    path="/content/voz_base_44kHz_16bit",
)

# Output directory for the training run. exist_ok=True makes the cell safe to
# re-run and avoids the separate check-then-create step.
output_path = '/content/output'
os.makedirs(output_path, exist_ok=True)

# Treinamento do GlowTTS (modelo acústico)

In [None]:
# Audio settings
audio_config = BaseAudioConfig(
    sample_rate=44100,
    do_trim_silence=True,
    trim_db=60.0,
    signal_norm=False,
    mel_fmin=0.0,
    mel_fmax=8000,
    spec_gain=1.0,
    log_func="np.log",
    ref_level_db=20,
    preemphasis=0.0,
)

# Never start more DataLoader workers than there are CPU cores: extra workers
# only compete for the same cores. os.cpu_count() may return None, hence the
# fallback to 1. On machines with four or more cores this is still 4.
loader_workers = min(4, os.cpu_count() or 1)

# Parameters for training the model
config = GlowTTSConfig(
    audio=audio_config,
    batch_size=16,
    eval_batch_size=1,
    num_loader_workers=loader_workers,
    num_eval_loader_workers=loader_workers,
    run_eval=True,
    test_delay_epochs=-1,
    epochs=100,
    text_cleaner="phoneme_cleaners",
    use_phonemes=True,
    phoneme_language="pt",
    phoneme_cache_path=os.path.join(output_path, "phoneme_cache"),
    print_step=50,
    print_eval=False,
    mixed_precision=True,
    output_path=output_path,
    datasets=[dataset_config],
)

# Audio processor built from the configuration above
ap = AudioProcessor.init_from_config(config)

# Tokenizer; the initializer also returns the configuration, which is rebound
# here so the rest of the cell uses the returned version.
tokenizer, config = TTSTokenizer.init_from_config(config)

# Load the samples and split them into training and evaluation sets
train_samples, eval_samples = load_tts_samples(
    dataset_config,
    eval_split=True,
    eval_split_max_size=config.eval_split_max_size,
    eval_split_size=config.eval_split_size,
)

# Model initialization (no speaker manager is passed)
model = GlowTTS(config, ap, tokenizer, speaker_manager=None)

# Model training
trainer = Trainer(
    TrainerArgs(), config, output_path, model=model,
    train_samples=train_samples, eval_samples=eval_samples
)
trainer.fit()

 > Setting up Audio Processor...
 | > sample_rate:44100
 | > resample:False
 | > num_mels:80
 | > log_func:np.log
 | > min_level_db:-100
 | > frame_shift_ms:None
 | > frame_length_ms:None
 | > ref_level_db:20
 | > fft_size:1024
 | > power:1.5
 | > preemphasis:0.0
 | > griffin_lim_iters:60
 | > signal_norm:False
 | > symmetric_norm:True
 | > mel_fmin:0
 | > mel_fmax:8000
 | > pitch_fmin:1.0
 | > pitch_fmax:640.0
 | > spec_gain:1.0
 | > stft_pad_mode:reflect
 | > max_norm:4.0
 | > clip_norm:True
 | > do_trim_silence:True
 | > trim_db:60.0
 | > do_sound_norm:False
 | > do_amp_to_db_linear:True
 | > do_amp_to_db_mel:True
 | > do_rms_norm:False
 | > db_level:None
 | > stats_path:None
 | > base:2.718281828459045
 | > hop_length:256
 | > win_length:1024
 | > Found 800 files in /content/voz_base_44kHz_16bit


 > Training Environment:
 | > Backend: Torch
 | > Mixed precision: True
 | > Precision: fp16
 | > Current device: 0
 | > Num. of GPUs: 1
 | > Num. of CPUs: 2
 | > Num. of Torch Threads: 1
 | > Torch seed: 54321
 | > Torch CUDNN: True
 | > Torch CUDNN deterministic: False
 | > Torch CUDNN benchmark: False
 | > Torch TF32 MatMul: False
 > Start Tensorboard: tensorboard --logdir=/content/output/run-August-12-2023_11+43PM-0000000

 > Model has 28610257 parameters

[4m[1m > EPOCH: 0/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000


[*] Pre-computing phonemes...


 40%|████      | 317/792 [00:04<00:07, 65.94it/s]

. . , , , … .
 [!] Character '…' not found in the vocabulary. Discarding it.


 68%|██████▊   | 542/792 [00:07<00:03, 70.64it/s]

, ,  — , , .
 [!] Character '—' not found in the vocabulary. Discarding it.


100%|██████████| 792/792 [00:10<00:00, 72.70it/s]

[1m > TRAINING (2023-08-12 23:43:35) [0m




> DataLoader initialization
| > Tokenizer:
	| > add_blank: False
	| > use_eos_bos: False
	| > use_phonemes: True
	| > phonemizer:
		| > phoneme language: pt
		| > phoneme backend: gruut
	| > 2 not found characters:
	| > …
	| > —
| > Number of instances : 792
 | > Preprocessing samples
 | > Max text length: 286
 | > Min text length: 82
 | > Avg text length: 188.57449494949495
 | 
 | > Max audio length: 1257063.0
 | > Min audio length: 226548.0
 | > Avg audio length: 534580.0
 | > Num. instances discarded samples: 0
 | > Batch group size: 0.



[1m   --> TIME: 2023-08-12 23:43:56 -- STEP: 0/50 -- GLOBAL_STEP: 0[0m
     | > current_lr: 2.5e-07 
     | > step_time: 14.4543  (14.454307556152344)
     | > loader_time: 5.7283  (5.728253602981567)

 [!] `train_step()` retuned `None` outputs. Skipping training step.
 [!] `train_step()` retuned `None` outputs. Skipping training step.
 [!] `train_step()` retuned `None` outputs. Skipping training step.
 [!] `train_step()` retuned `None` outputs. Skipping training step.
 [!] `train_step()` retuned `None` outputs. Skipping training step.
 [!] `train_step()` retuned `None` outputs. Skipping training step.
 [!] `train_step()` retuned `None` outputs. Skipping training step.
 [!] `train_step()` retuned `None` outputs. Skipping training step.
 [!] `train_step()` retuned `None` outputs. Skipping training step.
 [!] `train_step()` retuned `None` outputs. Skipping training step.

[1m > EVALUATION [0m





> DataLoader initialization
| > Tokenizer:
	| > add_blank: False
	| > use_eos_bos: False
	| > use_phonemes: True
	| > phonemizer:
		| > phoneme language: pt
		| > phoneme backend: gruut
	| > 2 not found characters:
	| > …
	| > —
| > Number of instances : 8
 | > Preprocessing samples
 | > Max text length: 236
 | > Min text length: 99
 | > Avg text length: 179.0
 | 
 | > Max audio length: 634456.0
 | > Min audio length: 291262.0
 | > Avg audio length: 507695.625
 | > Num. instances discarded samples: 0
 | > Batch group size: 0.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.001940420695713588 [0m(+0)
     | > avg_loss: 26.46731458391462 [0m(+0)
     | > avg_log_mle: 1.0781050409589494 [0m(+0)
     | > avg_loss_dur: 25.38920933859689 [0m(+0)



 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.001940420695713588 [0m(+0.0)
     | > avg_loss: 26.46731458391462 [0m(+0.0)
     | > avg_log_mle: 1.0781050409589494 [0m(+0.0)
     | > avg_loss_dur: 25.38920933859689 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_50.pth

[4m[1m > EPOCH: 1/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-12 23:45:58) [0m

[1m   --> TIME: 2023-08-12 23:46:05 -- STEP: 0/50 -- GLOBAL_STEP: 50[0m
     | > loss: 26.65632438659668  (26.65632438659668)
     | > log_mle: 1.0921236276626587  (1.0921236276626587)
     | > loss_dur: 25.56420135498047  (25.56420135498047)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(29.6717, device='cuda:0')  (tensor(29.6717, device='cuda:0'))
     | > current_lr: 2.5e-07 
     | > step_time: 3.2591  (3.2590811252593994)
     | > loader_time: 3.6685  (3.6685452461242676)


[1m > EVALUATION [0m


  [1m--> EVA

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0026228087288992746 [0m(+0.0)
     | > avg_loss: 26.44022137778146 [0m(+0.0)
     | > avg_log_mle: 1.0778675930840629 [0m(+0.0)
     | > avg_loss_dur: 25.362354006086075 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_100.pth

[4m[1m > EPOCH: 2/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-12 23:47:54) [0m

[1m   --> TIME: 2023-08-12 23:47:59 -- STEP: 0/50 -- GLOBAL_STEP: 100[0m
     | > loss: 23.457963943481445  (23.457963943481445)
     | > log_mle: 1.0918407440185547  (1.0918407440185547)
     | > loss_dur: 22.36612319946289  (22.36612319946289)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(26.0661, device='cuda:0')  (tensor(26.0661, device='cuda:0'))
     | > current_lr: 5e-07 
     | > step_time: 1.5507  (1.550727128982544)
     | > loader_time: 3.1996  (3.1995561122894287)


[1m > EVALUATION [0m


  [1m--> 

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.00258599008832659 [0m(+0.0)
     | > avg_loss: 27.67368643624442 [0m(+0.0)
     | > avg_log_mle: 1.077216420854841 [0m(+0.0)
     | > avg_loss_dur: 26.596469742911204 [0m(+0.0)


[4m[1m > EPOCH: 3/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-12 23:49:49) [0m

[1m   --> TIME: 2023-08-12 23:49:54 -- STEP: 0/50 -- GLOBAL_STEP: 150[0m
     | > loss: 25.875324249267578  (25.875324249267578)
     | > log_mle: 1.0915299654006958  (1.0915299654006958)
     | > loss_dur: 24.783794403076172  (24.783794403076172)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(28.5940, device='cuda:0')  (tensor(28.5940, device='cuda:0'))
     | > current_lr: 7.5e-07 
     | > step_time: 1.627  (1.6270287036895752)
     | > loader_time: 3.3132  (3.3132264614105225)


[1m > EVALUATION [0m


  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.002007722854614258 [0m(-0.0005

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.002007722854614258 [0m(+0.0)
     | > avg_loss: 27.63676275525774 [0m(+0.0)
     | > avg_log_mle: 1.0760207516806466 [0m(+0.0)
     | > avg_loss_dur: 26.5607419695173 [0m(+0.0)


[4m[1m > EPOCH: 4/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-12 23:51:39) [0m

[1m   --> TIME: 2023-08-12 23:51:47 -- STEP: 0/50 -- GLOBAL_STEP: 200[0m
     | > loss: 26.901220321655273  (26.901220321655273)
     | > log_mle: 1.0902998447418213  (1.0902998447418213)
     | > loss_dur: 25.81092071533203  (25.81092071533203)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(30.4851, device='cuda:0')  (tensor(30.4851, device='cuda:0'))
     | > current_lr: 1e-06 
     | > step_time: 2.7171  (2.7171413898468018)
     | > loader_time: 4.6718  (4.671815872192383)


[1m > EVALUATION [0m


  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.0038066591535295758 [0m(+0.0017989

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0038066591535295758 [0m(+0.0)
     | > avg_loss: 28.89525740487235 [0m(+0.0)
     | > avg_log_mle: 1.0741172177450997 [0m(+0.0)
     | > avg_loss_dur: 27.82114028930664 [0m(+0.0)


[4m[1m > EPOCH: 5/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-12 23:53:34) [0m

[1m   --> TIME: 2023-08-12 23:53:40 -- STEP: 0/50 -- GLOBAL_STEP: 250[0m
     | > loss: 26.221940994262695  (26.221940994262695)
     | > log_mle: 1.0884785652160645  (1.0884785652160645)
     | > loss_dur: 25.13346290588379  (25.13346290588379)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(29.5680, device='cuda:0')  (tensor(29.5680, device='cuda:0'))
     | > current_lr: 1.2499999999999999e-06 
     | > step_time: 2.2929  (2.2929325103759766)
     | > loader_time: 3.4211  (3.4210755825042725)


[1m > EVALUATION [0m


  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.003112213952200

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0031122139522007535 [0m(+0.0)
     | > avg_loss: 29.2554383959089 [0m(+0.0)
     | > avg_log_mle: 1.071187973022461 [0m(+0.0)
     | > avg_loss_dur: 28.184250695364817 [0m(+0.0)


[4m[1m > EPOCH: 6/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-12 23:55:29) [0m

[1m   --> TIME: 2023-08-12 23:55:34 -- STEP: 0/50 -- GLOBAL_STEP: 300[0m
     | > loss: 25.302392959594727  (25.302392959594727)
     | > log_mle: 1.085363745689392  (1.085363745689392)
     | > loss_dur: 24.217029571533203  (24.217029571533203)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(28.3146, device='cuda:0')  (tensor(28.3146, device='cuda:0'))
     | > current_lr: 1.5e-06 
     | > step_time: 1.3952  (1.3952107429504395)
     | > loader_time: 3.4002  (3.400228261947632)


[1m > EVALUATION [0m


  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.0028086730412074496 [0m(-0.0003

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0028086730412074496 [0m(+0.0)
     | > avg_loss: 30.135390417916433 [0m(+0.0)
     | > avg_log_mle: 1.0666626521519251 [0m(+0.0)
     | > avg_loss_dur: 29.068728038242885 [0m(+0.0)


[4m[1m > EPOCH: 7/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-12 23:57:24) [0m

[1m   --> TIME: 2023-08-12 23:57:30 -- STEP: 0/50 -- GLOBAL_STEP: 350[0m
     | > loss: 25.973363876342773  (25.973363876342773)
     | > log_mle: 1.0811514854431152  (1.0811514854431152)
     | > loss_dur: 24.8922119140625  (24.8922119140625)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(29.3616, device='cuda:0')  (tensor(29.3616, device='cuda:0'))
     | > current_lr: 1.75e-06 
     | > step_time: 1.5637  (1.5637295246124268)
     | > loader_time: 4.8027  (4.802672386169434)


[1m > EVALUATION [0m


  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.0020201546805245535 [0m(-0.00

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0020201546805245535 [0m(+0.0)
     | > avg_loss: 29.76131166730608 [0m(+0.0)
     | > avg_log_mle: 1.0597516298294067 [0m(+0.0)
     | > avg_loss_dur: 28.70155988420759 [0m(+0.0)


[4m[1m > EPOCH: 8/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-12 23:59:16) [0m

[1m   --> TIME: 2023-08-12 23:59:24 -- STEP: 0/50 -- GLOBAL_STEP: 400[0m
     | > loss: 25.86806869506836  (25.86806869506836)
     | > log_mle: 1.0744010210037231  (1.0744010210037231)
     | > loss_dur: 24.79366683959961  (24.79366683959961)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(28.7740, device='cuda:0')  (tensor(28.7740, device='cuda:0'))
     | > current_lr: 2e-06 
     | > step_time: 2.7491  (2.7491183280944824)
     | > loader_time: 5.2578  (5.257805347442627)


[1m > EVALUATION [0m


  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.0026419503348214285 [0m(+0.0006217

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0026419503348214285 [0m(+0.0)
     | > avg_loss: 29.493578229631698 [0m(+0.0)
     | > avg_log_mle: 1.0496142762047904 [0m(+0.0)
     | > avg_loss_dur: 28.443963732038224 [0m(+0.0)


[4m[1m > EPOCH: 9/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:01:08) [0m

[1m   --> TIME: 2023-08-13 00:01:13 -- STEP: 0/50 -- GLOBAL_STEP: 450[0m
     | > loss: 26.564054489135742  (26.564054489135742)
     | > log_mle: 1.0640860795974731  (1.0640860795974731)
     | > loss_dur: 25.499967575073242  (25.499967575073242)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(28.8348, device='cuda:0')  (tensor(28.8348, device='cuda:0'))
     | > current_lr: 2.25e-06 
     | > step_time: 2.1682  (2.1681630611419678)
     | > loader_time: 3.2103  (3.2102863788604736)


[1m > EVALUATION [0m


  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.003102166312081473 [0m(+

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.003102166312081473 [0m(+0.0)
     | > avg_loss: 28.93283544267927 [0m(+0.0)
     | > avg_log_mle: 1.0354631458009993 [0m(+0.0)
     | > avg_loss_dur: 27.89737238202776 [0m(+0.0)


[4m[1m > EPOCH: 10/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:03:02) [0m

[1m   --> TIME: 2023-08-13 00:03:07 -- STEP: 0/50 -- GLOBAL_STEP: 500[0m
     | > loss: 26.48465347290039  (26.48465347290039)
     | > log_mle: 1.0490944385528564  (1.0490944385528564)
     | > loss_dur: 25.435558319091797  (25.435558319091797)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(27.9425, device='cuda:0')  (tensor(27.9425, device='cuda:0'))
     | > current_lr: 2.4999999999999998e-06 
     | > step_time: 1.4742  (1.4742381572723389)
     | > loader_time: 3.1912  (3.191190481185913)


[1m > EVALUATION [0m


  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.0029093538011823

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0029093538011823383 [0m(+0.0)
     | > avg_loss: 28.559900556291854 [0m(+0.0)
     | > avg_log_mle: 1.0167917353766305 [0m(+0.0)
     | > avg_loss_dur: 27.543108531406947 [0m(+0.0)


[4m[1m > EPOCH: 11/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:04:57) [0m

[1m   --> TIME: 2023-08-13 00:05:02 -- STEP: 0/50 -- GLOBAL_STEP: 550[0m
     | > loss: 26.02890968322754  (26.02890968322754)
     | > log_mle: 1.0298669338226318  (1.0298669338226318)
     | > loss_dur: 24.999042510986328  (24.999042510986328)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(26.4876, device='cuda:0')  (tensor(26.4876, device='cuda:0'))
     | > current_lr: 2.75e-06 
     | > step_time: 1.6528  (1.6527626514434814)
     | > loader_time: 3.1474  (3.147446632385254)


[1m > EVALUATION [0m


  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.002599409648350307 [0m(-0.

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.002599409648350307 [0m(+0.0)
     | > avg_loss: 28.556949615478516 [0m(+0.0)
     | > avg_log_mle: 0.9942120909690857 [0m(+0.0)
     | > avg_loss_dur: 27.562737601143972 [0m(+0.0)


[4m[1m > EPOCH: 12/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:06:50) [0m

[1m   --> TIME: 2023-08-13 00:06:57 -- STEP: 0/50 -- GLOBAL_STEP: 600[0m
     | > loss: 25.987871170043945  (25.987871170043945)
     | > log_mle: 1.006934404373169  (1.006934404373169)
     | > loss_dur: 24.98093605041504  (24.98093605041504)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(25.3429, device='cuda:0')  (tensor(25.3429, device='cuda:0'))
     | > current_lr: 3e-06 
     | > step_time: 1.6157  (1.6157341003417969)
     | > loader_time: 5.1867  (5.18671441078186)


[1m > EVALUATION [0m


  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.002511092594691685 [0m(-8.8317053

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.002511092594691685 [0m(+0.0)
     | > avg_loss: 28.106936591012136 [0m(+0.0)
     | > avg_log_mle: 0.9704767976488385 [0m(+0.0)
     | > avg_loss_dur: 27.136460168021067 [0m(+0.0)


[4m[1m > EPOCH: 13/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:08:41) [0m

[1m   --> TIME: 2023-08-13 00:08:48 -- STEP: 0/50 -- GLOBAL_STEP: 650[0m
     | > loss: 25.689516067504883  (25.689516067504883)
     | > log_mle: 0.9815424084663391  (0.9815424084663391)
     | > loss_dur: 24.70797348022461  (24.70797348022461)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(24.0532, device='cuda:0')  (tensor(24.0532, device='cuda:0'))
     | > current_lr: 3.25e-06 
     | > step_time: 2.48  (2.480015754699707)
     | > loader_time: 5.2468  (5.246845483779907)


[1m > EVALUATION [0m


  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.0021647044590541293 [0m(-0.000

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0021647044590541293 [0m(+0.0)
     | > avg_loss: 27.357282638549805 [0m(+0.0)
     | > avg_log_mle: 0.9473785417420524 [0m(+0.0)
     | > avg_loss_dur: 26.40990420750209 [0m(+0.0)


[4m[1m > EPOCH: 14/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:10:31) [0m

[1m   --> TIME: 2023-08-13 00:10:36 -- STEP: 0/50 -- GLOBAL_STEP: 700[0m
     | > loss: 25.060739517211914  (25.060739517211914)
     | > log_mle: 0.9576022624969482  (0.9576022624969482)
     | > loss_dur: 24.103137969970703  (24.103137969970703)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(23.4337, device='cuda:0')  (tensor(23.4337, device='cuda:0'))
     | > current_lr: 3.5e-06 
     | > step_time: 1.4817  (1.4817216396331787)
     | > loader_time: 3.5025  (3.502476930618286)


[1m > EVALUATION [0m


  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.0027330262320382254 [0m(+0

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0027330262320382254 [0m(+0.0)
     | > avg_loss: 26.731351579938615 [0m(+0.0)
     | > avg_log_mle: 0.924613322530474 [0m(+0.0)
     | > avg_loss_dur: 25.806738444737025 [0m(+0.0)


[4m[1m > EPOCH: 15/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:12:24) [0m

[1m   --> TIME: 2023-08-13 00:12:29 -- STEP: 0/50 -- GLOBAL_STEP: 750[0m
     | > loss: 24.46186637878418  (24.46186637878418)
     | > log_mle: 0.9324480295181274  (0.9324480295181274)
     | > loss_dur: 23.5294189453125  (23.5294189453125)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(22.0217, device='cuda:0')  (tensor(22.0217, device='cuda:0'))
     | > current_lr: 3.7499999999999997e-06 
     | > step_time: 1.5865  (1.5865161418914795)
     | > loader_time: 3.5777  (3.5776820182800293)


[1m > EVALUATION [0m


  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[91m 0.00276265825544084

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.002762658255440848 [0m(+0.0)
     | > avg_loss: 26.088875361851283 [0m(+0.0)
     | > avg_log_mle: 0.9010975786617824 [0m(+0.0)
     | > avg_loss_dur: 25.187778200422013 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_800.pth

[4m[1m > EPOCH: 16/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:14:17) [0m

[1m   --> TIME: 2023-08-13 00:14:25 -- STEP: 0/50 -- GLOBAL_STEP: 800[0m
     | > loss: 23.470014572143555  (23.470014572143555)
     | > log_mle: 0.9090144634246826  (0.9090144634246826)
     | > loss_dur: 22.56100082397461  (22.56100082397461)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(21.2025, device='cuda:0')  (tensor(21.2025, device='cuda:0'))
     | > current_lr: 4e-06 
     | > step_time: 2.1651  (2.1650991439819336)
     | > loader_time: 5.2746  (5.274641990661621)


[1m > EVALUATION [0m


  [1m-->

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.002036571502685547 [0m(+0.0)
     | > avg_loss: 25.512663432529994 [0m(+0.0)
     | > avg_log_mle: 0.873758784362248 [0m(+0.0)
     | > avg_loss_dur: 24.638904571533203 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_850.pth

[4m[1m > EPOCH: 17/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:16:11) [0m

[1m   --> TIME: 2023-08-13 00:16:18 -- STEP: 0/50 -- GLOBAL_STEP: 850[0m
     | > loss: 23.22522735595703  (23.22522735595703)
     | > log_mle: 0.8820022940635681  (0.8820022940635681)
     | > loss_dur: 22.343225479125977  (22.343225479125977)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(20.8167, device='cuda:0')  (tensor(20.8167, device='cuda:0'))
     | > current_lr: 4.25e-06 
     | > step_time: 2.6943  (2.6943485736846924)
     | > loader_time: 4.4734  (4.473425388336182)


[1m > EVALUATION [0m


  [1m-

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0029139178139822824 [0m(+0.0)
     | > avg_loss: 24.833547047206334 [0m(+0.0)
     | > avg_log_mle: 0.8373908741133553 [0m(+0.0)
     | > avg_loss_dur: 23.99615614754813 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_900.pth

[4m[1m > EPOCH: 18/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:18:03) [0m

[1m   --> TIME: 2023-08-13 00:18:10 -- STEP: 0/50 -- GLOBAL_STEP: 900[0m
     | > loss: 22.639324188232422  (22.639324188232422)
     | > log_mle: 0.8474187850952148  (0.8474187850952148)
     | > loss_dur: 21.79190444946289  (21.79190444946289)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(20.8461, device='cuda:0')  (tensor(20.8461, device='cuda:0'))
     | > current_lr: 4.5e-06 
     | > step_time: 2.6839  (2.683933973312378)
     | > loader_time: 3.9179  (3.9178946018218994)


[1m > EVALUATION [0m


  [1m-

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0030963761465890066 [0m(+0.0)
     | > avg_loss: 24.48097120012556 [0m(+0.0)
     | > avg_log_mle: 0.8024392298289708 [0m(+0.0)
     | > avg_loss_dur: 23.678531646728516 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_950.pth

[4m[1m > EPOCH: 19/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:19:59) [0m

[1m   --> TIME: 2023-08-13 00:20:04 -- STEP: 0/50 -- GLOBAL_STEP: 950[0m
     | > loss: 22.329814910888672  (22.329814910888672)
     | > log_mle: 0.8139922618865967  (0.8139922618865967)
     | > loss_dur: 21.515823364257812  (21.515823364257812)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(20.0514, device='cuda:0')  (tensor(20.0514, device='cuda:0'))
     | > current_lr: 4.749999999999999e-06 
     | > step_time: 1.6998  (1.6997730731964111)
     | > loader_time: 3.108  (3.1080286502838135)


[1m > EVALUATIO

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0027881009238106863 [0m(+0.0)
     | > avg_loss: 23.925461087908065 [0m(+0.0)
     | > avg_log_mle: 0.774017470223563 [0m(+0.0)
     | > avg_loss_dur: 23.15144375392369 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_1000.pth

[4m[1m > EPOCH: 20/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:21:55) [0m

[1m   --> TIME: 2023-08-13 00:21:59 -- STEP: 0/50 -- GLOBAL_STEP: 1000[0m
     | > loss: 22.090782165527344  (22.090782165527344)
     | > log_mle: 0.7858397364616394  (0.7858397364616394)
     | > loss_dur: 21.304943084716797  (21.304943084716797)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(19.7565, device='cuda:0')  (tensor(19.7565, device='cuda:0'))
     | > current_lr: 4.9999999999999996e-06 
     | > step_time: 1.596  (1.5960395336151123)
     | > loader_time: 3.1394  (3.1393513679504395)


[1m > EVALUAT

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0047304630279541016 [0m(+0.0)
     | > avg_loss: 23.71010180882045 [0m(+0.0)
     | > avg_log_mle: 0.7498538494110107 [0m(+0.0)
     | > avg_loss_dur: 22.96024785723005 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_1050.pth

[4m[1m > EPOCH: 21/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:23:52) [0m

[1m   --> TIME: 2023-08-13 00:23:56 -- STEP: 0/50 -- GLOBAL_STEP: 1050[0m
     | > loss: 21.19182586669922  (21.19182586669922)
     | > log_mle: 0.7618227005004883  (0.7618227005004883)
     | > loss_dur: 20.430004119873047  (20.430004119873047)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(18.5392, device='cuda:0')  (tensor(18.5392, device='cuda:0'))
     | > current_lr: 5.25e-06 
     | > step_time: 1.7212  (1.7211618423461914)
     | > loader_time: 2.7868  (2.7868077754974365)


[1m > EVALUATION [0m


  [

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0040318284715924945 [0m(+0.0)
     | > avg_loss: 22.64527702331543 [0m(+0.0)
     | > avg_log_mle: 0.7294573528426034 [0m(+0.0)
     | > avg_loss_dur: 21.915819576808385 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_1100.pth

[4m[1m > EPOCH: 22/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:25:48) [0m

[1m   --> TIME: 2023-08-13 00:25:53 -- STEP: 0/50 -- GLOBAL_STEP: 1100[0m
     | > loss: 20.7684268951416  (20.7684268951416)
     | > log_mle: 0.741117000579834  (0.741117000579834)
     | > loss_dur: 20.02730941772461  (20.02730941772461)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(17.8850, device='cuda:0')  (tensor(17.8850, device='cuda:0'))
     | > current_lr: 5.5e-06 
     | > step_time: 1.3912  (1.3912391662597656)
     | > loader_time: 3.399  (3.3989784717559814)


[1m > EVALUATION [0m


  [1m--> E

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0019412040710449219 [0m(+0.0)
     | > avg_loss: 22.248817443847656 [0m(+0.0)
     | > avg_log_mle: 0.7112479380198887 [0m(+0.0)
     | > avg_loss_dur: 21.537569727216447 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_1150.pth

[4m[1m > EPOCH: 23/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:27:44) [0m

[1m   --> TIME: 2023-08-13 00:27:49 -- STEP: 0/50 -- GLOBAL_STEP: 1150[0m
     | > loss: 19.77836799621582  (19.77836799621582)
     | > log_mle: 0.721882164478302  (0.721882164478302)
     | > loss_dur: 19.056486129760742  (19.056486129760742)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(16.9763, device='cuda:0')  (tensor(16.9763, device='cuda:0'))
     | > current_lr: 5.75e-06 
     | > step_time: 1.41  (1.410020112991333)
     | > loader_time: 3.233  (3.2330286502838135)


[1m > EVALUATION [0m


  [1m--

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0028713090079171316 [0m(+0.0)
     | > avg_loss: 21.772479466029576 [0m(+0.0)
     | > avg_log_mle: 0.6939547913415092 [0m(+0.0)
     | > avg_loss_dur: 21.078524317060197 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_1200.pth

[4m[1m > EPOCH: 24/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:29:43) [0m

[1m   --> TIME: 2023-08-13 00:29:48 -- STEP: 0/50 -- GLOBAL_STEP: 1200[0m
     | > loss: 19.964017868041992  (19.964017868041992)
     | > log_mle: 0.7042320966720581  (0.7042320966720581)
     | > loss_dur: 19.25978660583496  (19.25978660583496)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(17.6325, device='cuda:0')  (tensor(17.6325, device='cuda:0'))
     | > current_lr: 6e-06 
     | > step_time: 1.5987  (1.5987293720245361)
     | > loader_time: 3.3079  (3.3078813552856445)


[1m > EVALUATION [0m


  [1

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.001985720225742885 [0m(+0.0)
     | > avg_loss: 21.60244928087507 [0m(+0.0)
     | > avg_log_mle: 0.6776637434959412 [0m(+0.0)
     | > avg_loss_dur: 20.924785614013672 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_1250.pth

[4m[1m > EPOCH: 25/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:31:40) [0m

[1m   --> TIME: 2023-08-13 00:31:45 -- STEP: 0/50 -- GLOBAL_STEP: 1250[0m
     | > loss: 19.916976928710938  (19.916976928710938)
     | > log_mle: 0.6872297525405884  (0.6872297525405884)
     | > loss_dur: 19.229747772216797  (19.229747772216797)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(17.7270, device='cuda:0')  (tensor(17.7270, device='cuda:0'))
     | > current_lr: 6.2499999999999995e-06 
     | > step_time: 1.5703  (1.570336103439331)
     | > loader_time: 3.2107  (3.210742712020874)


[1m > EVALUATI

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0033090455191476004 [0m(+0.0)
     | > avg_loss: 21.921932765415736 [0m(+0.0)
     | > avg_log_mle: 0.6630742635045733 [0m(+0.0)
     | > avg_loss_dur: 21.25885854448591 [0m(+0.0)


[4m[1m > EPOCH: 26/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:33:34) [0m

[1m   --> TIME: 2023-08-13 00:33:40 -- STEP: 0/50 -- GLOBAL_STEP: 1300[0m
     | > loss: 19.470407485961914  (19.470407485961914)
     | > log_mle: 0.6723793745040894  (0.6723793745040894)
     | > loss_dur: 18.79802894592285  (18.79802894592285)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(17.3615, device='cuda:0')  (tensor(17.3615, device='cuda:0'))
     | > current_lr: 6.5e-06 
     | > step_time: 1.5487  (1.548712968826294)
     | > loader_time: 3.8499  (3.84985613822937)


[1m > EVALUATION [0m


  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time:[92m 0.0027866363525390625 [0m(-0.00

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0027866363525390625 [0m(+0.0)
     | > avg_loss: 21.5941002709525 [0m(+0.0)
     | > avg_log_mle: 0.6495065774236407 [0m(+0.0)
     | > avg_loss_dur: 20.94459342956543 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_1350.pth

[4m[1m > EPOCH: 27/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:35:27) [0m

[1m   --> TIME: 2023-08-13 00:35:34 -- STEP: 0/50 -- GLOBAL_STEP: 1350[0m
     | > loss: 18.797225952148438  (18.797225952148438)
     | > log_mle: 0.6587718725204468  (0.6587718725204468)
     | > loss_dur: 18.13845443725586  (18.13845443725586)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(16.8769, device='cuda:0')  (tensor(16.8769, device='cuda:0'))
     | > current_lr: 6.75e-06 
     | > step_time: 2.3185  (2.3185465335845947)
     | > loader_time: 4.8383  (4.838254928588867)


[1m > EVALUATION [0m


  [1m

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0020212786538260324 [0m(+0.0)
     | > avg_loss: 21.372795786176408 [0m(+0.0)
     | > avg_log_mle: 0.6356446487562997 [0m(+0.0)
     | > avg_loss_dur: 20.737151418413436 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_1400.pth

[4m[1m > EPOCH: 28/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:37:20) [0m

[1m   --> TIME: 2023-08-13 00:37:28 -- STEP: 0/50 -- GLOBAL_STEP: 1400[0m
     | > loss: 18.983631134033203  (18.983631134033203)
     | > log_mle: 0.6443922519683838  (0.6443922519683838)
     | > loss_dur: 18.3392391204834  (18.3392391204834)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(17.3411, device='cuda:0')  (tensor(17.3411, device='cuda:0'))
     | > current_lr: 7e-06 
     | > step_time: 2.7281  (2.72812557220459)
     | > loader_time: 5.2896  (5.289614200592041)


[1m > EVALUATION [0m


  [1m--> 

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0034167425973074777 [0m(+0.0)
     | > avg_loss: 21.170677866254533 [0m(+0.0)
     | > avg_log_mle: 0.6215761133602687 [0m(+0.0)
     | > avg_loss_dur: 20.54910182952881 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_1450.pth

[4m[1m > EPOCH: 29/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:39:16) [0m

[1m   --> TIME: 2023-08-13 00:39:21 -- STEP: 0/50 -- GLOBAL_STEP: 1450[0m
     | > loss: 19.025936126708984  (19.025936126708984)
     | > log_mle: 0.6296718716621399  (0.6296718716621399)
     | > loss_dur: 18.396265029907227  (18.396265029907227)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(17.6038, device='cuda:0')  (tensor(17.6038, device='cuda:0'))
     | > current_lr: 7.25e-06 
     | > step_time: 2.5732  (2.573183298110962)
     | > loader_time: 3.2951  (3.2950596809387207)


[1m > EVALUATION [0m


  

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.002143587384905134 [0m(+0.0)
     | > avg_loss: 21.00169508797782 [0m(+0.0)
     | > avg_log_mle: 0.6082749537059239 [0m(+0.0)
     | > avg_loss_dur: 20.393420219421387 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_1500.pth

[4m[1m > EPOCH: 30/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:41:12) [0m

[1m   --> TIME: 2023-08-13 00:41:17 -- STEP: 0/50 -- GLOBAL_STEP: 1500[0m
     | > loss: 18.707538604736328  (18.707538604736328)
     | > log_mle: 0.616034746170044  (0.616034746170044)
     | > loss_dur: 18.091503143310547  (18.091503143310547)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(17.5875, device='cuda:0')  (tensor(17.5875, device='cuda:0'))
     | > current_lr: 7.499999999999999e-06 
     | > step_time: 1.6119  (1.6118826866149902)
     | > loader_time: 3.1877  (3.187739610671997)


[1m > EVALUATION

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0034102031162806918 [0m(+0.0)
     | > avg_loss: 20.773873601640975 [0m(+0.0)
     | > avg_log_mle: 0.5952604583331517 [0m(+0.0)
     | > avg_loss_dur: 20.17861352648054 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_1550.pth

[4m[1m > EPOCH: 31/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:43:09) [0m

[1m   --> TIME: 2023-08-13 00:43:14 -- STEP: 0/50 -- GLOBAL_STEP: 1550[0m
     | > loss: 18.428081512451172  (18.428081512451172)
     | > log_mle: 0.6026500463485718  (0.6026500463485718)
     | > loss_dur: 17.82543182373047  (17.82543182373047)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(17.5809, device='cuda:0')  (tensor(17.5809, device='cuda:0'))
     | > current_lr: 7.75e-06 
     | > step_time: 1.6522  (1.6521532535552979)
     | > loader_time: 2.9828  (2.982764959335327)


[1m > EVALUATION [0m


  [

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0027600015912737164 [0m(+0.0)
     | > avg_loss: 20.62424741472517 [0m(+0.0)
     | > avg_log_mle: 0.5822010380881173 [0m(+0.0)
     | > avg_loss_dur: 20.042046819414413 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_1600.pth

[4m[1m > EPOCH: 32/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:45:10) [0m

[1m   --> TIME: 2023-08-13 00:45:16 -- STEP: 0/50 -- GLOBAL_STEP: 1600[0m
     | > loss: 18.192502975463867  (18.192502975463867)
     | > log_mle: 0.5888771414756775  (0.5888771414756775)
     | > loss_dur: 17.603626251220703  (17.603626251220703)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(17.6252, device='cuda:0')  (tensor(17.6252, device='cuda:0'))
     | > current_lr: 8e-06 
     | > step_time: 2.5302  (2.530214548110962)
     | > loader_time: 3.337  (3.3370425701141357)


[1m > EVALUATION [0m


  [1m

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.003051689692905971 [0m(+0.0)
     | > avg_loss: 20.44875376565116 [0m(+0.0)
     | > avg_log_mle: 0.5688356672014508 [0m(+0.0)
     | > avg_loss_dur: 19.879918234688894 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_1650.pth

[4m[1m > EPOCH: 33/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:47:07) [0m

[1m   --> TIME: 2023-08-13 00:47:12 -- STEP: 0/50 -- GLOBAL_STEP: 1650[0m
     | > loss: 18.016759872436523  (18.016759872436523)
     | > log_mle: 0.5745666027069092  (0.5745666027069092)
     | > loss_dur: 17.44219398498535  (17.44219398498535)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(17.5651, device='cuda:0')  (tensor(17.5651, device='cuda:0'))
     | > current_lr: 8.25e-06 
     | > step_time: 1.5936  (1.5936200618743896)
     | > loader_time: 3.15  (3.150012493133545)


[1m > EVALUATION [0m


  [1m-

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.002897228513445173 [0m(+0.0)
     | > avg_loss: 20.3958192552839 [0m(+0.0)
     | > avg_log_mle: 0.5553787691252572 [0m(+0.0)
     | > avg_loss_dur: 19.840440613882883 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_1700.pth

[4m[1m > EPOCH: 34/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:49:05) [0m

[1m   --> TIME: 2023-08-13 00:49:10 -- STEP: 0/50 -- GLOBAL_STEP: 1700[0m
     | > loss: 17.149595260620117  (17.149595260620117)
     | > log_mle: 0.5604512691497803  (0.5604512691497803)
     | > loss_dur: 16.589143753051758  (16.589143753051758)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(16.9465, device='cuda:0')  (tensor(16.9465, device='cuda:0'))
     | > current_lr: 8.5e-06 
     | > step_time: 1.5566  (1.556579351425171)
     | > loader_time: 3.3806  (3.3805744647979736)


[1m > EVALUATION [0m


  [1

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0025878974369594027 [0m(+0.0)
     | > avg_loss: 20.125878061567033 [0m(+0.0)
     | > avg_log_mle: 0.5418181163924081 [0m(+0.0)
     | > avg_loss_dur: 19.584060260227748 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_1750.pth

[4m[1m > EPOCH: 35/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:51:04) [0m

[1m   --> TIME: 2023-08-13 00:51:09 -- STEP: 0/50 -- GLOBAL_STEP: 1750[0m
     | > loss: 17.501638412475586  (17.501638412475586)
     | > log_mle: 0.5456752777099609  (0.5456752777099609)
     | > loss_dur: 16.955963134765625  (16.955963134765625)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(17.4366, device='cuda:0')  (tensor(17.4366, device='cuda:0'))
     | > current_lr: 8.750000000000001e-06 
     | > step_time: 1.8203  (1.8202519416809082)
     | > loader_time: 3.0282  (3.0281548500061035)


[1m > EVALU

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.002168893814086914 [0m(+0.0)
     | > avg_loss: 19.864961624145508 [0m(+0.0)
     | > avg_log_mle: 0.5284098386764526 [0m(+0.0)
     | > avg_loss_dur: 19.336551666259766 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_1800.pth

[4m[1m > EPOCH: 36/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:53:02) [0m

[1m   --> TIME: 2023-08-13 00:53:07 -- STEP: 0/50 -- GLOBAL_STEP: 1800[0m
     | > loss: 17.19863510131836  (17.19863510131836)
     | > log_mle: 0.5312339663505554  (0.5312339663505554)
     | > loss_dur: 16.667400360107422  (16.667400360107422)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(17.4063, device='cuda:0')  (tensor(17.4063, device='cuda:0'))
     | > current_lr: 9e-06 
     | > step_time: 1.3941  (1.3941481113433838)
     | > loader_time: 3.4054  (3.4054460525512695)


[1m > EVALUATION [0m


  [1m

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.002027273178100586 [0m(+0.0)
     | > avg_loss: 19.607156617300852 [0m(+0.0)
     | > avg_log_mle: 0.5158722570964268 [0m(+0.0)
     | > avg_loss_dur: 19.091284343174525 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_1850.pth

[4m[1m > EPOCH: 37/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:55:01) [0m

[1m   --> TIME: 2023-08-13 00:55:06 -- STEP: 0/50 -- GLOBAL_STEP: 1850[0m
     | > loss: 16.851308822631836  (16.851308822631836)
     | > log_mle: 0.5172572135925293  (0.5172572135925293)
     | > loss_dur: 16.33405113220215  (16.33405113220215)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(17.3581, device='cuda:0')  (tensor(17.3581, device='cuda:0'))
     | > current_lr: 9.250000000000001e-06 
     | > step_time: 1.6207  (1.620734691619873)
     | > loader_time: 3.0449  (3.0448801517486572)


[1m > EVALUATIO

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0029166085379464285 [0m(+0.0)
     | > avg_loss: 19.321101188659668 [0m(+0.0)
     | > avg_log_mle: 0.5044238695076534 [0m(+0.0)
     | > avg_loss_dur: 18.816677365984237 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_1900.pth

[4m[1m > EPOCH: 38/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:57:00) [0m

[1m   --> TIME: 2023-08-13 00:57:05 -- STEP: 0/50 -- GLOBAL_STEP: 1900[0m
     | > loss: 16.14384651184082  (16.14384651184082)
     | > log_mle: 0.504236102104187  (0.504236102104187)
     | > loss_dur: 15.63961124420166  (15.63961124420166)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(16.6420, device='cuda:0')  (tensor(16.6420, device='cuda:0'))
     | > current_lr: 9.499999999999999e-06 
     | > step_time: 1.5708  (1.5708434581756592)
     | > loader_time: 3.3753  (3.375300645828247)


[1m > EVALUATION 

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.003935609545026507 [0m(+0.0)
     | > avg_loss: 19.032914297921316 [0m(+0.0)
     | > avg_log_mle: 0.49319384353501455 [0m(+0.0)
     | > avg_loss_dur: 18.539720262799943 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_1950.pth

[4m[1m > EPOCH: 39/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 00:58:59) [0m

[1m   --> TIME: 2023-08-13 00:59:03 -- STEP: 0/50 -- GLOBAL_STEP: 1950[0m
     | > loss: 16.12113380432129  (16.12113380432129)
     | > log_mle: 0.4928693175315857  (0.4928693175315857)
     | > loss_dur: 15.628265380859375  (15.628265380859375)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(17.0339, device='cuda:0')  (tensor(17.0339, device='cuda:0'))
     | > current_lr: 9.75e-06 
     | > step_time: 1.6721  (1.6720740795135498)
     | > loader_time: 3.1406  (3.140556812286377)


[1m > EVALUATION [0m


  

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0021690300532749723 [0m(+0.0)
     | > avg_loss: 18.8566677910941 [0m(+0.0)
     | > avg_log_mle: 0.48236291323389324 [0m(+0.0)
     | > avg_loss_dur: 18.374304907662527 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_2000.pth

[4m[1m > EPOCH: 40/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 01:00:57) [0m

[1m   --> TIME: 2023-08-13 01:01:02 -- STEP: 0/50 -- GLOBAL_STEP: 2000[0m
     | > loss: 15.78085994720459  (15.78085994720459)
     | > log_mle: 0.48144271969795227  (0.48144271969795227)
     | > loss_dur: 15.299417495727539  (15.299417495727539)
     | > amp_scaler: 4096.0  (4096.0)
     | > grad_norm: tensor(17.0037, device='cuda:0')  (tensor(17.0037, device='cuda:0'))
     | > current_lr: 9.999999999999999e-06 
     | > step_time: 1.4412  (1.4411780834197998)
     | > loader_time: 3.4323  (3.4323296546936035)


[1m > EVALUA

 | > Synthesizing test sentences.



  [1m--> EVAL PERFORMANCE[0m
     | > avg_loader_time: 0.0029793807438441683 [0m(+0.0)
     | > avg_loss: 18.617325919015066 [0m(+0.0)
     | > avg_log_mle: 0.47254463178770884 [0m(+0.0)
     | > avg_loss_dur: 18.144781521388463 [0m(+0.0)

 > BEST MODEL : /content/output/run-August-12-2023_11+43PM-0000000/best_model_2050.pth

[4m[1m > EPOCH: 41/100[0m
 --> /content/output/run-August-12-2023_11+43PM-0000000

[1m > TRAINING (2023-08-13 01:02:55) [0m

[1m   --> TIME: 2023-08-13 01:02:59 -- STEP: 0/50 -- GLOBAL_STEP: 2050[0m
     | > loss: 15.199819564819336  (15.199819564819336)
     | > log_mle: 0.4708048105239868  (0.4708048105239868)
     | > loss_dur: 14.72901439666748  (14.72901439666748)
     | > amp_scaler: 8192.0  (8192.0)
     | > grad_norm: tensor(16.4805, device='cuda:0')  (tensor(16.4805, device='cuda:0'))
     | > current_lr: 1.025e-05 
     | > step_time: 1.5396  (1.5395569801330566)
     | > loader_time: 3.2339  (3.2339329719543457)

