In [1]:
import json
import nemo
import torch
import librosa
import numpy as np

from pathlib import Path
from tqdm.notebook import tqdm


In [2]:
from nemo.collections.tts.models.base import SpectrogramGenerator
from nemo.collections.tts.models import MixerTTSModel

from matplotlib.pyplot import imshow
from matplotlib import pyplot as plt
%matplotlib inline

################################################################################
###          (please add 'export KALDI_ROOT=<your_path>' in your $HOME/.profile)
###          (or run as: KALDI_ROOT=<your_path> python <your_script>.py)
################################################################################

[NeMo W 2022-05-25 03:38:46 experimental:27] Module <class 'nemo.collections.nlp.data.language_modeling.megatron.megatron_batch_samplers.MegatronPretrainingRandomBatchSampler'> is experimental, not ready for production and is not fully supported. Use at your own risk.


In [3]:
from nemo.collections.tts.torch.g2ps import EnglishG2p
from nemo.collections.tts.torch.data import TTSDataset
from nemo_text_processing.text_normalization.normalize import Normalizer
from nemo.collections.tts.torch.tts_tokenizers import EnglishPhonemesTokenizer, EnglishCharsTokenizer

In [5]:
# download data and manifests
!wget https://github.com/NVIDIA/NeMo/releases/download/v0.11.0/test_data.tar.gz && mkdir -p tests/data && tar xzf test_data.tar.gz -C tests/data

# additional files
!mkdir -p tts_dataset_files && cd tts_dataset_files \
&& wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/tts_dataset_files/cmudict-0.7b_nv22.01 \
&& wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/tts_dataset_files/heteronyms-030921 \
&& wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/nemo_text_processing/text_normalization/en/data/whitelist/lj_speech.tsv \
&& cd ..

--2022-05-25 03:41:06--  https://github.com/NVIDIA/NeMo/releases/download/v0.11.0/test_data.tar.gz
Resolving github.com (github.com)... 140.82.121.3
Connecting to github.com (github.com)|140.82.121.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/200722670/140fcd80-a9ca-11eb-8af9-e4e1e5cd3508?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20220525%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20220525T003939Z&X-Amz-Expires=300&X-Amz-Signature=d51ddd69242e3126eaf5d9ca06aeb8a459a78819caee07f31aedd73ed70adf39&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=200722670&response-content-disposition=attachment%3B%20filename%3Dtest_data.tar.gz&response-content-type=application%2Foctet-stream [following]
--2022-05-25 03:41:06--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/200722670/140fcd80-a9ca-11eb-8af9-e4e1e5cd3508?X-Amz-Algo

In [8]:
!wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/tts/mixer_tts.py

!mkdir -p conf && cd conf \
&& wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/tts/conf/mixer-tts.yaml \
&& cd ..

--2022-05-25 03:42:28--  https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/tts/mixer_tts.py
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 1291 (1,3K) [text/plain]
Saving to: ‘mixer_tts.py’


2022-05-25 03:42:28 (84,9 MB/s) - ‘mixer_tts.py’ saved [1291/1291]

--2022-05-25 03:42:28--  https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/tts/conf/mixer-tts.yaml
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.110.133, 185.199.109.133, 185.199.108.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.110.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 6589 (6,4K) [text/plain]
Saving to: ‘mixer-tts.yaml’


2022-05-25 03:42:28 (89,3 MB/s) - ‘mixer-tts.yaml’ sa

In [9]:
# Text normalizer
text_normalizer = Normalizer(
    lang="en", 
    input_case="cased", 
    whitelist="tts_dataset_files/lj_speech.tsv"
)

text_normalizer_call_kwargs = {
    "punct_pre_process": True,
    "punct_post_process": True
}

# Grapheme-to-phoneme module
g2p = EnglishG2p(
    phoneme_dict="tts_dataset_files/cmudict-0.7b_nv22.01",
    heteronyms="tts_dataset_files/heteronyms-030921"
)

# Text tokenizer
text_tokenizer = EnglishPhonemesTokenizer(
    punct=True,
    stresses=True,
    chars=True,
    apostrophe=True,
    pad_with_space=True,
    g2p=g2p,
)

[NeMo I 2022-05-25 03:42:38 tokenize_and_classify:88] Creating ClassifyFst grammars.


[NeMo W 2022-05-25 03:42:42 g2ps:84] apply_to_oov_word=None, it means that some of words will remain unchanged if they are not handled by one of rule in self.parse_one_word(). It is useful when you use tokenizer with set of phonemes and chars together, otherwise it can be not.


In [11]:
def pre_calculate_supplementary_data(sup_data_path, sup_data_types, text_tokenizer, text_normalizer, text_normalizer_call_kwargs):
    # init train and val dataloaders
    stages = ["train", "val"]
    stage2dl = {}
    for stage in stages:
        ds = TTSDataset(
            manifest_filepath=f"tests/data/asr/an4_{stage}.json",
            sample_rate=16000,
            sup_data_path=sup_data_path,
            sup_data_types=sup_data_types,
            n_fft=1024,
            win_length=1024,
            hop_length=256,
            window="hann",
            n_mels=80,
            lowfreq=0,
            highfreq=8000,
            text_tokenizer=text_tokenizer,
            text_normalizer=text_normalizer,
            text_normalizer_call_kwargs=text_normalizer_call_kwargs

        ) 
        stage2dl[stage] = torch.utils.data.DataLoader(ds, batch_size=1, collate_fn=ds._collate_fn, num_workers=1)

    # iteration over dataloaders
    pitch_mean, pitch_std, pitch_min, pitch_max = None, None, None, None
    for stage, dl in stage2dl.items():
        pitch_list = []
        for batch in tqdm(dl, total=len(dl)):
            tokens, tokens_lengths, audios, audio_lengths, attn_prior, pitches, pitches_lengths = batch
            pitch = pitches.squeeze(0)
            pitch_list.append(pitch[pitch != 0])

        if stage == "train":
            pitch_tensor = torch.cat(pitch_list)
            pitch_mean, pitch_std = pitch_tensor.mean().item(), pitch_tensor.std().item()
            pitch_min, pitch_max = pitch_tensor.min().item(), pitch_tensor.max().item()
            
    return pitch_mean, pitch_std, pitch_min, pitch_max

In [16]:
pitch_mean, pitch_std, pitch_min, pitch_max

(190.2564697265625, 265.6141052246094, 65.4063949584961, 1998.48779296875)

In [12]:
mixer_tts_sup_data_path = "mixer_tts_sup_data_folder"
sup_data_types = ["align_prior_matrix", "pitch"]

pitch_mean, pitch_std, pitch_min, pitch_max = pre_calculate_supplementary_data(
    mixer_tts_sup_data_path, sup_data_types, text_tokenizer, text_normalizer, text_normalizer_call_kwargs
)

[NeMo I 2022-05-25 03:43:00 data:173] Loading dataset from tests/data/asr/an4_train.json.


30it [00:05,  5.57it/s]

[NeMo I 2022-05-25 03:43:06 data:207] Loaded dataset with 30 files.
[NeMo I 2022-05-25 03:43:06 data:209] Dataset contains 0.02 hours.
[NeMo I 2022-05-25 03:43:06 data:297] Pruned 0 files. Final dataset contains 30 files
[NeMo I 2022-05-25 03:43:06 data:299] Pruned 0.00 hours. Final dataset contains 0.02 hours.





[NeMo I 2022-05-25 03:43:06 data:173] Loading dataset from tests/data/asr/an4_val.json.


10it [00:01,  5.63it/s]

[NeMo I 2022-05-25 03:43:08 data:207] Loaded dataset with 10 files.
[NeMo I 2022-05-25 03:43:08 data:209] Dataset contains 0.01 hours.
[NeMo I 2022-05-25 03:43:08 data:297] Pruned 0 files. Final dataset contains 10 files
[NeMo I 2022-05-25 03:43:08 data:299] Pruned 0.00 hours. Final dataset contains 0.01 hours.





  0%|          | 0/30 [00:00<?, ?it/s]

  0%|          | 0/10 [00:00<?, ?it/s]

In [30]:
import os
from omegaconf import DictConfig

try:
    from ruamel.yaml import YAML
except ModuleNotFoundError:
    from ruamel_yaml import YAML

yaml = YAML(typ='safe')

In [47]:
with open('./configs/mixer-tts.yaml') as f:
    spec_gen_cfg = yaml.load(f)

spec_generator = MixerTTSModel.from_config_dict(DictConfig(spec_gen_cfg['model']))

[NeMo I 2022-05-25 04:25:42 tokenize_and_classify:88] Creating ClassifyFst grammars.


[NeMo W 2022-05-25 04:25:45 g2ps:84] apply_to_oov_word=None, it means that some of words will remain unchanged if they are not handled by one of rule in self.parse_one_word(). It is useful when you use tokenizer with set of phonemes and chars together, otherwise it can be not.


[NeMo I 2022-05-25 04:25:45 data:173] Loading dataset from /media/boris/F/NeMo_own_research/transfer_learning/an4/train_manifest_tts.json.




0it [00:00, ?it/s][A[A

1it [00:00,  5.87it/s][A[A

2it [00:00,  5.78it/s][A[A

3it [00:00,  5.84it/s][A[A

4it [00:00,  5.85it/s][A[A

5it [00:00,  5.85it/s][A[A

6it [00:01,  5.88it/s][A[A

7it [00:01,  5.75it/s][A[A

8it [00:01,  5.71it/s][A[A

9it [00:01,  5.69it/s][A[A

10it [00:01,  5.72it/s][A[A

11it [00:01,  5.76it/s][A[A

12it [00:02,  5.76it/s][A[A

13it [00:02,  5.73it/s][A[A

14it [00:02,  5.78it/s][A[A

15it [00:02,  5.80it/s][A[A

16it [00:02,  5.47it/s][A[A


KeyboardInterrupt: 

In [39]:
from nemo.collections.common.callbacks import LogEpochTimeCallback
from nemo.collections.tts.models import MixerTTSModel
from nemo.core.config import hydra_runner
from nemo.utils.exp_manager import exp_manager

trainer = pl.Trainer(devices=1, accelerator='gpu', max_epochs=100, check_val_every_n_epoch=5, 
                    precision=16,
                    accumulate_grad_batches=1,
                    gradient_clip_val=1000,
                    enable_checkpointing=False,
                    logger=False,
                    )
exp_manager(trainer, spec_gen_cfg['exp_manager'])
spec_generator.set_trainer(trainer)
trainer.callbacks.extend([pl.callbacks.LearningRateMonitor(), LogEpochTimeCallback()])  # noqa
trainer.fit(spec_generator)

Using 16bit native Automatic Mixed Precision (AMP)
GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs


[NeMo I 2022-05-25 04:09:58 exp_manager:281] Experiments will be logged at /media/boris/F/NeMo_own_research/transfer_learning/nemo_experiments/Mixer-TTS/2022-05-25_04-09-53
[NeMo I 2022-05-25 04:09:58 exp_manager:647] TensorboardLogger has been set up


      rank_zero_deprecation("`Trainer.weights_save_path` has been deprecated in v1.6 and will be removed in v1.8.")
    
[NeMo W 2022-05-25 04:09:58 exp_manager:881] The checkpoint callback was told to monitor a validation value and trainer's max_steps was set to -1. Please ensure that max_steps will run for at least 5 epochs to ensure that checkpointing will not error out.
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[NeMo W 2022-05-25 04:09:58 modelPT:496] The lightning trainer received accelerator: <pytorch_lightning.accelerators.gpu.GPUAccelerator object at 0x7fc1ee54f460>. We recommend to use 'ddp' instead.


[NeMo I 2022-05-25 04:09:58 modelPT:587] Optimizer config = Adam (
    Parameter Group 0
        amsgrad: False
        betas: [0.9, 0.98]
        eps: 1e-08
        foreach: None
        initial_lr: 0.1
        lr: 4.1109609582188935e-05
        maximize: False
        weight_decay: 1e-06
    )
[NeMo I 2022-05-25 04:09:58 lr_scheduler:833] Scheduler "<nemo.core.optim.lr_scheduler.NoamAnnealing object at 0x7fc2029ccc40>" 
    will be used during training (effective maximum steps = 1300) - 
    Parameters : 
    (warmup_steps: 1000
    last_epoch: -1
    d_model: 1
    max_steps: 1300
    )



   | Name               | Type                              | Params
--------------------------------------------------------------------------
0  | aligner            | AlignmentEncoder                  | 1.0 M 
1  | forward_sum_loss   | ForwardSumLoss                    | 0     
2  | bin_loss           | BinLoss                           | 0     
3  | encoder            | MixerTTSModule                    | 7.2 M 
4  | symbol_emb         | Embedding                         | 43.8 K
5  | duration_predictor | TemporalPredictor                 | 493 K 
6  | pitch_predictor    | TemporalPredictor                 | 493 K 
7  | pitch_emb          | Conv1d                            | 1.5 K 
8  | preprocessor       | AudioToMelSpectrogramPreprocessor | 0     
9  | decoder            | MixerTTSModule                    | 10.8 M
10 | proj               | Linear                            | 30.8 K
--------------------------------------------------------------------------
20.1 M    Trainable p

Sanity Checking: 0it [00:00, ?it/s]

      rank_zero_warn(
    
      rank_zero_warn(
    


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

Epoch 4, global step 65: 'val_mel_loss' reached 11.92645 (best 11.92645), saving model to '/media/boris/F/NeMo_own_research/transfer_learning/nemo_experiments/Mixer-TTS/2022-05-25_04-09-53/checkpoints/Mixer-TTS--val_mel_loss=11.9264-epoch=4.ckpt' as top 3
    


Validation: 0it [00:00, ?it/s]

Epoch 9, global step 130: 'val_mel_loss' reached 3.04858 (best 3.04858), saving model to '/media/boris/F/NeMo_own_research/transfer_learning/nemo_experiments/Mixer-TTS/2022-05-25_04-09-53/checkpoints/Mixer-TTS--val_mel_loss=3.0486-epoch=9.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Epoch 14, global step 195: 'val_mel_loss' reached 2.81283 (best 2.81283), saving model to '/media/boris/F/NeMo_own_research/transfer_learning/nemo_experiments/Mixer-TTS/2022-05-25_04-09-53/checkpoints/Mixer-TTS--val_mel_loss=2.8128-epoch=14.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Epoch 19, global step 260: 'val_mel_loss' reached 2.21573 (best 2.21573), saving model to '/media/boris/F/NeMo_own_research/transfer_learning/nemo_experiments/Mixer-TTS/2022-05-25_04-09-53/checkpoints/Mixer-TTS--val_mel_loss=2.2157-epoch=19.ckpt' as top 3


[NeMo I 2022-05-25 04:12:26 mixer_tts:404] Using hard attentions after epoch: 20


Validation: 0it [00:00, ?it/s]

Epoch 24, global step 325: 'val_mel_loss' reached 1.97981 (best 1.97981), saving model to '/media/boris/F/NeMo_own_research/transfer_learning/nemo_experiments/Mixer-TTS/2022-05-25_04-09-53/checkpoints/Mixer-TTS--val_mel_loss=1.9798-epoch=24.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Epoch 29, global step 390: 'val_mel_loss' reached 1.87528 (best 1.87528), saving model to '/media/boris/F/NeMo_own_research/transfer_learning/nemo_experiments/Mixer-TTS/2022-05-25_04-09-53/checkpoints/Mixer-TTS--val_mel_loss=1.8753-epoch=29.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Epoch 34, global step 455: 'val_mel_loss' reached 1.74654 (best 1.74654), saving model to '/media/boris/F/NeMo_own_research/transfer_learning/nemo_experiments/Mixer-TTS/2022-05-25_04-09-53/checkpoints/Mixer-TTS--val_mel_loss=1.7465-epoch=34.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Epoch 39, global step 520: 'val_mel_loss' reached 1.59404 (best 1.59404), saving model to '/media/boris/F/NeMo_own_research/transfer_learning/nemo_experiments/Mixer-TTS/2022-05-25_04-09-53/checkpoints/Mixer-TTS--val_mel_loss=1.5940-epoch=39.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Epoch 44, global step 585: 'val_mel_loss' reached 1.76552 (best 1.59404), saving model to '/media/boris/F/NeMo_own_research/transfer_learning/nemo_experiments/Mixer-TTS/2022-05-25_04-09-53/checkpoints/Mixer-TTS--val_mel_loss=1.7655-epoch=44.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Epoch 49, global step 650: 'val_mel_loss' reached 1.65988 (best 1.59404), saving model to '/media/boris/F/NeMo_own_research/transfer_learning/nemo_experiments/Mixer-TTS/2022-05-25_04-09-53/checkpoints/Mixer-TTS--val_mel_loss=1.6599-epoch=49.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Epoch 54, global step 715: 'val_mel_loss' reached 1.52360 (best 1.52360), saving model to '/media/boris/F/NeMo_own_research/transfer_learning/nemo_experiments/Mixer-TTS/2022-05-25_04-09-53/checkpoints/Mixer-TTS--val_mel_loss=1.5236-epoch=54.ckpt' as top 3


Validation: 0it [00:00, ?it/s]

Epoch 59, global step 780: 'val_mel_loss' reached 1.59536 (best 1.52360), saving model to '/media/boris/F/NeMo_own_research/transfer_learning/nemo_experiments/Mixer-TTS/2022-05-25_04-09-53/checkpoints/Mixer-TTS--val_mel_loss=1.5954-epoch=59.ckpt' as top 3
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fc288194430>
Traceback (most recent call last):
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1359, in __del__
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1342, in _shutdown_workers
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/multiprocessing/process.py", line 160, in is_alive
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fc288194430>
Traceback (most recent call last):
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/site-packages/torch/u

  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1342, in _shutdown_workers
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/multiprocessing/process.py", line 160, in is_alive
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fc288194430>
Traceback (most recent call last):
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1359, in __del__
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1342, in _shutdown_workers
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/multiprocessing/process.py", line 160, in is_alive
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fc288194430>
Traceback (most recent call last):
  File "/media/boris/F/anaconda3/envs/nemo/l

Validation: 0it [00:00, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fc288194430>
Traceback (most recent call last):
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1359, in __del__
    self._shutdown_workers()
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1342, in _shutdown_workers
    if w.is_alive():
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fc288194430>
Traceback (most recent call last):
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1359, in __del__
    self._shutdown_workers()
  File "/media/boris/F/anaconda3/envs/nemo/lib/python



          File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1342, in _shutdown_workers
if w.is_alive():  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/multiprocessing/process.py", line 160, in is_alive

if w.is_alive():      File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/multiprocessing/process.py", line 160, in is_alive
    
if w.is_alive():    assert self._parent_pid == os.getpid(), 'can only test a child process'
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/multiprocessing/process.py", line 160, in is_alive
assert self._parent_pid == os.getpid(), 'can only test a child process'  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/multiprocessing/process.py", line 160, in is_alive

    
    assert self._parent_pid == os.getpid(), 'can only test a child process'AssertionErrorAssertionError
assert self._parent_pid == os.getpid(), 'can only test a child process': AssertionError: can only test a ch

    if w.is_alive():
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fc288194430>
Traceback (most recent call last):
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1359, in __del__
    self._shutdown_workers()
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1342, in _shutdown_workers
    if w.is_alive():
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fc288

self._shutdown_workers()self._shutdown_workers()

          File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1342, in _shutdown_workers
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1342, in _shutdown_workers
self._shutdown_workers()self._shutdown_workers()        

if w.is_alive():if w.is_alive():  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1342, in _shutdown_workers
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1342, in _shutdown_workers


  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/multiprocessing/process.py", line 160, in is_alive
      File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/multiprocessing/process.py", line 160, in is_alive
        if w.is_alive():    if w.is_alive():assert self._parent_pid == os.getpid(), 'c

Validation: 0it [00:00, ?it/s]

Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fc288194430>
Traceback (most recent call last):
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1359, in __del__
    self._shutdown_workers()
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1342, in _shutdown_workers
    if w.is_alive():
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/multiprocessing/process.py", line 160, in is_alive
    assert self._parent_pid == os.getpid(), 'can only test a child process'
AssertionError: can only test a child process
Exception ignored in: <function _MultiProcessingDataLoaderIter.__del__ at 0x7fc288194430>
Traceback (most recent call last):
  File "/media/boris/F/anaconda3/envs/nemo/lib/python3.9/site-packages/torch/utils/data/dataloader.py", line 1359, in __del__
    self._shutdown_workers()
  File "/media/boris/F/anaconda3/envs/nemo/lib/python

In [45]:
from nemo.collections.tts.models import MelGanModel
model = MelGanModel.from_pretrained(model_name="tts_melgan")

# Generate audio
import soundfile as sf
parsed = spec_generator.parse("a b c d")
spectrogram = spec_generator.generate_spectrogram(tokens=parsed)
audio = model.convert_spectrogram_to_audio(spec=spectrogram)

[NeMo I 2022-05-25 04:17:47 cloud:56] Found existing object /home/boris/.cache/torch/NeMo/NeMo_1.8.2/tts_melgan/38f156f172595e60f02169891e303590/tts_melgan.nemo.
[NeMo I 2022-05-25 04:17:47 cloud:62] Re-using file from: /home/boris/.cache/torch/NeMo/NeMo_1.8.2/tts_melgan/38f156f172595e60f02169891e303590/tts_melgan.nemo
[NeMo I 2022-05-25 04:17:47 common:747] Instantiating model from pre-trained checkpoint


[NeMo W 2022-05-25 04:17:48 modelPT:148] If you intend to do training or fine-tuning, please call the ModelPT.setup_training_data() method and provide a valid configuration file to setup the train data loader.
    Train config : 
    dataset:
      _target_: nemo.collections.tts.data.datalayers.AudioDataset
      manifest_filepath: /raid/LJSpeech/nvidia_ljspeech_train.json
      max_duration: null
      min_duration: 0.75
      n_segments: 16384
      trim: false
    dataloader_params:
      drop_last: false
      shuffle: true
      batch_size: 64
      num_workers: 4
    
[NeMo W 2022-05-25 04:17:48 modelPT:155] If you intend to do validation, please call the ModelPT.setup_validation_data() or ModelPT.setup_multiple_validation_data() method and provide a valid configuration file to setup the validation data loader(s). 
    Validation config : 
    dataset:
      _target_: nemo.collections.tts.data.datalayers.AudioDataset
      manifest_filepath: /raid/LJSpeech/nvidia_ljspeech_val.jso

[NeMo I 2022-05-25 04:17:48 features:259] PADDING: 0
[NeMo I 2022-05-25 04:17:48 features:276] STFT using torch
[NeMo I 2022-05-25 04:17:48 features:278] STFT using exact pad
[NeMo I 2022-05-25 04:17:48 save_restore_connector:209] Model MelGanModel was successfully restored from /home/boris/.cache/torch/NeMo/NeMo_1.8.2/tts_melgan/38f156f172595e60f02169891e303590/tts_melgan.nemo.


In [46]:
import IPython.display as ipd

ipd.display(ipd.Audio(audio.to('cpu').detach().numpy(), rate=22050))

In [32]:
import pytorch_lightning as pl


trainer = pl.Trainer(devices=1, accelerator='gpu', max_epochs=100, check_val_every_n_epoch=5)

spec_generator.set_trainer(trainer)

trainer.fit(spec_generator)

GPU available: True, used: True
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]
[NeMo W 2022-05-25 04:04:03 modelPT:496] The lightning trainer received accelerator: <pytorch_lightning.accelerators.cpu.CPUAccelerator object at 0x7fc20846d8b0>. We recommend to use 'ddp' instead.


[NeMo I 2022-05-25 04:04:03 modelPT:587] Optimizer config = Adam (
    Parameter Group 0
        amsgrad: False
        betas: [0.9, 0.98]
        eps: 1e-08
        foreach: None
        lr: 0.1
        maximize: False
        weight_decay: 1e-06
    )
[NeMo I 2022-05-25 04:04:03 lr_scheduler:833] Scheduler "<nemo.core.optim.lr_scheduler.NoamAnnealing object at 0x7fc2082406a0>" 
    will be used during training (effective maximum steps = 13000) - 
    Parameters : 
    (warmup_steps: 1000
    last_epoch: -1
    d_model: 1
    max_steps: 13000
    )



   | Name               | Type                              | Params
--------------------------------------------------------------------------
0  | aligner            | AlignmentEncoder                  | 1.0 M 
1  | forward_sum_loss   | ForwardSumLoss                    | 0     
2  | bin_loss           | BinLoss                           | 0     
3  | encoder            | MixerTTSModule                    | 7.2 M 
4  | symbol_emb         | Embedding                         | 43.8 K
5  | duration_predictor | TemporalPredictor                 | 493 K 
6  | pitch_predictor    | TemporalPredictor                 | 493 K 
7  | pitch_emb          | Conv1d                            | 1.5 K 
8  | preprocessor       | AudioToMelSpectrogramPreprocessor | 0     
9  | decoder            | MixerTTSModule                    | 10.8 M
10 | proj               | Linear                            | 30.8 K
--------------------------------------------------------------------------
20.1 M    Trainable p

Sanity Checking: 0it [00:00, ?it/s]

      rank_zero_warn(
    
      rank_zero_warn(
    


Training: 0it [00:00, ?it/s]

Validation: 0it [00:00, ?it/s]

      rank_zero_warn("Detected KeyboardInterrupt, attempting graceful shutdown...")
    


In [29]:
from nemo.collections.common.callbacks import LogEpochTimeCallback
from nemo.collections.tts.models import MixerTTSModel
from nemo.core.config import hydra_runner
from nemo.utils.exp_manager import exp_manager


trainer = pl.Trainer(spec_gen_cfg['trainer'])
exp_manager(trainer, cfg.get('exp_manager', None))
model = MixerTTSModel(cfg=cfg.model, trainer=trainer)
trainer.callbacks.extend([pl.callbacks.LearningRateMonitor(), LogEpochTimeCallback()])  # noqa
trainer.fit(model)

      rank_zero_warn(
    
GPU available: True, used: False
TPU available: False, using: 0 TPU cores
IPU available: False, using: 0 IPUs
HPU available: False, using: 0 HPUs
      rank_zero_warn(
    
