# NeMo Models

In [1]:
# Import NeMo and its ASR collection
import nemo
import nemo.collections.asr as nemo_asr
# Create the neural module factory with CPU placement; `nf` is used later to launch training (`nf.train`).
nf = nemo.core.NeuralModuleFactory(placement=nemo.core.DeviceType.CPU)

[NeMo W 2020-05-20 14:43:03 audio_preprocessing:56] Could not import torchaudio. Some features might not work.
[NeMo W 2020-05-20 14:43:03 audio_preprocessing:61] Unable to import APEX. Mixed precision and distributed training will not work.


A *NeMoModel* is a kind of NeuralModule that contains other NeuralModules inside it.
Which modules it contains, and the topology of the connections between them, can
depend on the mode (training, inference, etc.).

## I want an ASR model to serve with Jarvis. What do I do?

In [2]:
# Print the name of every pre-trained checkpoint that QuartzNet reports as available on NGC
available_checkpoints = nemo_asr.models.QuartzNet.list_pretrained_models()
for ckpt in available_checkpoints:
    print(ckpt.pretrained_model_name)

[NeMo W 2020-05-20 14:43:03 quartznet:150] THIS METHOD IS NOT DONE YET


QuartzNet15x5-En-BASE
QuartzNet15x5-Zh-BASE
Jasper10x5-En-BASE
ContextNet21x5-En-BASE


In [3]:
# Download the one I want from NGC.
# Per the log output below, files already present in the local `.nemo_files` directory are re-used.
pre_trained_qn_model = nemo_asr.models.QuartzNet.from_pretrained(model_info="QuartzNet15x5-En-BASE")

[NeMo W 2020-05-20 14:43:04 quartznet:183] THIS METHOD IS NOT YET FINISHED


[NeMo I 2020-05-20 14:43:04 helpers:157] Found existing object /Users/okuchaiev/.nemo_files/NEMO_0.10.2b0/QuartzNet15x5-En-BASE/JasperEncoder-STEP-243800.pt. Re-using
[NeMo I 2020-05-20 14:43:04 helpers:157] Found existing object /Users/okuchaiev/.nemo_files/NEMO_0.10.2b0/QuartzNet15x5-En-BASE/JasperDecoderForCTC-STEP-243800.pt. Re-using
[NeMo I 2020-05-20 14:43:04 helpers:157] Found existing object /Users/okuchaiev/.nemo_files/NEMO_0.10.2b0/QuartzNet15x5-En-BASE/JasperDecoderForCTC-STEP-243800.pt. Re-using
[NeMo I 2020-05-20 14:43:04 helpers:157] Found existing object /Users/okuchaiev/.nemo_files/NEMO_0.10.2b0/QuartzNet15x5-En-BASE/qn.yaml. Re-using
[NeMo I 2020-05-20 14:43:04 quartznet:198] Instantiating model from pre-trained checkpoint
[NeMo I 2020-05-20 14:43:04 neural_modules:341] Loading configuration of a new Neural Module from the `/Users/okuchaiev/.nemo_files/NEMO_0.10.2b0/QuartzNet15x5-En-BASE/qn.yaml` file
[NeMo I 2020-05-20 14:43:05 features:144] PADDING: 16
[NeMo I 2020-0

In [4]:
# Show the class of the object returned by from_pretrained
type(pre_trained_qn_model)

nemo.collections.asr.models.quartznet.QuartzNet

In [5]:
# Export it as ".nemo" file.
# With optimize_for_deployment=True the log below shows the encoder/decoder being exported for
# deployment, while AudioToMelSpectrogramPreprocessor is reported as not converted to .onnx.
pre_trained_qn_model.export("asr.nemo", optimize_for_deployment=True)

[NeMo I 2020-05-15 14:43:38 neural_modules:232] Configuration of module `quartznet0` (QuartzNet) exported to .PYS964LHI7ILTPT8/.nemo_tmp/QuartzNet.yaml
[NeMo I 2020-05-15 14:43:38 neural_graph:480] Configuration of graph `neuralgraph1` (NeuralGraph) exported to .PYS964LHI7ILTPT8/.nemo_tmp/QuartzNet_eval_graph.yaml


[NeMo W 2020-05-15 14:43:38 nemo_model:135] Did not convert AudioToMelSpectrogramPreprocessor to .onnx
[NeMo W 2020-05-15 14:43:38 jasper:134] Turned off 12 masked convolutions


'AudioToMelSpectrogramPreprocessor' object has no attribute 'eval'


[NeMo W 2020-05-15 14:43:39 deprecated:68] Function ``local_parameters`` is deprecated. It is going to be removed in the 0.11 version.
  'Automatically generated names will be applied to each dynamic axes of input {}'.format(key))
  'Automatically generated names will be applied to each dynamic axes of input {}'.format(key))
  'Automatically generated names will be applied to each dynamic axes of input {}'.format(key))
  'Automatically generated names will be applied to each dynamic axes of input {}'.format(key))


[NeMo I 2020-05-15 14:43:44 nemo_model:143] Exported model QuartzNet to asr.nemo


In [7]:
# ".nemo" file is just a file with Modules in .onnx format and evaluation graph structure
! mv asr.nemo asr.tar.gz
! tar -xvf asr.tar.gz

mv: asr.nemo: No such file or directory
x .nemo_tmp/
x .nemo_tmp/JasperDecoderForCTC.onnx
x .nemo_tmp/JasperEncoder.onnx
x .nemo_tmp/QuartzNet.yaml
x .nemo_tmp/QuartzNet_eval_graph.yaml


## NeMoModel instantiation - method 1

### Because NeMoModel is a NeuralModule, regular constructor-based initialization applies

In [8]:
# Step 1: read the model definition from its YAML config file
from ruamel.yaml import YAML

yaml = YAML(typ="safe")
with open("configs/jasper_an4.yaml") as config_stream:
    model_definition = yaml.load(config_stream)

In [9]:
# Build a QuartzNet from the per-module sections of the YAML model definition
qn_model = nemo_asr.models.QuartzNet(
    preprocessor_params=model_definition['AudioToMelSpectrogramPreprocessor'],
    encoder_params=model_definition['JasperEncoder'],
    decoder_params=model_definition['JasperDecoderForCTC'],
)
# Show the model's weight count and the input ports it exposes
print(qn_model.num_weights)
print(qn_model.input_ports)

[NeMo I 2020-05-15 14:52:28 features:144] PADDING: 16
[NeMo I 2020-05-15 14:52:28 features:152] STFT using conv
[NeMo I 2020-05-15 14:52:28 neural_modules:442] Instantiated a new Neural Module named `audiotomelspectrogrampreprocessor1` of type `AudioToMelSpectrogramPreprocessor`
[NeMo I 2020-05-15 14:52:29 neural_modules:442] Instantiated a new Neural Module named `jasperencoder1` of type `JasperEncoder`
[NeMo I 2020-05-15 14:52:29 neural_modules:442] Instantiated a new Neural Module named `jasperdecoderforctc1` of type `JasperDecoderForCTC`
5771293
{'input_signal': <nemo.core.neural_types.neural_type.NeuralType object at 0x142194390>, 'length': <nemo.core.neural_types.neural_type.NeuralType object at 0x142184650>}


### Because NeMoModel is a NeuralModule, regular config import/export work

In [10]:
# Round-trip the model through a YAML config file
qn_model.export_to_config("qn.yaml")
qn_model2 = nemo_asr.models.QuartzNet.import_from_config(config_file="qn.yaml")
# Print the weight count of the re-imported model, then of the original, for comparison
for compared_model in (qn_model2, qn_model):
    print(compared_model.num_weights)

[NeMo I 2020-05-15 14:52:42 neural_modules:232] Configuration of module `quartznet1` (QuartzNet) exported to qn.yaml
[NeMo I 2020-05-15 14:52:42 neural_modules:342] Loading configuration of a new Neural Module from the `qn.yaml` file
[NeMo I 2020-05-15 14:52:42 features:144] PADDING: 16
[NeMo I 2020-05-15 14:52:42 features:152] STFT using conv
[NeMo I 2020-05-15 14:52:42 neural_modules:442] Instantiated a new Neural Module named `audiotomelspectrogrampreprocessor2` of type `AudioToMelSpectrogramPreprocessor`
[NeMo I 2020-05-15 14:52:42 neural_modules:442] Instantiated a new Neural Module named `jasperencoder2` of type `JasperEncoder`
[NeMo I 2020-05-15 14:52:42 neural_modules:442] Instantiated a new Neural Module named `jasperdecoderforctc2` of type `JasperDecoderForCTC`
[NeMo I 2020-05-15 14:52:42 neural_modules:442] Instantiated a new Neural Module named `quartznet2` of type `QuartzNet`
5771293
5771293


## NeMoModel instantiation - method 2

In [11]:
# List all available models from NGC, one name per line
ngc_checkpoints = nemo_asr.models.QuartzNet.list_pretrained_models()
for ngc_checkpoint in ngc_checkpoints:
    print(ngc_checkpoint.pretrained_model_name)

[NeMo W 2020-05-15 14:52:53 quartznet:151] THIS METHOD IS NOT DONE YET


QuartzNet15x5-En-BASE
QuartzNet15x5-Zh-BASE
Jasper10x5-En-BASE
ContextNet21x5-En-BASE


In [12]:
# Fetch the named checkpoint from NGC and instantiate the model together with its pre-trained weights.
# This rebinds `pre_trained_qn_model`, which the export and training cells below use.
pre_trained_qn_model = nemo_asr.models.QuartzNet.from_pretrained(model_info="QuartzNet15x5-En-BASE")

[NeMo W 2020-05-15 14:52:59 quartznet:184] THIS METHOD IS NOT YET FINISHED


[NeMo I 2020-05-15 14:52:59 helpers:155] Found existing object /Users/okuchaiev/.nemo_files/NEMO_0.10.2b0/QuartzNet15x5-En-BASE/JasperEncoder-STEP-243800.pt. Re-using
[NeMo I 2020-05-15 14:52:59 helpers:155] Found existing object /Users/okuchaiev/.nemo_files/NEMO_0.10.2b0/QuartzNet15x5-En-BASE/JasperDecoderForCTC-STEP-243800.pt. Re-using
[NeMo I 2020-05-15 14:52:59 helpers:155] Found existing object /Users/okuchaiev/.nemo_files/NEMO_0.10.2b0/QuartzNet15x5-En-BASE/JasperDecoderForCTC-STEP-243800.pt. Re-using
[NeMo I 2020-05-15 14:52:59 helpers:155] Found existing object /Users/okuchaiev/.nemo_files/NEMO_0.10.2b0/QuartzNet15x5-En-BASE/qn.yaml. Re-using
[NeMo I 2020-05-15 14:52:59 quartznet:199] Instantiating model from pre-trained checkpoint
[NeMo I 2020-05-15 14:52:59 neural_modules:342] Loading configuration of a new Neural Module from the `/Users/okuchaiev/.nemo_files/NEMO_0.10.2b0/QuartzNet15x5-En-BASE/qn.yaml` file
[NeMo I 2020-05-15 14:52:59 features:144] PADDING: 16
[NeMo I 2020-0

# Export model to ".nemo" format

## Export to ".nemo" file

In [14]:
# Export without deployment optimization; per the log below, both a train graph and an eval graph
# configuration are written alongside the module configuration.
pre_trained_qn_model.export('quartznet.nemo', optimize_for_deployment=False)

[NeMo I 2020-05-15 14:53:57 neural_modules:232] Configuration of module `quartznet3` (QuartzNet) exported to .64G9SZNAIUY6YTJF/.nemo_tmp/QuartzNet.yaml
[NeMo I 2020-05-15 14:53:57 neural_graph:480] Configuration of graph `neuralgraph6` (NeuralGraph) exported to .64G9SZNAIUY6YTJF/.nemo_tmp/QuartzNet_train_graph.yaml
[NeMo I 2020-05-15 14:53:57 neural_graph:480] Configuration of graph `neuralgraph7` (NeuralGraph) exported to .64G9SZNAIUY6YTJF/.nemo_tmp/QuartzNet_eval_graph.yaml
[NeMo I 2020-05-15 14:53:58 nemo_model:143] Exported model QuartzNet to quartznet.nemo


In [15]:
# Here from_pretrained is given the path of the local ".nemo" file exported above
# instead of an NGC model name.
new_instance = nemo_asr.models.QuartzNet.from_pretrained(model_info='quartznet.nemo')

[NeMo W 2020-05-15 14:54:02 quartznet:184] THIS METHOD IS NOT YET FINISHED


[NeMo I 2020-05-15 14:54:02 neural_modules:342] Loading configuration of a new Neural Module from the `UGJB1452F2HA799B/.nemo_tmp/QuartzNet.yaml` file
[NeMo I 2020-05-15 14:54:02 features:144] PADDING: 16
[NeMo I 2020-05-15 14:54:02 features:152] STFT using conv
[NeMo I 2020-05-15 14:54:02 neural_modules:442] Instantiated a new Neural Module named `audiotomelspectrogrampreprocessor0` of type `AudioToMelSpectrogramPreprocessor`
[NeMo I 2020-05-15 14:54:02 neural_modules:442] Instantiated a new Neural Module named `jasperencoder0` of type `JasperEncoder`
[NeMo I 2020-05-15 14:54:02 neural_modules:442] Instantiated a new Neural Module named `jasperdecoderforctc0` of type `JasperDecoderForCTC`
[NeMo I 2020-05-15 14:54:02 neural_modules:442] Instantiated a new Neural Module named `quartznet0` of type `QuartzNet`


In [16]:
# ".nemo" file is just a file with Modules in .onnx format and evaluation graph structure
! mv quartznet.nemo quartznet.tar.gz
! tar -xvf quartznet.tar.gz

x .nemo_tmp/
x .nemo_tmp/JasperDecoderForCTC.pt
x .nemo_tmp/JasperEncoder.pt
x .nemo_tmp/QuartzNet.yaml
x .nemo_tmp/QuartzNet_eval_graph.yaml
x .nemo_tmp/QuartzNet_train_graph.yaml


In [None]:
# nemo.core.DeploymentFormat.

## NeMoModels can be used just as any other Neural Module

In [17]:
import os

# Directory holding the AN4 manifests. Set the AN4_DATA_DIR environment variable to point at
# your own copy; the default keeps the path this notebook was originally run with.
an4_data_dir = os.environ.get("AN4_DATA_DIR", "/Users/okuchaiev/Data/an4_dataset")
train_manifest = os.path.join(an4_data_dir, "an4_train.json")
val_manifest = os.path.join(an4_data_dir, "an4_val.json")

# The label set comes from the YAML model definition loaded earlier
labels = model_definition['labels']

# One data layer per manifest: training data and validation data
data_layer = nemo_asr.AudioToTextDataLayer(manifest_filepath=train_manifest, labels=labels, batch_size=16)
data_layerE = nemo_asr.AudioToTextDataLayer(manifest_filepath=val_manifest, labels=labels, batch_size=16)

# CTC loss sized to the label set, and a greedy CTC decoder for turning log-probs into predictions
ctc_loss = nemo_asr.CTCLossNM(num_classes=len(labels))
greedy_decoder = nemo_asr.GreedyCTCDecoder()

[NeMo I 2020-05-15 14:58:38 collections:154] Dataset loaded with 897 files totalling 1.39 hours
[NeMo I 2020-05-15 14:58:38 collections:155] 0 files were filtered totalling 0.00 hours
[NeMo I 2020-05-15 14:58:38 collections:154] Dataset loaded with 130 files totalling 0.20 hours
[NeMo I 2020-05-15 14:58:38 collections:155] 0 files were filtered totalling 0.00 hours


In [18]:
# Wire the training graph: the NeMoModel is called like any other neural module.
# The training data layer yields the audio signal, its length, the transcript and its length.
audio_signal, audio_signal_len, transcript, transcript_len = data_layer()
# The pre-trained model maps the audio inputs to log-probabilities and encoded lengths.
log_probs, encoded_len = pre_trained_qn_model(input_signal=audio_signal, length=audio_signal_len)
# Greedy decoding of the log-probabilities; `predictions` is passed to the training callback below.
predictions = greedy_decoder(log_probs=log_probs)
# CTC loss between the model output and the reference transcripts.
loss = ctc_loss(log_probs=log_probs, targets=transcript,
                input_length=encoded_len, target_length=transcript_len)

In [None]:
# START TRAINING
from functools import partial

from nemo.collections.asr.helpers import monitor_asr_train_progress

# Tensors passed to the progress monitor in addition to the loss
tensors_to_evaluate = [predictions, transcript, transcript_len]

# Callback that logs training progress through monitor_asr_train_progress
train_callback = nemo.core.SimpleLossLoggerCallback(
    tensors=[loss] + tensors_to_evaluate,
    print_func=partial(monitor_asr_train_progress, labels=labels),
)

# Optimize the CTC loss with NovoGrad
nf.train(
    tensors_to_optimize=[loss],
    callbacks=[train_callback],
    optimizer="novograd",
    optimization_params={"num_epochs": 30, "lr": 1e-2, "weight_decay": 1e-3},
)

## OTHER

In [None]:
# NOTE(review): this cell has never been executed (no execution count) and looks like a sketch of
# convenience calls -- transcribing a single audio file and transcribing from the microphone.
# TODO confirm these entry points exist in the installed NeMo version before relying on them.
res=pre_trained_qn_model(audio_file='myaudio.wav')
pre_trained_qn_model.transcribe_from_microphone()