# NeMo Models

In [1]:
# Import NeMo and ASR collection
import nemo
import nemo.collections.asr as nemo_asr
# Create the NeuralModuleFactory with its default placement; if construction
# fails, fall back to an explicit CPU placement.
try:
    nf = nemo.core.NeuralModuleFactory()
except Exception:
    # Catch Exception instead of using a bare `except:` so that
    # KeyboardInterrupt and SystemExit are not swallowed by the fallback.
    print("GPU was not detected. Running on CPU")
    nf = nemo.core.NeuralModuleFactory(placement=nemo.core.DeviceType.CPU)

[NeMo W 2020-05-28 17:08:17 audio_preprocessing:56] Could not import torchaudio. Some features might not work.
[NeMo W 2020-05-28 17:08:17 audio_preprocessing:61] Unable to import APEX. Mixed precision and distributed training will not work.


GPU was not detected. Running on CPU


## NeMoModel instantiation without pre-trained weights

A *NeMoModel* is a kind of NeuralModule that contains other NeuralModules inside it.
The inner modules, their modes, and the topology of the connections between them can
depend on the mode in which the NeMoModel is used (training or evaluation).

### Because NeMoModel is a NeuralModule, regular constructor-based initialization applies

In [2]:
# Parse the model definition from the YAML config file
from ruamel.yaml import YAML

yaml = YAML(typ="safe")

with open("../configs/jasper_an4.yaml") as config_file:
    model_definition = yaml.load(config_file)

In [3]:
# Construct QuartzNet directly, passing three sections of the loaded config
# as the preprocessor / encoder / decoder parameters.
quartznet_model1 = nemo_asr.models.QuartzNet(
    preprocessor_params=model_definition['AudioToMelSpectrogramPreprocessor'],
    encoder_params=model_definition['JasperEncoder'],
    decoder_params=model_definition['JasperDecoderForCTC'],
)
print(f"Created QuartzNet model with {quartznet_model1.num_weights} weights")

[NeMo I 2020-05-28 17:08:18 features:144] PADDING: 16
[NeMo I 2020-05-28 17:08:18 features:152] STFT using conv
[NeMo I 2020-05-28 17:08:18 neural_modules:438] Instantiated a new Neural Module named `audiotomelspectrogrampreprocessor0` of type `AudioToMelSpectrogramPreprocessor`
[NeMo I 2020-05-28 17:08:18 neural_modules:438] Instantiated a new Neural Module named `jasperencoder0` of type `JasperEncoder`
[NeMo I 2020-05-28 17:08:18 neural_modules:438] Instantiated a new Neural Module named `jasperdecoderforctc0` of type `JasperDecoderForCTC`
Created QuartzNet model with 5771293 weights


### Because NeMoModel is a NeuralModule, regular config import/export works

In [4]:
# Export the configuration of the model built above to the file "qn1.yaml"
quartznet_model1.export_to_config("qn1.yaml")

[NeMo I 2020-05-28 17:08:18 neural_modules:228] Configuration of module `quartznet0` (QuartzNet) exported to 'qn1.yaml'


In [5]:
# Build a second model from the configuration file exported in the previous cell
quartznet_model2 = nemo_asr.models.QuartzNet.import_from_config(
    config_file="qn1.yaml",
)
print(f"Created QuartzNet model with {quartznet_model2.num_weights} weights")

[NeMo I 2020-05-28 17:08:18 neural_modules:338] Loading configuration of a new Neural Module from the `qn1.yaml` file
[NeMo I 2020-05-28 17:08:18 features:144] PADDING: 16
[NeMo I 2020-05-28 17:08:18 features:152] STFT using conv
[NeMo I 2020-05-28 17:08:18 neural_modules:438] Instantiated a new Neural Module named `audiotomelspectrogrampreprocessor1` of type `AudioToMelSpectrogramPreprocessor`
[NeMo I 2020-05-28 17:08:18 neural_modules:438] Instantiated a new Neural Module named `jasperencoder1` of type `JasperEncoder`
[NeMo I 2020-05-28 17:08:18 neural_modules:438] Instantiated a new Neural Module named `jasperdecoderforctc1` of type `JasperDecoderForCTC`
[NeMo I 2020-05-28 17:08:18 neural_modules:438] Instantiated a new Neural Module named `quartznet1` of type `QuartzNet`
Created QuartzNet model with 5771293 weights


## NeMoModel instantiation with pre-trained weights

In [6]:
# Print the name of every pre-trained checkpoint reported by ASRConvCTCModel
for checkpoint in nemo_asr.models.ASRConvCTCModel.list_pretrained_models():
    print(checkpoint.pretrained_model_name)

[NeMo W 2020-05-28 17:08:18 asrconvctcmodel:153] TODO: CHANGE ME TO GRAB STUFF FROM NGC


QuartzNet15x5-En
QuartzNet15x5-Zh
JasperNet10x5-En


In [7]:
# Instantiate a model together with its pre-trained weights,
# selected by checkpoint name
quartznet_model3 = nemo_asr.models.QuartzNet.from_pretrained(
    model_info="QuartzNet15x5-En",
)
print(f"Created QuartzNet model with {quartznet_model3.num_weights} weights")

[NeMo W 2020-05-28 17:08:18 asrconvctcmodel:253] TODO: CHANGE ME TO GRAB STUFF FROM NGC


[NeMo I 2020-05-28 17:08:18 helpers:158] Found existing object /Users/okuchaiev/.cache/torch/NeMo/NEMO_0.11.0b1/QuartzNet15x5-En-Base.nemo.
[NeMo I 2020-05-28 17:08:18 helpers:164] Re-using file from: /Users/okuchaiev/.cache/torch/NeMo/NEMO_0.11.0b1/QuartzNet15x5-En-Base.nemo
[NeMo I 2020-05-28 17:08:18 asrconvctcmodel:220] Instantiating model from pre-trained checkpoint
[NeMo I 2020-05-28 17:08:19 neural_modules:338] Loading configuration of a new Neural Module from the `XDWRZPKBE8M0KPL5/.nemo_tmp/module.yaml` file
[NeMo I 2020-05-28 17:08:19 features:144] PADDING: 16
[NeMo I 2020-05-28 17:08:19 features:152] STFT using conv
[NeMo I 2020-05-28 17:08:19 neural_modules:438] Instantiated a new Neural Module named `audiotomelspectrogrampreprocessor2` of type `AudioToMelSpectrogramPreprocessor`
[NeMo I 2020-05-28 17:08:19 neural_modules:438] Instantiated a new Neural Module named `jasperencoder2` of type `JasperEncoder`
[NeMo I 2020-05-28 17:08:19 neural_modules:438] Instantiated a new Neu

# Export model to ".nemo" format

## Export to ".nemo" file - all params, structure and weights

In [8]:
# Save the pre-trained model to a single "quartznet.nemo" file
quartznet_model3.save_to('quartznet.nemo')

[NeMo I 2020-05-28 17:08:19 neural_modules:228] Configuration of module `quartznet2` (QuartzNet) exported to '.3T2NGRER8B4CDN97/.nemo_tmp/module.yaml'
[NeMo I 2020-05-28 17:08:19 neural_graph:480] Configuration of graph `neuralgraph4` (NeuralGraph) exported to '.3T2NGRER8B4CDN97/.nemo_tmp/train_graph.yaml'
[NeMo I 2020-05-28 17:08:19 neural_graph:480] Configuration of graph `neuralgraph5` (NeuralGraph) exported to '.3T2NGRER8B4CDN97/.nemo_tmp/eval_graph.yaml'
[NeMo I 2020-05-28 17:08:22 nemo_model:158] Exported model QuartzNet to quartznet.nemo


In [9]:
# Here from_pretrained is given the local ".nemo" file saved above
# rather than a checkpoint name
quartznet_model4 = nemo_asr.models.QuartzNet.from_pretrained(model_info='quartznet.nemo')

[NeMo I 2020-05-28 17:08:22 neural_modules:338] Loading configuration of a new Neural Module from the `3H31D39130JAIE5J/.nemo_tmp/module.yaml` file
[NeMo I 2020-05-28 17:08:22 features:144] PADDING: 16
[NeMo I 2020-05-28 17:08:22 features:152] STFT using conv
[NeMo I 2020-05-28 17:08:23 neural_modules:438] Instantiated a new Neural Module named `audiotomelspectrogrampreprocessor3` of type `AudioToMelSpectrogramPreprocessor`
[NeMo I 2020-05-28 17:08:23 neural_modules:438] Instantiated a new Neural Module named `jasperencoder3` of type `JasperEncoder`
[NeMo I 2020-05-28 17:08:23 neural_modules:438] Instantiated a new Neural Module named `jasperdecoderforctc3` of type `JasperDecoderForCTC`
[NeMo I 2020-05-28 17:08:23 neural_modules:438] Instantiated a new Neural Module named `spectrogramaugmentation1` of type `SpectrogramAugmentation`
[NeMo I 2020-05-28 17:08:23 neural_modules:438] Instantiated a new Neural Module named `quartznet3` of type `QuartzNet`


In [10]:
# ".nemo" file is just an archive with all of the model's details and weights
# Rename it to .tar.gz and unpack it to list its contents
! mv quartznet.nemo quartznet.tar.gz
! tar -xvf quartznet.tar.gz

x .nemo_tmp/
x .nemo_tmp/JasperDecoderForCTC.pt
x .nemo_tmp/JasperEncoder.pt
x .nemo_tmp/eval_graph.yaml
x .nemo_tmp/header.content
x .nemo_tmp/module.yaml
x .nemo_tmp/train_graph.yaml


## Export to ".nemo" file - for deployment with NVIDIA Jarvis

In [None]:
# Same save_to call as before, now with optimize_for_deployment=True
quartznet_model3.save_to('quartznet_for_Jarvis.nemo', optimize_for_deployment=True)

[NeMo I 2020-05-28 17:08:24 neural_modules:228] Configuration of module `quartznet2` (QuartzNet) exported to '.ATEC77VH4A9N853Z/.nemo_tmp/module.yaml'
[NeMo I 2020-05-28 17:08:24 neural_graph:480] Configuration of graph `neuralgraph5` (NeuralGraph) exported to '.ATEC77VH4A9N853Z/.nemo_tmp/eval_graph.yaml'


[NeMo W 2020-05-28 17:08:24 nemo_model:150] Did not convert AudioToMelSpectrogramPreprocessor to .onnx
[NeMo W 2020-05-28 17:08:24 nemo_model:150] Did not convert SpectrogramAugmentation to .onnx
[NeMo W 2020-05-28 17:08:24 jasper:147] Turned off 170 masked convolutions


'AudioToMelSpectrogramPreprocessor' object has no attribute 'eval'
'SpectrogramAugmentation' object has no attribute 'eval'


[NeMo W 2020-05-28 17:08:25 deprecated:68] Function ``local_parameters`` is deprecated. It is going to be removed in the 0.11 version.


In [None]:
# ".nemo" file optimized for deployment will only contain eval structure and .onnx files
# Rename it to .tar.gz and unpack it to list its contents
! mv quartznet_for_Jarvis.nemo quartznet_for_Jarvis.nemo.tar.gz
! tar -xvf quartznet_for_Jarvis.nemo.tar.gz

## NeMoModels can be used just as any other Neural Module

In [None]:
# Change these to point to your training data
train_manifest = "/Users/okuchaiev/Data/an4_dataset/an4_train.json"
val_manifest = "/Users/okuchaiev/Data/an4_dataset/an4_val.json"

# Labels are taken from the model config loaded earlier
labels = model_definition['labels']

# One data layer per manifest: training first, then validation
data_layer = nemo_asr.AudioToTextDataLayer(
    manifest_filepath=train_manifest,
    labels=labels,
    batch_size=16,
)
data_layerE = nemo_asr.AudioToTextDataLayer(
    manifest_filepath=val_manifest,
    labels=labels,
    batch_size=16,
)

# CTC loss with one class per label, plus a greedy CTC decoder
ctc_loss = nemo_asr.CTCLossNM(num_classes=len(labels))
greedy_decoder = nemo_asr.GreedyCTCDecoder()

In [None]:
# Wire the modules together: data layer -> model -> decoder / loss
audio_signal, audio_signal_len, transcript, transcript_len = data_layer()

# The NeMoModel is called like any other neural module
log_probs, encoded_len = quartznet_model4(
    input_signal=audio_signal,
    length=audio_signal_len,
)

predictions = greedy_decoder(log_probs=log_probs)
loss = ctc_loss(
    log_probs=log_probs,
    targets=transcript,
    input_length=encoded_len,
    target_length=transcript_len,
)

In [None]:
# START TRAINING
from functools import partial

from nemo.collections.asr.helpers import monitor_asr_train_progress

# Tensors passed to the logging callback in addition to the loss
tensors_to_evaluate = [predictions, transcript, transcript_len]

train_callback = nemo.core.SimpleLossLoggerCallback(
    tensors=[loss] + tensors_to_evaluate,
    print_func=partial(monitor_asr_train_progress, labels=labels),
)

nf.train(
    tensors_to_optimize=[loss],
    callbacks=[train_callback],
    optimizer="novograd",
    optimization_params={"num_epochs": 30, "lr": 1e-2, "weight_decay": 1e-3},
)