# NeMo Models

In [1]:
# Standard library
import os

# Import NeMo and ASR collection
import nemo
import nemo.collections.asr as nemo_asr
# Create the NeuralModuleFactory for this notebook, with CPU placement
nf = nemo.core.NeuralModuleFactory(
    placement=nemo.core.DeviceType.CPU,
)

[NeMo W 2020-05-20 23:02:28 audio_preprocessing:56] Could not import torchaudio. Some features might not work.
[NeMo W 2020-05-20 23:02:28 audio_preprocessing:61] Unable to import APEX. Mixed precision and distributed training will not work.


A *NeMoModel* is a kind of NeuralModule that contains other NeuralModules inside it.
Which modules it contains, and the topology of the connections between them, can
depend on the mode (training, inference, etc.).

## I want an ASR model to serve with Jarvis. What do I do?

In [2]:
# Print the name of every pre-trained QuartzNet checkpoint listed for NGC
available_checkpoints = nemo_asr.models.QuartzNet.list_pretrained_models()
for model_entry in available_checkpoints:
    print(model_entry.pretrained_model_name)

[NeMo W 2020-05-20 23:02:33 asrconvctcmodel:148] TODO: CHANGE ME TO GRAB STUFF FROM NGC


QuartzNet15x5-En-BASE
QuartzNet15x5-Zh-BASE
JasperNet10x5-En-Base


In [3]:
# Fetch the chosen checkpoint from NGC and instantiate the model from it
pre_trained_qn_model = nemo_asr.models.QuartzNet.from_pretrained(
    model_info="QuartzNet15x5-En-BASE",
)

[NeMo W 2020-05-20 23:02:40 asrconvctcmodel:148] TODO: CHANGE ME TO GRAB STUFF FROM NGC


[NeMo I 2020-05-20 23:02:40 helpers:158] Found existing object /Users/okuchaiev/.cache/torch/NeMo/NEMO_0.11.0b0/QuartzNet15x5-En-Base.nemo.
[NeMo I 2020-05-20 23:02:40 helpers:164] Re-using file from: /Users/okuchaiev/.cache/torch/NeMo/NEMO_0.11.0b0/QuartzNet15x5-En-Base.nemo
[NeMo I 2020-05-20 23:02:40 asrconvctcmodel:193] Instantiating model from pre-trained checkpoint
[NeMo I 2020-05-20 23:02:41 neural_modules:341] Loading configuration of a new Neural Module from the `JRDSTDVB33OSBXTE/.nemo_tmp/module.yaml` file
[NeMo I 2020-05-20 23:02:41 features:144] PADDING: 16
[NeMo I 2020-05-20 23:02:41 features:152] STFT using conv
[NeMo I 2020-05-20 23:02:41 neural_modules:441] Instantiated a new Neural Module named `audiotomelspectrogrampreprocessor0` of type `AudioToMelSpectrogramPreprocessor`
[NeMo I 2020-05-20 23:02:41 neural_modules:441] Instantiated a new Neural Module named `jasperencoder0` of type `JasperEncoder`
[NeMo I 2020-05-20 23:02:41 neural_modules:441] Instantiated a new Neu

In [4]:
# Display the concrete Python type of the loaded model
type(pre_trained_qn_model)

nemo.collections.asr.models.asrconvctcmodel.QuartzNet

In [None]:
# Write the model out as a ".nemo" file, with deployment optimization enabled
pre_trained_qn_model.export(
    "asr.nemo",
    optimize_for_deployment=True,
)

In [None]:
# ".nemo" file is just a file with Modules in .onnx format and evaluation graph structure
! mv asr.nemo asr.tar.gz
! tar -xvf asr.tar.gz

## NeMoModel instantiation - method 1

### Because NeMoModel is a NeuralModule, regular constructor-based initialization applies

In [None]:
# First, parse the model definition from the YAML config file
from ruamel.yaml import YAML

yaml = YAML(typ="safe")
config_path = "configs/jasper_an4.yaml"
with open(config_path) as config_stream:
    model_definition = yaml.load(config_stream)

In [None]:
# Build a QuartzNet, passing one section of the loaded config to each sub-module
qn_model = nemo_asr.models.QuartzNet(
    preprocessor_params=model_definition["AudioToMelSpectrogramPreprocessor"],
    encoder_params=model_definition["JasperEncoder"],
    decoder_params=model_definition["JasperDecoderForCTC"],
)
# Show the model's weight count, then its input ports
print(qn_model.num_weights)
print(qn_model.input_ports)

### Because NeMoModel is a NeuralModule, regular config import/export works

In [None]:
# Round-trip the model through a YAML config file
exported_config = "qn.yaml"
qn_model.export_to_config(exported_config)
qn_model2 = nemo_asr.models.QuartzNet.import_from_config(
    config_file=exported_config,
)
# Print the weight count of the re-imported model, then of the original
print(qn_model2.num_weights)
print(qn_model.num_weights)

## NeMoModel instantiation - method 2

In [None]:
# Print the names of all QuartzNet models available from NGC
ngc_models = nemo_asr.models.QuartzNet.list_pretrained_models()
for ngc_model in ngc_models:
    print(ngc_model.pretrained_model_name)

In [None]:
# Automatically go to NGC and instantiate the model together with its weights
pre_trained_qn_model = nemo_asr.models.QuartzNet.from_pretrained(
    model_info="QuartzNet15x5-En-BASE",
)

# Export model to ".nemo" format

## Export to ".nemo" file

In [None]:
# Save the model as a ".nemo" file, without deployment optimization
pre_trained_qn_model.export(
    "quartznet.nemo",
    optimize_for_deployment=False,
)

In [None]:
# Instantiate a model from the local "quartznet.nemo" file
new_instance = nemo_asr.models.QuartzNet.from_pretrained(
    model_info="quartznet.nemo",
)

In [None]:
# ".nemo" file is just a file with Modules in .onnx format and evaluation graph structure
! mv quartznet.nemo quartznet.tar.gz
! tar -xvf quartznet.tar.gz

In [None]:
# nemo.core.DeploymentFormat.

## NeMoModels can be used just like any other NeuralModule

In [None]:
# Directory holding the AN4 manifests. Set the AN4_DATA_DIR environment
# variable to point at your own copy of the dataset; the fallback is the
# original author's local path and will not exist on other machines.
an4_data_dir = os.environ.get("AN4_DATA_DIR", "/Users/okuchaiev/Data/an4_dataset")
train_manifest = os.path.join(an4_data_dir, "an4_train.json")
val_manifest = os.path.join(an4_data_dir, "an4_val.json")

# The label set comes from the YAML model definition loaded earlier
labels = model_definition['labels']

# One data layer per manifest: training and validation.
# NOTE(review): data_layerE is not referenced by any later cell visible here.
data_layer = nemo_asr.AudioToTextDataLayer(manifest_filepath=train_manifest, labels=labels, batch_size=16)
data_layerE = nemo_asr.AudioToTextDataLayer(manifest_filepath=val_manifest, labels=labels, batch_size=16)

# CTC loss sized to the label set, plus a greedy CTC decoder
ctc_loss = nemo_asr.CTCLossNM(num_classes=len(labels))
greedy_decoder = nemo_asr.GreedyCTCDecoder()

In [None]:
# Wire the modules together: data layer -> model -> decoder / loss
audio_signal, audio_signal_len, transcript, transcript_len = data_layer()

# The pre-trained model is called like any other module
log_probs, encoded_len = pre_trained_qn_model(
    input_signal=audio_signal,
    length=audio_signal_len,
)

# Greedy decoding of the model output
predictions = greedy_decoder(log_probs=log_probs)

# CTC loss between the model output and the reference transcripts
loss = ctc_loss(
    log_probs=log_probs,
    targets=transcript,
    input_length=encoded_len,
    target_length=transcript_len,
)

In [None]:
# START TRAINING
from functools import partial

from nemo.collections.asr.helpers import monitor_asr_train_progress

# Tensors handed to the progress-printing callback alongside the loss
tensors_to_evaluate = [predictions, transcript, transcript_len]

train_callback = nemo.core.SimpleLossLoggerCallback(
    tensors=[loss, *tensors_to_evaluate],
    print_func=partial(monitor_asr_train_progress, labels=labels),
)

# Optimize the CTC loss with NovoGrad
nf.train(
    tensors_to_optimize=[loss],
    callbacks=[train_callback],
    optimizer="novograd",
    optimization_params={
        "num_epochs": 30,
        "lr": 1e-2,
        "weight_decay": 1e-3,
    },
)

## OTHER

In [None]:
# Call the model directly on an audio file and keep the result
res = pre_trained_qn_model(audio_file="myaudio.wav")

# Call the model's microphone transcription method
pre_trained_qn_model.transcribe_from_microphone()