In [2]:
# Import NeMo and ASR collection
import nemo
import nemo.collections.asr as nemo_asr
# Neural module factory placed on CPU; later cells use it for export and training.
nf = nemo.core.NeuralModuleFactory(
    placement=nemo.core.DeviceType.CPU,
)

# NeMo Models and JarvisModels

A *NeMoModel* is a kind of NeuralModule which contains other neural modules inside it.
The mode of those inner modules, and the topology of the connections between them, can
depend on the mode the model is used in (training, inference, etc.).

A *JarvisModel* is a kind of NeMo model which can be exported to a ".jarvis" file for deployment inside a Jarvis service.

## NeMoModel instantiation - method 1

### Because NeMoModel is a NeuralModule, regular constructor-based initialization applies

In [3]:
# First, load the model definition from the YAML config file.
from ruamel.yaml import YAML

yaml = YAML(typ="safe")
# Decode explicitly as UTF-8 so that parsing does not depend on the
# platform's default locale encoding.
with open("configs/jasper_an4.yaml", encoding="utf-8") as file:
    # Later cells index this mapping by module name and by 'labels'.
    model_definition = yaml.load(file)

In [4]:
# Build QuartzNet through its regular constructor, handing each sub-module
# its own section of the loaded YAML definition.
qn_model = nemo_asr.models.QuartzNet(
    preprocessor_params=model_definition['AudioToMelSpectrogramPreprocessor'],
    encoder_params=model_definition['JasperEncoder'],
    decoder_params=model_definition['JasperDecoderForCTC'],
)
print(qn_model.num_weights)

[NeMo I 2020-04-13 16:53:11 features:144] PADDING: 16
[NeMo I 2020-04-13 16:53:11 features:152] STFT using conv
5771293


### Because NeMoModel is a NeuralModule, regular config import/export works

In [5]:
# Round-trip through a config file: write the model's configuration to
# qn.yaml, then instantiate a second QuartzNet from that file.
qn_model.export_to_config("qn.yaml")
qn_model2 = nemo_asr.models.QuartzNet.import_from_config(
    config_file="qn.yaml",
)
print(qn_model2.num_weights)

[NeMo I 2020-04-13 16:53:13 neural_modules:267] Configuration of module 870976d9-552b-47b6-ba3d-7de944190c2d (QuartzNet) exported to qn.yaml
[NeMo I 2020-04-13 16:53:13 features:144] PADDING: 16
[NeMo I 2020-04-13 16:53:13 features:152] STFT using conv
[NeMo I 2020-04-13 16:53:13 neural_modules:368] Instantiated a new Neural Module of type `QuartzNet` using configuration loaded from the `qn.yaml` file
5771293


## NeMoModel instantiation - method 2

In [6]:
# Show the name of every pre-trained QuartzNet checkpoint that is available.
pretrained_models = nemo_asr.models.QuartzNet.list_pretrained_models()
for checkpoint in pretrained_models:
    print(checkpoint.pretrained_model_name)

[NeMo W 2020-04-13 16:53:17 quartznet:108] THIS METHOD IS NOT DONE YET


QuartzNet15x5-En-BASE
QuartzNet15x5-Zh-BASE


In [7]:
# What happens "under the hood": from_pretrained goes to NGC, downloads the
# config and checkpoints, and restores the model from them.
pre_trained_qn_model = nemo_asr.models.QuartzNet.from_pretrained(
    model_info="QuartzNet15x5-En-BASE",
)

[NeMo W 2020-04-13 16:53:20 quartznet:134] THIS METHOD IS NOT DONE YET


[NeMo I 2020-04-13 16:53:20 helpers:158] Downloading from: https://api.ngc.nvidia.com/v2/models/nvidia/multidataset_quartznet15x5/versions/1/files/JasperEncoder-STEP-243800.pt to /Users/okuchaiev/.nemo_files/NEMO_0.11.0b0/QuartzNet15x5-En-BASE
[NeMo I 2020-04-13 16:53:56 helpers:158] Downloading from: https://api.ngc.nvidia.com/v2/models/nvidia/multidataset_quartznet15x5/versions/1/files/JasperDecoderForCTC-STEP-243800.pt to /Users/okuchaiev/.nemo_files/NEMO_0.11.0b0/QuartzNet15x5-En-BASE
[NeMo I 2020-04-13 16:53:57 helpers:154] Found existing object /Users/okuchaiev/.nemo_files/NEMO_0.11.0b0/QuartzNet15x5-En-BASE/JasperDecoderForCTC-STEP-243800.pt. Re-using
[NeMo I 2020-04-13 16:53:57 helpers:158] Downloading from: https://nemo-public.s3.us-east-2.amazonaws.com/qn.yaml to /Users/okuchaiev/.nemo_files/NEMO_0.11.0b0/QuartzNet15x5-En-BASE
[NeMo I 2020-04-13 16:53:57 quartznet:152] Instantiating model from pre-trained checkpoint
[NeMo I 2020-04-13 16:53:57 features:144] PADDING: 16
[NeMo 

In [8]:
# Report the number of weights in the model restored from the pre-trained checkpoint.
print(pre_trained_qn_model.num_weights)

5771293


## JarvisModel - can be easily exported to a ".jarvis" file

In [9]:
# Export the pre-trained model in the JARVIS deployment format to qn.jarvis.
nf.deployment_export(
    module=pre_trained_qn_model,
    d_format=nemo.core.DeploymentFormat.JARVIS,
    output="qn.jarvis",
)

[NeMo I 2020-04-13 16:54:02 neural_factory:620] Exporting model to Jarvis.


[NeMo W 2020-04-13 16:54:02 quartznet:158] THIS METHOD IS NOT DONE YET


[NeMo I 2020-04-13 16:54:02 neural_factory:622] Exported to qn.jarvis


## NeMoModels can be used just like any other Neural Module

In [10]:
import os

# Directory holding the AN4 manifests. Set the AN4_DATA_DIR environment
# variable to point at your own copy; the fallback is the directory this
# notebook was originally written against.
AN4_DATA_DIR = os.environ.get("AN4_DATA_DIR", "/Users/okuchaiev/Data/an4_dataset")
train_manifest = os.path.join(AN4_DATA_DIR, "an4_train.json")
val_manifest = os.path.join(AN4_DATA_DIR, "an4_val.json")

# Labels come from the 'labels' entry of the loaded model definition.
labels = model_definition['labels']

# Training data layer, CTC loss and greedy decoder to connect to the model.
data_layer = nemo_asr.AudioToTextDataLayer(
    manifest_filepath=train_manifest,
    labels=labels,
    batch_size=16,
)
ctc_loss = nemo_asr.CTCLossNM(num_classes=len(labels))
greedy_decoder = nemo_asr.GreedyCTCDecoder()

[NeMo I 2020-04-13 16:54:04 collections:142] Dataset loaded with 897 files totalling 1.39 hours
[NeMo I 2020-04-13 16:54:04 collections:143] 0 files were filtered totalling 0.00 hours


In [11]:
# Wire the modules together: data layer -> pre-trained QuartzNet ->
# greedy decoder (predictions) and CTC loss.
audio_signal, audio_signal_len, transcript, transcript_len = data_layer()
log_probs, encoded_len = pre_trained_qn_model(
    audio_signal=audio_signal,
    a_sig_length=audio_signal_len,
)
predictions = greedy_decoder(log_probs=log_probs)
loss = ctc_loss(
    log_probs=log_probs,
    targets=transcript,
    input_length=encoded_len,
    target_length=transcript_len,
)

In [None]:
# START TRAINING
from functools import partial

from nemo.collections.asr.helpers import monitor_asr_train_progress

# Tensors handed to the progress printer in addition to the loss.
tensors_to_evaluate = [predictions, transcript, transcript_len]

# Logging callback: prints training progress via monitor_asr_train_progress.
train_callback = nemo.core.SimpleLossLoggerCallback(
    tensors=[loss] + tensors_to_evaluate,
    print_func=partial(monitor_asr_train_progress, labels=labels),
)

# Optimize the CTC loss with novograd for 30 epochs.
nf.train(
    tensors_to_optimize=[loss],
    callbacks=[train_callback],
    optimizer="novograd",
    optimization_params={
        "num_epochs": 30,
        "lr": 1e-2,
        "weight_decay": 1e-3,
    },
)