# NeMo Models and JarvisModels

In [1]:
# Import NeMo and ASR collection
import nemo
import nemo.collections.asr as nemo_asr
# Neural module factory pinned to CPU; the training cell later calls nf.train().
nf = nemo.core.NeuralModuleFactory(
    placement=nemo.core.DeviceType.CPU,
)



A *NeMoModel* is a kind of NeuralModule which contains other neural modules inside it.
Which NeuralModules it contains, and the topology of the connections between them, can
depend on the mode (training, inference, etc.).

## NeMoModel instantiation - method 1

### Because NeMoModel is a NeuralModule, regular constructor-based initialization applies

In [2]:
# Parse the model definition from its YAML config file.
from ruamel.yaml import YAML

yaml = YAML(typ="safe")
with open("configs/jasper_an4.yaml") as config_stream:
    model_definition = yaml.load(config_stream)

In [3]:
# Build QuartzNet through its regular constructor, passing each sub-module
# its own parameter section from the loaded config.
qn_model = nemo_asr.models.QuartzNet(
    preprocessor_params=model_definition['AudioToMelSpectrogramPreprocessor'],
    encoder_params=model_definition['JasperEncoder'],
    decoder_params=model_definition['JasperDecoderForCTC'],
)
print(qn_model.num_weights)
print(qn_model.input_ports)

[NeMo I 2020-05-13 17:01:45 features:144] PADDING: 16
[NeMo I 2020-05-13 17:01:45 features:152] STFT using conv
[NeMo I 2020-05-13 17:01:45 neural_modules:442] Instantiated a new Neural Module named `audiotomelspectrogrampreprocessor0` of type `AudioToMelSpectrogramPreprocessor`
[NeMo I 2020-05-13 17:01:45 neural_modules:442] Instantiated a new Neural Module named `jasperencoder0` of type `JasperEncoder`
[NeMo I 2020-05-13 17:01:45 neural_modules:442] Instantiated a new Neural Module named `jasperdecoderforctc0` of type `JasperDecoderForCTC`
5771293
{'input_signal': <nemo.core.neural_types.neural_type.NeuralType object at 0x139a36f50>, 'length': <nemo.core.neural_types.neural_type.NeuralType object at 0x139a47490>}


### Because NeMoModel is a NeuralModule, regular config import/export works

In [4]:
# Round trip: write the model configuration to YAML, build a second model
# from that file, and print the weight count of the re-imported model.
qn_model.export_to_config("qn.yaml")
qn_model2 = nemo_asr.models.QuartzNet.import_from_config(
    config_file="qn.yaml",
)
print(qn_model2.num_weights)

[NeMo I 2020-05-13 17:01:48 neural_modules:232] Configuration of module `quartznet0` (QuartzNet) exported to qn.yaml
[NeMo I 2020-05-13 17:01:48 neural_modules:342] Loading configuration of a new Neural Module from the `qn.yaml` file
[NeMo I 2020-05-13 17:01:49 features:144] PADDING: 16
[NeMo I 2020-05-13 17:01:49 features:152] STFT using conv
[NeMo I 2020-05-13 17:01:49 neural_modules:442] Instantiated a new Neural Module named `audiotomelspectrogrampreprocessor1` of type `AudioToMelSpectrogramPreprocessor`
[NeMo I 2020-05-13 17:01:49 neural_modules:442] Instantiated a new Neural Module named `jasperencoder1` of type `JasperEncoder`
[NeMo I 2020-05-13 17:01:49 neural_modules:442] Instantiated a new Neural Module named `jasperdecoderforctc1` of type `JasperDecoderForCTC`
[NeMo I 2020-05-13 17:01:49 neural_modules:442] Instantiated a new Neural Module named `quartznet1` of type `QuartzNet`
5771293


## NeMoModel instantiation - method 2

In [5]:
# Print the name of every pre-trained QuartzNet checkpoint listed on NGC.
for pretrained_info in nemo_asr.models.QuartzNet.list_pretrained_models():
    print(pretrained_info.pretrained_model_name)

[NeMo W 2020-05-13 17:01:51 quartznet:151] THIS METHOD IS NOT DONE YET


QuartzNet15x5-En-BASE
QuartzNet15x5-Zh-BASE


In [6]:
# Instantiate the model, with its weights, from the named pre-trained NGC checkpoint.
pre_trained_qn_model = nemo_asr.models.QuartzNet.from_pretrained(
    model_info="QuartzNet15x5-En-BASE",
)

[NeMo W 2020-05-13 17:01:54 quartznet:174] THIS METHOD IS NOT YET FINISHED


[NeMo I 2020-05-13 17:01:54 helpers:155] Found existing object /Users/okuchaiev/.nemo_files/NEMO_0.10.2b0/QuartzNet15x5-En-BASE/JasperEncoder-STEP-243800.pt. Re-using
[NeMo I 2020-05-13 17:01:54 helpers:155] Found existing object /Users/okuchaiev/.nemo_files/NEMO_0.10.2b0/QuartzNet15x5-En-BASE/JasperDecoderForCTC-STEP-243800.pt. Re-using
[NeMo I 2020-05-13 17:01:54 helpers:155] Found existing object /Users/okuchaiev/.nemo_files/NEMO_0.10.2b0/QuartzNet15x5-En-BASE/JasperDecoderForCTC-STEP-243800.pt. Re-using
[NeMo I 2020-05-13 17:01:54 helpers:155] Found existing object /Users/okuchaiev/.nemo_files/NEMO_0.10.2b0/QuartzNet15x5-En-BASE/qn.yaml. Re-using
[NeMo I 2020-05-13 17:01:54 quartznet:189] Instantiating model from pre-trained checkpoint
[NeMo I 2020-05-13 17:01:54 neural_modules:342] Loading configuration of a new Neural Module from the `/Users/okuchaiev/.nemo_files/NEMO_0.10.2b0/QuartzNet15x5-En-BASE/qn.yaml` file
[NeMo I 2020-05-13 17:01:54 features:144] PADDING: 16
[NeMo I 2020-0

# Export model to ".nemo" format

## Export to ".nemo" file

In [None]:
# Write the pre-trained model to a ".nemo" file, requesting the deployment-optimized export.
pre_trained_qn_model.export(
    'quartznet.nemo',
    optimize_for_deployment=True,
)

In [None]:
# Re-create the model from the ".nemo" file written by the export cell.
# The original loaded the placeholder name 'aaaa.nemo', which is never created.
new_instance = nemo_asr.models.QuartzNet.from_pretrained(model_info='quartznet.nemo')

A ".nemo" file is an archive which contains the following:

* weights per module
* hyperparameters (e.g. constructor arguments) for all modules
* topology (e.g. NeuralGraph) description for inference and for training modes

## NeMoModels can be used just as any other Neural Module

In [None]:
import os

# Directory holding the AN4 manifests. Override it with the AN4_DATA_DIR
# environment variable; the fallback is the path this notebook was written with.
an4_data_dir = os.environ.get("AN4_DATA_DIR", "/Users/okuchaiev/Data/an4_dataset")
train_manifest = os.path.join(an4_data_dir, "an4_train.json")
val_manifest = os.path.join(an4_data_dir, "an4_val.json")

# The label set comes from the model definition loaded from the YAML config.
labels = model_definition['labels']

# One data layer per manifest: training and validation ("E" suffix).
data_layer = nemo_asr.AudioToTextDataLayer(
    manifest_filepath=train_manifest, labels=labels, batch_size=16)
data_layerE = nemo_asr.AudioToTextDataLayer(
    manifest_filepath=val_manifest, labels=labels, batch_size=16)

# CTC loss sized to the label set, and a greedy CTC decoder for predictions.
ctc_loss = nemo_asr.CTCLossNM(num_classes=len(labels))
greedy_decoder = nemo_asr.GreedyCTCDecoder()

In [None]:
# Wire the training graph: data layer -> pre-trained QuartzNet -> greedy decoder and CTC loss.
audio_signal, audio_signal_len, transcript, transcript_len = data_layer()
log_probs, encoded_len = pre_trained_qn_model(
    input_signal=audio_signal,
    length=audio_signal_len,
)
predictions = greedy_decoder(log_probs=log_probs)
loss = ctc_loss(
    log_probs=log_probs,
    targets=transcript,
    input_length=encoded_len,
    target_length=transcript_len,
)


# Evaluation graph (currently disabled): same wiring, fed from data_layerE.
# audio_signal, audio_signal_len, transcript, transcript_len = data_layerE()
# log_probs, encoded_len = pre_trained_qn_model(input_signal=audio_signal, length=audio_signal_len)
# predictions = greedy_decoder(log_probs=log_probs)
# lossE = ctc_loss(log_probs=log_probs, targets=transcript,
#                 input_length=encoded_len, target_length=transcript_len)

In [None]:
# START TRAINING
from functools import partial

from nemo.collections.asr.helpers import monitor_asr_train_progress

# Tensors passed to the progress-printing function alongside the loss.
tensors_to_evaluate = [predictions, transcript, transcript_len]

# Callback that logs the loss and calls monitor_asr_train_progress with the labels bound.
train_callback = nemo.core.SimpleLossLoggerCallback(
    tensors=[loss, *tensors_to_evaluate],
    print_func=partial(monitor_asr_train_progress, labels=labels),
)

# Optimize the CTC loss with NovoGrad through the neural module factory.
nf.train(
    tensors_to_optimize=[loss],
    callbacks=[train_callback],
    optimizer="novograd",
    optimization_params={"num_epochs": 30, "lr": 1e-2, "weight_decay": 1e-3},
)

## OTHER

In [None]:
# Convenience entry points called directly on the model object.
# NOTE(review): presumably transcribes the given audio file — confirm that
# QuartzNet implements this method in the NeMo version being used.
pre_trained_qn_model.transcribe('myaudio.wav')
# NOTE(review): presumably captures live microphone audio — confirm availability.
# As the cell's last expression, its return value is what the cell displays.
pre_trained_qn_model.transcribe_from_microphone()