# NeMo Models and JarvisModels

In [1]:
# Import NeMo and ASR collection
import nemo
import nemo.collections.asr as nemo_asr
# Neural module factory with CPU placement; `nf.train(...)` in the training cell below runs through it.
nf = nemo.core.NeuralModuleFactory(placement=nemo.core.DeviceType.CPU)



A *NeMoModel* is a kind of NeuralModule that contains other neural modules inside it.
Which NeuralModules a NeMoModel uses, and the topology of the connections between them, can
depend on the mode (training, inference, etc.).

## NeMoModel instantiation - method 1

### Because NeMoModel is a NeuralModule, regular constructor-based initialization applies

In [2]:
# Parse the model definition out of the YAML config file
from ruamel.yaml import YAML

yaml = YAML(typ="safe")
with open("configs/jasper_an4.yaml") as config_stream:
    model_definition = yaml.load(config_stream)

In [3]:
# Build QuartzNet through its constructor; each sub-module's parameters come
# from the matching section of the loaded config.
qn_model = nemo_asr.models.QuartzNet(
    preprocessor_params=model_definition['AudioToMelSpectrogramPreprocessor'],
    encoder_params=model_definition['JasperEncoder'],
    decoder_params=model_definition['JasperDecoderForCTC'],
)
# Show the weight count first, then the input ports
for property_name in ("num_weights", "input_ports"):
    print(getattr(qn_model, property_name))

[NeMo I 2020-04-15 16:27:59 features:144] PADDING: 16
[NeMo I 2020-04-15 16:27:59 features:152] STFT using conv
5771293
{'input_signal': <nemo.core.neural_types.neural_type.NeuralType object at 0x13a7bb850>, 'length': <nemo.core.neural_types.neural_type.NeuralType object at 0x13a7bb8d0>}


### Because NeMoModel is a NeuralModule, regular config import/export work

In [4]:
# Round-trip through a YAML config: write the model's configuration out, build a
# second instance from that file, and print the second instance's weight count.
exported_config = "qn.yaml"
qn_model.export_to_config(exported_config)
qn_model2 = nemo_asr.models.QuartzNet.import_from_config(config_file=exported_config)
print(qn_model2.num_weights)

[NeMo I 2020-04-15 16:28:02 neural_modules:273] Configuration of module 5efb29d4-aefd-45fd-bf51-98bb7fff04eb (QuartzNet) exported to qn.yaml
[NeMo I 2020-04-15 16:28:03 features:144] PADDING: 16
[NeMo I 2020-04-15 16:28:03 features:152] STFT using conv
[NeMo I 2020-04-15 16:28:03 neural_modules:374] Instantiated a new Neural Module of type `QuartzNet` using configuration loaded from the `qn.yaml` file
5771293


## NeMoModel instantiation - method 2

In [5]:
# Print the name of every pre-trained QuartzNet model available from NGC
pretrained_models = nemo_asr.models.QuartzNet.list_pretrained_models()
for model_info in pretrained_models:
    print(model_info.pretrained_model_name)

[NeMo W 2020-04-15 16:28:06 quartznet:121] THIS METHOD IS NOT DONE YET


QuartzNet15x5-En-BASE
QuartzNet15x5-Zh-BASE


In [6]:
# Fetch the named checkpoint from NGC and instantiate the model together with its weights
pretrained_name = "QuartzNet15x5-En-BASE"
pre_trained_qn_model = nemo_asr.models.QuartzNet.from_pretrained(model_info=pretrained_name)

[NeMo W 2020-04-15 16:28:07 quartznet:144] THIS METHOD IS NOT DONE YET


[NeMo I 2020-04-15 16:28:07 helpers:155] Found existing object /Users/okuchaiev/.nemo_files/NEMO_0.11.0b0/QuartzNet15x5-En-BASE/JasperEncoder-STEP-243800.pt. Re-using
[NeMo I 2020-04-15 16:28:07 helpers:155] Found existing object /Users/okuchaiev/.nemo_files/NEMO_0.11.0b0/QuartzNet15x5-En-BASE/JasperDecoderForCTC-STEP-243800.pt. Re-using
[NeMo I 2020-04-15 16:28:07 helpers:155] Found existing object /Users/okuchaiev/.nemo_files/NEMO_0.11.0b0/QuartzNet15x5-En-BASE/JasperDecoderForCTC-STEP-243800.pt. Re-using
[NeMo I 2020-04-15 16:28:07 helpers:155] Found existing object /Users/okuchaiev/.nemo_files/NEMO_0.11.0b0/QuartzNet15x5-En-BASE/qn.yaml. Re-using
[NeMo I 2020-04-15 16:28:07 quartznet:159] Instantiating model from pre-trained checkpoint
[NeMo I 2020-04-15 16:28:07 features:144] PADDING: 16
[NeMo I 2020-04-15 16:28:07 features:152] STFT using conv
[NeMo I 2020-04-15 16:28:07 neural_modules:374] Instantiated a new Neural Module of type `QuartzNet` using configuration loaded from the `

# Export model to ".nemo" format

## Export to ".nemo" file

In [7]:
# Write the pre-trained model out as a single ".nemo" file
pre_trained_qn_model.export('aaaa.nemo')

[NeMo I 2020-04-15 16:28:11 neural_modules:273] Configuration of module 68d18a78-dac8-4c49-a099-129082ae3749 (QuartzNet) exported to .FBY2ZJCV6GKM1YGN/.nemo_tmp/QuartzNet.yaml
[NeMo I 2020-04-15 16:28:11 neural_modules:781] Exported model QuartzNet to aaaa.nemo


In [8]:
# Here from_pretrained receives the path of a ".nemo" file rather than a pre-trained model name
new_instance = nemo_asr.models.QuartzNet.from_pretrained(model_info='aaaa.nemo')

[NeMo W 2020-04-15 16:28:14 quartznet:144] THIS METHOD IS NOT DONE YET


[NeMo I 2020-04-15 16:28:14 features:144] PADDING: 16
[NeMo I 2020-04-15 16:28:14 features:152] STFT using conv
[NeMo I 2020-04-15 16:28:14 neural_modules:374] Instantiated a new Neural Module of type `QuartzNet` using configuration loaded from the `306E7MGSGK3Q8JMP/.nemo_tmp/QuartzNet.yaml` file


In [None]:
# `res` is never defined anywhere in this notebook, so `print(res)` raised NameError on a
# fresh kernel. Print the weight count of the model restored from the ".nemo" file instead.
print(new_instance.num_weights)

A ".nemo" file is an archive which contains the following:

* weights per module
* hyperparameters (e.g. constructor arguments) for all modules
* topology (e.g. NeuralGraph) description for inference and for training modes

In [None]:
# this instantiates model from disk instead of NGC:
# NOTE(review): no cell in this notebook writes "quartznet.nemo" (the export cell writes
# 'aaaa.nemo'), so this file presumably has to exist beforehand — confirm the intended path.
pre_trained_qn_model = nemo_asr.models.QuartzNet.from_pretrained(model_info="quartznet.nemo")

## NeMoModels can be used just as any other Neural Module

In [None]:
# Manifests for the AN4 training and validation sets.
# NOTE(review): these are absolute paths on the author's machine — point them at your own copy of the data.
train_manifest = "/Users/okuchaiev/Data/an4_dataset/an4_train.json"
val_manifest = "/Users/okuchaiev/Data/an4_dataset/an4_val.json"
labels = model_definition['labels']

# Training data layer
data_layer = nemo_asr.AudioToTextDataLayer(manifest_filepath=train_manifest, labels=labels, batch_size=16)
# Evaluation data layer. The evaluation code calls `data_layerE()`, which was never
# created (and `val_manifest` was never used), so that cell failed with NameError.
# Shuffling is switched off because evaluation does not need a randomized order.
data_layerE = nemo_asr.AudioToTextDataLayer(manifest_filepath=val_manifest, labels=labels, batch_size=16,
                                            shuffle=False)

# Loss and decoder modules shared by the training and evaluation graphs
ctc_loss = nemo_asr.CTCLossNM(num_classes=len(labels))
greedy_decoder = nemo_asr.GreedyCTCDecoder()

In [None]:
# Training graph: data layer -> QuartzNet -> greedy decoder and CTC loss.
audio_signal, audio_signal_len, transcript, transcript_len = data_layer()
# QuartzNet's input ports are named `input_signal` and `length` (see the `input_ports`
# printed earlier), so the tensors must be passed under those names; the previous
# `audio_signal=` / `a_sig_length=` keywords do not match any port.
log_probs, encoded_len = pre_trained_qn_model(input_signal=audio_signal, length=audio_signal_len)
predictions = greedy_decoder(log_probs=log_probs)
loss = ctc_loss(log_probs=log_probs, targets=transcript,
                input_length=encoded_len, target_length=transcript_len)


# Evaluation graph, fed by `data_layerE`.
# Every evaluation tensor carries an `E` suffix (matching `data_layerE` / `lossE`) so that
# the training tensors above (`predictions`, `transcript`, `transcript_len`) are not
# overwritten before the training cell hands them to its callback.
audio_signalE, audio_signal_lenE, transcriptE, transcript_lenE = data_layerE()
log_probsE, encoded_lenE = pre_trained_qn_model(input_signal=audio_signalE, length=audio_signal_lenE)
predictionsE = greedy_decoder(log_probs=log_probsE)
lossE = ctc_loss(log_probs=log_probsE, targets=transcriptE,
                 input_length=encoded_lenE, target_length=transcript_lenE)

In [None]:
# START TRAINING
from functools import partial
from nemo.collections.asr.helpers import monitor_asr_train_progress

# Tensors passed to the progress printer alongside the loss
tensors_to_evaluate = [predictions, transcript, transcript_len]
progress_printer = partial(monitor_asr_train_progress, labels=labels)
train_callback = nemo.core.SimpleLossLoggerCallback(
    tensors=[loss, *tensors_to_evaluate],
    print_func=progress_printer,
)

# Optimize the CTC loss with NovoGrad, logging progress through the callback
nf.train(
    tensors_to_optimize=[loss],
    callbacks=[train_callback],
    optimizer="novograd",
    optimization_params={"num_epochs": 30, "lr": 1e-2, "weight_decay": 1e-3},
)

## OTHER

In [None]:
# Convenience calls on the model: one takes an audio file path, the other takes no arguments.
# NOTE(review): this cell has no recorded output — confirm both methods exist on QuartzNet.
pre_trained_qn_model.transcribe('myaudio.wav')
pre_trained_qn_model.transcribe_from_microphone()