diff --git a/.github/blossom-ci.yml b/.github/blossom-ci.yml new file mode 100644 index 000000000000..d285682f4261 --- /dev/null +++ b/.github/blossom-ci.yml @@ -0,0 +1,105 @@ +# Copyright (c) 2020-2021, NVIDIA CORPORATION. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# A workflow to trigger ci on hybrid infra (github + self hosted runner) +name: Blossom-CI +on: + issue_comment: + types: [created] + workflow_dispatch: + inputs: + platform: + description: 'runs-on argument' + required: false + args: + description: 'argument' + required: false +jobs: + Authorization: + name: Authorization + runs-on: blossom + outputs: + args: ${{ env.args }} + + # This job only runs for pull request comments + if: | + contains( 'ravitestgit,', format('{0},', github.actor)) && + github.event.comment.body == '/blossom-ci' + steps: + - name: Check if comment is issued by authorized person + run: blossom-ci + env: + OPERATION: 'AUTH' + REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }} + + Vulnerability-scan: + name: Vulnerability scan + needs: [Authorization] + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v2 + with: + repository: ${{ fromJson(needs.Authorization.outputs.args).repo }} + ref: ${{ fromJson(needs.Authorization.outputs.args).ref }} + lfs: 'true' + + # repo specific steps + #- name: Setup java + # uses: actions/setup-java@v1 + # with: + # java-version: 1.8 + + # add blackduck properties https://synopsys.atlassian.net/wiki/spaces/INTDOCS/pages/631308372/Methods+for+Configuring+Analysis#Using-a-configuration-file + #- name: Setup blackduck properties + # run: | + # PROJECTS=$(mvn -am dependency:tree | grep maven-dependency-plugin | awk '{ out="com.nvidia:"$(NF-1);print out }' | grep rapids | xargs | sed -e 's/ /,/g') + # echo detect.maven.build.command="-pl=$PROJECTS -am" >> application.properties + # echo detect.maven.included.scopes=compile >> application.properties + + - name: Run blossom action + uses: NVIDIA/blossom-action@main + env: + REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} + REPO_KEY_DATA: ${{ secrets.BLOSSOM_KEY }} + with: + args1: ${{ fromJson(needs.Authorization.outputs.args).args1 }} + args2: ${{ fromJson(needs.Authorization.outputs.args).args2 }} + args3: ${{ fromJson(needs.Authorization.outputs.args).args3 }} + + Job-trigger: + name: Start ci job + needs: [Vulnerability-scan] + runs-on: blossom + steps: + - name: Start ci job + run: blossom-ci + env: + OPERATION: 'START-CI-JOB' + CI_SERVER: ${{ secrets.CI_SERVER }} + REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} + + Post-processing: + name: Post processing + runs-on: blossom + if : github.event_name == 'workflow_dispatch' + steps: + - name: Start post processing + run: blossom-ci + env: + OPERATION: 'POST-PROCESSING' + CI_SERVER: ${{ secrets.CI_SERVER }} + REPO_TOKEN: ${{ secrets.GITHUB_TOKEN }} + diff --git a/Jenkinsfile b/Jenkinsfile index 270da45eb758..7b0716a6407f 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -6,7 +6,7 @@ pipeline { } } options { - timeout(time: 1, unit: 
'HOURS') + timeout(time: 2, unit: 'HOURS') disableConcurrentBuilds() } stages { diff --git a/examples/asr/asr_webapp/Dockerfile b/examples/asr/asr_webapp/Dockerfile index 8e5ee21a7f28..ea8cda29c072 100644 --- a/examples/asr/asr_webapp/Dockerfile +++ b/examples/asr/asr_webapp/Dockerfile @@ -14,7 +14,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -ARG BASE_IMAGE=nvcr.io/nvidia/nemo:1.0.0rc1 +ARG BASE_IMAGE=nvcr.io/nvidia/nemo:1.0.1 # build an image that includes only the nemo dependencies, ensures that dependencies # are included first for optimal caching, and useful for building a development diff --git a/examples/nlp/machine_translation/enc_dec_nmt.py b/examples/nlp/machine_translation/enc_dec_nmt.py index e696b610dd2c..fed0c81c856d 100644 --- a/examples/nlp/machine_translation/enc_dec_nmt.py +++ b/examples/nlp/machine_translation/enc_dec_nmt.py @@ -111,7 +111,7 @@ def main(cfg: MTEncDecConfig) -> None: # training is managed by PyTorch Lightning trainer_cfg = OmegaConf.to_container(cfg.trainer) trainer_cfg.pop('plugins', None) - trainer = Trainer(plugins=[NLPDDPPlugin()], **trainer_cfg) + trainer = Trainer(plugins=[NLPDDPPlugin(num_nodes=cfg.trainer.num_nodes)], **trainer_cfg) # tokenizers will be trained and tarred training data will be created if needed # model config is then updated diff --git a/examples/speaker_recognition/extract_speaker_embeddings.py b/examples/speaker_recognition/extract_speaker_embeddings.py index ce7ce0ac5ff4..7a5ac17fe39a 100644 --- a/examples/speaker_recognition/extract_speaker_embeddings.py +++ b/examples/speaker_recognition/extract_speaker_embeddings.py @@ -89,7 +89,7 @@ def main(): labels=None, batch_size=1, shuffle=False, - time_length=8, + time_length=20, embedding_dir=args.embedding_dir, ) ) diff --git a/examples/speaker_recognition/voxceleb_eval.py b/examples/speaker_recognition/voxceleb_eval.py index cdfd1db0336f..00cbb8561e87 100644 --- a/examples/speaker_recognition/voxceleb_eval.py +++ b/examples/speaker_recognition/voxceleb_eval.py @@ -70,7 +70,7 @@ def get_acc(trial_file='', emb='', save_kaldi_emb=False): keys.append(y_speaker) trial_embs.extend([Y]) - score = (X @ Y.T) / (((X @ X.T) * (Y @ Y.T)) ** 0.5) + score = np.dot(X, Y) / ((np.dot(X, X) * np.dot(Y, Y)) ** 0.5) score = (score + 1) / 2 all_scores.append(score) diff --git a/nemo/collections/asr/models/ctc_models.py b/nemo/collections/asr/models/ctc_models.py index b2c335510a4d..49b434604ffe 100644 --- a/nemo/collections/asr/models/ctc_models.py +++ b/nemo/collections/asr/models/ctc_models.py @@ -127,6 +127,13 @@ def list_available_models(cls) -> Optional[PretrainedModelInfo]: ) results.append(model) + model = PretrainedModelInfo( + pretrained_model_name="stt_zh_citrinet_1024_gamma_0_25", + description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:stt_zh_citrinet_1024_gamma_0_25", + location="https://api.ngc.nvidia.com/v2/models/nvidia/nemo/stt_zh_citrinet_1024_gamma_0_25/versions/1.0.0/files/stt_zh_citrinet_1024_gamma_0_25.nemo", + ) + results.append(model) + model = PretrainedModelInfo( pretrained_model_name="asr_talknet_aligner", description="For details about this model, please visit https://ngc.nvidia.com/catalog/models/nvidia:nemo:asr_talknet_aligner", diff --git a/nemo/collections/asr/models/label_models.py b/nemo/collections/asr/models/label_models.py index 8ae5707ad1a8..d01a174597a3 100644 --- a/nemo/collections/asr/models/label_models.py +++
b/nemo/collections/asr/models/label_models.py @@ -29,6 +29,7 @@ from nemo.collections.asr.models.asr_model import ExportableEncDecModel from nemo.collections.asr.parts.preprocessing.features import WaveformFeaturizer from nemo.collections.asr.parts.preprocessing.perturb import process_augmentations +from nemo.collections.asr.parts.utils.speaker_utils import embedding_normalize from nemo.collections.common.losses import CrossEntropyLoss as CELoss from nemo.collections.common.metrics import TopKClassificationAccuracy from nemo.core.classes import ModelPT @@ -381,6 +382,7 @@ def test_epoch_end(self, outputs): slices = torch.cat([x['slices'] for x in outputs]) emb_shape = embs.shape[-1] embs = embs.view(-1, emb_shape).cpu().numpy() + embs = embedding_normalize(embs) out_embeddings = {} start_idx = 0 with open(self.test_manifest, 'r') as manifest: diff --git a/nemo/collections/asr/modules/audio_preprocessing.py b/nemo/collections/asr/modules/audio_preprocessing.py index f7ad62d8db0e..8f887e03c1bb 100644 --- a/nemo/collections/asr/modules/audio_preprocessing.py +++ b/nemo/collections/asr/modules/audio_preprocessing.py @@ -434,7 +434,7 @@ def input_types(self): """ return { "input_spec": NeuralType(('B', 'D', 'T'), SpectrogramType()), - "length": NeuralType(tuple('B'), LengthsType(), optional=True), + "length": NeuralType(tuple('B'), LengthsType()), } @property @@ -474,7 +474,7 @@ def __init__( mask_value=mask_value, ) else: - self.spec_augment = lambda input_spec: input_spec + self.spec_augment = lambda input_spec, length: input_spec # Check if numba is supported, and use a Numba kernel if it is if use_numba_spec_augment and numba_utils.numba_cuda_is_supported(__NUMBA_MINIMUM_VERSION__): @@ -490,7 +490,7 @@ def __init__( self.spec_augment_numba = None @typecheck() - def forward(self, input_spec, length=None): + def forward(self, input_spec, length): augmented_spec = self.spec_cutout(input_spec=input_spec) # To run the Numba kernel, correct numba version is required as well as @@ -498,7 +498,7 @@ def forward(self, input_spec, length=None): if self.spec_augment_numba is not None and spec_augment_launch_heuristics(augmented_spec, length): augmented_spec = self.spec_augment_numba(input_spec=augmented_spec, length=length) else: - augmented_spec = self.spec_augment(input_spec=augmented_spec) + augmented_spec = self.spec_augment(input_spec=augmented_spec, length=length) return augmented_spec diff --git a/nemo/collections/asr/parts/numba/spec_augment/spec_aug_numba.py b/nemo/collections/asr/parts/numba/spec_augment/spec_aug_numba.py index 2edf2e8d7b42..0b45158d1527 100644 --- a/nemo/collections/asr/parts/numba/spec_augment/spec_aug_numba.py +++ b/nemo/collections/asr/parts/numba/spec_augment/spec_aug_numba.py @@ -251,11 +251,6 @@ def forward(self, input_spec, length): sh = input_spec.shape bs = sh[0] - if self.adaptive_temporal_width: - time_width = max(1, int(sh[2] * self.time_width)) - else: - time_width = self.time_width - # Construct the freq and time masks as well as start positions if self.freq_masks > 0: freq_starts = torch.randint( @@ -267,10 +262,30 @@ def forward(self, input_spec, length): freq_lengths = torch.zeros([bs, 1], dtype=torch.int64, device=input_spec.device) if self.time_masks > 0: - time_starts = torch.randint( - 0, sh[2] - time_width + 1, size=[bs, self.time_masks], device=input_spec.device - ) - time_lengths = torch.randint(0, time_width + 1, size=[bs, self.time_masks], device=input_spec.device) + if self.adaptive_temporal_width: + time_width = (length * 
self.time_width).int().clamp(min=1) + else: + time_width = ( + torch.tensor(self.time_width, dtype=torch.int32, device=input_spec.device) + .unsqueeze(0) + .repeat(sh[0]) + ) + + time_starts = [] + time_lengths = [] + for idx in range(sh[0]): + time_starts.append( + torch.randint( + 0, max(1, length[idx] - time_width[idx]), size=[1, self.time_masks], device=input_spec.device + ) + ) + time_lengths.append( + torch.randint(0, time_width[idx] + 1, size=[1, self.time_masks], device=input_spec.device) + ) + + time_starts = torch.cat(time_starts, 0) + time_lengths = torch.cat(time_lengths, 0) + else: time_starts = torch.zeros([bs, 1], dtype=torch.int64, device=input_spec.device) time_lengths = torch.zeros([bs, 1], dtype=torch.int64, device=input_spec.device) diff --git a/nemo/collections/asr/parts/submodules/spectr_augment.py b/nemo/collections/asr/parts/submodules/spectr_augment.py index ef9821e3a338..48db4f24f777 100644 --- a/nemo/collections/asr/parts/submodules/spectr_augment.py +++ b/nemo/collections/asr/parts/submodules/spectr_augment.py @@ -18,7 +18,7 @@ import torch.nn as nn from nemo.core.classes import Typing, typecheck -from nemo.core.neural_types import NeuralType, SpectrogramType +from nemo.core.neural_types import LengthsType, NeuralType, SpectrogramType class SpecAugment(nn.Module, Typing): @@ -43,7 +43,10 @@ class SpecAugment(nn.Module, Typing): def input_types(self): """Returns definitions of module input types """ - return {"input_spec": NeuralType(('B', 'D', 'T'), SpectrogramType())} + return { + "input_spec": NeuralType(('B', 'D', 'T'), SpectrogramType()), + "length": NeuralType(tuple('B'), LengthsType()), + } @property def output_types(self): @@ -54,7 +57,7 @@ def output_types(self): def __init__( self, freq_masks=0, time_masks=0, freq_width=10, time_width=10, rng=None, mask_value=0.0, ): - super(SpecAugment, self).__init__() + super().__init__() self._rng = random.Random() if rng is None else rng @@ -76,14 +79,9 @@ def __init__( @typecheck() @torch.no_grad() - def forward(self, input_spec): + def forward(self, input_spec, length): sh = input_spec.shape - if self.adaptive_temporal_width: - time_width = max(1, int(sh[2] * self.time_width)) - else: - time_width = self.time_width - for idx in range(sh[0]): for i in range(self.freq_masks): x_left = self._rng.randint(0, sh[1] - self.freq_width) @@ -93,7 +91,12 @@ def forward(self, input_spec): input_spec[idx, x_left : x_left + w, :] = self.mask_value for i in range(self.time_masks): - y_left = self._rng.randint(0, sh[2] - time_width) + if self.adaptive_temporal_width: + time_width = max(1, int(length[idx] * self.time_width)) + else: + time_width = self.time_width + + y_left = self._rng.randint(0, max(1, length[idx] - time_width)) w = self._rng.randint(0, time_width) diff --git a/nemo/collections/asr/parts/utils/speaker_utils.py b/nemo/collections/asr/parts/utils/speaker_utils.py index 09a32b9b89f0..7d93ea6953aa 100644 --- a/nemo/collections/asr/parts/utils/speaker_utils.py +++ b/nemo/collections/asr/parts/utils/speaker_utils.py @@ -398,3 +398,20 @@ def write_rttm2manifest(paths2audio_files, paths2rttm_files, manifest_file): outfile.write("\n") f.close() return manifest_file + + +def embedding_normalize(embs, use_std=False, eps=1e-10): + """ + mean and l2 length normalize the input speaker embeddings + input: + embs: embeddings of shape (Batch,emb_size) + output: + embs: normalized embeddings of shape (Batch,emb_size) + """ + embs = embs - embs.mean(axis=0) + if use_std: + embs = embs / (embs.std(axis=0) + eps) + embs_l2_norm =
np.expand_dims(np.linalg.norm(embs, ord=2, axis=-1), axis=1) + embs = embs / embs_l2_norm + + return embs diff --git a/nemo/collections/common/tokenizers/bytelevel_tokenizers.py b/nemo/collections/common/tokenizers/bytelevel_tokenizers.py index f834b7b97e92..277c1822a992 100644 --- a/nemo/collections/common/tokenizers/bytelevel_tokenizers.py +++ b/nemo/collections/common/tokenizers/bytelevel_tokenizers.py @@ -15,6 +15,7 @@ import re from pathlib import Path from typing import List + from nemo.collections.common.tokenizers.tokenizer_spec import TokenizerSpec __all__ = ['ByteLevelProcessor', 'ByteLevelTokenizer'] diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py index d8f03e4a2813..fc7798f71752 100644 --- a/nemo/collections/nlp/parts/nlp_overrides.py +++ b/nemo/collections/nlp/parts/nlp_overrides.py @@ -184,7 +184,7 @@ def distributed_sampler_kwargs(self): return distributed_sampler_kwargs else: - return super().distributed_sampler_kwargs + return super(NLPDDPPlugin, self).distributed_sampler_kwargs class NLPCheckpointConnector(CheckpointConnector): diff --git a/scripts/speech_recognition/convert_to_tarred_audio_dataset.py b/scripts/speech_recognition/convert_to_tarred_audio_dataset.py index 4e13c0536c28..f43287bb4c4e 100644 --- a/scripts/speech_recognition/convert_to_tarred_audio_dataset.py +++ b/scripts/speech_recognition/convert_to_tarred_audio_dataset.py @@ -107,8 +107,9 @@ parser.add_argument( '--max_duration', default=None, + required=True, type=float, - help='Maximum duration of audio clip in the dataset. By default, it is None and will not filter files.', + help='Maximum duration of audio clip in the dataset. This argument is required; longer audio clips are filtered out.', ) parser.add_argument( '--min_duration', diff --git a/scripts/tokenizers/process_asr_text_tokenizer.py b/scripts/tokenizers/process_asr_text_tokenizer.py index 8c645071746a..ae5e4d9204f3 100644 --- a/scripts/tokenizers/process_asr_text_tokenizer.py +++ b/scripts/tokenizers/process_asr_text_tokenizer.py @@ -106,6 +106,9 @@ help="Character coverage percentage for SentencePiece tokenization. For languages " "with large vocabulary, should be close to 0.9995, otherwise kept as 1.0", ) +parser.add_argument('--spe_bos', action='store_true', help='Add <bos> token to SentencePiece Tokenizer.') +parser.add_argument('--spe_eos', action='store_true', help='Add <eos> token to SentencePiece Tokenizer.') +parser.add_argument('--spe_pad', action='store_true', help='Add <pad> token to SentencePiece Tokenizer.') parser.add_argument( '--spe_sample_size', type=int, @@ -173,6 +176,9 @@ def __process_data( spe_train_extremely_large_corpus: bool, spe_sample_size: int, spe_max_sentencepiece_length: int, + spe_bos: bool, + spe_eos: bool, + spe_pad: bool, lower_case: bool, ): """ @@ -191,6 +197,9 @@ def __process_data( this flag can be set to try to train the tokenizer. Will silently fail if it runs out of RAM. spe_max_sentencepiece_length: Limits the maximum length of the SentencePiece subword that can be constructed. By default, no limit is placed. + spe_bos: Bool flag, whether to add <bos> to SentencePiece tokenizer vocabulary. + spe_eos: Bool flag, whether to add <eos> to SentencePiece tokenizer vocabulary. + spe_pad: Bool flag, whether to add <pad> to SentencePiece tokenizer vocabulary.
lower_case: whether to tokenize with lower case character set only (for english) Returns: @@ -222,6 +231,9 @@ def __process_data( character_coverage=spe_character_coverage, train_extremely_large_corpus=spe_train_extremely_large_corpus, max_sentencepiece_length=spe_max_sentencepiece_length, + bos=spe_bos, + eos=spe_eos, + pad=spe_pad, ) else: @@ -249,6 +261,7 @@ def main(): spe_sample_size = args.spe_sample_size spe_train_extremely_large_corpus = args.spe_train_extremely_large_corpus spe_max_sentencepiece_length = args.spe_max_sentencepiece_length + spe_bos, spe_eos, spe_pad = args.spe_bos, args.spe_eos, args.spe_pad lower_case = args.lower_case if not os.path.exists(data_root): @@ -272,6 +285,9 @@ def main(): spe_sample_size=spe_sample_size, spe_train_extremely_large_corpus=spe_train_extremely_large_corpus, spe_max_sentencepiece_length=spe_max_sentencepiece_length, + spe_bos=spe_bos, + spe_eos=spe_eos, + spe_pad=spe_pad, ) print("Serialized tokenizer at location :", tokenizer_path) diff --git a/tests/collections/asr/numba/spec_augment/test_spec_aug_numba.py b/tests/collections/asr/numba/spec_augment/test_spec_aug_numba.py index 17b6d6c9ccf3..4a1d190755d1 100644 --- a/tests/collections/asr/numba/spec_augment/test_spec_aug_numba.py +++ b/tests/collections/asr/numba/spec_augment/test_spec_aug_numba.py @@ -57,10 +57,7 @@ def prepare_data(b, f, t, device='cuda', freq_masks=0, time_masks=0, freq_width= adaptive_temporal_width = True - if adaptive_temporal_width: - time_width = max(1, int(sh[2] * time_width)) - else: - time_width = time_width + original_time_width = time_width # Construct the freq and time masks as well as start positions if freq_masks > 0: @@ -71,8 +68,29 @@ def prepare_data(b, f, t, device='cuda', freq_masks=0, time_masks=0, freq_width= freq_lengths = torch.zeros([bs, 1], dtype=torch.int64, device=x.device) if time_masks > 0: - time_starts = torch.randint(0, sh[2] - time_width + 1, size=[bs, time_masks], device=x.device) - time_lengths = torch.randint(0, time_width + 1, size=[bs, time_masks], device=x.device) + if adaptive_temporal_width: + time_width = (x_len * original_time_width).int().clamp(min=1) + else: + time_width = ( + torch.tensor(original_time_width, dtype=torch.int32, device=x.device) + .unsqueeze(0) + .repeat(sh[0]) + ) + + time_starts = [] + time_lengths = [] + for idx in range(sh[0]): + time_starts.append( + torch.randint( + 0, max(1, x_len[idx] - time_width[idx]), size=[1, time_masks], device=x.device + ) + ) + time_lengths.append( + torch.randint(0, time_width[idx] + 1, size=[1, time_masks], device=x.device) + ) + + time_starts = torch.cat(time_starts, 0) + time_lengths = torch.cat(time_lengths, 0) else: time_starts = torch.zeros([bs, 1], dtype=torch.int64, device=x.device) time_lengths = torch.zeros([bs, 1], dtype=torch.int64, device=x.device) diff --git a/tests/collections/asr/test_asr_modules.py b/tests/collections/asr/test_asr_modules.py index b4dd12b3c70f..4e112239b90d 100644 --- a/tests/collections/asr/test_asr_modules.py +++ b/tests/collections/asr/test_asr_modules.py @@ -125,7 +125,7 @@ def test_SpectrogramAugmentationr(self): input_signal = torch.randn(size=(4, 512)) length = torch.randint(low=161, high=500, size=[4]) res0 = instance0(input_signal=input_signal, length=length) - res = instance1(input_spec=res0[0]) + res = instance1(input_spec=res0[0], length=length) assert res.shape == res0[0].shape diff --git a/tutorials/00_NeMo_Primer.ipynb b/tutorials/00_NeMo_Primer.ipynb index db3bdfa81424..81402cd8864c 100644 --- a/tutorials/00_NeMo_Primer.ipynb +++
b/tutorials/00_NeMo_Primer.ipynb @@ -67,7 +67,7 @@ "!pip install unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/01_NeMo_Models.ipynb b/tutorials/01_NeMo_Models.ipynb index 8ba8fcb38310..089646053c92 100644 --- a/tutorials/01_NeMo_Models.ipynb +++ b/tutorials/01_NeMo_Models.ipynb @@ -37,7 +37,7 @@ "!pip install unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Install TorchAudio\n", @@ -1637,16 +1637,34 @@ "-----\n", "Wait, why did OmegaConf not fill in the value of the variable interpolation for the configs yet?\n", "\n", - "This is because OmegaConf takes a deferred approach to variable interpolation. To force it ahead of time, we can use the following snippet - " + "This is because OmegaConf takes a deferred approach to variable interpolation. First, we fill in temporary values of the required fields (those marked by `???`). Then, to force resolution ahead of time, we can use the following snippet - " ] }, + { + "cell_type": "code", + "metadata": { + "id": "0X4C76JyOAnN" + }, + "source": [ + "import copy" + ], + "execution_count": null, + "outputs": [] + }, { "cell_type": "code", "metadata": { "id": "ugxA0TPtbHVZ" }, "source": [ - "temp_config = OmegaConf.create(OmegaConf.to_container(model_config, resolve=True))\n", + "temp_config = copy.deepcopy(model_config)\n", + "temp_config.model.vocab_size = 10\n", + "temp_config.model.block_size = 4\n", + "temp_config.model.n_layer = 1\n", + "temp_config.model.n_embd = 32\n", + "temp_config.model.n_head = 4\n", + "\n", + "temp_config = OmegaConf.create(OmegaConf.to_container(temp_config, resolve=True))\n", "print(OmegaConf.to_yaml(temp_config))" ], "execution_count": null, @@ -1662,17 +1680,6 @@ "Now that we have a config, let's try to create an object of the NeMo Model !" ] }, - { - "cell_type": "code", - "metadata": { - "id": "g_jM0oOmpOUT" - }, - "source": [ - "import copy" - ], - "execution_count": null, - "outputs": [] - }, { "cell_type": "code", "metadata": { @@ -2631,6 +2638,17 @@ "------\n", "There we go ! Now our models can be serialized and de-serialized without any issue, even with an external vocab file !" 
] + }, + { + "cell_type": "code", + "metadata": { + "id": "ZjCV5u3_OO7a" + }, + "source": [ + "" + ], + "execution_count": null, + "outputs": [] } ] } \ No newline at end of file diff --git a/tutorials/AudioTranslationSample.ipynb b/tutorials/AudioTranslationSample.ipynb index 6d1940c33965..4507c4ceb965 100644 --- a/tutorials/AudioTranslationSample.ipynb +++ b/tutorials/AudioTranslationSample.ipynb @@ -38,7 +38,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/VoiceSwapSample.ipynb b/tutorials/VoiceSwapSample.ipynb index 54d540c457e7..b94cf57eea23 100644 --- a/tutorials/VoiceSwapSample.ipynb +++ b/tutorials/VoiceSwapSample.ipynb @@ -39,7 +39,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n" ] }, diff --git a/tutorials/asr/01_ASR_with_NeMo.ipynb b/tutorials/asr/01_ASR_with_NeMo.ipynb index 9813f1404171..3dd4b23b6224 100644 --- a/tutorials/asr/01_ASR_with_NeMo.ipynb +++ b/tutorials/asr/01_ASR_with_NeMo.ipynb @@ -62,7 +62,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/02_Online_ASR_Microphone_Demo.ipynb b/tutorials/asr/02_Online_ASR_Microphone_Demo.ipynb index acf8c586e021..a35c5e5600b7 100644 --- a/tutorials/asr/02_Online_ASR_Microphone_Demo.ipynb +++ b/tutorials/asr/02_Online_ASR_Microphone_Demo.ipynb @@ -26,7 +26,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/03_Speech_Commands.ipynb b/tutorials/asr/03_Speech_Commands.ipynb index 5e76c27d60de..846368c9b40c 100644 --- a/tutorials/asr/03_Speech_Commands.ipynb +++ b/tutorials/asr/03_Speech_Commands.ipynb @@ -60,7 +60,7 @@ "!pip install unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", @@ -333,7 +333,7 @@ "source": [ "# Preserve some useful parameters\n", "labels = config.model.labels\n", - "sample_rate = config.sample_rate" + "sample_rate = config.model.sample_rate" ], "execution_count": null, "outputs": [] @@ -1625,4 +1625,4 @@ ] } ] -} \ No newline at end of file +} diff --git a/tutorials/asr/04_Online_Offline_Speech_Commands_Demo.ipynb b/tutorials/asr/04_Online_Offline_Speech_Commands_Demo.ipynb index 1416e016c16d..a3531222492d 100644 --- a/tutorials/asr/04_Online_Offline_Speech_Commands_Demo.ipynb +++ b/tutorials/asr/04_Online_Offline_Speech_Commands_Demo.ipynb @@ -28,7 +28,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", @@ -39,11 +39,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "\n", - "This notebook demonstrates offline and online (from a microphone's stream in NeMo) speech commands recognition \n", - "\n", - "It is **not a 
recommended** way to do inference in production workflows. If you are interested in \n", - "production-level inference using NeMo ASR models, please sign-up to Jarvis early access program: https://developer.nvidia.com/nvidia-jarvis" + "This notebook demonstrates offline and online (from a microphone's stream in NeMo) speech commands recognition " ] }, { diff --git a/tutorials/asr/05_Online_Noise_Augmentation.ipynb b/tutorials/asr/05_Online_Noise_Augmentation.ipynb index 02675844c39e..c052de0046d4 100644 --- a/tutorials/asr/05_Online_Noise_Augmentation.ipynb +++ b/tutorials/asr/05_Online_Noise_Augmentation.ipynb @@ -31,7 +31,7 @@ "!pip install unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", @@ -334,7 +334,7 @@ "outputs": [], "source": [ "# Import the data augmentation component from ASR collection\n", - "from nemo.collections.asr.parts import perturb, segment" + "from nemo.collections.asr.parts.preprocessing import perturb, segment" ] }, { @@ -1188,7 +1188,7 @@ "outputs": [], "source": [ "# We use a NeMo utility to parse the manifest file for us\n", - "from nemo.collections.asr.parts import collections, parsers\n", + "from nemo.collections.common.parts.preprocessing import collections, parsers\n", "\n", "class NoisePerturbationEval(perturb.Perturbation):\n", " def __init__(\n", @@ -1303,18 +1303,18 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.6" + "version": "3.7.7" }, "pycharm": { "stem_cell": { "cell_type": "raw", - "source": [], "metadata": { "collapsed": false - } + }, + "source": [] } } }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/tutorials/asr/06_Voice_Activiy_Detection.ipynb b/tutorials/asr/06_Voice_Activiy_Detection.ipynb index e067b1623619..f2d5927413b2 100644 --- a/tutorials/asr/06_Voice_Activiy_Detection.ipynb +++ b/tutorials/asr/06_Voice_Activiy_Detection.ipynb @@ -27,7 +27,7 @@ "!pip install unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", @@ -329,7 +329,7 @@ "source": [ "# Preserve some useful parameters\n", "labels = config.model.labels\n", - "sample_rate = config.sample_rate" + "sample_rate = config.model.sample_rate" ] }, { @@ -1157,13 +1157,13 @@ "pycharm": { "stem_cell": { "cell_type": "raw", - "source": [], "metadata": { "collapsed": false - } + }, + "source": [] } } }, "nbformat": 4, "nbformat_minor": 1 -} \ No newline at end of file +} diff --git a/tutorials/asr/07_Online_Offline_Microphone_VAD_Demo.ipynb b/tutorials/asr/07_Online_Offline_Microphone_VAD_Demo.ipynb index 9cec4e5aa151..2e606d3d6740 100644 --- a/tutorials/asr/07_Online_Offline_Microphone_VAD_Demo.ipynb +++ b/tutorials/asr/07_Online_Offline_Microphone_VAD_Demo.ipynb @@ -26,7 +26,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", @@ -45,11 +45,7 @@ "2. [finetuning](#Finetune) and use [posterior](#Posterior);\n", "2. [threshold tuning](#Tuning-threshold);\n", "4. [online streaming inference](#Online-streaming-inference);\n", - "3. 
[online streaming inference from a microphone's stream](#Online-streaming-inference-through-microphone).\n", - "\n", - "\n", - "It is **not a recommended** way to do inference in production workflows. If you are interested in \n", - "production-level inference using NeMo ASR models, please sign-up to Jarvis early access program: https://developer.nvidia.com/nvidia-jarvis" + "3. [online streaming inference from a microphone's stream](#Online-streaming-inference-through-microphone).\n" ] }, { diff --git a/tutorials/asr/10_ASR_CTC_Language_Finetuning.ipynb b/tutorials/asr/10_ASR_CTC_Language_Finetuning.ipynb index 6310f0ee54fa..65e76e1832f3 100644 --- a/tutorials/asr/10_ASR_CTC_Language_Finetuning.ipynb +++ b/tutorials/asr/10_ASR_CTC_Language_Finetuning.ipynb @@ -39,7 +39,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = \"r1.0.2\"\n", + "BRANCH = \"r1.1.0\"\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -899,7 +899,7 @@ "\n", "------\n", "\n", - "It is also important to note that if the language remains the same, and some specific domain of text must be targetted for ASR, it is often easier to add a domain-specific language model to guide the generic ASR model than to attempt fine-tuning a full ASR model on limited data from that specific domain. " + "It is also important to note that if the language remains the same, and some specific domain of text must be adapted for ASR, it is often easier to add a domain-specific language model to guide the generic ASR model than to attempt fine-tuning a full ASR model on limited data from that specific domain. " ] }, { @@ -1135,7 +1135,7 @@ "id": "Waz64_NXfkIQ" }, "source": [ - "### Settng up augmentation\n", + "### Setting up augmentation\n", "\n", "Remember that the model was trained on several thousands of hours of data, so the regularization provided to it might not suit the current dataset. We can easily change it as we see fit.\n", "\n", @@ -1778,7 +1778,7 @@ "id": "t2iOQlFfs2Ig" }, "source": [ - "### Analyse dataset outliers\n", + "### Examine dataset outliers\n", "\n", "In general, there are minor differences between the Character encoding and Sub-word encoding models. Since sub-words can encode larger sequence of tokens into a single subword, they substantially reduce the target sequence length.\n", "\n", diff --git a/tutorials/asr/ASR_for_telephony_speech.ipynb b/tutorials/asr/ASR_for_telephony_speech.ipynb index caa7f3126141..5316d8a90344 100644 --- a/tutorials/asr/ASR_for_telephony_speech.ipynb +++ b/tutorials/asr/ASR_for_telephony_speech.ipynb @@ -27,7 +27,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", @@ -65,7 +65,7 @@ "* National Speech Corpus - 1\n", "* Mozilla Common Voice\n", "\n", - "Among these, Fisher and Switchboard datasets are conversational telephone speech datasets with audio sampled at 8 kHz while the other datasets were originally sampled at at least 16 kHz. Before training, all audio files from Fisher and Switchboard datasets were upsampled to 16 kHz. 
Because of this mixed sample rate training, our models can be used to recognize both narrowband (8kHz) and wideband speech (16kHz)" + "Among these, Fisher and Switchboard datasets are conversational telephone speech datasets with audio sampled at 8 kHz while the other datasets were originally sampled at 16 kHz or higher. Before training, all audio files from Fisher and Switchboard datasets were upsampled to 16 kHz. Because of this mixed sample rate training, our models can be used to recognize both narrowband (8kHz) and wideband speech (16kHz)" ] }, { diff --git a/tutorials/asr/Offline_ASR.ipynb b/tutorials/asr/Offline_ASR.ipynb index ae7bf64541f9..d261b4de91c5 100644 --- a/tutorials/asr/Offline_ASR.ipynb +++ b/tutorials/asr/Offline_ASR.ipynb @@ -51,7 +51,7 @@ "id": "I9eIxAyKHREB" }, "source": [ - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "try:\n", " # Import NeMo Speech Recognition collection\n", " import nemo.collections.asr as nemo_asr\n", diff --git a/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb b/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb index c156d516b5fd..36d8f79ebe92 100644 --- a/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb +++ b/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb @@ -26,7 +26,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'v1.0.2'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" + "BRANCH = 'main'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, { diff --git a/tutorials/nlp/02_NLP_Tokenizers.ipynb b/tutorials/nlp/02_NLP_Tokenizers.ipynb index 23407db11dbc..7724ca9f94f2 100644 --- a/tutorials/nlp/02_NLP_Tokenizers.ipynb +++ b/tutorials/nlp/02_NLP_Tokenizers.ipynb @@ -10,7 +10,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'v1.0.2'" + "BRANCH = 'main'" ] }, { @@ -35,7 +35,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/Entity_Linking_Medical.ipynb b/tutorials/nlp/Entity_Linking_Medical.ipynb index 233df0507aad..2de1cefcc0e3 100644 --- a/tutorials/nlp/Entity_Linking_Medical.ipynb +++ b/tutorials/nlp/Entity_Linking_Medical.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -22,13 +22,13 @@ "!pip install faiss-gpu\n", "\n", "## Install NeMo\n", - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -88,7 +88,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -112,68 +112,9 @@ }, { "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " ID CONCEPT\n", - "0 1 Head ache\n", - "1 1 Headache\n", - "2 1 Migraine\n", - "3 1 Pain in the head\n", - "4 1 cephalgia\n", - "5 1 cephalalgia\n", - "6 2 heart attack\n", - "7 2 Myocardial infraction\n", - "8 2 necrosis of heart muscle\n", - "9 2 MI\n", - "10 3 CAD\n", - "11
3 Coronary artery disease\n", - "12 3 atherosclerotic heart disease\n", - "13 3 heart disease\n", - "14 3 damage of major heart blood vessels\n", - "15 4 myocardial ischemia\n", - "16 4 cardiac ischemia\n", - "17 4 reduced ability to pump blood\n", - "18 5 gradual loss of kidney function\n", - "19 5 kidneys cannot filter blood\n", - "20 5 chronic kidney disease\n", - "21 5 chronic kidney failure\n", - "22 5 CKD\n", - "23 6 alchohol intoxication\n", - "24 6 acute alchohol intoxication\n", - "25 6 alchohol poisoning\n", - "26 6 severe drunkenness\n", - "27 6 over consumption of alcohol\n", - "28 7 diabetes mellitus\n", - "29 7 diabetes\n", - "30 7 inability to process glucose\n", - "31 7 unable to take up sugar\n", - "32 7 Type 2 diabetes\n", - "33 8 Hyperinsulinemia\n", - "34 8 High blood sugar\n", - "35 8 abnormally high levels of insulin\n", - "36 9 Dipeptidyl peptidase-4 inhibitor\n", - "37 9 dpp-4 inhibitor\n", - "38 9 alogliptin\n", - "39 9 Nesina\n", - "40 9 Vipidia\n", - "41 10 hypoglycemia\n", - "42 10 low blood sugar\n", - "43 11 anticoagulants\n", - "44 11 blood thinners\n", - "45 11 Apixaban\n", - "46 11 Eliquis\n", - "47 12 Ibuprofen\n", - "48 12 Aspirin\n", - "49 12 over the counter nonsteroidal anti-inflammator...\n", - "50 12 NSAID\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "raw_data = pd.read_csv(os.path.join(DATA_DIR, \"tiny_example_dev_data.csv\"), names=[\"ID\", \"CONCEPT\"], index_col=False)\n", "print(raw_data)" @@ -188,27 +129,9 @@ }, { "cell_type": "code", - "execution_count": 10, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " ID CONCEPT_SYN1 CONCEPT_SYN2\n", - "0 1 Pain in the head cephalgia\n", - "1 1 Pain in the head cephalalgia\n", - "2 1 Migraine cephalgia\n", - "3 1 Head ache Pain in the head\n", - "4 1 Head ache Migraine\n", - "5 1 Head ache cephalalgia\n", - "6 1 Headache Migraine\n", - "7 1 Migraine cephalalgia\n", - "8 1 cephalgia cephalalgia\n", - "9 1 Headache Pain in the head\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "training_data = pd.read_table(os.path.join(DATA_DIR, \"tiny_example_train_pairs.tsv\"), names=[\"ID\", \"CONCEPT_SYN1\", \"CONCEPT_SYN2\"], delimiter='\\t')\n", "print(training_data.head(10))" @@ -240,7 +163,7 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -255,12 +178,15 @@ "cfg.project_dir = PROJECT_DIR\n", "cfg.model.nemo_path = os.path.join(PROJECT_DIR, \"tiny_example_sap_bert_model.nemo\")\n", "cfg.model.train_ds.data_file = os.path.join(DATA_DIR, \"tiny_example_train_pairs.tsv\")\n", - "cfg.model.validation_ds.data_file = os.path.join(DATA_DIR, \"tiny_example_validation_pairs.tsv\")" + "cfg.model.validation_ds.data_file = os.path.join(DATA_DIR, \"tiny_example_validation_pairs.tsv\")\n", + "\n", + "# remove distributed training flags\n", + "cfg.trainer.accelerator = None" ] }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -272,7 +198,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -304,7 +230,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -336,7 +262,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ 
-360,7 +286,7 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -401,7 +327,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -465,7 +391,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -493,7 +419,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "metadata": { "scrolled": true }, @@ -521,7 +447,7 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -563,7 +489,7 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -574,7 +500,7 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -589,75 +515,18 @@ }, { "cell_type": "code", - "execution_count": 23, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{0: 'Headache',\n", - " 1: 'Myocardial infraction',\n", - " 2: 'Coronary artery disease',\n", - " 3: 'myocardial ischemia',\n", - " 4: 'chronic kidney disease',\n", - " 5: 'alchohol intoxication',\n", - " 6: 'diabetes',\n", - " 7: 'Hyperinsulinemia',\n", - " 8: 'Nesina',\n", - " 9: 'hypoglycemia',\n", - " 10: 'anticoagulants',\n", - " 11: 'Ibuprofen'}" - ] - }, - "execution_count": 23, - "metadata": {}, - "output_type": "execute_result" - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "id2string" ] }, { "cell_type": "code", - "execution_count": 24, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "BERT Base output before Self Alignment Pretraining:\n", - "\n", - "The most similar concepts to high blood sugar are:\n", - "6 diabetes 0.9095035567879677\n", - "0 Headache 0.9046077281236649\n", - "8 Nesina 0.8512845635414124\n", - "\n", - "The most similar concepts to head pain are:\n", - "1 Myocardial infraction 0.7848672568798065\n", - "4 chronic kidney disease 0.7667323648929596\n", - "3 myocardial ischemia 0.761662557721138\n", - "\n", - "--------------------------------------------------\n", - "\n", - "SAP BERT output after Self Alignment Pretraining:\n", - "\n", - "The most similar concepts to high blood sugar are:\n", - "7 Hyperinsulinemia 0.28035879135131836\n", - "3 myocardial ischemia 0.054734230041503906\n", - "6 diabetes 0.053402602672576904\n", - "\n", - "The most similar concepts to head pain are:\n", - "0 Headache 0.6553303301334381\n", - "6 diabetes 0.19157332181930542\n", - "2 Coronary artery disease 0.12962108850479126\n", - "\n", - "--------------------------------------------------\n", - "\n" - ] - } - ], + "execution_count": null, + "metadata": {}, + "outputs": [], "source": [ "# Some sample queries\n", "queries = [\"high blood sugar\", \"head pain\"]\n", @@ -748,7 +617,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.10" + "version": "3.8.8" } }, "nbformat": 4, diff --git a/tutorials/nlp/GLUE_Benchmark.ipynb b/tutorials/nlp/GLUE_Benchmark.ipynb index 59bc67034a6b..818bab153a8c 100644 --- a/tutorials/nlp/GLUE_Benchmark.ipynb +++ b/tutorials/nlp/GLUE_Benchmark.ipynb @@ -44,7 +44,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'v1.0.2'\n!python -m pip install 
git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" + "BRANCH = 'main'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" ], "execution_count": null, "outputs": [] diff --git a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb index d3b1478893c8..2290d2196ee6 100644 --- a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb +++ b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb @@ -22,7 +22,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/Neural_Machine_Translation-Text2Sparql.ipynb b/tutorials/nlp/Neural_Machine_Translation-Text2Sparql.ipynb index 3ca08bbf85e3..ea24af73fb22 100644 --- a/tutorials/nlp/Neural_Machine_Translation-Text2Sparql.ipynb +++ b/tutorials/nlp/Neural_Machine_Translation-Text2Sparql.ipynb @@ -20,7 +20,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, @@ -149,7 +149,7 @@ "WORK_DIR = \"PATH_TO_CHECKPOINTS_AND_LOGS\"\n", "\n", "# NeMo Version\n", - "BRANCH = 'v1.0.2'\n" + "BRANCH = 'main'\n" ] }, { diff --git a/tutorials/nlp/Punctuation_and_Capitalization.ipynb b/tutorials/nlp/Punctuation_and_Capitalization.ipynb index 359d3deb4730..3d2f503ed30d 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization.ipynb @@ -32,7 +32,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'v1.0.2'" + "BRANCH = 'main'" ] }, { diff --git a/tutorials/nlp/Question_Answering_Squad.ipynb b/tutorials/nlp/Question_Answering_Squad.ipynb index 1d2d1e76193b..3b3c6c275ff8 100755 --- a/tutorials/nlp/Question_Answering_Squad.ipynb +++ b/tutorials/nlp/Question_Answering_Squad.ipynb @@ -46,7 +46,7 @@ "id": "uRLPr0TnIAHO" }, "source": [ - "BRANCH = 'v1.0.2'" + "BRANCH = 'main'" ], "execution_count": null, "outputs": [] diff --git a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb index faf0244bfff3..61ea73541b51 100644 --- a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb +++ b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb @@ -22,7 +22,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" ] }, @@ -392,7 +392,7 @@ "config.trainer.precision = 16 if torch.cuda.is_available() else 32\n", "\n", "# remove distributed training flags\n", - "config.trainer.accelerator = 'DDP'\n", + "config.trainer.accelerator = None\n", "\n", "trainer = pl.Trainer(**config.trainer)" ] @@ -613,7 +613,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.5" + "version": "3.8.8" }, "pycharm": { "stem_cell": { diff --git a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb index d459f83188be..e449538d6615 100644 --- a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb +++ 
b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb @@ -20,7 +20,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n", "\n" ] diff --git a/tutorials/nlp/Token_Classification-BioMegatron.ipynb b/tutorials/nlp/Token_Classification-BioMegatron.ipynb index c683005189f1..d96ca50c2c30 100644 --- a/tutorials/nlp/Token_Classification-BioMegatron.ipynb +++ b/tutorials/nlp/Token_Classification-BioMegatron.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'v1.0.2'" + "BRANCH = 'main'" ] }, { @@ -413,7 +413,7 @@ "config.trainer.precision = 16 if torch.cuda.is_available() else 32\n", "\n", "# remove distributed training flags\n", - "config.trainer.accelerator = 'ddp'\n", + "config.trainer.accelerator = None\n", "\n", "trainer = pl.Trainer(**config.trainer)" ] @@ -640,7 +640,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.9" + "version": "3.8.8" } }, "nbformat": 4, diff --git a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb index d1105bcddddd..a4dbfe7714dd 100644 --- a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb +++ b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb @@ -30,7 +30,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'v1.0.2'" + "BRANCH = 'main'" ] }, { @@ -53,7 +53,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'v1.0.2'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" + "BRANCH = 'main'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" ], "execution_count": null, "outputs": [] diff --git a/tutorials/speaker_recognition/ASR_with_SpeakerDiarization.ipynb b/tutorials/speaker_recognition/ASR_with_SpeakerDiarization.ipynb index 532692bf80ae..0fbcf1b6702f 100644 --- a/tutorials/speaker_recognition/ASR_with_SpeakerDiarization.ipynb +++ b/tutorials/speaker_recognition/ASR_with_SpeakerDiarization.ipynb @@ -30,7 +30,7 @@ "!pip install unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", @@ -393,7 +393,7 @@ "metadata": {}, "outputs": [], "source": [ - "from nemo.collections.asr.parts.speaker_utils import write_rttm2manifest\n", + "from nemo.collections.asr.parts.utils.speaker_utils import write_rttm2manifest\n", "output_dir = os.path.join(ROOT, 'oracle_vad')\n", "os.makedirs(output_dir,exist_ok=True)\n", "oracle_manifest = os.path.join(output_dir,'oracle_manifest.json')\n", @@ -466,7 +466,7 @@ "metadata": {}, "outputs": [], "source": [ - "from nemo.collections.asr.parts.speaker_utils import rttm_to_labels\n", + "from nemo.collections.asr.parts.utils.speaker_utils import rttm_to_labels\n", "pred_rttm=os.path.join(output_dir,'pred_rttms',uniq_id+'.rttm')\n", "labels=rttm_to_labels(pred_rttm)\n", "print(\"speaker labels with time stamps\\n\",labels)" @@ -522,13 +522,6 @@ "\n", "print(words[j+1],end=\" \")" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { @@ -547,7 +540,7 @@ "name": 
"python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.10" + "version": "3.7.7" } }, "nbformat": 4, diff --git a/tutorials/speaker_recognition/Speaker_Diarization_Inference.ipynb b/tutorials/speaker_recognition/Speaker_Diarization_Inference.ipynb index ad2ec4f8a89a..675f6f3ceede 100644 --- a/tutorials/speaker_recognition/Speaker_Diarization_Inference.ipynb +++ b/tutorials/speaker_recognition/Speaker_Diarization_Inference.ipynb @@ -23,7 +23,7 @@ "!pip install unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", @@ -115,7 +115,7 @@ "metadata": {}, "outputs": [], "source": [ - "from nemo.collections.asr.parts.speaker_utils import rttm_to_labels, labels_to_pyannote_object" + "from nemo.collections.asr.parts.utils.speaker_utils import rttm_to_labels, labels_to_pyannote_object" ] }, { @@ -212,7 +212,7 @@ "metadata": {}, "outputs": [], "source": [ - "from nemo.collections.asr.parts.speaker_utils import write_rttm2manifest\n", + "from nemo.collections.asr.parts.utils.speaker_utils import write_rttm2manifest\n", "output_dir = os.path.join(ROOT, 'oracle_vad')\n", "os.makedirs(output_dir,exist_ok=True)" ] @@ -485,7 +485,7 @@ "outputs": [], "source": [ "# VAD predicted time stamps\n", - "from nemo.collections.asr.parts.vad_utils import extract_labels, plot\n", + "from nemo.collections.asr.parts.utils.vad_utils import extract_labels, plot\n", "\n", "plot(paths2audio_files[0],\n", " 'outputs/vad_outputs/overlap_smoothing_output_median_0.875/an4_diarize_test.median', \n", @@ -615,7 +615,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.7.10" + "version": "3.7.7" }, "pycharm": { "stem_cell": { diff --git a/tutorials/speaker_recognition/Speaker_Recognition_Verification.ipynb b/tutorials/speaker_recognition/Speaker_Recognition_Verification.ipynb index 38e0be3c9d9c..e2eeedf6c88b 100644 --- a/tutorials/speaker_recognition/Speaker_Recognition_Verification.ipynb +++ b/tutorials/speaker_recognition/Speaker_Recognition_Verification.ipynb @@ -27,7 +27,7 @@ "!pip install unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/text_processing/Inverse_Text_Normalization.ipynb b/tutorials/text_processing/Inverse_Text_Normalization.ipynb index 1e3fb55d494b..ce5378022897 100755 --- a/tutorials/text_processing/Inverse_Text_Normalization.ipynb +++ b/tutorials/text_processing/Inverse_Text_Normalization.ipynb @@ -56,7 +56,7 @@ "3. 
Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "\"\"\"\n", "\n", - "BRANCH = 'v1.0.2'" + "BRANCH = 'main'" ], "id": "YxVLI-f97Kxl", "execution_count": null, diff --git a/tutorials/text_processing/Text_Normalization.ipynb b/tutorials/text_processing/Text_Normalization.ipynb index 98b2aab327e0..607bad00b99a 100755 --- a/tutorials/text_processing/Text_Normalization.ipynb +++ b/tutorials/text_processing/Text_Normalization.ipynb @@ -64,7 +64,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'v1.0.2'\n", + "BRANCH = 'main'\n", "if 'google.colab' in str(get_ipython()):\n", " !python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ], diff --git a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb index 8814fad56d7e..597dd41c2f10 100644 --- a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb +++ b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb @@ -44,7 +44,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'v1.0.2'" + "BRANCH = 'main'" ] }, { diff --git a/tutorials/tools/label-studio/setup-asr-preannotations.ipynb b/tutorials/tools/label-studio/setup-asr-preannotations.ipynb index da506b08ddd1..e377c1a3eeb5 100644 --- a/tutorials/tools/label-studio/setup-asr-preannotations.ipynb +++ b/tutorials/tools/label-studio/setup-asr-preannotations.ipynb @@ -25,7 +25,7 @@ "source": [ "!docker run --gpus all -it --rm --shm-size=8g \\\n", "-p 8888:8888 -p 6006:6006 -p 8080:8080 --ulimit memlock=-1 --ulimit \\\n", - "stack=67108864 --device=/dev/snd nvcr.io/nvidia/nemo:1.0.0rc1" + "stack=67108864 --device=/dev/snd nvcr.io/nvidia/nemo:1.0.1" ] }, { diff --git a/tutorials/tts/1_TTS_inference.ipynb b/tutorials/tts/1_TTS_inference.ipynb index 55484f358ff4..28ccf5d99d93 100644 --- a/tutorials/tts/1_TTS_inference.ipynb +++ b/tutorials/tts/1_TTS_inference.ipynb @@ -49,7 +49,7 @@ "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget unidecode\n", - "# BRANCH = 'v1.0.2'\n", + "# BRANCH = 'main'\n", "# !python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[tts]" ] }, diff --git a/tutorials/tts/2_TTS_Tacotron2_Training.ipynb b/tutorials/tts/2_TTS_Tacotron2_Training.ipynb index 29fa784f4f90..ba9802566621 100644 --- a/tutorials/tts/2_TTS_Tacotron2_Training.ipynb +++ b/tutorials/tts/2_TTS_Tacotron2_Training.ipynb @@ -57,7 +57,7 @@ "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget unidecode\n", - "# BRANCH = 'v1.0.2'\n", + "# BRANCH = 'main'\n", "# !python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[tts]" ] }, diff --git a/tutorials/tts/3_TTS_TalkNet_Training.ipynb b/tutorials/tts/3_TTS_TalkNet_Training.ipynb index 03fd2903dc5a..312109fae395 100644 --- a/tutorials/tts/3_TTS_TalkNet_Training.ipynb +++ b/tutorials/tts/3_TTS_TalkNet_Training.ipynb @@ -50,7 +50,7 @@ "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget unidecode pysptk\n", - "# BRANCH = 'v1.0.2'\n", + "# BRANCH = 'main'\n", "# !python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] },
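The speaker-verification changes above pair the new embedding_normalize helper in speaker_utils.py with the rewritten cosine score from voxceleb_eval.py. A minimal sketch of the two steps together, assuming numpy, random stand-in vectors in place of real EncDecSpeakerLabelModel outputs, and an arbitrary embedding size of 192:

import numpy as np

def embedding_normalize(embs, use_std=False, eps=1e-10):
    # Mean-normalize, optionally std-normalize, then l2-normalize each
    # embedding, mirroring the helper added to speaker_utils.py above.
    embs = embs - embs.mean(axis=0)
    if use_std:
        embs = embs / (embs.std(axis=0) + eps)
    embs_l2_norm = np.expand_dims(np.linalg.norm(embs, ord=2, axis=-1), axis=1)
    return embs / embs_l2_norm

# Random stand-ins for a batch of speaker embeddings of shape (batch, emb_size).
rng = np.random.default_rng(0)
embs = embedding_normalize(rng.standard_normal((2, 192)))
X, Y = embs[0], embs[1]

# Cosine similarity mapped into [0, 1], as in the updated voxceleb_eval.py.
score = np.dot(X, Y) / ((np.dot(X, X) * np.dot(Y, Y)) ** 0.5)
score = (score + 1) / 2
print(float(score))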
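The SpecAugment changes make time masking length-aware: mask widths and start positions are now drawn per utterance from the true sequence length instead of the padded spectrogram width. A minimal sketch of just that sampling step, assuming plain PyTorch, toy lengths, and a hypothetical sample_time_masks helper that is not part of NeMo:

import torch

def sample_time_masks(lengths, time_masks=2, time_width=0.05, adaptive=True):
    # Per-utterance mask widths: a fraction of each true length when adaptive,
    # otherwise a fixed width broadcast across the batch.
    bs = lengths.shape[0]
    if adaptive:
        widths = (lengths * time_width).int().clamp(min=1)
    else:
        widths = torch.full((bs,), int(time_width), dtype=torch.int32)

    starts, mask_lens = [], []
    for idx in range(bs):
        # Keep every mask inside the unpadded region of utterance idx.
        starts.append(torch.randint(0, max(1, int(lengths[idx]) - int(widths[idx])), (1, time_masks)))
        mask_lens.append(torch.randint(0, int(widths[idx]) + 1, (1, time_masks)))
    return torch.cat(starts, 0), torch.cat(mask_lens, 0)

lengths = torch.tensor([120, 80, 37])
starts, mask_lens = sample_time_masks(lengths)
print(starts.shape, mask_lens.shape)  # torch.Size([3, 2]) torch.Size([3, 2])

Capping the start range at max(1, length - width) keeps each mask inside the unpadded region, which is also why the length input of SpectrogramAugmentation is no longer optional.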
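The new --spe_bos, --spe_eos, and --spe_pad flags in process_asr_text_tokenizer.py are forwarded to the tokenizer builder as bos, eos, and pad. Outside NeMo's wrapper, a roughly equivalent SentencePiece training call is sketched below; the corpus path, vocab size, and token-id layout are illustrative assumptions rather than values taken from this change:

import sentencepiece as spm

spe_bos, spe_eos, spe_pad = True, True, True

# SentencePiece reserves control tokens through the *_id training arguments;
# an id of -1 disables the corresponding token, and <unk> is always required.
spm.SentencePieceTrainer.train(
    input='text_corpus.txt',  # placeholder corpus path
    model_prefix='tokenizer',
    vocab_size=1024,
    unk_id=0,
    bos_id=1 if spe_bos else -1,
    eos_id=2 if spe_eos else -1,
    pad_id=3 if spe_pad else -1,
)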