From 408466996576a3be98a0da5d803139bae269633b Mon Sep 17 00:00:00 2001 From: Somshubra Majumdar Date: Wed, 17 May 2023 15:13:07 -0700 Subject: [PATCH 01/28] Cut branch r1.19.0 Signed-off-by: smajumdar --- Jenkinsfile | 330 +++++++++--------- README.rst | 60 ++-- nemo/package_info.py | 2 +- tutorials/00_NeMo_Primer.ipynb | 8 +- tutorials/01_NeMo_Models.ipynb | 2 +- tutorials/02_NeMo_Adapters.ipynb | 2 +- tutorials/AudioTranslationSample.ipynb | 4 +- ...blish_NeMo_Model_On_Hugging_Face_Hub.ipynb | 2 +- tutorials/VoiceSwapSample.ipynb | 4 +- .../asr/ASR_CTC_Language_Finetuning.ipynb | 4 +- .../ASR_Example_CommonVoice_Finetuning.ipynb | 10 +- tutorials/asr/ASR_for_telephony_speech.ipynb | 2 +- tutorials/asr/ASR_with_NeMo.ipynb | 4 +- .../asr/ASR_with_Subword_Tokenization.ipynb | 2 +- tutorials/asr/ASR_with_Transducers.ipynb | 2 +- .../asr/Buffered_Transducer_Inference.ipynb | 2 +- ..._Transducer_Inference_with_LCS_Merge.ipynb | 2 +- tutorials/asr/Intro_to_Transducers.ipynb | 4 +- tutorials/asr/Multilang_ASR.ipynb | 8 +- tutorials/asr/Offline_ASR.ipynb | 4 +- .../Offline_ASR_with_VAD_for_CTC_models.ipynb | 2 +- .../asr/Online_ASR_Microphone_Demo.ipynb | 2 +- tutorials/asr/Online_Noise_Augmentation.ipynb | 2 +- .../Online_Offline_Microphone_VAD_Demo.ipynb | 4 +- .../Online_Offline_Speech_Commands_Demo.ipynb | 4 +- .../asr/Self_Supervised_Pre_Training.ipynb | 10 +- tutorials/asr/Speech_Commands.ipynb | 2 +- tutorials/asr/Streaming_ASR.ipynb | 4 +- tutorials/asr/Voice_Activity_Detection.ipynb | 2 +- .../asr/asr_adapters/ASR_with_Adapters.ipynb | 4 +- ...netuning_at_Scale_with_AWS_SageMaker.ipynb | 12 +- .../cloud/aws/SageMaker_ASR_Training.ipynb | 6 +- ...Language_Models_for_Downstream_Tasks.ipynb | 12 +- tutorials/nlp/02_NLP_Tokenizers.ipynb | 4 +- ...a_Preprocessing_and_Cleaning_for_NMT.ipynb | 6 +- tutorials/nlp/Dialogue.ipynb | 2 +- tutorials/nlp/Entity_Linking_Medical.ipynb | 4 +- tutorials/nlp/GLUE_Benchmark.ipynb | 2 +- tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb | 12 +- ...Joint_Intent_and_Slot_Classification.ipynb | 2 +- tutorials/nlp/MegatronBert_export.ipynb | 4 +- ...on_Synthetic_Tabular_Data_Generation.ipynb | 2 +- .../nlp/Multitask_Prompt_and_PTuning.ipynb | 8 +- .../nlp/Punctuation_and_Capitalization.ipynb | 8 +- ...ion_and_Capitalization_Lexical_Audio.ipynb | 8 +- tutorials/nlp/Question_Answering.ipynb | 2 +- .../nlp/Relation_Extraction-BioMegatron.ipynb | 2 +- ...xt_Classification_Sentiment_Analysis.ipynb | 2 +- .../Token_Classification-BioMegatron.ipynb | 2 +- ...ssification_Named_Entity_Recognition.ipynb | 4 +- .../nlp/Zero_Shot_Intent_Recognition.ipynb | 4 +- .../ASR_with_SpeakerDiarization.ipynb | 6 +- .../Speaker_Diarization_Inference.ipynb | 12 +- .../Speaker_Diarization_Training.ipynb | 8 +- .../Speaker_Identification_Verification.ipynb | 8 +- .../tools/CTC_Segmentation_Tutorial.ipynb | 8 +- tutorials/tools/Multispeaker_Simulator.ipynb | 4 +- .../tts/Aligner_Inference_Examples.ipynb | 4 +- .../Evaluation_MelCepstralDistortion.ipynb | 6 +- .../tts/FastPitch_Adapter_Finetuning.ipynb | 4 +- .../tts/FastPitch_ChineseTTS_Training.ipynb | 8 +- tutorials/tts/FastPitch_Finetuning.ipynb | 4 +- .../tts/FastPitch_GermanTTS_Training.ipynb | 10 +- .../tts/FastPitch_MixerTTS_Training.ipynb | 2 +- .../FastPitch_MultiSpeaker_Pretraining.ipynb | 4 +- .../tts/FastPitch_Speaker_Interpolation.ipynb | 2 +- .../tts/Inference_DurationPitchControl.ipynb | 4 +- tutorials/tts/Inference_ModelSelect.ipynb | 2 +- tutorials/tts/NeMo_TTS_Primer.ipynb | 2 +- .../tts/Pronunciation_customization.ipynb | 12 
+- tutorials/tts/Tacotron2_Training.ipynb | 2 +- tutorials/tts/Vits_Training.ipynb | 2 +- 72 files changed, 358 insertions(+), 358 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index f43b301afdc0..27fbf11148f6 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -85,8 +85,8 @@ pipeline { stage('L0: Unit Tests CPU') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } steps { @@ -97,8 +97,8 @@ pipeline { stage('L2: ASR dev run') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -183,8 +183,8 @@ pipeline { stage('L2: ASR dev run - part two') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -213,8 +213,8 @@ pipeline { stage('L2: Speech to Text EMA') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } steps { @@ -234,8 +234,8 @@ pipeline { stage('L2: Speaker dev run') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -357,8 +357,8 @@ pipeline { // stage('L2: ASR DALI dev run') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -425,8 +425,8 @@ pipeline { // stage('L2: ASR RNNT dev run') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -487,8 +487,8 @@ pipeline { // stage('L2: Hybrid ASR RNNT-CTC dev run') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -517,8 +517,8 @@ pipeline { stage('L2: ASR Multi-dataloader dev run') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -565,8 +565,8 @@ pipeline { stage('L2: ASR Adapters') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -610,8 +610,8 @@ pipeline { stage('L2: Megatron T5 Adapter PP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -656,8 +656,8 @@ pipeline { stage('L2: Megatron T5 Adapter TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -700,8 +700,8 @@ pipeline { stage('L2: Megatron T5 IA3 PP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -746,8 +746,8 @@ pipeline { stage('L2: Megatron T5 IA3 TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -790,8 +790,8 @@ pipeline { stage('L2: Megatron GPT Adapter TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -833,8 +833,8 @@ pipeline { stage('L2: Megatron GPT Adapter PP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -877,8 +877,8 @@ 
pipeline { stage('L2: Speech Transcription') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -898,8 +898,8 @@ pipeline { stage('L2: Transducer alignment') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -915,8 +915,8 @@ pipeline { stage('L2: Segmentation Tool') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } stages { @@ -971,8 +971,8 @@ pipeline { stage('L2: G2P Models') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1053,8 +1053,8 @@ pipeline { // stage('L2: Multi-GPU Megatron finetuning') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -1080,8 +1080,8 @@ pipeline { stage('L2: STS-b') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1140,8 +1140,8 @@ pipeline { stage('L2: Dialogue Classification') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1311,8 +1311,8 @@ pipeline { stage('L2: Dialogue Generation') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1377,8 +1377,8 @@ pipeline { // stage('L2: Dialogue Generation Part 2') { // when { // anyOf { -// branch 'main' -// changeRequest target: 'main' +// branch 'r1.19.0' +// changeRequest target: 'r1.19.0' // } // } // failFast true @@ -1407,8 +1407,8 @@ pipeline { stage('L2: COPY') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1437,8 +1437,8 @@ pipeline { stage('L2: Duplex Text Normalization') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1470,13 +1470,13 @@ pipeline { } } } - // Runs out of memory on the 12G TITAN V (GPU 0 on main CI) + // Runs out of memory on the 12G TITAN V (GPU 0 on r1.19.0 CI) // TODO: add when megatron bert is supported again in NeMo // stage('L2: MegaBERT Token Classification') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -1501,8 +1501,8 @@ pipeline { stage('L2: BERT Text Classification') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1530,8 +1530,8 @@ pipeline { stage('L2: Parallel BERT Question-Answering SQUAD v1.1 & v2.0') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1589,8 +1589,8 @@ pipeline { stage('L2: Parallel BART Question-Answering SQUAD v1.1 & v2.0') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1650,8 +1650,8 @@ pipeline { stage('L2: Parallel GPT2 Question-Answering SQUAD v1.1 & v2.0') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast 
true @@ -1711,8 +1711,8 @@ pipeline { stage('L2: Intent and Slot Classification Tasks') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1751,8 +1751,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Text Classification') { // when { // anyOf{ - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -1780,8 +1780,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Autoresume') { // when { // anyOf{ - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -1811,8 +1811,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Evaluation from .nemo') { // when { // anyOf{ - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -1832,8 +1832,8 @@ pipeline { // stage('L2: Model Parallel Size 2 Megatron Train from .nemo') { // when { // anyOf{ - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -1855,8 +1855,8 @@ pipeline { stage('L2: Parallel NLP Examples 2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -1980,8 +1980,8 @@ pipeline { stage('Punctuation & Capitalization tarred dataset') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -2039,8 +2039,8 @@ pipeline { stage('Punctuation & Capitalization, Different ways of passing labels to model') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -2147,8 +2147,8 @@ pipeline { stage('Punctuation & Capitalization inference') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -2173,8 +2173,8 @@ pipeline { stage('L2: Parallel Pretraining BERT pretraining from Text/Preprocessed') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -2235,8 +2235,8 @@ pipeline { stage('L2: Entity Linking') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -2263,8 +2263,8 @@ pipeline { stage('L2: NMT Attention is All You Need Training') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -2386,8 +2386,8 @@ pipeline { stage('L2: NMT Attention is All You Need Inference') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -2422,8 +2422,8 @@ pipeline { stage('L2: NMT Attention is All You Need Finetuning') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -2457,8 +2457,8 @@ pipeline { stage('L2: NMT Tarred Dataset Creation') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -2511,8 +2511,8 @@ pipeline { stage('L2: Megatron NMT Training TP=2') { when { anyOf { - 
branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true
@@ -2610,8 +2610,8 @@ pipeline { // stage('L2: NMT Bottleneck Fallback') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true
@@ -2657,8 +2657,8 @@ pipeline { // stage('L2: NMT Bottleneck Architecture') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true
@@ -2740,8 +2740,8 @@ pipeline { // stage('L2: NMT Bottleneck LVM') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true
@@ -2823,8 +2823,8 @@ pipeline { stage('L2: Megatron Bert Pretraining and Resume Training with Pipeline Parallelism') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true
@@ -2893,8 +2893,8 @@ pipeline { stage('L2: Megatron Bert Pretraining and Resume Training') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true
@@ -2964,8 +2964,8 @@ pipeline { stage('L2: Megatron RETRO Pretraining and Resume Training') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true
@@ -3036,8 +3036,8 @@ pipeline { stage('L2: Megatron RETRO muTransfer Pretraining Performance') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true
@@ -3119,8 +3119,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: BioMegatron Bert NER Task') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true
@@ -3137,8 +3137,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Pretraining and Resume Training TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true
@@ -3221,8 +3221,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Pretraining and Resume Training PP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true
@@ -3305,8 +3305,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Finetuning PP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true
@@ -3373,8 +3373,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Eval') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true
@@ -3390,8 +3390,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Eval PP2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true
@@ -3409,8 +3409,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Prompt Tuning TP1 PP1') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true
@@ -3444,8 +3444,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron GPT Prompt Tuning TP2 PP1') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true
@@ -3488,8 +3488,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' // stage('L2: Megatron GPT Prompt Tuning TP1 PP2') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true
@@ -3533,8 +3533,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' // stage('L2: Megatron GPT Convert from Megatron-LM checkpoint and Eval') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch 'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true
@@ -3560,8 +3560,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron Change Partitions') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true
@@ -3607,8 +3607,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Pretraining and Resume Training TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true
@@ -3703,8 +3703,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 with ALiBi Pretraining and Resume Training TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true
@@ -3799,8 +3799,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 with KERPLE Pretraining and Resume Training TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true
@@ -3895,8 +3895,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Pretraining and Resume Training PP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true
@@ -3965,8 +3965,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 w/ Mixture of Expert Pretraining') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true
@@ -4010,8 +4010,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Prompt Learning TP1 PP1') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true
@@ -4051,8 +4051,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Prompt Learning TP2 PP1') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true
@@ -4096,8 +4096,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' // stage('L2: Megatron T5 Prompt Learning TP1 PP2') { // when { // anyOf { - // branch 'main' - // changeRequest target: 'main' + // branch
'r1.19.0' + // changeRequest target: 'r1.19.0' // } // } // failFast true @@ -4140,8 +4140,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron UL2 Pretraining and Resume Training TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -4220,8 +4220,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 Eval') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -4237,8 +4237,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron BART Pretraining and Resume Training, TP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -4306,8 +4306,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron BART Pretraining and Resume Training, PP=2') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -4379,8 +4379,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron T5 GLUE/XNLI Finetuning') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -4452,8 +4452,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: Megatron Mock Data Generation') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true @@ -4489,8 +4489,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L2: TTS Fast dev runs 1') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } parallel { @@ -4635,8 +4635,8 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' stage('L??: Speech Checkpoints tests') { when { anyOf { - branch 'main' - changeRequest target: 'main' + branch 'r1.19.0' + changeRequest target: 'r1.19.0' } } failFast true diff --git a/README.rst b/README.rst index 1335620ead25..841509dfec5f 100644 --- a/README.rst +++ b/README.rst @@ -5,9 +5,9 @@ :target: http://www.repostatus.org/#active :alt: Project Status: Active – The project has reached a stable, usable state and is being actively developed. -.. |documentation| image:: https://readthedocs.com/projects/nvidia-nemo/badge/?version=main +.. |documentation| image:: https://readthedocs.com/projects/nvidia-nemo/badge/?version=r1.19.0 :alt: Documentation - :target: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/ + :target: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/ .. |license| image:: https://img.shields.io/badge/License-Apache%202.0-brightgreen.svg :target: https://github.com/NVIDIA/NeMo/blob/master/LICENSE @@ -25,7 +25,7 @@ :target: https://pepy.tech/project/nemo-toolkit :alt: PyPi total downloads -.. |codeql| image:: https://github.com/nvidia/nemo/actions/workflows/codeql.yml/badge.svg?branch=main&event=push +.. 
|codeql| image:: https://github.com/nvidia/nemo/actions/workflows/codeql.yml/badge.svg?branch=r1.19.0&event=push :target: https://github.com/nvidia/nemo/actions/workflows/codeql.yml :alt: CodeQL @@ -33,7 +33,7 @@ :target: https://github.com/psf/black :alt: Code style: black -.. _main-readme: +.. _r1.19.0-readme: **NVIDIA NeMo** =============== @@ -61,7 +61,7 @@ We have extensive `tutorials `_. For advanced users that want to train NeMo models from scratch or finetune existing NeMo models -we have a full suite of `example scripts `_ that support multi-GPU/multi-node training. +we have a full suite of `example scripts `_ that support multi-GPU/multi-node training. For scaling NeMo LLM training on Slurm clusters or public clouds, please see the `NVIDIA NeMo Megatron Launcher `_. The NM launcher has extensive recipes, scripts, utilities, and documentation for training NeMo LLMs and also has an `Autoconfigurator `_ @@ -74,7 +74,7 @@ Key Features * Speech processing * `HuggingFace Space for Audio Transcription (File, Microphone and YouTube) `_ - * `Automatic Speech Recognition (ASR) `_ + * `Automatic Speech Recognition (ASR) `_ * Supported ASR models: ``_ * Jasper, QuartzNet, CitriNet, ContextNet * Conformer-CTC, Conformer-Transducer, FastConformer-CTC, FastConformer-Transducer @@ -88,42 +88,42 @@ Key Features * Streaming/Buffered ASR (CTC/Transducer) - `Chunked Inference Examples `_ * Cache-aware Streaming Conformer - ``_ * Beam Search decoding - * `Language Modelling for ASR `_: N-gram LM in fusion with Beam Search decoding, Neural Rescoring with Transformer - * `Support of long audios for Conformer with memory efficient local attention `_ - * `Speech Classification, Speech Command Recognition and Language Identification `_: MatchboxNet (Command Recognition), AmberNet (LangID) + * `Language Modelling for ASR `_: N-gram LM in fusion with Beam Search decoding, Neural Rescoring with Transformer + * `Support of long audios for Conformer with memory efficient local attention `_ + * `Speech Classification, Speech Command Recognition and Language Identification `_: MatchboxNet (Command Recognition), AmberNet (LangID) * `Voice activity Detection (VAD) `_: MarbleNet * ASR with VAD Inference - `Example `_ - * `Speaker Recognition `_: TitaNet, ECAPA_TDNN, SpeakerNet - * `Speaker Diarization `_ + * `Speaker Recognition `_: TitaNet, ECAPA_TDNN, SpeakerNet + * `Speaker Diarization `_ * Clustering Diarizer: TitaNet, ECAPA_TDNN, SpeakerNet * Neural Diarizer: MSDD (Multi-scale Diarization Decoder) - * `Speech Intent Detection and Slot Filling `_: Conformer-Transformer + * `Speech Intent Detection and Slot Filling `_: Conformer-Transformer * `Pretrained models on different languages. `_: English, Spanish, German, Russian, Chinese, French, Italian, Polish, ... * `NGC collection of pre-trained speech processing models. 
`_ * Natural Language Processing * `NeMo Megatron pre-training of Large Language Models `_ - * `Neural Machine Translation (NMT) `_ - * `Punctuation and Capitalization `_ - * `Token classification (named entity recognition) `_ - * `Text classification `_ - * `Joint Intent and Slot Classification `_ - * `Question answering `_ - * `GLUE benchmark `_ - * `Information retrieval `_ - * `Entity Linking `_ - * `Dialogue State Tracking `_ - * `Prompt Learning `_ + * `Neural Machine Translation (NMT) `_ + * `Punctuation and Capitalization `_ + * `Token classification (named entity recognition) `_ + * `Text classification `_ + * `Joint Intent and Slot Classification `_ + * `Question answering `_ + * `GLUE benchmark `_ + * `Information retrieval `_ + * `Entity Linking `_ + * `Dialogue State Tracking `_ + * `Prompt Learning `_ * `NGC collection of pre-trained NLP models. `_ * `Synthetic Tabular Data Generation `_ -* `Speech synthesis (TTS) `_ +* `Speech synthesis (TTS) `_ * Spectrogram generation: Tacotron2, GlowTTS, TalkNet, FastPitch, FastSpeech2, Mixer-TTS, Mixer-TTS-X * Vocoders: WaveGlow, SqueezeWave, UniGlow, MelGAN, HiFiGAN, UnivNet * End-to-end speech generation: FastPitch_HifiGan_E2E, FastSpeech2_HifiGan_E2E, VITS * `NGC collection of pre-trained TTS models. `_ * `Tools `_ - * `Text Processing (text normalization and inverse text normalization) `_ - * `CTC-Segmentation tool `_ - * `Speech Data Explorer `_: a dash-based tool for interactive exploration of ASR/TTS datasets + * `Text Processing (text normalization and inverse text normalization) `_ + * `CTC-Segmentation tool `_ + * `Speech Data Explorer `_: a dash-based tool for interactive exploration of ASR/TTS datasets * `Speech Data Processor `_ @@ -139,10 +139,10 @@ Requirements Documentation ------------- -.. |main| image:: https://readthedocs.com/projects/nvidia-nemo/badge/?version=main +.. |r1.19.0| image:: https://readthedocs.com/projects/nvidia-nemo/badge/?version=r1.19.0 :alt: Documentation Status :scale: 100% - :target: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/ + :target: https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/ .. |stable| image:: https://readthedocs.com/projects/nvidia-nemo/badge/?version=stable :alt: Documentation Status @@ -152,7 +152,7 @@ Documentation +---------+-------------+------------------------------------------------------------------------------------------------------------------------------------------+ | Version | Status | Description | +=========+=============+==========================================================================================================================================+ -| Latest | |main| | `Documentation of the latest (i.e. main) branch. `_ | +| Latest | |r1.19.0| | `Documentation of the latest (i.e. main) branch. `_ | +---------+-------------+------------------------------------------------------------------------------------------------------------------------------------------+ | Stable | |stable| | `Documentation of the stable (i.e. most recent release) branch. 
`_ | +---------+-------------+------------------------------------------------------------------------------------------------------------------------------------------+
diff --git a/nemo/package_info.py b/nemo/package_info.py index 709159dd575a..d77e3046359b 100644 --- a/nemo/package_info.py +++ b/nemo/package_info.py
@@ -16,7 +16,7 @@ MAJOR = 1 MINOR = 19 PATCH = 0 -PRE_RELEASE = 'rc0' +PRE_RELEASE = '' # Use the following formatting: (major, minor, patch, pre-release) VERSION = (MAJOR, MINOR, PATCH, PRE_RELEASE)
diff --git a/tutorials/00_NeMo_Primer.ipynb b/tutorials/00_NeMo_Primer.ipynb index 50aa60260b35..193680f6d06d 100644 --- a/tutorials/00_NeMo_Primer.ipynb +++ b/tutorials/00_NeMo_Primer.ipynb
@@ -14,7 +14,7 @@ "\n", "The toolkit comes with extendable collections of pre-built modules and ready-to-use models for automatic speech recognition (ASR), natural language processing (NLP) and text synthesis (TTS). Built for speed, NeMo can utilize NVIDIA's Tensor Cores and scale out training to multiple GPUs and multiple nodes.\n", "\n", - "For more information, please visit https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/#" + "For more information, please visit https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/#" ] }, {
@@ -42,7 +42,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Install TorchAudio\n",
@@ -1146,7 +1146,7 @@ "\n", "NeMo constantly adds new models and new tasks to these examples, such that these examples serve as the basis to train and evaluate models from scratch with the provided config files.\n", "\n", - "NeMo Examples directory can be found here - https://github.com/NVIDIA/NeMo/tree/main/examples" + "NeMo Examples directory can be found here - https://github.com/NVIDIA/NeMo/tree/r1.19.0/examples" ] }, {
@@ -1251,7 +1251,7 @@ "\n", "While the tutorials are a great example of the simplicity of NeMo, please note that for the best performance when training on real datasets, we advise the use of the example scripts instead of the tutorial notebooks. \n", - "NeMo Tutorials directory can be found here - https://github.com/NVIDIA/NeMo/tree/main/tutorials" + "NeMo Tutorials directory can be found here - https://github.com/NVIDIA/NeMo/tree/r1.19.0/tutorials" ] } ],
diff --git a/tutorials/01_NeMo_Models.ipynb b/tutorials/01_NeMo_Models.ipynb index 6f230e62c1a3..2a65509bd8cd 100644 --- a/tutorials/01_NeMo_Models.ipynb +++ b/tutorials/01_NeMo_Models.ipynb
@@ -37,7 +37,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Install TorchAudio\n",
diff --git a/tutorials/02_NeMo_Adapters.ipynb b/tutorials/02_NeMo_Adapters.ipynb index 51a91a3c7053..e6874d14169f 100644 --- a/tutorials/02_NeMo_Adapters.ipynb +++ b/tutorials/02_NeMo_Adapters.ipynb
@@ -25,7 +25,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n",
diff --git a/tutorials/AudioTranslationSample.ipynb b/tutorials/AudioTranslationSample.ipynb index e8fb33aba11f..ac79ca3b204d 100644 --- a/tutorials/AudioTranslationSample.ipynb +++ b/tutorials/AudioTranslationSample.ipynb
@@ -38,7 +38,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n" ] },
@@ -249,7 +249,7 @@ "* [Speech Synthesis](https://colab.research.google.com/github/NVIDIA/NeMo/blob/stable/tutorials/tts/Inference_ModelSelect.ipynb)\n", "\n", "\n", - "You can find scripts for training and fine-tuning ASR, NLP and TTS models [here](https://github.com/NVIDIA/NeMo/tree/main/examples). " + "You can find scripts for training and fine-tuning ASR, NLP and TTS models [here](https://github.com/NVIDIA/NeMo/tree/r1.19.0/examples). " ] } ],
diff --git a/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb b/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb index 1b951e7b9e8c..da2e53fd94eb 100644 --- a/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb +++ b/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb
@@ -41,7 +41,7 @@ "!pip install text-unidecode\n", "\n", "### Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] },
diff --git a/tutorials/VoiceSwapSample.ipynb b/tutorials/VoiceSwapSample.ipynb index addf19f3b236..ea8356981908 100644 --- a/tutorials/VoiceSwapSample.ipynb +++ b/tutorials/VoiceSwapSample.ipynb
@@ -39,7 +39,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n" ] },
@@ -283,7 +283,7 @@ "* [Speech Synthesis](https://colab.research.google.com/github/NVIDIA/NeMo/blob/stable/tutorials/tts/Inference_ModelSelect.ipynb)\n", "\n", "\n", - "You can find scripts for training and fine-tuning ASR, NLP and TTS models [here](https://github.com/NVIDIA/NeMo/tree/main/examples). " + "You can find scripts for training and fine-tuning ASR, NLP and TTS models [here](https://github.com/NVIDIA/NeMo/tree/r1.19.0/examples). " ] }, {
diff --git a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb index b9c0db866f9c..fac120e1b699 100644 --- a/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb +++ b/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb
@@ -40,7 +40,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n",
@@ -71,7 +71,7 @@ "\n", "For this tutorial (and limited by the compute and storage available on Colab environments), we will attempt to fine-tune an English ASR model onto the [Mozilla Common Voice](https://commonvoice.mozilla.org/en) dataset for Japanese. This dataset will also allow us to discuss a few details for fine-tuning low-resource languages. The methods discussed here can also be applied to languages with several thousand hours of data!\n", "\n", - "**Note**: It is advised to review the execution flow diagram for ASR models in order to correctly set up the model prior to fine-tuning - [ASR CTC Examples](https://github.com/NVIDIA/NeMo/blob/main/examples/asr/asr_ctc/README.md)\n" + "**Note**: It is advised to review the execution flow diagram for ASR models in order to correctly set up the model prior to fine-tuning - [ASR CTC Examples](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/asr/asr_ctc/README.md)\n" ] }, {
diff --git a/tutorials/asr/ASR_Example_CommonVoice_Finetuning.ipynb b/tutorials/asr/ASR_Example_CommonVoice_Finetuning.ipynb index 5293f85044fc..c0af01bd27c2 100644 --- a/tutorials/asr/ASR_Example_CommonVoice_Finetuning.ipynb +++ b/tutorials/asr/ASR_Example_CommonVoice_Finetuning.ipynb
@@ -10,7 +10,7 @@ "NOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n", "\n", "\n", - "Training an ASR model for a new language can be challenging, especially for low-resource languages (see the [example](https://github.com/NVIDIA/NeMo/blob/main/docs/source/asr/examples/kinyarwanda_asr.rst) for the Kinyarwanda CommonVoice ASR model).\n", + "Training an ASR model for a new language can be challenging, especially for low-resource languages (see the [example](https://github.com/NVIDIA/NeMo/blob/r1.19.0/docs/source/asr/examples/kinyarwanda_asr.rst) for the Kinyarwanda CommonVoice ASR model).\n", "\n", "This example describes all basic steps required to build an ASR model for Esperanto:\n", "\n",
@@ -160,7 +160,7 @@ "\n", "The tarred dataset allows storing the dataset as large *.tar files instead of small separate audio files. It may speed up training and minimize the load when data is moved from storage to GPU nodes.\n", "\n", - "The NeMo toolkit provides a [script]( https://github.com/NVIDIA/NeMo/blob/main/scripts/speech_recognition/convert_to_tarred_audio_dataset.py) to get a tarred dataset.\n", + "The NeMo toolkit provides a [script]( https://github.com/NVIDIA/NeMo/blob/r1.19.0/scripts/speech_recognition/convert_to_tarred_audio_dataset.py) to get a tarred dataset.\n", "\n", "```bash\n", "\n",
@@ -207,11 +207,11 @@ "source": [ "## Training hyper-parameters\n", "\n", - "The training parameters are defined in the [config file](https://github.com/NVIDIA/NeMo/blob/main/examples/asr/conf/conformer/conformer_ctc_bpe.yaml) (general description of the [ASR configuration file](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/configs.html)). As an encoder, the [Conformer model](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/models.html#conformer-ctc) is used here; its training parameters are already well configured based on training English models. However, the set of optimal parameters may differ for a new language. In this section, we will look at a set of simple parameters that can improve recognition quality for a new language without digging into the details of the Conformer model too much.\n", + "The training parameters are defined in the [config file](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/asr/conf/conformer/conformer_ctc_bpe.yaml) (general description of the [ASR configuration file](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/configs.html)). As an encoder, the [Conformer model](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/stable/asr/models.html#conformer-ctc) is used here; its training parameters are already well configured based on training English models. However, the set of optimal parameters may differ for a new language. In this section, we will look at a set of simple parameters that can improve recognition quality for a new language without digging into the details of the Conformer model too much.\n", "\n", "### Select Training Batch Size\n", "\n", - "We trained the model on a server with 16 V100 GPUs (32 GB each). We use a local batch size of 32 per V100 GPU, so the global batch size is 32x16=512. In general, we observed that a global batch size between 512 and 2048 works well for the Conformer-CTC-Large model. One can use the [accumulate_grad_batches](https://github.com/NVIDIA/NeMo/blob/main/examples/asr/conf/conformer/conformer_ctc_bpe.yaml#L173) parameter to increase the size of the global batch, which is equal to *local_batch * num_gpu * accumulate_grad_batches*.\n", + "We trained the model on a server with 16 V100 GPUs (32 GB each). We use a local batch size of 32 per V100 GPU, so the global batch size is 32x16=512. In general, we observed that a global batch size between 512 and 2048 works well for the Conformer-CTC-Large model.
One can use the [accumulate_grad_batches](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/asr/conf/conformer/conformer_ctc_bpe.yaml#L173) parameter to increase the size of the global batch, which is equal to *local_batch * num_gpu * accumulate_grad_batches*.\n", "\n", "### Selecting Optimizer and Learning Rate Scheduler\n", "\n", @@ -327,7 +327,7 @@ "+init_from_pretrained_model=${PRETRAINED_MODEL_NAME}\n", "```\n", "\n", - "If the size of the vocabulary differs from the one presented in the pretrained model, you need to change the vocabulary manually as done in the [finetuning tutorial](https://github.com/NVIDIA/NeMo/blob/main/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb).\n", + "If the size of the vocabulary differs from the one presented in the pretrained model, you need to change the vocabulary manually as done in the [finetuning tutorial](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/asr/ASR_CTC_Language_Finetuning.ipynb).\n", "\n", "```python\n", "model = nemo_asr.models.EncDecCTCModelBPE.from_pretrained(f\"nvidia/{PRETRAINED_MODEL_NAME}\", map_location='cpu')\n", diff --git a/tutorials/asr/ASR_for_telephony_speech.ipynb b/tutorials/asr/ASR_for_telephony_speech.ipynb index 11ba4b85bd47..48be4b4db737 100644 --- a/tutorials/asr/ASR_for_telephony_speech.ipynb +++ b/tutorials/asr/ASR_for_telephony_speech.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/ASR_with_NeMo.ipynb b/tutorials/asr/ASR_with_NeMo.ipynb index 0c0d239bf58c..c1f62a871a91 100644 --- a/tutorials/asr/ASR_with_NeMo.ipynb +++ b/tutorials/asr/ASR_with_NeMo.ipynb @@ -54,7 +54,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -588,7 +588,7 @@ "\n", "if not os.path.exists(config_path):\n", " # Grab the config we'll use in this example\n", - " BRANCH = 'main'\n", + " BRANCH = 'r1.19.0'\n", " !mkdir configs\n", " !wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/$BRANCH/examples/asr/conf/config.yaml\n", "\n", diff --git a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb index b932916f2bc5..cf4d8442fe0f 100644 --- a/tutorials/asr/ASR_with_Subword_Tokenization.ipynb +++ b/tutorials/asr/ASR_with_Subword_Tokenization.ipynb @@ -41,7 +41,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/ASR_with_Transducers.ipynb b/tutorials/asr/ASR_with_Transducers.ipynb index e6bccc3f0f42..7846a1468d98 100644 --- a/tutorials/asr/ASR_with_Transducers.ipynb +++ b/tutorials/asr/ASR_with_Transducers.ipynb @@ -29,7 +29,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/Buffered_Transducer_Inference.ipynb 
b/tutorials/asr/Buffered_Transducer_Inference.ipynb index c23398dca46a..bc1209a80410 100644 --- a/tutorials/asr/Buffered_Transducer_Inference.ipynb +++ b/tutorials/asr/Buffered_Transducer_Inference.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "# Update numba and restart (this is required to update internal numba version of Colab)\n", diff --git a/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb b/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb index 2f179eaa9a5a..fad96a6097b0 100644 --- a/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb +++ b/tutorials/asr/Buffered_Transducer_Inference_with_LCS_Merge.ipynb @@ -46,7 +46,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "# Update numba and restart (this is required to update internal numba version of Colab)\n", diff --git a/tutorials/asr/Intro_to_Transducers.ipynb b/tutorials/asr/Intro_to_Transducers.ipynb index d3928bed987f..c82d7ed86dcd 100644 --- a/tutorials/asr/Intro_to_Transducers.ipynb +++ b/tutorials/asr/Intro_to_Transducers.ipynb @@ -44,7 +44,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ], "execution_count": null, @@ -225,7 +225,7 @@ "id": "0W12xF_CqcVF" }, "source": [ - "![](https://github.com/NVIDIA/NeMo/blob/main/tutorials/asr/images/transducer.png?raw=true)" + "![](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/asr/images/transducer.png?raw=true)" ] }, { diff --git a/tutorials/asr/Multilang_ASR.ipynb b/tutorials/asr/Multilang_ASR.ipynb index a1edeea815d0..431dc515a459 100644 --- a/tutorials/asr/Multilang_ASR.ipynb +++ b/tutorials/asr/Multilang_ASR.ipynb @@ -104,7 +104,7 @@ "\n", "## Install NeMo\n", "## We are using the main branch but you might want to adjust that too\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -204,7 +204,7 @@ "outputs": [], "source": [ "if not os.path.exists(\"get_librispeech_data.py\"):\n", - " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/dataset_processing/get_librispeech_data.py" + " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/dataset_processing/get_librispeech_data.py" ] }, { @@ -296,7 +296,7 @@ "outputs": [], "source": [ "if not os.path.exists(\"get_commonvoice_data.py\"):\n", - " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/dataset_processing/get_commonvoice_data.py" + " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/dataset_processing/get_commonvoice_data.py" ] }, { @@ -800,7 +800,7 @@ "outputs": [], "source": [ "if not os.path.exists(\"process_asr_text_tokenizer.py\"):\n", - " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/tokenizers/process_asr_text_tokenizer.py" + " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/tokenizers/process_asr_text_tokenizer.py" ] }, { diff --git a/tutorials/asr/Offline_ASR.ipynb b/tutorials/asr/Offline_ASR.ipynb index fc8af2e76416..685d3ef6f37a 100644 
--- a/tutorials/asr/Offline_ASR.ipynb +++ b/tutorials/asr/Offline_ASR.ipynb @@ -30,7 +30,7 @@ "* use beam search decoder with N-gram language model re-scoring\n", "\n", "You may find more info on how to train and use language models for ASR models here:\n", - "https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/asr/asr_language_modeling.html\n", + "https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/asr/asr_language_modeling.html\n", "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n" ] }, @@ -52,7 +52,7 @@ "id": "I9eIxAyKHREB" }, "source": [ - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "try:\n", " # Import NeMo Speech Recognition collection\n", " import nemo.collections.asr as nemo_asr\n", diff --git a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb index b38fab2c98bf..9d4f66b82599 100644 --- a/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb +++ b/tutorials/asr/Offline_ASR_with_VAD_for_CTC_models.ipynb @@ -23,7 +23,7 @@ "!pip install wget\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", diff --git a/tutorials/asr/Online_ASR_Microphone_Demo.ipynb b/tutorials/asr/Online_ASR_Microphone_Demo.ipynb index 31d2c0dec943..6a1ac0bb1079 100644 --- a/tutorials/asr/Online_ASR_Microphone_Demo.ipynb +++ b/tutorials/asr/Online_ASR_Microphone_Demo.ipynb @@ -27,7 +27,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Grab the config we'll use in this example\n", diff --git a/tutorials/asr/Online_Noise_Augmentation.ipynb b/tutorials/asr/Online_Noise_Augmentation.ipynb index f8741cdcbfe1..b2fbf1a2b17d 100644 --- a/tutorials/asr/Online_Noise_Augmentation.ipynb +++ b/tutorials/asr/Online_Noise_Augmentation.ipynb @@ -32,7 +32,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb index 7a8dacd82b6a..e642fd4f6961 100644 --- a/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb @@ -27,7 +27,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", @@ -67,7 +67,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This notebook requires the `torchaudio` library to be installed for MarbleNet. Please follow the instructions available at the [torchaudio installer](https://github.com/NVIDIA/NeMo/blob/main/scripts/installers/install_torchaudio_latest.sh) and [torchaudio Github page](https://github.com/pytorch/audio#installation) to install the appropriate version of torchaudio.\n" + "This notebook requires the `torchaudio` library to be installed for MarbleNet. 
Please follow the instructions available at the [torchaudio installer](https://github.com/NVIDIA/NeMo/blob/r1.19.0/scripts/installers/install_torchaudio_latest.sh) and [torchaudio Github page](https://github.com/pytorch/audio#installation) to install the appropriate version of torchaudio.\n" ] }, { diff --git a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb index c704ee1145c3..23e31e5b0da0 100644 --- a/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb +++ b/tutorials/asr/Online_Offline_Speech_Commands_Demo.ipynb @@ -29,7 +29,7 @@ "!pip install pyaudio\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", @@ -59,7 +59,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "This notebook requires the `torchaudio` library to be installed for MatchboxNet. Please follow the instructions available at the [torchaudio installer](https://github.com/NVIDIA/NeMo/blob/main/scripts/installers/install_torchaudio_latest.sh) and [torchaudio Github page](https://github.com/pytorch/audio#installation) to install the appropriate version of torchaudio.\n" + "This notebook requires the `torchaudio` library to be installed for MatchboxNet. Please follow the instructions available at the [torchaudio installer](https://github.com/NVIDIA/NeMo/blob/r1.19.0/scripts/installers/install_torchaudio_latest.sh) and [torchaudio Github page](https://github.com/pytorch/audio#installation) to install the appropriate version of torchaudio.\n" ] }, { diff --git a/tutorials/asr/Self_Supervised_Pre_Training.ipynb b/tutorials/asr/Self_Supervised_Pre_Training.ipynb index 04998f68f23e..fe47a62e2f27 100644 --- a/tutorials/asr/Self_Supervised_Pre_Training.ipynb +++ b/tutorials/asr/Self_Supervised_Pre_Training.ipynb @@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -51,7 +51,7 @@ "\n", "The approach we will use for pre-training our models is represented in the following diagram:\n", "\n", - " ![SSL diagram](https://raw.githubusercontent.com/NVIDIA/NeMo/main/tutorials/asr/images/contrastive_ssl.png)\n", + " ![SSL diagram](https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/tutorials/asr/images/contrastive_ssl.png)\n", "\n", "We first mask parts of our input using SpecAugment. The model is then trained to solve a contrastive task of distinguishing the latent representation of the masked time steps from several sampled distractors. Since our encoders also contain stride blocks which reduce the length of the inputs, in order to obtain target representations we combine several consecutive time steps. They are then passed through a quantizer, which has been found to help with contrastive pre-training." 
] @@ -272,8 +272,8 @@ "source": [ "## Grab the configs we'll use in this example\n", "!mkdir configs\n", - "!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/conf/ssl/citrinet/citrinet_ssl_1024.yaml\n", - "!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/conf/citrinet/citrinet_1024.yaml\n" + "!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/asr/conf/ssl/citrinet/citrinet_ssl_1024.yaml\n", + "!wget -P configs/ https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/asr/conf/citrinet/citrinet_1024.yaml\n" ] }, {
@@ -482,7 +482,7 @@ "outputs": [], "source": [ "!mkdir scripts\n", - "!wget -P scripts/ https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/tokenizers/process_asr_text_tokenizer.py\n", + "!wget -P scripts/ https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/tokenizers/process_asr_text_tokenizer.py\n", "\n", "!python ./scripts/process_asr_text_tokenizer.py \\\n", " --manifest=\"{data_dir}/an4/train_manifest.json\" \\\n",
diff --git a/tutorials/asr/Speech_Commands.ipynb b/tutorials/asr/Speech_Commands.ipynb index 208752347d64..b26cba7da0b3 100644 --- a/tutorials/asr/Speech_Commands.ipynb +++ b/tutorials/asr/Speech_Commands.ipynb
@@ -61,7 +61,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n",
diff --git a/tutorials/asr/Streaming_ASR.ipynb b/tutorials/asr/Streaming_ASR.ipynb index a4701dc025d8..d90bf363370a 100644 --- a/tutorials/asr/Streaming_ASR.ipynb +++ b/tutorials/asr/Streaming_ASR.ipynb
@@ -28,7 +28,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n",
@@ -62,7 +62,7 @@ "* Real-time or close to real-time inference for live transcriptions\n", "* Offline transcriptions of very long audio\n", "\n", - "In this tutorial, we will mainly focus on streaming for handling long-form audio and close to real-time inference with CTC-based models. For training ASR models, we usually use short segments of audio (<20s) that may be smaller chunks of a long audio file that has been aligned with its transcription and segmented (see [tools/](https://github.com/NVIDIA/NeMo/tree/main/tools) for some great tools to do this). For running inference on long audio files, we are restricted by the available GPU memory, which dictates the maximum length of audio that can be transcribed in one inference call. We will take a look at one of the ways to overcome this restriction using NeMo's Conformer-CTC ASR model." + "In this tutorial, we will mainly focus on streaming for handling long-form audio and close to real-time inference with CTC-based models. For training ASR models, we usually use short segments of audio (<20s) that may be smaller chunks of a long audio file that has been aligned with its transcription and segmented (see [tools/](https://github.com/NVIDIA/NeMo/tree/r1.19.0/tools) for some great tools to do this). For running inference on long audio files, we are restricted by the available GPU memory, which dictates the maximum length of audio that can be transcribed in one inference call.
We will take a look at one of the ways to overcome this restriction using NeMo's Conformer-CTC ASR model." ] }, { diff --git a/tutorials/asr/Voice_Activity_Detection.ipynb b/tutorials/asr/Voice_Activity_Detection.ipynb index b8013822c486..b4c7e33f7bab 100644 --- a/tutorials/asr/Voice_Activity_Detection.ipynb +++ b/tutorials/asr/Voice_Activity_Detection.ipynb @@ -28,7 +28,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", diff --git a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb index 62481c3762d2..80cf4ecacc1d 100644 --- a/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb +++ b/tutorials/asr/asr_adapters/ASR_with_Adapters.ipynb @@ -50,7 +50,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "## Grab the config we'll use in this example\n", @@ -1297,7 +1297,7 @@ "source": [ "# Further reading\n", "\n", - "For efficient scripts to add, train, and evaluate adapter augmented models, please refer to the [Adapters example section](https://github.com/NVIDIA/NeMo/tree/main/examples/asr/asr_adapters).\n", + "For efficient scripts to add, train, and evaluate adapter augmented models, please refer to the [Adapters example section](https://github.com/NVIDIA/NeMo/tree/r1.19.0/examples/asr/asr_adapters).\n", "\n", "Please follow the following articles that discuss the use of adapters in ASR - \n", "- [Exploiting Adapters for Cross-lingual Low-resource Speech Recognition](https://arxiv.org/abs/2105.11905)\n", diff --git a/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb b/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb index c4406a4f04ee..97697781dddd 100644 --- a/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb +++ b/tutorials/cloud/aws/ASR_Finetuning_at_Scale_with_AWS_SageMaker.ipynb @@ -70,7 +70,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -193,17 +193,17 @@ "config_path = str(config_dir / \"config.yaml\")\n", "\n", "# download scripts to format the data source.\n", - "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/speech_recognition/convert_hf_dataset_to_nemo.py\", str(code_dir))\n", - "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/speech_recognition/convert_to_tarred_audio_dataset.py\",\n", + "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/speech_recognition/convert_hf_dataset_to_nemo.py\", str(code_dir))\n", + "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/speech_recognition/convert_to_tarred_audio_dataset.py\",\n", " str(code_dir))\n", "\n", "# download scripts to run training\n", - "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/conf/conformer/conformer_ctc_bpe.yaml\", config_path)\n", - "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/asr_ctc/speech_to_text_ctc_bpe.py\",\n", + 
"wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/asr/conf/conformer/conformer_ctc_bpe.yaml\", config_path)\n", + "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/asr/asr_ctc/speech_to_text_ctc_bpe.py\",\n", " str(code_dir))\n", "\n", "# download script to create tokenizer\n", - "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/tokenizers/process_asr_text_tokenizer.py\",\n", + "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/tokenizers/process_asr_text_tokenizer.py\",\n", " str(code_dir))" ] }, diff --git a/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb b/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb index 8cf540b27114..078e76d55ba7 100644 --- a/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb +++ b/tutorials/cloud/aws/SageMaker_ASR_Training.ipynb @@ -55,7 +55,7 @@ "!pip install matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "\"\"\"\n", @@ -173,8 +173,8 @@ "outputs": [], "source": [ "config_path = str(config_dir / \"config.yaml\")\n", - "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/conf/conformer/conformer_ctc_char.yaml\", config_path)\n", - "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/asr/asr_ctc/speech_to_text_ctc.py\", str(code_dir))" + "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/asr/conf/conformer/conformer_ctc_char.yaml\", config_path)\n", + "wget.download(\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/asr/asr_ctc/speech_to_text_ctc.py\", str(code_dir))" ] }, { diff --git a/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb b/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb index faa93de12514..c18ebbac596b 100644 --- a/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb +++ b/tutorials/nlp/01_Pretrained_Language_Models_for_Downstream_Tasks.ipynb @@ -26,7 +26,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, @@ -152,7 +152,7 @@ "id": "jEgEo0aPj3Ws" }, "source": [ - "All NeMo [NLP models](https://github.com/NVIDIA/NeMo/tree/main/examples/nlp) have an associated config file. As an example, let's examine the config file for the Named Entity Recognition (NER) model (more details about the model and the NER task could be found [here](https://github.com/NVIDIA/NeMo/blob/stable/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb))." + "All NeMo [NLP models](https://github.com/NVIDIA/NeMo/tree/r1.19.0/examples/nlp) have an associated config file. As an example, let's examine the config file for the Named Entity Recognition (NER) model (more details about the model and the NER task could be found [here](https://github.com/NVIDIA/NeMo/blob/stable/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb))." ] }, { @@ -261,7 +261,7 @@ "id": "EVp4zvxPatga" }, "source": [ - "and then start the training as usual (please see [tutorials/nlp](https://github.com/NVIDIA/NeMo/tree/main/tutorials/nlp) for more details about training of a particular model). 
\n", + "and then start the training as usual (please see [tutorials/nlp](https://github.com/NVIDIA/NeMo/tree/r1.19.0/tutorials/nlp) for more details about training of a particular model). \n", "\n", "You can also provide a pretrained language model checkpoint and a configuration file if available.\n", "\n", @@ -349,7 +349,7 @@ "model.language_model.lm_checkpoint= \\\n", "model.language_model.config_file=`\n", "\n", - "The general Megatron-LM model names are used to download the correct vocabulary file needed to setup the model correctly. Note, the data preprocessing and model training is done in NeMo. Megatron-LM has its own set of training arguments (including tokenizer) that are ignored during finetuning in NeMo. Please see downstream task [config files and training scripts](https://github.com/NVIDIA/NeMo/tree/main/examples/nlp) for all NeMo supported arguments.\n", + "The general Megatron-LM model names are used to download the correct vocabulary file needed to setup the model correctly. Note, the data preprocessing and model training is done in NeMo. Megatron-LM has its own set of training arguments (including tokenizer) that are ignored during finetuning in NeMo. Please see downstream task [config files and training scripts](https://github.com/NVIDIA/NeMo/tree/r1.19.0/examples/nlp) for all NeMo supported arguments.\n", "\n", "## Download pretrained model\n", "\n", @@ -373,7 +373,7 @@ "source": [ "# Using any HuggingFace Pretrained Model\n", "\n", - "Currently, there are 4 HuggingFace language models that have the most extensive support in [NeMo](https://github.com/NVIDIA/NeMo/tree/main/nemo/collections/nlp/modules/common/huggingface): \n", + "Currently, there are 4 HuggingFace language models that have the most extensive support in [NeMo](https://github.com/NVIDIA/NeMo/tree/r1.19.0/nemo/collections/nlp/modules/common/huggingface): \n", "\n", "* BERT\n", "* RoBERTa\n", @@ -383,7 +383,7 @@ "As was mentioned before, just set `model.language_model.pretrained_model_name` to the desired model name in your config and get_lm_model() will take care of the rest.\n", "\n", "If you want to use another language model from [https://huggingface.co/models](https://huggingface.co/models), use HuggingFace API directly in NeMo.\n", - "More details on model training could be found at [tutorials](https://github.com/NVIDIA/NeMo/tree/main/tutorials)." + "More details on model training could be found at [tutorials](https://github.com/NVIDIA/NeMo/tree/r1.19.0/tutorials)." 
] } ], diff --git a/tutorials/nlp/02_NLP_Tokenizers.ipynb b/tutorials/nlp/02_NLP_Tokenizers.ipynb index c63d2a8b1689..5c909fe73432 100644 --- a/tutorials/nlp/02_NLP_Tokenizers.ipynb +++ b/tutorials/nlp/02_NLP_Tokenizers.ipynb @@ -10,7 +10,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'main'" + "BRANCH = 'r1.19.0'" ] }, { @@ -35,7 +35,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb b/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb index 323bfa1c49b8..28d5330ac3b2 100644 --- a/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb +++ b/tutorials/nlp/Data_Preprocessing_and_Cleaning_for_NMT.ipynb @@ -217,7 +217,7 @@ "print()\n", "\n", "\n", - "!wget https://raw.github.com/NVIDIA/NeMo/main/scripts/neural_machine_translation/filter_langs_nmt.py \\\n", + "!wget https://raw.github.com/NVIDIA/NeMo/r1.19.0/scripts/neural_machine_translation/filter_langs_nmt.py \\\n", " -O filter_langs_nmt.py\n", "\n", "!python filter_langs_nmt.py \\\n", @@ -300,7 +300,7 @@ "\n", "## Install NeMo\n", "\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", "\n", "!pip uninstall -y sacrebleu\n", @@ -760,7 +760,7 @@ "metadata": {}, "outputs": [], "source": [ - "!wget https://raw.github.com/NVIDIA/NeMo/main/examples/nlp/machine_translation/create_tarred_parallel_dataset.py \\\n", + "!wget https://raw.github.com/NVIDIA/NeMo/r1.19.0/examples/nlp/machine_translation/create_tarred_parallel_dataset.py \\\n", " -O create_tarred_parallel_dataset.py\n", "\n", "!python create_tarred_parallel_dataset.py \\\n", diff --git a/tutorials/nlp/Dialogue.ipynb b/tutorials/nlp/Dialogue.ipynb index ddd3bdd4f929..cf0392da9c36 100644 --- a/tutorials/nlp/Dialogue.ipynb +++ b/tutorials/nlp/Dialogue.ipynb @@ -27,7 +27,7 @@ "outputs": [], "source": [ "import os \n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!apt-get update && apt-get install -y libsndfile1 ffmpeg\n", "!git clone https://github.com/NVIDIA/NeMo --branch $BRANCH\n", "os.chdir('NeMo')\n", diff --git a/tutorials/nlp/Entity_Linking_Medical.ipynb b/tutorials/nlp/Entity_Linking_Medical.ipynb index ff8eda123b7f..892eb881b528 100644 --- a/tutorials/nlp/Entity_Linking_Medical.ipynb +++ b/tutorials/nlp/Entity_Linking_Medical.ipynb @@ -17,7 +17,7 @@ "\"\"\"\n", "\n", "## Install NeMo if using google collab or if its not installed locally\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, @@ -68,7 +68,7 @@ "#### Task Description\n", "[Entity linking](https://en.wikipedia.org/wiki/Entity_linking) is the process of connecting concepts mentioned in natural language to their canonical forms stored in a knowledge base. For example, say a knowledge base contained the entity 'ID3452 influenza' and we wanted to process some natural language containing the sentence \"The patient has flu like symptoms\". An entity linking model would match the word 'flu' to the knowledge base entity 'ID3452 influenza', allowing for disambiguation and normalization of concepts referenced in text. Entity linking applications range from helping automate data ingestion to assisting in real time dialogue concept normalization. 
We will be focusing on entity linking in the medical domain for this demo, but the entity linking model, dataset, and training code within NVIDIA NeMo can be applied to other domains like finance and retail.\n", "\n", - "Within NeMo and this tutorial we use the entity linking approach described in Liu et. al's NAACL 2021 \"[Self-alignment Pre-training for Biomedical Entity Representations](https://arxiv.org/abs/2010.11784v2)\". The main idea behind this approach is to reshape an initial concept embedding space such that synonyms of the same concept are pulled closer together and unrelated concepts are pushed further apart. The concept embeddings from this reshaped space can then be used to build a knowledge base embedding index. This index stores concept IDs mapped to their respective concept embeddings in a format conducive to efficient nearest neighbor search. We can link query concepts to their canonical forms in the knowledge base by performing a nearest neighbor search- matching concept query embeddings to the most similar concepts embeddings in the knowledge base index. \n", + "Within NeMo and this tutorial we use the entity linking approach described in Liu et al.'s NAACL 2021 \"[Self-alignment Pre-training for Biomedical Entity Representations](https://arxiv.org/abs/2010.11784v2)\". The main idea behind this approach is to reshape an initial concept embedding space such that synonyms of the same concept are pulled closer together and unrelated concepts are pushed further apart. The concept embeddings from this reshaped space can then be used to build a knowledge base embedding index. This index stores concept IDs mapped to their respective concept embeddings in a format conducive to efficient nearest neighbor search. We can link query concepts to their canonical forms in the knowledge base by performing a nearest neighbor search, matching concept query embeddings to the most similar concept embeddings in the knowledge base index. \n", "\n", "In this tutorial we will be using the [faiss](https://github.com/facebookresearch/faiss) library to build our concept index." ] diff --git a/tutorials/nlp/GLUE_Benchmark.ipynb b/tutorials/nlp/GLUE_Benchmark.ipynb index d8fe75940b09..1c60b95bcc8c 100644 --- a/tutorials/nlp/GLUE_Benchmark.ipynb +++ b/tutorials/nlp/GLUE_Benchmark.ipynb @@ -44,7 +44,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" + "BRANCH = 'r1.19.0'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" ], "execution_count": null, "outputs": [] diff --git a/tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb b/tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb index 6204bf2516bb..50ec879b7761 100644 --- a/tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb +++ b/tutorials/nlp/ITN_with_Thutmose_Tagger.ipynb @@ -21,7 +21,7 @@ "import os\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "\n", "GITHUB_ACCOUNT = 'NVIDIA' # change this if using a fork\n", "\n", @@ -284,7 +284,7 @@ "id": "miXYxOv_mNVo" }, "source": [ - "The script [prepare_corpora_for_alignment.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/text_normalization_as_tagging/dataset_preparation/prepare_corpora_for_alignment.py) prepares the described parallel corpora. 
It extracts all unique ITN phrase-pairs from the Google TN dataset, tokenizes them as described above and stores in separate folders for each semiotic class. It also generates a bash script for running the alignment. At the end it prints how many examples it has found:\n", + "The script [prepare_corpora_for_alignment.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/text_normalization_as_tagging/dataset_preparation/prepare_corpora_for_alignment.py) prepares the described parallel corpora. It extracts all unique ITN phrase-pairs from the Google TN dataset, tokenizes them as described above and stores them in separate folders for each semiotic class. It also generates a bash script for running the alignment. At the end it prints how many examples it has found:\n", ```\n", "content/alignment/punct has 920953 instances\n", "content/alignment/date has 150499 instances\n", @@ -405,7 +405,7 @@ { "cell_type": "markdown", "source": [ - "GIZA++ will generate many files in our class folders, but we need only two files with final alignments, those with suffixes `A3.final`. The two files correspond to the alignments produced by two GIZA++ runs - direct and reverse (switching source and target corpus). This is a common practice, it allows us to find safer alignment points - tokens that were aligned to one another in both runs. The script [extract_giza_alignments.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/text_normalization_as_tagging/dataset_preparation/extract_giza_alignments.py) heuristically combines these two GIZA++ alignments. It also applies a bunch of regular expressions to correct some alignment mistakes." + "GIZA++ will generate many files in our class folders, but we need only two files with final alignments, those with suffixes `A3.final`. The two files correspond to the alignments produced by two GIZA++ runs - direct and reverse (switching source and target corpus). This is a common practice; it allows us to find safer alignment points - tokens that were aligned to one another in both runs. The script [extract_giza_alignments.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/text_normalization_as_tagging/dataset_preparation/extract_giza_alignments.py) heuristically combines these two GIZA++ alignments. It also applies a bunch of regular expressions to correct some alignment mistakes."
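To make the "safer alignment points" idea concrete, a toy illustration with invented indices (the real script additionally applies the regular-expression corrections mentioned above):

```python
# Keep only token pairs that both GIZA++ runs agree on.
direct = {(0, 0), (1, 2), (2, 1)}   # (src, tgt) pairs from the src->tgt run
reverse = {(0, 0), (2, 1), (3, 3)}  # pairs from the tgt->src run, re-oriented
safe_points = direct & reverse      # tokens aligned to one another in both runs
print(sorted(safe_points))          # [(0, 0), (2, 1)]
```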
], "metadata": { "id": "ueJYVF0cU3ic" @@ -1016,11 +1016,11 @@ "\n", "See also the scripts for the whole pipeline:\n", "\n", - "> [prepare_dataset_en.sh](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/text_normalization_as_tagging/prepare_dataset_en.sh)\n", + "> [prepare_dataset_en.sh](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/text_normalization_as_tagging/prepare_dataset_en.sh)\n", "\n", - "> [normalization_as_tagging_train.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/text_normalization_as_tagging/normalization_as_tagging_train.py)\n", + "> [normalization_as_tagging_train.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/text_normalization_as_tagging/normalization_as_tagging_train.py)\n", "\n", - "> [run_infer.sh](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/text_normalization_as_tagging/run_infer.sh)\n", + "> [run_infer.sh](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/text_normalization_as_tagging/run_infer.sh)\n", "\n" ], "metadata": { diff --git a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb index 104d69df18e2..c656fdd7088a 100644 --- a/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb +++ b/tutorials/nlp/Joint_Intent_and_Slot_Classification.ipynb @@ -22,7 +22,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, diff --git a/tutorials/nlp/MegatronBert_export.ipynb b/tutorials/nlp/MegatronBert_export.ipynb index f925d2bc59b0..5bec75028064 100644 --- a/tutorials/nlp/MegatronBert_export.ipynb +++ b/tutorials/nlp/MegatronBert_export.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='main'" + "BRANCH='r1.19.0'" ] }, { @@ -64,7 +64,7 @@ "\n", "If you prefer to use the Huggingface BERT models, please skip this section and refer to `Setting up a NeMo Experiment` section to load a model from `nemo_nlp.modules.get_pretrained_lm_models_list()`\n", "\n", - "NeMo Megatron BERT can [load from a pretrained model](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/core/core.html?highlight=nemo%20file#restore) using `.nemo` file. We can convert the Megatron-LM checkpoint to the `.nemo` file. Let's first download the pretrained model weights and vocabulary file." + "NeMo Megatron BERT can [load from a pretrained model](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/core/core.html?highlight=nemo%20file#restore) using `.nemo` file. We can convert the Megatron-LM checkpoint to the `.nemo` file. Let's first download the pretrained model weights and vocabulary file." 
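A minimal sketch of the restore step mentioned above, assuming the Megatron-LM checkpoint has already been converted; the import path and file name here are assumptions for illustration, not part of this patch.

```python
# Sketch: load a converted Megatron BERT checkpoint from a .nemo file.
from nemo.collections.nlp.models.language_modeling.megatron_bert_model import (
    MegatronBertModel,
)

model = MegatronBertModel.restore_from("megatron_bert_345m.nemo")  # placeholder path
model.eval()  # switch to inference mode before export or evaluation
```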
] }, { diff --git a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb index e1aa32f7bbf1..a92317b17320 100644 --- a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb +++ b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb @@ -62,7 +62,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "DATA_PATH='.'\n", "TRANSACTIONS=DATA_PATH+'/card_transaction.v1.csv'\n", "#CHECKPOINTS='/chk_points'\n", diff --git a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb index 004014ebdeeb..02d533e59ad4 100644 --- a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb +++ b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='main'" + "BRANCH='r1.19.0'" ] }, { @@ -45,7 +45,7 @@ "\n", "- Our p-tuning implementation is based off Liu et al's paper [GPT Understands, Too](https://arxiv.org/abs/2103.10385).\n", "\n", - "- Command line usage examples and API documentation can be found in [our user docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/nemo_megatron/prompt_learning.html). \n", + "- Command line usage examples and API documentation can be found in [our user docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/nlp/nemo_megatron/prompt_learning.html). \n", "\n", "\"Prompt\n", "\n", @@ -88,7 +88,7 @@ "# The Best of Both\n", "A single pretrained GPT model can use both p-tuning and prompt-tuning. While you must decide to use either p-tuning or prompt-tuning for each task you want your model to perform, you can p-tune your model on a set of tasks A, then prompt tune your same model on a different set of tasks B, then finally run inference on tasks from both A and B at the same time. During prompt-tuning or p-tuning, tasks tuned at the same time must use the same number of virtual tokens. During inference, tasks using differing amounts of virtual tokens can be run at the same time.\n", "\n", - "Please see our [docs for more comparisons between prompt and p-tuning](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/nemo_megatron/prompt_learning.html). \n", + "Please see our [docs for more comparisons between prompt and p-tuning](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/nlp/nemo_megatron/prompt_learning.html). \n", "\n", "With all that covered, let's get started!\n" ] @@ -723,7 +723,7 @@ "- `length_params`\n", "- `sampling_params`\n", "\n", - "as arguments. More information about the [text generation API can be found here](https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/nlp/modules/common/transformer/text_generation.py).\n", + "as arguments. More information about the [text generation API can be found here](https://github.com/NVIDIA/NeMo/blob/r1.19.0/nemo/collections/nlp/modules/common/transformer/text_generation.py).\n", "\n", "If `length_params` and `sampling_params` are set to `None`, the model generates output with a greedy decoding strategy and generates up to `30` new tokens. Most predictive downstream tasks (not text generation tasks), use greedy sampling. 
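A hedged sketch of that call, assuming `model` is a prompt-learning model restored earlier in the notebook; the parameter names follow the linked `text_generation.py`, while the concrete values are illustrative.

```python
# Sketch: greedy generation with explicit length and sampling parameters.
# `model` is assumed to be a restored prompt-learning model (see above).
length_params = {"max_length": 30, "min_length": 0}
sampling_params = {
    "use_greedy": True,   # greedy decoding, typical for predictive tasks
    "temperature": 1.0,
    "add_BOS": False,
}
response = model.generate(
    inputs=["Directions: classify the sentiment. Review: great movie!"],
    length_params=length_params,
    sampling_params=sampling_params,
)
print(response["sentences"][0])
```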
To see other ways to run inference with your prompt learning model and more details on how to define various inference parameters, visit `examples/nlp/language_modeling/megatron_gpt_eval.py`.\n", "\n", diff --git a/tutorials/nlp/Punctuation_and_Capitalization.ipynb b/tutorials/nlp/Punctuation_and_Capitalization.ipynb index 1519c234372b..54e424c831c2 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'main'" + "BRANCH = 'r1.19.0'" ] }, { @@ -293,7 +293,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "As you see, `get_tatoeba_data.py` script provides not only downloads Tatoeba but also creates labels. If you wish to preprocess your own data, use [examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py) script:\n", + "As you see, the `get_tatoeba_data.py` script not only downloads Tatoeba but also creates labels. If you wish to preprocess your own data, use [examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py) script:\n", "\n", "```\n", "NEMO_ROOT = \"\"\n", @@ -421,7 +421,7 @@ "\n", "- **trainer**: Any argument to be passed to PyTorch Lightning\n", "\n", - "See [docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/punctuation_and_capitalization.html#training-punctuation-and-capitalization-model) for full config description." + "See [docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/nlp/punctuation_and_capitalization.html#training-punctuation-and-capitalization-model) for full config description." ] }, { @@ -950,7 +950,7 @@ "source": [ "## Training Script\n", "\n", - "If you have NeMo installed locally, you can also train the model with [nlp/token_classification/punctuation_capitalization_train_evaluate.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/token_classification/punctuation_capitalization_train_evaluate.py).\n", + "If you have NeMo installed locally, you can also train the model with [nlp/token_classification/punctuation_capitalization_train_evaluate.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/token_classification/punctuation_capitalization_train_evaluate.py).\n", "\n", "To run training script, use:\n", "\n", diff --git a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb index 5580bc4cf946..3ce2ef9fb6de 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb @@ -10,7 +10,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'main'" + "BRANCH = 'r1.19.0'" ] }, { @@ -369,7 +369,7 @@ } }, "source": [ - "As you see, `get_libritts_data.py` script provides not only downloads LibriTTS but also creates labels. If you wish to preprocess your own data, use [examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py) script." + "As you see, the `get_libritts_data.py` script not only downloads LibriTTS but also creates labels. 
If you wish to preprocess your own data, use [examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/token_classification/data/prepare_data_for_punctuation_capitalization.py) script." ] }, { @@ -514,7 +514,7 @@ "\n", "- **trainer**: Any argument to be passed to PyTorch Lightning\n", "\n", - "See [docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/punctuation_and_capitalization.html#training-punctuation-and-capitalization-model) for full config description." + "See [docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/nlp/punctuation_and_capitalization.html#training-punctuation-and-capitalization-model) for full config description." ] }, { @@ -913,7 +913,7 @@ "source": [ "## Training Script\n", "\n", - "If you have NeMo installed locally, you can also train the model with [nlp/token_classification/punctuation_capitalization_lexical_audio_train_evaluate.py](https://github.com/NVIDIA/NeMo/blob/main/examples/nlp/token_classification/punctuation_capitalization_lexical_audio_train_evaluate.py).\n", + "If you have NeMo installed locally, you can also train the model with [nlp/token_classification/punctuation_capitalization_lexical_audio_train_evaluate.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/nlp/token_classification/punctuation_capitalization_lexical_audio_train_evaluate.py).\n", "\n", "To run training script, use:\n", "\n", diff --git a/tutorials/nlp/Question_Answering.ipynb b/tutorials/nlp/Question_Answering.ipynb index 7217b0fb6756..b337c569425d 100644 --- a/tutorials/nlp/Question_Answering.ipynb +++ b/tutorials/nlp/Question_Answering.ipynb @@ -74,7 +74,7 @@ }, "outputs": [], "source": [ - "BRANCH = 'main'" + "BRANCH = 'r1.19.0'" ] }, { diff --git a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb index b7c25cb416ef..0cd718e71754 100644 --- a/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb +++ b/tutorials/nlp/Relation_Extraction-BioMegatron.ipynb @@ -6,7 +6,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH = 'main'" + "BRANCH = 'r1.19.0'" ] }, { diff --git a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb index 5b5b74e7bf11..3296acd05919 100644 --- a/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb +++ b/tutorials/nlp/Text_Classification_Sentiment_Analysis.ipynb @@ -20,7 +20,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n", "\n" ] diff --git a/tutorials/nlp/Token_Classification-BioMegatron.ipynb b/tutorials/nlp/Token_Classification-BioMegatron.ipynb index 517f2e557743..c3f95bff841a 100644 --- a/tutorials/nlp/Token_Classification-BioMegatron.ipynb +++ b/tutorials/nlp/Token_Classification-BioMegatron.ipynb @@ -7,7 +7,7 @@ "metadata": {}, "outputs": [], "source": [ - "BRANCH='main'" + "BRANCH='r1.19.0'" ] }, { diff --git a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb index c3f7e28b6b1f..9b8007751f55 100644 --- a/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb +++ b/tutorials/nlp/Token_Classification_Named_Entity_Recognition.ipynb @@ -30,7 +30,7 @@ "metadata": {}, "outputs": [], "source": [ - 
"BRANCH = 'main'" + "BRANCH = 'r1.19.0'" ] }, { @@ -53,7 +53,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" + "BRANCH = 'r1.19.0'\n!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]\n" ], "execution_count": null, "outputs": [] diff --git a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb index 69df7b27b02d..9ec4482f30b2 100644 --- a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb +++ b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb @@ -22,7 +22,7 @@ "# If you're using Google Colab and not running locally, run this cell\n", "\n", "# install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[nlp]" ] }, @@ -630,7 +630,7 @@ "source": [ "## Training Script\n", "\n", - "If you have NeMo installed locally, you can also train the model with [examples/nlp/zero_shot_intent_recognition/zero_shot_intent_train.py](https://github.com/carolmanderson/NeMo/blob/main/examples/nlp/zero_shot_intent_recognition/zero_shot_intent_train.py).\n", + "If you have NeMo installed locally, you can also train the model with [examples/nlp/zero_shot_intent_recognition/zero_shot_intent_train.py](https://github.com/carolmanderson/NeMo/blob/r1.19.0/examples/nlp/zero_shot_intent_recognition/zero_shot_intent_train.py).\n", "\n", "To run training script, use:\n", "\n", diff --git a/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb b/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb index ea943b35e0d0..0c0b8163622c 100644 --- a/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb +++ b/tutorials/speaker_tasks/ASR_with_SpeakerDiarization.ipynb @@ -30,7 +30,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", @@ -58,7 +58,7 @@ "For detailed parameter setting and execution of speaker diarization, refer to this [Diarization Inference](https://github.com/NVIDIA/NeMo/blob/stable/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb) tutorial.\n", "\n", "\n", - "An example script that runs ASR and speaker diarization together can be found at [ASR with Diarization](https://github.com/NVIDIA/NeMo/blob/main/examples/speaker_tasks/diarization/clustering_diarizer/offline_diar_with_asr_infer.py).\n", + "An example script that runs ASR and speaker diarization together can be found at [ASR with Diarization](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/speaker_tasks/diarization/clustering_diarizer/offline_diar_with_asr_infer.py).\n", "\n", "### Speaker diarization in ASR pipeline\n", "\n", @@ -196,7 +196,7 @@ "DOMAIN_TYPE = \"meeting\" # Can be meeting or telephonic based on domain type of the audio file\n", "CONFIG_FILE_NAME = f\"diar_infer_{DOMAIN_TYPE}.yaml\"\n", "\n", - "CONFIG_URL = f\"https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/speaker_tasks/diarization/conf/inference/{CONFIG_FILE_NAME}\"\n", + "CONFIG_URL = f\"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/speaker_tasks/diarization/conf/inference/{CONFIG_FILE_NAME}\"\n", "\n", "if not os.path.exists(os.path.join(data_dir,CONFIG_FILE_NAME)):\n", " CONFIG = 
wget.download(CONFIG_URL, data_dir)\n", diff --git a/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb b/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb index 1fd0f1b140d5..93ff3ed97b2e 100644 --- a/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb +++ b/tutorials/speaker_tasks/Speaker_Diarization_Inference.ipynb @@ -23,7 +23,7 @@ "!pip install text-unidecode\n", "\n", "# ## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "## Install TorchAudio\n", @@ -62,9 +62,9 @@ "* **with oracle VAD**: use ground-truth speech/non-speech labels. \n", "* **with system VAD**: use speech/non-speech labels generated by an actual VAD model. \n", "\n", - "We will first demonstrate how to perform diarization with a oracle VAD timestamps (we assume we already have speech timestamps) and pretrained speaker embedding extractor model which can be found in tutorial for [Speaker Identification and Verification in NeMo](https://github.com/NVIDIA/NeMo/blob/main/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb).\n", + "We will first demonstrate how to perform diarization with oracle VAD timestamps (we assume we already have speech timestamps) and a pretrained speaker embedding extractor model, which can be found in the tutorial [Speaker Identification and Verification in NeMo](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb).\n", "\n", - "In the following section, we will also show how to perform VAD and then diarization if ground truth timestamp speech were not available (non-oracle VAD). We also have tutorials for [VAD training in NeMo](https://github.com/NVIDIA/NeMo/blob/main/tutorials/asr/Voice_Activity_Detection.ipynb) and [online offline microphone inference](https://github.com/NVIDIA/NeMo/blob/main/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb), where you can custom your model and training/finetuning on your own data.\n", + "In the following section, we will also show how to perform VAD and then diarization if ground-truth speech timestamps are not available (non-oracle VAD). We also have tutorials for [VAD training in NeMo](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/asr/Voice_Activity_Detection.ipynb) and [online offline microphone inference](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/asr/Online_Offline_Microphone_VAD_Demo.ipynb), where you can customize your model and run training/finetuning on your own data.\n", "\n", "For demonstration purposes we would be using simulated audio from [an4 dataset](http://www.speech.cs.cmu.edu/databases/an4/)." ] @@ -140,7 +140,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - " We use a default multi-scale setting in [diar_infer_telephonic.yaml](https://github.com/NVIDIA/NeMo/blob/main/examples/speaker_tasks/diarization/conf/inference/diar_infer_telephonic.yaml) which has 5 scales from 1.5 s to 0.5 s, 50% overlap and equal weights. Note that only the ratio between numbers in `multiscale_weights` since the fused affinity matrix is normalized. For example, \[1,1,1,1,1\] and \[0.5,0.5,0.5,0.5,0.5\] will lead to the exactly same result." + " We use a default multi-scale setting in [diar_infer_telephonic.yaml](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/speaker_tasks/diarization/conf/inference/diar_infer_telephonic.yaml) which has 5 scales from 1.5 s to 0.5 s, 50% overlap and equal weights. 
Note that only the ratio between the numbers in `multiscale_weights` matters, since the fused affinity matrix is normalized. For example, \[1,1,1,1,1\] and \[0.5,0.5,0.5,0.5,0.5\] will lead to exactly the same result." ] }, { @@ -191,7 +191,7 @@ "MSDD models employ pairwise (two-speaker) unit-model for both training and inference. While training, pairwise model is trained on data samples with two speakers or two-speaker subset from data samples with more than two speakers. \n", "In inference mode, we retrieve all possible pairs from the estimated number of speakers and average the results. For example, if there are four speakers `(A, B, C, D)`, we extract 6 pairs: `(A,B)`, `(A,C)`, `(A,D)`, `(B,C)`, `(B,D)`, `(C,D)`. Finally, the sigmoid outputs are averaged. In this way, MSDD can deal with flexible number of speakers using a pairwise model. \n", "\n", - "The detailed information on MSDD model and model training can be found in tutorial on [Speaker Diarization Training](https://github.com/NVIDIA/NeMo/blob/main/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb). " + "The detailed information on MSDD model and model training can be found in tutorial on [Speaker Diarization Training](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb). " ] }, { @@ -399,7 +399,7 @@ "from omegaconf import OmegaConf\n", "MODEL_CONFIG = os.path.join(data_dir,'diar_infer_telephonic.yaml')\n", "if not os.path.exists(MODEL_CONFIG):\n", - " config_url = \"https://raw.githubusercontent.com/NVIDIA/NeMo/main/examples/speaker_tasks/diarization/conf/inference/diar_infer_telephonic.yaml\"\n", + " config_url = \"https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/examples/speaker_tasks/diarization/conf/inference/diar_infer_telephonic.yaml\"\n", " MODEL_CONFIG = wget.download(config_url,data_dir)\n", "\n", "config = OmegaConf.load(MODEL_CONFIG)\n", diff --git a/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb b/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb index 3c56df2bbba0..ab5cab58bc69 100644 --- a/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb +++ b/tutorials/speaker_tasks/Speaker_Diarization_Training.ipynb @@ -18,7 +18,7 @@ "\"\"\"\n", "\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "\n", "! git clone https://github.com/NVIDIA/NeMo\n", "%cd NeMo\n", @@ -197,9 +197,9 @@ "\n", "- Please skip this section and go directly to [Prepare Training data for MSDD](#Prepare-Training-data-for-MSDD) section if you have your own speaker diarization dataset. \n", "\n", - "In this tutorial, we use [NeMo Multispeaker Simulator](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tools/Multispeaker_Simulator.ipynb) and the Librispeech corpus to generate a toy training dataset for demonstration purpose. You can replace the simulated dataset with your own datasets if you have proper speaker annotations (RTTM files) for the dataset. If you do not have access to any speaker diarization datasets, you can use [NeMo Multispeaker Simulator](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tools/Multispeaker_Simulator.ipynb) by generating a good amount of data samples to meet your needs. \n", + "In this tutorial, we use [NeMo Multispeaker Simulator](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tools/Multispeaker_Simulator.ipynb) and the Librispeech corpus to generate a toy training dataset for demonstration purposes. 
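A toy, pure-Python illustration of the pairwise averaging described in the MSDD discussion above; the scores are invented, and this is not the NeMo implementation.

```python
from itertools import combinations

speakers = ["A", "B", "C", "D"]
pairs = list(combinations(speakers, 2))
print(pairs)  # 6 pairs: (A,B), (A,C), (A,D), (B,C), (B,D), (C,D)

# Stand-in per-frame sigmoid outputs for speaker "A" from each pair with "A".
a_scores = [[0.9, 0.2, 0.8], [0.8, 0.1, 0.9], [0.7, 0.3, 0.7]]
a_avg = [sum(frames) / len(frames) for frames in zip(*a_scores)]
print(a_avg)  # averaged per-frame activity estimate for speaker "A"
```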
You can replace the simulated dataset with your own datasets if you have proper speaker annotations (RTTM files) for the dataset. If you do not have access to any speaker diarization datasets, you can use [NeMo Multispeaker Simulator](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tools/Multispeaker_Simulator.ipynb) by generating a good amount of data samples to meet your needs. \n", "\n", - "For more details regarding data simulator, please follow the descriptions in [NeMo Multispeaker Simulator](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tools/Multispeaker_Simulator.ipynb) and we will not cover configurations and detailed process of data simulation in this tutorial. \n" + "For more details regarding data simulator, please follow the descriptions in [NeMo Multispeaker Simulator](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tools/Multispeaker_Simulator.ipynb) and we will not cover configurations and detailed process of data simulation in this tutorial. \n" ] }, { @@ -232,7 +232,7 @@ "source": [ "import os\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "\n", "# download scripts if not already there \n", "if not os.path.exists('NeMo/scripts'):\n", diff --git a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb index 8e3ae9c1f131..f956334b892c 100644 --- a/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb +++ b/tutorials/speaker_tasks/Speaker_Identification_Verification.ipynb @@ -27,7 +27,7 @@ "!pip install text-unidecode\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[asr]\n", "\n", "# Install TorchAudio\n", @@ -58,7 +58,7 @@ "source": [ "In this tutorial, we shall first train these embeddings on speaker-related datasets, and then get speaker embeddings from a pretrained network for a new dataset. Since Google Colab has very slow read-write speeds, I'll be demonstrating this tutorial using [an4](http://www.speech.cs.cmu.edu/databases/an4/). \n", "\n", - "Instead, if you'd like to try on a bigger dataset like [hi-mia](https://arxiv.org/abs/1912.01231) use the [get_hi-mia-data.py](https://github.com/NVIDIA/NeMo/tree/main/scripts/dataset_processing/speaker_tasks/get_hi-mia_data.py) script to download the necessary files, extract them, and resample to 16Khz if any of these samples are not at 16Khz. " + "Instead, if you'd like to try on a bigger dataset like [hi-mia](https://arxiv.org/abs/1912.01231) use the [get_hi-mia-data.py](https://github.com/NVIDIA/NeMo/tree/r1.19.0/scripts/dataset_processing/speaker_tasks/get_hi-mia_data.py) script to download the necessary files, extract them, and resample to 16Khz if any of these samples are not at 16Khz. " ] }, { @@ -276,7 +276,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Note: All the following steps are just for explanation of each section, but one can use the provided [training script](https://github.com/NVIDIA/NeMo/blob/main/examples/speaker_tasks/recognition/speaker_reco.py) to launch training in the command line." + "Note: All the following steps are just for explanation of each section, but one can use the provided [training script](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/speaker_tasks/recognition/speaker_reco.py) to launch training in the command line." 
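To ground the verification use case in code, a small hedged sketch of the usual cosine-similarity decision between two speaker embeddings; the vectors, dimensionality, and threshold are stand-ins, not values from the notebook.

```python
import numpy as np

def cosine_score(emb_a, emb_b):
    """Cosine similarity between two speaker embeddings."""
    return float(np.dot(emb_a, emb_b) / (np.linalg.norm(emb_a) * np.linalg.norm(emb_b)))

rng = np.random.default_rng(0)
emb_a, emb_b = rng.standard_normal(192), rng.standard_normal(192)  # stand-in vectors
threshold = 0.7  # in practice tuned on a held-out trial list
print("same speaker:", cosine_score(emb_a, emb_b) >= threshold)
```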
] }, { @@ -760,7 +760,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Note: You may use [finetune-script](https://github.com/NVIDIA/NeMo/blob/main/examples/speaker_tasks/recognition/speaker_reco_finetune.py) to launch training in the command line. Following is just a demonstration of the script" + "Note: You may use [finetune-script](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/speaker_tasks/recognition/speaker_reco_finetune.py) to launch training in the command line. Following is just a demonstration of the script" ] }, { diff --git a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb index 98f0cce4e9ec..5f5641d1fb85 100644 --- a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb +++ b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb @@ -35,7 +35,7 @@ "id": "d4KCUoxSpdoZ" }, "source": [ - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "\n", "\"\"\"\n", "You can run either this notebook locally (if you have all the dependencies and a GPU) or on Google Colab.\n", @@ -126,7 +126,7 @@ "id": "S1DZk-inQGTI" }, "source": [ - "`TOOLS_DIR` contains scripts that we are going to need during the next steps, all necessary scripts could be found [here](https://github.com/NVIDIA/NeMo/tree/main/tools/ctc_segmentation/scripts)." + "`TOOLS_DIR` contains scripts that we are going to need during the next steps, all necessary scripts could be found [here](https://github.com/NVIDIA/NeMo/tree/r1.19.0/tools/ctc_segmentation/scripts)." ] }, { @@ -280,7 +280,7 @@ "* `max_length` argument - max number of words in a segment for alignment (used only if there are no punctuation marks present in the original text. Long non-speech segments are better for segments split and are more likely to co-occur with punctuation marks. Random text split could deteriorate the quality of the alignment.\n", "* out-of-vocabulary words will be removed based on pre-trained ASR model vocabulary, and the text will be changed to lowercase \n", "* sentences for alignment with the original punctuation and capitalization will be stored under `$OUTPUT_DIR/processed/*_with_punct.txt`\n", - "* numbers will be converted from written to their spoken form with `num2words` package. For English, it's recommended to use NeMo normalization tool use `--use_nemo_normalization` argument (not supported if running this segmentation tutorial in Colab, see the text normalization tutorial: [`https://github.com/NVIDIA/NeMo-text-processing/blob/main/tutorials/Text_(Inverse)_Normalization.ipynb`](https://colab.research.google.com/github/NVIDIA/NeMo-text-processing/blob/main/tutorials/Text_(Inverse)_Normalization.ipynb) for more details). Even `num2words` normalization is usually enough for proper segmentation. However, it does not take audio into account. NeMo supports audio-based normalization for English, German and Russian languages that can be applied to the segmented data as a post-processing step. Audio-based normalization produces multiple normalization options. For example, `901` could be normalized as `nine zero one` or `nine hundred and one`. The audio-based normalization chooses the best match among the possible normalization options and the transcript based on the character error rate. 
See [https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/normalize_with_audio.py](https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/normalize_with_audio.py) for more details.\n", + "* numbers will be converted from written to their spoken form with the `num2words` package. For English, it's recommended to use the NeMo normalization tool with the `--use_nemo_normalization` argument (not supported if running this segmentation tutorial in Colab, see the text normalization tutorial: [`https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/tutorials/Text_(Inverse)_Normalization.ipynb`](https://colab.research.google.com/github/NVIDIA/NeMo-text-processing/blob/r1.19.0/tutorials/Text_(Inverse)_Normalization.ipynb) for more details). Even `num2words` normalization is usually enough for proper segmentation. However, it does not take audio into account. NeMo supports audio-based normalization for English, German and Russian languages that can be applied to the segmented data as a post-processing step. Audio-based normalization produces multiple normalization options. For example, `901` could be normalized as `nine zero one` or `nine hundred and one`. The audio-based normalization chooses the best match among the possible normalization options and the transcript based on the character error rate. See [https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/nemo_text_processing/text_normalization/normalize_with_audio.py](https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/nemo_text_processing/text_normalization/normalize_with_audio.py) for more details.\n", "\n", "### Audio preprocessing:\n", "* non '.wav' audio files will be converted to `.wav` format\n", @@ -699,7 +699,7 @@ "source": [ "# Next Steps\n", "\n", - "- Check out [NeMo Speech Data Explorer tool](https://github.com/NVIDIA/NeMo/tree/main/tools/speech_data_explorer#speech-data-explorer) to interactively evaluate the aligned segments.\n", + "- Check out [NeMo Speech Data Explorer tool](https://github.com/NVIDIA/NeMo/tree/r1.19.0/tools/speech_data_explorer#speech-data-explorer) to interactively evaluate the aligned segments.\n", "- Try Audio-based normalization tool." ] }, diff --git a/tutorials/tools/Multispeaker_Simulator.ipynb b/tutorials/tools/Multispeaker_Simulator.ipynb index c2a9caf1ea72..8264854dfd59 100644 --- a/tutorials/tools/Multispeaker_Simulator.ipynb +++ b/tutorials/tools/Multispeaker_Simulator.ipynb @@ -18,7 +18,7 @@ "\"\"\"\n", "\n", "NEMO_DIR_PATH = \"NeMo\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "\n", "! git clone https://github.com/NVIDIA/NeMo\n", "%cd NeMo\n", @@ -326,7 +326,7 @@ "outputs": [], "source": [ "if not os.path.exists(\"multispeaker_data_analysis.py\"):\n", - " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/speaker_tasks/multispeaker_data_analysis.py\n", + " !wget https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/speaker_tasks/multispeaker_data_analysis.py\n", "\n", "from multispeaker_data_analysis import run_multispeaker_data_analysis\n", "\n", diff --git a/tutorials/tts/Aligner_Inference_Examples.ipynb b/tutorials/tts/Aligner_Inference_Examples.ipynb index 611e1e3b6e66..e113af5565ae 100644 --- a/tutorials/tts/Aligner_Inference_Examples.ipynb +++ b/tutorials/tts/Aligner_Inference_Examples.ipynb @@ -39,7 +39,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. 
Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", @@ -700,7 +700,7 @@ "## Resources\n", "\n", "- For more information about the Aligner architecture, check out the [RAD-TTS Aligner paper](https://arxiv.org/abs/2108.10447).\n", - "- If you would like to run disambiguation on a large batch of sentences, try out the [Aligner disambiguation example script](https://github.com/NVIDIA/NeMo/blob/main/examples/tts/aligner_heteronym_disambiguation.py)." + "- If you would like to run disambiguation on a large batch of sentences, try out the [Aligner disambiguation example script](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/tts/aligner_heteronym_disambiguation.py)." ] }, { diff --git a/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb b/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb index 699f1b131408..0f501f89a90e 100644 --- a/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb +++ b/tutorials/tts/Evaluation_MelCepstralDistortion.ipynb @@ -57,7 +57,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "# If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !pip install librosa numpy matplotlib" ] @@ -601,9 +601,9 @@ "source": [ "## Additional NeMo Resources\n", "\n", - "If you are unsure where to begin for training a TTS model, you may want to start with the [FastPitch and Mixer-TTS Training notebook](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) or the [NeMo TTS Primer notebook](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/NeMo_TTS_Primer.ipynb). For fine-tuning, there is also the [FastPitch Fine-Tuning notebook](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_Finetuning.ipynb).\n", + "If you are unsure where to begin for training a TTS model, you may want to start with the [FastPitch and Mixer-TTS Training notebook](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) or the [NeMo TTS Primer notebook](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/NeMo_TTS_Primer.ipynb). For fine-tuning, there is also the [FastPitch Fine-Tuning notebook](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/FastPitch_Finetuning.ipynb).\n", "\n", - "For some guidance on how to load a trained model and perform inference to generate mels or waveforms, check out how it's done in the [Inference notebook](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/Inference_ModelSelect.ipynb). Important functions to know are include `from_pretrained()` (if loading from an NGC model) and `restore_from()` (if loading a `.nemo` file). See the [NeMo Primer notebook](https://github.com/NVIDIA/NeMo/blob/stable/tutorials/00_NeMo_Primer.ipynb) for more general information about model training, saving, and loading." + "For some guidance on how to load a trained model and perform inference to generate mels or waveforms, check out how it's done in the [Inference notebook](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/Inference_ModelSelect.ipynb). 
Important functions to know include `from_pretrained()` (if loading from an NGC model) and `restore_from()` (if loading a `.nemo` file). See the [NeMo Primer notebook](https://github.com/NVIDIA/NeMo/blob/stable/tutorials/00_NeMo_Primer.ipynb) for more general information about model training, saving, and loading." ] } ], diff --git a/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb b/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb index fa1b1bdc90c8..95bc3805030c 100644 --- a/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb +++ b/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb @@ -59,7 +59,7 @@ "4. Run this cell to set up dependencies# .\n", "\"\"\"\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", - "# BRANCH = 'main'\n", + "# BRANCH = 'r1.19.0'\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget unidecode pynini==2.1.4 scipy==1.7.3\n", "# !python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]\n", @@ -246,7 +246,7 @@ "source": [ "### Extract Supplementary Data\n", "\n", - "As mentioned in the [FastPitch and MixerTTS training tutorial](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) - To accelerate and stabilize our training, we also need to extract pitch for every audio, estimate pitch statistics (mean, std, min, and max). To do this, all we need to do is iterate over our data one time, via `extract_sup_data.py` script." + "As mentioned in the [FastPitch and MixerTTS training tutorial](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/FastPitch_MixerTTS_Training.ipynb), to accelerate and stabilize our training we also need to extract pitch for every audio file and estimate pitch statistics (mean, std, min, and max). To do this, all we need to do is iterate over our data one time, via the `extract_sup_data.py` script." ] }, { diff --git a/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb b/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb index 9c4ea4369534..6685eca56251 100644 --- a/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_ChineseTTS_Training.ipynb @@ -61,7 +61,7 @@ "# !pip install wget text-unidecode matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "# !python -m pip install \"git+https://github.com/NVIDIA/NeMo.git@${BRANCH}#egg=nemo_toolkit[all]\"\n", "\n", "## Install pynini\n", @@ -134,10 +134,10 @@ "\n", "FastPitch is non-autoregressive model for mel-spectrogram generation based on FastSpeech, conditioned on fundamental frequency contours. For more details about model, please refer to the original [paper](https://ieeexplore.ieee.org/abstract/document/9413889). Original [FastPitch model](https://ieeexplore.ieee.org/abstract/document/9413889) uses an external Tacotron 2 model trained on LJSpeech-1.1 to extract training alignments and estimate durations of input symbols. 
This implementation of FastPitch is based on [Deep Learning Examples](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/FastPitch), which uses an alignment mechanism proposed in [RAD-TTS](https://openreview.net/pdf?id=0NQwnnwAORi) and extended in [TTS Aligner](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9747707).\n",
 "\n",
- "For more information on training a basic FastPitch model, please refer to [FastPitch_MixerTTS_Training.ipynb](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) tutorial.\n",
+ "For more information on training a basic FastPitch model, please refer to the [FastPitch_MixerTTS_Training.ipynb](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) tutorial.\n",
 "\n",
 "### HiFi-GAN\n",
- "HiFi-GAN is a generative adversarial network (GAN) model that generates audio from mel spectrograms. The generator uses transposed convolutions to upsample mel spectrograms to audio. For more details about the model, please refer to the original [paper](https://arxiv.org/abs/2010.05646). NeMo re-implementation of HiFi-GAN can be found [here](https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/tts/models/hifigan.py)."
+ "HiFi-GAN is a generative adversarial network (GAN) model that generates audio from mel spectrograms. The generator uses transposed convolutions to upsample mel spectrograms to audio. For more details about the model, please refer to the original [paper](https://arxiv.org/abs/2010.05646). The NeMo re-implementation of HiFi-GAN can be found [here](https://github.com/NVIDIA/NeMo/blob/r1.19.0/nemo/collections/tts/models/hifigan.py)."
 ]
 },
 {
@@ -780,7 +780,7 @@
 "- Finetuning with #1 has artifacts from the original audio (noise) that get passed on as input to the vocoder, resulting in noise artifacts in the vocoder output.\n",
 "- On the other hand, #2.1 (i.e. `Mel spectrogram predicted from FastPitch with groundtruth alignment and duration`) gives the best results because it enables HiFi-GAN to learn mel spectrograms generated by FastPitch as well as duration distributions closer to the real world (i.e. ground truth) durations. \n",
 "\n",
- "From implementation perspective - we follow the same process described in [Finetuning FastPitch for a new speaker](FastPitch_Finetuning.ipynb) - i.e. take the latest checkpoint from FastPitch training and predict spectrograms for each of the input records in `train_manifest.json`, `test_manifest.json` and `val_manifest.json`. NeMo provides an efficient script, [scripts/dataset_processing/tts/generate_mels.py](https://raw.githubusercontent.com/nvidia/NeMo/main/scripts/dataset_processing/tts/generate_mels.py), to generate Mel-spectrograms in the directory `NeMoChineseTTS/mels` and also create new JSON manifests with a suffix `_mel` by adding a new key `\"mel_filepath\"`. For example, `train_manifest.json` corresponds to `train_manifest_mel.json` saved in the same directory. You can run the following CLI to obtain the new JSON manifests."
+ "From an implementation perspective, we follow the same process described in [Finetuning FastPitch for a new speaker](FastPitch_Finetuning.ipynb), i.e. take the latest checkpoint from FastPitch training and predict spectrograms for each of the input records in `train_manifest.json`, `test_manifest.json` and `val_manifest.json`. 
NeMo provides an efficient script, [scripts/dataset_processing/tts/generate_mels.py](https://raw.githubusercontent.com/nvidia/NeMo/r1.19.0/scripts/dataset_processing/tts/generate_mels.py), to generate Mel-spectrograms in the directory `NeMoChineseTTS/mels` and also create new JSON manifests with a suffix `_mel` by adding a new key `\"mel_filepath\"`. For example, `train_manifest.json` corresponds to `train_manifest_mel.json` saved in the same directory. You can run the following CLI to obtain the new JSON manifests."
 ]
 },
 {
diff --git a/tutorials/tts/FastPitch_Finetuning.ipynb b/tutorials/tts/FastPitch_Finetuning.ipynb
index 794d4b71ff44..cf94862263cd 100755
--- a/tutorials/tts/FastPitch_Finetuning.ipynb
+++ b/tutorials/tts/FastPitch_Finetuning.ipynb
@@ -57,7 +57,7 @@
 "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n",
 "4. Run this cell to set up dependencies.\n",
 "\"\"\"\n",
- "BRANCH = 'main'\n",
+ "BRANCH = 'r1.19.0'\n",
 "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n",
 "# !apt-get install sox libsndfile1 ffmpeg\n",
 "# !pip install wget text-unidecode \n",
@@ -627,7 +627,7 @@
 "id": "843674e7",
 "metadata": {},
 "source": [
- "We can then finetune hifigan similarly to fastpitch using NeMo's [hifigan_finetune.py](https://github.com/NVIDIA/NeMo/blob/main/examples/tts/hifigan_finetune.py) and [hifigan.yaml](https://github.com/NVIDIA/NeMo/blob/main/examples/tts/conf/hifigan/hifigan.yaml):\n",
+ "We can then finetune HiFi-GAN similarly to FastPitch using NeMo's [hifigan_finetune.py](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/tts/hifigan_finetune.py) and [hifigan.yaml](https://github.com/NVIDIA/NeMo/blob/r1.19.0/examples/tts/conf/hifigan/hifigan.yaml):\n",
 "\n",
 "```bash\n",
 "python examples/tts/hifigan_finetune.py \\\n",
diff --git a/tutorials/tts/FastPitch_GermanTTS_Training.ipynb b/tutorials/tts/FastPitch_GermanTTS_Training.ipynb
index e7cb0e896650..7d1ce265856a 100644
--- a/tutorials/tts/FastPitch_GermanTTS_Training.ipynb
+++ b/tutorials/tts/FastPitch_GermanTTS_Training.ipynb
@@ -61,7 +61,7 @@
 "# !pip install wget text-unidecode matplotlib>=3.3.2\n",
 "\n",
 "## Install NeMo\n",
- "BRANCH = 'main'\n",
+ "BRANCH = 'r1.19.0'\n",
 "# !python -m pip install \"git+https://github.com/NVIDIA/NeMo.git@${BRANCH}#egg=nemo_toolkit[all]\"\n",
 "\n",
 "## Install pynini\n",
@@ -133,10 +133,10 @@
 "\n",
 "FastPitch is a non-autoregressive model for mel-spectrogram generation based on FastSpeech, conditioned on fundamental frequency contours. For more details about the model, please refer to the original [paper](https://ieeexplore.ieee.org/abstract/document/9413889). The original [FastPitch model](https://ieeexplore.ieee.org/abstract/document/9413889) uses an external Tacotron 2 model trained on LJSpeech-1.1 to extract training alignments and estimate durations of input symbols. 
This implementation of FastPitch is based on [Deep Learning Examples](https://github.com/NVIDIA/DeepLearningExamples/tree/master/PyTorch/SpeechSynthesis/FastPitch), which uses an alignment mechanism proposed in [RAD-TTS](https://openreview.net/pdf?id=0NQwnnwAORi) and extended in [TTS Aligner](https://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=9747707).\n",
 "\n",
- "For more information on training a basic FastPitch model, please refer to [FastPitch_MixerTTS_Training.ipynb](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) tutorial.\n",
+ "For more information on training a basic FastPitch model, please refer to the [FastPitch_MixerTTS_Training.ipynb](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) tutorial.\n",
 "\n",
 "### HiFiGAN\n",
- "HiFiGAN is a generative adversarial network (GAN) model that generates audio from mel spectrograms. The generator uses transposed convolutions to upsample mel spectrograms to audio. For more details about the model, please refer to the original [paper](https://arxiv.org/abs/2010.05646). NeMo re-implementation of HiFi-GAN can be found [here](https://github.com/NVIDIA/NeMo/blob/main/nemo/collections/tts/models/hifigan.py)."
+ "HiFiGAN is a generative adversarial network (GAN) model that generates audio from mel spectrograms. The generator uses transposed convolutions to upsample mel spectrograms to audio. For more details about the model, please refer to the original [paper](https://arxiv.org/abs/2010.05646). The NeMo re-implementation of HiFi-GAN can be found [here](https://github.com/NVIDIA/NeMo/blob/r1.19.0/nemo/collections/tts/models/hifigan.py)."
 ]
 },
 {
@@ -172,7 +172,7 @@
 "3. `text`: original text;\n",
 "4. `normalized_text`: normalized text through our text normalization pipeline.\n",
 " \n",
- "This script supports processing either of Thorsten's Neutral Datasets 21.02 or 22.10. In this tutorial, we only focus on the latest 22.10 version dataset. Please refer [thorsten-muller-s-german-neutral-tts-datasets](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/tts/datasets.html#thorsten-muller-s-german-neutral-tts-datasets) for more details about Thorsten's datasets. \n",
+ "This script supports processing either of Thorsten's Neutral Datasets 21.02 or 22.10. In this tutorial, we only focus on the latest 22.10 version dataset. Please refer to [thorsten-muller-s-german-neutral-tts-datasets](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/tts/datasets.html#thorsten-muller-s-german-neutral-tts-datasets) for more details about Thorsten's datasets. \n",
 "\n",
 "You can run the below command to obtain the final manifests, `train_manifest_text_normed.json`, `val_manifest_text_normed.json` and `test_manifest_text_normed.json`. **Note** that this script would take some time (~2 hours) to download and normalize the entire dataset."
 ]
@@ -649,7 +649,7 @@
 "- Finetuning with #1 has artifacts from the original audio (noise) that get passed on as input to the vocoder, resulting in noise artifacts in the vocoder output.\n",
 "- On the other hand, #2.1 (i.e. `Mel spectrogram predicted from FastPitch with groundtruth alignment and duration`) gives the best results because it enables HiFi-GAN to learn mel spectrograms generated by FastPitch as well as duration distributions closer to the real world (i.e. ground truth) durations. 
\n", "\n", - "From implementation perspective - we follow the same process described in [Finetuning FastPitch for a new speaker](FastPitch_Finetuning.ipynb) - i.e. take the latest checkpoint from FastPitch training and predict spectrograms for each of the input records in `train_manifest_text_normed.json`, `test_manifest_text_normed.json` and `val_manifest_text_normed.json`. NeMo provides an efficient script, [scripts/dataset_processing/tts/generate_mels.py](https://raw.githubusercontent.com/nvidia/NeMo/main/scripts/dataset_processing/tts/generate_mels.py), to generate Mel-spectrograms in the directory `NeMoGermanTTS/mels` and also create new JSON manifests with a suffix `_mel` by adding a new key `\"mel_filepath\"`. For example, `train_manifest_text_normed.json` corresponds to `train_manifest_text_normed_mel.json` saved in the same directory. You can run the following CLI to obtain the new JSON manifests." + "From implementation perspective - we follow the same process described in [Finetuning FastPitch for a new speaker](FastPitch_Finetuning.ipynb) - i.e. take the latest checkpoint from FastPitch training and predict spectrograms for each of the input records in `train_manifest_text_normed.json`, `test_manifest_text_normed.json` and `val_manifest_text_normed.json`. NeMo provides an efficient script, [scripts/dataset_processing/tts/generate_mels.py](https://raw.githubusercontent.com/nvidia/NeMo/r1.19.0/scripts/dataset_processing/tts/generate_mels.py), to generate Mel-spectrograms in the directory `NeMoGermanTTS/mels` and also create new JSON manifests with a suffix `_mel` by adding a new key `\"mel_filepath\"`. For example, `train_manifest_text_normed.json` corresponds to `train_manifest_text_normed_mel.json` saved in the same directory. You can run the following CLI to obtain the new JSON manifests." ] }, { diff --git a/tutorials/tts/FastPitch_MixerTTS_Training.ipynb b/tutorials/tts/FastPitch_MixerTTS_Training.ipynb index 558c0d95d30b..403faa965534 100644 --- a/tutorials/tts/FastPitch_MixerTTS_Training.ipynb +++ b/tutorials/tts/FastPitch_MixerTTS_Training.ipynb @@ -50,7 +50,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. Run this cell to set up dependencies# .\n", "\"\"\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode scipy==1.7.3\n", diff --git a/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb b/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb index defd0272d89d..a67744ef0f58 100644 --- a/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb +++ b/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb @@ -56,7 +56,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. 
Run this cell to set up dependencies# .\n",
 "\"\"\"\n",
- "# BRANCH = 'main'\n",
+ "# BRANCH = 'r1.19.0'\n",
 "# # If you're using Colab and not running locally, uncomment and run this cell.\n",
 "# !apt-get install sox libsndfile1 ffmpeg\n",
 "# !pip install wget unidecode pynini==2.1.4 scipy==1.7.3\n",
@@ -258,7 +258,7 @@
 "source": [
 "### Extract Supplementary Data\n",
 "\n",
- "As mentioned in the [FastPitch and MixerTTS training tutorial](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/FastPitch_MixerTTS_Training.ipynb) - To accelerate and stabilize our training, we also need to extract pitch for every audio, estimate pitch statistics (mean, std, min, and max). To do this, all we need to do is iterate over our data one time, via `extract_sup_data.py` script."
+ "As mentioned in the [FastPitch and MixerTTS training tutorial](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/FastPitch_MixerTTS_Training.ipynb), to accelerate and stabilize training we also need to extract the pitch for every audio clip and estimate pitch statistics (mean, std, min, and max). To do this, we simply iterate over our data once via the `extract_sup_data.py` script."
 ]
 },
 {
diff --git a/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb b/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb
index eda5bba0aa1e..5a7f56dc201d 100644
--- a/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb
+++ b/tutorials/tts/FastPitch_Speaker_Interpolation.ipynb
@@ -94,7 +94,7 @@
 "source": [
 "# Install NeMo library. If you are running locally (rather than on Google Colab), comment out the below lines\n",
 "# and instead follow the instructions at https://github.com/NVIDIA/NeMo#Installation\n",
- "BRANCH = 'main'\n",
+ "BRANCH = 'r1.19.0'\n",
 "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]"
 ]
 },
diff --git a/tutorials/tts/Inference_DurationPitchControl.ipynb b/tutorials/tts/Inference_DurationPitchControl.ipynb
index 73c12bc79900..d4e1b1ba0678 100644
--- a/tutorials/tts/Inference_DurationPitchControl.ipynb
+++ b/tutorials/tts/Inference_DurationPitchControl.ipynb
@@ -46,7 +46,7 @@
 "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n",
 "4. Run this cell to set up dependencies.\n",
 "\"\"\"\n",
- "BRANCH = 'main'\n",
+ "BRANCH = 'r1.19.0'\n",
 "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n",
 "# !apt-get install sox libsndfile1 ffmpeg\n",
 "# !pip install wget text-unidecode\n",
@@ -202,7 +202,7 @@
 "\n",
 "Let's see the `pitch_predicted` for a sample text. You can run the below cell. You should get an image that looks like the following for the input `Hey, what is my pitch?`:\n",
 "\n",
- "\n",
+ "\n",
 "\n",
 "Notice that the last word `pitch` has an increase in pitch to stress that it is a question."
 ]
diff --git a/tutorials/tts/Inference_ModelSelect.ipynb b/tutorials/tts/Inference_ModelSelect.ipynb
index 195b773fb5ee..abdda3e16747 100644
--- a/tutorials/tts/Inference_ModelSelect.ipynb
+++ b/tutorials/tts/Inference_ModelSelect.ipynb
@@ -46,7 +46,7 @@
 "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n",
 "4. 
Run this cell to set up dependencies.\n", "\"\"\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/tutorials/tts/NeMo_TTS_Primer.ipynb b/tutorials/tts/NeMo_TTS_Primer.ipynb index 99306744dd05..497552a9ac43 100644 --- a/tutorials/tts/NeMo_TTS_Primer.ipynb +++ b/tutorials/tts/NeMo_TTS_Primer.ipynb @@ -25,7 +25,7 @@ "source": [ "# Install NeMo library. If you are running locally (rather than on Google Colab), comment out the below lines\n", "# and instead follow the instructions at https://github.com/NVIDIA/NeMo#Installation\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "!python -m pip install git+https://github.com/NVIDIA/NeMo.git@$BRANCH#egg=nemo_toolkit[all]" ] }, diff --git a/tutorials/tts/Pronunciation_customization.ipynb b/tutorials/tts/Pronunciation_customization.ipynb index 6fe269e76904..6185610fe4ab 100644 --- a/tutorials/tts/Pronunciation_customization.ipynb +++ b/tutorials/tts/Pronunciation_customization.ipynb @@ -26,7 +26,7 @@ "4. Run this cell to set up dependencies.\n", "\"\"\"\n", "\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "# # If you're using Google Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode \n", @@ -128,7 +128,7 @@ "metadata": {}, "source": [ "#### Expected results if you run the tutorial:\n", - " \n", + " \n", "\n", "\n", "During preprocessing, unambiguous dictionary words are converted to phonemes, while OOV and words with multiple entries are kept as graphemes. For example, **paracetamol** is missing from the phoneme dictionary, and **can** has 2 forms." @@ -186,7 +186,7 @@ "metadata": {}, "source": [ "#### Expected results if you run the tutorial:\n", - " \n", + " \n", "\n", "\n", "## Dictionary customization\n", @@ -212,7 +212,7 @@ "if os.path.exists(ipa_cmu_dict):\n", " ! rm $ipa_cmu_dict\n", "\n", - "! wget https://raw.githubusercontent.com/NVIDIA/NeMo/main/scripts/tts_dataset_files/$ipa_cmu_dict\n", + "! wget https://raw.githubusercontent.com/NVIDIA/NeMo/r1.19.0/scripts/tts_dataset_files/$ipa_cmu_dict\n", "\n", "with open(ipa_cmu_dict, \"a\") as f:\n", " f.write(f\"PARACETAMOL {new_pronunciation}\\n\")\n", @@ -267,7 +267,7 @@ "metadata": {}, "source": [ "#### Expected results if you run the tutorial:\n", - " " + " " ] }, { @@ -276,7 +276,7 @@ "source": [ "# Resources\n", "* [TTS pipeline customization](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/tts/tts-custom.html#tts-pipeline-configuration)\n", - "* [Overview of TTS in NeMo](https://github.com/NVIDIA/NeMo/blob/main/tutorials/tts/NeMo_TTS_Primer.ipynb)\n", + "* [Overview of TTS in NeMo](https://github.com/NVIDIA/NeMo/blob/r1.19.0/tutorials/tts/NeMo_TTS_Primer.ipynb)\n", "* [G2P models in NeMo](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/tts/g2p.html)\n", "* [Riva TTS documentation](https://docs.nvidia.com/deeplearning/riva/user-guide/docs/tts/tts-overview.html)" ] diff --git a/tutorials/tts/Tacotron2_Training.ipynb b/tutorials/tts/Tacotron2_Training.ipynb index e2ae5082e608..a696ee26e8a7 100644 --- a/tutorials/tts/Tacotron2_Training.ipynb +++ b/tutorials/tts/Tacotron2_Training.ipynb @@ -54,7 +54,7 @@ "3. Connect to an instance with a GPU (Runtime -> Change runtime type -> select \"GPU\" for hardware accelerator)\n", "4. 
Run this cell to set up dependencies# .\n", "\"\"\"\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "# # If you're using Colab and not running locally, uncomment and run this cell.\n", "# !apt-get install sox libsndfile1 ffmpeg\n", "# !pip install wget text-unidecode\n", diff --git a/tutorials/tts/Vits_Training.ipynb b/tutorials/tts/Vits_Training.ipynb index 37e55e0d7572..dbe4e9362cc1 100644 --- a/tutorials/tts/Vits_Training.ipynb +++ b/tutorials/tts/Vits_Training.ipynb @@ -63,7 +63,7 @@ "# !pip install wget text-unidecode matplotlib>=3.3.2\n", "\n", "## Install NeMo\n", - "BRANCH = 'main'\n", + "BRANCH = 'r1.19.0'\n", "# !python -m pip install \"git+https://github.com/NVIDIA/NeMo.git@${BRANCH}#egg=nemo_toolkit[all]\"\n", "\n", "## Install pynini\n", From 1dc8b37bba47f0534400414f1d3f972778b9c7a5 Mon Sep 17 00:00:00 2001 From: Li Tao Date: Fri, 19 May 2023 21:04:59 +0800 Subject: [PATCH 02/28] Fix a bug, use _ceil_to_nearest instead as _round_to_nearest is not defined (#6681) --- .../nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py index deb6e77cdb92..2c896c2e61af 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_sft_chat_dataset.py @@ -183,7 +183,7 @@ def collate_fn(self, batch): if self.pad_to_max_length: max_length = self.max_seq_length else: - max_length = min(self.max_seq_length, self._round_to_nearest(max_length, 8)) + max_length = min(self.max_seq_length, self._ceil_to_nearest(max_length, 8)) assert max_length <= self.max_seq_length attention_mask = [self._create_attention_mask(max_length) for _ in batch] From 0ca1dd3685995267b49193236a6ca0d27aea75a6 Mon Sep 17 00:00:00 2001 From: Vladimir Bataev Date: Tue, 23 May 2023 21:19:09 +0400 Subject: [PATCH 03/28] Fix k2 installation in Docker with CUDA 12 (#6707) Signed-off-by: Vladimir Bataev --- scripts/speech_recognition/k2/setup.sh | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/scripts/speech_recognition/k2/setup.sh b/scripts/speech_recognition/k2/setup.sh index 64d9a3c122e4..48ca31dab803 100755 --- a/scripts/speech_recognition/k2/setup.sh +++ b/scripts/speech_recognition/k2/setup.sh @@ -15,10 +15,12 @@ # limitations under the License. K2_REPO=https://github.com/k2-fsa/k2 -LATEST_RELEASE=$(git -c 'versionsort.suffix=-' \ - ls-remote --exit-code --refs --sort='version:refname' --tags ${K2_REPO} '*.*' \ - | tail --lines=1 \ - | cut -d '/' -f 3) +LATEST_RELEASE=e5671de # Temporary fix for CUDA 12 +# uncomment the following line after the next k2 version is released (>1.24.3) +#LATEST_RELEASE=$(git -c 'versionsort.suffix=-' \ +# ls-remote --exit-code --refs --sort='version:refname' --tags ${K2_REPO} '*.*' \ +# | tail --lines=1 \ +# | cut -d '/' -f 3) # "cut --delimiter '/' --fields 3" doesn't work on macOS, use "-d ... -f ..." 
instead K2_MAKE_ARGS="-j" pip install -v "git+${K2_REPO}@${LATEST_RELEASE}#egg=k2" || { echo "k2 could not be installed!"; exit 1; } From db6e29b83573e9d132ccd06192732571ddb9f349 Mon Sep 17 00:00:00 2001 From: Somshubra Majumdar Date: Wed, 24 May 2023 15:21:36 -0700 Subject: [PATCH 04/28] Tutorial fixes (#6717) Signed-off-by: smajumdar --- tutorials/00_NeMo_Primer.ipynb | 2 +- tutorials/AudioTranslationSample.ipynb | 2 +- tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb | 2 +- tutorials/asr/Offline_ASR.ipynb | 2 +- tutorials/nlp/MegatronBert_export.ipynb | 2 +- tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb | 4 ++-- tutorials/nlp/Punctuation_and_Capitalization.ipynb | 2 +- .../nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb | 2 +- tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb | 2 +- tutorials/tools/CTC_Segmentation_Tutorial.ipynb | 2 +- tutorials/tts/FastPitch_GermanTTS_Training.ipynb | 2 +- 11 files changed, 12 insertions(+), 12 deletions(-) diff --git a/tutorials/00_NeMo_Primer.ipynb b/tutorials/00_NeMo_Primer.ipynb index 193680f6d06d..2eff9c596b7f 100644 --- a/tutorials/00_NeMo_Primer.ipynb +++ b/tutorials/00_NeMo_Primer.ipynb @@ -14,7 +14,7 @@ "\n", "The toolkit comes with extendable collections of pre-built modules and ready-to-use models for automatic speech recognition (ASR), natural language processing (NLP) and text synthesis (TTS). Built for speed, NeMo can utilize NVIDIA's Tensor Cores and scale out training to multiple GPUs and multiple nodes.\n", "\n", - "For more information, please visit https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/#" + "For more information, please visit https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/#" ] }, { diff --git a/tutorials/AudioTranslationSample.ipynb b/tutorials/AudioTranslationSample.ipynb index ac79ca3b204d..b5c9d13a5c6d 100644 --- a/tutorials/AudioTranslationSample.ipynb +++ b/tutorials/AudioTranslationSample.ipynb @@ -63,7 +63,7 @@ "import nemo\n", "# Import Speech Recognition collection\n", "import nemo.collections.asr as nemo_asr\n", - "# Import Natural Language Processing colleciton\n", + "# Import Natural Language Processing collection\n", "import nemo.collections.nlp as nemo_nlp\n", "# Import Speech Synthesis collection\n", "import nemo.collections.tts as nemo_tts\n", diff --git a/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb b/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb index da2e53fd94eb..ede417d3583c 100644 --- a/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb +++ b/tutorials/Publish_NeMo_Model_On_Hugging_Face_Hub.ipynb @@ -627,7 +627,7 @@ "\n", "\n", "Eg: \n", - "Since this model was trained on publically available speech datasets, the performance of this model might degrade for speech which includes technical terms, or vernacular that the model has not been trained on. The model might also perform worse for accented speech.\n", + "Since this model was trained on publicly available speech datasets, the performance of this model might degrade for speech which includes technical terms, or vernacular that the model has not been trained on. 
The model might also perform worse for accented speech.\n", "\n", "\n", "## References\n", diff --git a/tutorials/asr/Offline_ASR.ipynb b/tutorials/asr/Offline_ASR.ipynb index 685d3ef6f37a..2d963a6b77d3 100644 --- a/tutorials/asr/Offline_ASR.ipynb +++ b/tutorials/asr/Offline_ASR.ipynb @@ -30,7 +30,7 @@ "* use beam search decoder with N-gram language model re-scoring\n", "\n", "You may find more info on how to train and use language models for ASR models here:\n", - "https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/asr/asr_language_modeling.html\n", + "https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/asr/asr_language_modeling.html\n", "\n\nNOTE: User is responsible for checking the content of datasets and the applicable licenses and determining if suitable for the intended use.\n" ] }, diff --git a/tutorials/nlp/MegatronBert_export.ipynb b/tutorials/nlp/MegatronBert_export.ipynb index 5bec75028064..c435d6e76d54 100644 --- a/tutorials/nlp/MegatronBert_export.ipynb +++ b/tutorials/nlp/MegatronBert_export.ipynb @@ -64,7 +64,7 @@ "\n", "If you prefer to use the Huggingface BERT models, please skip this section and refer to `Setting up a NeMo Experiment` section to load a model from `nemo_nlp.modules.get_pretrained_lm_models_list()`\n", "\n", - "NeMo Megatron BERT can [load from a pretrained model](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/core/core.html?highlight=nemo%20file#restore) using `.nemo` file. We can convert the Megatron-LM checkpoint to the `.nemo` file. Let's first download the pretrained model weights and vocabulary file." + "NeMo Megatron BERT can [load from a pretrained model](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/core/core.html?highlight=nemo%20file#restore) using `.nemo` file. We can convert the Megatron-LM checkpoint to the `.nemo` file. Let's first download the pretrained model weights and vocabulary file." ] }, { diff --git a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb index 02d533e59ad4..7ccf33826157 100644 --- a/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb +++ b/tutorials/nlp/Multitask_Prompt_and_PTuning.ipynb @@ -45,7 +45,7 @@ "\n", "- Our p-tuning implementation is based off Liu et al's paper [GPT Understands, Too](https://arxiv.org/abs/2103.10385).\n", "\n", - "- Command line usage examples and API documentation can be found in [our user docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/nlp/nemo_megatron/prompt_learning.html). \n", + "- Command line usage examples and API documentation can be found in [our user docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/nemo_megatron/prompt_learning.html). \n", "\n", "\"Prompt\n", "\n", @@ -88,7 +88,7 @@ "# The Best of Both\n", "A single pretrained GPT model can use both p-tuning and prompt-tuning. While you must decide to use either p-tuning or prompt-tuning for each task you want your model to perform, you can p-tune your model on a set of tasks A, then prompt tune your same model on a different set of tasks B, then finally run inference on tasks from both A and B at the same time. During prompt-tuning or p-tuning, tasks tuned at the same time must use the same number of virtual tokens. 
During inference, tasks using differing amounts of virtual tokens can be run at the same time.\n", "\n", - "Please see our [docs for more comparisons between prompt and p-tuning](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/nlp/nemo_megatron/prompt_learning.html). \n", + "Please see our [docs for more comparisons between prompt and p-tuning](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/nemo_megatron/prompt_learning.html). \n", "\n", "With all that covered, let's get started!\n" ] diff --git a/tutorials/nlp/Punctuation_and_Capitalization.ipynb b/tutorials/nlp/Punctuation_and_Capitalization.ipynb index 54e424c831c2..ea6dc45ef273 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization.ipynb @@ -421,7 +421,7 @@ "\n", "- **trainer**: Any argument to be passed to PyTorch Lightning\n", "\n", - "See [docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/nlp/punctuation_and_capitalization.html#training-punctuation-and-capitalization-model) for full config description." + "See [docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/punctuation_and_capitalization.html#training-punctuation-and-capitalization-model) for full config description." ] }, { diff --git a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb index 3ce2ef9fb6de..62b3255d119b 100644 --- a/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb +++ b/tutorials/nlp/Punctuation_and_Capitalization_Lexical_Audio.ipynb @@ -514,7 +514,7 @@ "\n", "- **trainer**: Any argument to be passed to PyTorch Lightning\n", "\n", - "See [docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/nlp/punctuation_and_capitalization.html#training-punctuation-and-capitalization-model) for full config description." + "See [docs](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/nlp/punctuation_and_capitalization.html#training-punctuation-and-capitalization-model) for full config description." ] }, { diff --git a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb index 9ec4482f30b2..a1b0c4fd8561 100644 --- a/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb +++ b/tutorials/nlp/Zero_Shot_Intent_Recognition.ipynb @@ -630,7 +630,7 @@ "source": [ "## Training Script\n", "\n", - "If you have NeMo installed locally, you can also train the model with [examples/nlp/zero_shot_intent_recognition/zero_shot_intent_train.py](https://github.com/carolmanderson/NeMo/blob/r1.19.0/examples/nlp/zero_shot_intent_recognition/zero_shot_intent_train.py).\n", + "If you have NeMo installed locally, you can also train the model with [examples/nlp/zero_shot_intent_recognition/zero_shot_intent_train.py](https://github.com/carolmanderson/NeMo/blob/main/examples/nlp/zero_shot_intent_recognition/zero_shot_intent_train.py).\n", "\n", "To run training script, use:\n", "\n", diff --git a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb index 5f5641d1fb85..5f1ffd27ea05 100644 --- a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb +++ b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb @@ -280,7 +280,7 @@ "* `max_length` argument - max number of words in a segment for alignment (used only if there are no punctuation marks present in the original text. 
Long non-speech segments are better for segment splitting and are more likely to co-occur with punctuation marks. Random text splitting could deteriorate the quality of the alignment).\n",
 "* out-of-vocabulary words will be removed based on pre-trained ASR model vocabulary, and the text will be changed to lowercase \n",
 "* sentences for alignment with the original punctuation and capitalization will be stored under `$OUTPUT_DIR/processed/*_with_punct.txt`\n",
- "* numbers will be converted from written to their spoken form with `num2words` package. For English, it's recommended to use NeMo normalization tool use `--use_nemo_normalization` argument (not supported if running this segmentation tutorial in Colab, see the text normalization tutorial: [`https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/tutorials/Text_(Inverse)_Normalization.ipynb`](https://colab.research.google.com/github/NVIDIA/NeMo-text-processing/blob/r1.19.0/tutorials/Text_(Inverse)_Normalization.ipynb) for more details). Even `num2words` normalization is usually enough for proper segmentation. However, it does not take audio into account. NeMo supports audio-based normalization for English, German and Russian languages that can be applied to the segmented data as a post-processing step. Audio-based normalization produces multiple normalization options. For example, `901` could be normalized as `nine zero one` or `nine hundred and one`. The audio-based normalization chooses the best match among the possible normalization options and the transcript based on the character error rate. See [https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/nemo_text_processing/text_normalization/normalize_with_audio.py](https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/nemo_text_processing/text_normalization/normalize_with_audio.py) for more details.\n",
+ "* numbers will be converted from written to their spoken form with the `num2words` package. For English, it's recommended to use the NeMo normalization tool with the `--use_nemo_normalization` argument (not supported if running this segmentation tutorial in Colab, see the text normalization tutorial: [`https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/tutorials/Text_(Inverse)_Normalization.ipynb`](https://colab.research.google.com/github/NVIDIA/NeMo-text-processing/blob/r1.19.0/tutorials/Text_(Inverse)_Normalization.ipynb) for more details). Even `num2words` normalization is usually enough for proper segmentation. However, it does not take audio into account. NeMo supports audio-based normalization for English, German and Russian languages that can be applied to the segmented data as a post-processing step. Audio-based normalization produces multiple normalization options. For example, `901` could be normalized as `nine zero one` or `nine hundred and one`. The audio-based normalization chooses the best match among the possible normalization options and the transcript based on the character error rate. 
See [https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/normalize_with_audio.py](https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/nemo_text_processing/text_normalization/normalize_with_audio.py) for more details.\n",
 "\n",
 "### Audio preprocessing:\n",
 "* non '.wav' audio files will be converted to `.wav` format\n",
diff --git a/tutorials/tts/FastPitch_GermanTTS_Training.ipynb b/tutorials/tts/FastPitch_GermanTTS_Training.ipynb
index 7d1ce265856a..512ec8249694 100644
--- a/tutorials/tts/FastPitch_GermanTTS_Training.ipynb
+++ b/tutorials/tts/FastPitch_GermanTTS_Training.ipynb
@@ -172,7 +172,7 @@
 "3. `text`: original text;\n",
 "4. `normalized_text`: normalized text through our text normalization pipeline.\n",
 " \n",
- "This script supports processing either of Thorsten's Neutral Datasets 21.02 or 22.10. In this tutorial, we only focus on the latest 22.10 version dataset. Please refer to [thorsten-muller-s-german-neutral-tts-datasets](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/r1.19.0/tts/datasets.html#thorsten-muller-s-german-neutral-tts-datasets) for more details about Thorsten's datasets. \n",
+ "This script supports processing either of Thorsten's Neutral Datasets 21.02 or 22.10. In this tutorial, we only focus on the latest 22.10 version dataset. Please refer to [thorsten-muller-s-german-neutral-tts-datasets](https://docs.nvidia.com/deeplearning/nemo/user-guide/docs/en/main/tts/datasets.html#thorsten-muller-s-german-neutral-tts-datasets) for more details about Thorsten's datasets. \n",
 "\n",
 "You can run the below command to obtain the final manifests, `train_manifest_text_normed.json`, `val_manifest_text_normed.json` and `test_manifest_text_normed.json`. **Note** that this script would take some time (~2 hours) to download and normalize the entire dataset."
] From 2e2df4aed113156803a1d580d97c849118f94310 Mon Sep 17 00:00:00 2001 From: Somshubra Majumdar Date: Fri, 26 May 2023 09:13:49 -0700 Subject: [PATCH 05/28] VP Fixes for converter + Config management (#6698) (#6738) * [Temp] VP Fixes Signed-off-by: smajumdar * Revert logging Signed-off-by: smajumdar --------- Signed-off-by: smajumdar (cherry picked from commit b6f46a0f36659024bae04f24323a16aa8b09f45a) --- .../megatron_change_num_partitions.py | 99 ++++++++++++++++--- 1 file changed, 83 insertions(+), 16 deletions(-) diff --git a/examples/nlp/language_modeling/megatron_change_num_partitions.py b/examples/nlp/language_modeling/megatron_change_num_partitions.py index 558986e3da36..2938a16098a1 100644 --- a/examples/nlp/language_modeling/megatron_change_num_partitions.py +++ b/examples/nlp/language_modeling/megatron_change_num_partitions.py @@ -56,7 +56,7 @@ --target_pipeline_model_parallel_size=1 \ --target_pipeline_model_parallel_split_rank=0 \ --precision=bf16 - + # Megatron GPT + Virtual Pipeline parallelism python megatron_change_num_partitions.py \ @@ -138,17 +138,34 @@ def set_virtual_parallel_rank_safely(rank: int): def force_cpu_model(cfg): with open_dict(cfg): - # temporarily + # temporarily set to cpu original_cpu_init = cfg.get('use_cpu_initialization', False) - original_amp_o2 = cfg.get('megatron_amp_O2', False) + if 'megatron_amp_O2' in cfg: + key = 'megatron_amp_O2' + original_amp_o2 = cfg.megatron_amp_O2 + elif 'megatron_amp_02' in cfg: + key = 'megatron_amp_02' + original_amp_o2 = cfg.megatron_amp_02 + else: + key, original_amp_o2 = None, None + + # Set new values cfg.use_cpu_initialization = True - cfg.megatron_amp_O2 = False - return cfg, {'original_cpu_init': original_cpu_init, 'original_amp_o2': original_amp_o2} + if key is not None: + cfg[key] = False + + # Setup restore dict + restore_dict = {'use_cpu_initialization': original_cpu_init} # 'megatron_amp_O2': original_amp_o2 + if key is not None: + restore_dict[key] = original_amp_o2 + + return cfg, restore_dict def restore_model_config(cfg, original_dict): with open_dict(cfg): for key, val in original_dict.items(): + logging.info(f"Restoring model config key ({key}) from {cfg[key]} to original value of {val}") cfg[key] = val return cfg @@ -1034,6 +1051,8 @@ def main(): os.path.join(model_filepath, args.ckpt_name) ) + vp_state_dict = torch.load(checkpoint_path, map_location="cpu") + if hparams_filepath is not None: # Force the model onto CPU tmp_cfg = OmegaConf.load(hparams_filepath) @@ -1078,9 +1097,10 @@ def main(): vp_params_tmp = [] for vp_idx in range(vp_size): set_virtual_parallel_rank_safely(vp_idx) - params = [p for p in model.model[vp_idx].parameters()] - # params = model.model[vp_idx].module.state_dict_for_save_checkpoint() - # params = [p for p in params.values()] + vp_params = vp_state_dict[f'model{vp_idx}'] + model.model[vp_idx].module.load_state_dict(vp_params, strict=True) + model.model[vp_idx].module.to('cpu') + params = [p for p in model.model[vp_idx].module.parameters()] vp_params_tmp.append(params) # partitions[pp_rank][vp_idx].append(params) @@ -1141,6 +1161,8 @@ def main(): model = model.to('cpu') model._save_restore_connector = NLPSaveRestoreConnector() + restore_model_config(model.cfg, restore_dict) + vp_param_count = 0 for vp in range(vp_size): for pp in range(pp_size): @@ -1159,15 +1181,62 @@ def main(): else: flat_partitions = {idx: [] for idx in range(pp_size)} - for pp in range(pp_size): - for tp in range(tp_size): - vp_cache = [] - for vp in range(vp_size): - 
vp_cache.extend(partitions[vp][pp][tp])
+        """
+        Under VP convention
+        Notation:
+        Stage  = PP rank
+        Number = GPT model / layer index
+        Ignore TP - every PP has all TP corresponding to that PP
+        chunk_index = the physical index of any [] in the list. Ex idx = 2 in below map corresponds to [2: PP 0 VP 1]
+
+
+        For a PP 2 VP 4 model with 8 GPT layers -
 
-                flat_partitions[pp].append(vp_cache)
+        Indices
+        # Stage 0: [0:PP 0 VP 0] [2:PP 0 VP 1] [4:PP 0 VP 2] [6:PP 0 VP 3]
+        # Stage 1: [1:PP 1 VP 0] [3:PP 1 VP 1] [5:PP 1 VP 2] [7:PP 1 VP 3]
+
+        after conversion will become
+
+        # Stage 0: [0,1,2,3:PP 0]
+        # Stage 1: [4,5,6,7:PP 1]
+
+        """
+        pp_index = 0
+        chunk_counter = 0
+        tp_cache = [[] for _ in range(tp_size)]
+
+        for vp in range(vp_size):
+            for pp in range(pp_size):
+                # Gather all TP under this VP PP combination.
+                # We will accumulate TP parameters from multiple layers in this cache.
+                for tp in range(tp_size):
+                    tp_cache[tp].extend(partitions[vp][pp][tp])
+
+                # This counter indexes the global selection of a VP PP combination in the above map
+                chunk_counter += 1
+
+                # Log the mapping from old VP x PP to new PP index
+                logging.info(f"VP Conversion - vp: {vp} pp: {pp} -> pp_idx: {pp_index}")
+
+                # Every vp_size chunks, we can fill a new PP index in the flat_partitions
+                if chunk_counter % vp_size == 0:
+                    flat_partitions[pp_index].extend(tp_cache)
+                    tp_cache = [[] for _ in range(tp_size)]
+                    pp_index += 1
+
+                    logging.debug(
+                        f"VP merge step: \n"
+                        f"vp: {vp} pp: {pp} pp_idx: {pp_index - 1} "
+                        f"len(flat_partitions): {len(flat_partitions[pp_index - 1])}"
+                    )
+
+        logging.debug(f"PP Size len(flat partitions) : {len(flat_partitions)}")
+        logging.debug(f"TP Size len(flat partitions[0]): {len(flat_partitions[0])}")
+        logging.debug(f"Layers len(flat partitions[0][0]) : {len(flat_partitions[0][0])}")
 
         partitions = flat_partitions
+        del tp_cache
 
     if tgt_tp_size > 1 or tgt_pp_size > 1:
         merge_partition(model, partitions)
     else:
         # Write out the PP 1 TP 1 model to disk
         merge_partition(model, partitions, args.target_file)
 
-    restore_model_config(model.cfg, restore_dict)
-
     # Empty cache memory of all parameters from all PP TP partitions
     partitions.clear()

From 4df8f33a3bb5de4a6e39f4b9ad31dd7d35739783 Mon Sep 17 00:00:00 2001
From: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com>
Date: Sat, 27 May 2023 06:29:54 +0800
Subject: [PATCH 06/28] Fix fastpitch test nightly (#6742)

Signed-off-by: hsiehjackson
---
 .../collections/tts/models/test_fastpitch.py | 24 ++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/tests/collections/tts/models/test_fastpitch.py b/tests/collections/tts/models/test_fastpitch.py
index c77d70cbc44a..2502b1f7cb8a 100644
--- a/tests/collections/tts/models/test_fastpitch.py
+++ b/tests/collections/tts/models/test_fastpitch.py
@@ -16,7 +16,10 @@
 This file implements unit tests for loading all pretrained FastPitch NGC checkpoints and generating Mel-spectrograms. The test duration breakdowns are shown below. In general, each test for a single model is ~25 seconds on an NVIDIA RTX A6000. 
""" +import random + import pytest +import torch from nemo.collections.tts.models import FastPitchModel @@ -38,4 +41,23 @@ def test_inference(pretrained_model, language_specific_text_example): model, language_id = pretrained_model text = language_specific_text_example[language_id] parsed_text = model.parse(text) - _ = model.generate_spectrogram(tokens=parsed_text) + + # Multi-Speaker + speaker_id = None + reference_spec = None + reference_spec_lens = None + + if hasattr(model.fastpitch, 'speaker_emb'): + speaker_id = 0 + + if hasattr(model.fastpitch, 'speaker_encoder'): + if hasattr(model.fastpitch.speaker_encoder, 'lookup_module'): + speaker_id = 0 + if hasattr(model.fastpitch.speaker_encoder, 'gst_module'): + bs, lens, t_spec = parsed_text.shape[0], random.randint(50, 100), model.cfg.n_mel_channels + reference_spec = torch.rand(bs, lens, t_spec) + reference_spec_lens = torch.tensor([lens]).long().expand(bs) + + _ = model.generate_spectrogram( + tokens=parsed_text, speaker=speaker_id, reference_spec=reference_spec, reference_spec_lens=reference_spec_lens + ) From e806e1166684fcbac732fc0f664b4654b9b2e748 Mon Sep 17 00:00:00 2001 From: Eric Harper Date: Fri, 26 May 2023 16:41:46 -0600 Subject: [PATCH 07/28] check for first or last stage (#6708) * check for first or last stage Signed-off-by: ericharper * remove redundant check Signed-off-by: ericharper * fix typo Signed-off-by: ericharper * add map_location Signed-off-by: ericharper --------- Signed-off-by: ericharper --- .../language_modeling/megatron_gpt_eval.py | 1 + .../modules/common/text_generation_utils.py | 65 ++++++++++--------- 2 files changed, 36 insertions(+), 30 deletions(-) diff --git a/examples/nlp/language_modeling/megatron_gpt_eval.py b/examples/nlp/language_modeling/megatron_gpt_eval.py index 0ac155374512..14cdbf8a760c 100644 --- a/examples/nlp/language_modeling/megatron_gpt_eval.py +++ b/examples/nlp/language_modeling/megatron_gpt_eval.py @@ -203,6 +203,7 @@ def main(cfg) -> None: trainer=trainer, override_config_path=pretrained_cfg, save_restore_connector=save_restore_connector, + map_location=f'cuda:{trainer.local_rank}', # map_location is needed for converted models ) elif cfg.checkpoint_dir: app_state = AppState() diff --git a/nemo/collections/nlp/modules/common/text_generation_utils.py b/nemo/collections/nlp/modules/common/text_generation_utils.py index b39ac406d4a4..8cfb02c5e321 100644 --- a/nemo/collections/nlp/modules/common/text_generation_utils.py +++ b/nemo/collections/nlp/modules/common/text_generation_utils.py @@ -135,36 +135,41 @@ def megatron_gpt_generate(model, inputs, tokenizer, length_params, sampling_para def get_computeprob_response(tokenizer, response, inputs): - compute_prob_response = {} - new_token_ids = [] - new_tokens = [] - new_texts = [] - log_probs = [] - full_logprobs = [] - offsets = [] - for batch_id in range(len(response['tokens'])): - if isinstance(inputs, (list, tuple)): - if isinstance(inputs[0], str): - new_token_id = tokenizer.text_to_ids(inputs[batch_id]) - new_text = inputs[batch_id] - token_len = len(new_token_id) - elif isinstance(inputs[0], torch.Tensor): - token_len = int(inputs[1][batch_id].item()) - new_token_id = inputs[0][batch_id][:token_len].tolist() - new_text = tokenizer.ids_to_text(new_token_id) - new_token_ids.append(new_token_id) - new_tokens.append(response['tokens'][batch_id][:token_len]) - new_texts.append(new_text) - log_probs.append(response['logprob'][batch_id][:token_len]) - full_logprobs.append(response['full_logprob'][batch_id][:token_len]) - 
offsets.append(response['offsets'][batch_id][:-1]) - compute_prob_response['sentences'] = new_texts - compute_prob_response['tokens'] = new_tokens - compute_prob_response['token_ids'] = new_token_ids - compute_prob_response['logprob'] = log_probs - compute_prob_response['full_logprob'] = full_logprobs - compute_prob_response['offsets'] = offsets - return compute_prob_response + if parallel_state.is_pipeline_first_stage() or parallel_state.is_pipeline_last_stage(): + # we only have a response on the first and last pipeline stages + compute_prob_response = {} + new_token_ids = [] + new_tokens = [] + new_texts = [] + log_probs = [] + full_logprobs = [] + offsets = [] + for batch_id in range(len(response['tokens'])): + if isinstance(inputs, (list, tuple)): + if isinstance(inputs[0], str): + new_token_id = tokenizer.text_to_ids(inputs[batch_id]) + new_text = inputs[batch_id] + token_len = len(new_token_id) + elif isinstance(inputs[0], torch.Tensor): + token_len = int(inputs[1][batch_id].item()) + new_token_id = inputs[0][batch_id][:token_len].tolist() + new_text = tokenizer.ids_to_text(new_token_id) + new_token_ids.append(new_token_id) + new_tokens.append(response['tokens'][batch_id][:token_len]) + new_texts.append(new_text) + log_probs.append(response['logprob'][batch_id][:token_len]) + full_logprobs.append(response['full_logprob'][batch_id][:token_len]) + offsets.append(response['offsets'][batch_id][:-1]) + compute_prob_response['sentences'] = new_texts + compute_prob_response['tokens'] = new_tokens + compute_prob_response['token_ids'] = new_token_ids + compute_prob_response['logprob'] = log_probs + compute_prob_response['full_logprob'] = full_logprobs + compute_prob_response['offsets'] = offsets + return compute_prob_response + else: + # intermediate stages + return None def get_batch(model, tokenizer, context_tokens): From dbd6a565992842bc8a04714c154b3e124da4c049 Mon Sep 17 00:00:00 2001 From: Markel Sanz Ausin Date: Mon, 29 May 2023 16:01:52 -0700 Subject: [PATCH 08/28] Bug fix to restore act ckpt (#6753) * Bug fix to restore act ckpt Signed-off-by: Markel Sanz Ausin * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: Markel Sanz Ausin Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../language_modeling/megatron_gpt_model.py | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index e9545361b88d..809825752cab 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -1143,16 +1143,20 @@ def _restore_activation_checkpointing_args(self): _reset_activation_checkpointing_args. """ # Restore config values. 
- self.cfg.activations_checkpoint_granularity = self.last_checkpointing_granularity - self.cfg.activations_checkpoint_method = self.last_checkpointing_method - self.cfg.activations_checkpoint_num_layers = self.last_checkpointing_num_layers + self.cfg.activations_checkpoint_granularity = self.last_activations_checkpoint_granularity + self.cfg.activations_checkpoint_method = self.last_activations_checkpoint_method + self.cfg.activations_checkpoint_num_layers = self.last_activations_checkpoint_num_layers self.cfg.activations_checkpoint_layers_per_pipeline = self.last_activations_checkpoint_layers_per_pipeline # Restore model parameters. for module in self.get_gpt_module_list(): - module.language_model.encoder.activations_checkpoint_granularity = self.last_checkpointing_granularity - module.language_model.encoder.activations_checkpoint_method = self.last_checkpointing_method - module.language_model.encoder.activations_checkpoint_num_layers = self.last_checkpointing_num_layers + module.language_model.encoder.activations_checkpoint_granularity = ( + self.last_activations_checkpoint_granularity + ) + module.language_model.encoder.activations_checkpoint_method = self.last_activations_checkpoint_method + module.language_model.encoder.activations_checkpoint_num_layers = ( + self.last_activations_checkpoint_num_layers + ) module.language_model.encoder.activations_checkpoint_layers_per_pipeline = ( self.last_activations_checkpoint_layers_per_pipeline ) From a0f757e257ad91fb842024d7f6a2d5a189338626 Mon Sep 17 00:00:00 2001 From: Markel Sanz Ausin Date: Wed, 31 May 2023 10:22:23 -0700 Subject: [PATCH 09/28] Bug fix to reset sequence parallelism (#6756) * Bug fix to reset sequence parallelism Signed-off-by: Markel Sanz Ausin * Update seq par reset/restore Signed-off-by: Markel Sanz Ausin * Add nested loop Signed-off-by: Markel Sanz Ausin --------- Signed-off-by: Markel Sanz Ausin --- .../models/language_modeling/megatron_gpt_model.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 809825752cab..66fa0ed2716e 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -1170,12 +1170,13 @@ def _reset_sequence_parallelism_args(self): self.last_sequence_parallel = self.cfg.sequence_parallel # Reset config values. Needed for calling generate. - self.cfg.sequence_parallel = None + self.cfg.sequence_parallel = False # Reset model parameters. - for module in self.get_gpt_module_list(): - module.language_model.encoder.sequence_parallel = None + for mod in module.modules(): + if hasattr(mod, "sequence_parallel"): + mod.sequence_parallel = self.last_sequence_parallel def _restore_sequence_parallelism_args(self): """ Restores the sequence parallelism parameters using the values saved by @@ -1187,4 +1188,6 @@ def _restore_sequence_parallelism_args(self): # Restore model parameters. 
for module in self.get_gpt_module_list(): - module.language_model.encoder.sequence_parallel = self.last_sequence_parallel + for mod in module.modules(): + if hasattr(mod, "sequence_parallel"): + mod.sequence_parallel = self.last_sequence_parallel From 39dd654c6b37e42c35e14d2994caa1ed92c11c43 Mon Sep 17 00:00:00 2001 From: Abhinav Khattar Date: Wed, 31 May 2023 10:46:19 -0700 Subject: [PATCH 10/28] Fix checkpointed forward and add test for full activation checkpointing (#6744) * fix checkpointed forward and add test for full activation checkpointing Signed-off-by: Abhinav Khattar * add method Signed-off-by: Abhinav Khattar * add method Signed-off-by: Abhinav Khattar --------- Signed-off-by: Abhinav Khattar --- Jenkinsfile | 2 ++ nemo/collections/nlp/modules/common/megatron/transformer.py | 3 --- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 27fbf11148f6..780e3e4b43c4 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -3175,6 +3175,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.hidden_size=256 \ model.num_attention_heads=8 \ model.activations_checkpoint_method='block' \ + model.activations_checkpoint_granularity='full' \ model.activations_checkpoint_num_layers=1 \ model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" @@ -3211,6 +3212,7 @@ assert_frame_equal(training_curve, gt_curve, rtol=1e-3, atol=1e-3)"''' model.hidden_size=256 \ model.num_attention_heads=8 \ model.activations_checkpoint_method='block' \ + model.activations_checkpoint_granularity='full' \ model.activations_checkpoint_num_layers=1 \ model.data.data_prefix=[.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document,.5,/home/TestData/nlp/megatron_gpt/data/gpt/simple_wiki_gpt_preproc_text_document] \ model.data.index_mapping_dir=examples/nlp/language_modeling/gpt_index_mappings" diff --git a/nemo/collections/nlp/modules/common/megatron/transformer.py b/nemo/collections/nlp/modules/common/megatron/transformer.py index 0f6112e08036..9a09a9f9aa0b 100644 --- a/nemo/collections/nlp/modules/common/megatron/transformer.py +++ b/nemo/collections/nlp/modules/common/megatron/transformer.py @@ -1268,9 +1268,6 @@ def custom_forward(*inputs): return custom_forward - # Make sure memory is freed. - tensor_parallel.reset_checkpointed_activations_memory_buffer() - if self.activations_checkpoint_method == 'uniform': # Uniformly divide the total number of Transformer layers and checkpoint # the input activation of each divided chunk. From 216bcabbab57f46f9f2b8cc4caba855fdf5da532 Mon Sep 17 00:00:00 2001 From: Somshubra Majumdar Date: Wed, 31 May 2023 15:54:55 -0700 Subject: [PATCH 11/28] Fix Links (#6777) Signed-off-by: smajumdar --- tutorials/tools/CTC_Segmentation_Tutorial.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb index 5f1ffd27ea05..4d64acedbecf 100644 --- a/tutorials/tools/CTC_Segmentation_Tutorial.ipynb +++ b/tutorials/tools/CTC_Segmentation_Tutorial.ipynb @@ -280,7 +280,7 @@ "* `max_length` argument - max number of words in a segment for alignment (used only if there are no punctuation marks present in the original text. 
Long non-speech segments are better for segments split and are more likely to co-occur with punctuation marks. Random text split could deteriorate the quality of the alignment.\n", "* out-of-vocabulary words will be removed based on pre-trained ASR model vocabulary, and the text will be changed to lowercase \n", "* sentences for alignment with the original punctuation and capitalization will be stored under `$OUTPUT_DIR/processed/*_with_punct.txt`\n", - "* numbers will be converted from written to their spoken form with `num2words` package. For English, it's recommended to use NeMo normalization tool use `--use_nemo_normalization` argument (not supported if running this segmentation tutorial in Colab, see the text normalization tutorial: [`https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/tutorials/Text_(Inverse)_Normalization.ipynb`](https://colab.research.google.com/github/NVIDIA/NeMo-text-processing/blob/r1.19.0/tutorials/Text_(Inverse)_Normalization.ipynb) for more details). Even `num2words` normalization is usually enough for proper segmentation. However, it does not take audio into account. NeMo supports audio-based normalization for English, German and Russian languages that can be applied to the segmented data as a post-processing step. Audio-based normalization produces multiple normalization options. For example, `901` could be normalized as `nine zero one` or `nine hundred and one`. The audio-based normalization chooses the best match among the possible normalization options and the transcript based on the character error rate. See [https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/normalize_with_audio.py](https://github.com/NVIDIA/NeMo-text-processing/blob/r1.19.0/nemo_text_processing/text_normalization/normalize_with_audio.py) for more details.\n", + "* numbers will be converted from written to their spoken form with the `num2words` package. For English, it's recommended to use the NeMo normalization tool via the `--use_nemo_normalization` argument (not supported if running this segmentation tutorial in Colab, see the text normalization tutorial: [`https://github.com/NVIDIA/NeMo-text-processing/blob/main/tutorials/Text_(Inverse)_Normalization.ipynb`](https://colab.research.google.com/github/NVIDIA/NeMo-text-processing/blob/main/tutorials/Text_(Inverse)_Normalization.ipynb) for more details). Even `num2words` normalization is usually enough for proper segmentation. However, it does not take audio into account. NeMo supports audio-based normalization for English, German and Russian languages that can be applied to the segmented data as a post-processing step. Audio-based normalization produces multiple normalization options. For example, `901` could be normalized as `nine zero one` or `nine hundred and one`. The audio-based normalization chooses the best match among the possible normalization options and the transcript based on the character error rate. 
See [https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/normalize_with_audio.py](https://github.com/NVIDIA/NeMo-text-processing/blob/main/nemo_text_processing/text_normalization/normalize_with_audio.py) for more details.\n", "\n", "### Audio preprocessing:\n", "* non '.wav' audio files will be converted to `.wav` format\n", From 4ecc769381d0a35f9249c02ccf26a3d8e72f98ca Mon Sep 17 00:00:00 2001 From: Abhinav Khattar Date: Thu, 1 Jun 2023 09:42:28 -0700 Subject: [PATCH 12/28] add call to p2p overlap (#6779) * add call to p2p overlap Signed-off-by: Abhinav Khattar * update Jenkins for test Signed-off-by: Abhinav Khattar --------- Signed-off-by: Abhinav Khattar --- Jenkinsfile | 9 +++++++++ .../nlp/models/language_modeling/megatron_gpt_model.py | 2 ++ 2 files changed, 11 insertions(+) diff --git a/Jenkinsfile b/Jenkinsfile index 780e3e4b43c4..7d0b8ee28e87 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -57,6 +57,15 @@ pipeline { } } + stage('Megatron Core installation') { + steps { + sh 'git clone https://github.com/NVIDIA/Megatron-LM.git && \ + cd Megatron-LM && \ + git checkout e6d7e09845590d0a36bc7f29eb28db974fb8da4e && \ + pip install -e .' + } + } + stage('PyTorch Lightning version') { steps { sh 'python -c "import pytorch_lightning; print(pytorch_lightning.__version__)"' diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 66fa0ed2716e..7b67f1602346 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -371,6 +371,8 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): no_sync_func=no_sync_func, grad_sync_func=grad_sync_func, param_sync_func=param_sync_func, + overlap_p2p_comm=self.cfg.get('overlap_p2p_comm', False), + batch_p2p_comm=self.cfg.get('batch_p2p_comm', True), ) # only the last stages of the pipeline return losses From 1486b1239aa652bb9906f6d19c63a5a532621214 Mon Sep 17 00:00:00 2001 From: Eric Harper Date: Thu, 1 Jun 2023 11:05:55 -0600 Subject: [PATCH 13/28] Fix get_parameters when using main params optimizer (#6764) * fix get param Signed-off-by: ericharper * change name Signed-off-by: ericharper --------- Signed-off-by: ericharper --- .../models/language_modeling/megatron_base_model.py | 12 +++++++----- nemo/core/optim/optimizer_with_main_params.py | 4 ++-- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index 1237491fa39c..2aaedbe5a806 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -240,14 +240,16 @@ def _vocab_size_with_padding(self, orig_vocab_size, make_vocab_size_divisible_by ) return after - def _get_parameters(self): + def get_parameters_with_grad(self): """ - private method to load all the trainable parameters from optimizer param groups + Get all parameters with grad from optimizer param groups """ params = [] for param_group in self._optimizer_param_groups: for param in param_group['params']: - if param.requires_grad: # (@adithyare) adapter training with pp>1 can result in params with no grads + if ( + param.grad is not None + ): # (@adithyare) adapter training with pp>1 can result in params with no grads params.append(param) return params 
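The filter change above is the substance of this fix: `get_parameters_with_grad` keeps only parameters whose `.grad` is populated, whereas the old `_get_parameters` kept everything with `requires_grad=True`, which under pipeline parallelism > 1 can include adapter parameters that never received a gradient and would then be handed to `clip_grad_norm_fp32`. A minimal, self-contained sketch of the distinction, using hypothetical toy parameters rather than NeMo code:

import torch

# Toy optimizer param groups: three trainable params, only one received a gradient.
param_groups = [{'params': [torch.nn.Parameter(torch.ones(2)) for _ in range(3)]}]
param_groups[0]['params'][0].grad = torch.zeros(2)

by_requires_grad = [p for g in param_groups for p in g['params'] if p.requires_grad]
by_grad = [p for g in param_groups for p in g['params'] if p.grad is not None]

assert len(by_requires_grad) == 3  # includes params whose .grad is still None
assert len(by_grad) == 1           # only params that can actually be clipped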
@@ -272,9 +274,9 @@ def configure_gradient_clipping(self, *args, **kwargs): else: if self.megatron_amp_o2: # grep fp32 master parameters for gradient clipping - parameters = self._optimizer.get_parameters() + parameters = self._optimizer.get_parameters_with_grad() else: - parameters = self._get_parameters() + parameters = self.get_parameters_with_grad() grad_norm = clip_grad_norm_fp32(parameters=parameters, max_norm=clip_val) self.log('grad_norm', grad_norm, rank_zero_only=True, batch_size=1) diff --git a/nemo/core/optim/optimizer_with_main_params.py b/nemo/core/optim/optimizer_with_main_params.py index c9790ee2a139..44d54a0e63ff 100644 --- a/nemo/core/optim/optimizer_with_main_params.py +++ b/nemo/core/optim/optimizer_with_main_params.py @@ -488,11 +488,11 @@ def async_master_grads_allreudce(self): def fp32_grad_accumulation(self): return self._fp32_grad_accum - def get_parameters(self): + def get_parameters_with_grad(self): params = [] for param_group in self.optimizer.param_groups: for param in param_group['params']: - if param.requires_grad: # (@adithyare) added to enable pp>1 training for adapters + if param.grad is not None: # (@adithyare) added to enable pp>1 training for adapters params.append(param) return params From aff5217f2149bee31355ee85ecdd0db14ce27eea Mon Sep 17 00:00:00 2001 From: wdykas <73254672+wdykas@users.noreply.github.com> Date: Thu, 1 Jun 2023 14:25:27 -0400 Subject: [PATCH 14/28] Lddl bert (#6761) * initial POC for LDDL Bert * Finish LDDL POC * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * address comments * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * fix merge head * resolving merge * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * add support for val/test loaders * change to new LDDL class + add winding * fix logging level * fix winding * test fix * fixes to winding * add file system * add prepemption optimizations * more logging * more prints * better logging * asfsf * add barrier * removing prints * working with mb lddl loader * final changes * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * update requirements file with LDDL Signed-off-by: wdykas * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * revert adding to requirements --------- Signed-off-by: wdykas Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: Eric Harper --- .../conf/megatron_bert_config.yaml | 2 +- .../megatron_bert_pretraining.py | 5 +- .../language_modeling/megatron_bert_model.py | 129 +++++++++++++++++- 3 files changed, 126 insertions(+), 10 deletions(-) diff --git a/examples/nlp/language_modeling/conf/megatron_bert_config.yaml b/examples/nlp/language_modeling/conf/megatron_bert_config.yaml index cbc0562e2904..a7e3364d41b4 100644 --- a/examples/nlp/language_modeling/conf/megatron_bert_config.yaml +++ b/examples/nlp/language_modeling/conf/megatron_bert_config.yaml @@ -133,7 +133,7 @@ model: seq_length: ${model.encoder_seq_length} skip_warmup: True num_workers: 0 - dataloader_type: single # cyclic + dataloader_type: single # cyclic, LDDL reset_position_ids: False # Reset position ids after end-of-document token reset_attention_mask: False # Reset attention mask after end-of-document token eod_mask_loss: False # Mask loss for the end of document tokens diff --git 
a/examples/nlp/language_modeling/megatron_bert_pretraining.py b/examples/nlp/language_modeling/megatron_bert_pretraining.py index e6abee295a1a..5f0b74db92b6 100644 --- a/examples/nlp/language_modeling/megatron_bert_pretraining.py +++ b/examples/nlp/language_modeling/megatron_bert_pretraining.py @@ -29,11 +29,12 @@ from nemo.utils import logging from nemo.utils.exp_manager import exp_manager -mp.set_start_method("spawn", force=True) - @hydra_runner(config_path="conf", config_name="megatron_bert_config") def main(cfg) -> None: + if cfg.model.data.dataloader_type != "LDDL": + mp.set_start_method("spawn", force=True) + logging.info("\n\n************** Experiment configuration ***********") logging.info(f'\n{OmegaConf.to_yaml(cfg)}') diff --git a/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py b/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py index 64430a669269..cac1a50e98ae 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_bert_model.py @@ -40,6 +40,7 @@ from nemo.core.neural_types import ChannelType, MaskType, NeuralType from nemo.utils import AppState, logging + try: from apex.transformer.pipeline_parallel.utils import get_num_microbatches @@ -49,6 +50,14 @@ HAVE_APEX = False +try: + import logging + from lddl.torch_mp import get_bert_pretrain_data_loader + + HAVE_LDDL = True +except (ImportError, ModuleNotFoundError): + HAVE_LDDL = False + try: from megatron.core import parallel_state from megatron.core.pipeline_parallel.schedules import get_forward_backward_func @@ -300,7 +309,12 @@ def training_step(self, dataloader_iter, batch_idx): for param in module.embedding.parameters(): param.data_ptr() - tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] + if self.cfg.data.dataloader_type == "LDDL": + # this is of type bert dataset + seq_length = dataloader_iter.iterator.loaders.get_seqlen() + tensor_shape = [seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] + else: + tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] # run forward and backwards passes for an entire global batch # we do this inside training_step to support pipeline parallelism @@ -324,7 +338,10 @@ def training_step(self, dataloader_iter, batch_idx): loss_tensor = torch.vstack(loss_tensors_list) loss_mean = loss_tensor.mean(axis=0) else: - loss_mean = torch.tensor([0.0, 0.0]).cuda() + if self.cfg.bert_binary_head == True: + loss_mean = torch.tensor([0.0, 0.0, 0.0]).cuda() + else: + loss_mean = torch.tensor([0.0, 0.0]).cuda() # when using sequence parallelism, the sequence parallel layernorm grads must be all-reduced if self.cfg.get('tensor_model_parallel_size', 1) > 1 and self.cfg.get('sequence_parallel', False): @@ -404,7 +421,12 @@ def allreduce_first_last_embeddings(self): torch.distributed.all_reduce(grad, group=parallel_state.get_embedding_group()) def validation_step(self, dataloader_iter, batch_idx): - tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] + + if self.cfg.data.dataloader_type == "LDDL": + seq_length = dataloader_iter.iterator.get_seqlen() + tensor_shape = [seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] + else: + tensor_shape = [self.cfg.encoder_seq_length, self.cfg.micro_batch_size, self.cfg.hidden_size] fwd_bwd_function = get_forward_backward_func() @@ -476,6 +498,95 @@ def loss_func(self, loss_mask, sentence_order, 
output_tensor): # [lm_loss]) # return loss, {'lm loss': averaged_losses[0]} + def build_LDDL_data(self, cfg): + if not HAVE_LDDL: + raise ImportError( + "LDDL was not found. Please see the LDDL README for installation instructions: https://github.com/NVIDIA/LDDL#installation." + ) + logging.info(f'Starting building LDDL Dataloaders') + self._train_ds = None + self._validation_ds = None + self._test_ds = None + data_parallel_size = parallel_state.get_data_parallel_world_size() + num_micro_batches = self.cfg.global_batch_size // (self.cfg.micro_batch_size * data_parallel_size) + global_batch_size_on_this_data_parallel_rank = num_micro_batches * self.cfg.micro_batch_size + samples_consumed_dploader = self.compute_consumed_samples(0) // data_parallel_size + # We run under the assumption that the datapath is the prefix if LDDL dataloader + train_lddl_data_path = self.cfg.data.data_prefix[0] + self._train_dl = get_bert_pretrain_data_loader( + train_lddl_data_path, + dp_rank=parallel_state.get_data_parallel_rank(), + local_rank=self.local_rank, + shuffle_buffer_size=16384, + shuffle_buffer_warmup_factor=16, + vocab_file=self.cfg.tokenizer.vocab_file, + data_loader_kwargs={ + 'batch_size': global_batch_size_on_this_data_parallel_rank, + 'num_workers': self.cfg.data.num_workers, + 'prefetch_factor': 2, + }, + mlm_probability=0.15, + base_seed=self.cfg.seed, + log_level=logging.CRITICAL, + log_dir="/tmp/log", + return_raw_samples=False, + start_epoch=0, + sequence_length_alignment=8, + ignore_index=-1, + samples_seen=samples_consumed_dploader, + micro_batch_size=self.cfg.micro_batch_size, + ) + logging.info(f'Completed build train LDDL Dataloader') + if len(self.cfg.data.data_prefix) > 1: + val_lddl_data_path = self.cfg.data.data_prefix[1] + self._validation_dl = get_bert_pretrain_data_loader( + val_lddl_data_path, + dp_rank=parallel_state.get_data_parallel_rank(), + local_rank=self.local_rank, + shuffle_buffer_size=16384, + shuffle_buffer_warmup_factor=16, + vocab_file=self.cfg.tokenizer.vocab_file, + data_loader_kwargs={ + 'batch_size': global_batch_size_on_this_data_parallel_rank, + 'num_workers': self.cfg.data.num_workers, + 'prefetch_factor': 2, + }, + mlm_probability=0.15, + base_seed=self.cfg.seed, + log_level=logging.CRITICAL, + log_dir="/tmp/log", + return_raw_samples=False, + start_epoch=0, + sequence_length_alignment=8, + ignore_index=-1, + micro_batch_size=self.cfg.micro_batch_size, + ) + if len(self.cfg.data.data_prefix) > 2: + test_lddl_data_path = self.cfg.data.data_prefix[2] + self._test_dl = get_bert_pretrain_data_loader( + test_lddl_data_path, + dp_rank=parallel_state.get_data_parallel_rank(), + local_rank=self.local_rank, + shuffle_buffer_size=16384, + shuffle_buffer_warmup_factor=16, + vocab_file=self.cfg.tokenizer.vocab_file, + data_loader_kwargs={ + 'batch_size': global_batch_size_on_this_data_parallel_rank, + 'num_workers': self.cfg.data.num_workers, + 'prefetch_factor': 2, + }, + mlm_probability=0.15, + base_seed=self.cfg.seed, + log_level=logging.CRITICAL, + log_dir="/tmp/log", + return_raw_samples=False, + start_epoch=0, + sequence_length_alignment=8, + ignore_index=-1, + micro_batch_size=self.cfg.micro_batch_size, + ) + logging.info(f'Finished building LDDL Dataloaders') + def build_train_valid_test_datasets(self): logging.info('Building Bert datasets.') if self.trainer.limit_val_batches > 1.0 and isinstance(self.trainer.limit_val_batches, float): @@ -581,10 +692,14 @@ def setup(self, stage=None): else: # TODO: consider adding a ModelPT guard to check if model is being 
restored. # allowing restored models to optionally setup datasets - self.build_train_valid_test_datasets() - self.setup_training_data(self.cfg.data) - self.setup_validation_data(self.cfg.data) - self.setup_test_data(self.cfg.data) + if self.cfg.data.dataloader_type == "LDDL": + self.build_LDDL_data(self.cfg.data) + torch.distributed.barrier() + else: + self.build_train_valid_test_datasets() + self.setup_training_data(self.cfg.data) + self.setup_validation_data(self.cfg.data) + self.setup_test_data(self.cfg.data) # when using pipeline model parallel the final stage need to initialize word embeddings if parallel_state.get_pipeline_model_parallel_world_size() > 1: From 4bbb3c663b07045a88b9e095e890ce2e461efd6b Mon Sep 17 00:00:00 2001 From: Tim Moon <4406448+timmoon10@users.noreply.github.com> Date: Thu, 1 Jun 2023 12:07:10 -0700 Subject: [PATCH 15/28] Debug Transformer Engine FP8 support with Megatron-core infrastructure (#6740) * Construct FP8 amax reduction group Signed-off-by: Tim Moon * update core for CI Signed-off-by: Abhinav Khattar --------- Signed-off-by: Tim Moon Signed-off-by: Abhinav Khattar Co-authored-by: Abhinav Khattar --- nemo/collections/nlp/parts/nlp_overrides.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py index a43e06669489..e7d74fb61fd0 100644 --- a/nemo/collections/nlp/parts/nlp_overrides.py +++ b/nemo/collections/nlp/parts/nlp_overrides.py @@ -168,6 +168,7 @@ def init_model_parallel(self, global_rank: int, world_size: int) -> None: pipeline_model_parallel_size=app_state.pipeline_model_parallel_size, virtual_pipeline_model_parallel_size=app_state.virtual_pipeline_model_parallel_size, pipeline_model_parallel_split_rank=app_state.pipeline_model_parallel_split_rank, + use_fp8=app_state.use_fp8, ) # assert that fake tp and pp rank match after model parallel init @@ -405,7 +406,7 @@ class PEFTSaveRestoreConnector(NLPSaveRestoreConnector): Args: peft_model_nemo_path: Used to provide the .nemo file corresponding to a PEFT model (which will only contain a small set of params) peft_model_ckpt_path: Used to provide the path to .ckpt files of a PEFt model. This is required when no .nemo is available (yet) such as during resumed training. - If both are provided the peft_model_ckpt_path takes precedence. + If both are provided the peft_model_ckpt_path takes precedence. If neither are provided, PEFT params are initialized at random (not loaded from any external source). 
""" From e4460d1a8e728251aae87049ddeaf9af328cbc9c Mon Sep 17 00:00:00 2001 From: Sangkug Lym Date: Thu, 1 Jun 2023 13:29:09 -0700 Subject: [PATCH 16/28] Tensor-parallel communication overlap with userbuffer backend (#6780) * add interfaces for tp_communication overlap [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci Interface to provide custom userbuffer communicator settings by yaml file [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci * Construct MPI process group for userbuffers support Signed-off-by: Tim Moon --------- Signed-off-by: Tim Moon Co-authored-by: Tim Moon Co-authored-by: Abhinav Khattar --- .../conf/megatron_gpt_config.yaml | 7 +++++ .../language_modeling/megatron/gpt_model.py | 2 ++ .../language_modeling/megatron_base_model.py | 9 ++++++ .../language_modeling/megatron_gpt_model.py | 28 +++++++++++++++++++ .../modules/common/megatron/language_model.py | 4 +++ .../modules/common/megatron/megatron_init.py | 2 ++ .../modules/common/megatron/transformer.py | 4 +++ nemo/collections/nlp/parts/nlp_overrides.py | 4 +++ nemo/utils/app_state.py | 17 +++++++++++ 9 files changed, 77 insertions(+) diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml index d502f255bd8e..2135f1f0ce4e 100755 --- a/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml +++ b/examples/nlp/language_modeling/conf/megatron_gpt_config.yaml @@ -166,6 +166,13 @@ model: fp8_amax_compute_algo: most_recent # 'most_recent' or 'max'. Algorithm for computing amax from history reduce_amax: True # Perform reduction to sync amax tensors across GPUs after every iteration use_emha: False # Use fused multi-head attention for large sequence-length. Note this is not yet supported. Please set to False. + ub_tp_comm_overlap: False + # Use userbuffer backend to overlap tensor-parallel communications with computes. + # This feature is only available with Transformer Engine and squence parallelism enabled and, currently, supports only GPT models. + ub_tp_comm_overlap_cfg: null + # A yaml file with userbuffer communicator configurations. This file should provide `method`, `dtype`, `num_sm`, `num_splits`, + # `cga_size`, `num_splits`, `set_sm_margin`, and `aggregate` for the communicators to use custom settings. + # If the configuration file is not provided a default setting is used for all communicators. data: # Path to data must be specified by the user. 
diff --git a/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py index e890e6ae4807..6dc387466135 100755 --- a/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron/gpt_model.py @@ -163,6 +163,7 @@ def __init__( fp8_amax_compute_algo='most_recent', reduce_amax=True, use_emha=False, + ub_tp_comm_overlap=False, ): super(GPTModel, self).__init__(share_token_embeddings=share_embeddings_and_output_weights) @@ -243,6 +244,7 @@ def __init__( fp8_amax_compute_algo=fp8_amax_compute_algo, reduce_amax=reduce_amax, use_emha=use_emha, + ub_tp_comm_overlap=ub_tp_comm_overlap, ) if self.share_embeddings_and_output_weights: diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index 2aaedbe5a806..563988323203 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -123,6 +123,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer, no_lm_init=True): global_batch_size=cfg.get('global_batch_size'), rampup_batch_size=cfg.get('rampup_batch_size'), use_fp8=cfg.get('fp8', False), + init_mpi_proc_group=cfg.get('ub_tp_comm_overlap', False), seed=self.cfg.get('seed', 1234), apex_transformer_log_level=self.cfg.get('apex_transformer_log_level', 30), ) @@ -540,6 +541,14 @@ def _validate_and_override_config(self): 'Make sure the number of model chunks is the same across all pipeline stages.' ) + if self.cfg.get('ub_tp_comm_overlap', False): + if not self.cfg.get('transformer_engine', False) or not self.cfg.get('sequence_parallel', False): + logging.info( + "Userbuffer tensor-parallel communication overlap is available with both Transformer Engine and sequence-parallelism." + ) + with open_dict(self.cfg): + self.cfg.ub_tp_comm_overlap = False + def is_data_parallel_rank_zero(self): if is_global_rank_zero(): return True diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 7b67f1602346..3f5dd8110774 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -81,6 +81,7 @@ try: import transformer_engine + from transformer_engine.pytorch import module as te_module HAVE_TE = True @@ -179,6 +180,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer): self._nsys_profile_end_step *= grad_accum_steps self.get_attention_mask_from_fusion = self.cfg.get('get_attention_mask_from_fusion', True) + self.initialize_ub = self.cfg.get('ub_tp_comm_overlap', False) def get_gpt_module_list(self): if isinstance(self.model, list): @@ -254,6 +256,7 @@ def model_provider_func(self, pre_process, post_process): fp8_amax_compute_algo=self.cfg.get('fp8_amax_compute_algo', 'most_recent'), reduce_amax=self.cfg.get('reduce_amax', True), use_emha=self.cfg.get('use_emha', False), + ub_tp_comm_overlap=self.cfg.get('ub_tp_comm_overlap', False), ) return model @@ -410,6 +413,31 @@ def training_step(self, dataloader_iter, batch_idx): The input batch to each micro-batch is fetched using the dataloader function in the micro-batch fwd function. """ + # Initialize userbuffer communicators. Initialization is done only once at the + # beginning of the first training step. 
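# (The guard that follows runs once per job: `input_shape` below is the flattened
# activation shape, sequence length * micro batch size rows by hidden_size columns,
# and `self.initialize_ub` is cleared after `te_module.initialize_ub` returns so the
# userbuffer setup is not repeated on subsequent training steps.)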
+ if self.initialize_ub: + input_shape = [ + self.cfg.get('encoder_seq_length') * self.cfg.get('micro_batch_size'), + self.cfg.get('hidden_size'), + ] + ub_cfg_file_name = self.cfg.get('ub_tp_comm_overlap_cfg', None) + if ub_cfg_file_name is not None: + try: + import yaml + + with open(ub_cfg_file_name, 'r') as ub_cfg_file: + ub_cfgs = yaml.safe_load(ub_cfg_file) + except (ImportError, TypeError): + print("Fail to read ub_tp_comm_overlap config file.") + else: + ub_cfgs = None + te_module.initialize_ub( + shape=input_shape, + tp_size=self.cfg.get('tensor_model_parallel_size'), + use_fp8=self.cfg.get('fp8'), + ub_cfgs=ub_cfgs, + ) + self.initialize_ub = False # we zero grads here because we also call backward in the megatron-core fwd/bwd functions self._optimizer.zero_grad() diff --git a/nemo/collections/nlp/modules/common/megatron/language_model.py b/nemo/collections/nlp/modules/common/megatron/language_model.py index b8b12cf0caec..92a1b004b0e8 100755 --- a/nemo/collections/nlp/modules/common/megatron/language_model.py +++ b/nemo/collections/nlp/modules/common/megatron/language_model.py @@ -116,6 +116,7 @@ def get_language_model( fp8_amax_compute_algo='most_recent', reduce_amax=True, use_emha=False, + ub_tp_comm_overlap=False, ): """Build language model and return along with the key to save.""" @@ -191,6 +192,7 @@ def get_language_model( fp8_amax_compute_algo=fp8_amax_compute_algo, reduce_amax=reduce_amax, use_emha=use_emha, + ub_tp_comm_overlap=ub_tp_comm_overlap, ) # key used for checkpoints. language_model_key = 'language_model' @@ -497,6 +499,7 @@ def __init__( fp8_amax_compute_algo='most_recent', reduce_amax=True, use_emha=False, + ub_tp_comm_overlap=False, ): super(TransformerLanguageModel, self).__init__(share_token_embeddings=share_embeddings_and_output_weights) @@ -602,6 +605,7 @@ def __init__( fp8_amax_compute_algo=fp8_amax_compute_algo, reduce_amax=reduce_amax, use_emha=use_emha, + ub_tp_comm_overlap=ub_tp_comm_overlap, ) self._encoder_key = 'encoder' diff --git a/nemo/collections/nlp/modules/common/megatron/megatron_init.py b/nemo/collections/nlp/modules/common/megatron/megatron_init.py index e0551fad5d16..7431bffad26c 100644 --- a/nemo/collections/nlp/modules/common/megatron/megatron_init.py +++ b/nemo/collections/nlp/modules/common/megatron/megatron_init.py @@ -67,6 +67,7 @@ def initialize_model_parallel_for_nemo( global_batch_size=None, rampup_batch_size=None, use_fp8=False, + init_mpi_proc_group=False, seed=1234, apex_transformer_log_level=30, ): @@ -83,6 +84,7 @@ def initialize_model_parallel_for_nemo( app_state.pipeline_model_parallel_size = pipeline_model_parallel_size app_state.virtual_pipeline_model_parallel_size = virtual_pipeline_model_parallel_size app_state.use_fp8 = use_fp8 + app_state.init_mpi_proc_group = init_mpi_proc_group ( app_state.tensor_model_parallel_rank, app_state.pipeline_model_parallel_rank, diff --git a/nemo/collections/nlp/modules/common/megatron/transformer.py b/nemo/collections/nlp/modules/common/megatron/transformer.py index 9a09a9f9aa0b..c57f3286ec3f 100644 --- a/nemo/collections/nlp/modules/common/megatron/transformer.py +++ b/nemo/collections/nlp/modules/common/megatron/transformer.py @@ -792,6 +792,7 @@ def __init__( layer_type: str = "encoder", drop_path_rate: float = 0, use_emha: bool = False, + ub_tp_comm_overlap: bool = False, autocast_dtype: Any = 16, zero_centered_gamma: bool = False, ) -> None: @@ -824,6 +825,7 @@ def __init__( set_parallel_mode=tp_size > 1, fuse_qkv_params=True, zero_centered_gamma=zero_centered_gamma, + 
ub_tp_comm_overlap=ub_tp_comm_overlap, ) # use_emha=use_emha, @@ -919,6 +921,7 @@ def __init__( fp8_amax_compute_algo='most_recent', reduce_amax=True, use_emha=False, + ub_tp_comm_overlap=False, normalize_attention_scores=True, multi_query_attention=False, num_moe_experts=1, @@ -1058,6 +1061,7 @@ def build_layer(layer_number): apply_residual_connection_post_layernorm=False, autocast_dtype=precision, use_emha=use_emha, + ub_tp_comm_overlap=ub_tp_comm_overlap, zero_centered_gamma=normalization == 'layernorm1p', ) else: diff --git a/nemo/collections/nlp/parts/nlp_overrides.py b/nemo/collections/nlp/parts/nlp_overrides.py index e7d74fb61fd0..199a46be6c39 100644 --- a/nemo/collections/nlp/parts/nlp_overrides.py +++ b/nemo/collections/nlp/parts/nlp_overrides.py @@ -181,6 +181,10 @@ def init_model_parallel(self, global_rank: int, world_size: int) -> None: app_state.data_parallel_size = parallel_state.get_data_parallel_world_size() app_state.pipeline_model_parallel_group = parallel_state.get_pipeline_model_parallel_group() + # create MPI process group for UCX-based communication APIs + if app_state.init_mpi_proc_group: + torch.distributed.new_group(backend='mpi') + def save_checkpoint( self, checkpoint: Dict[str, Any], filepath: Union[str, Path], storage_options: Optional[Any] = None ) -> None: diff --git a/nemo/utils/app_state.py b/nemo/utils/app_state.py index c3ead0bff48f..d06e1ac32e36 100644 --- a/nemo/utils/app_state.py +++ b/nemo/utils/app_state.py @@ -55,6 +55,7 @@ def __init__(self): self._data_parallel_group = None self._megatron_checkpoint_version = None self._use_fp8 = False + self._init_mpi_proc_gruop = False self._random_seed = None @@ -363,6 +364,22 @@ def use_fp8(self, use_fp8): """ self._use_fp8 = use_fp8 + @property + def init_mpi_proc_group(self): + """ Property sets the initialization of mpi process group. + Returns: + Initialize mpi process group. + """ + return self._init_mpi_proc_group + + @init_mpi_proc_group.setter + def init_mpi_proc_group(self, init_mpi_proc_group): + """ Property sets the initialization of mpi process group. + Args: + init_mpi_proc_group: Initialize mpi process group. + """ + self._init_mpi_proc_group = init_mpi_proc_group + @property def random_seed(self): """ Property returns the random seed. From 9bd8ecd15e6b79ba85329a5f314b5de66444592e Mon Sep 17 00:00:00 2001 From: Cheng-Ping Hsieh <37269846+hsiehjackson@users.noreply.github.com> Date: Thu, 1 Jun 2023 20:55:06 -0700 Subject: [PATCH 17/28] Fix adapter tutorial r1.19.0 (#6776) * Fix TTS adapter tutorial Signed-off-by: hsiehjackson * Fix version Signed-off-by: hsiehjackson --------- Signed-off-by: hsiehjackson --- nemo/collections/tts/modules/submodules.py | 16 +- .../tts/FastPitch_Adapter_Finetuning.ipynb | 178 +++++++----------- .../FastPitch_MultiSpeaker_Pretraining.ipynb | 8 +- 3 files changed, 78 insertions(+), 124 deletions(-) diff --git a/nemo/collections/tts/modules/submodules.py b/nemo/collections/tts/modules/submodules.py index 6efccf18eeea..408ab02dead2 100644 --- a/nemo/collections/tts/modules/submodules.py +++ b/nemo/collections/tts/modules/submodules.py @@ -758,15 +758,11 @@ def forward(self, batch_size=None, speaker=None, reference_spec=None, reference_ embs = self.lookup_module(speaker) # Get GST based speaker embedding - if self.gst_module is not None: - if reference_spec is None or reference_spec_lens is None: - raise ValueError( - "You should add `reference_audio` in sup_data_types or remove `speaker_encoder`in config." 
- ) - out = self.gst_module(reference_spec, reference_spec_lens) - embs = out if embs is None else embs + out - - elif self.gst_module is None and reference_spec is not None and reference_spec_lens is not None: - logging.warning("You may add `gst_module` in speaker_encoder to use reference_audio.") + if reference_spec is not None and reference_spec_lens is not None: + if self.gst_module is not None: + out = self.gst_module(reference_spec, reference_spec_lens) + embs = out if embs is None else embs + out + else: + logging.warning("You may add `gst_module` in speaker_encoder to use reference_audio.") return embs diff --git a/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb b/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb index 95bc3805030c..67e274ff364f 100644 --- a/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb +++ b/tutorials/tts/FastPitch_Adapter_Finetuning.ipynb @@ -2,7 +2,7 @@ "cells": [ { "cell_type": "markdown", - "id": "ea49c0e5", + "id": "ed07e3c2", "metadata": {}, "source": [ "# FastPitch Adapter Finetuning\n", @@ -16,14 +16,14 @@ "2. **Fine-tune HiFiGAN on adaptation data**: fine-tune a vocoder for the fine-tuned multi-speaker FastPitch\n", "* Dataset Preparation: extract mel-spectrograms from fine-tuned FastPitch.\n", "* Training: fine-tune HiFiGAN with fine-tuned adaptation data.\n", - "3. **Inference**: generate speech from adpated FastPitch\n", + "3. **Inference**: generate speech from adapted FastPitch\n", "* Load Model: load pre-trained multi-speaker FastPitch with **fine-tuned adapters**.\n", "* Output Audio: generate audio files." ] }, { "cell_type": "markdown", - "id": "37259555", + "id": "772e7404", "metadata": {}, "source": [ "# License\n", @@ -46,7 +46,7 @@ { "cell_type": "code", "execution_count": null, - "id": "d61cbea5", + "id": "8f799aa0", "metadata": {}, "outputs": [], "source": [ @@ -73,7 +73,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fef9aba9", + "id": "0a4d3371", "metadata": {}, "outputs": [], "source": [ @@ -83,7 +83,7 @@ { "cell_type": "code", "execution_count": null, - "id": "49bc38ab", + "id": "25d94e3a", "metadata": {}, "outputs": [], "source": [ @@ -95,7 +95,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9459f9dc", + "id": "79cb9932", "metadata": {}, "outputs": [], "source": [ @@ -113,7 +113,7 @@ { "cell_type": "code", "execution_count": null, - "id": "eb26f54d", + "id": "ec7fed4e", "metadata": {}, "outputs": [], "source": [ @@ -131,7 +131,7 @@ { "cell_type": "code", "execution_count": null, - "id": "12b28329", + "id": "f815deff", "metadata": {}, "outputs": [], "source": [ @@ -149,7 +149,7 @@ }, { "cell_type": "markdown", - "id": "30996769", + "id": "539e8f0d", "metadata": {}, "source": [ "# 1. Fine-tune FastPitch on adaptation data" @@ -157,17 +157,17 @@ }, { "cell_type": "markdown", - "id": "2f5f5945", + "id": "270ed53f", "metadata": {}, "source": [ "## a. Data Preparation\n", - "For our tutorial, we use small part of VCTK dataset with a new target speaker (p267). Usually, the audios should have total duration more than 15 mintues." + "For our tutorial, we use small part of VCTK dataset with a new target speaker (p267). Usually, the audios should have total duration more than 15 minutes." 
] }, { "cell_type": "code", "execution_count": null, - "id": "8047f988", + "id": "21ce4a34", "metadata": {}, "outputs": [], "source": [ @@ -177,7 +177,7 @@ { "cell_type": "code", "execution_count": null, - "id": "b8242769", + "id": "2d5edbe5", "metadata": {}, "outputs": [], "source": [ @@ -188,7 +188,7 @@ { "cell_type": "code", "execution_count": null, - "id": "79cf8539", + "id": "c1de2249", "metadata": {}, "outputs": [], "source": [ @@ -198,7 +198,7 @@ }, { "cell_type": "markdown", - "id": "35c3b97b", + "id": "e657c830", "metadata": {}, "source": [ "## b. Preprocessing" @@ -206,17 +206,17 @@ }, { "cell_type": "markdown", - "id": "ba3a7c3a", + "id": "4d0076d4", "metadata": {}, "source": [ "### Add absolute file path in manifest\n", - "We use absoluate path for audio_filepath to get the audio during training." + "We use absolute path for audio_filepath to get the audio during training." ] }, { "cell_type": "code", "execution_count": null, - "id": "8bc485b5", + "id": "7ccb5fb6", "metadata": {}, "outputs": [], "source": [ @@ -226,7 +226,7 @@ { "cell_type": "code", "execution_count": null, - "id": "f9cb8ef5", + "id": "23dc1ba6", "metadata": {}, "outputs": [], "source": [ @@ -241,7 +241,7 @@ }, { "cell_type": "markdown", - "id": "f92054d5", + "id": "b852072b", "metadata": {}, "source": [ "### Extract Supplementary Data\n", @@ -252,7 +252,7 @@ { "cell_type": "code", "execution_count": null, - "id": "0adc618b", + "id": "f6bdd226", "metadata": {}, "outputs": [], "source": [ @@ -267,7 +267,7 @@ }, { "cell_type": "markdown", - "id": "96dd5fe1", + "id": "fdae4e4e", "metadata": {}, "source": [ "After running the above command line, you will observe a new folder NeMoTTS_sup_data/pitch and printouts of pitch statistics like below. Specify these values to the FastPitch training configurations. We will be there in the following section.\n", @@ -280,7 +280,7 @@ { "cell_type": "code", "execution_count": null, - "id": "23703c76", + "id": "ac8fae15", "metadata": {}, "outputs": [], "source": [ @@ -295,7 +295,7 @@ }, { "cell_type": "markdown", - "id": "7c70e5db", + "id": "c9f98c86", "metadata": {}, "source": [ "## c. Model Setting\n", @@ -305,7 +305,7 @@ { "cell_type": "code", "execution_count": null, - "id": "439f2f82", + "id": "fd8c66fb", "metadata": {}, "outputs": [], "source": [ @@ -318,7 +318,7 @@ { "cell_type": "code", "execution_count": null, - "id": "30f865cb", + "id": "ff535c8f", "metadata": {}, "outputs": [], "source": [ @@ -350,7 +350,7 @@ { "cell_type": "code", "execution_count": null, - "id": "e92910b5", + "id": "4f457111", "metadata": {}, "outputs": [], "source": [ @@ -360,7 +360,7 @@ }, { "cell_type": "markdown", - "id": "7f03219f", + "id": "ef40def3", "metadata": {}, "source": [ "### Precompute Speaker Embedding\n", @@ -370,7 +370,7 @@ { "cell_type": "code", "execution_count": null, - "id": "c2a35241", + "id": "30664bcb", "metadata": {}, "outputs": [], "source": [ @@ -405,7 +405,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5fa1b309", + "id": "43001c75", "metadata": {}, "outputs": [], "source": [ @@ -417,7 +417,7 @@ }, { "cell_type": "markdown", - "id": "3b77e95f", + "id": "42915e02", "metadata": {}, "source": [ "## d. 
Training" @@ -426,21 +426,21 @@ { "cell_type": "code", "execution_count": null, - "id": "9e8c3740", + "id": "884bc2d0", "metadata": {}, "outputs": [], "source": [ "phoneme_dict_path = os.path.abspath(os.path.join(code_dir, \"scripts\", \"tts_dataset_files\", \"cmudict-0.7b_nv22.10\"))\n", "heteronyms_path = os.path.abspath(os.path.join(code_dir, \"scripts\", \"tts_dataset_files\", \"heteronyms-052722\"))\n", "\n", - "# Copy and Paste the PITCH_MEAN and PITCH_STD from previous steps (train_manifest) to overide pitch_mean and pitch_std configs below.\n", + "# Copy and Paste the PITCH_MEAN and PITCH_STD from previous steps (train_manifest) to override pitch_mean and pitch_std configs below.\n", "PITCH_MEAN=175.48513793945312\n", "PITCH_STD=42.3786735534668" ] }, { "cell_type": "markdown", - "id": "19bb6d8b", + "id": "6f04fc86", "metadata": {}, "source": [ "### Important notes\n", @@ -451,13 +451,16 @@ "* Other optional arguments based on your preference:\n", " * batch_size\n", " * exp_manager\n", - " * trainer" + " * trainer\n", + " * model.unfreeze_aligner=true\n", + " * model.unfreeze_duration_predictor=true\n", + " * model.unfreeze_pitch_predictor=true" ] }, { "cell_type": "code", "execution_count": null, - "id": "8c8cbea2", + "id": "7ae8383a", "metadata": {}, "outputs": [], "source": [ @@ -476,9 +479,11 @@ "~model.speaker_encoder.gst_module \\\n", "model.train_ds.dataloader_params.batch_size=8 \\\n", "model.validation_ds.dataloader_params.batch_size=8 \\\n", + "+model.text_tokenizer.add_blank_at=True \\\n", "model.optim.name=adam \\\n", - "model.optim.lr=2e-4 \\\n", - "~model.optim.sched \\\n", + "model.optim.lr=1e-3 \\\n", + "model.optim.sched.warmup_steps=0 \\\n", + "+model.optim.sched.min_lr=1e-4 \\\n", "exp_manager.exp_dir={logs_dir} \\\n", "+exp_manager.create_wandb_logger=True \\\n", "+exp_manager.wandb_logger_kwargs.name=\"tutorial-FastPitch-finetune-adaptation\" \\\n", @@ -495,7 +500,7 @@ { "cell_type": "code", "execution_count": null, - "id": "fe5c7b2f", + "id": "39d3074c", "metadata": {}, "outputs": [], "source": [ @@ -510,7 +515,7 @@ }, { "cell_type": "markdown", - "id": "75856d0e", + "id": "9e9a1f45", "metadata": {}, "source": [ "# 3. Fine-tune HiFiGAN on adaptation data" @@ -518,7 +523,7 @@ }, { "cell_type": "markdown", - "id": "3444698f", + "id": "deec135f", "metadata": {}, "source": [ "## a. Dataset Preparation\n", @@ -528,7 +533,7 @@ { "cell_type": "code", "execution_count": null, - "id": "bb2fd64d", + "id": "1aecaa68", "metadata": {}, "outputs": [], "source": [ @@ -554,7 +559,7 @@ { "cell_type": "code", "execution_count": null, - "id": "da69cb66", + "id": "6a153ea0", "metadata": {}, "outputs": [], "source": [ @@ -564,7 +569,7 @@ }, { "cell_type": "markdown", - "id": "fa2cbb02", + "id": "b05cd550", "metadata": {}, "source": [ "## b. Training" @@ -573,7 +578,7 @@ { "cell_type": "code", "execution_count": null, - "id": "ffdce5d5", + "id": "e5d5f281", "metadata": {}, "outputs": [], "source": [ @@ -601,7 +606,7 @@ { "cell_type": "code", "execution_count": null, - "id": "9e6376cf", + "id": "9c1c42f3", "metadata": {}, "outputs": [], "source": [ @@ -613,7 +618,7 @@ }, { "cell_type": "markdown", - "id": "e5076e51", + "id": "0665ac78", "metadata": {}, "source": [ "# 4. Inference" @@ -622,7 +627,7 @@ { "cell_type": "code", "execution_count": null, - "id": "52358549", + "id": "5f4afb24", "metadata": {}, "outputs": [], "source": [ @@ -633,7 +638,7 @@ }, { "cell_type": "markdown", - "id": "9e96ee13", + "id": "0d9ff309", "metadata": {}, "source": [ "## a. 
Load Model" @@ -642,17 +647,7 @@ { "cell_type": "code", "execution_count": null, - "id": "2cb5d524", - "metadata": {}, - "outputs": [], - "source": [ - "wave_model = WaveformFeaturizer(sample_rate=sample_rate)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "32dbd30c", + "id": "81e4dee0", "metadata": {}, "outputs": [], "source": [ @@ -668,7 +663,7 @@ { "cell_type": "code", "execution_count": null, - "id": "74a7ad03", + "id": "1eaef8be", "metadata": {}, "outputs": [], "source": [ @@ -678,7 +673,7 @@ }, { "cell_type": "markdown", - "id": "4f882975", + "id": "837bdbab", "metadata": {}, "source": [ "## b. Output Audio" @@ -687,26 +682,14 @@ { "cell_type": "code", "execution_count": null, - "id": "2178a8ef", + "id": "fef139cb", "metadata": {}, "outputs": [], "source": [ - "def gt_spectrogram(audio_path, wave_model, spec_gen_model):\n", - " features = wave_model.process(audio_path, trim=False)\n", - " audio, audio_length = features, torch.tensor(features.shape[0]).long()\n", - " audio = audio.unsqueeze(0).to(device=spec_gen_model.device)\n", - " audio_length = audio_length.unsqueeze(0).to(device=spec_gen_model.device)\n", - " with torch.no_grad():\n", - " spectrogram, spec_len = spec_gen_model.preprocessor(input_signal=audio, length=audio_length)\n", - " return spectrogram, spec_len\n", - "\n", - "def gen_spectrogram(text, spec_gen_model, reference_spec, reference_spec_lens):\n", + "def gen_spectrogram(text, spec_gen_model):\n", " parsed = spec_gen_model.parse(text)\n", " with torch.no_grad(): \n", - " spectrogram = spec_gen_model.generate_spectrogram(tokens=parsed, \n", - " reference_spec=reference_spec, \n", - " reference_spec_lens=reference_spec_lens)\n", - "\n", + " spectrogram = spec_gen_model.generate_spectrogram(tokens=parsed)\n", " return spectrogram\n", " \n", "def synth_audio(vocoder_model, spectrogram): \n", @@ -720,16 +703,10 @@ { "cell_type": "code", "execution_count": null, - "id": "766154e3", + "id": "b98ac280", "metadata": {}, "outputs": [], "source": [ - "# Reference Audio\n", - "with open(train_manifest, \"r\") as f:\n", - " for i, line in enumerate(f):\n", - " reference_record = json.loads(line)\n", - " break\n", - " \n", "# Validatation Audio\n", "num_val = 3\n", "val_records = []\n", @@ -743,27 +720,19 @@ { "cell_type": "code", "execution_count": null, - "id": "dfa71ca6", + "id": "b17446f9", "metadata": {}, "outputs": [], "source": [ "for i, val_record in enumerate(val_records):\n", - " reference_spec, reference_spec_lens = gt_spectrogram(reference_record['audio_filepath'], wave_model, spec_model)\n", - " reference_spec = reference_spec.to(spec_model.device)\n", - " spec_pred = gen_spectrogram(val_record['text'], spec_model,\n", - " reference_spec=reference_spec, \n", - " reference_spec_lens=reference_spec_lens)\n", - "\n", + " spec_pred = gen_spectrogram(val_record['text'], spec_model)\n", " audio_gen = synth_audio(vocoder_model, spec_pred)\n", - " \n", - " audio_ref = ipd.Audio(reference_record['audio_filepath'], rate=sample_rate)\n", + "\n", " audio_gt = ipd.Audio(val_record['audio_filepath'], rate=sample_rate)\n", " audio_gen = ipd.Audio(audio_gen, rate=sample_rate)\n", " \n", " print(\"------\")\n", " print(f\"Text: {val_record['text']}\")\n", - " print('Reference Audio')\n", - " ipd.display(audio_ref)\n", " print('Ground Truth Audio')\n", " ipd.display(audio_gt)\n", " print('Synthesized Audio')\n", @@ -775,18 +744,7 @@ { "cell_type": "code", "execution_count": null, - "id": "51d9d176", - "metadata": {}, - "outputs": [], - "source": [ - 
"print(f\"Pretraind FastPitch: {pretrained_fastpitch_checkpoint}\")\n", - "print(f\"Finetuned Adapter: {finetuned_adapter_checkpoint}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "id": "6180a7d2", + "id": "f8f525d1", "metadata": {}, "outputs": [], "source": [ @@ -797,7 +755,7 @@ { "cell_type": "code", "execution_count": null, - "id": "5b33263b", + "id": "66e8ab7d", "metadata": {}, "outputs": [], "source": [] @@ -819,7 +777,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.8.13" } }, "nbformat": 4, diff --git a/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb b/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb index a67744ef0f58..1292cfcab5f8 100644 --- a/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb +++ b/tutorials/tts/FastPitch_MultiSpeaker_Pretraining.ipynb @@ -195,8 +195,8 @@ "id": "cae8567d", "metadata": {}, "source": [ - "### Add absoluate audio path in manifest\n", - "We use absoluate path for `audio_filepath` to get the audio during training." + "### Add absolute audio path in manifest\n", + "We use absolute path for `audio_filepath` to get the audio during training." ] }, { @@ -337,7 +337,7 @@ "phoneme_dict_path = os.path.abspath(os.path.join(code_dir, \"scripts\", \"tts_dataset_files\", \"cmudict-0.7b_nv22.10\"))\n", "heteronyms_path = os.path.abspath(os.path.join(code_dir, \"scripts\", \"tts_dataset_files\", \"heteronyms-052722\"))\n", "\n", - "# Copy and Paste the PITCH_MEAN and PITCH_STD from previous steps (train_manifest) to overide pitch_mean and pitch_std configs below.\n", + "# Copy and Paste the PITCH_MEAN and PITCH_STD from previous steps (train_manifest) to override pitch_mean and pitch_std configs below.\n", "PITCH_MEAN=140.84278869628906\n", "PITCH_STD=65.4063949584961" ] @@ -727,7 +727,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.8.10" + "version": "3.8.13" } }, "nbformat": 4, From 913e5e5fabd250d2442a0b9615fb84dbe0fb5598 Mon Sep 17 00:00:00 2001 From: Sandeep Subramanian Date: Fri, 2 Jun 2023 10:20:00 -0700 Subject: [PATCH 18/28] Fix check (#6798) Signed-off-by: MaximumEntropy --- .../nlp/data/language_modeling/megatron/gpt_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/nlp/data/language_modeling/megatron/gpt_dataset.py b/nemo/collections/nlp/data/language_modeling/megatron/gpt_dataset.py index cf1de245d0e7..d7113e7cdde3 100644 --- a/nemo/collections/nlp/data/language_modeling/megatron/gpt_dataset.py +++ b/nemo/collections/nlp/data/language_modeling/megatron/gpt_dataset.py @@ -601,7 +601,7 @@ def _build_index_mappings( last_epoch_num_samples = num_samples - num_samples_from_epochs_minus_one assert last_epoch_num_samples >= 0, 'last epoch number of samples should be non-negative.' num_samples_per_epoch = (tokens_per_epoch - add_extra_token) // seq_length - assert last_epoch_num_samples < ( + assert last_epoch_num_samples <= ( num_samples_per_epoch + 1 ), 'last epoch number of samples exceeded max value.' 
# If we have less than 80% of the samples for the last epoch, From a8aa8f126b651c5a6091561be46b94f80d08cb8b Mon Sep 17 00:00:00 2001 From: Markel Sanz Ausin Date: Fri, 2 Jun 2023 16:01:43 -0700 Subject: [PATCH 19/28] Bug fix for reset_sequence_parallel_args (#6802) Signed-off-by: Markel Sanz Ausin --- .../nlp/models/language_modeling/megatron_gpt_model.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 3f5dd8110774..7033d57a0da6 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -1206,7 +1206,7 @@ def _reset_sequence_parallelism_args(self): for module in self.get_gpt_module_list(): for mod in module.modules(): if hasattr(mod, "sequence_parallel"): - mod.sequence_parallel = self.last_sequence_parallel + mod.sequence_parallel = False def _restore_sequence_parallelism_args(self): """ Restores the sequence parallelism parameters using the values saved by From 0e0253ea7b73bd09b6e14bca5b933bfe576a86ad Mon Sep 17 00:00:00 2001 From: Sangkug Lym Date: Mon, 5 Jun 2023 08:36:29 -0700 Subject: [PATCH 20/28] Add ub communicator initialization to validation step (#6807) --- .../language_modeling/megatron_gpt_model.py | 54 ++++++++++--------- 1 file changed, 30 insertions(+), 24 deletions(-) diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py index 7033d57a0da6..6518fb796497 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py @@ -407,37 +407,39 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only): return loss_mean + def initialize_ub_func(self): + input_shape = [ + self.cfg.get('encoder_seq_length') * self.cfg.get('micro_batch_size'), + self.cfg.get('hidden_size'), + ] + ub_cfg_file_name = self.cfg.get('ub_tp_comm_overlap_cfg', None) + if ub_cfg_file_name is not None: + try: + import yaml + + with open(ub_cfg_file_name, 'r') as ub_cfg_file: + ub_cfgs = yaml.safe_load(ub_cfg_file) + except (ImportError, TypeError): + print("Fail to read ub_tp_comm_overlap config file.") + else: + ub_cfgs = None + te_module.initialize_ub( + shape=input_shape, + tp_size=self.cfg.get('tensor_model_parallel_size'), + use_fp8=self.cfg.get('fp8'), + ub_cfgs=ub_cfgs, + ) + self.initialize_ub = False + def training_step(self, dataloader_iter, batch_idx): """ We pass the dataloader iterator function to the micro-batch scheduler. The input batch to each micro-batch is fetched using the dataloader function in the micro-batch fwd function. """ - # Initialize userbuffer communicators. Initialization is done only once at the - # beginning of the first training step. + # Initialize userbuffer communicators. 
if self.initialize_ub: - input_shape = [ - self.cfg.get('encoder_seq_length') * self.cfg.get('micro_batch_size'), - self.cfg.get('hidden_size'), - ] - ub_cfg_file_name = self.cfg.get('ub_tp_comm_overlap_cfg', None) - if ub_cfg_file_name is not None: - try: - import yaml - - with open(ub_cfg_file_name, 'r') as ub_cfg_file: - ub_cfgs = yaml.safe_load(ub_cfg_file) - except (ImportError, TypeError): - print("Fail to read ub_tp_comm_overlap config file.") - else: - ub_cfgs = None - te_module.initialize_ub( - shape=input_shape, - tp_size=self.cfg.get('tensor_model_parallel_size'), - use_fp8=self.cfg.get('fp8'), - ub_cfgs=ub_cfgs, - ) - self.initialize_ub = False + self.initialize_ub_func() # we zero grads here because we also call backward in the megatron-core fwd/bwd functions self._optimizer.zero_grad() @@ -762,6 +764,10 @@ def validation_step(self, dataloader_iter, batch_idx): from the dataloader to produce a list of microbatches. The list of microbatches is then piped through the pipeline using megatron-core fwd/bwd functions. """ + # Initialize userbuffer communicators. + if self.initialize_ub: + self.initialize_ub_func() + if isinstance(self.model, list): for model_module in self.model: model_module.eval() From 41bb941ecbc235c321642456d61d0d7c011ef5d4 Mon Sep 17 00:00:00 2001 From: Abhinav Khattar Date: Mon, 5 Jun 2023 22:54:02 -0700 Subject: [PATCH 21/28] update core version (#6817) Signed-off-by: Abhinav Khattar --- Jenkinsfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Jenkinsfile b/Jenkinsfile index 7d0b8ee28e87..e16bb5d66545 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -61,7 +61,7 @@ pipeline { steps { sh 'git clone https://github.com/NVIDIA/Megatron-LM.git && \ cd Megatron-LM && \ - git checkout e6d7e09845590d0a36bc7f29eb28db974fb8da4e && \ + git checkout d2891b4ad3a00e3c4223f89491afd9e1b812f9b5 && \ pip install -e .' } } From 45144f53b2be2e9a31e8aa8647ae40426b91d5c3 Mon Sep 17 00:00:00 2001 From: Eric Harper Date: Tue, 6 Jun 2023 12:00:05 -0600 Subject: [PATCH 22/28] Add trainer.validate example for GPT (#6794) * add trainer.validate example Signed-off-by: ericharper * clean up white space Signed-off-by: ericharper * add mbs and gbs to the config Signed-off-by: ericharper --------- Signed-off-by: ericharper --- .../conf/megatron_gpt_validate_config.yaml | 22 +++ .../megatron_gpt_validate.py | 155 ++++++++++++++++++ .../language_modeling/megatron_gpt_model.py | 23 +-- 3 files changed, 189 insertions(+), 11 deletions(-) create mode 100644 examples/nlp/language_modeling/conf/megatron_gpt_validate_config.yaml create mode 100644 examples/nlp/language_modeling/megatron_gpt_validate.py diff --git a/examples/nlp/language_modeling/conf/megatron_gpt_validate_config.yaml b/examples/nlp/language_modeling/conf/megatron_gpt_validate_config.yaml new file mode 100644 index 000000000000..39b0c7ed2176 --- /dev/null +++ b/examples/nlp/language_modeling/conf/megatron_gpt_validate_config.yaml @@ -0,0 +1,22 @@ +trainer: + devices: 1 + num_nodes: 1 + accelerator: gpu + logger: False # logger provided by exp_manager + precision: 16 # 16, 32, or bf16 + log_every_n_steps: 1 + limit_val_batches: 10 + limit_test_batches: 50 + max_steps: 100 # needed to setup dataloaders + max_epochs: null + replace_sampler_ddp: False + +tensor_model_parallel_size: ??? # should be set the same as the pretrained model that is being restored from +pipeline_model_parallel_size: ??? 
# should be set the same as the pretrained model that is being restored from +micro_batch_size: null # limited by GPU memory, defaults to pretrained model config +global_batch_size: null # will use more micro batches to reach global batch size, defaults to pretrained model config +virtual_pipeline_model_parallel_size: null +gpt_model_file: null # GPT nemo file path +checkpoint_dir: null # checkpoint file dir. This is used to load the PTL checkpoint generated during the GPT training +checkpoint_name: null # PTL checkpoint file name, only used for PTL checkpoint loading +hparams_file: null # model configuration file, only used for PTL checkpoint loading diff --git a/examples/nlp/language_modeling/megatron_gpt_validate.py b/examples/nlp/language_modeling/megatron_gpt_validate.py new file mode 100644 index 000000000000..b5a61e627a14 --- /dev/null +++ b/examples/nlp/language_modeling/megatron_gpt_validate.py @@ -0,0 +1,155 @@ +# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import tempfile + +from omegaconf import OmegaConf, open_dict +from pytorch_lightning.trainer.trainer import Trainer + +from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel +from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel +from nemo.collections.nlp.parts.nlp_overrides import ( + MegatronHalfPrecisionPlugin, + NLPDDPStrategy, + NLPSaveRestoreConnector, + PipelineMixedPrecisionPlugin, +) +from nemo.core.config import hydra_runner +from nemo.utils import logging +from nemo.utils.app_state import AppState +from nemo.utils.model_utils import inject_model_parallel_rank + +""" Example script showing how to run validation on a MegatronGPT model. 
diff --git a/examples/nlp/language_modeling/megatron_gpt_validate.py b/examples/nlp/language_modeling/megatron_gpt_validate.py
new file mode 100644
index 000000000000..b5a61e627a14
--- /dev/null
+++ b/examples/nlp/language_modeling/megatron_gpt_validate.py
@@ -0,0 +1,155 @@
+# Copyright (c) 2023, NVIDIA CORPORATION. All rights reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+import tempfile
+
+from omegaconf import OmegaConf, open_dict
+from pytorch_lightning.trainer.trainer import Trainer
+
+from nemo.collections.nlp.models.language_modeling.megatron_gpt_model import MegatronGPTModel
+from nemo.collections.nlp.modules.common.megatron.megatron_init import fake_initialize_model_parallel
+from nemo.collections.nlp.parts.nlp_overrides import (
+    MegatronHalfPrecisionPlugin,
+    NLPDDPStrategy,
+    NLPSaveRestoreConnector,
+    PipelineMixedPrecisionPlugin,
+)
+from nemo.core.config import hydra_runner
+from nemo.utils import logging
+from nemo.utils.app_state import AppState
+from nemo.utils.model_utils import inject_model_parallel_rank
+
+""" Example script showing how to run validation on a MegatronGPT model.
+
+    Sample usage:
+
+    From nemo model:
+
+    python megatron_gpt_validate.py \
+        trainer.devices=4 \
+        trainer.num_nodes=1 \
+        trainer.limit_val_batches=10 \
+        trainer.max_steps=100 \
+        tensor_model_parallel_size=1 \
+        pipeline_model_parallel_size=4 \
+        trainer.precision=bf16 \
+        gpt_model_file=/path/to/megatron_gpt_tp_1_pp4.nemo
+
+    From PTL checkpoint:
+    python megatron_gpt_validate.py \
+        trainer.devices=4 \
+        trainer.num_nodes=1 \
+        trainer.limit_val_batches=10 \
+        trainer.max_steps=100 \
+        tensor_model_parallel_size=1 \
+        pipeline_model_parallel_size=4 \
+        virtual_pipeline_model_parallel_size=4 \
+        trainer.precision=bf16 \
+        checkpoint_dir='/path/to/experiment/checkpoints' \
+        checkpoint_name='megatron_gpt--val_loss=7.78-step=100-consumed_samples=6336.0-last.ckpt' \
+        hparams_file='/path/to/experiment/hparams.yaml'
+
+"""
+
+
+def modify_pretrained_cfg(pretrained_cfg, trainer, cfg):
+    with open_dict(pretrained_cfg):
+        OmegaConf.set_struct(pretrained_cfg, True)
+        pretrained_cfg.sequence_parallel = False
+        pretrained_cfg.activations_checkpoint_granularity = None
+        pretrained_cfg.activations_checkpoint_method = None
+        pretrained_cfg.precision = trainer.precision
+        if cfg.micro_batch_size is not None:
+            pretrained_cfg.micro_batch_size = cfg.micro_batch_size
+        if cfg.global_batch_size is not None:
+            pretrained_cfg.global_batch_size = cfg.global_batch_size
+        if trainer.precision == "16":
+            pretrained_cfg.megatron_amp_O2 = False
+    return pretrained_cfg
+
+
+@hydra_runner(config_path="conf", config_name="megatron_gpt_validate_config")
+def main(cfg) -> None:
+
+    trainer = Trainer(strategy=NLPDDPStrategy(), **cfg.trainer)
+
+    assert (
+        cfg.trainer.devices * cfg.trainer.num_nodes
+        == cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size
+    ), "devices * num_nodes should equal tensor_model_parallel_size * pipeline_model_parallel_size"
+
+    if cfg.gpt_model_file:
+        logging.info(f"Restoring model from {cfg.gpt_model_file}")
+        save_restore_connector = NLPSaveRestoreConnector()
+        if os.path.isdir(cfg.gpt_model_file):
+            save_restore_connector.model_extracted_dir = cfg.gpt_model_file
+
+        pretrained_cfg = MegatronGPTModel.restore_from(
+            restore_path=cfg.gpt_model_file,
+            trainer=trainer,
+            return_config=True,
+            save_restore_connector=save_restore_connector,
+        )
+        pretrained_cfg = modify_pretrained_cfg(pretrained_cfg, trainer, cfg)
+        model = MegatronGPTModel.restore_from(
+            restore_path=cfg.gpt_model_file,
+            trainer=trainer,
+            override_config_path=pretrained_cfg,
+            save_restore_connector=save_restore_connector,
+            map_location=f'cuda:{trainer.local_rank}',  # map_location is needed for converted models
+        )
+    elif cfg.checkpoint_dir:
+        logging.info(
+            f"Restoring model from checkpoint_dir: {cfg.checkpoint_dir} with checkpoint name: {cfg.checkpoint_name}"
+        )
+        app_state = AppState()
+        if cfg.tensor_model_parallel_size > 1 or cfg.pipeline_model_parallel_size > 1:
+            app_state.model_parallel_size = cfg.tensor_model_parallel_size * cfg.pipeline_model_parallel_size
+            app_state.tensor_model_parallel_size = cfg.tensor_model_parallel_size
+            app_state.pipeline_model_parallel_size = cfg.pipeline_model_parallel_size
+            app_state.virtual_pipeline_model_parallel_size = cfg.virtual_pipeline_model_parallel_size
+            (
+                app_state.tensor_model_parallel_rank,
+                app_state.pipeline_model_parallel_rank,
+                app_state.model_parallel_size,
+                app_state.data_parallel_size,
+                app_state.pipeline_model_parallel_split_rank,
+                app_state.virtual_pipeline_model_parallel_rank,
+            ) = fake_initialize_model_parallel(
+                world_size=app_state.model_parallel_size,
+                rank=trainer.global_rank,
+                tensor_model_parallel_size_=cfg.tensor_model_parallel_size,
+                pipeline_model_parallel_size_=cfg.pipeline_model_parallel_size,
+                virtual_pipeline_model_parallel_size_=cfg.virtual_pipeline_model_parallel_size,
+            )
+        checkpoint_path = inject_model_parallel_rank(os.path.join(cfg.checkpoint_dir, cfg.checkpoint_name))
+        pretrained_cfg = OmegaConf.load(cfg.hparams_file)
+        pretrained_cfg = modify_pretrained_cfg(pretrained_cfg.cfg, trainer, cfg)
+        with tempfile.NamedTemporaryFile(suffix='.yaml') as f:
+            OmegaConf.save(config=pretrained_cfg, f=f.name)
+            model = MegatronGPTModel.load_from_checkpoint(
+                checkpoint_path=checkpoint_path, trainer=trainer, hparams_file=f.name,
+            )
+    else:
+        raise ValueError("need at least a nemo file or checkpoint dir")
+
+    logging.info("\n\n************** Model configuration ***********")
+    logging.info(f'\n{OmegaConf.to_yaml(model.cfg)}')
+
+    trainer.validate(model=model)
+
+
+if __name__ == '__main__':
+    main()  # noqa pylint: disable=no-value-for-parameter
diff --git a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
index 6518fb796497..a0b9d215f166 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_gpt_model.py
@@ -361,7 +361,7 @@ def fwd_bwd_step(self, dataloader_iter, batch_idx, forward_only):
 
         # TODO @akhattar: add num_micro_batches_with_partial_activation_checkpoints when ready
         losses_reduced_per_micro_batch = fwd_bwd_function(
-            forward_step_func=self.get_forward_output_and_loss_func(),
+            forward_step_func=self.get_forward_output_and_loss_func(forward_only),
             data_iterator=self._make_data_iterator_list(dataloader_iter),
             model=self.model,
             num_microbatches=get_num_microbatches(),
@@ -956,17 +956,18 @@ def setup(self, stage=None):
             self.setup_validation_data(self.cfg.data)
             self.setup_test_data(self.cfg.data)
 
-        # when using pipeline model parallel the final stage needs to initialize word embeddings
-        if parallel_state.get_pipeline_model_parallel_world_size() > 1:
-            if isinstance(self.model, list):
-                for i, module in enumerate(self.model):
-                    parallel_state.set_virtual_pipeline_model_parallel_rank(i)
+        if stage == 'fit':
+            # when using pipeline model parallel the final stage needs to initialize word embeddings
+            if parallel_state.get_pipeline_model_parallel_world_size() > 1:
+                if isinstance(self.model, list):
+                    for i, module in enumerate(self.model):
+                        parallel_state.set_virtual_pipeline_model_parallel_rank(i)
+                        if self.cfg.get('share_embeddings_and_output_weights', True):
+                            module.sync_initial_word_embeddings()
+                    parallel_state.set_virtual_pipeline_model_parallel_rank(0)
+                else:
                     if self.cfg.get('share_embeddings_and_output_weights', True):
-                        module.sync_initial_word_embeddings()
-                parallel_state.set_virtual_pipeline_model_parallel_rank(0)
-            else:
-                if self.cfg.get('share_embeddings_and_output_weights', True):
-                    self.model.sync_initial_word_embeddings()
+                        self.model.sync_initial_word_embeddings()
 
         if self.cfg.get('transformer_engine', False):
             self.setup_transformer_engine_tp_groups()
From dc52b949617772d23366ae4c485ee0a43c3d5f99 Mon Sep 17 00:00:00 2001
From: Yi Dong <43824965+yidong72@users.noreply.github.com>
Date: Thu, 8 Jun 2023 15:48:32 -0400
Subject: [PATCH 23/28] fix notebook error (#6840)

Signed-off-by: Yi Dong
---
 ...on_Synthetic_Tabular_Data_Generation.ipynb | 23 ++++++++++++++++++-
 1 file changed, 22 insertions(+), 1
deletion(-) diff --git a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb index a92317b17320..1d9849467c21 100644 --- a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb +++ b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb @@ -1,6 +1,7 @@ { "cells": [ { + "attachments": {}, "cell_type": "markdown", "id": "c3217a15", "metadata": {}, @@ -15,6 +16,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "8c72dc42", "metadata": {}, @@ -25,6 +27,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "79154a9e", "metadata": {}, @@ -73,6 +76,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "7e0bbc89", "metadata": {}, @@ -92,6 +96,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "1ff1d46f", "metadata": {}, @@ -141,6 +146,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "aa356012", "metadata": {}, @@ -239,6 +245,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "02bff63f", "metadata": {}, @@ -267,6 +274,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "89e1e5b3", "metadata": {}, @@ -339,6 +347,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "05ebadc3", "metadata": {}, @@ -347,6 +356,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "2fe38a29", "metadata": {}, @@ -381,6 +391,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "678f65ef", "metadata": {}, @@ -411,6 +422,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "8af66b4a", "metadata": {}, @@ -464,6 +476,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "6ecec681", "metadata": {}, @@ -472,6 +485,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "58a3d4fa", "metadata": {}, @@ -543,6 +557,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "45ac928f", "metadata": {}, @@ -557,6 +572,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "158a4bbe", "metadata": {}, @@ -586,7 +602,7 @@ "outputs": [], "source": [ "CHECKPONT_FILE_NAME = megatron_gpt--val_loss=1.17-step=10047-consumed_samples=80376.0-last.ckpt # change it to your checkpoint file name\n", - "!python -m torch.distributed.launch --nproc_per_node=1 megatron_ckpt_to_nemo.py \\\n", + "!python -m torch.distributed.launch --nproc_per_node=1 --use-env=True megatron_ckpt_to_nemo.py \\\n", " --checkpoint_folder=gpt_creditcard_results/megatron_gpt/checkpoints/ \\\n", " --checkpoint_name={CHECKPONT_FILE_NAME} \\\n", " --nemo_file_path=tabular.nemo \\\n", @@ -597,6 +613,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "fa16378e", "metadata": {}, @@ -605,6 +622,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "ed056ec6", "metadata": {}, @@ -630,6 +648,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "a62b48dc", "metadata": {}, @@ -685,6 +704,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "cccd54d9", "metadata": {}, @@ -790,6 +810,7 @@ ] }, { + "attachments": {}, "cell_type": "markdown", "id": "0f2f6e3a", "metadata": {}, From 4239b8003c18a4de99272b826f4683590b57e4a5 Mon Sep 17 00:00:00 2001 From: Yi Dong <43824965+yidong72@users.noreply.github.com> Date: Thu, 8 Jun 2023 19:05:23 -0400 Subject: [PATCH 24/28] fix (#6842) Signed-off-by: Yi Dong --- tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb 
b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb index 1d9849467c21..84ecdec36a72 100644 --- a/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb +++ b/tutorials/nlp/Megatron_Synthetic_Tabular_Data_Generation.ipynb @@ -602,7 +602,7 @@ "outputs": [], "source": [ "CHECKPONT_FILE_NAME = megatron_gpt--val_loss=1.17-step=10047-consumed_samples=80376.0-last.ckpt # change it to your checkpoint file name\n", - "!python -m torch.distributed.launch --nproc_per_node=1 --use-env=True megatron_ckpt_to_nemo.py \\\n", + "!torchrun --nproc_per_node=1 megatron_ckpt_to_nemo.py \\\n", " --checkpoint_folder=gpt_creditcard_results/megatron_gpt/checkpoints/ \\\n", " --checkpoint_name={CHECKPONT_FILE_NAME} \\\n", " --nemo_file_path=tabular.nemo \\\n", From 87e1b8180a2e4e31dcee4deb43bda56a75d2a53c Mon Sep 17 00:00:00 2001 From: Eric Harper Date: Tue, 13 Jun 2023 11:25:07 -0600 Subject: [PATCH 25/28] Add API docs for NeMo Megatron (#6850) * add model pretraining and customization classes Signed-off-by: ericharper * fix Signed-off-by: ericharper * test width Signed-off-by: ericharper * increase middle pane width Signed-off-by: ericharper * add modules and datasets Signed-off-by: ericharper * remove global in t5 dataset s and fix formatting in megatron base model Signed-off-by: ericharper --------- Signed-off-by: ericharper --- docs/source/_static/css/custom.css | 2 +- docs/source/conf.py | 5 +- docs/source/nlp/api.rst | 193 +++++++++++------- .../language_modeling/megatron_base_model.py | 25 +-- 4 files changed, 135 insertions(+), 90 deletions(-) diff --git a/docs/source/_static/css/custom.css b/docs/source/_static/css/custom.css index da134a02d86a..cf0ad0ff2d7f 100644 --- a/docs/source/_static/css/custom.css +++ b/docs/source/_static/css/custom.css @@ -255,7 +255,7 @@ article ul { } } -@media (min-width: 1400px) { +@media (min-width: none) { body { font-size: 18px; } diff --git a/docs/source/conf.py b/docs/source/conf.py index a78ba3528048..0765f8940ab0 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -28,7 +28,6 @@ sys.path.insert(0, os.path.abspath("../..")) sys.path.insert(0, os.path.abspath("../../nemo")) -sys.path.insert(0, os.path.abspath("../../nemo_text_processing")) from package_info import __version__ @@ -47,7 +46,6 @@ 'hydra', # hydra-core in requirements, hydra during import 'dateutil', # part of core python 'transformers.tokenization_bert', # has ., troublesome for this regex - 'megatron', # megatron-lm in requirements, megatron in import 'sklearn', # scikit_learn in requirements, sklearn in import 'nemo_text_processing.inverse_text_normalization', # Not installed automatically 'nemo_text_processing.text_normalization', # Not installed automatically @@ -55,10 +53,13 @@ 'torchmetrics', # inherited from PTL 'lightning_utilities', # inherited from PTL 'apex', + 'megatron.core', + 'transformer_engine', 'joblib', # inherited from optional code 'IPython', 'ipadic', 'psutil', + 'regex', ] _skipped_autodoc_mock_imports = ['wrapt', 'numpy'] diff --git a/docs/source/nlp/api.rst b/docs/source/nlp/api.rst index 46efb0851d4e..7c6971a68d05 100755 --- a/docs/source/nlp/api.rst +++ b/docs/source/nlp/api.rst @@ -1,99 +1,142 @@ -NeMo NLP collection API +NeMo Megatron API ======================= -Model Classes -------------- +Pretraining Model Classes +------------------------- + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_base_model.MegatronBaseModel + :show-inheritance: + :no-members: + :members: __init__, configure_optimizers + +.. 
autoclass:: nemo.collections.nlp.models.language_modeling.megatron_gpt_model.MegatronGPTModel + :show-inheritance: + :no-members: + :members: generate, training_step, validation_step, build_train_valid_test_datasets, setup, on_save_checkpoint, on_load_checkpoint + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_bert_model.MegatronBertModel + :show-inheritance: + :no-members: + :members: training_step, validation_step, build_train_valid_test_datasets, build_LDDL_data, setup, on_save_checkpoint, on_load_checkpoint + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_bart_model.MegatronBARTModel + :show-inheritance: + :no-members: + :members: training_step, validation_step, build_train_valid_test_datasets, setup, on_save_checkpoint, on_load_checkpoint + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_retrieval_model.MegatronRetrievalModel + :show-inheritance: + :no-members: + :members: generate, training_step, validation_step, build_train_valid_test_datasets, setup + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_t5_model.MegatronT5Model + :show-inheritance: + :no-members: + :members: complete, encode, decode, add_special_tokens_to_tokenizer, training_step, validation_step, build_train_valid_test_datasets, setup + +Customization Model Classes +--------------------------- + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_gpt_sft_model.MegatronGPTSFTModel + :show-inheritance: + :no-members: + :members: generate, training_step, validation_step, build_train_valid_test_datasets, setup + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_gpt_adapter_model.MegatronGPTAdapterLearningModel + :show-inheritance: + :no-members: + :members: __init__, state_dict, generate, training_step, validation_step, build_train_valid_test_datasets, setup + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_gpt_adapter_model.MegatronGPTInfusedAdapterModel + :show-inheritance: + :no-members: + :members: __init__, state_dict, generate, training_step, validation_step, build_train_valid_test_datasets, setup + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_gpt_prompt_learning_model.MegatronGPTPromptLearningModel + :show-inheritance: + :no-members: + :members: built_virtual_prompt_dataset, generate, training_step, validation_step, build_train_valid_test_datasets, setup + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model.MegatronT5AdapterLearningModel + :show-inheritance: + :no-members: + :members: __init__, state_dict, training_step, validation_step, build_train_valid_test_datasets, setup + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model.MegatronT5AdapterLearningModel + :show-inheritance: + :no-members: + :members: _add_adapters_to_component, __init__, state_dict, training_step, validation_step, build_train_valid_test_datasets, setup + +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron_t5_adapter_model.MegatronT5InfusedAdapterModel + :show-inheritance: + :no-members: + :members: _add_adapters_to_component, __init__, state_dict, training_step, validation_step, build_train_valid_test_datasets, setup -.. autoclass:: nemo.collections.nlp.models.TextClassificationModel - :show-inheritance: - :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact, classifytext +Modules +------- -.. 
autoclass:: nemo.collections.nlp.models.GLUEModel - :show-inheritance: - :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact +.. autoclass:: nemo.collections.nlp.modules.common.megatron.module.MegatronModule + :show-inheritance: -.. autoclass:: nemo.collections.nlp.models.PunctuationCapitalizationModel - :show-inheritance: - :members: +.. autoclass:: nemo.collections.nlp.modules.common.megatron.module.Float16Module + :show-inheritance: -.. autoclass:: nemo.collections.nlp.models.TokenClassificationModel - :show-inheritance: - :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, register_artifact - -.. autoclass:: nemo.collections.nlp.models.QAModel - :show-inheritance: - :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, inference, validation_epoch_end, test_epoch_end -.. autoclass:: nemo.collections.nlp.models.DuplexTaggerModel - :show-inheritance: - :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, inference, validation_epoch_end, test_epoch_end +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron.gpt_model.GPTModel + :show-inheritance: + :no-members: + :members: forward -.. autoclass:: nemo.collections.nlp.models.DuplexDecoderModel - :show-inheritance: - :members: setup_training_data, setup_optimization, setup_validation_data, setup_test_data, inference, validation_epoch_end, test_epoch_end +.. autoclass:: nemo.collections.nlp.models.language_modeling.megatron.bert_model.BertModel + :show-inheritance: + :no-members: + :members: forward -.. autoclass:: nemo.collections.nlp.models.BERTLMModel - :show-inheritance: - :members: setup_training_data, setup_optimization +.. autoclass:: nemo.collections.nlp.modules.common.megatron.token_level_encoder_decoder.MegatronTokenLevelEncoderDecoderModule + :show-inheritance: + :no-members: + :members: forward -Modules -------- +.. autoclass:: nemo.collections.nlp.modules.common.megatron.retrieval_token_level_encoder_decoder.MegatronRetrievalTokenLevelEncoderDecoderModule + :show-inheritance: + :no-members: + :members: forward -.. autoclass:: nemo.collections.nlp.modules.BertModule - :show-inheritance: - :members: - -.. autoclass:: nemo.collections.nlp.modules.AlbertEncoder - :show-inheritance: - :members: -.. autoclass:: nemo.collections.nlp.modules.BertEncoder - :show-inheritance: - :members: - -.. autoclass:: nemo.collections.nlp.modules.DistilBertEncoder - :show-inheritance: - :members: +Datasets +-------- -.. autoclass:: nemo.collections.nlp.modules.RobertaEncoder - :show-inheritance: - :members: +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.blendable_dataset.BlendableDataset + :show-inheritance: -.. autoclass:: nemo.collections.nlp.modules.SequenceClassifier - :show-inheritance: - :members: +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.gpt_dataset.GPTDataset + :show-inheritance: -.. autoclass:: nemo.collections.nlp.modules.SequenceRegression - :show-inheritance: - :members: +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.gpt_dataset.MockGPTDataset + :show-inheritance: -.. autoclass:: nemo.collections.nlp.modules.SequenceTokenClassifier - :show-inheritance: - :members: +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.bert_dataset.BertDataset + :show-inheritance: -.. autofunction:: nemo.collections.nlp.modules.get_lm_model +.. 
autoclass:: nemo.collections.nlp.data.language_modeling.megatron.base_prompt_learning_dataset.BasePromptLearningDataset + :show-inheritance: -.. autofunction:: nemo.collections.nlp.modules.get_pretrained_lm_models_list +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_dataset.GPTSFTDataset + :show-inheritance: -.. autofunction:: nemo.collections.nlp.modules.common.megatron.get_megatron_lm_models_list +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.gpt_sft_dataset.GPTSFTChatDataset + :show-inheritance: -Datasets --------- +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.retro_dataset.RETRODataset + :show-inheritance: -.. autoclass:: nemo.collections.nlp.data.token_classification.punctuation_capitalization_dataset.BertPunctuationCapitalizationDataset - :show-inheritance: - :members: - :special-members: __getitem__ +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.t5_dataset.T5Dataset + :show-inheritance: + :exclude-members: MAX_SEQ_LENGTH_DELTA -.. autofunction:: nemo.collections.nlp.data.token_classification.punctuation_capitalization_tarred_dataset.create_tarred_dataset +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.t5_prompt_learning_dataset.T5PromptLearningDataset + :show-inheritance: -.. autoclass:: nemo.collections.nlp.data.token_classification.punctuation_capitalization_tarred_dataset.BertPunctuationCapitalizationTarredDataset - :show-inheritance: - :members: - :special-members: __iter__ - :exclude-members: reinforce_type +.. autoclass:: nemo.collections.nlp.data.language_modeling.megatron.ul2_dataset.UL2Dataset + :show-inheritance: -.. autoclass:: nemo.collections.nlp.data.token_classification.punctuation_capitalization_infer_dataset.BertPunctuationCapitalizationInferDataset - :show-inheritance: - :members: - :special-members: __getitem__ diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py index 563988323203..b96a5adb0c62 100644 --- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py +++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py @@ -60,18 +60,19 @@ class MegatronBaseModel(NLPModel): """ - Megatron base class - It does the following things: - 1. Initialize the model parallel for nemo given the model parallel parameters. - 2. Turn on all the nvidia optimizations. - 3. If `cfg.tokenizer` is available, it loads the tokenizer and pad the vocab to the correct size for tensor model parallelism. - 4. If using distributed optimizer, configure to be compatible with - O2-level optimizations and/or model parallelism. - 5. Perform gradient clipping: `grad_clip_pl_default` triggers the - PyTorch Lightning default implementation, `with_distributed_adam` - triggers the distributed optimizer's implementation, - `megatron_amp_o2` triggers gradient clipping on the main grads, - and otherwise gradient clipping is performed on the model grads. + Megatron base class. All NeMo Megatron models inherit from this class. + + - Initialize the model parallel world for nemo. + - Turn on all of the nvidia optimizations. + - If `cfg.tokenizer` is available, it loads the tokenizer and pad the vocab to the + correct size for tensor model parallelism. + - If using distributed optimizer, configure to be compatible + with O2 level optimizations and/or model parallelism. 
+      - Perform gradient clipping: `grad_clip_pl_default` triggers
+        the PyTorch Lightning default implementation, `with_distributed_adam` triggers
+        the distributed optimizer's implementation, `megatron_amp_o2` triggers gradient clipping on the main grads,
+        and otherwise gradient clipping is performed on the model grads.
+
     """
 
     def __init__(self, cfg: DictConfig, trainer: Trainer, no_lm_init=True):
From f8757027193aea5f7165d8f21c81baee9e5643fc Mon Sep 17 00:00:00 2001
From: Sangkug Lym
Date: Wed, 14 Jun 2023 15:50:36 -0700
Subject: [PATCH 26/28] Apply garbage collection interval to validation steps (#6870)

* Apply garbage collection interval to validation steps

Signed-off-by: Sangkug Lym

* [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci

---------

Signed-off-by: Sangkug Lym
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
---
 .../language_modeling/megatron_base_model.py  | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py
index b96a5adb0c62..f3ae5a938a61 100644
--- a/nemo/collections/nlp/models/language_modeling/megatron_base_model.py
+++ b/nemo/collections/nlp/models/language_modeling/megatron_base_model.py
@@ -157,6 +157,7 @@ def __init__(self, cfg: DictConfig, trainer: Trainer, no_lm_init=True):
         # The automatic garbage collector should be disabled before training starts.
         if self.gc_interval > 0:
             gc.disable()
+        self.validation_global_step = 1
 
     def _enable_nvidia_optimizations(self):
         "These optimizations are present in NVIDIA NGC PyTorch Containers"
@@ -218,6 +219,16 @@ def on_train_start(self) -> None:
         super().on_train_start()
         self.init_global_step = self.trainer.global_step
 
+    def on_validation_start(self) -> None:
+        super().on_validation_start()
+        if self.gc_interval > 0:
+            gc.collect()
+
+    def on_validation_end(self) -> None:
+        super().on_validation_end()
+        if self.gc_interval > 0:
+            gc.collect()
+
     def _build_vocab(self):
         """
         Manipulate vocabulary (e.g., pad vocabulary for increased performance)/
@@ -366,6 +377,14 @@ def on_train_batch_end(self, outputs, dataloader_iter: Any, batch_idx: int, unus
         if self.gc_interval > 0 and (self.trainer.global_step % self.gc_interval == 0):
             gc.collect()
 
+    def on_validation_batch_end(self, outputs, batch: Any, batch_idx: int, dataloader_idx: int) -> None:
+        super().on_validation_batch_end(outputs, batch, batch_idx, dataloader_idx)
+
+        if self.gc_interval > 0:
+            if self.validation_global_step % self.gc_interval == 0:
+                gc.collect()
+            self.validation_global_step += 1
+
     def setup_optimization(
         self, optim_config: Optional[Union[DictConfig, Dict]] = None, optim_kwargs: Optional[Dict[str, Any]] = None,
     ):
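Aside: the change above keeps a dedicated validation-step counter because PyTorch Lightning's `trainer.global_step` only advances during training, so a `global_step % gc_interval` check would never fire while validating. A condensed standalone sketch of the pattern (the class is illustrative, not NeMo's actual implementation):

.. code-block:: python

    import gc

    class GcIntervalSketch:
        def __init__(self, gc_interval: int):
            self.gc_interval = gc_interval
            self.validation_global_step = 1
            if self.gc_interval > 0:
                gc.disable()  # collections now happen only at the chosen interval

        def on_train_batch_end(self, global_step: int) -> None:
            if self.gc_interval > 0 and global_step % self.gc_interval == 0:
                gc.collect()

        def on_validation_batch_end(self) -> None:
            # Validation keeps its own counter since global_step is frozen here.
            if self.gc_interval > 0:
                if self.validation_global_step % self.gc_interval == 0:
                    gc.collect()
                self.validation_global_step += 1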
From 2331b063d6f0282fa79c32e4e780db6cc7ef19f2 Mon Sep 17 00:00:00 2001
From: Eric Harper
Date: Thu, 15 Jun 2023 15:34:30 -0600
Subject: [PATCH 27/28] update mcore version (#6875)

Signed-off-by: ericharper
---
 README.rst                        | 2 +-
 requirements/requirements_nlp.txt | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.rst b/README.rst
index 841509dfec5f..d77b7b1fadfa 100644
--- a/README.rst
+++ b/README.rst
@@ -263,7 +263,7 @@ packaging is also needed:
 
 .. code-block:: bash
 
-    pip install -y packaging
+    pip install packaging
 
 Transformer Engine
diff --git a/requirements/requirements_nlp.txt b/requirements/requirements_nlp.txt
index d88280b363c2..0d4a5a97e0b3 100644
--- a/requirements/requirements_nlp.txt
+++ b/requirements/requirements_nlp.txt
@@ -12,7 +12,7 @@ inflect
 jieba
 markdown2
 matplotlib>=3.3.2
-megatron_core==0.1.0
+megatron_core==0.2.0
 nltk>=3.6.5
 numpy
 opencc
From f0fa5410e67289fecf4c2af35f383e9e4c7d7f67 Mon Sep 17 00:00:00 2001
From: Dockerfile builder
Date: Wed, 17 Apr 2024 16:39:16 -0700
Subject: [PATCH 28/28] Fix language ID issue for any2any training by loading
 language IDs from the .yaml file

---
 .../conf/aayn_base_megatron.yaml              | 35 +++++++++++++++++++
 .../megatron_nmt_training.py                  | 18 ++++++++++
 .../machine_translation/megatron_nmt_model.py | 16 +++++++--
 3 files changed, 66 insertions(+), 3 deletions(-)

diff --git a/examples/nlp/machine_translation/conf/aayn_base_megatron.yaml b/examples/nlp/machine_translation/conf/aayn_base_megatron.yaml
index f0f3c20f5ea4..367b415647a8 100644
--- a/examples/nlp/machine_translation/conf/aayn_base_megatron.yaml
+++ b/examples/nlp/machine_translation/conf/aayn_base_megatron.yaml
@@ -40,6 +40,41 @@ exp_manager:
   model_parallel_size: ${multiply:${model.tensor_model_parallel_size}, ${model.pipeline_model_parallel_size}}
 
 model:
+  # Language code to token id mapping
+  multilingual_lang_to_id:
+    'cs': 64000
+    'da': 64001
+    'de': 64002
+    'el': 64003
+    'es': 64004
+    'fi': 64005
+    'fr': 64006
+    'hu': 64007
+    'it': 64008
+    'lt': 64009
+    'lv': 64010
+    'nl': 64011
+    'no': 64012
+    'pl': 64013
+    'pt': 64014
+    'ro': 64015
+    'ru': 64016
+    'sk': 64017
+    'sv': 64018
+    'zh': 64019
+    'ja': 64020
+    'hi': 64021
+    'ko': 64022
+    'et': 64023
+    'sl': 64024
+    'bg': 64025
+    'uk': 64026
+    'hr': 64027
+    'ar': 64028
+    'vi': 64029
+    'tr': 64030
+    'id': 64031
+    'en': 64032
   # NMT Params
   multilingual: False
   label_smoothing: 0.1 # TODO: Implement this.
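Aside: a table like `multilingual_lang_to_id` is typically consumed by prepending the target language's sentinel token id to each tokenized source sentence, which is how a many-to-many model is told which language to decode into. A standalone sketch using ids from the table above (the helper is illustrative, not NeMo's API):

.. code-block:: python

    from collections import OrderedDict
    from typing import List

    # Subset of the yaml table above; the full table runs 'cs' (64000) through 'en' (64032).
    lang_to_id = OrderedDict([('de', 64002), ('fr', 64006), ('en', 64032)])

    def add_target_lang_token(src_ids: List[int], tgt_lang: str) -> List[int]:
        # Prepend the sentinel id so the decoder knows the target language.
        if tgt_lang not in lang_to_id:
            raise ValueError(f'unknown target language: {tgt_lang}')
        return [lang_to_id[tgt_lang]] + src_ids

    print(add_target_lang_token([17, 204, 9], 'de'))  # -> [64002, 17, 204, 9]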
diff --git a/examples/nlp/machine_translation/megatron_nmt_training.py b/examples/nlp/machine_translation/megatron_nmt_training.py
index 7fd211447196..9370c6adb16a 100644
--- a/examples/nlp/machine_translation/megatron_nmt_training.py
+++ b/examples/nlp/machine_translation/megatron_nmt_training.py
@@ -34,6 +34,8 @@
 from nemo.utils import logging
 from nemo.utils.exp_manager import exp_manager
 
+from collections import OrderedDict
+
 mp.set_start_method("spawn", force=True)
 
 
@@ -173,6 +175,22 @@ def main(cfg) -> None:
     else:
         model = MegatronNMTModel(cfg.model, trainer)
 
+    if hasattr(cfg.model, 'multilingual_lang_to_id') and cfg.model.multilingual_lang_to_id is not None:
+        tmp_dict = OrderedDict()
+        for key, value in cfg.model.multilingual_lang_to_id.items():
+            tmp_dict[key] = value
+
+        if model.multilingual_lang_to_id != tmp_dict:
+            print("Constructed multilingual_lang_to_id differs from the multilingual_lang_to_id in the .yaml config; overriding it with the .yaml values.")
+            model.multilingual_lang_to_id = tmp_dict
+
+
+    print("###############################")
+    print("Final model.multilingual_lang_to_id:")
+    print(model.multilingual_lang_to_id)
+    print("###############################")
+
+
     trainer.fit(model)
     trainer.validate(model)
diff --git a/nemo/collections/nlp/models/machine_translation/megatron_nmt_model.py b/nemo/collections/nlp/models/machine_translation/megatron_nmt_model.py
index 05fb492828aa..60150f2230e8 100644
--- a/nemo/collections/nlp/models/machine_translation/megatron_nmt_model.py
+++ b/nemo/collections/nlp/models/machine_translation/megatron_nmt_model.py
@@ -133,10 +133,16 @@ def _determine_multilingual_training_type(self):
     def _setup_multilingual_special_tokens(self):
         if self.multilingual_type == MultilingualModelType.many_to_many:
             if self.objective == 'nmt-xlm':
-                unique_langs = set(self.src_language + self.tgt_language)
+                unique_langs = []
+                for l in self.src_language + self.tgt_language:
+                    if l not in unique_langs:
+                        unique_langs.append(l)
             else:
                 # We don't take a set() for tgt_language here because the same lang can appear multiple times.
-                unique_langs = set(self.tgt_language)
+                unique_langs = []
+                for l in self.tgt_language:
+                    if l not in unique_langs:
+                        unique_langs.append(l)
             for lng in unique_langs:
                 self.multilingual_lang_tokens["<" + lng + ">"] = "<" + lng + ">"
         elif self.multilingual_type == MultilingualModelType.many_to_one:
@@ -541,12 +547,15 @@ def eval_epoch_end(self, outputs, mode):
             self.log(f"{mode}_loss_avg", np.mean(loss_list), sync_dist=True, batch_size=1)
             self.log(f"{mode}_sacreBLEU_avg", np.mean(bleu_score_list), batch_size=1)
 
+
     def _log_multilingual_bleu_and_loss(self, dataloader_idx, bleu_score, loss, mode):
         """
         Function to log multilingual BLEU scores with the right source-target language string instead of just the dataloader idx.
""" # Check if one-many or many-one and log with lang ids instead of dataloader_idx - if isinstance(self.src_language, ListConfig): + if isinstance(self.src_language, ListConfig) and isinstance(self.tgt_language, ListConfig): + translation_lang_string = f'{self.src_language[dataloader_idx]}-{self.tgt_language[dataloader_idx]}' + elif isinstance(self.src_language, ListConfig): translation_lang_string = f'{self.src_language[dataloader_idx]}-{self.tgt_language}' else: translation_lang_string = f'{self.src_language}-{self.tgt_language[dataloader_idx]}' @@ -554,6 +563,7 @@ def _log_multilingual_bleu_and_loss(self, dataloader_idx, bleu_score, loss, mode self.log(f'{mode}_sacreBLEU_{translation_lang_string}', bleu_score, sync_dist=True, batch_size=1) self.log(f'{mode}_loss_{translation_lang_string}', loss, sync_dist=True, batch_size=1) + def setup_validation_data(self, val_data_config: Optional[DictConfig]): if hasattr(self, '_validation_ds'): self._validation_dl = self._setup_eval_dataloader_from_config(