Skip to content

Commit

Permalink
Merge branch 'main' into huvu/mcore_retro_deprecation_warning
Browse files Browse the repository at this point in the history
  • Loading branch information
huvunvidia committed May 10, 2024
2 parents 00e79ab + 865839e commit 4032726
Show file tree
Hide file tree
Showing 48 changed files with 816 additions and 7,058 deletions.
104 changes: 0 additions & 104 deletions .github/workflows/blossom-ci.yml

This file was deleted.

125 changes: 63 additions & 62 deletions .github/workflows/cicd-main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -403,68 +403,68 @@ jobs:
- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
if: "failure()"

L2_PTQ_Llama2_FP8:
needs: [cicd-test-container-setup]
runs-on: self-hosted-azure
timeout-minutes: 10
container:
image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
options:
# --user 0:128
--device=/dev/nvidia0
--gpus all
--shm-size=8g
--env TRANSFORMERS_OFFLINE=0
--env HYDRA_FULL_ERROR=1
--volume /mnt/datadrive/TestData:/home/TestData
steps:
- name: Checkout repository
uses: actions/checkout@v4
- run: |
python examples/nlp/language_modeling/megatron_llama_quantization.py \
model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
tensor_model_parallel_size=2 \
trainer.devices=2 \
quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
quantization.algorithm=fp8 \
quantization.num_calib_size=8 \
inference.batch_size=2 \
export.inference_tensor_parallel=2 \
model_save=/home/TestData/nlp/megatron_llama/ci_fp8.qnemo
rm -rf /home/TestData/nlp/megatron_llama/ci_fp8.qnemo
- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
if: "failure()"

L2_PTQ_Llama2_INT8_SQ:
needs: [cicd-test-container-setup]
runs-on: self-hosted-azure
timeout-minutes: 10
container:
image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
options:
# --user 0:128
--device=/dev/nvidia0
--gpus all
--shm-size=8g
--env TRANSFORMERS_OFFLINE=0
--env HYDRA_FULL_ERROR=1
--volume /mnt/datadrive/TestData:/home/TestData
steps:
- name: Checkout repository
uses: actions/checkout@v4
- run: |
python examples/nlp/language_modeling/megatron_llama_quantization.py \
model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
quantization.algorithm=int8_sq \
quantization.num_calib_size=8 \
inference.batch_size=2 \
model_save=/home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
# L2_PTQ_Llama2_FP8:
# needs: [cicd-test-container-setup]
# runs-on: self-hosted-azure
# timeout-minutes: 10
# container:
# image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
# options:
# # --user 0:128
# --device=/dev/nvidia0
# --gpus all
# --shm-size=8g
# --env TRANSFORMERS_OFFLINE=0
# --env HYDRA_FULL_ERROR=1
# --volume /mnt/datadrive/TestData:/home/TestData
# steps:
# - name: Checkout repository
# uses: actions/checkout@v4
# - run: |
# python examples/nlp/language_modeling/megatron_llama_quantization.py \
# model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
# tensor_model_parallel_size=2 \
# trainer.devices=2 \
# quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
# quantization.algorithm=fp8 \
# quantization.num_calib_size=8 \
# inference.batch_size=2 \
# export.inference_tensor_parallel=2 \
# model_save=/home/TestData/nlp/megatron_llama/ci_fp8.qnemo

# rm -rf /home/TestData/nlp/megatron_llama/ci_fp8.qnemo
# - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
# if: "failure()"

rm -rf /home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
- uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
if: "failure()"
# L2_PTQ_Llama2_INT8_SQ:
# needs: [cicd-test-container-setup]
# runs-on: self-hosted-azure
# timeout-minutes: 10
# container:
# image: nemoci.azurecr.io/nemo_container_${{ github.run_id }}
# options:
# # --user 0:128
# --device=/dev/nvidia0
# --gpus all
# --shm-size=8g
# --env TRANSFORMERS_OFFLINE=0
# --env HYDRA_FULL_ERROR=1
# --volume /mnt/datadrive/TestData:/home/TestData
# steps:
# - name: Checkout repository
# uses: actions/checkout@v4
# - run: |
# python examples/nlp/language_modeling/megatron_llama_quantization.py \
# model_file=/home/TestData/nlp/megatron_llama/llama_ci.nemo \
# quantization.calib_dataset=/home/TestData/nlp/test_quantization/test.json \
# quantization.algorithm=int8_sq \
# quantization.num_calib_size=8 \
# inference.batch_size=2 \
# model_save=/home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo

# rm -rf /home/TestData/nlp/megatron_llama/ci_int8_sq.qnemo
# - uses: "NVIDIA/NeMo/.github/actions/cancel-workflow@main"
# if: "failure()"

#L2_PTQ_Llama2_INT4_AWQ:
# needs: [cicd-test-container-setup]
Expand Down Expand Up @@ -4876,7 +4876,7 @@ jobs:
- name: Checkout repository
uses: actions/checkout@v4
- run: |
python examples/nlp/language_modeling/tuning/megatron_gpt_sft.py \
python examples/nlp/language_modeling/tuning/megatron_gpt_finetuning.py \
trainer.devices=1 \
trainer.num_nodes=1 \
trainer.precision=32 \
Expand All @@ -4887,6 +4887,7 @@ jobs:
+trainer.limit_test_batches=2 \
exp_manager.checkpoint_callback_params.save_best_model=False \
exp_manager.exp_dir=examples/nlp/language_modeling/gpt_sft_results \
model.peft.peft_scheme=none \
model.optim.name=distributed_fused_adam \
model.restore_from_path=/home/TestData/nlp/megatron_gpt/starcoder-ci-nemo/megatron_starcoder_tp1_pp1.nemo \
model.tensor_model_parallel_size=1 \
Expand Down
66 changes: 66 additions & 0 deletions .github/workflows/code-formatting.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
name: Isort and Black Formatting
# Incrementally reformat only changed files with black, all files with isort
#
# Replaces pre-commit.ci, since it reformats all the files.
# See issue https://github.com/pre-commit-ci/issues/issues/90
#
# The action requires a custom token to trigger workflow after pushing reformatted files back to the branch.
# `secrets.GITHUB_TOKEN` can be used instead, but this will result
# in not running necessary checks after reformatting, which is undesirable.
# For details see https://github.com/orgs/community/discussions/25702
#
# NOTE(review): this workflow uses `pull_request_target` (runs in the context of
# the base repo, with access to secrets) together with a checkout of the untrusted
# PR head ref AND a write-capable custom token. Black/isort only parse the files,
# but this combination is a known "pwn request" risk surface if any step is later
# changed to execute checked-out code — confirm this trade-off is intentional and
# keep the step list limited to formatters. See GitHub's security hardening guide
# for `pull_request_target`.

# Trigger only when Python files change; `pull_request_target` (not `pull_request`)
# is required so the custom token/secrets are available to push the reformat commit.
on:
  pull_request_target:
    paths:
      - '**.py'

jobs:
  reformat_with_isort_and_black:
    runs-on: ubuntu-latest
    permissions:
      # write permissions required to commit changes
      contents: write
    steps:
      - name: Checkout branch
        uses: actions/checkout@v4
        with:
          # setup repository and ref for PRs, see
          # https://github.com/EndBug/add-and-commit?tab=readme-ov-file#working-with-prs
          # NOTE(review): this checks out the PR author's head branch (untrusted code).
          repository: ${{ github.event.pull_request.head.repo.full_name }}
          ref: ${{ github.event.pull_request.head.ref }}
          # custom token is required to trigger actions after reformatting + pushing
          token: ${{ secrets.NEMO_REFORMAT_TOKEN }}

      # Collect the list of .py files touched by this PR so black can be run
      # incrementally (only isort below runs repo-wide).
      # https://github.com/tj-actions/changed-files
      - name: Get changed files
        id: changed-files
        uses: tj-actions/changed-files@v44
        with:
          files: |
            **.py

      - name: Setup Python env
        uses: actions/setup-python@v5
        with:
          python-version: "3.10"

      - name: black
        uses: psf/black@stable
        with:
          options: "--verbose"
          # apply only to changed files (pass explicitly the files)
          src: "${{ steps.changed-files.outputs.all_changed_files }}"
          version: "~= 24.3"

      - name: isort
        uses: isort/isort-action@v1
        with:
          isort-version: "5.13.2"
          # reformat all files with isort – safe since the whole repo is already reformatted
          configuration: ""

      # Push any reformatting back to the PR branch; no-op when nothing changed.
      - uses: EndBug/add-and-commit@v9
        # Commit changes. Nothing is committed if no changes.
        with:
          message: Apply isort and black reformatting
          commit: --signoff
16 changes: 11 additions & 5 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,8 @@ ci:
autofix_prs: true
autoupdate_commit_msg: '[pre-commit.ci] pre-commit suggestions'
autoupdate_schedule: quarterly
# skip all hooks that can change the files, use GitHub Action "code-formatting.yml" for this
skip: [black,isort]

repos:
- repo: https://github.com/pre-commit/pre-commit-hooks
Expand All @@ -32,15 +34,19 @@ repos:
- id: requirements-txt-fixer

- repo: https://github.com/PyCQA/isort
rev: 5.12.0
rev: 5.13.2
hooks:
- id: isort
name: Format imports
exclude: docs/

- repo: https://github.com/psf/black
rev: 19.10b0
# Using this mirror lets us use mypyc-compiled black, which is about 2x faster
- repo: https://github.com/psf/black-pre-commit-mirror
rev: 24.3.0
hooks:
- id: black
name: Format code
additional_dependencies: ['click==8.0.2']
# It is recommended to specify the latest version of Python
# supported by your project here, or alternatively use
# pre-commit's default_language_version, see
# https://pre-commit.com/#top_level-default_language_version
language_version: python3.10
Loading

0 comments on commit 4032726

Please sign in to comment.