refactor argument passing and model configs by using yaml (intel#127)
* refactoring

Signed-off-by: Jiafu Zhang <jiafu.zhang@intel.com>

* refactor argument passing and model configs by using yaml

Signed-off-by: jiafu zhang <jiafu.zhang@intel.com>

* debugging gpt-j-6b

Signed-off-by: jiafu zhang <jiafu.zhang@intel.com>

---------

Signed-off-by: Jiafu Zhang <jiafu.zhang@intel.com>
Signed-off-by: jiafu zhang <jiafu.zhang@intel.com>
jiafuzha committed Nov 23, 2023
1 parent b5f244e commit 122741e
Showing 20 changed files with 476 additions and 352 deletions.
30 changes: 30 additions & 0 deletions .github/workflows/config/mpt_deltatuner.yaml
@@ -0,0 +1,30 @@
port: 8000
name: mpt-7b
route_prefix: /mpt-7b
precision: 'bf16'
cpus_per_worker: 24
gpus_per_worker: 0
deepspeed: false
workers_per_group: 2
ipex: false
device: "cpu"
model_description:
model_id_or_path: mosaicml/mpt-7b
tokenizer_name_or_path: EleutherAI/gpt-neox-20b
chat_processor: ChatModelGptJ
peft_model_id_or_path: nathan0/mpt-7b-deltatuner-model
peft_type: deltatuner
prompt:
intro: 'Below is an instruction that describes a task, paired with an input that
provides further context. Write a response that appropriately completes the request.
'
human_id: '
### Instruction'
bot_id: '
### Response'
stop_words: []
config:
trust_remote_code: true
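As an aside, this kind of serving config is typically parsed with PyYAML and then merged with command-line overrides. The sketch below is illustrative only (`merge_config` and the inlined dict are hypothetical, not code from this commit); the dict stands in for the result of `yaml.safe_load()` on the file above so the snippet needs only the standard library:

```python
import argparse

# Stand-in for yaml.safe_load(open("mpt_deltatuner.yaml")) -- subset of keys shown
yaml_config = {
    "port": 8000,
    "name": "mpt-7b",
    "route_prefix": "/mpt-7b",
    "deepspeed": False,
    "cpus_per_worker": 24,
}

def merge_config(yaml_config, cli_overrides):
    """Return the YAML values with any explicitly-set CLI flags layered on top."""
    merged = dict(yaml_config)
    merged.update({k: v for k, v in cli_overrides.items() if v is not None})
    return merged

parser = argparse.ArgumentParser()
parser.add_argument("--port", type=int, default=None)  # None means "not set on CLI"
args = parser.parse_args(["--port", "9000"])

config = merge_config(yaml_config, vars(args))
print(config["port"])  # 9000 -- the CLI flag wins over the YAML value
```

The `default=None` convention is what lets the merge distinguish "flag omitted" (keep the YAML value) from "flag given" (override it).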
30 changes: 30 additions & 0 deletions .github/workflows/config/mpt_deltatuner_deepspeed.yaml
@@ -0,0 +1,30 @@
port: 8000
name: mpt-7b
route_prefix: /mpt-7b
precision: 'bf16'
cpus_per_worker: 24
gpus_per_worker: 0
deepspeed: true
workers_per_group: 2
ipex: false
device: "cpu"
model_description:
model_id_or_path: mosaicml/mpt-7b
tokenizer_name_or_path: EleutherAI/gpt-neox-20b
chat_processor: ChatModelGptJ
peft_model_id_or_path: nathan0/mpt-7b-deltatuner-model
peft_type: deltatuner
prompt:
intro: 'Below is an instruction that describes a task, paired with an input that
provides further context. Write a response that appropriately completes the request.
'
human_id: '
### Instruction'
bot_id: '
### Response'
stop_words: []
config:
trust_remote_code: true
7 changes: 7 additions & 0 deletions .github/workflows/workflow_finetune.yml
@@ -123,6 +123,13 @@ jobs:
docker exec "finetune" bash -c "python finetune/finetune.py --config_path finetune/finetune.conf"
fi
- name: Stop Ray
run: |
cid=$(docker ps -q --filter "name=finetune")
if [[ ! -z "$cid" ]]; then
docker exec "finetune" bash -c "ray stop"
fi
- name: Stop Container
if: success() || failure()
run: |
21 changes: 14 additions & 7 deletions .github/workflows/workflow_inference.yml
@@ -12,10 +12,10 @@ jobs:
name: inference test
strategy:
matrix:
model: [ gpt-j-6B, gpt2, bloom, opt, mpt ]
model: [ gpt-j-6b, gpt2, bloom-560m, opt-125m, mpt-7b ]
include:
- dtuner_model: /root/.cache/huggingface/hub/mpt-7b-deltatuner-model
model: mpt
- dtuner_model: nathan0/mpt-7b-deltatuner-model
model: mpt-7b
runs-on: self-hosted
steps:
- name: Checkout
@@ -43,13 +43,13 @@ jobs:
- name: Run Inference Test with Deltatuner
if: ${{ matrix.dtuner_model }}
run: |
docker exec "inference" bash -c "KEEP_SERVE_TERMINAL='false' MODEL_TO_SERVE=\"${{ matrix.model }}\" python inference/run_model_serve.py --deltatuner_model ${{ matrix.dtuner_model }}"
docker exec "inference" bash -c "KEEP_SERVE_TERMINAL='false' python inference/run_model_serve.py --config_file .github/workflows/config/mpt_deltatuner.yaml"
docker exec "inference" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}"
docker exec "inference" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response"
- name: Run Inference Test with DeepSpeed
run: |
if [[ ${{ matrix.model }} =~ ^(gpt2|mpt)$ ]]; then
if [[ ${{ matrix.model }} =~ ^(gpt2|mpt-7b)$ ]]; then
echo ${{ matrix.model }} is not supported!
else
docker exec "inference" bash -c "KEEP_SERVE_TERMINAL='false' MODEL_TO_SERVE=\"${{ matrix.model }}\" python inference/run_model_serve.py --deepspeed"
@@ -60,14 +60,21 @@ jobs:
- name: Run Inference Test with DeepSpeed and Deltatuner
if: ${{ matrix.dtuner_model }}
run: |
if [[ ${{ matrix.model }} =~ ^(gpt2|mpt)$ ]]; then
if [[ ${{ matrix.model }} =~ ^(gpt2|mpt-7b)$ ]]; then
echo ${{ matrix.model }} is not supported!
else
docker exec "inference" bash -c "KEEP_SERVE_TERMINAL='false' MODEL_TO_SERVE=\"${{ matrix.model }}\" python inference/run_model_serve.py --deepspeed --deltatuner_model ${{ matrix.dtuner_model }}"
docker exec "inference" bash -c "KEEP_SERVE_TERMINAL='false' python inference/run_model_serve.py --config_file .github/workflows/config/mpt_deltatuner_deepspeed.yaml"
docker exec "inference" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }}"
docker exec "inference" bash -c "python inference/run_model_infer.py --num_iter 1 --model_endpoint http://127.0.0.1:8000/${{ matrix.model }} --streaming_response"
fi
- name: Stop Ray
run: |
cid=$(docker ps -q --filter "name=inference")
if [[ ! -z "$cid" ]]; then
docker exec "inference" bash -c "ray stop"
fi
- name: Stop Container
if: success() || failure()
run: |
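The inference steps above all point `run_model_infer.py` at an endpoint built from the config's `port` and `route_prefix`. A minimal sketch of that mapping (the helper is hypothetical, for illustration only):

```python
def endpoint_url(route_prefix, host="127.0.0.1", port=8000):
    """Build the model endpoint URL from the serving config's fields."""
    return f"http://{host}:{port}{route_prefix}"

print(endpoint_url("/mpt-7b"))  # http://127.0.0.1:8000/mpt-7b
```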