Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
33 changes: 16 additions & 17 deletions .github/workflows/run_maxtext_jetstream_tests.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -24,15 +24,15 @@ on:
workflow_dispatch:
schedule:
# Run the job every 4 hours
- cron: '0 */1 * * *'
- cron: '0 */24 * * *'

jobs:
prelim:
runs-on: ["self-hosted", "tpu", "v6e-8"]
steps:
- uses: actions/checkout@v4
- name: Test MOE Microbenchmarks
run: bash .github/workflows/test_moe_microbenchmarks.sh
- name: Test MOEBenchmarks
run: bash .github/workflows/test_moe_benchmarks.sh
# run: bash .github/workflows/test_moe_8x22b_microbenchmark.sh
# - name: Test MOE long context chunked prefill - 8k
# run: bash .github/workflows/benchmark_chunked_prefill.sh
Expand All @@ -50,19 +50,18 @@ jobs:
- name: Log message if dependent job succeeded
if: ${{ ! (failure() && github.event.pull_request == null) }}
run: echo "Conditions for creating/updating issue not met. Skipping."
# - name: Send email
# uses: dawidd6/action-send-mail@v3.6.0
# with:
# server_address: smtp.gmail.com
# server_port: 465
# username: ${{secrets.MAIL_USERNAME}}
# password: ${{secrets.MAIL_PASSWORD}}
# subject: Message from Inference Stable Stack Runs.
# to: singhvijaya@google.com, yuyanpeng@google.com, vipannalla@google.com
# from: InferenceStableStackRuns
# secure: true
# attachments: ~/test_dir/moe_8x7b_jetstream.txt
# # attachments: ~/test_dir/moe_8x7b.txt,~/test_dir/moe_8x22b.txt,~/test_dir/moe_8x22b_long_context_8k_prefill.txt
# body: workflow for ${{github.repository}} completed successfully!
- name: Send email
uses: dawidd6/action-send-mail@v3.6.0
with:
server_address: smtp.gmail.com
server_port: 465
username: ${{secrets.MAIL_USERNAME}}
password: ${{secrets.MAIL_PASSWORD}}
subject: Message from Inference Stable Stack Runs.
to: singhvijaya@google.com, yuyanpeng@google.com, vipannalla@google.com
from: JetStream Runs
secure: true
attachments: ~/test_dir/moe_8x7b.txt,~/test_dir/moe_8x22b.txt,~/test_dir/moe_8x22b_long_context_8k_prefill.txt,~/test_dir/moe_8x7b_jetstream.txt
body: workflow for ${{github.repository}} completed successfully!
- name: Cleanup
run: rm -rf ~/test_dir
54 changes: 54 additions & 0 deletions .github/workflows/test_moe_benchmarks.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
#!/bin/bash
# Runs MoE (Mixtral 8x7B / 8x22B) inference benchmarks on a TPU self-hosted
# runner and leaves trimmed result logs under ~/test_dir for the CI workflow
# to attach to its status email:
#   moe_8x7b.txt, moe_8x22b.txt, moe_8x22b_long_context_8k_prefill.txt,
#   moe_8x7b_jetstream.txt

# -p: do not fail if a previous (crashed) run left the directory behind.
mkdir -p ~/test_dir
cd ~/test_dir
git clone https://github.com/google/maxtext.git

cd ~/test_dir
git clone https://github.com/google/JetStream.git
cd ~/test_dir
sudo apt-get -y update
sudo apt-get -y install python3.10-venv
sudo apt-get -y install jq
python -m venv .env
source .env/bin/activate

# Install JetStream and its benchmark-client dependencies into the venv.
cd ~/test_dir
cd JetStream
pip install -e .
cd benchmarks
pip install -r requirements.in

# Install MaxText (stable TPU build).
cd ~/test_dir
cd maxtext/
pip3 install wheel
bash setup.sh MODE=stable DEVICE=tpu

# Pinned for the openorca eval in benchmark_serving.py.
pip install nltk==3.8.1


# moe 8x7b microbenchmark
LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.inference_microbenchmark MaxText/configs/inference.yml tokenizer_path=assets/tokenizer.mistral-v1 max_prefill_predict_length=1024 max_target_length=2048 model_name=mixtral-8x7b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 ici_context_autoregressive_parallelism=8 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=8 megablox=False quantization=int8 quantize_kvcache=False checkpoint_is_quantized=True load_parameters_path=gs://jetstream-runner/8-7B-int8 capacity_factor=1 attention=dot_product model_call_mode=inference sparse_matmul=False weight_dtype=bfloat16 > ~/test_dir/moe_8x7b.txt
# Keep only the results summary (last 5 lines) for the email attachment.
tail -n5 ~/test_dir/moe_8x7b.txt > ~/test_dir/moe_8x7b.tmp && mv ~/test_dir/moe_8x7b.tmp ~/test_dir/moe_8x7b.txt

# moe 8x22B microbenchmark
LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.inference_microbenchmark MaxText/configs/inference.yml load_parameters_path=gs://jetstream-runner/8-22B-int8 max_prefill_predict_length=1024 max_target_length=2048 model_name=mixtral-8x22b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 ici_context_autoregressive_parallelism=8 scan_layers=false per_device_batch_size=24 attention=dot_product megablox=False quantization=int8 checkpoint_is_quantized=True quantize_kvcache=True capacity_factor=1 tokenizer_path=assets/tokenizer.mistral-v3 inference_microbenchmark_prefill_lengths="128,1024" sparse_matmul=False model_call_mode=inference > ~/test_dir/moe_8x22b.txt
tail -n5 ~/test_dir/moe_8x22b.txt > ~/test_dir/moe_8x22b.tmp && mv ~/test_dir/moe_8x22b.tmp ~/test_dir/moe_8x22b.txt

# moe 8x22B 8k context length chunked prefill with 2k prefill chunk size
LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.benchmark_chunked_prefill MaxText/configs/inference.yml load_parameters_path=gs://jetstream-runner/8-22B-int8 max_prefill_predict_length=8192 max_target_length=9000 model_name=mixtral-8x22b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 scan_layers=false per_device_batch_size=24 attention=dot_product megablox=False quantization=int8 checkpoint_is_quantized=True quantize_kvcache=False capacity_factor=1 tokenizer_path=assets/tokenizer.mistral-v3 inference_microbenchmark_prefill_lengths="8192" sparse_matmul=False model_call_mode=inference ici_context_autoregressive_parallelism=8 use_chunked_prefill=True prefill_chunk_size=2048 > ~/test_dir/moe_8x22b_long_context_8k_prefill.txt
tail -n5 ~/test_dir/moe_8x22b_long_context_8k_prefill.txt > ~/test_dir/moe_8x22b_long_context_8k_prefill.tmp && mv ~/test_dir/moe_8x22b_long_context_8k_prefill.tmp ~/test_dir/moe_8x22b_long_context_8k_prefill.txt


# moe 8x7B Maxtext Jetstream

# Start the server in the BACKGROUND (&): maxengine_server blocks forever,
# so without & the sleep / benchmark client / cleanup below would never run.
LIBTPU_INIT_ARGS="--xla_tpu_enable_windowed_einsum_for_reduce_scatter=false --xla_jf_spmd_threshold_for_windowed_einsum_mib=1000000" python -m MaxText.maxengine_server MaxText/configs/inference.yml tokenizer_path=assets/tokenizer.mistral-v1 max_prefill_predict_length=1024 max_target_length=2048 model_name=mixtral-8x7b ici_fsdp_parallelism=1 ici_autoregressive_parallelism=1 ici_tensor_parallelism=1 ici_context_autoregressive_parallelism=8 scan_layers=false weight_dtype=bfloat16 per_device_batch_size=24 megablox=False quantization=int8 quantize_kvcache=True checkpoint_is_quantized=True load_parameters_path=gs://jetstream-runner/8-7B-int8 capacity_factor=1 attention=dot_product model_call_mode=inference sparse_matmul=False weight_dtype=bfloat16 &

# Give the server time to load the checkpoint and start serving.
sleep 600

cd ..

python JetStream/benchmarks/benchmark_serving.py --tokenizer ~/test_dir/maxtext/assets/tokenizer.mistral-v1 --save-result --save-request-outputs --request-outputs-file-path outputs.json --num-prompts 1200 --max-output-length 1024 --dataset openorca --run-eval True > ~/test_dir/moe_8x7b_jetstream.txt
tail -n10 ~/test_dir/moe_8x7b_jetstream.txt > ~/test_dir/moe_8x7b_jetstream.tmp && mv ~/test_dir/moe_8x7b_jetstream.tmp ~/test_dir/moe_8x7b_jetstream.txt

# kill python jobs (the backgrounded maxengine_server).
# pkill -f matches the full command line; unlike `ps aux | grep python`,
# it does not pick up the grep process's own PID. `|| true` keeps the
# script's exit status clean if the server already exited.
sudo pkill -9 -f python || true
35 changes: 0 additions & 35 deletions .github/workflows/test_moe_microbenchmarks.sh

This file was deleted.