From 40c47e8c0eeeb769c304d8c08657e06d9fd98b99 Mon Sep 17 00:00:00 2001 From: Brendan Slabe Date: Thu, 27 Mar 2025 20:46:26 +0000 Subject: [PATCH 1/3] first commit --- latency_throughput_curve.sh | 77 ++++++++++++++++--------------------- 1 file changed, 33 insertions(+), 44 deletions(-) diff --git a/latency_throughput_curve.sh b/latency_throughput_curve.sh index c3398fc..71b088e 100755 --- a/latency_throughput_curve.sh +++ b/latency_throughput_curve.sh @@ -1,18 +1,5 @@ #!/bin/bash -# Copyright 2024 Google Inc. All rights reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. set -o xtrace export IP=$IP @@ -24,48 +11,50 @@ if [[ "$PROMPT_DATASET" = "sharegpt" ]]; then fi PYTHON="python3" -PYTHON_OPTS="benchmark_serving.py " +BASE_PYTHON_OPTS=( + "benchmark_serving.py" + "--save-json-results" + "--host=$IP" + "--port=$PORT" + "--dataset=$PROMPT_DATASET_FILE" + "--tokenizer=$TOKENIZER" + "--backend=$BACKEND" + "--max-input-length=$INPUT_LENGTH" + "--max-output-length=$OUTPUT_LENGTH" + "--file-prefix=$FILE_PREFIX" + "--models=$MODELS" + "--pm-namespace=$PM_NAMESPACE" + "--pm-job=$PM_JOB" +) + +[[ "$TRAFFIC_SPLIT" ]] && BASE_PYTHON_OPTS+=("--traffic-split=$TRAFFIC_SPLIT") +[[ "$OUTPUT_BUCKET" ]] && BASE_PYTHON_OPTS+=("--output-bucket=$OUTPUT_BUCKET") +[[ "$SCRAPE_SERVER_METRICS" = "true" ]] && BASE_PYTHON_OPTS+=("--scrape-server-metrics") +[[ "$SAVE_AGGREGATED_RESULT" = "true" ]] && BASE_PYTHON_OPTS+=("--save-aggregated-result") +[[ "$STREAM_REQUEST" = "true" ]] && BASE_PYTHON_OPTS+=("--stream-request") +[[ "$OUTPUT_BUCKET_FILEPATH" ]] && BASE_PYTHON_OPTS+=("--output-bucket-filepath" "$OUTPUT_BUCKET_FILEPATH") + +SLEEP_TIME=${SLEEP_TIME:-0} + for request_rate in $(echo $REQUEST_RATES | tr ',' ' '); do echo "Benchmarking request rate: ${request_rate}" - # TODO: Check if profile already exists, if so then skip timestamp=$(date +"%Y-%m-%d_%H-%M-%S") output_file="latency-profile-${timestamp}.txt" - if [ ${request_rate} == 0 ]; then + + if [ "$request_rate" == "0" ]; then request_rate="inf" num_prompts=$MAX_NUM_PROMPTS else num_prompts=$(awk "BEGIN {print int($request_rate * $BENCHMARK_TIME_SECONDS)}") fi - - echo "TOTAL prompts: $num_prompts" - - # Build the python command options - PYTHON_OPTS="$PYTHON_OPTS --save-json-results --host=$IP --port=$PORT --dataset=$PROMPT_DATASET_FILE --tokenizer=$TOKENIZER --request-rate=$request_rate --backend=$BACKEND --num-prompts=$num_prompts --max-input-length=$INPUT_LENGTH --max-output-length=$OUTPUT_LENGTH --file-prefix=$FILE_PREFIX --models=$MODELS --pm-namespace=$PM_NAMESPACE --pm-job=$PM_JOB" - - if [[ "$TRAFFIC_SPLIT" ]]; then - PYTHON_OPTS="$PYTHON_OPTS --traffic-split=$TRAFFIC_SPLIT" - fi - if [[ "$OUTPUT_BUCKET" ]]; then - PYTHON_OPTS="$PYTHON_OPTS --output-bucket=$OUTPUT_BUCKET" - fi - - if [[ "$SCRAPE_SERVER_METRICS" = "true" ]]; then - PYTHON_OPTS="$PYTHON_OPTS --scrape-server-metrics" - fi - if [[ "$SAVE_AGGREGATED_RESULT" = "true" ]]; then - PYTHON_OPTS="$PYTHON_OPTS --save-aggregated-result" - fi - if [[ "$STREAM_REQUEST" = "true" ]]; then - PYTHON_OPTS="$PYTHON_OPTS --stream-request" - fi - if [[ "$OUTPUT_BUCKET_FILEPATH" ]]; then - PYTHON_OPTS="$PYTHON_OPTS --output-bucket-filepath $OUTPUT_BUCKET_FILEPATH" - fi + echo "TOTAL prompts: $num_prompts" + PYTHON_OPTS=("${BASE_PYTHON_OPTS[@]}" "--request-rate=$request_rate" "--num-prompts=$num_prompts") - $PYTHON $PYTHON_OPTS > $output_file - cat $output_file - sleep 30 # wait 30 seconds before next run to ensure metrics isolation + $PYTHON "${PYTHON_OPTS[@]}" > "$output_file" + cat "$output_file" + echo "Sleeping for $SLEEP_TIME seconds..." + sleep $SLEEP_TIME done export LPG_FINISHED="true" From ec19c6103ec770efd93704dd88bb0c0f508c168d Mon Sep 17 00:00:00 2001 From: Brendan Slabe Date: Thu, 27 Mar 2025 20:47:10 +0000 Subject: [PATCH 2/3] dont remove licence --- latency_throughput_curve.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/latency_throughput_curve.sh b/latency_throughput_curve.sh index 71b088e..64f47ad 100755 --- a/latency_throughput_curve.sh +++ b/latency_throughput_curve.sh @@ -1,5 +1,19 @@ #!/bin/bash +# Copyright 2024 Google Inc. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + set -o xtrace export IP=$IP From d606e2c9bd4f6cab3d7f93d1c3f4f61c42340570 Mon Sep 17 00:00:00 2001 From: Brendan Slabe Date: Thu, 27 Mar 2025 21:03:10 +0000 Subject: [PATCH 3/3] revert todo --- latency_throughput_curve.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/latency_throughput_curve.sh b/latency_throughput_curve.sh index 64f47ad..da182c3 100755 --- a/latency_throughput_curve.sh +++ b/latency_throughput_curve.sh @@ -52,6 +52,7 @@ SLEEP_TIME=${SLEEP_TIME:-0} for request_rate in $(echo $REQUEST_RATES | tr ',' ' '); do echo "Benchmarking request rate: ${request_rate}" + # TODO: Check if profile already exists, if so then skip timestamp=$(date +"%Y-%m-%d_%H-%M-%S") output_file="latency-profile-${timestamp}.txt"