latency_throughput_curve.sh (64 changes: 34 additions, 30 deletions)
@@ -13,6 +13,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
+
 set -o xtrace
 
 export IP=$IP
@@ -24,48 +25,51 @@ if [[ "$PROMPT_DATASET" = "sharegpt" ]]; then
 fi
 
 PYTHON="python3"
-PYTHON_OPTS="benchmark_serving.py "
+BASE_PYTHON_OPTS=(
+  "benchmark_serving.py"
+  "--save-json-results"
+  "--host=$IP"
+  "--port=$PORT"
+  "--dataset=$PROMPT_DATASET_FILE"
+  "--tokenizer=$TOKENIZER"
+  "--backend=$BACKEND"
+  "--max-input-length=$INPUT_LENGTH"
+  "--max-output-length=$OUTPUT_LENGTH"
+  "--file-prefix=$FILE_PREFIX"
+  "--models=$MODELS"
+  "--pm-namespace=$PM_NAMESPACE"
+  "--pm-job=$PM_JOB"
+)
+
+[[ "$TRAFFIC_SPLIT" ]] && BASE_PYTHON_OPTS+=("--traffic-split=$TRAFFIC_SPLIT")
+[[ "$OUTPUT_BUCKET" ]] && BASE_PYTHON_OPTS+=("--output-bucket=$OUTPUT_BUCKET")
+[[ "$SCRAPE_SERVER_METRICS" = "true" ]] && BASE_PYTHON_OPTS+=("--scrape-server-metrics")
+[[ "$SAVE_AGGREGATED_RESULT" = "true" ]] && BASE_PYTHON_OPTS+=("--save-aggregated-result")
+[[ "$STREAM_REQUEST" = "true" ]] && BASE_PYTHON_OPTS+=("--stream-request")
+[[ "$OUTPUT_BUCKET_FILEPATH" ]] && BASE_PYTHON_OPTS+=("--output-bucket-filepath" "$OUTPUT_BUCKET_FILEPATH")
+
+SLEEP_TIME=${SLEEP_TIME:-0}
 
 for request_rate in $(echo $REQUEST_RATES | tr ',' ' '); do
   echo "Benchmarking request rate: ${request_rate}"
   # TODO: Check if profile already exists, if so then skip
   timestamp=$(date +"%Y-%m-%d_%H-%M-%S")
   output_file="latency-profile-${timestamp}.txt"
-  if [ ${request_rate} == 0 ]; then
+
+  if [ "$request_rate" == "0" ]; then
     request_rate="inf"
     num_prompts=$MAX_NUM_PROMPTS
   else
     num_prompts=$(awk "BEGIN {print int($request_rate * $BENCHMARK_TIME_SECONDS)}")
   fi
 
-  # Build the python command options
-  PYTHON_OPTS="$PYTHON_OPTS --save-json-results --host=$IP --port=$PORT --dataset=$PROMPT_DATASET_FILE --tokenizer=$TOKENIZER --request-rate=$request_rate --backend=$BACKEND --num-prompts=$num_prompts --max-input-length=$INPUT_LENGTH --max-output-length=$OUTPUT_LENGTH --file-prefix=$FILE_PREFIX --models=$MODELS --pm-namespace=$PM_NAMESPACE --pm-job=$PM_JOB"
-
-  if [[ "$TRAFFIC_SPLIT" ]]; then
-    PYTHON_OPTS="$PYTHON_OPTS --traffic-split=$TRAFFIC_SPLIT"
-  fi
-
-  if [[ "$OUTPUT_BUCKET" ]]; then
-    PYTHON_OPTS="$PYTHON_OPTS --output-bucket=$OUTPUT_BUCKET"
-  fi
-
-  if [[ "$SCRAPE_SERVER_METRICS" = "true" ]]; then
-    PYTHON_OPTS="$PYTHON_OPTS --scrape-server-metrics"
-  fi
-  if [[ "$SAVE_AGGREGATED_RESULT" = "true" ]]; then
-    PYTHON_OPTS="$PYTHON_OPTS --save-aggregated-result"
-  fi
-  if [[ "$STREAM_REQUEST" = "true" ]]; then
-    PYTHON_OPTS="$PYTHON_OPTS --stream-request"
-  fi
-  if [[ "$OUTPUT_BUCKET_FILEPATH" ]]; then
-    PYTHON_OPTS="$PYTHON_OPTS --output-bucket-filepath $OUTPUT_BUCKET_FILEPATH"
-  fi
-  echo "TOTAL prompts: $num_prompts"
+  echo "TOTAL prompts: $num_prompts"
+
+  PYTHON_OPTS=("${BASE_PYTHON_OPTS[@]}" "--request-rate=$request_rate" "--num-prompts=$num_prompts")
 
-  $PYTHON $PYTHON_OPTS > $output_file
-  cat $output_file
-  sleep 30 # wait 30 seconds before next run to ensure metrics isolation
+  $PYTHON "${PYTHON_OPTS[@]}" > "$output_file"
+  cat "$output_file"
+  echo "Sleeping for $SLEEP_TIME seconds..."
+  sleep $SLEEP_TIME
 done
 
 export LPG_FINISHED="true"
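A small sketch may help reviewers see what the array refactor changes in practice. In the old code, PYTHON_OPTS was a single string initialized once before the loop and appended to inside it, so each iteration carried the flags from earlier request rates along with its own; the new code keeps the invariant flags in BASE_PYTHON_OPTS and rebuilds PYTHON_OPTS as a fresh array every pass. The sketch below is illustration only, with invented tool and flag names rather than the benchmark's real ones:

#!/usr/bin/env bash
# Sketch only: contrasts the old string-append pattern with the array rebuild.
# The tool name, flags, and rates are invented for illustration.

RATES="1 2"

# Old pattern: appending inside the loop accumulates flags across iterations.
OPTS="tool.py "
for rate in $RATES; do
  OPTS="$OPTS --request-rate=$rate"
  echo "string: $OPTS"            # second pass still contains --request-rate=1
done

# New pattern: rebuild a per-run array from a fixed base each iteration.
BASE_OPTS=("tool.py" "--save-json-results")
for rate in $RATES; do
  RUN_OPTS=("${BASE_OPTS[@]}" "--request-rate=$rate")
  echo "array:  ${RUN_OPTS[*]}"   # only this run's rate is present
done

The num_prompts value appended each pass still comes from the unchanged awk line; for example, a request_rate of 2.5 with BENCHMARK_TIME_SECONDS set to 120 evaluates to int(2.5 * 120) = 300 prompts for that run.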
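The quoting in the new invocation is worth a note of its own: "${PYTHON_OPTS[@]}" expands each array element as exactly one argument, whereas the old unquoted string expansion re-split on whitespace and would break any value containing a space. The sketch below demonstrates that standard bash behaviour together with the SLEEP_TIME default used above; the path value is hypothetical:

#!/usr/bin/env bash
# Sketch only: quoted array expansion vs. an unquoted flat string, plus the
# SLEEP_TIME default. The path below is a made-up example value.

VALUE_WITH_SPACE="results dir/run1"

# Array expansion: one element, one argument, spaces preserved.
ARGS=("--output-bucket-filepath" "$VALUE_WITH_SPACE")
printf 'arg: [%s]\n' "${ARGS[@]}"
# arg: [--output-bucket-filepath]
# arg: [results dir/run1]

# Flat string, expanded unquoted: word splitting turns the path into two args.
ARGS_STRING="--output-bucket-filepath $VALUE_WITH_SPACE"
printf 'arg: [%s]\n' $ARGS_STRING
# arg: [--output-bucket-filepath]
# arg: [results]
# arg: [dir/run1]

# Same default as in the script: fall back to 0 when SLEEP_TIME is unset.
SLEEP_TIME=${SLEEP_TIME:-0}
echo "sleeping $SLEEP_TIME seconds between runs"

This is also why the diff keeps --output-bucket-filepath and its value as two separate array elements instead of one space-joined string.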