From e14c1a2db13395c40a36d3c321319eaf1fbfde00 Mon Sep 17 00:00:00 2001
From: Ayuei
Date: Tue, 4 Apr 2023 13:46:10 +1000
Subject: [PATCH] Added pytorch 2.0 benchmark and results

---
 benchmark/README.md                    | 12 +++++++++++
 benchmark/benchmark_pytorch_compile.sh | 16 +++++++++++++++
 benchmark/compiled_pytorch.py          | 28 ++++++++++++++++++++++++++
 3 files changed, 56 insertions(+)
 create mode 100755 benchmark/benchmark_pytorch_compile.sh
 create mode 100644 benchmark/compiled_pytorch.py

diff --git a/benchmark/README.md b/benchmark/README.md
index 5dad6ae..53da22c 100644
--- a/benchmark/README.md
+++ b/benchmark/README.md
@@ -32,3 +32,15 @@ often experiments are repeated several times.
 |----------|----------|--------|-------|-------|
 | Cache    | 0.178    | 0.0049 | 0.181 | 0.169 |
 | No cache | 28.52    | 0.7664 | 29.03 | 27.19 |
+
+## Torch compile
+
+We also have a benchmark for measuring the PyTorch 2.0.0 `torch.compile` performance gains: ```benchmark_pytorch_compile.sh```.
+We don't see a noticeable difference, which suggests that the calls to the GPU model during the encode stage are not
+the bottleneck.
+Instead, it is the sentence segmentation that takes the longest.
+
+|                  | Mean (s) | Stdev | Max   | Min   |
+|------------------|----------|-------|-------|-------|
+| Pytorch Compiled | 28.47    | 0.786 | 29.65 | 27.29 |
+| Pytorch          | 28.29    | 0.473 | 29.10 | 27.72 |
\ No newline at end of file
diff --git a/benchmark/benchmark_pytorch_compile.sh b/benchmark/benchmark_pytorch_compile.sh
new file mode 100755
index 0000000..ba974e8
--- /dev/null
+++ b/benchmark/benchmark_pytorch_compile.sh
@@ -0,0 +1,16 @@
+#!/bin/sh
+
+n=5
+
+
+echo "Testing compile"
+for _ in $(seq 1 ${n}); do
+    rm -r cache/
+    python compiled_pytorch.py
+done
+
+echo "Testing no compile"
+for _ in $(seq 1 ${n}); do
+    rm -r cache/
+    python warm_start_cache.py
+done
\ No newline at end of file
diff --git a/benchmark/compiled_pytorch.py b/benchmark/compiled_pytorch.py
new file mode 100644
index 0000000..f7d3bbe
--- /dev/null
+++ b/benchmark/compiled_pytorch.py
@@ -0,0 +1,28 @@
+import time
+
+import torch
+from loguru import logger
+from tqdm import tqdm
+
+from debeir import NIRPipeline
+
+logger.disable("debeir")
+
+
+def run_all_queries(p):
+    for topic_num in tqdm(p.engine.query.topics):
+        p.engine.query.generate_query_embedding(topic_num)
+
+
+if __name__ == "__main__":
+    p = NIRPipeline.build_from_config(config_fp="./config.toml",
+                                      engine="elasticsearch",
+                                      nir_config_fp="./nir.toml")
+
+    p.run_config.encoder.model = torch.compile(p.run_config.encoder.model, mode="reduce-overhead")
+
+    start = time.time()
+    run_all_queries(p)
+    end = time.time()
+
+    print(end - start)
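
For readers who want to reproduce the eager-vs-compiled comparison outside the debeir pipeline, below is a minimal, self-contained sketch of the same idea, assuming ``torch >= 2.0``. The toy encoder, input shapes, and iteration counts are illustrative assumptions and are not part of the patch or the debeir API.

```python
import time

import torch

# Toy stand-in for the query encoder; layer sizes are illustrative only.
model = torch.nn.Sequential(
    torch.nn.Linear(768, 768),
    torch.nn.GELU(),
    torch.nn.Linear(768, 768),
).eval()

# Same compile mode as used in compiled_pytorch.py; requires torch >= 2.0.
compiled = torch.compile(model, mode="reduce-overhead")

batch = torch.randn(32, 768)


def time_encode(module, n_iters=100):
    """Return wall-clock seconds for n_iters forward passes, after warm-up."""
    with torch.no_grad():
        for _ in range(3):  # warm-up: compilation happens on the first calls
            module(batch)
        start = time.time()
        for _ in range(n_iters):
            module(batch)
        return time.time() - start


print("eager   :", time_encode(model))
print("compiled:", time_encode(compiled))
```

The warm-up loop matters: ``torch.compile`` traces and compiles on the first calls, so including those calls in the timed region would penalise the compiled variant.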
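The README table reports mean, stdev, max, and min over the five runs driven by the shell loop, each of which prints a single wall-clock time. A small sketch of how those statistics could be aggregated is shown below; the run times listed are placeholders, not measured results.

```python
import statistics

# Replace with the five wall-clock times (in seconds) printed by the runs.
runs = [28.1, 28.4, 28.9, 27.6, 29.2]  # placeholder values

print(f"Mean (s): {statistics.mean(runs):.2f}")
print(f"Stdev:    {statistics.stdev(runs):.3f}")
print(f"Max:      {max(runs):.2f}")
print(f"Min:      {min(runs):.2f}")
```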