Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion benchmarks/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -28,8 +28,9 @@ Provide run.py Python script with following arguments:
- -p, prompt size(s) to benchmark, size of an input prompt; multiple prompt sizes can be provided and they will be treated as separate cases to benchmark
- -r, thread-range, the range of threads to use; e.g., on an 80-thread system pass 0-79 to use all threads, or pass a subset such as 16-63 to use only 48 threads (indices 16 through 63)
- -fa, 0/1, disable/enable flash attention, default: 0
- -d, docker_image, docker image used for benchmarking (required; run.py defines no default), e.g., amperecomputingai/llama.cpp:latest
```bash
python3 run.py -m Meta-Llama-3-8B-Instruct.Q8_0.gguf -t 10 16 32 40 64 80 -b 1 2 4 8 16 32 64 -p 512 -r 0-79 -fa 1
python3 run.py -m Meta-Llama-3-8B-Instruct.Q8_0.gguf -t 10 16 32 40 64 80 -b 1 2 4 8 16 32 64 -p 512 -r 0-79 -fa 1 -d amperecomputingai/llama.cpp:3.2.1-ampereone
```

## Quick run on 80t OCI A1 system
Expand Down
29 changes: 18 additions & 11 deletions benchmarks/run.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,24 +6,24 @@
import subprocess
from utils.benchmark import parse_threads_range


def get_file_dir():
return os.path.dirname(os.path.realpath(__file__))


def docker_init(node):
tag = "amperecomputingai/llama.cpp:3.1.2"
if subprocess.run(
["docker", "pull", tag]).returncode != 0:
print("Docker pull process failed!")
sys.exit(1)

if subprocess.run(["docker", "inspect", docker_image], capture_output=True).returncode != 0:
print(f"Docker image {docker_image} doesn't exsit, try to pull it.")
if subprocess.run(["docker", "pull", docker_image], capture_output=True).returncode != 0:
print("Docker pull process failed!")
sys.exit(1)

container_name = f"llama_benchmark_n{node}"
subprocess.run(["docker", "rm", "-f", container_name])
subprocess.run(["docker", "rm", "-f", container_name], capture_output=True)
memory = (psutil.virtual_memory().total >> 30) - 30 # leave 30GB for OS
assert memory > 10, "less than 10GB of memory available on the system for llama.cpp"
if subprocess.run(
["docker", "run", "--privileged=true", "--cpuset-mems", f"{str(node)}", "--name", container_name, "-d", "-m", f"{str(memory)}g", "-v",
f"{get_file_dir()}:/runner", "--entrypoint", "/bin/bash", "-it", tag]).returncode != 0:
f"{get_file_dir()}:/runner", "--entrypoint", "/bin/bash", "-it", docker_image],capture_output=True).returncode != 0:
print("Docker run process failed!")
sys.exit(1)
return container_name
Expand All @@ -33,13 +33,13 @@ def docker_restart(docker_name):
break_time = 15

def docker_stop():
if subprocess.run(["docker", "stop", docker_name]).returncode != 0:
if subprocess.run(["docker", "stop", docker_name], capture_output=True).returncode != 0:
print(f"Stopping docker container {docker_name} failed, retrying in {break_time} seconds.")
time.sleep(break_time)
docker_stop()

def docker_start():
if subprocess.run(["docker", "start", docker_name]).returncode != 0:
if subprocess.run(["docker", "start", docker_name], capture_output=True).returncode != 0:
print(f"Starting docker container {docker_name} failed, retrying in {break_time} seconds.")
time.sleep(break_time)
docker_start()
Expand Down Expand Up @@ -94,6 +94,9 @@ def parse_args():
parser.add_argument("-m", "--model_names",
type=str, required=True, nargs="+",
help="model names, e.g. 'Meta-Llama-3-8B-Instruct.Q8_0.gguf'")
parser.add_argument("-d", "--docker_image",
type=str, required=True,
help="Docker image to use for benchmarking")
parser.add_argument("-t", "--num_threads",
type=int, required=True, nargs="+",
help="number of threads per process to use")
Expand Down Expand Up @@ -121,7 +124,11 @@ def parse_args():


def main():
global docker_image

args = parse_args()
docker_image = args.docker_image
print(f"Test with docker image {docker_image}")
benchmark(docker_init(args.numa), args)


Expand Down