@@ -9,63 +9,60 @@ CORE_RANGE=${CORE_RANGE:-48-95}
9
9
NUMA_NODE=${NUMA_NODE:-1}
10
10
11
11
# Try building the docker image
12
- numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build -t cpu-test -f Dockerfile.cpu .
13
- numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" -t cpu-test-avx2 -f Dockerfile.cpu .
12
+ numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build -t cpu-test-"$BUILDKITE_BUILD_NUMBER" -f Dockerfile.cpu .
13
+ numactl -C "$CORE_RANGE" -N "$NUMA_NODE" docker build --build-arg VLLM_CPU_DISABLE_AVX512="true" -t cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2 -f Dockerfile.cpu .
14
14
15
15
# Setup cleanup
16
- remove_docker_container() { docker rm -f cpu-test-"$NUMA_NODE" cpu-test-avx2-"$NUMA_NODE" || true; }
16
+ remove_docker_container() { set -e; docker rm -f cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2-"$NUMA_NODE" || true; }
17
17
trap remove_docker_container EXIT
18
18
remove_docker_container
19
19
20
20
# Run the image, setting --shm-size=4g for tensor parallel.
21
21
docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --cpuset-cpus="$CORE_RANGE" \
21
- --cpuset-mems="$NUMA_NODE" --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-"$NUMA_NODE" cpu-test
22
+ --cpuset-mems="$NUMA_NODE" --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" cpu-test-"$BUILDKITE_BUILD_NUMBER"
23
23
docker run -itd --entrypoint /bin/bash -v ~/.cache/huggingface:/root/.cache/huggingface --cpuset-cpus="$CORE_RANGE" \
24
- --cpuset-mems="$NUMA_NODE" --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-avx2-"$NUMA_NODE" cpu-test-avx2
24
+ --cpuset-mems="$NUMA_NODE" --privileged=true --network host -e HF_TOKEN --env VLLM_CPU_KVCACHE_SPACE=4 --shm-size=4g --name cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2-"$NUMA_NODE" cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2
25
25
26
26
function cpu_tests() {
27
27
set -e
28
28
export NUMA_NODE=$2
29
29
30
30
# offline inference
31
- docker exec cpu-test-avx2-"$NUMA_NODE" bash -c "
31
+ docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-avx2-"$NUMA_NODE" bash -c "
32
32
set -e
33
- python3 examples/offline_inference.py"
33
+ python3 examples/offline_inference/basic.py"
34
34
35
35
# Run basic model test
36
- docker exec cpu-test-"$NUMA_NODE" bash -c "
36
+ docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" bash -c "
37
37
set -e
38
- pip install pytest pytest-asyncio \
39
- decord einops librosa peft Pillow sentence-transformers soundfile \
40
- transformers_stream_generator matplotlib datamodel_code_generator
41
- pip install torchvision --index-url https://download.pytorch.org/whl/cpu
38
+ pip install -r vllm/requirements-test.txt
42
39
pytest -v -s tests/models/decoder_only/language -m cpu_model
43
40
pytest -v -s tests/models/embedding/language -m cpu_model
44
41
pytest -v -s tests/models/encoder_decoder/language -m cpu_model
45
42
pytest -v -s tests/models/decoder_only/audio_language -m cpu_model
46
43
pytest -v -s tests/models/decoder_only/vision_language -m cpu_model"
47
44
48
45
# Run compressed-tensor test
49
- docker exec cpu-test-"$NUMA_NODE" bash -c "
46
+ docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" bash -c "
50
47
set -e
51
48
pytest -s -v \
52
49
tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_static_setup \
53
50
tests/quantization/test_compressed_tensors.py::test_compressed_tensors_w8a8_dynamic_per_token"
54
51
55
52
# Run AWQ test
56
- docker exec cpu-test-"$NUMA_NODE" bash -c "
53
+ docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" bash -c "
57
54
set -e
58
55
pytest -s -v \
59
56
tests/quantization/test_ipex_quant.py"
60
57
61
58
# Run chunked-prefill and prefix-cache test
62
- docker exec cpu-test-"$NUMA_NODE" bash -c "
59
+ docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" bash -c "
63
60
set -e
64
61
pytest -s -v -k cpu_model \
65
62
tests/basic_correctness/test_chunked_prefill.py"
66
63
67
- # online inference
68
- docker exec cpu-test-"$NUMA_NODE" bash -c "
64
+ # online serving
65
+ docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" bash -c "
69
66
set -e
70
67
export VLLM_CPU_KVCACHE_SPACE=10
71
68
export VLLM_CPU_OMP_THREADS_BIND=$1
@@ -78,6 +75,12 @@ function cpu_tests() {
78
75
--num-prompts 20 \
79
76
--endpoint /v1/completions \
80
77
--tokenizer facebook/opt-125m"
78
+
79
+ # Run multi-lora tests
80
+ docker exec cpu-test-"$BUILDKITE_BUILD_NUMBER"-"$NUMA_NODE" bash -c "
81
+ set -e
82
+ pytest -s -v \
83
+ tests/lora/test_qwen2vl.py"
81
84
}
82
85
83
86
# All of CPU tests are expected to be finished less than 25 mins.
0 commit comments