SonicTriton tests with Singularity #31616

Merged · 7 commits · Oct 2, 2020
44 changes: 24 additions & 20 deletions HeterogeneousCore/SonicTriton/test/README.md
@@ -1,35 +1,34 @@
# SONIC TritonClient tests

A test producer `TritonImageProducer` is available.
It generates an arbitrary image for ResNet50 inference and prints the resulting classifications.
Test producers `TritonImageProducer` and `TritonGraphProducer` are available.
They generate arbitrary inputs for inference (with ResNet50 or Graph Attention Network, respectively) and print the resulting output.

To run the tests, a local Triton server can be started using Docker.
(This may require superuser permission.)
To run the tests, a local Triton server can be started using Singularity (default, should not require superuser permission)
or Docker (may require superuser permission).
The server can utilize the local CPU (support for AVX instructions required) or a local Nvidia GPU, if one is available.
The default local server address is `0.0.0.0`.

First, the relevant data should be downloaded from Nvidia:
```
./fetch_model.sh
```

Execute this Docker command to launch the local server:
```bash
docker run -d --rm --name tritonserver \
--shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
-p8000:8000 -p8001:8001 -p8002:8002 \
-v${CMSSW_BASE}/src/HeterogeneousCore/SonicTriton/data/models:/models \
-v${CMSSW_BASE}/src/HeterogeneousCore/SonicTriton/data/lib:/inputlib \
-e LD_LIBRARY_PATH="/opt/tritonserver/lib/pytorch:/usr/local/cuda/compat/lib:/usr/local/nvidia/lib:/usr/local/nvidia/lib64" \
-e LD_PRELOAD="/inputlib/libtorchscatter.so /inputlib/libtorchsparse.so" \
nvcr.io/nvidia/tritonserver:20.06-v1-py3 tritonserver --model-repository=/models
The server can be managed with the `triton` script (using Singularity with CPU by default):
```
./triton start
[run test commands]
./triton stop
```

If the machine has Nvidia GPUs, the flag `--gpus all` can be added to the command.
Otherwise, the server will perform inference using the CPU (slower).

To get more debugging information from the server, the flags `--log-verbose=1 --log-error=1 --log-info=1`
can be added to the end of the command.
The script has the following options:
* `-d`: use Docker instead of Singularity
* `-g`: use GPU instead of CPU
* `-n`: name of container instance (default: triton_server_instance)
* `-v`: (verbose) start: activate server debugging info; stop: keep server logs
* `-w`: maximum time to wait for server to start (default: 60 seconds)
* `-h`: print help message and exit

The default local server address is `0.0.0.0`.
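For example, combining the options above, a GPU-enabled server can be run via Docker with verbose logging; note that `-d` (and `-n`, if customized) must also be passed when stopping:
```
./triton -d -g -v start
[run test commands]
./triton -d -v stop
```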
## Test commands

Run the image test:
```
@@ -40,3 +39,8 @@ Run the graph test:
```
cmsRun tritonTest_cfg.py maxEvents=1 producer=TritonGraphProducer
```

## Caveats

* Local CPU server requires support for AVX instructions.
* Multiple users cannot run servers on the same GPU (e.g. on a shared node).
46 changes: 31 additions & 15 deletions HeterogeneousCore/SonicTriton/test/fetch_model.sh
@@ -1,8 +1,9 @@
#!/bin/bash

# borrowed from https://github.com/NVIDIA/triton-inference-server/tree/master/docs/examples
# borrowed from https://github.com/triton-inference-server/server/tree/master/docs/examples

TRITON_VERSION=$(scram tool info triton-inference-server | grep "Version : " | cut -d' ' -f3)
TRITON_REPO="https://github.com/triton-inference-server/server"
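# strip any suffix from the scram-reported version (e.g. 20.06-v1 -> 20.06) so it matches the upstream repository tag used below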
TRITON_VERSION=$(scram tool info triton-inference-server | grep "Version : " | cut -d' ' -f3 | cut -d'-' -f1)

TEST_DIR=`pwd`

@@ -11,8 +12,8 @@ cd $TEST_DIR
mkdir -p $MODEL_DIR
cd $MODEL_DIR

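# download the ResNet50 model configuration and labels from the Triton examples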
curl -O -L https://github.com/NVIDIA/triton-inference-server/raw/v${TRITON_VERSION}/docs/examples/model_repository/resnet50_netdef/config.pbtxt
curl -O -L https://github.com/NVIDIA/triton-inference-server/raw/v${TRITON_VERSION}/docs/examples/model_repository/resnet50_netdef/resnet50_labels.txt
curl -O -L ${TRITON_REPO}/raw/v${TRITON_VERSION}/docs/examples/model_repository/resnet50_netdef/config.pbtxt
curl -O -L ${TRITON_REPO}/raw/v${TRITON_VERSION}/docs/examples/model_repository/resnet50_netdef/resnet50_labels.txt

mkdir -p 1

@@ -24,15 +25,30 @@ cd $TEST_DIR
mkdir -p $GAT_DIR
cd $GAT_DIR

curl -O -L https://github.com/lgray/triton-torchgeo-gat-example/raw/cmssw_20.06-v1-py3/artifacts/models/gat_test/config.pbtxt
mkdir -p 1
curl -o 1/model.pt -L https://github.com/lgray/triton-torchgeo-gat-example/raw/cmssw_20.06-v1-py3/artifacts/models/gat_test/1/model.pt

TORCH_DIR=${TEST_DIR}/../data/lib/
cd $TEST_DIR
mkdir -p $TORCH_DIR
cd $TORCH_DIR
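# write the Triton model configuration for the GAT test model (defines its input and output tensors)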
cat << EOF > config.pbtxt
name: "gat_test"
platform: "pytorch_libtorch"
max_batch_size: 0
input [
{
name: "x__0"
data_type: TYPE_FP32
dims: [ -1, 1433 ]
},
{
name: "edgeindex__1"
data_type: TYPE_INT64
dims: [ 2, -1 ]
}
]
output [
{
name: "logits__0"
data_type: TYPE_FP32
dims: [ -1, 7 ]
}
]
EOF

for lib in libtorchcluster.so libtorchscatter.so libtorchsparse.so libtorchsplineconv.so; do
curl -O -L https://github.com/lgray/triton-torchgeo-gat-example/raw/cmssw_20.06-v1-py3/artifacts/lib/$lib
done
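# copy the pretrained GAT model from the unpacked container image on CVMFS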
mkdir -p 1
cp /cvmfs/unpacked.cern.ch/registry.hub.docker.com/fastml/triton-torchgeo:20.06-v1-py3-geometric/torch_geometric/examples/model.pt 1/model.pt
168 changes: 168 additions & 0 deletions HeterogeneousCore/SonicTriton/test/triton
@@ -0,0 +1,168 @@
#!/bin/bash

# defaults
USEDOCKER=""
GPU=""
VERBOSE=""
WTIME=60
SERVER=triton_server_instance

usage() {
ECHO="echo -e"
$ECHO "triton [options] [start|stop]"
$ECHO
$ECHO "Options:"
$ECHO "-d \t use Docker instead of Singularity"
$ECHO "-g \t use GPU instead of CPU"
$ECHO "-n \t name of container instance (default: ${SERVER})"
$ECHO "-v \t (verbose) start: activate server debugging info; stop: keep server logs"
$ECHO "-w \t maximum time to wait for server to start (default: ${WTIME} seconds)"
$ECHO "-h \t print this message and exit"
$ECHO
$ECHO "Operations:"
$ECHO "start \t start server"
$ECHO "stop \t stop server"
exit $1
}

# check shm locations
SHM=/dev/shm
if [ -e /run/shm ]; then
SHM=/run/shm
fi

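# parse command-line options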
while getopts "dgvhw:n:" opt; do
case "$opt" in
d) USEDOCKER=true
;;
g) GPU=true
;;
v) VERBOSE="--log-verbose=1 --log-error=1 --log-info=1"
;;
h) usage 0
;;
w) WTIME="$OPTARG"
;;
n) SERVER="$OPTARG"
;;
esac
done

shift $(($OPTIND - 1))
OP=$1

if [ "$OP" != start ] && [ "$OP" != stop ]; then
usage 1
fi

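# common settings: container image, model repository, server log, and the log line that indicates a successful start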
DOCKER="sudo docker"
IMAGE=fastml/triton-torchgeo:20.06-v1-py3-geometric
MODELS=${CMSSW_BASE}/src/HeterogeneousCore/SonicTriton/data/models
LOG=log_triton_server.log
LIB=lib
STARTED_INDICATOR="Started GRPCService"
EXTRA=""

start_docker(){
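# expose the Triton HTTP (8000), gRPC (8001), and metrics (8002) ports and mount the model repository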
$DOCKER run -d --name ${SERVER} \
--shm-size=1g --ulimit memlock=-1 --ulimit stack=67108864 \
-p8000:8000 -p8001:8001 -p8002:8002 $EXTRA \
-v${MODELS}:/models \
${IMAGE} tritonserver --model-repository=/models $VERBOSE
}

start_singularity(){
# triton server image may need to modify contents of opt/tritonserver/lib/
# but cvmfs is read-only
# -> make a writable local directory with the same contents
mkdir ${LIB}
ln -s /cvmfs/unpacked.cern.ch/registry.hub.docker.com/${IMAGE}/opt/tritonserver/lib/* ${LIB}/

# start instance
# need to bind /cvmfs for above symlinks to work inside container
singularity instance start \
-B ${SHM}:/run/shm -B ${MODELS}:/models -B ${LIB}:/opt/tritonserver/lib -B /cvmfs $EXTRA \
/cvmfs/unpacked.cern.ch/registry.hub.docker.com/${IMAGE} ${SERVER}

START_EXIT=$?
if [ "$START_EXIT" -ne 0 ]; then
rm -rf ${LIB}
return "$START_EXIT"
fi

# run the actual server
singularity run instance://${SERVER} \
tritonserver --model-repository=/models $VERBOSE >& ${LOG} &
}

stop_docker(){
# keep log
if [ -n "$VERBOSE" ]; then $DOCKER logs ${SERVER} >& ${LOG}; fi

$DOCKER stop ${SERVER}
$DOCKER rm ${SERVER}
}

stop_singularity(){
singularity instance stop ${SERVER}

# cleanup
rm -rf ${LIB}
if [ -z "$VERBOSE" ]; then rm ${LOG}; fi
}

test_docker(){
# docker logs print to stderr
${DOCKER} logs ${SERVER} |& grep "$STARTED_INDICATOR"
}

test_singularity(){
grep "$STARTED_INDICATOR" $LOG
}

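# poll until the server reports readiness; on timeout, stop the server (keeping its log) and exit with an error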
wait_server(){
COUNT=0
while ! $WAIT_COND >& /dev/null; do
if [ "$COUNT" -gt "$WTIME" ]; then
echo "timed out waiting for server to start"
VERBOSE=true $STOP_FN
exit 1
else
COUNT=$(($COUNT + 1))
sleep 1
fi
done

echo "server is ready!"
exit 0
}

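# select the backend-specific start/test/stop functions and extra flags (GPU support) for the chosen runtime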
if [ -n "$USEDOCKER" ]; then
if [ -n "$GPU" ]; then
EXTRA="--gpus all"
fi
START_FN=start_docker
WAIT_COND=test_docker
STOP_FN=stop_docker
PROG_NAME=Docker
else
if [ -n "$GPU" ]; then
EXTRA="--nv"
fi
START_FN=start_singularity
WAIT_COND=test_singularity
STOP_FN=stop_singularity
PROG_NAME=Singularity
fi

if [ "$OP" == start ]; then
$START_FN
START_EXIT=$?
if [ "$START_EXIT" -ne 0 ]; then
echo "Error from $PROG_NAME"
exit "$START_EXIT"
fi
wait_server
else
$STOP_FN
fi