From 9c46534e5e6f94930eaac9571088d53cf770bca4 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 28 Oct 2025 19:16:47 +0000 Subject: [PATCH 1/5] feat(medcat-service): Add Gunicorn config for max requests and jitter --- medcat-service/env/app.env | 4 +++- medcat-service/env/app_deid.env | 2 ++ medcat-service/start_service_production.sh | 12 ++++++++++++ 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/medcat-service/env/app.env b/medcat-service/env/app.env index cb68c397c..e59c7ad2f 100755 --- a/medcat-service/env/app.env +++ b/medcat-service/env/app.env @@ -36,6 +36,8 @@ SERVER_PORT=5000 SERVER_WORKERS=1 SERVER_WORKER_TIMEOUT=300 SERVER_THREADS=1 +SERVER_GUNICORN_MAX_REQUESTS=1000 +SERVER_GUNICORN_MAX_REQUESTS_JITTER=50 # set the number of torch threads, this should be used ONLY if you are using CPUs and the default image # set to -1 or 0 if you are using GPU @@ -43,4 +45,4 @@ APP_TORCH_THREADS=8 # GPU SETTING # CAUTION, use only if you are using the GPU docker image. 
-APP_CUDA_DEVICE_COUNT=1 +APP_CUDA_DEVICE_COUNT=-1 diff --git a/medcat-service/env/app_deid.env b/medcat-service/env/app_deid.env index 56607c72b..e59c7ad2f 100755 --- a/medcat-service/env/app_deid.env +++ b/medcat-service/env/app_deid.env @@ -36,6 +36,8 @@ SERVER_PORT=5000 SERVER_WORKERS=1 SERVER_WORKER_TIMEOUT=300 SERVER_THREADS=1 +SERVER_GUNICORN_MAX_REQUESTS=1000 +SERVER_GUNICORN_MAX_REQUESTS_JITTER=50 # set the number of torch threads, this should be used ONLY if you are using CPUs and the default image # set to -1 or 0 if you are using GPU diff --git a/medcat-service/start_service_production.sh b/medcat-service/start_service_production.sh index f82cc67c9..209dfaaaa 100644 --- a/medcat-service/start_service_production.sh +++ b/medcat-service/start_service_production.sh @@ -33,6 +33,16 @@ if [ -z ${SERVER_WORKER_TIMEOUT+x} ]; then echo "SERVER_WORKER_TIMEOUT is unset -- setting to default (sec): $SERVER_WORKER_TIMEOUT"; fi +if [ -z ${SERVER_GUNICORN_MAX_REQUESTS+x} ]; then + SERVER_GUNICORN_MAX_REQUESTS=1000; + echo "SERVER_GUNICORN_MAX_REQUESTS is unset -- setting to default (sec): $SERVER_GUNICORN_MAX_REQUESTS"; +fi + +if [ -z ${SERVER_GUNICORN_MAX_REQUESTS_JITTER+x} ]; then + SERVER_GUNICORN_MAX_REQUESTS_JITTER=50; + echo "SERVER_GUNICORN_MAX_REQUESTS_JITTER is unset -- setting to default (sec): $SERVER_GUNICORN_MAX_REQUESTS_JITTER"; +fi + # Note - SERVER_ACCESS_LOG_FORMAT is unused when worker-class is set to UvicornWorker SERVER_ACCESS_LOG_FORMAT="%(t)s [ACCESS] %(h)s \"%(r)s\" %(s)s \"%(f)s\" \"%(a)s\"" @@ -50,5 +60,7 @@ exec gunicorn \ --error-logfile=- \ --log-level info \ --config /cat/config.py \ + --max-requests="$SERVER_GUNICORN_MAX_REQUESTS" \ + --max-requests-jitter="$SERVER_GUNICORN_MAX_REQUESTS_JITTER" \ --worker-class uvicorn.workers.UvicornWorker \ medcat_service.main:app From a902354741423c1bf70cf7968a6d3b25bfe78e87 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Wed, 29 Oct 2025 09:23:58 
+0000 Subject: [PATCH 2/5] feat(medcat-service): Support any extra args to gunicorn with env var --- medcat-service/start_service_production.sh | 3 +++ 1 file changed, 3 insertions(+) diff --git a/medcat-service/start_service_production.sh b/medcat-service/start_service_production.sh index 209dfaaaa..bb42df9e4 100644 --- a/medcat-service/start_service_production.sh +++ b/medcat-service/start_service_production.sh @@ -50,6 +50,8 @@ SERVER_ACCESS_LOG_FORMAT="%(t)s [ACCESS] %(h)s \"%(r)s\" %(s)s \"%(f)s\" \"%(a)s # # Using Gunicorn, even though FastAPI recommends Uvicorn, to keep support for the post_fork config echo "Starting up the service using gunicorn server ..." +set -x + exec gunicorn \ --bind "$SERVER_HOST:$SERVER_PORT" \ --workers="$SERVER_WORKERS" \ @@ -62,5 +64,6 @@ exec gunicorn \ --config /cat/config.py \ --max-requests="$SERVER_GUNICORN_MAX_REQUESTS" \ --max-requests-jitter="$SERVER_GUNICORN_MAX_REQUESTS_JITTER" \ + ${SERVER_GUNICORN_EXTRA_ARGS:-} \ --worker-class uvicorn.workers.UvicornWorker \ medcat_service.main:app From 2c3d723355ec372d738fa58cf3d94682fc46cb89 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Wed, 29 Oct 2025 09:29:42 +0000 Subject: [PATCH 3/5] feat(medcat-service): Support any extra gunicorn args - add readme --- medcat-service/README.md | 3 +++ medcat-service/start_service_production.sh | 2 ++ 2 files changed, 5 insertions(+) diff --git a/medcat-service/README.md b/medcat-service/README.md index 95b89a9de..56bfdf74d 100644 --- a/medcat-service/README.md +++ b/medcat-service/README.md @@ -316,6 +316,9 @@ The following environment variables are available for tailoring the MedCAT Servi - `SERVER_PORT` - the port number used (default: `5000`), - `SERVER_WORKERS` - the number of workers serving the Flask app working in parallel (default: `1` ; only used in production server). 
- `SERVER_WORKER_TIMEOUT` - the max timeout (in sec) for receiving response from worker (default: `300` ; only used with production server). +- `SERVER_GUNICORN_MAX_REQUESTS` - maximum number of requests a worker will process before restarting (default: `1000`), +- `SERVER_GUNICORN_MAX_REQUESTS_JITTER` - adds randomness to `MAX_REQUESTS` to avoid all workers restarting simultaneously (default: `50`), +- `SERVER_GUNICORN_EXTRA_ARGS` - any additional Gunicorn CLI arguments you want to pass (default: none). (Example value: "SERVER_GUNICORN_EXTRA_ARGS=--backlog 20") The following environment variables are available for tailoring the MedCAT Service wrapper: diff --git a/medcat-service/start_service_production.sh b/medcat-service/start_service_production.sh index bb42df9e4..c8bcd81d6 100644 --- a/medcat-service/start_service_production.sh +++ b/medcat-service/start_service_production.sh @@ -43,6 +43,8 @@ if [ -z ${SERVER_GUNICORN_MAX_REQUESTS_JITTER+x} ]; then echo "SERVER_GUNICORN_MAX_REQUESTS_JITTER is unset -- setting to default (sec): $SERVER_GUNICORN_MAX_REQUESTS_JITTER"; fi + ${SERVER_GUNICORN_EXTRA_ARGS:-} \ + # Note - SERVER_ACCESS_LOG_FORMAT is unused when worker-class is set to UvicornWorker SERVER_ACCESS_LOG_FORMAT="%(t)s [ACCESS] %(h)s \"%(r)s\" %(s)s \"%(f)s\" \"%(a)s\"" From 74b933694da520630977295e849254dc8a4ce6a6 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Wed, 29 Oct 2025 10:30:23 +0000 Subject: [PATCH 4/5] fix(medcat-service): Remove stray SERVER_GUNICORN_EXTRA_ARGS line from start script --- medcat-service/start_service_production.sh | 2 -- 1 file changed, 2 deletions(-) diff --git a/medcat-service/start_service_production.sh b/medcat-service/start_service_production.sh index c8bcd81d6..bb42df9e4 100644 --- a/medcat-service/start_service_production.sh +++ b/medcat-service/start_service_production.sh @@ -43,8 +43,6 @@ if [ -z ${SERVER_GUNICORN_MAX_REQUESTS_JITTER+x} ]; then echo 
"SERVER_GUNICORN_MAX_REQUESTS_JITTER is unset -- setting to default (sec): $SERVER_GUNICORN_MAX_REQUESTS_JITTER"; fi - ${SERVER_GUNICORN_EXTRA_ARGS:-} \ - # Note - SERVER_ACCESS_LOG_FORMAT is unused when worker-class is set to UvicornWorker SERVER_ACCESS_LOG_FORMAT="%(t)s [ACCESS] %(h)s \"%(r)s\" %(s)s \"%(f)s\" \"%(a)s\"" From 887a96fd1f6c3649f05115d8a9d5337af114c9e6 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Wed, 29 Oct 2025 10:31:13 +0000 Subject: [PATCH 5/5] feat(medcat-service): Support gunicorn args - fixup log line --- medcat-service/start_service_production.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/medcat-service/start_service_production.sh b/medcat-service/start_service_production.sh index bb42df9e4..185009aeb 100644 --- a/medcat-service/start_service_production.sh +++ b/medcat-service/start_service_production.sh @@ -35,12 +35,12 @@ fi if [ -z ${SERVER_GUNICORN_MAX_REQUESTS+x} ]; then SERVER_GUNICORN_MAX_REQUESTS=1000; - echo "SERVER_GUNICORN_MAX_REQUESTS is unset -- setting to default (sec): $SERVER_GUNICORN_MAX_REQUESTS"; + echo "SERVER_GUNICORN_MAX_REQUESTS is unset -- setting to default: $SERVER_GUNICORN_MAX_REQUESTS"; fi if [ -z ${SERVER_GUNICORN_MAX_REQUESTS_JITTER+x} ]; then SERVER_GUNICORN_MAX_REQUESTS_JITTER=50; - echo "SERVER_GUNICORN_MAX_REQUESTS_JITTER is unset -- setting to default (sec): $SERVER_GUNICORN_MAX_REQUESTS_JITTER"; + echo "SERVER_GUNICORN_MAX_REQUESTS_JITTER is unset -- setting to default: $SERVER_GUNICORN_MAX_REQUESTS_JITTER"; fi # Note - SERVER_ACCESS_LOG_FORMAT is unused when worker-class is set to UvicornWorker