From 1d99f388ddfe40d6fa206f1a395fff367e9e533d Mon Sep 17 00:00:00 2001 From: Florian Behrens Date: Sun, 16 Jul 2023 12:29:42 +0200 Subject: [PATCH 1/8] Update launch config --- .vscode/launch.json | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index aae38fbe91..1cc706f9e9 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -106,6 +106,38 @@ "CUDA_VISIBLE_DEVICES": "1,2,3,4,5", "OMP_NUM_THREADS": "1" } - } + }, + { + "name": "Debug: Inference Server", + "type": "python", + "request": "attach", + "connect": { + "host": "localhost", + "port": 5678 + }, + "pathMappings": [ + { + "localRoot": "${workspaceFolder}/inference/server", + "remoteRoot": "/opt/inference/server" + } + ], + "justMyCode": false + }, + { + "name": "Debug: Worker", + "type": "python", + "request": "attach", + "connect": { + "host": "localhost", + "port": 5679 + }, + "pathMappings": [ + { + "localRoot": "${workspaceFolder}/inference/worker", + "remoteRoot": "/opt/inference/worker" + } + ], + "justMyCode": false + }, ] } From b9f814eaedebd85052a22a8a50bc7b20a8cb6d39 Mon Sep 17 00:00:00 2001 From: Florian Behrens Date: Sun, 16 Jul 2023 12:32:02 +0200 Subject: [PATCH 2/8] Update compose file --- docker-compose.yaml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docker-compose.yaml b/docker-compose.yaml index 6497af6a1b..6ae8ce656c 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -237,6 +237,7 @@ services: restart: unless-stopped ports: - "8000:8000" + - "5678:5678" # Port to attach debugger depends_on: inference-redis: condition: service_healthy @@ -257,6 +258,8 @@ services: volumes: - "./oasst-shared:/opt/inference/lib/oasst-shared" - "./inference/worker:/opt/inference/worker" + ports: + - "5679:5679" # Port to attach debugger deploy: replicas: 1 profiles: ["inference"] From b08dad63a0537539246f71342fe70a72d89d6ebe Mon Sep 17 00:00:00 2001 From: Florian Behrens Date: Sun, 16 Jul 2023 12:44:54 +0200 Subject: [PATCH 3/8] Add config for inference server --- docker/inference/Dockerfile.server | 5 +++-- inference/server/main.py | 9 +++++++++ inference/server/requirements.txt | 1 + 3 files changed, 13 insertions(+), 2 deletions(-) diff --git a/docker/inference/Dockerfile.server b/docker/inference/Dockerfile.server index 17d69173d3..94445ca5a3 100644 --- a/docker/inference/Dockerfile.server +++ b/docker/inference/Dockerfile.server @@ -78,8 +78,9 @@ USER ${APP_USER} VOLUME [ "${APP_BASE}/lib/oasst-shared" ] VOLUME [ "${APP_BASE}/lib/oasst-data" ] - -CMD uvicorn main:app --reload --host 0.0.0.0 --port "${PORT}" +# Start the server within pydebug to allow attaching a debugger; add "--wait-for-client" if you want to halt execution +# until the debugger has been attached +CMD python -m pydebug --listen 0.0.0.0:5678 main.py diff --git a/inference/server/main.py b/inference/server/main.py index 711d75ee9c..8f96c8a009 100644 --- a/inference/server/main.py +++ b/inference/server/main.py @@ -148,3 +148,12 @@ async def maybe_add_debug_api_keys(): async def welcome_message(): logger.warning("Inference server started") logger.warning("To stop the server, press Ctrl+C") + + +if __name__ == "__main__": + # Entrypoint for the server in dev environments + # pydebug needs a Python process to attach to, so we start uvicorn from Python instead of invoking it directly + import uvicorn + import os + port = int(os.getenv('PORT', "8000")) + uvicorn.run("main:app", host="0.0.0.0", port=port, reload=True) diff --git a/inference/server/requirements.txt b/inference/server/requirements.txt index db5045481b..d97a2c260f 100644 --- a/inference/server/requirements.txt +++ b/inference/server/requirements.txt @@ -4,6 +4,7 @@ asyncpg authlib beautifulsoup4 # web_retriever plugin cryptography==39.0.0 +debugpy fastapi-limiter fastapi[all]==0.88.0 google-api-python-client From 23b5d98c8f9ce5029a2b08fc651a48589552c7cf Mon Sep 17 00:00:00 2001 From: Florian Behrens Date: Sun, 16 Jul 2023 13:21:47 +0200 Subject: [PATCH 4/8] Switch to running debugpy from Python --- docker-compose.yaml | 2 ++ inference/server/main.py | 13 ++++++++++--- inference/worker/__main__.py | 10 ++++++++++ inference/worker/requirements.txt | 1 + 4 files changed, 23 insertions(+), 3 deletions(-) diff --git a/docker-compose.yaml b/docker-compose.yaml index 6ae8ce656c..b44bc20235 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -231,6 +231,7 @@ services: TRUSTED_CLIENT_KEYS: "6969" ALLOW_DEBUG_AUTH: "True" API_ROOT: "http://localhost:8000" + DEBUG: "True" volumes: - "./oasst-shared:/opt/inference/lib/oasst-shared" - "./inference/server:/opt/inference/server" @@ -255,6 +256,7 @@ services: MODEL_CONFIG_NAME: ${MODEL_CONFIG_NAME:-distilgpt2} BACKEND_URL: "ws://inference-server:8000" PARALLELISM: 2 + DEBUG: "True" volumes: - "./oasst-shared:/opt/inference/lib/oasst-shared" - "./inference/worker:/opt/inference/worker" diff --git a/inference/server/main.py b/inference/server/main.py index 8f96c8a009..659518e817 100644 --- a/inference/server/main.py +++ b/inference/server/main.py @@ -151,9 +151,16 @@ async def welcome_message(): if __name__ == "__main__": - # Entrypoint for the server in dev environments - # pydebug needs a Python process to attach to, so we start uvicorn from Python instead of invoking it directly import uvicorn import os + port = int(os.getenv('PORT', "8000")) - uvicorn.run("main:app", host="0.0.0.0", port=port, reload=True) + is_debug = bool(os.getenv("DEBUG", "False")) + + if is_debug: + import debugpy + debugpy.listen(("0.0.0.0", "5679")) + # Uncomment to wait here until a debugger is attached + # debugpy.wait_for_client() + + uvicorn.run("main:app", host="0.0.0.0", port=port, reload=is_debug) diff --git a/inference/worker/__main__.py b/inference/worker/__main__.py index 569e340276..6dacdfc0a0 100644 --- a/inference/worker/__main__.py +++ b/inference/worker/__main__.py @@ -4,6 +4,8 @@ import time from contextlib import closing +import os + import pydantic import transformers import utils @@ -130,4 +132,12 @@ def main(): if __name__ == "__main__": + is_debug = bool(os.getenv("DEBUG", "False")) + + if is_debug: + import debugpy + debugpy.listen(("0.0.0.0", "5679")) + # Uncomment to wait here until a debugger is attached + # debugpy.wait_for_client() + main() diff --git a/inference/worker/requirements.txt b/inference/worker/requirements.txt index cbb25fe322..94a7dc18e1 100644 --- a/inference/worker/requirements.txt +++ b/inference/worker/requirements.txt @@ -1,4 +1,5 @@ aiohttp +debugpy hf_transfer huggingface_hub langchain==0.0.142 From a2f1235c8acfaa4242c90c12eb46366a533d2e7a Mon Sep 17 00:00:00 2001 From: Florian Behrens Date: Sun, 16 Jul 2023 13:23:38 +0200 Subject: [PATCH 5/8] Undo changes to Dockerfile --- docker/inference/Dockerfile.server | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/docker/inference/Dockerfile.server b/docker/inference/Dockerfile.server index 94445ca5a3..a1efbe4dad 100644 --- a/docker/inference/Dockerfile.server +++ b/docker/inference/Dockerfile.server @@ -78,9 +78,8 @@ USER ${APP_USER} VOLUME [ "${APP_BASE}/lib/oasst-shared" ] VOLUME [ "${APP_BASE}/lib/oasst-data" ] -# Start the server within pydebug to allow attaching a debugger; add "--wait-for-client" if you want to halt execution -# until the debugger has been attached -CMD python -m pydebug --listen 0.0.0.0:5678 main.py +# In the dev image, we start uvicorn from Python so that we can attach the debugger +CMD python main.py From 6369f7eba2ef0da5ad6d2f08b49f1575286e2bc1 Mon Sep 17 00:00:00 2001 From: Florian Behrens Date: Sun, 16 Jul 2023 13:42:54 +0200 Subject: [PATCH 6/8] Add documentation --- inference/README.md | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/inference/README.md b/inference/README.md index 3f9e339ed2..a992d88de9 100644 --- a/inference/README.md +++ b/inference/README.md @@ -60,6 +60,22 @@ python __main__.py # You'll soon see a `User:` prompt, where you can type your prompts. ``` +## Debugging + +The inference server and the worker allow attaching a Python debugger. +To do this from VS Code, start the inference server & worker using docker compose as described above +(e.g. with `docker compose --profile inference up --build`), then simply pick one of the following launch +profiles, depending on what you would like to debug: +- Debug: Inference Server +- Debug: Worker + +### Waiting for Debugger on Startup +It can be helpful to wait for the debugger before starting the application. +This can be achieved by uncommenting `debugpy.wait_for_client()` in the appropriate location: +- `inference/server/main.py` for the inference server +- `inference/worker/__main.py__` for the worker + + ## Distributed Testing We run distributed load tests using the From 87348730a52896fd04b098df1f0c1790bb38cc90 Mon Sep 17 00:00:00 2001 From: Florian Behrens Date: Wed, 2 Aug 2023 22:52:47 +0200 Subject: [PATCH 7/8] Fix pre-commit --- .pre-commit-config.yaml | 2 +- .vscode/launch.json | 6 +++--- docker-compose.yaml | 4 ++-- inference/README.md | 18 +++++++++++------- inference/server/main.py | 10 ++++++---- inference/worker/__main__.py | 8 ++++---- 6 files changed, 27 insertions(+), 21 deletions(-) diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index 979e8466bc..374f2d0e28 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -29,7 +29,7 @@ exclude: build|stubs|^bot/templates/$|openassistant/templates|docs/docs/api/openapi.json|scripts/postprocessing/regex_pii_detector.py default_language_version: - python: python3 + python: python3.10 ci: autofix_prs: true diff --git a/.vscode/launch.json b/.vscode/launch.json index 1cc706f9e9..0600b9aab5 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -133,11 +133,11 @@ }, "pathMappings": [ { - "localRoot": "${workspaceFolder}/inference/worker", - "remoteRoot": "/opt/inference/worker" + "localRoot": "${workspaceFolder}", + "remoteRoot": "/opt" } ], "justMyCode": false - }, + } ] } diff --git a/docker-compose.yaml b/docker-compose.yaml index b44bc20235..af7709ff3f 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -238,7 +238,7 @@ services: restart: unless-stopped ports: - "8000:8000" - - "5678:5678" # Port to attach debugger + - "5678:5678" # Port to attach debugger depends_on: inference-redis: condition: service_healthy @@ -261,7 +261,7 @@ services: - "./oasst-shared:/opt/inference/lib/oasst-shared" - "./inference/worker:/opt/inference/worker" ports: - - "5679:5679" # Port to attach debugger + - "5679:5679" # Port to attach debugger deploy: replicas: 1 profiles: ["inference"] diff --git a/inference/README.md b/inference/README.md index a992d88de9..9604b309e0 100644 --- a/inference/README.md +++ b/inference/README.md @@ -62,20 +62,24 @@ python __main__.py ## Debugging -The inference server and the worker allow attaching a Python debugger. -To do this from VS Code, start the inference server & worker using docker compose as described above -(e.g. with `docker compose --profile inference up --build`), then simply pick one of the following launch -profiles, depending on what you would like to debug: +The inference server and the worker allow attaching a Python debugger. To do +this from VS Code, start the inference server & worker using docker compose as +described above (e.g. with `docker compose --profile inference up --build`), +then simply pick one of the following launch profiles, depending on what you +would like to debug: + - Debug: Inference Server - Debug: Worker ### Waiting for Debugger on Startup -It can be helpful to wait for the debugger before starting the application. -This can be achieved by uncommenting `debugpy.wait_for_client()` in the appropriate location: + +It can be helpful to wait for the debugger before starting the application. This +can be achieved by uncommenting `debugpy.wait_for_client()` in the appropriate +location: + - `inference/server/main.py` for the inference server - `inference/worker/__main.py__` for the worker - ## Distributed Testing We run distributed load tests using the diff --git a/inference/server/main.py b/inference/server/main.py index 659518e817..4b39758e37 100644 --- a/inference/server/main.py +++ b/inference/server/main.py @@ -151,16 +151,18 @@ async def welcome_message(): if __name__ == "__main__": - import uvicorn import os - port = int(os.getenv('PORT', "8000")) + import uvicorn + + port = int(os.getenv("PORT", "8000")) is_debug = bool(os.getenv("DEBUG", "False")) if is_debug: import debugpy - debugpy.listen(("0.0.0.0", "5679")) + + debugpy.listen(("0.0.0.0", 5678)) # Uncomment to wait here until a debugger is attached # debugpy.wait_for_client() - + uvicorn.run("main:app", host="0.0.0.0", port=port, reload=is_debug) diff --git a/inference/worker/__main__.py b/inference/worker/__main__.py index 6dacdfc0a0..baa7f7d2cf 100644 --- a/inference/worker/__main__.py +++ b/inference/worker/__main__.py @@ -1,11 +1,10 @@ import concurrent.futures +import os import signal import sys import time from contextlib import closing -import os - import pydantic import transformers import utils @@ -136,8 +135,9 @@ def main(): if is_debug: import debugpy - debugpy.listen(("0.0.0.0", "5679")) + + debugpy.listen(("0.0.0.0", 5679)) # Uncomment to wait here until a debugger is attached # debugpy.wait_for_client() - + main() From 73bbce64ab84ee31443c7f371bd51c32814a208b Mon Sep 17 00:00:00 2001 From: Florian Behrens Date: Wed, 2 Aug 2023 23:20:07 +0200 Subject: [PATCH 8/8] Use long form --- .vscode/launch.json | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.vscode/launch.json b/.vscode/launch.json index 0600b9aab5..1a9997def9 100644 --- a/.vscode/launch.json +++ b/.vscode/launch.json @@ -133,8 +133,8 @@ }, "pathMappings": [ { - "localRoot": "${workspaceFolder}", - "remoteRoot": "/opt" + "localRoot": "${workspaceFolder}/inference/worker", + "remoteRoot": "/opt/inference/worker" } ], "justMyCode": false