From 1d99f388ddfe40d6fa206f1a395fff367e9e533d Mon Sep 17 00:00:00 2001
From: Florian Behrens <fb@hades.ai>
Date: Sun, 16 Jul 2023 12:29:42 +0200
Subject: [PATCH 1/8] Add VS Code attach configs for inference server and worker

---
 .vscode/launch.json | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index aae38fbe91..1cc706f9e9 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -106,6 +106,38 @@
         "CUDA_VISIBLE_DEVICES": "1,2,3,4,5",
         "OMP_NUM_THREADS": "1"
       }
-    }
+    },
+    {
+      "name": "Debug: Inference Server",
+      "type": "python",
+      "request": "attach",
+      "connect": {
+        "host": "localhost",
+        "port": 5678
+      },
+      "pathMappings": [
+        {
+          "localRoot": "${workspaceFolder}/inference/server",
+          "remoteRoot": "/opt/inference/server"
+        }
+      ],
+      "justMyCode": false
+    },
+    {
+      "name": "Debug: Worker",
+      "type": "python",
+      "request": "attach",
+      "connect": {
+        "host": "localhost",
+        "port": 5679
+      },
+      "pathMappings": [
+        {
+          "localRoot": "${workspaceFolder}/inference/worker",
+          "remoteRoot": "/opt/inference/worker"
+        }
+      ],
+      "justMyCode": false
+    },
   ]
 }

From b9f814eaedebd85052a22a8a50bc7b20a8cb6d39 Mon Sep 17 00:00:00 2001
From: Florian Behrens <fb@hades.ai>
Date: Sun, 16 Jul 2023 12:32:02 +0200
Subject: [PATCH 2/8] Update compose file

---
 docker-compose.yaml | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/docker-compose.yaml b/docker-compose.yaml
index 6497af6a1b..6ae8ce656c 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -237,6 +237,7 @@ services:
     restart: unless-stopped
     ports:
       - "8000:8000"
+      - "5678:5678"  # Port to attach debugger
     depends_on:
       inference-redis:
         condition: service_healthy
@@ -257,6 +258,8 @@ services:
     volumes:
       - "./oasst-shared:/opt/inference/lib/oasst-shared"
       - "./inference/worker:/opt/inference/worker"
+    ports:
+      - "5679:5679"  # Port to attach debugger
     deploy:
       replicas: 1
     profiles: ["inference"]

From b08dad63a0537539246f71342fe70a72d89d6ebe Mon Sep 17 00:00:00 2001
From: Florian Behrens <fb@hades.ai>
Date: Sun, 16 Jul 2023 12:44:54 +0200
Subject: [PATCH 3/8] Add config for inference server

---
 docker/inference/Dockerfile.server | 5 +++--
 inference/server/main.py           | 9 +++++++++
 inference/server/requirements.txt  | 1 +
 3 files changed, 13 insertions(+), 2 deletions(-)

diff --git a/docker/inference/Dockerfile.server b/docker/inference/Dockerfile.server
index 17d69173d3..94445ca5a3 100644
--- a/docker/inference/Dockerfile.server
+++ b/docker/inference/Dockerfile.server
@@ -78,8 +78,9 @@ USER ${APP_USER}
 VOLUME [ "${APP_BASE}/lib/oasst-shared" ]
 VOLUME [ "${APP_BASE}/lib/oasst-data" ]
 
-
-CMD uvicorn main:app --reload --host 0.0.0.0 --port "${PORT}"
+# Start the server within pydebug to allow attaching a debugger; add "--wait-for-client" if you want to halt execution
+# until the debugger has been attached
+CMD python -m pydebug --listen 0.0.0.0:5678 main.py
 
 
 
diff --git a/inference/server/main.py b/inference/server/main.py
index 711d75ee9c..8f96c8a009 100644
--- a/inference/server/main.py
+++ b/inference/server/main.py
@@ -148,3 +148,12 @@ async def maybe_add_debug_api_keys():
 async def welcome_message():
     logger.warning("Inference server started")
     logger.warning("To stop the server, press Ctrl+C")
+
+
+if __name__ == "__main__":
+    # Entrypoint for the server in dev environments
+    # pydebug needs a Python process to attach to, so we start uvicorn from Python instead of invoking it directly
+    import uvicorn
+    import os
+    port = int(os.getenv('PORT', "8000"))
+    uvicorn.run("main:app", host="0.0.0.0", port=port, reload=True)
diff --git a/inference/server/requirements.txt b/inference/server/requirements.txt
index db5045481b..d97a2c260f 100644
--- a/inference/server/requirements.txt
+++ b/inference/server/requirements.txt
@@ -4,6 +4,7 @@ asyncpg
 authlib
 beautifulsoup4 # web_retriever plugin
 cryptography==39.0.0
+debugpy
 fastapi-limiter
 fastapi[all]==0.88.0
 google-api-python-client

From 23b5d98c8f9ce5029a2b08fc651a48589552c7cf Mon Sep 17 00:00:00 2001
From: Florian Behrens <fb@hades.ai>
Date: Sun, 16 Jul 2023 13:21:47 +0200
Subject: [PATCH 4/8] Switch to running debugpy from Python

---
 docker-compose.yaml               |  2 ++
 inference/server/main.py          | 13 ++++++++++---
 inference/worker/__main__.py      | 10 ++++++++++
 inference/worker/requirements.txt |  1 +
 4 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/docker-compose.yaml b/docker-compose.yaml
index 6ae8ce656c..b44bc20235 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -231,6 +231,7 @@ services:
       TRUSTED_CLIENT_KEYS: "6969"
       ALLOW_DEBUG_AUTH: "True"
       API_ROOT: "http://localhost:8000"
+      DEBUG: "True"
     volumes:
       - "./oasst-shared:/opt/inference/lib/oasst-shared"
       - "./inference/server:/opt/inference/server"
@@ -255,6 +256,7 @@ services:
       MODEL_CONFIG_NAME: ${MODEL_CONFIG_NAME:-distilgpt2}
       BACKEND_URL: "ws://inference-server:8000"
       PARALLELISM: 2
+      DEBUG: "True"
     volumes:
       - "./oasst-shared:/opt/inference/lib/oasst-shared"
       - "./inference/worker:/opt/inference/worker"
diff --git a/inference/server/main.py b/inference/server/main.py
index 8f96c8a009..659518e817 100644
--- a/inference/server/main.py
+++ b/inference/server/main.py
@@ -151,9 +151,16 @@ async def welcome_message():
 
 
 if __name__ == "__main__":
-    # Entrypoint for the server in dev environments
-    # pydebug needs a Python process to attach to, so we start uvicorn from Python instead of invoking it directly
     import uvicorn
     import os
+
     port = int(os.getenv('PORT', "8000"))
-    uvicorn.run("main:app", host="0.0.0.0", port=port, reload=True)
+    is_debug = bool(os.getenv("DEBUG", "False"))
+
+    if is_debug:
+        import debugpy
+        debugpy.listen(("0.0.0.0", "5679"))
+        # Uncomment to wait here until a debugger is attached
+        # debugpy.wait_for_client()
+    
+    uvicorn.run("main:app", host="0.0.0.0", port=port, reload=is_debug)
diff --git a/inference/worker/__main__.py b/inference/worker/__main__.py
index 569e340276..6dacdfc0a0 100644
--- a/inference/worker/__main__.py
+++ b/inference/worker/__main__.py
@@ -4,6 +4,8 @@
 import time
 from contextlib import closing
 
+import os
+
 import pydantic
 import transformers
 import utils
@@ -130,4 +132,12 @@ def main():
 
 
 if __name__ == "__main__":
+    is_debug = os.getenv("DEBUG", "False").lower() in ("1", "true", "yes")  # bool("False") would be True
+
+    if is_debug:
+        import debugpy
+        debugpy.listen(("0.0.0.0", "5679"))
+        # Uncomment to wait here until a debugger is attached
+        # debugpy.wait_for_client()
+    
     main()
diff --git a/inference/worker/requirements.txt b/inference/worker/requirements.txt
index cbb25fe322..94a7dc18e1 100644
--- a/inference/worker/requirements.txt
+++ b/inference/worker/requirements.txt
@@ -1,4 +1,5 @@
 aiohttp
+debugpy
 hf_transfer
 huggingface_hub
 langchain==0.0.142

From a2f1235c8acfaa4242c90c12eb46366a533d2e7a Mon Sep 17 00:00:00 2001
From: Florian Behrens <fb@hades.ai>
Date: Sun, 16 Jul 2023 13:23:38 +0200
Subject: [PATCH 5/8] Undo changes to Dockerfile

---
 docker/inference/Dockerfile.server | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/docker/inference/Dockerfile.server b/docker/inference/Dockerfile.server
index 94445ca5a3..a1efbe4dad 100644
--- a/docker/inference/Dockerfile.server
+++ b/docker/inference/Dockerfile.server
@@ -78,9 +78,8 @@ USER ${APP_USER}
 VOLUME [ "${APP_BASE}/lib/oasst-shared" ]
 VOLUME [ "${APP_BASE}/lib/oasst-data" ]
 
-# Start the server within pydebug to allow attaching a debugger; add "--wait-for-client" if you want to halt execution
-# until the debugger has been attached
-CMD python -m pydebug --listen 0.0.0.0:5678 main.py
+# In the dev image, we start uvicorn from Python so that we can attach the debugger
+CMD python main.py
 
 
 

From 6369f7eba2ef0da5ad6d2f08b49f1575286e2bc1 Mon Sep 17 00:00:00 2001
From: Florian Behrens <fb@hades.ai>
Date: Sun, 16 Jul 2023 13:42:54 +0200
Subject: [PATCH 6/8] Add documentation

---
 inference/README.md | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/inference/README.md b/inference/README.md
index 3f9e339ed2..a992d88de9 100644
--- a/inference/README.md
+++ b/inference/README.md
@@ -60,6 +60,22 @@ python __main__.py
 # You'll soon see a `User:` prompt, where you can type your prompts.
 ```
 
+## Debugging
+
+The inference server and the worker allow attaching a Python debugger.
+To do this from VS Code, start the inference server & worker using docker compose as described above
+(e.g. with `docker compose --profile inference up --build`), then simply pick one of the following launch
+profiles, depending on what you would like to debug:
+- Debug: Inference Server
+- Debug: Worker
+
+### Waiting for Debugger on Startup
+It can be helpful to wait for the debugger before starting the application.
+This can be achieved by uncommenting `debugpy.wait_for_client()` in the appropriate location:
+- `inference/server/main.py` for the inference server
+- `inference/worker/__main.py__` for the worker
+
+
 ## Distributed Testing
 
 We run distributed load tests using the

From 87348730a52896fd04b098df1f0c1790bb38cc90 Mon Sep 17 00:00:00 2001
From: Florian Behrens <fb@hades.ai>
Date: Wed, 2 Aug 2023 22:52:47 +0200
Subject: [PATCH 7/8] Fix pre-commit

---
 .pre-commit-config.yaml      |  2 +-
 .vscode/launch.json          |  6 +++---
 docker-compose.yaml          |  4 ++--
 inference/README.md          | 18 +++++++++++-------
 inference/server/main.py     | 10 ++++++----
 inference/worker/__main__.py |  8 ++++----
 6 files changed, 27 insertions(+), 21 deletions(-)

diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 979e8466bc..374f2d0e28 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -29,7 +29,7 @@
 exclude: build|stubs|^bot/templates/$|openassistant/templates|docs/docs/api/openapi.json|scripts/postprocessing/regex_pii_detector.py
 
 default_language_version:
-  python: python3
+  python: python3.10
 
 ci:
   autofix_prs: true
diff --git a/.vscode/launch.json b/.vscode/launch.json
index 1cc706f9e9..0600b9aab5 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -133,11 +133,11 @@
       },
       "pathMappings": [
         {
-          "localRoot": "${workspaceFolder}/inference/worker",
-          "remoteRoot": "/opt/inference/worker"
+          "localRoot": "${workspaceFolder}",
+          "remoteRoot": "/opt"
         }
       ],
       "justMyCode": false
-    },
+    }
   ]
 }
diff --git a/docker-compose.yaml b/docker-compose.yaml
index b44bc20235..af7709ff3f 100644
--- a/docker-compose.yaml
+++ b/docker-compose.yaml
@@ -238,7 +238,7 @@ services:
     restart: unless-stopped
     ports:
       - "8000:8000"
-      - "5678:5678"  # Port to attach debugger
+      - "5678:5678" # Port to attach debugger
     depends_on:
       inference-redis:
         condition: service_healthy
@@ -261,7 +261,7 @@ services:
       - "./oasst-shared:/opt/inference/lib/oasst-shared"
       - "./inference/worker:/opt/inference/worker"
     ports:
-      - "5679:5679"  # Port to attach debugger
+      - "5679:5679" # Port to attach debugger
     deploy:
       replicas: 1
     profiles: ["inference"]
diff --git a/inference/README.md b/inference/README.md
index a992d88de9..9604b309e0 100644
--- a/inference/README.md
+++ b/inference/README.md
@@ -62,20 +62,24 @@ python __main__.py
 
 ## Debugging
 
-The inference server and the worker allow attaching a Python debugger.
-To do this from VS Code, start the inference server & worker using docker compose as described above
-(e.g. with `docker compose --profile inference up --build`), then simply pick one of the following launch
-profiles, depending on what you would like to debug:
+The inference server and the worker allow attaching a Python debugger. To do
+this from VS Code, start the inference server & worker using docker compose as
+described above (e.g. with `docker compose --profile inference up --build`),
+then simply pick one of the following launch profiles, depending on what you
+would like to debug:
+
 - Debug: Inference Server
 - Debug: Worker
 
 ### Waiting for Debugger on Startup
-It can be helpful to wait for the debugger before starting the application.
-This can be achieved by uncommenting `debugpy.wait_for_client()` in the appropriate location:
+
+It can be helpful to wait for the debugger before starting the application. This
+can be achieved by uncommenting `debugpy.wait_for_client()` in the appropriate
+location:
+
 - `inference/server/main.py` for the inference server
-- `inference/worker/__main.py__` for the worker
+- `inference/worker/__main__.py` for the worker
 
-
 ## Distributed Testing
 
 We run distributed load tests using the
diff --git a/inference/server/main.py b/inference/server/main.py
index 659518e817..4b39758e37 100644
--- a/inference/server/main.py
+++ b/inference/server/main.py
@@ -151,16 +151,18 @@ async def welcome_message():
 
 
 if __name__ == "__main__":
-    import uvicorn
     import os
 
-    port = int(os.getenv('PORT', "8000"))
+    import uvicorn
+
+    port = int(os.getenv("PORT", "8000"))
     is_debug = bool(os.getenv("DEBUG", "False"))
 
     if is_debug:
         import debugpy
-        debugpy.listen(("0.0.0.0", "5679"))
+
+        debugpy.listen(("0.0.0.0", 5678))
         # Uncomment to wait here until a debugger is attached
         # debugpy.wait_for_client()
-    
+
     uvicorn.run("main:app", host="0.0.0.0", port=port, reload=is_debug)
diff --git a/inference/worker/__main__.py b/inference/worker/__main__.py
index 6dacdfc0a0..baa7f7d2cf 100644
--- a/inference/worker/__main__.py
+++ b/inference/worker/__main__.py
@@ -1,11 +1,10 @@
 import concurrent.futures
+import os
 import signal
 import sys
 import time
 from contextlib import closing
 
-import os
-
 import pydantic
 import transformers
 import utils
@@ -136,8 +135,9 @@ def main():
 
     if is_debug:
         import debugpy
-        debugpy.listen(("0.0.0.0", "5679"))
+
+        debugpy.listen(("0.0.0.0", 5679))
         # Uncomment to wait here until a debugger is attached
         # debugpy.wait_for_client()
-    
+
     main()

From 73bbce64ab84ee31443c7f371bd51c32814a208b Mon Sep 17 00:00:00 2001
From: Florian Behrens <fb@hades.ai>
Date: Wed, 2 Aug 2023 23:20:07 +0200
Subject: [PATCH 8/8] Use full path mappings in worker launch config

---
 .vscode/launch.json | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/.vscode/launch.json b/.vscode/launch.json
index 0600b9aab5..1a9997def9 100644
--- a/.vscode/launch.json
+++ b/.vscode/launch.json
@@ -133,8 +133,8 @@
       },
       "pathMappings": [
         {
-          "localRoot": "${workspaceFolder}",
-          "remoteRoot": "/opt"
+          "localRoot": "${workspaceFolder}/inference/worker",
+          "remoteRoot": "/opt/inference/worker"
         }
       ],
       "justMyCode": false