diff --git a/.github/workflows/cogstack-cohorter-docker.yml b/.github/workflows/cogstack-cohorter-docker.yml new file mode 100644 index 0000000..d648ef6 --- /dev/null +++ b/.github/workflows/cogstack-cohorter-docker.yml @@ -0,0 +1,71 @@ +name: ci-build-cohorter + +on: + push: + branches: [main] + tags: + - "cohorter-v*.*.*" # e.g., cohorter-v1.2.3 + paths: + - "cogstack-cohorter/**" + - ".github/workflows/cogstack-cohorter-docker**" + pull_request: + paths: + - "cogstack-cohorter/**" + - ".github/workflows/cogstack-cohorter-docker**" + +jobs: + docker: + runs-on: ubuntu-latest + if: github.event.pull_request.user.login != 'dependabot[bot]' && github.repository == 'CogStack/cogstack-platform' + strategy: + matrix: + include: + - name: nl2dsl + context: cogstack-cohorter/NL2DSL + dockerfile: cogstack-cohorter/NL2DSL/Dockerfile + image: cogstacksystems/cogstack-cohorter-nl2dsl + + - name: webapp + context: cogstack-cohorter/WebAPP + dockerfile: cogstack-cohorter/WebAPP/Dockerfile + image: cogstacksystems/cogstack-cohorter-webapp + + steps: + - name: Checkout branch + uses: actions/checkout@v4 + + - name: Login to DockerHub + uses: docker/login-action@v3 + with: + username: ${{ secrets.DOCKERHUB_USERNAME }} + password: ${{ secrets.DOCKERHUB_TOKEN }} + + - name: Set up Docker Buildx + uses: docker/setup-buildx-action@v3 + + - name: Extract metadata (tags, labels) for Docker + id: meta + uses: docker/metadata-action@v5 + with: + images: ${{ matrix.image }} + tags: | + # latest tag on the default branch + type=raw,value=latest,enable={{is_default_branch}} + # Include all default tags + type=schedule + type=ref,event=branch + type=ref,event=tag + type=ref,event=pr + type=sha + + - name: Build and push Docker image + id: push + uses: docker/build-push-action@v6 + with: + context: ${{ matrix.context }} + file: ${{ matrix.dockerfile }} + push: ${{ github.event_name != 'pull_request' }} + tags: ${{ steps.meta.outputs.tags }} + labels: ${{ steps.meta.outputs.labels }} + 
cache-from: type=registry,ref=${{ matrix.image }}:buildcache + cache-to: type=registry,ref=${{ matrix.image }}:buildcache,mode=max diff --git a/cogstack-cohorter/WebAPP/.dockerignore b/cogstack-cohorter/WebAPP/.dockerignore new file mode 100644 index 0000000..087b685 --- /dev/null +++ b/cogstack-cohorter/WebAPP/.dockerignore @@ -0,0 +1,7 @@ +node_modules/ +client/node_modules/ +client-react/node_modules/ +server/node_modules/ + +# Data files are never baked into the image — supply them via volume mount at runtime +server/data/ diff --git a/cogstack-cohorter/WebAPP/Dockerfile b/cogstack-cohorter/WebAPP/Dockerfile index 421a824..7a10318 100644 --- a/cogstack-cohorter/WebAPP/Dockerfile +++ b/cogstack-cohorter/WebAPP/Dockerfile @@ -1,13 +1,13 @@ FROM node:latest WORKDIR /usr/src/app -COPY . . -RUN cd /usr/src/app/server/data && tar xzvf snomed_terms_data.tar.gz +COPY . . +RUN cd /usr/src/app/client-react && npm install && npm run build +RUN cd /usr/src/app/server && npm install -ARG random -RUN if [ "$random" = "true" ] ; then cd /usr/src/app/server/data && node --max-old-space-size=32768 gen_random_data.js ; fi +COPY entrypoint.sh /entrypoint.sh +RUN sed -i 's/\r$//' /entrypoint.sh && chmod +x /entrypoint.sh -RUN cd /usr/src/app/server && npm install EXPOSE 3000 WORKDIR /usr/src/app/server -CMD ["node", "--max-old-space-size=32768", "server.js"] +ENTRYPOINT ["/entrypoint.sh"] diff --git a/cogstack-cohorter/WebAPP/entrypoint.sh b/cogstack-cohorter/WebAPP/entrypoint.sh new file mode 100644 index 0000000..b5db8b6 --- /dev/null +++ b/cogstack-cohorter/WebAPP/entrypoint.sh @@ -0,0 +1,28 @@ +#!/bin/sh +set -e + +DATA_DIR=/usr/src/app/server/data + +# ── Step 1: extract archive if JSON data isn't already present ──────────────── +if [ ! -f "$DATA_DIR/snomed_terms.json" ]; then + if [ -f "$DATA_DIR/snomed_terms_data.tar.gz" ]; then + echo "[webapp] Extracting SNOMED data archive..." 
+ tar xzvf "$DATA_DIR/snomed_terms_data.tar.gz" -C "$DATA_DIR" + else + echo "[webapp] ERROR: No data found at $DATA_DIR." >&2 + echo "[webapp] Mount a directory containing snomed_terms.json (and related files)" >&2 + echo "[webapp] or snomed_terms_data.tar.gz via a Docker volume:" >&2 + echo "[webapp] -v /your/data:/usr/src/app/server/data" >&2 + exit 1 + fi +fi + +# ── Step 2 (optional): generate random patient data ─────────────────────────── +# Set RANDOM_DATA=true in the container environment to generate synthetic data. +if [ "${RANDOM_DATA}" = "true" ]; then + echo "[webapp] Generating random demo patient data..." + node --max-old-space-size=32768 /usr/src/app/server/gen_random_data.js +fi + +# ── Step 3: start the server ────────────────────────────────────────────────── +exec node --max-old-space-size=32768 server.js diff --git a/cogstack-cohorter/WebAPP/server/gen_random_data.js b/cogstack-cohorter/WebAPP/server/gen_random_data.js new file mode 100644 index 0000000..d242e75 --- /dev/null +++ b/cogstack-cohorter/WebAPP/server/gen_random_data.js @@ -0,0 +1,66 @@ +// Use this script to generate random data for the app +// Run this script with the command: +// node --max-old-space-size=32768 gen_random_data.js +console.log('Generating random data') +const fs = require('fs'); +const snomed_terms = require('./data/snomed_terms.json'); + +// Returns a random integer between min (inclusive) and max (inclusive). +function random_int(min, max) { + min = Math.ceil(min); + max = Math.floor(max); + return Math.floor(Math.random() * (max - min + 1)) + min; +} + +const sex_id2code = ['Male', 'Female', 'Unknown'] +const eth_id2code = ['Asian', 'Black', 'White', 'Mixed', 'Other', 'Unknown']; +let ptt2age = {}; +let ptt2sex = {}; +let ptt2eth = {}; +let ptt2dod = {}; +let cui2ptt_pos = {}; +let cui2ptt_tsp = {}; + +let ptt_num = 100000; +let max_ptt = 1000; // max. 
number of ptt a term can have +let max_age = 100; +let die_pct = 10; // percentage of died ptt = 1 / die_pct + +// generate ptt_num random patient data +for (let i=0;i { pos_out.write(`{"${snomed_terms[k]['cui']}":` + JSON.stringify(cui2ptt_pos[k]) + '}\n'); }); +Object.keys(cui2ptt_tsp).forEach( k => { tsp_out.write(`{"${snomed_terms[k]['cui']}":` + JSON.stringify(cui2ptt_tsp[k]) + '}\n'); }); +console.log('Finished generating random data') \ No newline at end of file diff --git a/cogstack-cohorter/WebAPP/server/server.js b/cogstack-cohorter/WebAPP/server/server.js index e1157b8..744c597 100644 --- a/cogstack-cohorter/WebAPP/server/server.js +++ b/cogstack-cohorter/WebAPP/server/server.js @@ -28,7 +28,7 @@ app.use(session({ })) -const NL2DSL_SERVER = process.env.NL2DSL_SERVER || "http://localhost:4000/api/compile"; +const NL2DSL_SERVER = process.env.NL2DSL_SERVER || "http://localhost:3002/api/compile"; let port = process.env.PORT || 3000; console.log('Loading data...'); @@ -181,7 +181,7 @@ for (let i=0;i + sh -c " + until ollama list > /dev/null 2>&1; do + echo '[ollama-pull] Waiting for Ollama...'; sleep 2; + done && + ollama pull ${OLLAMA_MODEL:-gpt-oss:20b} + " + environment: + OLLAMA_HOST: "http://ollama:11434" + depends_on: + - ollama + restart: "no" + medcat: image: cogstacksystems/medcat-service:latest container_name: cohorter-medcat @@ -32,20 +48,25 @@ services: MEDCAT_URL: "http://cohorter-medcat:5000" ALLOW_ORIGINS: "*" depends_on: - - ollama + - ollama-init - medcat restart: unless-stopped webapp: build: context: ./WebAPP - args: - random: "false" # set to "true" to run gen_random_data.js during build container_name: cohorter-webapp ports: - "3000:3000" environment: - NL2DSL_URL: "http://cohorter-nl2dsl:3002" + NL2DSL_SERVER: "http://cohorter-nl2dsl:3002/api/compile" + # Set to "true" to generate synthetic patient data on startup. + RANDOM_DATA: "true" + volumes: + # Mount your data directory here. 
It should contain either: + # - snomed_terms_data.tar.gz (will be auto-extracted on startup), or + # - pre-extracted files: snomed_terms.json (SNOMED term lookup by Concept Unique Identifier) and cui_pt2ch.json (SNOMED CT ontology hierarchy map). + - ./WebAPP/server/data:/usr/src/app/server/data depends_on: - medcat - nl2dsl