Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
71 changes: 71 additions & 0 deletions .github/workflows/cogstack-cohorter-docker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
# CI: build the cogstack-cohorter Docker images and push them to DockerHub
# (pushes are skipped for pull requests — those only verify the build).
name: ci-build-cohorter

on:
  push:
    branches: [main]
    tags:
      - "cohorter-v*.*.*" # e.g., cohorter-v1.2.3
    # NOTE(review): GitHub applies `paths` filtering to tag pushes too —
    # confirm tag builds still trigger when the tagged commit touches no
    # matching path.
    paths:
      - "cogstack-cohorter/**"
      - ".github/workflows/cogstack-cohorter-docker**"
  pull_request:
    paths:
      - "cogstack-cohorter/**"
      - ".github/workflows/cogstack-cohorter-docker**"

jobs:
  docker:
    runs-on: ubuntu-latest
    # Skip dependabot PRs and forks of the repo. On plain push events
    # github.event.pull_request is empty, so the first clause evaluates to
    # '' != 'dependabot[bot]' (true) and the job still runs.
    if: github.event.pull_request.user.login != 'dependabot[bot]' && github.repository == 'CogStack/cogstack-platform'
    strategy:
      # One job per image; both build from the same repository checkout.
      matrix:
        include:
          - name: nl2dsl
            context: cogstack-cohorter/NL2DSL
            dockerfile: cogstack-cohorter/NL2DSL/Dockerfile
            image: cogstacksystems/cogstack-cohorter-nl2dsl

          - name: webapp
            context: cogstack-cohorter/WebAPP
            dockerfile: cogstack-cohorter/WebAPP/Dockerfile
            image: cogstacksystems/cogstack-cohorter-webapp

    steps:
      - name: Checkout branch
        uses: actions/checkout@v4

      - name: Login to DockerHub
        # Forked PRs have no access to repository secrets, so an unconditional
        # login would fail there; PR builds don't push, so login is unneeded.
        if: github.event_name != 'pull_request'
        uses: docker/login-action@v3
        with:
          username: ${{ secrets.DOCKERHUB_USERNAME }}
          password: ${{ secrets.DOCKERHUB_TOKEN }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Extract metadata (tags, labels) for Docker
        id: meta
        uses: docker/metadata-action@v5
        with:
          images: ${{ matrix.image }}
          tags: |
            # latest tag on the default branch
            type=raw,value=latest,enable={{is_default_branch}}
            # Include all default tags
            type=schedule
            type=ref,event=branch
            type=ref,event=tag
            type=ref,event=pr
            type=sha

      - name: Build and push Docker image
        id: push
        uses: docker/build-push-action@v6
        with:
          context: ${{ matrix.context }}
          file: ${{ matrix.dockerfile }}
          # PR builds only verify the image builds; nothing is pushed.
          push: ${{ github.event_name != 'pull_request' }}
          tags: ${{ steps.meta.outputs.tags }}
          labels: ${{ steps.meta.outputs.labels }}
          # Registry-backed build cache shared across runs of this workflow.
          cache-from: type=registry,ref=${{ matrix.image }}:buildcache
          cache-to: type=registry,ref=${{ matrix.image }}:buildcache,mode=max
7 changes: 7 additions & 0 deletions cogstack-cohorter/WebAPP/.dockerignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
# Keep host-side node_modules out of the build context — the image runs its
# own `npm install` for the client and server during the build.
node_modules/
client/node_modules/
client-react/node_modules/
server/node_modules/

# Data files are never baked into the image — supply them via volume mount at runtime
server/data/
12 changes: 6 additions & 6 deletions cogstack-cohorter/WebAPP/Dockerfile
Original file line number Diff line number Diff line change
@@ -1,13 +1,13 @@
# WebAPP image: builds the React client and installs server dependencies.
# SNOMED data is NOT baked into the image — entrypoint.sh expects it mounted
# at /usr/src/app/server/data and prepares it at container start.
FROM node:latest
WORKDIR /usr/src/app
COPY . .

# Build the React front-end, then install the server's dependencies.
RUN cd /usr/src/app/client-react && npm install && npm run build
RUN cd /usr/src/app/server && npm install

COPY entrypoint.sh /entrypoint.sh
# Strip CRLF line endings (Windows checkouts) and make the script executable.
RUN sed -i 's/\r$//' /entrypoint.sh && chmod +x /entrypoint.sh

EXPOSE 3000
WORKDIR /usr/src/app/server
ENTRYPOINT ["/entrypoint.sh"]
28 changes: 28 additions & 0 deletions cogstack-cohorter/WebAPP/entrypoint.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
#!/bin/sh
# Container entrypoint for the cohorter WebAPP: prepares SNOMED data
# (extracts the archive, optionally generates synthetic patients), then
# starts the Node server.
set -e

APP_DIR=/usr/src/app/server
DATA_DIR=$APP_DIR/data

# Run from the server directory so the relative 'data/...' paths used by
# gen_random_data.js and server.js resolve correctly even if the image's
# WORKDIR changes — previously this relied on the Dockerfile's WORKDIR.
cd "$APP_DIR"

# ── Step 1: extract archive if JSON data isn't already present ────────────────
if [ ! -f "$DATA_DIR/snomed_terms.json" ]; then
  if [ -f "$DATA_DIR/snomed_terms_data.tar.gz" ]; then
    echo "[webapp] Extracting SNOMED data archive..."
    tar xzvf "$DATA_DIR/snomed_terms_data.tar.gz" -C "$DATA_DIR"
  else
    echo "[webapp] ERROR: No data found at $DATA_DIR." >&2
    echo "[webapp] Mount a directory containing snomed_terms.json (and related files)" >&2
    echo "[webapp] or snomed_terms_data.tar.gz via a Docker volume:" >&2
    echo "[webapp] -v /your/data:/usr/src/app/server/data" >&2
    exit 1
  fi
fi

# ── Step 2 (optional): generate random patient data ───────────────────────────
# Set RANDOM_DATA=true in the container environment to generate synthetic data.
# NOTE(review): this regenerates the data on every start while the flag is
# set — confirm that is intended for restarted containers.
if [ "${RANDOM_DATA}" = "true" ]; then
  echo "[webapp] Generating random demo patient data..."
  node --max-old-space-size=32768 gen_random_data.js
fi

# ── Step 3: start the server ──────────────────────────────────────────────────
# exec replaces the shell so node receives signals (clean docker stop).
exec node --max-old-space-size=32768 server.js
66 changes: 66 additions & 0 deletions cogstack-cohorter/WebAPP/server/gen_random_data.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
// Use this script to generate random data for the app
// Run this script with the command:
// node --max-old-space-size=32768 gen_random_data.js
console.log('Generating random data')
const fs = require('fs');
const snomed_terms = require('./data/snomed_terms.json');

// Returns a random integer between min (inclusive) and max (inclusive).
function random_int(min, max) {
min = Math.ceil(min);
max = Math.floor(max);
return Math.floor(Math.random() * (max - min + 1)) + min;
}

const sex_id2code = ['Male', 'Female', 'Unknown']
const eth_id2code = ['Asian', 'Black', 'White', 'Mixed', 'Other', 'Unknown'];
let ptt2age = {};
let ptt2sex = {};
let ptt2eth = {};
let ptt2dod = {};
let cui2ptt_pos = {};
let cui2ptt_tsp = {};

let ptt_num = 100000;
let max_ptt = 1000; // max. number of ptt a term can have
let max_age = 100;
let die_pct = 10; // percentage of died ptt = 1 / die_pct

// generate ptt_num random patient data
for (let i=0;i<ptt_num;i++) {
ptt2age[i] = random_int(0,max_age);
ptt2sex[i] = sex_id2code[random_int(0,sex_id2code.length-1)];
ptt2eth[i] = eth_id2code[random_int(0,eth_id2code.length-1)];
ptt2dod[i] = random_int(0,die_pct) == 0 ? random_int(Math.floor(Date.now()/1000) - (60*60*24*365*10), Math.floor(Date.now()/1000)) : 0;
if (i%100000 == 0) console.log('ptt:', i, `${Math.floor((i/ptt_num)*100)}%`);
}

// for each snomed terms, generate some random mention data
for (let i=0;i<snomed_terms.length;i++) {
if (snomed_terms[i]['str'].search('(disorder)')==-1 && snomed_terms[i]['str'].search('(finding)')==-1 &&
snomed_terms[i]['str'].search('(procedure)')==-1 && snomed_terms[i]['str'].search('(substance)')==-1)
continue;
let picked = {};
cui2ptt_pos[i] = {};
cui2ptt_tsp[i] = {};
for (let j=0;j<random_int(0,max_ptt);j++) {
let ptt = random_int(0, ptt_num-1);
while (picked[ptt]) ptt = random_int(0, ptt_num-1);
picked[ptt] = true;
cui2ptt_pos[i][ptt] = random_int(1,100);
cui2ptt_tsp[i][ptt] = random_int(Math.floor(Date.now()/1000) - (60*60*24*365*10), Math.floor(Date.now()/1000));
}
if (i%100000 == 0) console.log('men:', i, `${Math.floor((i/snomed_terms.length)*100)}%`);
}

// write to files
console.log('Writing to files...')
fs.writeFileSync('data/ptt2age.json', JSON.stringify(ptt2age));
fs.writeFileSync('data/ptt2sex.json', JSON.stringify(ptt2sex));
fs.writeFileSync('data/ptt2eth.json', JSON.stringify(ptt2eth));
fs.writeFileSync('data/ptt2dod.json', JSON.stringify(ptt2dod));
const pos_out = fs.createWriteStream('data/cui2ptt_pos.jsonl', {flags: 'w'});
const tsp_out = fs.createWriteStream('data/cui2ptt_tsp.jsonl', {flags: 'w'});
Object.keys(cui2ptt_pos).forEach( k => { pos_out.write(`{"${snomed_terms[k]['cui']}":` + JSON.stringify(cui2ptt_pos[k]) + '}\n'); });
Object.keys(cui2ptt_tsp).forEach( k => { tsp_out.write(`{"${snomed_terms[k]['cui']}":` + JSON.stringify(cui2ptt_tsp[k]) + '}\n'); });
console.log('Finished generating random data')
4 changes: 2 additions & 2 deletions cogstack-cohorter/WebAPP/server/server.js
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ app.use(session({
}))


const NL2DSL_SERVER = process.env.NL2DSL_SERVER || "http://localhost:4000/api/compile";
const NL2DSL_SERVER = process.env.NL2DSL_SERVER || "http://localhost:3002/api/compile";

let port = process.env.PORT || 3000;
console.log('Loading data...');
Expand Down Expand Up @@ -181,7 +181,7 @@ for (let i=0;i<all_ptt_cnt;i++) ptt2cui_tsp_arr[i] = {};
}
console.log('Finished reading cui2ptt_tsp');
console.timeEnd('cui2ptt_tsp');
console.log('Loading data...Finsihed');
console.log('Finished loading data');
console.log(`Access the app on port ${port}`);
})();
//========================================================
Expand Down
29 changes: 25 additions & 4 deletions cogstack-cohorter/docker-compose.yml
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,22 @@ services:
- ollama:/root/.ollama
restart: unless-stopped

ollama-init:
image: ollama/ollama:latest
container_name: ollama-pull
entrypoint: >
sh -c "
until ollama list > /dev/null 2>&1; do
echo '[ollama-pull] Waiting for Ollama...'; sleep 2;
done &&
ollama pull ${OLLAMA_MODEL:-gpt-oss:20b}
"
environment:
OLLAMA_HOST: "http://ollama:11434"
depends_on:
- ollama
restart: "no"

medcat:
image: cogstacksystems/medcat-service:latest
container_name: cohorter-medcat
Expand All @@ -32,20 +48,25 @@ services:
MEDCAT_URL: "http://cohorter-medcat:5000"
ALLOW_ORIGINS: "*"
depends_on:
- ollama
- ollama-init
- medcat
restart: unless-stopped

webapp:
build:
context: ./WebAPP
args:
random: "false" # set to "true" to run gen_random_data.js during build
container_name: cohorter-webapp
ports:
- "3000:3000"
environment:
NL2DSL_URL: "http://cohorter-nl2dsl:3002"
NL2DSL_SERVER: "http://cohorter-nl2dsl:3002/api/compile"
# Set to "true" to generate synthetic patient data on startup.
RANDOM_DATA: "true"
volumes:
# Mount your data directory here. It should contain either:
# - snomed_terms_data.tar.gz (will be auto-extracted on startup), or
# - pre-extracted files: snomed_terms.json (SNOMED term lookup by Concept Unique Identifier) and cui_pt2ch.json (SNOMED CT ontology hierachy map).
- ./WebAPP/server/data:/usr/src/app/server/data
depends_on:
- medcat
- nl2dsl
Expand Down
Loading