From a8b98dcf1a9cbe18214966e5ed05db854f45ffec Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Thu, 4 Sep 2025 20:58:37 +0000 Subject: [PATCH 01/14] ops: Creating Helm Chart for Medcat Trainer. Initial k8s manifests --- .../api-db-backup-persistentvolumeclaim.yaml | 12 ++ .../api-db-persistentvolumeclaim.yaml | 12 ++ .../api-media-persistentvolumeclaim.yaml | 12 ++ .../api-static-persistentvolumeclaim.yaml | 12 ++ .../medcat-trainer-helm/env-configmap.yaml | 27 +++ ...ttrainer-claim4-persistentvolumeclaim.yaml | 12 ++ ...ttrainer-claim5-persistentvolumeclaim.yaml | 12 ++ .../medcattrainer-deployment.yaml | 171 +++++++++++++++++ .../medcattrainer-service.yaml | 16 ++ .../medcat-trainer-helm/nginx-configmap.yaml | 172 ++++++++++++++++++ .../medcat-trainer-helm/nginx-deployment.yaml | 155 ++++++++++++++++ .../medcat-trainer-helm/nginx-service.yaml | 16 ++ .../original/docker-compose.yml | 58 ++++++ .../medcat-trainer-helm/original/envs/env | 46 +++++ .../medcat-trainer-helm/original/nginx.conf | 135 ++++++++++++++ .../original/sites-enabled/medcattrainer | 30 +++ .../solr-data-persistentvolumeclaim.yaml | 12 ++ .../medcat-trainer-helm/solr-deployment.yaml | 141 ++++++++++++++ .../medcat-trainer-helm/solr-service.yaml | 16 ++ .../supervisord-configmap.yaml | 35 ++++ 20 files changed, 1102 insertions(+) create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/api-db-backup-persistentvolumeclaim.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/api-db-persistentvolumeclaim.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/api-media-persistentvolumeclaim.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/api-static-persistentvolumeclaim.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/env-configmap.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-claim4-persistentvolumeclaim.yaml 
create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-claim5-persistentvolumeclaim.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-deployment.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-service.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/nginx-configmap.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/nginx-deployment.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/nginx-service.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/original/docker-compose.yml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/original/envs/env create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/original/nginx.conf create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/original/sites-enabled/medcattrainer create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/solr-data-persistentvolumeclaim.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/solr-deployment.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/solr-service.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/supervisord-configmap.yaml diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/api-db-backup-persistentvolumeclaim.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/api-db-backup-persistentvolumeclaim.yaml new file mode 100644 index 0000000..54de4e9 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/api-db-backup-persistentvolumeclaim.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + labels: + io.kompose.service: api-db-backup + name: api-db-backup +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Mi diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/api-db-persistentvolumeclaim.yaml 
b/deployment/kubernetes/charts/medcat-trainer-helm/api-db-persistentvolumeclaim.yaml new file mode 100644 index 0000000..0b90c73 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/api-db-persistentvolumeclaim.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + labels: + io.kompose.service: api-db + name: api-db +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Mi diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/api-media-persistentvolumeclaim.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/api-media-persistentvolumeclaim.yaml new file mode 100644 index 0000000..9008a95 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/api-media-persistentvolumeclaim.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + labels: + io.kompose.service: api-media + name: api-media +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Mi diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/api-static-persistentvolumeclaim.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/api-static-persistentvolumeclaim.yaml new file mode 100644 index 0000000..f11a863 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/api-static-persistentvolumeclaim.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + labels: + io.kompose.service: api-static + name: api-static +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Mi diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/env-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/env-configmap.yaml new file mode 100644 index 0000000..2799762 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/env-configmap.yaml @@ -0,0 +1,27 @@ +apiVersion: v1 +data: + CONCEPT_SEARCH_SERVICE_HOST: solr + CONCEPT_SEARCH_SERVICE_PORT: "8983" + CSRF_TRUSTED_ORIGINS: "" + DB_BACKUP_DIR: 
/home/api/db-backup + DB_DIR: /home/api/db + DB_PATH: /home/api/db/db.sqlite3 + DEBUG: "1" + EMAIL_HOST: mail.cogstack.org + EMAIL_PASS: to be changed + EMAIL_PORT: "465" + EMAIL_USER: example@cogstack.org + ENV: non-prod + LOAD_EXAMPLES: "1" + LOAD_NUM_DOC_PAGES: "10" + MAX_DATASET_SIZE: "10000" + MAX_MEDCAT_MODELS: "2" + MEDCAT_CONFIG_FILE: /home/configs/base.txt + OPENBLAS_NUM_THREADS: "1" + RESUBMIT_ALL_ON_STARTUP: "0" + UNIQUE_DOC_NAMES_IN_DATASETS: "True" +kind: ConfigMap +metadata: + labels: + io.kompose.service: medcattrainer-env + name: env diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-claim4-persistentvolumeclaim.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-claim4-persistentvolumeclaim.yaml new file mode 100644 index 0000000..a112c3c --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-claim4-persistentvolumeclaim.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + labels: + io.kompose.service: medcattrainer-claim4 + name: medcattrainer-claim4 +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Mi diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-claim5-persistentvolumeclaim.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-claim5-persistentvolumeclaim.yaml new file mode 100644 index 0000000..b0a7598 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-claim5-persistentvolumeclaim.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + labels: + io.kompose.service: medcattrainer-claim5 + name: medcattrainer-claim5 +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Mi diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-deployment.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-deployment.yaml new file mode 100644 index 0000000..fdcfa01 --- 
/dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-deployment.yaml @@ -0,0 +1,171 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + kompose.cmd: kompose convert --file docker-compose.yml + kompose.version: 1.34.0 (cbf2835db) + labels: + io.kompose.service: medcattrainer + name: medcattrainer +spec: + replicas: 1 + selector: + matchLabels: + io.kompose.service: medcattrainer + strategy: + type: Recreate + template: + metadata: + annotations: + kompose.cmd: kompose convert --file docker-compose.yml + kompose.version: 1.34.0 (cbf2835db) + labels: + io.kompose.service: medcattrainer + spec: + containers: + - args: + - /usr/bin/supervisord + - -c + - /etc/supervisord.conf + env: + - name: CONCEPT_SEARCH_SERVICE_HOST + valueFrom: + configMapKeyRef: + key: CONCEPT_SEARCH_SERVICE_HOST + name: env + - name: CONCEPT_SEARCH_SERVICE_PORT + valueFrom: + configMapKeyRef: + key: CONCEPT_SEARCH_SERVICE_PORT + name: env + - name: CSRF_TRUSTED_ORIGINS + valueFrom: + configMapKeyRef: + key: CSRF_TRUSTED_ORIGINS + name: env + - name: DB_BACKUP_DIR + valueFrom: + configMapKeyRef: + key: DB_BACKUP_DIR + name: env + - name: DB_DIR + valueFrom: + configMapKeyRef: + key: DB_DIR + name: env + - name: DB_PATH + valueFrom: + configMapKeyRef: + key: DB_PATH + name: env + - name: DEBUG + valueFrom: + configMapKeyRef: + key: DEBUG + name: env + - name: EMAIL_HOST + valueFrom: + configMapKeyRef: + key: EMAIL_HOST + name: env + - name: EMAIL_PASS + valueFrom: + configMapKeyRef: + key: EMAIL_PASS + name: env + - name: EMAIL_PORT + valueFrom: + configMapKeyRef: + key: EMAIL_PORT + name: env + - name: EMAIL_USER + valueFrom: + configMapKeyRef: + key: EMAIL_USER + name: env + - name: ENV + valueFrom: + configMapKeyRef: + key: ENV + name: env + - name: LOAD_EXAMPLES + valueFrom: + configMapKeyRef: + key: LOAD_EXAMPLES + name: env + - name: LOAD_NUM_DOC_PAGES + valueFrom: + configMapKeyRef: + key: LOAD_NUM_DOC_PAGES + name: env + - name: MAX_DATASET_SIZE + 
valueFrom: + configMapKeyRef: + key: MAX_DATASET_SIZE + name: env + - name: MAX_MEDCAT_MODELS + valueFrom: + configMapKeyRef: + key: MAX_MEDCAT_MODELS + name: env + - name: MCT_VERSION + value: v2.22.1 + - name: MEDCAT_CONFIG_FILE + valueFrom: + configMapKeyRef: + key: MEDCAT_CONFIG_FILE + name: env + - name: OPENBLAS_NUM_THREADS + valueFrom: + configMapKeyRef: + key: OPENBLAS_NUM_THREADS + name: env + - name: RESUBMIT_ALL_ON_STARTUP + valueFrom: + configMapKeyRef: + key: RESUBMIT_ALL_ON_STARTUP + name: env + - name: UNIQUE_DOC_NAMES_IN_DATASETS + valueFrom: + configMapKeyRef: + key: UNIQUE_DOC_NAMES_IN_DATASETS + name: env + image: cogstacksystems/medcat-trainer:v2.22.1 + name: medcattrainer + volumeMounts: + - mountPath: /home/api/media + name: api-media + - mountPath: /home/api/static + name: api-static + - mountPath: /home/api/db + name: api-db + - mountPath: /home/api/db-backup + name: api-db-backup + - mountPath: /home/configs + name: medcattrainer-claim4 + - name: supervisord-config + mountPath: /etc/supervisord.conf + subPath: supervisord.conf # <-- ensures it's a file, not a folder + restartPolicy: Always + volumes: + - name: supervisord-config + configMap: + name: supervisord-config + - name: api-media + persistentVolumeClaim: + claimName: api-media + - name: api-static + persistentVolumeClaim: + claimName: api-static + - name: api-db + persistentVolumeClaim: + claimName: api-db + - name: api-db-backup + persistentVolumeClaim: + claimName: api-db-backup + - name: medcattrainer-claim4 + persistentVolumeClaim: + claimName: medcattrainer-claim4 + - name: medcattrainer-claim5 + persistentVolumeClaim: + claimName: medcattrainer-claim5 diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-service.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-service.yaml new file mode 100644 index 0000000..0300133 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-service.yaml @@ -0,0 +1,16 @@ 
+apiVersion: v1 +kind: Service +metadata: + annotations: + kompose.cmd: kompose convert --file docker-compose.yml + kompose.version: 1.34.0 (cbf2835db) + labels: + io.kompose.service: medcattrainer + name: medcattrainer +spec: + ports: + - name: "8000" + port: 8000 + targetPort: 8000 + selector: + io.kompose.service: medcattrainer diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/nginx-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/nginx-configmap.yaml new file mode 100644 index 0000000..2e00269 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/nginx-configmap.yaml @@ -0,0 +1,172 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: nginx-config +data: + nginx.conf: | + # Configuration File - Nginx Server Configs + # http://nginx.org/en/docs/dirindex.html + + # Sets the worker threads to the number of CPU cores available in the system for best performance. + # Should be > the number of CPU cores. + # Maximum number of connections = worker_processes * worker_connections + worker_processes auto; + + # Maximum number of open files per worker process. + # Should be > worker_connections. + worker_rlimit_nofile 8192; + + events { + # If you need more connections than this, you start optimizing your OS. + # That's probably the point at which you hire people who are smarter than you as this is *a lot* of requests. + # Should be < worker_rlimit_nofile. + worker_connections 8005; + } + + # Log errors and warnings to this file + # This is only used when you don't override it on a server{} level + #error_log logs/error.log warn; + + # The file storing the process ID of the main process + pid /var/run/nginx.pid; + + http { + + # Hide nginx version information. + server_tokens off; + + # Specify MIME types for files. + include mime.types; + default_type application/octet-stream; + + # Update charset_types to match updated mime.types. + # text/html is always included by charset module. 
+ charset_types text/css text/plain text/vnd.wap.wml application/javascript application/json application/rss+xml application/xml; + + # Include $http_x_forwarded_for within default format used in log files + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + + # Log access to this file + # This is only used when you don't override it on a server{} level + #access_log logs/access.log main; + + # How long to allow each connection to stay idle. + # Longer values are better for each individual client, particularly for SSL, + # but means that worker connections are tied up longer. + keepalive_timeout 3000s; + + # Timeouts + proxy_connect_timeout 3000; + proxy_send_timeout 3000; + proxy_read_timeout 3000; + send_timeout 3000; + + # increase client body size - Model packs can be over 3GB. + client_max_body_size 4000M; + # Speed up file transfers by using sendfile() to copy directly + # between descriptors rather than using read()/write(). + # For performance reasons, on FreeBSD systems w/ ZFS + # this option should be disabled as ZFS's ARC caches + # frequently used files in RAM by default. + sendfile on; + + # Don't send out partial frames; this increases throughput + # since TCP frames are filled up before being sent out. + tcp_nopush on; + + # Enable gzip compression. + gzip on; + + # Compression level (1-9). + # 5 is a perfect compromise between size and CPU usage, offering about + # 75% reduction for most ASCII files (almost identical to level 9). + gzip_comp_level 5; + + # Don't compress anything that's already small and unlikely to shrink much + # if at all (the default is 20 bytes, which is bad as that usually leads to + # larger files after gzipping). + gzip_min_length 256; + + # Compress data even for clients that are connecting to us via proxies, + # identified by the "Via" header (required for CloudFront). 
+ gzip_proxied any; + + # Tell proxies to cache both the gzipped and regular version of a resource + # whenever the client's Accept-Encoding capabilities header varies; + # Avoids the issue where a non-gzip capable client (which is extremely rare + # today) would display gibberish if their proxy gave them the gzipped version. + gzip_vary on; + + # Compress all output labeled with one of the following MIME-types. + gzip_types + application/atom+xml + application/javascript + application/json + application/ld+json + application/manifest+json + application/rss+xml + application/vnd.geo+json + application/vnd.ms-fontobject + application/x-font-ttf + application/x-web-app-manifest+json + application/xhtml+xml + application/xml + font/opentype + image/bmp + image/svg+xml + image/x-icon + text/cache-manifest + text/css + text/plain + text/vcard + text/vnd.rim.location.xloc + text/vtt + text/x-component + text/x-cross-domain-policy; + # text/html is always compressed by gzip module + + # This should be turned on if you are going to have pre-compressed copies (.gz) of + # static files available. If not it should be left off as it will cause extra I/O + # for the check. It is best if you enable this in a location{} block for + # a specific directory, or on an individual server{} level. + # gzip_static on; + + # Include files in the sites-enabled folder. server{} configuration files should be + # placed in the sites-available folder, and then the configuration should be enabled + # by creating a symlink to it in the sites-enabled folder. + # See doc/sites-enabled.md for more info. 
+ include sites-enabled/*; + } + sitesenabled.medcattrainer: | + server { + listen 8000; + server_name localhost; + charset utf-8; + large_client_header_buffers 4 32k; + + location /static { + alias /home/api/static; + } + + location /media { + alias /home/api/media; + } + + location /api/concepts/ { + proxy_pass http://solr:8983/solr/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } + + location / { + proxy_pass http://medcattrainer:8000; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } + + } + diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/nginx-deployment.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/nginx-deployment.yaml new file mode 100644 index 0000000..00b0402 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/nginx-deployment.yaml @@ -0,0 +1,155 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + kompose.cmd: kompose convert --file docker-compose.yml + kompose.version: 1.34.0 (cbf2835db) + labels: + io.kompose.service: nginx + name: nginx +spec: + replicas: 1 + selector: + matchLabels: + io.kompose.service: nginx + strategy: + type: Recreate + template: + metadata: + annotations: + kompose.cmd: kompose convert --file docker-compose.yml + kompose.version: 1.34.0 (cbf2835db) + labels: + io.kompose.service: nginx + spec: + containers: + - env: + - name: CONCEPT_SEARCH_SERVICE_HOST + valueFrom: + configMapKeyRef: + key: CONCEPT_SEARCH_SERVICE_HOST + name: env + - name: CONCEPT_SEARCH_SERVICE_PORT + valueFrom: + configMapKeyRef: + key: CONCEPT_SEARCH_SERVICE_PORT + name: env + - name: CSRF_TRUSTED_ORIGINS + valueFrom: + configMapKeyRef: + key: CSRF_TRUSTED_ORIGINS + name: env + - name: DB_BACKUP_DIR + valueFrom: + configMapKeyRef: + key: DB_BACKUP_DIR + name: env + - name: DB_DIR + valueFrom: + configMapKeyRef: + key: 
DB_DIR + name: env + - name: DB_PATH + valueFrom: + configMapKeyRef: + key: DB_PATH + name: env + - name: DEBUG + valueFrom: + configMapKeyRef: + key: DEBUG + name: env + - name: EMAIL_HOST + valueFrom: + configMapKeyRef: + key: EMAIL_HOST + name: env + - name: EMAIL_PASS + valueFrom: + configMapKeyRef: + key: EMAIL_PASS + name: env + - name: EMAIL_PORT + valueFrom: + configMapKeyRef: + key: EMAIL_PORT + name: env + - name: EMAIL_USER + valueFrom: + configMapKeyRef: + key: EMAIL_USER + name: env + - name: ENV + valueFrom: + configMapKeyRef: + key: ENV + name: env + - name: LOAD_EXAMPLES + valueFrom: + configMapKeyRef: + key: LOAD_EXAMPLES + name: env + - name: LOAD_NUM_DOC_PAGES + valueFrom: + configMapKeyRef: + key: LOAD_NUM_DOC_PAGES + name: env + - name: MAX_DATASET_SIZE + valueFrom: + configMapKeyRef: + key: MAX_DATASET_SIZE + name: env + - name: MAX_MEDCAT_MODELS + valueFrom: + configMapKeyRef: + key: MAX_MEDCAT_MODELS + name: env + - name: MEDCAT_CONFIG_FILE + valueFrom: + configMapKeyRef: + key: MEDCAT_CONFIG_FILE + name: env + - name: OPENBLAS_NUM_THREADS + valueFrom: + configMapKeyRef: + key: OPENBLAS_NUM_THREADS + name: env + - name: RESUBMIT_ALL_ON_STARTUP + valueFrom: + configMapKeyRef: + key: RESUBMIT_ALL_ON_STARTUP + name: env + - name: UNIQUE_DOC_NAMES_IN_DATASETS + valueFrom: + configMapKeyRef: + key: UNIQUE_DOC_NAMES_IN_DATASETS + name: env + image: nginx:mainline-alpine3.22-perl + imagePullPolicy: IfNotPresent + name: nginx + ports: + - containerPort: 8000 + protocol: TCP + volumeMounts: + - mountPath: /home/api/media + name: api-media + - mountPath: /home/api/static + name: api-static + - name: nginx-config + mountPath: /etc/nginx/nginx.conf + subPath: nginx.conf + - name: nginx-config + mountPath: /etc/nginx/sites-enabled/medcattrainer + subPath: sitesenabled.medcattrainer + restartPolicy: Always + volumes: + - name: nginx-config + configMap: + name: nginx-config + - name: api-media + persistentVolumeClaim: + claimName: api-media + - name: 
api-static + persistentVolumeClaim: + claimName: api-static + diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/nginx-service.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/nginx-service.yaml new file mode 100644 index 0000000..7238b1c --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/nginx-service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + annotations: + kompose.cmd: kompose convert --file docker-compose.yml + kompose.version: 1.34.0 (cbf2835db) + labels: + io.kompose.service: nginx + name: nginx +spec: + ports: + - name: "8001" + port: 8000 + targetPort: 8000 + selector: + io.kompose.service: nginx diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/original/docker-compose.yml b/deployment/kubernetes/charts/medcat-trainer-helm/original/docker-compose.yml new file mode 100644 index 0000000..091bef4 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/original/docker-compose.yml @@ -0,0 +1,58 @@ +# Default compose yml file - uses latest build of MedCATtrainer services. Default passwords and example +# projects are not used. 
+ +services: + # medcattrainer services + medcattrainer: + image: cogstacksystems/medcat-trainer:v2.22.1 + restart: always + volumes: + - api-media:/home/api/media + - api-static:/home/api/static + - api-db:/home/api/db + - api-db-backup:/home/api/db-backup + - ./configs:/home/configs + - ./supervisord.conf:/etc/supervisord.conf + env_file: + - ./envs/env + environment: + - MCT_VERSION=v2.22.1 + ports: + - 8000:8000 + command: /usr/bin/supervisord -c /etc/supervisord.conf + + nginx: + image: nginx + restart: always + volumes: + - api-media:/home/api/media + - api-static:/home/api/static + - ./nginx/nginx.conf:/etc/nginx/nginx.conf + - ./nginx/sites-enabled/:/etc/nginx/sites-enabled + env_file: + - ./envs/env + ports: + - ${MCTRAINER_PORT:-8001}:8000 + depends_on: + - medcattrainer + - solr + + solr: + container_name: mct_solr + image: solr:8 + restart: always + env_file: + - ./envs/env + ports: + - ${SOLR_PORT:-8983}:8983 + volumes: + - solr-data:/var/solr + command: + - -cloud + +volumes: + api-media: + api-static: + api-db: + api-db-backup: + solr-data: diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/original/envs/env b/deployment/kubernetes/charts/medcat-trainer-helm/original/envs/env new file mode 100644 index 0000000..b083d1e --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/original/envs/env @@ -0,0 +1,46 @@ +### Required for later numpy versions +OPENBLAS_NUM_THREADS=1 + +### MedCAT cfg ### +MEDCAT_CONFIG_FILE=/home/configs/base.txt +# number of MedCAT models that can be cached, run in bg processes at any one time +MAX_MEDCAT_MODELS=2 + +### Deployment Realm ### +ENV=non-prod + +# Complete once this is deployed +CSRF_TRUSTED_ORIGINS= + +### Django debug setting - to live-reload etc. 
### +DEBUG=1 + +### Load example CDB, Vocab ### +LOAD_EXAMPLES=1 + +### Dataset conf ### +UNIQUE_DOC_NAMES_IN_DATASETS=True +MAX_DATASET_SIZE=10000 + +### Solr Concept Search Conf ### +CONCEPT_SEARCH_SERVICE_HOST=solr +CONCEPT_SEARCH_SERVICE_PORT=8983 + +### DB backup dir ### +# volume mount location, default docker host system volume location, this might be different in /etc/docker/daemon.json +DB_DIR=/home/api/db +# currently only supports sqlite3 dbs +DB_PATH=${DB_DIR}/db.sqlite3 +DB_BACKUP_DIR=/home/api/db-backup + +# Resubmit all on startup +RESUBMIT_ALL_ON_STARTUP=0 + +# Front end env vars +LOAD_NUM_DOC_PAGES=10 + +# SMTP email settings - when settings are configured go to webapp/frontend/.env and set VITE_APP_EMAIL to 1 +EMAIL_USER=example@cogstack.org +EMAIL_PASS="to be changed" +EMAIL_HOST=mail.cogstack.org +EMAIL_PORT=465 diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/original/nginx.conf b/deployment/kubernetes/charts/medcat-trainer-helm/original/nginx.conf new file mode 100644 index 0000000..1c8256a --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/original/nginx.conf @@ -0,0 +1,135 @@ +# Configuration File - Nginx Server Configs +# http://nginx.org/en/docs/dirindex.html + +# Sets the worker threads to the number of CPU cores available in the system for best performance. +# Should be > the number of CPU cores. +# Maximum number of connections = worker_processes * worker_connections +worker_processes auto; + +# Maximum number of open files per worker process. +# Should be > worker_connections. +worker_rlimit_nofile 8192; + +events { + # If you need more connections than this, you start optimizing your OS. + # That's probably the point at which you hire people who are smarter than you as this is *a lot* of requests. + # Should be < worker_rlimit_nofile. 
+ worker_connections 8005; +} + +# Log errors and warnings to this file +# This is only used when you don't override it on a server{} level +#error_log logs/error.log warn; + +# The file storing the process ID of the main process +pid /var/run/nginx.pid; + +http { + + # Hide nginx version information. + server_tokens off; + + # Specify MIME types for files. + include mime.types; + default_type application/octet-stream; + + # Update charset_types to match updated mime.types. + # text/html is always included by charset module. + charset_types text/css text/plain text/vnd.wap.wml application/javascript application/json application/rss+xml application/xml; + + # Include $http_x_forwarded_for within default format used in log files + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + + # Log access to this file + # This is only used when you don't override it on a server{} level + #access_log logs/access.log main; + + # How long to allow each connection to stay idle. + # Longer values are better for each individual client, particularly for SSL, + # but means that worker connections are tied up longer. + keepalive_timeout 3000s; + + # Timeouts + proxy_connect_timeout 3000; + proxy_send_timeout 3000; + proxy_read_timeout 3000; + send_timeout 3000; + + # increase client body size - Model packs can be over 3GB. + client_max_body_size 4000M; + # Speed up file transfers by using sendfile() to copy directly + # between descriptors rather than using read()/write(). + # For performance reasons, on FreeBSD systems w/ ZFS + # this option should be disabled as ZFS's ARC caches + # frequently used files in RAM by default. + sendfile on; + + # Don't send out partial frames; this increases throughput + # since TCP frames are filled up before being sent out. + tcp_nopush on; + + # Enable gzip compression. + gzip on; + + # Compression level (1-9). 
+ # 5 is a perfect compromise between size and CPU usage, offering about + # 75% reduction for most ASCII files (almost identical to level 9). + gzip_comp_level 5; + + # Don't compress anything that's already small and unlikely to shrink much + # if at all (the default is 20 bytes, which is bad as that usually leads to + # larger files after gzipping). + gzip_min_length 256; + + # Compress data even for clients that are connecting to us via proxies, + # identified by the "Via" header (required for CloudFront). + gzip_proxied any; + + # Tell proxies to cache both the gzipped and regular version of a resource + # whenever the client's Accept-Encoding capabilities header varies; + # Avoids the issue where a non-gzip capable client (which is extremely rare + # today) would display gibberish if their proxy gave them the gzipped version. + gzip_vary on; + + # Compress all output labeled with one of the following MIME-types. + gzip_types + application/atom+xml + application/javascript + application/json + application/ld+json + application/manifest+json + application/rss+xml + application/vnd.geo+json + application/vnd.ms-fontobject + application/x-font-ttf + application/x-web-app-manifest+json + application/xhtml+xml + application/xml + font/opentype + image/bmp + image/svg+xml + image/x-icon + text/cache-manifest + text/css + text/plain + text/vcard + text/vnd.rim.location.xloc + text/vtt + text/x-component + text/x-cross-domain-policy; + # text/html is always compressed by gzip module + + # This should be turned on if you are going to have pre-compressed copies (.gz) of + # static files available. If not it should be left off as it will cause extra I/O + # for the check. It is best if you enable this in a location{} block for + # a specific directory, or on an individual server{} level. + # gzip_static on; + + # Include files in the sites-enabled folder. 
server{} configuration files should be + # placed in the sites-available folder, and then the configuration should be enabled + # by creating a symlink to it in the sites-enabled folder. + # See doc/sites-enabled.md for more info. + include sites-enabled/*; +} diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/original/sites-enabled/medcattrainer b/deployment/kubernetes/charts/medcat-trainer-helm/original/sites-enabled/medcattrainer new file mode 100644 index 0000000..88b4c81 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/original/sites-enabled/medcattrainer @@ -0,0 +1,30 @@ +server { + listen 8000; + server_name localhost; + charset utf-8; + large_client_header_buffers 4 32k; + + location /static { + alias /home/api/static; + } + + location /media { + alias /home/api/media; + } + + location /api/concepts/ { + proxy_pass http://mct_solr:8983/solr/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } + + location / { + proxy_pass http://medcattrainer:8000; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } + +} + diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/solr-data-persistentvolumeclaim.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/solr-data-persistentvolumeclaim.yaml new file mode 100644 index 0000000..1b3f00e --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/solr-data-persistentvolumeclaim.yaml @@ -0,0 +1,12 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + labels: + io.kompose.service: solr-data + name: solr-data +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 100Mi diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/solr-deployment.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/solr-deployment.yaml new file mode 100644 index 0000000..073fa5c --- 
/dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/solr-deployment.yaml @@ -0,0 +1,141 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + kompose.cmd: kompose convert --file docker-compose.yml + kompose.version: 1.34.0 (cbf2835db) + labels: + io.kompose.service: solr + name: solr +spec: + replicas: 1 + selector: + matchLabels: + io.kompose.service: solr + strategy: + type: Recreate + template: + metadata: + annotations: + kompose.cmd: kompose convert --file docker-compose.yml + kompose.version: 1.34.0 (cbf2835db) + labels: + io.kompose.service: solr + spec: + containers: + - args: + - -cloud + env: + - name: CONCEPT_SEARCH_SERVICE_HOST + valueFrom: + configMapKeyRef: + key: CONCEPT_SEARCH_SERVICE_HOST + name: env + - name: CONCEPT_SEARCH_SERVICE_PORT + valueFrom: + configMapKeyRef: + key: CONCEPT_SEARCH_SERVICE_PORT + name: env + - name: CSRF_TRUSTED_ORIGINS + valueFrom: + configMapKeyRef: + key: CSRF_TRUSTED_ORIGINS + name: env + - name: DB_BACKUP_DIR + valueFrom: + configMapKeyRef: + key: DB_BACKUP_DIR + name: env + - name: DB_DIR + valueFrom: + configMapKeyRef: + key: DB_DIR + name: env + - name: DB_PATH + valueFrom: + configMapKeyRef: + key: DB_PATH + name: env + - name: DEBUG + valueFrom: + configMapKeyRef: + key: DEBUG + name: env + - name: EMAIL_HOST + valueFrom: + configMapKeyRef: + key: EMAIL_HOST + name: env + - name: EMAIL_PASS + valueFrom: + configMapKeyRef: + key: EMAIL_PASS + name: env + - name: EMAIL_PORT + valueFrom: + configMapKeyRef: + key: EMAIL_PORT + name: env + - name: EMAIL_USER + valueFrom: + configMapKeyRef: + key: EMAIL_USER + name: env + - name: ENV + valueFrom: + configMapKeyRef: + key: ENV + name: env + - name: LOAD_EXAMPLES + valueFrom: + configMapKeyRef: + key: LOAD_EXAMPLES + name: env + - name: LOAD_NUM_DOC_PAGES + valueFrom: + configMapKeyRef: + key: LOAD_NUM_DOC_PAGES + name: env + - name: MAX_DATASET_SIZE + valueFrom: + configMapKeyRef: + key: MAX_DATASET_SIZE + name: env + - name: 
MAX_MEDCAT_MODELS + valueFrom: + configMapKeyRef: + key: MAX_MEDCAT_MODELS + name: env + - name: MEDCAT_CONFIG_FILE + valueFrom: + configMapKeyRef: + key: MEDCAT_CONFIG_FILE + name: env + - name: OPENBLAS_NUM_THREADS + valueFrom: + configMapKeyRef: + key: OPENBLAS_NUM_THREADS + name: env + - name: RESUBMIT_ALL_ON_STARTUP + valueFrom: + configMapKeyRef: + key: RESUBMIT_ALL_ON_STARTUP + name: env + - name: UNIQUE_DOC_NAMES_IN_DATASETS + valueFrom: + configMapKeyRef: + key: UNIQUE_DOC_NAMES_IN_DATASETS + name: env + image: solr:8 + name: mct-solr + ports: + - containerPort: 8983 + protocol: TCP + volumeMounts: + - mountPath: /var/solr + name: solr-data + restartPolicy: Always + volumes: + - name: solr-data + persistentVolumeClaim: + claimName: solr-data diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/solr-service.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/solr-service.yaml new file mode 100644 index 0000000..4b7b5b9 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/solr-service.yaml @@ -0,0 +1,16 @@ +apiVersion: v1 +kind: Service +metadata: + annotations: + kompose.cmd: kompose convert --file docker-compose.yml + kompose.version: 1.34.0 (cbf2835db) + labels: + io.kompose.service: solr + name: solr +spec: + ports: + - name: "8983" + port: 8983 + targetPort: 8983 + selector: + io.kompose.service: solr diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/supervisord-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/supervisord-configmap.yaml new file mode 100644 index 0000000..0e115de --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/supervisord-configmap.yaml @@ -0,0 +1,35 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: supervisord-config +data: + supervisord.conf: | + [supervisord] + nodaemon=true + user=root + logfile=/var/log/supervisord.log + pidfile=/var/run/supervisord.pid + + [program:medcattrainer] + command=sh -c "exec /home/scripts/run.sh 2>&1 | sed 
's/^/[medcattrainer] /'" + stdout_logfile=/dev/stdout + stdout_logfile_maxbytes=0 + stderr_logfile=/dev/stderr + stderr_logfile_maxbytes=0 + autorestart=true + + [program:bg-process] + command=sh -c "exec /home/scripts/run-bg-process.sh 2>&1 | sed 's/^/[bg-process] /'" + stdout_logfile=/dev/stdout + stdout_logfile_maxbytes=0 + stderr_logfile=/dev/stderr + stderr_logfile_maxbytes=0 + autorestart=true + + [program:db-backup] + command=sh -c "exec cron -f -l 2 2>&1 | sed 's/^/[db-backup] /'" + stdout_logfile=/dev/stdout + stdout_logfile_maxbytes=0 + stderr_logfile=/dev/stderr + stderr_logfile_maxbytes=0 + autorestart=true \ No newline at end of file From cb09a815605e3ec25af82e6f9aff1e75e34a5cbc Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Fri, 5 Sep 2025 11:50:29 +0000 Subject: [PATCH 02/14] ops: Creating Helm Chart for Medcat Trainer. Initial k8s manifests --- .../kubernetes/charts/medcat-trainer-helm/env-configmap.yaml | 2 +- .../charts/medcat-trainer-helm/medcattrainer-deployment.yaml | 1 + deployment/kubernetes/local_dev_startup.sh | 5 ++++- 3 files changed, 6 insertions(+), 2 deletions(-) diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/env-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/env-configmap.yaml index 2799762..66ee3ac 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/env-configmap.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/env-configmap.yaml @@ -2,7 +2,7 @@ apiVersion: v1 data: CONCEPT_SEARCH_SERVICE_HOST: solr CONCEPT_SEARCH_SERVICE_PORT: "8983" - CSRF_TRUSTED_ORIGINS: "" + CSRF_TRUSTED_ORIGINS: "http://localhost:8000" DB_BACKUP_DIR: /home/api/db-backup DB_DIR: /home/api/db DB_PATH: /home/api/db/db.sqlite3 diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-deployment.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-deployment.yaml index fdcfa01..13d0302 100644 --- 
a/deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-deployment.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-deployment.yaml @@ -28,6 +28,7 @@ spec: - -c - /etc/supervisord.conf env: + - name: CONCEPT_SEARCH_SERVICE_HOST valueFrom: configMapKeyRef: diff --git a/deployment/kubernetes/local_dev_startup.sh b/deployment/kubernetes/local_dev_startup.sh index b4999ee..4489250 100644 --- a/deployment/kubernetes/local_dev_startup.sh +++ b/deployment/kubernetes/local_dev_startup.sh @@ -16,4 +16,7 @@ helm test medcat-service --logs # Test with host header set for ingress routing # HOST_IP=10.211.112.82 -# curl --resolve chart-example.local:80:${HOST_IP} http://chart-example.local/api/info \ No newline at end of file +# curl --resolve chart-example.local:80:${HOST_IP} http://chart-example.local/api/info + +# Test medcat trainer +# kubectl port-forward svc/nginx 8000:8000 \ No newline at end of file From a1826b9644918d662e320d33fb4538223a6281e7 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Mon, 8 Sep 2025 10:23:00 +0000 Subject: [PATCH 03/14] ops: Creating Helm Chart for Medcat Trainer. 
Move Initial k8s manifests --- .../api-db-backup-persistentvolumeclaim.yaml | 0 .../{ => manual_manifests}/api-db-persistentvolumeclaim.yaml | 0 .../{ => manual_manifests}/api-media-persistentvolumeclaim.yaml | 0 .../{ => manual_manifests}/api-static-persistentvolumeclaim.yaml | 0 .../medcat-trainer-helm/{ => manual_manifests}/env-configmap.yaml | 0 .../medcattrainer-claim4-persistentvolumeclaim.yaml | 0 .../medcattrainer-claim5-persistentvolumeclaim.yaml | 0 .../{ => manual_manifests}/medcattrainer-deployment.yaml | 0 .../{ => manual_manifests}/medcattrainer-service.yaml | 0 .../{ => manual_manifests}/nginx-configmap.yaml | 0 .../{ => manual_manifests}/nginx-deployment.yaml | 0 .../medcat-trainer-helm/{ => manual_manifests}/nginx-service.yaml | 0 .../{ => manual_manifests}/solr-data-persistentvolumeclaim.yaml | 0 .../{ => manual_manifests}/solr-deployment.yaml | 0 .../medcat-trainer-helm/{ => manual_manifests}/solr-service.yaml | 0 .../{ => manual_manifests}/supervisord-configmap.yaml | 0 16 files changed, 0 insertions(+), 0 deletions(-) rename deployment/kubernetes/charts/medcat-trainer-helm/{ => manual_manifests}/api-db-backup-persistentvolumeclaim.yaml (100%) rename deployment/kubernetes/charts/medcat-trainer-helm/{ => manual_manifests}/api-db-persistentvolumeclaim.yaml (100%) rename deployment/kubernetes/charts/medcat-trainer-helm/{ => manual_manifests}/api-media-persistentvolumeclaim.yaml (100%) rename deployment/kubernetes/charts/medcat-trainer-helm/{ => manual_manifests}/api-static-persistentvolumeclaim.yaml (100%) rename deployment/kubernetes/charts/medcat-trainer-helm/{ => manual_manifests}/env-configmap.yaml (100%) rename deployment/kubernetes/charts/medcat-trainer-helm/{ => manual_manifests}/medcattrainer-claim4-persistentvolumeclaim.yaml (100%) rename deployment/kubernetes/charts/medcat-trainer-helm/{ => manual_manifests}/medcattrainer-claim5-persistentvolumeclaim.yaml (100%) rename deployment/kubernetes/charts/medcat-trainer-helm/{ => 
manual_manifests}/medcattrainer-deployment.yaml (100%) rename deployment/kubernetes/charts/medcat-trainer-helm/{ => manual_manifests}/medcattrainer-service.yaml (100%) rename deployment/kubernetes/charts/medcat-trainer-helm/{ => manual_manifests}/nginx-configmap.yaml (100%) rename deployment/kubernetes/charts/medcat-trainer-helm/{ => manual_manifests}/nginx-deployment.yaml (100%) rename deployment/kubernetes/charts/medcat-trainer-helm/{ => manual_manifests}/nginx-service.yaml (100%) rename deployment/kubernetes/charts/medcat-trainer-helm/{ => manual_manifests}/solr-data-persistentvolumeclaim.yaml (100%) rename deployment/kubernetes/charts/medcat-trainer-helm/{ => manual_manifests}/solr-deployment.yaml (100%) rename deployment/kubernetes/charts/medcat-trainer-helm/{ => manual_manifests}/solr-service.yaml (100%) rename deployment/kubernetes/charts/medcat-trainer-helm/{ => manual_manifests}/supervisord-configmap.yaml (100%) diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/api-db-backup-persistentvolumeclaim.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-db-backup-persistentvolumeclaim.yaml similarity index 100% rename from deployment/kubernetes/charts/medcat-trainer-helm/api-db-backup-persistentvolumeclaim.yaml rename to deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-db-backup-persistentvolumeclaim.yaml diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/api-db-persistentvolumeclaim.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-db-persistentvolumeclaim.yaml similarity index 100% rename from deployment/kubernetes/charts/medcat-trainer-helm/api-db-persistentvolumeclaim.yaml rename to deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-db-persistentvolumeclaim.yaml diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/api-media-persistentvolumeclaim.yaml 
b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-media-persistentvolumeclaim.yaml similarity index 100% rename from deployment/kubernetes/charts/medcat-trainer-helm/api-media-persistentvolumeclaim.yaml rename to deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-media-persistentvolumeclaim.yaml diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/api-static-persistentvolumeclaim.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-static-persistentvolumeclaim.yaml similarity index 100% rename from deployment/kubernetes/charts/medcat-trainer-helm/api-static-persistentvolumeclaim.yaml rename to deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-static-persistentvolumeclaim.yaml diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/env-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/env-configmap.yaml similarity index 100% rename from deployment/kubernetes/charts/medcat-trainer-helm/env-configmap.yaml rename to deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/env-configmap.yaml diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-claim4-persistentvolumeclaim.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-claim4-persistentvolumeclaim.yaml similarity index 100% rename from deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-claim4-persistentvolumeclaim.yaml rename to deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-claim4-persistentvolumeclaim.yaml diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-claim5-persistentvolumeclaim.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-claim5-persistentvolumeclaim.yaml similarity index 100% rename from deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-claim5-persistentvolumeclaim.yaml rename to 
deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-claim5-persistentvolumeclaim.yaml diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-deployment.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-deployment.yaml similarity index 100% rename from deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-deployment.yaml rename to deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-deployment.yaml diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-service.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-service.yaml similarity index 100% rename from deployment/kubernetes/charts/medcat-trainer-helm/medcattrainer-service.yaml rename to deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-service.yaml diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/nginx-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-configmap.yaml similarity index 100% rename from deployment/kubernetes/charts/medcat-trainer-helm/nginx-configmap.yaml rename to deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-configmap.yaml diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/nginx-deployment.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-deployment.yaml similarity index 100% rename from deployment/kubernetes/charts/medcat-trainer-helm/nginx-deployment.yaml rename to deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-deployment.yaml diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/nginx-service.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-service.yaml similarity index 100% rename from deployment/kubernetes/charts/medcat-trainer-helm/nginx-service.yaml rename to 
deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-service.yaml diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/solr-data-persistentvolumeclaim.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/solr-data-persistentvolumeclaim.yaml similarity index 100% rename from deployment/kubernetes/charts/medcat-trainer-helm/solr-data-persistentvolumeclaim.yaml rename to deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/solr-data-persistentvolumeclaim.yaml diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/solr-deployment.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/solr-deployment.yaml similarity index 100% rename from deployment/kubernetes/charts/medcat-trainer-helm/solr-deployment.yaml rename to deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/solr-deployment.yaml diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/solr-service.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/solr-service.yaml similarity index 100% rename from deployment/kubernetes/charts/medcat-trainer-helm/solr-service.yaml rename to deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/solr-service.yaml diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/supervisord-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/supervisord-configmap.yaml similarity index 100% rename from deployment/kubernetes/charts/medcat-trainer-helm/supervisord-configmap.yaml rename to deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/supervisord-configmap.yaml From f04cd04d501267c49ac3f58d648b6d2773b3e6bb Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 9 Sep 2025 10:39:51 +0000 Subject: [PATCH 04/14] ops: Creating Chart. Initialize chart. 
Add PVC --- .../charts/medcat-trainer-helm/.helmignore | 23 +++ .../charts/medcat-trainer-helm/Chart.lock | 6 + .../charts/medcat-trainer-helm/Chart.yaml | 30 +++ .../manual_manifests/env-configmap.yaml | 2 +- .../manual_manifests/medcat-configmap.yaml | 20 ++ .../manual_manifests/nginx-configmap.yaml | 2 +- .../medcat-trainer-helm/templates/NOTES.txt | 22 +++ .../templates/_helpers.tpl | 63 +++++++ .../templates/deployment.yaml | 78 ++++++++ .../medcat-trainer-helm/templates/hpa.yaml | 32 ++++ .../templates/ingress.yaml | 43 +++++ .../templates/nginx-configmap.yaml | 174 ++++++++++++++++++ .../medcat-trainer-helm/templates/pvc.yaml | 65 +++++++ .../templates/service.yaml | 33 ++++ .../templates/serviceaccount.yaml | 13 ++ .../templates/tests/test-connection.yaml | 15 ++ .../charts/medcat-trainer-helm/values.yaml | 147 +++++++++++++++ deployment/kubernetes/local_dev_startup.sh | 5 +- 18 files changed, 770 insertions(+), 3 deletions(-) create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/.helmignore create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/Chart.lock create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/Chart.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcat-configmap.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/templates/NOTES.txt create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/templates/_helpers.tpl create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/templates/deployment.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/templates/hpa.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/templates/ingress.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/templates/pvc.yaml create mode 100644 
deployment/kubernetes/charts/medcat-trainer-helm/templates/service.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/templates/serviceaccount.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/templates/tests/test-connection.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/values.yaml diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/.helmignore b/deployment/kubernetes/charts/medcat-trainer-helm/.helmignore new file mode 100644 index 0000000..0e8a0eb --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/.helmignore @@ -0,0 +1,23 @@ +# Patterns to ignore when building packages. +# This supports shell glob matching, relative path matching, and +# negation (prefixed with !). Only one pattern per line. +.DS_Store +# Common VCS dirs +.git/ +.gitignore +.bzr/ +.bzrignore +.hg/ +.hgignore +.svn/ +# Common backup files +*.swp +*.bak +*.tmp +*.orig +*~ +# Various IDEs +.project +.idea/ +*.tmproj +.vscode/ diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/Chart.lock b/deployment/kubernetes/charts/medcat-trainer-helm/Chart.lock new file mode 100644 index 0000000..7366129 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/Chart.lock @@ -0,0 +1,6 @@ +dependencies: +- name: solr + repository: oci://registry-1.docker.io/bitnamicharts + version: 9.6.10 +digest: sha256:f62be9fbd53de5aebf148b21c3ce7ff568a4d196f77f99a14c1207755d55e0e5 +generated: "2025-09-08T10:31:52.835071366Z" diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/Chart.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/Chart.yaml new file mode 100644 index 0000000..044bed4 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/Chart.yaml @@ -0,0 +1,30 @@ +apiVersion: v2 +name: medcat-trainer-helm +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. 
+# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +# It is recommended to use it with quotes. 
+appVersion: "1.16.0" + +# Chart.yaml +dependencies: +- name: solr + version: "9.6.10" + repository: "oci://registry-1.docker.io/bitnamicharts" diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/env-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/env-configmap.yaml index 66ee3ac..861c613 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/env-configmap.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/env-configmap.yaml @@ -1,6 +1,6 @@ apiVersion: v1 data: - CONCEPT_SEARCH_SERVICE_HOST: solr + CONCEPT_SEARCH_SERVICE_HOST: "medcat-trainer-solr" CONCEPT_SEARCH_SERVICE_PORT: "8983" CSRF_TRUSTED_ORIGINS: "http://localhost:8000" DB_BACKUP_DIR: /home/api/db-backup diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcat-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcat-configmap.yaml new file mode 100644 index 0000000..f978095 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcat-configmap.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: medcat-config +data: + base.txt: | + cat.linking.optim = {'type': 'standard', 'lr': 0.1} + cat.linking.filter_before_disamb = True + # 20 - INFO; 10 - DEBUG + cat.general.log_level = 20 + # Recommended is to have this one negative + cat.linking.similarity_threshold = -5 + # And this one to be used as the real th + cat.linking.similarity_threshold_trainer = -5 + # Used for limiting the number of occ of a concept in a project + cat.general.cui_count_limit = 100000000 + # Is unlink full + cat.general.full_unlink = False + # use this spacy model + cat.general.spacy_model = 'en_core_web_md' diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-configmap.yaml index 
2e00269..4832a8a 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-configmap.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-configmap.yaml @@ -155,7 +155,7 @@ data: } location /api/concepts/ { - proxy_pass http://solr:8983/solr/; + proxy_pass http://medcat-trainer-solr:8983/solr/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/NOTES.txt b/deployment/kubernetes/charts/medcat-trainer-helm/templates/NOTES.txt new file mode 100644 index 0000000..ba118f3 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/NOTES.txt @@ -0,0 +1,22 @@ +1. Get the application URL by running these commands: +{{- if .Values.ingress.enabled }} +{{- range $host := .Values.ingress.hosts }} + {{- range .paths }} + http{{ if $.Values.ingress.tls }}s{{ end }}://{{ $host.host }}{{ .path }} + {{- end }} +{{- end }} +{{- else if contains "NodePort" .Values.service.type }} + export NODE_PORT=$(kubectl get --namespace {{ .Release.Namespace }} -o jsonpath="{.spec.ports[0].nodePort}" services {{ include "medcat-trainer-helm.fullname" . }}) + export NODE_IP=$(kubectl get nodes --namespace {{ .Release.Namespace }} -o jsonpath="{.items[0].status.addresses[0].address}") + echo http://$NODE_IP:$NODE_PORT +{{- else if contains "LoadBalancer" .Values.service.type }} + NOTE: It may take a few minutes for the LoadBalancer IP to be available. + You can watch its status by running 'kubectl get --namespace {{ .Release.Namespace }} svc -w {{ include "medcat-trainer-helm.fullname" . }}' + export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "medcat-trainer-helm.fullname" . 
}} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") + echo http://$SERVICE_IP:{{ .Values.service.port }} +{{- else if contains "ClusterIP" .Values.service.type }} + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "medcat-trainer-helm.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") + echo "Visit http://127.0.0.1:8080 to use your application" + kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT +{{- end }} diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/_helpers.tpl b/deployment/kubernetes/charts/medcat-trainer-helm/templates/_helpers.tpl new file mode 100644 index 0000000..220624c --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/_helpers.tpl @@ -0,0 +1,63 @@ +{{/* +Expand the name of the chart. +*/}} +{{- define "medcat-trainer-helm.name" -}} +{{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "medcat-trainer-helm.fullname" -}} +{{- if .Values.fullnameOverride }} +{{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.nameOverride }} +{{- if contains $name .Release.Name }} +{{- .Release.Name | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Create chart name and version as used by the chart label. 
+*/}} +{{- define "medcat-trainer-helm.chart" -}} +{{- printf "%s-%s" .Chart.Name .Chart.Version | replace "+" "_" | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{/* +Common labels +*/}} +{{- define "medcat-trainer-helm.labels" -}} +helm.sh/chart: {{ include "medcat-trainer-helm.chart" . }} +{{ include "medcat-trainer-helm.selectorLabels" . }} +{{- if .Chart.AppVersion }} +app.kubernetes.io/version: {{ .Chart.AppVersion | quote }} +{{- end }} +app.kubernetes.io/managed-by: {{ .Release.Service }} +app.kubernetes.io/part-of: cogstack +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "medcat-trainer-helm.selectorLabels" -}} +app.kubernetes.io/name: {{ include "medcat-trainer-helm.name" . }} +app.kubernetes.io/instance: {{ .Release.Name }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "medcat-trainer-helm.serviceAccountName" -}} +{{- if .Values.serviceAccount.create }} +{{- default (include "medcat-trainer-helm.fullname" .) .Values.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.serviceAccount.name }} +{{- end }} +{{- end }} diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/deployment.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/deployment.yaml new file mode 100644 index 0000000..9e30f67 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/deployment.yaml @@ -0,0 +1,78 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "medcat-trainer-helm.fullname" . }} + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 4 }} +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "medcat-trainer-helm.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "medcat-trainer-helm.labels" . 
| nindent 8 }} + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "medcat-trainer-helm.serviceAccountName" . }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ .Chart.Name }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + {{- with .Values.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.volumeMounts }} + volumeMounts: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.volumes }} + volumes: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/hpa.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/hpa.yaml new file mode 100644 index 0000000..c1e4dfe --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/hpa.yaml @@ -0,0 +1,32 @@ +{{- if .Values.autoscaling.enabled }} +apiVersion: autoscaling/v2 +kind: HorizontalPodAutoscaler +metadata: + name: {{ include "medcat-trainer-helm.fullname" . 
}} + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 4 }} +spec: + scaleTargetRef: + apiVersion: apps/v1 + kind: Deployment + name: {{ include "medcat-trainer-helm.fullname" . }} + minReplicas: {{ .Values.autoscaling.minReplicas }} + maxReplicas: {{ .Values.autoscaling.maxReplicas }} + metrics: + {{- if .Values.autoscaling.targetCPUUtilizationPercentage }} + - type: Resource + resource: + name: cpu + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetCPUUtilizationPercentage }} + {{- end }} + {{- if .Values.autoscaling.targetMemoryUtilizationPercentage }} + - type: Resource + resource: + name: memory + target: + type: Utilization + averageUtilization: {{ .Values.autoscaling.targetMemoryUtilizationPercentage }} + {{- end }} +{{- end }} diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/ingress.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/ingress.yaml new file mode 100644 index 0000000..08b71eb --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/ingress.yaml @@ -0,0 +1,43 @@ +{{- if .Values.ingress.enabled -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ include "medcat-trainer-helm.fullname" . }} + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 4 }} + {{- with .Values.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- with .Values.ingress.className }} + ingressClassName: {{ . }} + {{- end }} + {{- if .Values.ingress.tls }} + tls: + {{- range .Values.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . | quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + rules: + {{- range .Values.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- range .paths }} + - path: {{ .path }} + {{- with .pathType }} + pathType: {{ . 
}} + {{- end }} + backend: + service: + name: {{ include "medcat-trainer-helm.fullname" $ }} + port: + number: {{ $.Values.service.port }} + {{- end }} + {{- end }} +{{- end }} diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml new file mode 100644 index 0000000..cf7eb26 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml @@ -0,0 +1,174 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "medcat-trainer-helm.fullname" . }}-nginx-config + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 4 }} +data: + nginx.conf: | + # Configuration File - Nginx Server Configs + # http://nginx.org/en/docs/dirindex.html + + # Sets the worker threads to the number of CPU cores available in the system for best performance. + # Should be > the number of CPU cores. + # Maximum number of connections = worker_processes * worker_connections + worker_processes auto; + + # Maximum number of open files per worker process. + # Should be > worker_connections. + worker_rlimit_nofile 8192; + + events { + # If you need more connections than this, you start optimizing your OS. + # That's probably the point at which you hire people who are smarter than you as this is *a lot* of requests. + # Should be < worker_rlimit_nofile. + worker_connections 8005; + } + + # Log errors and warnings to this file + # This is only used when you don't override it on a server{} level + #error_log logs/error.log warn; + + # The file storing the process ID of the main process + pid /var/run/nginx.pid; + + http { + + # Hide nginx version information. + server_tokens off; + + # Specify MIME types for files. + include mime.types; + default_type application/octet-stream; + + # Update charset_types to match updated mime.types. + # text/html is always included by charset module. 
+ charset_types text/css text/plain text/vnd.wap.wml application/javascript application/json application/rss+xml application/xml; + + # Include $http_x_forwarded_for within default format used in log files + log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + '$status $body_bytes_sent "$http_referer" ' + '"$http_user_agent" "$http_x_forwarded_for"'; + + # Log access to this file + # This is only used when you don't override it on a server{} level + #access_log logs/access.log main; + + # How long to allow each connection to stay idle. + # Longer values are better for each individual client, particularly for SSL, + # but means that worker connections are tied up longer. + keepalive_timeout 3000s; + + # Timeouts + proxy_connect_timeout 3000; + proxy_send_timeout 3000; + proxy_read_timeout 3000; + send_timeout 3000; + + # increase client body size - Model packs can be over 3G.s + client_max_body_size 4000M; + # Speed up file transfers by using sendfile() to copy directly + # between descriptors rather than using read()/write(). + # For performance reasons, on FreeBSD systems w/ ZFS + # this option should be disabled as ZFS's ARC caches + # frequently used files in RAM by default. + sendfile on; + + # Don't send out partial frames; this increases throughput + # since TCP frames are filled up before being sent out. + tcp_nopush on; + + # Enable gzip compression. + gzip on; + + # Compression level (1-9). + # 5 is a perfect compromise between size and CPU usage, offering about + # 75% reduction for most ASCII files (almost identical to level 9). + gzip_comp_level 5; + + # Don't compress anything that's already small and unlikely to shrink much + # if at all (the default is 20 bytes, which is bad as that usually leads to + # larger files after gzipping). + gzip_min_length 256; + + # Compress data even for clients that are connecting to us via proxies, + # identified by the "Via" header (required for CloudFront). 
+ gzip_proxied any; + + # Tell proxies to cache both the gzipped and regular version of a resource + # whenever the client's Accept-Encoding capabilities header varies; + # Avoids the issue where a non-gzip capable client (which is extremely rare + # today) would display gibberish if their proxy gave them the gzipped version. + gzip_vary on; + + # Compress all output labeled with one of the following MIME-types. + gzip_types + application/atom+xml + application/javascript + application/json + application/ld+json + application/manifest+json + application/rss+xml + application/vnd.geo+json + application/vnd.ms-fontobject + application/x-font-ttf + application/x-web-app-manifest+json + application/xhtml+xml + application/xml + font/opentype + image/bmp + image/svg+xml + image/x-icon + text/cache-manifest + text/css + text/plain + text/vcard + text/vnd.rim.location.xloc + text/vtt + text/x-component + text/x-cross-domain-policy; + # text/html is always compressed by gzip module + + # This should be turned on if you are going to have pre-compressed copies (.gz) of + # static files available. If not it should be left off as it will cause extra I/O + # for the check. It is best if you enable this in a location{} block for + # a specific directory, or on an individual server{} level. + # gzip_static on; + + # Include files in the sites-enabled folder. server{} configuration files should be + # placed in the sites-available folder, and then the configuration should be enabled + # by creating a symlink to it in the sites-enabled folder. + # See doc/sites-enabled.md for more info. 
+ include sites-enabled/*; + } + sitesenabled.medcattrainer: | + server { + listen 8000; + server_name localhost; + charset utf-8; + large_client_header_buffers 4 32k; + + location /static { + alias /home/api/static; + } + + location /media { + alias /home/api/media; + } + + location /api/concepts/ { + proxy_pass http://medcat-trainer-solr:8983/solr/; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } + + location / { + proxy_pass http://medcattrainer:8000; + proxy_set_header Host $host; + proxy_set_header X-Real-IP $remote_addr; + proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; + } + + } + diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/pvc.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/pvc.yaml new file mode 100644 index 0000000..f58c1b7 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/pvc.yaml @@ -0,0 +1,65 @@ +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "medcat-trainer-helm.fullname" . }}-api-media # This stores the uploaded CDB and Vocab models + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 4 }} +spec: + {{- with .Values.persistence.media.storageClassName }} + storageClassName: {{.}} + {{- end }} + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{.Values.persistence.media.size}} +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "medcat-trainer-helm.fullname" . }}-api-static # This stores the HTML for the website. Disk use was 72MB. + labels: + {{- include "medcat-trainer-helm.labels" . 
| nindent 4 }} +spec: + {{- with .Values.persistence.media.storageClassName }} + storageClassName: {{.}} + {{- end }} + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{.Values.persistence.static.size}} +--- +#TODO - only create if type using is sqlite +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "medcat-trainer-helm.fullname" . }}-api-db # SQLiteDB + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 4 }} +spec: + {{- with .Values.persistence.media.storageClassName }} + storageClassName: {{.}} + {{- end }} + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{.Values.persistence.sqlite.size}} +--- +#TODO - only create if type using is sqlite +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: {{ include "medcat-trainer-helm.fullname" . }}-api-db-backup # SQLiteDB + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 4 }} +spec: + {{- with .Values.persistence.media.storageClassName }} + storageClassName: {{.}} + {{- end }} + accessModes: + - ReadWriteOnce + resources: + requests: + storage: {{.Values.persistence.sqlite.backupDbSize}} \ No newline at end of file diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/service.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/service.yaml new file mode 100644 index 0000000..2a171e4 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/service.yaml @@ -0,0 +1,33 @@ +apiVersion: v1 +kind: Service +metadata: + name: {{ include "medcat-trainer-helm.fullname" . }}-medcat-trainer + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 4 }} +spec: + type: {{ .Values.service.type }} + ports: + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http + selector: + {{- include "medcat-trainer-helm.selectorLabels" . 
| nindent 4 }} + +--- +apiVersion: v1 +kind: Service +metadata: + annotations: + kompose.cmd: kompose convert --file docker-compose.yml + kompose.version: 1.34.0 (cbf2835db) + labels: + io.kompose.service: medcattrainer + name: medcattrainer +spec: + ports: + - name: "8000" + port: 8000 + targetPort: 8000 + selector: + io.kompose.service: medcattrainer diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/serviceaccount.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/serviceaccount.yaml new file mode 100644 index 0000000..c7d642d --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/serviceaccount.yaml @@ -0,0 +1,13 @@ +{{- if .Values.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "medcat-trainer-helm.serviceAccountName" . }} + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 4 }} + {{- with .Values.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +automountServiceAccountToken: {{ .Values.serviceAccount.automount }} +{{- end }} diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/tests/test-connection.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/tests/test-connection.yaml new file mode 100644 index 0000000..907b2d3 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/tests/test-connection.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Pod +metadata: + name: "{{ include "medcat-trainer-helm.fullname" . }}-test-connection" + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 4 }} + annotations: + "helm.sh/hook": test +spec: + containers: + - name: wget + image: busybox + command: ['wget'] + args: ['{{ include "medcat-trainer-helm.fullname" . 
}}:{{ .Values.service.port }}'] + restartPolicy: Never diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml new file mode 100644 index 0000000..23e67a7 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml @@ -0,0 +1,147 @@ +# Default values for medcat-trainer-helm. +# This is a YAML-formatted file. +# Declare variables to be passed into your templates. + +# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/ +replicaCount: 1 + +# This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/ +image: + repository: nginx + # This sets the pull policy for images. + pullPolicy: IfNotPresent + # Overrides the image tag whose default is the chart appVersion. + tag: "" + +# This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ +imagePullSecrets: [] +# This is to override the chart name. +nameOverride: "" +fullnameOverride: "" + +# This section builds out the service account more information can be found here: https://kubernetes.io/docs/concepts/security/service-accounts/ +serviceAccount: + # Specifies whether a service account should be created + create: true + # Automatically mount a ServiceAccount's API credentials? + automount: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "" + +# This is for setting Kubernetes Annotations to a Pod. +# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/annotations/ +podAnnotations: {} +# This is for setting Kubernetes Labels to a Pod. 
+# For more information checkout: https://kubernetes.io/docs/concepts/overview/working-with-objects/labels/ +podLabels: {} + +podSecurityContext: {} + # fsGroup: 2000 + +securityContext: {} + # capabilities: + # drop: + # - ALL + # readOnlyRootFilesystem: true + # runAsNonRoot: true + # runAsUser: 1000 + +# This is for setting up a service more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/ +service: + # This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types + type: ClusterIP + # This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports + port: 80 + +# This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/ +ingress: + enabled: false + className: "" + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: chart-example.local + paths: + - path: / + pathType: ImplementationSpecific + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + +resources: {} + # We usually recommend not to specify default resources and to leave this as a conscious + # choice for the user. This also increases chances charts run on environments with little + # resources, such as Minikube. If you do want to specify resources, uncomment the following + # lines, adjust them as necessary, and remove the curly braces after 'resources:'. 
+ # limits: + # cpu: 100m + # memory: 128Mi + # requests: + # cpu: 100m + # memory: 128Mi + +# This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/ +livenessProbe: + httpGet: + path: / + port: http +readinessProbe: + httpGet: + path: / + port: http + +# This section is for setting up autoscaling more information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/ +autoscaling: + enabled: false + minReplicas: 1 + maxReplicas: 100 + targetCPUUtilizationPercentage: 80 + # targetMemoryUtilizationPercentage: 80 + +# Additional volumes on the output Deployment definition. +volumes: [] +# - name: foo +# secret: +# secretName: mysecret +# optional: false + +# Additional volumeMounts on the output Deployment definition. +volumeMounts: [] +# - name: foo +# mountPath: "/etc/foo" +# readOnly: true + +nodeSelector: {} + +tolerations: [] + +affinity: {} + +persistence: + media: + size: 8Gi + static: + size: 100Mi + sqlite: + size: 100Mi + backupDbSize: 300Mi + storageClassName: "" + dbEngine: sqlite3 + + + +solr: + # replicaCount: 1 + # collectionShards: 1 + # collectionReplicas: 1 + # zookeeper: + # replicaCount: 1 + persistence: + size: 1Gi + auth: + enabled: false diff --git a/deployment/kubernetes/local_dev_startup.sh b/deployment/kubernetes/local_dev_startup.sh index 4489250..2265adc 100644 --- a/deployment/kubernetes/local_dev_startup.sh +++ b/deployment/kubernetes/local_dev_startup.sh @@ -19,4 +19,7 @@ helm test medcat-service --logs # curl --resolve chart-example.local:80:${HOST_IP} http://chart-example.local/api/info # Test medcat trainer -# kubectl port-forward svc/nginx 8000:8000 \ No newline at end of file +# kubectl port-forward svc/nginx 8000:8000 + +helm upgrade medcat-trainer ./medcat-trainer-helm --install --recreate-pods --wait --timeout 5m0s # Install if it doesnt already exist, else upgrade +# 
kubectl port-forward svc/medcat-trainer-solr 8983:8983 From 95530f12f94c7a5e9a5ef2db7bb77036c0951cd7 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 9 Sep 2025 14:22:38 +0000 Subject: [PATCH 05/14] ops: Creating Chart. Initialize chart. Add ConfigMaps --- .../templates/_helpers.tpl | 30 +++++++++ .../templates/env-configmap.yaml | 17 +++++ .../templates/medcat-configmap.yaml | 11 ++++ .../templates/nginx-configmap.yaml | 5 +- .../medcat-trainer-helm/templates/pvc.yaml | 4 ++ .../templates/service.yaml | 27 ++++---- .../templates/supervisord-configmap.yaml | 38 +++++++++++ .../charts/medcat-trainer-helm/values.yaml | 66 +++++++++++++++---- 8 files changed, 171 insertions(+), 27 deletions(-) create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/templates/env-configmap.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-configmap.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/templates/supervisord-configmap.yaml diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/_helpers.tpl b/deployment/kubernetes/charts/medcat-trainer-helm/templates/_helpers.tpl index 220624c..89140b4 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/_helpers.tpl +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/_helpers.tpl @@ -61,3 +61,33 @@ Create the name of the service account to use {{- default "default" .Values.serviceAccount.name }} {{- end }} {{- end }} + + +{{- /* +Return Solr host: either user-supplied or constructed from release name +*/ -}} +{{- define "medcat-trainer-helm.solrHost" -}} +{{- if .Values.solrHost }} +{{ .Values.solrHost }} +{{- else }} +{{- include "medcat-trainer-helm.fullname" . 
}}-solr +{{- end }} +{{- end }} + +{{- /* +Return Solr port: either user-supplied or default from values +*/ -}} +{{- define "medcat-trainer-helm.solrPort" -}} +{{- if .Values.solrPort }} +{{ .Values.solrPort }} +{{- else }} +{{- .Values.solr.service.ports.http }} +{{- end }} +{{- end }} + +{{- /* +Return full Solr URL: combines host and port +*/ -}} +{{- define "medcat-trainer-helm.solrURL" -}} +http://{{ include "medcat-trainer-helm.solrHost" . }}:{{ include "medcat-trainer-helm.solrPort" . }} +{{- end }} \ No newline at end of file diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/env-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/env-configmap.yaml new file mode 100644 index 0000000..3da9ada --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/env-configmap.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "medcat-trainer-helm.fullname" . }}-medcat-trainer-env + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 4 }} + app.kubernetes.io/component: medcat-trainer +data: + CONCEPT_SEARCH_SERVICE_HOST: {{ include "medcat-trainer-helm.solrHost" . | quote }} + CONCEPT_SEARCH_SERVICE_PORT: {{ include "medcat-trainer-helm.solrPort" . | quote }} + MEDCAT_CONFIG_FILE: "/home/configs/base.txt" + DB_BACKUP_DIR: "/home/api/db-backup" + DB_DIR: "/home/api/db" + DB_PATH: "/home/api/db/db.sqlite3" +{{- range $key, $value := .Values.env }} + {{ $key }}: {{ $value | quote }} +{{- end }} \ No newline at end of file diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-configmap.yaml new file mode 100644 index 0000000..34ad162 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-configmap.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "medcat-trainer-helm.fullname" . 
}}-medcat-config + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 4 }} + app.kubernetes.io/component: medcat-trainer +data: + # This is mounted in the path for MEDCAT_CONFIG_FILE in the backend. Default to /home/configs/base.txt + base.txt: | +{{ .Values.medcatConfig | indent 4 }} \ No newline at end of file diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml index cf7eb26..f94be7f 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml @@ -4,6 +4,7 @@ metadata: name: {{ include "medcat-trainer-helm.fullname" . }}-nginx-config labels: {{- include "medcat-trainer-helm.labels" . | nindent 4 }} + app.kubernetes.io/component: nginx data: nginx.conf: | # Configuration File - Nginx Server Configs @@ -157,14 +158,14 @@ data: } location /api/concepts/ { - proxy_pass http://medcat-trainer-solr:8983/solr/; + proxy_pass {{ include "medcat-trainer-helm.solrURL" . }}/solr/; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; } location / { - proxy_pass http://medcattrainer:8000; + proxy_pass http://{{ include "medcat-trainer-helm.fullname" . }}-medcat-trainer:8000; proxy_set_header Host $host; proxy_set_header X-Real-IP $remote_addr; proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/pvc.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/pvc.yaml index f58c1b7..ad4f814 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/pvc.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/pvc.yaml @@ -4,6 +4,7 @@ metadata: name: {{ include "medcat-trainer-helm.fullname" . 
}}-api-media # This stores the uploaded CDB and Vocab models labels: {{- include "medcat-trainer-helm.labels" . | nindent 4 }} + app.kubernetes.io/component: medcat-trainer spec: {{- with .Values.persistence.media.storageClassName }} storageClassName: {{.}} @@ -20,6 +21,7 @@ metadata: name: {{ include "medcat-trainer-helm.fullname" . }}-api-static # This stores the HTML for the website. Disk use was 72MB. labels: {{- include "medcat-trainer-helm.labels" . | nindent 4 }} + app.kubernetes.io/component: medcat-trainer spec: {{- with .Values.persistence.media.storageClassName }} storageClassName: {{.}} @@ -37,6 +39,7 @@ metadata: name: {{ include "medcat-trainer-helm.fullname" . }}-api-db # SQLiteDB labels: {{- include "medcat-trainer-helm.labels" . | nindent 4 }} + app.kubernetes.io/component: medcat-trainer spec: {{- with .Values.persistence.media.storageClassName }} storageClassName: {{.}} @@ -54,6 +57,7 @@ metadata: name: {{ include "medcat-trainer-helm.fullname" . }}-api-db-backup # SQLiteDB labels: {{- include "medcat-trainer-helm.labels" . | nindent 4 }} + app.kubernetes.io/component: medcat-trainer spec: {{- with .Values.persistence.media.storageClassName }} storageClassName: {{.}} diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/service.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/service.yaml index 2a171e4..8151698 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/service.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/service.yaml @@ -4,30 +4,33 @@ metadata: name: {{ include "medcat-trainer-helm.fullname" . }}-medcat-trainer labels: {{- include "medcat-trainer-helm.labels" . | nindent 4 }} + app.kubernetes.io/component: medcat-trainer spec: - type: {{ .Values.service.type }} + type: ClusterIP ports: - - port: {{ .Values.service.port }} + - port: 8000 targetPort: http protocol: TCP name: http selector: {{- include "medcat-trainer-helm.selectorLabels" . 
| nindent 4 }} - + app.kubernetes.io/component: medcat-trainer --- apiVersion: v1 kind: Service metadata: - annotations: - kompose.cmd: kompose convert --file docker-compose.yml - kompose.version: 1.34.0 (cbf2835db) + name: {{ include "medcat-trainer-helm.fullname" . }}-nginx labels: - io.kompose.service: medcattrainer - name: medcattrainer + {{- include "medcat-trainer-helm.labels" . | nindent 4 }} + app.kubernetes.io/component: nginx spec: + type: {{ .Values.service.type }} ports: - - name: "8000" - port: 8000 - targetPort: 8000 + - port: {{ .Values.service.port }} + targetPort: http + protocol: TCP + name: http selector: - io.kompose.service: medcattrainer + {{- include "medcat-trainer-helm.selectorLabels" . | nindent 4 }} + app.kubernetes.io/component: nginx +--- diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/supervisord-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/supervisord-configmap.yaml new file mode 100644 index 0000000..99f3588 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/supervisord-configmap.yaml @@ -0,0 +1,38 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "medcat-trainer-helm.fullname" . }}-supervisord-config + labels: + {{- include "medcat-trainer-helm.labels" . 
| nindent 4 }} + app.kubernetes.io/component: medcat-trainer +data: + supervisord.conf: | + [supervisord] + nodaemon=true + user=root + logfile=/var/log/supervisord.log + pidfile=/var/run/supervisord.pid + + [program:medcattrainer] + command=sh -c "exec /home/scripts/run.sh 2>&1 | sed 's/^/[medcattrainer] /'" + stdout_logfile=/dev/stdout + stdout_logfile_maxbytes=0 + stderr_logfile=/dev/stderr + stderr_logfile_maxbytes=0 + autorestart=true + + [program:bg-process] + command=sh -c "exec /home/scripts/run-bg-process.sh 2>&1 | sed 's/^/[bg-process] /'" + stdout_logfile=/dev/stdout + stdout_logfile_maxbytes=0 + stderr_logfile=/dev/stderr + stderr_logfile_maxbytes=0 + autorestart=true + + [program:db-backup] + command=sh -c "exec cron -f -l 2 2>&1 | sed 's/^/[db-backup] /'" + stdout_logfile=/dev/stdout + stdout_logfile_maxbytes=0 + stderr_logfile=/dev/stderr + stderr_logfile_maxbytes=0 + autorestart=true \ No newline at end of file diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml index 23e67a7..0c34a0e 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml @@ -2,6 +2,58 @@ # This is a YAML-formatted file. # Declare variables to be passed into your templates. 
+env: + CSRF_TRUSTED_ORIGINS: "http://localhost:8000" + DB_ENGINE: "sqlite3" + DEBUG: "1" + EMAIL_HOST: "mail.cogstack.org" + EMAIL_PASS: "to-be-changed" + EMAIL_PORT: "465" + EMAIL_USER: "example@cogstack.org" + ENV: "non-prod" + LOAD_EXAMPLES: "1" + LOAD_NUM_DOC_PAGES: "10" + MAX_DATASET_SIZE: "10000" + MAX_MEDCAT_MODELS: "2" + OPENBLAS_NUM_THREADS: "1" + RESUBMIT_ALL_ON_STARTUP: "0" + UNIQUE_DOC_NAMES_IN_DATASETS: "True" + + +persistence: + media: + # Size of PVC for files like model packs and other media downloaded by medcat trainer + size: 8Gi + static: + # Size of the PVC for the static HTML site + size: 100Mi + sqlite: + # Size of the PVC for the Sqlite database + size: 100Mi + # Size of the PVC for the Sqlite backups + backupDbSize: 300Mi + storageClassName: "" + # Engine can be 'sqlite3' or 'postgresql' + dbEngine: sqlite3 + +# MedCAT config as described here: https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/medcat/config/config.py +medcatConfig: | + cat.linking.optim = {'type': 'standard', 'lr': 0.1} + cat.linking.filter_before_disamb = True + # 20 - INFO; 10 - DEBUG + cat.general.log_level = 20 + # Recommended is to have this one negative + cat.linking.similarity_threshold = -5 + # And this one to be used as the real th + cat.linking.similarity_threshold_trainer = -5 + # Used for limiting the number of occ of a concept in a project + cat.general.cui_count_limit = 100000000 + # Is unlink full + cat.general.full_unlink = False + # use this spacy model + cat.general.spacy_model = 'en_core_web_md' + + # This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/ replicaCount: 1 @@ -122,19 +174,6 @@ tolerations: [] affinity: {} -persistence: - media: - size: 8Gi - static: - size: 100Mi - sqlite: - size: 100Mi - backupDbSize: 300Mi - storageClassName: "" - dbEngine: sqlite3 - - - solr: # replicaCount: 1 # collectionShards: 1 @@ -144,4 +183,5 @@ solr: persistence: size: 
1Gi auth: + # TODO: support SOLR auth from medcat trainer API enabled: false From 8eed5e00cee724d1d43b5679a5683cd220b1af13 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 9 Sep 2025 15:44:18 +0000 Subject: [PATCH 06/14] ops: Creating Chart. Initialize chart. AddDeployments --- .../charts/medcat-trainer-helm/Chart.yaml | 4 +- .../medcat-trainer-helm/templates/NOTES.txt | 2 +- .../templates/_helpers.tpl | 4 +- .../templates/deployment.yaml | 78 ------------------- .../templates/env-configmap.yaml | 17 ---- .../templates/medcat-configmap.yaml | 11 --- .../templates/nginx-configmap.yaml | 2 +- .../templates/supervisord-configmap.yaml | 38 --------- .../charts/medcat-trainer-helm/values.yaml | 74 ++++++++++-------- 9 files changed, 49 insertions(+), 181 deletions(-) delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/templates/deployment.yaml delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/templates/env-configmap.yaml delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-configmap.yaml delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/templates/supervisord-configmap.yaml diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/Chart.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/Chart.yaml index 044bed4..be93b1a 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/Chart.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/Chart.yaml @@ -15,13 +15,13 @@ type: application # This is the chart version. This version number should be incremented each time you make changes # to the chart and its templates, including the app version. # Versions are expected to follow Semantic Versioning (https://semver.org/) -version: 0.1.0 +version: 0.0.1 # This is the version number of the application being deployed. This version number should be # incremented each time you make changes to the application. 
Versions are not expected to # follow Semantic Versioning. They should reflect the version the application is using. # It is recommended to use it with quotes. -appVersion: "1.16.0" +appVersion: "latest" # Chart.yaml dependencies: diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/NOTES.txt b/deployment/kubernetes/charts/medcat-trainer-helm/templates/NOTES.txt index ba118f3..fcfca0d 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/NOTES.txt +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/NOTES.txt @@ -15,7 +15,7 @@ export SERVICE_IP=$(kubectl get svc --namespace {{ .Release.Namespace }} {{ include "medcat-trainer-helm.fullname" . }} --template "{{"{{ range (index .status.loadBalancer.ingress 0) }}{{.}}{{ end }}"}}") echo http://$SERVICE_IP:{{ .Values.service.port }} {{- else if contains "ClusterIP" .Values.service.type }} - export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "medcat-trainer-helm.name" . }},app.kubernetes.io/instance={{ .Release.Name }}" -o jsonpath="{.items[0].metadata.name}") + export POD_NAME=$(kubectl get pods --namespace {{ .Release.Namespace }} -l "app.kubernetes.io/name={{ include "medcat-trainer-helm.name" . 
}},app.kubernetes.io/instance={{ .Release.Name }},app.kubernetes.io/component=nginx" -o jsonpath="{.items[0].metadata.name}") export CONTAINER_PORT=$(kubectl get pod --namespace {{ .Release.Namespace }} $POD_NAME -o jsonpath="{.spec.containers[0].ports[0].containerPort}") echo "Visit http://127.0.0.1:8080 to use your application" kubectl --namespace {{ .Release.Namespace }} port-forward $POD_NAME 8080:$CONTAINER_PORT diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/_helpers.tpl b/deployment/kubernetes/charts/medcat-trainer-helm/templates/_helpers.tpl index 89140b4..c09de5f 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/_helpers.tpl +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/_helpers.tpl @@ -65,12 +65,14 @@ Create the name of the service account to use {{- /* Return Solr host: either user-supplied or constructed from release name + +TODO: Make Solr use the chart fullname instead of release name */ -}} {{- define "medcat-trainer-helm.solrHost" -}} {{- if .Values.solrHost }} {{ .Values.solrHost }} {{- else }} -{{- include "medcat-trainer-helm.fullname" . }}-solr +{{- .Release.Name }}-solr {{- end }} {{- end }} diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/deployment.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/deployment.yaml deleted file mode 100644 index 9e30f67..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/deployment.yaml +++ /dev/null @@ -1,78 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - name: {{ include "medcat-trainer-helm.fullname" . }} - labels: - {{- include "medcat-trainer-helm.labels" . | nindent 4 }} -spec: - {{- if not .Values.autoscaling.enabled }} - replicas: {{ .Values.replicaCount }} - {{- end }} - selector: - matchLabels: - {{- include "medcat-trainer-helm.selectorLabels" . | nindent 6 }} - template: - metadata: - {{- with .Values.podAnnotations }} - annotations: - {{- toYaml . 
| nindent 8 }} - {{- end }} - labels: - {{- include "medcat-trainer-helm.labels" . | nindent 8 }} - {{- with .Values.podLabels }} - {{- toYaml . | nindent 8 }} - {{- end }} - spec: - {{- with .Values.imagePullSecrets }} - imagePullSecrets: - {{- toYaml . | nindent 8 }} - {{- end }} - serviceAccountName: {{ include "medcat-trainer-helm.serviceAccountName" . }} - {{- with .Values.podSecurityContext }} - securityContext: - {{- toYaml . | nindent 8 }} - {{- end }} - containers: - - name: {{ .Chart.Name }} - {{- with .Values.securityContext }} - securityContext: - {{- toYaml . | nindent 12 }} - {{- end }} - image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" - imagePullPolicy: {{ .Values.image.pullPolicy }} - ports: - - name: http - containerPort: {{ .Values.service.port }} - protocol: TCP - {{- with .Values.livenessProbe }} - livenessProbe: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with .Values.readinessProbe }} - readinessProbe: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with .Values.resources }} - resources: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with .Values.volumeMounts }} - volumeMounts: - {{- toYaml . | nindent 12 }} - {{- end }} - {{- with .Values.volumes }} - volumes: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.nodeSelector }} - nodeSelector: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.affinity }} - affinity: - {{- toYaml . | nindent 8 }} - {{- end }} - {{- with .Values.tolerations }} - tolerations: - {{- toYaml . 
| nindent 8 }} - {{- end }} diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/env-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/env-configmap.yaml deleted file mode 100644 index 3da9ada..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/env-configmap.yaml +++ /dev/null @@ -1,17 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "medcat-trainer-helm.fullname" . }}-medcat-trainer-env - labels: - {{- include "medcat-trainer-helm.labels" . | nindent 4 }} - app.kubernetes.io/component: medcat-trainer -data: - CONCEPT_SEARCH_SERVICE_HOST: {{ include "medcat-trainer-helm.solrHost" . | quote }} - CONCEPT_SEARCH_SERVICE_PORT: {{ include "medcat-trainer-helm.solrPort" . | quote }} - MEDCAT_CONFIG_FILE: "/home/configs/base.txt" - DB_BACKUP_DIR: "/home/api/db-backup" - DB_DIR: "/home/api/db" - DB_PATH: "/home/api/db/db.sqlite3" -{{- range $key, $value := .Values.env }} - {{ $key }}: {{ $value | quote }} -{{- end }} \ No newline at end of file diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-configmap.yaml deleted file mode 100644 index 34ad162..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-configmap.yaml +++ /dev/null @@ -1,11 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "medcat-trainer-helm.fullname" . }}-medcat-config - labels: - {{- include "medcat-trainer-helm.labels" . | nindent 4 }} - app.kubernetes.io/component: medcat-trainer -data: - # This is mounted in the path for MEDCAT_CONFIG_FILE in the backend. 
Default to /home/configs/base.txt - base.txt: | -{{ .Values.medcatConfig | indent 4 }} \ No newline at end of file diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml index f94be7f..988be8c 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml @@ -144,7 +144,7 @@ data: } sitesenabled.medcattrainer: | server { - listen 8000; + listen {{ .Values.service.port }}; server_name localhost; charset utf-8; large_client_header_buffers 4 32k; diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/supervisord-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/supervisord-configmap.yaml deleted file mode 100644 index 99f3588..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/supervisord-configmap.yaml +++ /dev/null @@ -1,38 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: {{ include "medcat-trainer-helm.fullname" . }}-supervisord-config - labels: - {{- include "medcat-trainer-helm.labels" . 
| nindent 4 }} - app.kubernetes.io/component: medcat-trainer -data: - supervisord.conf: | - [supervisord] - nodaemon=true - user=root - logfile=/var/log/supervisord.log - pidfile=/var/run/supervisord.pid - - [program:medcattrainer] - command=sh -c "exec /home/scripts/run.sh 2>&1 | sed 's/^/[medcattrainer] /'" - stdout_logfile=/dev/stdout - stdout_logfile_maxbytes=0 - stderr_logfile=/dev/stderr - stderr_logfile_maxbytes=0 - autorestart=true - - [program:bg-process] - command=sh -c "exec /home/scripts/run-bg-process.sh 2>&1 | sed 's/^/[bg-process] /'" - stdout_logfile=/dev/stdout - stdout_logfile_maxbytes=0 - stderr_logfile=/dev/stderr - stderr_logfile_maxbytes=0 - autorestart=true - - [program:db-backup] - command=sh -c "exec cron -f -l 2 2>&1 | sed 's/^/[db-backup] /'" - stdout_logfile=/dev/stdout - stdout_logfile_maxbytes=0 - stderr_logfile=/dev/stderr - stderr_logfile_maxbytes=0 - autorestart=true \ No newline at end of file diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml index 0c34a0e..44068bd 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml @@ -2,6 +2,22 @@ # This is a YAML-formatted file. # Declare variables to be passed into your templates. +# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/ +replicaCount: 1 + +# This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/ +image: + repository: cogstacksystems/medcat-trainer + # This sets the pull policy for images. + pullPolicy: Always + # Overrides the image tag whose default is the chart appVersion. 
+ tag: "latest" +nginxImage: + repository: nginx + pullPolicy: IfNotPresent + tag: "1.29.1" + +# Add any environment variables here that should be set in the medcat-trainer container env: CSRF_TRUSTED_ORIGINS: "http://localhost:8000" DB_ENGINE: "sqlite3" @@ -53,17 +69,22 @@ medcatConfig: | # use this spacy model cat.general.spacy_model = 'en_core_web_md' - -# This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/ -replicaCount: 1 - -# This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/ -image: - repository: nginx - # This sets the pull policy for images. - pullPolicy: IfNotPresent - # Overrides the image tag whose default is the chart appVersion. - tag: "" +solr: + # replicaCount: 1 + # collectionShards: 1 + # collectionReplicas: 1 + zookeeper: + # replicaCount: 1 + persistence: + size: 1Gi + persistence: + size: 1Gi + auth: + # TODO: support SOLR auth from medcat trainer API + enabled: false + podLabels: + app.kubernetes.io/component: solr + app.kubernetes.io/part-of: cogstack # This is for the secrets for pulling an image from a private repository more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/pull-image-private-registry/ imagePullSecrets: [] @@ -106,7 +127,7 @@ service: # This sets the service type more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#publishing-services-service-types type: ClusterIP # This sets the ports more information can be found here: https://kubernetes.io/docs/concepts/services-networking/service/#field-spec-ports - port: 80 + port: 8000 # This block is for setting up the ingress for more information can be found here: https://kubernetes.io/docs/concepts/services-networking/ingress/ ingress: @@ -138,14 +159,15 @@ resources: {} # memory: 128Mi # This is to setup the liveness and readiness probes 
more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/ -livenessProbe: - httpGet: - path: / - port: http -readinessProbe: - httpGet: - path: / - port: http +# TODO Add liveness and readiness to django app +# livenessProbe: +# httpGet: +# path: / +# port: http +# readinessProbe: +# httpGet: +# path: / +# port: http # This section is for setting up autoscaling more information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/ autoscaling: @@ -173,15 +195,3 @@ nodeSelector: {} tolerations: [] affinity: {} - -solr: - # replicaCount: 1 - # collectionShards: 1 - # collectionReplicas: 1 - # zookeeper: - # replicaCount: 1 - persistence: - size: 1Gi - auth: - # TODO: support SOLR auth from medcat trainer API - enabled: false From 2cfad6dd3bc7762dbba79e091a9217b64cae31cb Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 9 Sep 2025 15:44:30 +0000 Subject: [PATCH 07/14] ops: Creating Chart. Initialize chart. 
AddDeployments --- .../templates/medcat-trainer-configmap.yaml | 41 ++++++ .../templates/medcat-trainer-deployment.yaml | 117 ++++++++++++++++++ .../medcat-trainer-env-configmap.yaml | 17 +++ .../templates/nginx-deployment.yaml | 101 +++++++++++++++ 4 files changed, 276 insertions(+) create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-configmap.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-deployment.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-env-configmap.yaml create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-deployment.yaml diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-configmap.yaml new file mode 100644 index 0000000..a9eed8c --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-configmap.yaml @@ -0,0 +1,41 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "medcat-trainer-helm.fullname" . }}-medcat-trainer-config + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 4 }} + app.kubernetes.io/component: medcat-trainer +data: + # This is mounted in the path for MEDCAT_CONFIG_FILE in the backend. 
Default to /home/configs/base.txt + medcat-base.txt: | +{{ .Values.medcatConfig | indent 4 }} + supervisord.conf: | + [supervisord] + nodaemon=true + user=root + logfile=/var/log/supervisord.log + pidfile=/var/run/supervisord.pid + + [program:medcattrainer] + command=sh -c "exec /home/scripts/run.sh 2>&1 | sed 's/^/[medcattrainer] /'" + stdout_logfile=/dev/stdout + stdout_logfile_maxbytes=0 + stderr_logfile=/dev/stderr + stderr_logfile_maxbytes=0 + autorestart=true + + [program:bg-process] + command=sh -c "exec /home/scripts/run-bg-process.sh 2>&1 | sed 's/^/[bg-process] /'" + stdout_logfile=/dev/stdout + stdout_logfile_maxbytes=0 + stderr_logfile=/dev/stderr + stderr_logfile_maxbytes=0 + autorestart=true + + [program:db-backup] + command=sh -c "exec cron -f -l 2 2>&1 | sed 's/^/[db-backup] /'" + stdout_logfile=/dev/stdout + stdout_logfile_maxbytes=0 + stderr_logfile=/dev/stderr + stderr_logfile_maxbytes=0 + autorestart=true \ No newline at end of file diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-deployment.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-deployment.yaml new file mode 100644 index 0000000..fa4ba42 --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-deployment.yaml @@ -0,0 +1,117 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "medcat-trainer-helm.fullname" . }}-medcat-trainer + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 4 }} + app.kubernetes.io/component: medcat-trainer +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "medcat-trainer-helm.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: medcat-trainer + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "medcat-trainer-helm.labels" . 
| nindent 8 }} + app.kubernetes.io/component: medcat-trainer + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "medcat-trainer-helm.serviceAccountName" . }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ .Chart.Name }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . | nindent 12 }} + {{- end }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default .Chart.AppVersion }}" + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - name: http + containerPort: 8000 + protocol: TCP + args: + - /usr/bin/supervisord + - -c + - /etc/supervisord.conf + envFrom: + - configMapRef: + name: {{ include "medcat-trainer-helm.fullname" . }}-medcat-trainer-env + {{- with .Values.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + volumeMounts: + - mountPath: /home/api/media + name: api-media + - mountPath: /home/api/static + name: api-static + - mountPath: /home/api/db + name: api-db + - mountPath: /home/api/db-backup + name: api-db-backup + - mountPath: /home/configs + name: medcat-trainer-config + subPath: medcat-base.txt + - mountPath: /etc/supervisord.conf + name: medcat-trainer-config + subPath: supervisord.conf + {{- with .Values.volumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: medcat-trainer-config + configMap: + name: {{ include "medcat-trainer-helm.fullname" . }}-medcat-trainer-config + - name: api-media + persistentVolumeClaim: + claimName: {{ include "medcat-trainer-helm.fullname" . 
}}-api-media + - name: api-static + persistentVolumeClaim: + claimName: {{ include "medcat-trainer-helm.fullname" . }}-api-static + - name: api-db + persistentVolumeClaim: + claimName: {{ include "medcat-trainer-helm.fullname" . }}-api-db + - name: api-db-backup + persistentVolumeClaim: + claimName: {{ include "medcat-trainer-helm.fullname" . }}-api-db-backup + {{- with .Values.volumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-env-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-env-configmap.yaml new file mode 100644 index 0000000..3da9ada --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-env-configmap.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: {{ include "medcat-trainer-helm.fullname" . }}-medcat-trainer-env + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 4 }} + app.kubernetes.io/component: medcat-trainer +data: + CONCEPT_SEARCH_SERVICE_HOST: {{ include "medcat-trainer-helm.solrHost" . | quote }} + CONCEPT_SEARCH_SERVICE_PORT: {{ include "medcat-trainer-helm.solrPort" . 
| quote }} + MEDCAT_CONFIG_FILE: "/home/configs/base.txt" + DB_BACKUP_DIR: "/home/api/db-backup" + DB_DIR: "/home/api/db" + DB_PATH: "/home/api/db/db.sqlite3" +{{- range $key, $value := .Values.env }} + {{ $key }}: {{ $value | quote }} +{{- end }} \ No newline at end of file diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-deployment.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-deployment.yaml new file mode 100644 index 0000000..788222e --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-deployment.yaml @@ -0,0 +1,101 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "medcat-trainer-helm.fullname" . }}-nginx + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 4 }} + app.kubernetes.io/component: nginx +spec: + {{- if not .Values.autoscaling.enabled }} + replicas: {{ .Values.replicaCount }} + {{- end }} + selector: + matchLabels: + {{- include "medcat-trainer-helm.selectorLabels" . | nindent 6 }} + app.kubernetes.io/component: nginx + template: + metadata: + {{- with .Values.podAnnotations }} + annotations: + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "medcat-trainer-helm.labels" . | nindent 8 }} + app.kubernetes.io/component: nginx + {{- with .Values.podLabels }} + {{- toYaml . | nindent 8 }} + {{- end }} + spec: + {{- with .Values.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "medcat-trainer-helm.serviceAccountName" . }} + {{- with .Values.podSecurityContext }} + securityContext: + {{- toYaml . | nindent 8 }} + {{- end }} + containers: + - name: {{ .Chart.Name }} + {{- with .Values.securityContext }} + securityContext: + {{- toYaml . 
| nindent 12 }} + {{- end }} + image: "{{ .Values.nginxImage.repository }}:{{ .Values.nginxImage.tag }}" + imagePullPolicy: {{ .Values.nginxImage.pullPolicy }} + ports: + - name: http + containerPort: {{ .Values.service.port }} + protocol: TCP + {{- with .Values.livenessProbe }} + livenessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.readinessProbe }} + readinessProbe: + {{- toYaml . | nindent 12 }} + {{- end }} + {{- with .Values.resources }} + resources: + {{- toYaml . | nindent 12 }} + {{- end }} + + volumeMounts: + - name: nginx-config + mountPath: /etc/nginx/nginx.conf + subPath: nginx.conf + - name: nginx-config + mountPath: /etc/nginx/sites-enabled/medcattrainer + subPath: sitesenabled.medcattrainer + - mountPath: /home/api/media + name: api-media + - mountPath: /home/api/static + name: api-static + {{- with .Values.volumeMounts }} + {{- toYaml . | nindent 12 }} + {{- end }} + volumes: + - name: nginx-config + configMap: + name: {{ include "medcat-trainer-helm.fullname" . }}-nginx-config + - name: api-media + persistentVolumeClaim: + claimName: {{ include "medcat-trainer-helm.fullname" . }}-api-media + - name: api-static + persistentVolumeClaim: + claimName: {{ include "medcat-trainer-helm.fullname" . }}-api-static + {{- with .Values.volumes }} + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} From 86a9a725cd664792aa8392e66d5a8052db295176 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Tue, 9 Sep 2025 17:05:18 +0000 Subject: [PATCH 08/14] ops: Creating Chart. 
Fixing deployments --- .../templates/medcat-trainer-deployment.yaml | 2 +- .../medcat-trainer-helm/templates/nginx-configmap.yaml | 5 +++++ .../templates/nginx-deployment.yaml | 4 ++-- .../templates/tests/test-connection.yaml | 2 +- .../kubernetes/charts/medcat-trainer-helm/values.yaml | 10 ++++++++++ 5 files changed, 19 insertions(+), 4 deletions(-) diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-deployment.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-deployment.yaml index fa4ba42..5a86178 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-deployment.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-deployment.yaml @@ -75,7 +75,7 @@ spec: name: api-db - mountPath: /home/api/db-backup name: api-db-backup - - mountPath: /home/configs + - mountPath: /home/configs/base.txt name: medcat-trainer-config subPath: medcat-base.txt - mountPath: /etc/supervisord.conf diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml index 988be8c..96a46bd 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml @@ -171,5 +171,10 @@ data: proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; } + location /healthz { + access_log off; + return 200 'OK'; + add_header Content-Type text/plain; + } } diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-deployment.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-deployment.yaml index 788222e..4c0fbe0 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-deployment.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-deployment.yaml @@ -47,11 +47,11 @@ spec: - name: 
http containerPort: {{ .Values.service.port }} protocol: TCP - {{- with .Values.livenessProbe }} + {{- with .Values.nginx.livenessProbe }} livenessProbe: {{- toYaml . | nindent 12 }} {{- end }} - {{- with .Values.readinessProbe }} + {{- with .Values.nginx.readinessProbe }} readinessProbe: {{- toYaml . | nindent 12 }} {{- end }} diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/tests/test-connection.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/tests/test-connection.yaml index 907b2d3..cf3217b 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/tests/test-connection.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/tests/test-connection.yaml @@ -11,5 +11,5 @@ spec: - name: wget image: busybox command: ['wget'] - args: ['{{ include "medcat-trainer-helm.fullname" . }}:{{ .Values.service.port }}'] + args: ['{{ include "medcat-trainer-helm.fullname" . }}-nginx:{{ .Values.service.port }}/healthz', '-U helm-test {{ .Chart.Name }}-v{{ .Chart.Version }}'] restartPolicy: Never diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml index 44068bd..89ba12e 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml @@ -168,6 +168,16 @@ resources: {} # httpGet: # path: / # port: http +nginx: + livenessProbe: + httpGet: + path: /healthz + port: http + readinessProbe: + httpGet: + path: /healthz + port: http + # This section is for setting up autoscaling more information can be found here: https://kubernetes.io/docs/concepts/workloads/autoscaling/ autoscaling: From f00ea8a5942b863120223796dad5d798646f0c32 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Wed, 10 Sep 2025 15:52:37 +0000 Subject: [PATCH 09/14] ops: Creating Chart. 
Added postgres --- .../charts/medcat-trainer-helm/Chart.lock | 7 ++- .../charts/medcat-trainer-helm/Chart.yaml | 4 ++ .../charts/medcat-trainer-helm/README.md | 45 +++++++++++++++++++ .../templates/medcat-trainer-deployment.yaml | 10 +++++ .../templates/nginx-configmap.yaml | 2 +- .../templates/nginx-deployment.yaml | 2 +- .../medcat-trainer-helm/templates/pvc.yaml | 8 ++-- .../charts/medcat-trainer-helm/values.yaml | 43 ++++++++++++++---- deployment/kubernetes/local_dev_startup.sh | 2 +- 9 files changed, 107 insertions(+), 16 deletions(-) create mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/README.md diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/Chart.lock b/deployment/kubernetes/charts/medcat-trainer-helm/Chart.lock index 7366129..36417b3 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/Chart.lock +++ b/deployment/kubernetes/charts/medcat-trainer-helm/Chart.lock @@ -2,5 +2,8 @@ dependencies: - name: solr repository: oci://registry-1.docker.io/bitnamicharts version: 9.6.10 -digest: sha256:f62be9fbd53de5aebf148b21c3ce7ff568a4d196f77f99a14c1207755d55e0e5 -generated: "2025-09-08T10:31:52.835071366Z" +- name: postgresql + repository: oci://registry-1.docker.io/bitnamicharts + version: 16.7.27 +digest: sha256:a02db326b15b24d92e4c0787792803d7b224babd4f288e28a73d66a7d4506a70 +generated: "2025-09-10T11:29:31.705470067Z" diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/Chart.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/Chart.yaml index be93b1a..9a74ccd 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/Chart.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/Chart.yaml @@ -28,3 +28,7 @@ dependencies: - name: solr version: "9.6.10" repository: "oci://registry-1.docker.io/bitnamicharts" +- name: postgresql + version: 16.7.27 + repository: "oci://registry-1.docker.io/bitnamicharts" + condition: postgresql.enabled diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/README.md 
b/deployment/kubernetes/charts/medcat-trainer-helm/README.md new file mode 100644 index 0000000..8f54d9b --- /dev/null +++ b/deployment/kubernetes/charts/medcat-trainer-helm/README.md @@ -0,0 +1,45 @@ +# MedCAT Trainer Helm Chart + +This Helm chart deploys MedCAT Trainer and infrastructure to a Kubernetes cluster. + +By default the chart will: + +- Run MedCAT Trainer Django server +- Run NGINX for static site hosting and routing +- Run a SOLR and Zookeeper cluster for the Concept DB +- Run a Postgres database for persistence + + +## Installation + +```sh +helm install my-medcat-trainer oci://registry-1.docker.io/cogstacksystems/medcat-trainer-helm +``` + +## Configuration + +See these values for common configurations to change: + +| Setting |description | +| -------- | -------- | +| `env` | Environment variables as defined in the [MedCAT Trainer docs](https://docs.cogstack.org/projects/medcat-trainer/en/latest/installation.html). | +|`medcatConfig`|MedCAT config file as described [here](https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/medcat/config/config.py)| + +### Use Sqlite instead of Postgres + +Sqlite can be used for smaller single-instance deployments. + +Set these values: + +```yaml +DB_ENGINE: "sqlite3" + +postgresql: + enabled: false +``` + +### Missing features +These features do not exist yet but are planned for the future: +- Use a pre-existing postgres db +- Use a pre-existing SOLR instance +- Migrate from supervisord to standalone pods for background tasks for scaling purposes diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-deployment.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-deployment.yaml index 5a86178..2a4112d 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-deployment.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-deployment.yaml @@ -54,6 +54,16 @@ spec: envFrom: - configMapRef: name:
{{ include "medcat-trainer-helm.fullname" . }}-medcat-trainer-env + {{- if .Values.postgresql.enabled }} + env: + - name: DB_HOST + value: {{ .Release.Name }}-postgresql + - name: DB_PASSWORD + valueFrom: + secretKeyRef: + name: {{ .Release.Name }}-postgresql + key: postgres-password + {{- end }} {{- with .Values.livenessProbe }} livenessProbe: {{- toYaml . | nindent 12 }} diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml index 96a46bd..d705eaf 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-configmap.yaml @@ -171,7 +171,7 @@ data: proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; } - location /healthz { + location /nginx/health/live { access_log off; return 200 'OK'; add_header Content-Type text/plain; diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-deployment.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-deployment.yaml index 4c0fbe0..c721c30 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-deployment.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-deployment.yaml @@ -7,7 +7,7 @@ metadata: app.kubernetes.io/component: nginx spec: {{- if not .Values.autoscaling.enabled }} - replicas: {{ .Values.replicaCount }} + replicas: {{ .Values.nginxReplicaCount }} {{- end }} selector: matchLabels: diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/pvc.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/pvc.yaml index ad4f814..196e331 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/pvc.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/pvc.yaml @@ -32,7 +32,7 @@ spec: requests: storage: {{.Values.persistence.static.size}} --- -#TODO - only 
create if type using is sqlite +{{- if eq .Values.env.DB_ENGINE "sqlite3" }} apiVersion: v1 kind: PersistentVolumeClaim metadata: @@ -49,8 +49,9 @@ spec: resources: requests: storage: {{.Values.persistence.sqlite.size}} +{{- end }} --- -#TODO - only create if type using is sqlite +{{- if eq .Values.env.DB_ENGINE "sqlite3" }} apiVersion: v1 kind: PersistentVolumeClaim metadata: @@ -66,4 +67,5 @@ spec: - ReadWriteOnce resources: requests: - storage: {{.Values.persistence.sqlite.backupDbSize}} \ No newline at end of file + storage: {{.Values.persistence.sqlite.backupDbSize}} +{{- end }} \ No newline at end of file diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml index 89ba12e..86e7bab 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml @@ -4,6 +4,7 @@ # This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/ replicaCount: 1 +nginxReplicaCount: 3 # This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/ image: @@ -20,7 +21,6 @@ nginxImage: # Add any environment variables here that should be set in the medcat-trainer container env: CSRF_TRUSTED_ORIGINS: "http://localhost:8000" - DB_ENGINE: "sqlite3" DEBUG: "1" EMAIL_HOST: "mail.cogstack.org" EMAIL_PASS: "to-be-changed" @@ -35,7 +35,27 @@ env: RESUBMIT_ALL_ON_STARTUP: "0" UNIQUE_DOC_NAMES_IN_DATASETS: "True" - + # TODO: Support custom DB overrides + # DB_ENGINE: "sqlite3" + DB_ENGINE: "postgresql" + DB_NAME: "postgres" + DB_USER: "postgres" + DB_PORT: "5432" + # DB_PASSWORD: "" + # DB_HOST: "" + +postgresql: + enabled: true + # TODO: Support custom DB overrides + # auth: + # - name for a custom database + # database: "my_trainer_db" + # username: "trainer_admin" + # password: "changeme_changeme" + primary:
+ persistence: + # Size of the PVC for the postgres database + size: 500Mi persistence: media: # Size of PVC for files like model packs and other media downloaded by medcat trainer @@ -49,8 +69,7 @@ persistence: # Size of the PVC for the Sqlite backups backupDbSize: 300Mi storageClassName: "" - # Engine can be 'sqlite3' or 'postgresql' - dbEngine: sqlite3 + # MedCAT config as described here: https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/medcat/config/config.py medcatConfig: | @@ -160,22 +179,30 @@ resources: {} # This is to setup the liveness and readiness probes more information can be found here: https://kubernetes.io/docs/tasks/configure-pod-container/configure-liveness-readiness-startup-probes/ # TODO Add liveness and readiness to django app +# https://github.com/CogStack/cogstack-nlp/pull/131/files # livenessProbe: # httpGet: -# path: / +# path: /api/health/live/ # port: http # readinessProbe: # httpGet: -# path: / +# path: /api/health/ready/ # port: http +# startupProbe: +# httpGet: +# path: /api/health/startup/ +# port: http +# failureThreshold: 30 +# periodSeconds: 10 + nginx: livenessProbe: httpGet: - path: /healthz + path: /nginx/health/live port: http readinessProbe: httpGet: - path: /healthz + path: /nginx/health/live port: http diff --git a/deployment/kubernetes/local_dev_startup.sh b/deployment/kubernetes/local_dev_startup.sh index 2265adc..7e876cb 100644 --- a/deployment/kubernetes/local_dev_startup.sh +++ b/deployment/kubernetes/local_dev_startup.sh @@ -21,5 +21,5 @@ helm test medcat-service --logs # Test medcat trainer # kubectl port-forward svc/nginx 8000:8000 -helm upgrade medcat-trainer ./medcat-trainer-helm --install --recreate-pods --wait --timeout 5m0s # Install if it doesnt already exist, else upgrade +helm upgrade my-test ./medcat-trainer-helm --install --recreate-pods --wait --timeout 5m0s # Install if it doesnt already exist, else upgrade # kubectl port-forward svc/medcat-trainer-solr 8983:8983 From 
e015c54bd895a12bd88758516c4d6f434dde0a1a Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Wed, 10 Sep 2025 16:36:54 +0000 Subject: [PATCH 10/14] ops: Created Medcat trainer helm. Add github action. Cleanup --- .../workflows/kubernetes-charts-build.yaml | 5 +- .../charts/medcat-trainer-helm/README.md | 10 +- .../api-db-backup-persistentvolumeclaim.yaml | 12 -- .../api-db-persistentvolumeclaim.yaml | 12 -- .../api-media-persistentvolumeclaim.yaml | 12 -- .../api-static-persistentvolumeclaim.yaml | 12 -- .../manual_manifests/env-configmap.yaml | 27 --- .../manual_manifests/medcat-configmap.yaml | 20 -- ...ttrainer-claim4-persistentvolumeclaim.yaml | 12 -- ...ttrainer-claim5-persistentvolumeclaim.yaml | 12 -- .../medcattrainer-deployment.yaml | 172 ------------------ .../medcattrainer-service.yaml | 16 -- .../manual_manifests/nginx-configmap.yaml | 172 ------------------ .../manual_manifests/nginx-deployment.yaml | 155 ---------------- .../manual_manifests/nginx-service.yaml | 16 -- .../solr-data-persistentvolumeclaim.yaml | 12 -- .../manual_manifests/solr-deployment.yaml | 141 -------------- .../manual_manifests/solr-service.yaml | 16 -- .../supervisord-configmap.yaml | 35 ---- .../original/docker-compose.yml | 58 ------ .../medcat-trainer-helm/original/envs/env | 46 ----- .../medcat-trainer-helm/original/nginx.conf | 135 -------------- .../original/sites-enabled/medcattrainer | 30 --- .../charts/medcat-trainer-helm/values.yaml | 4 +- 24 files changed, 13 insertions(+), 1129 deletions(-) delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-db-backup-persistentvolumeclaim.yaml delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-db-persistentvolumeclaim.yaml delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-media-persistentvolumeclaim.yaml delete mode 100644 
deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-static-persistentvolumeclaim.yaml delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/env-configmap.yaml delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcat-configmap.yaml delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-claim4-persistentvolumeclaim.yaml delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-claim5-persistentvolumeclaim.yaml delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-deployment.yaml delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-service.yaml delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-configmap.yaml delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-deployment.yaml delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-service.yaml delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/solr-data-persistentvolumeclaim.yaml delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/solr-deployment.yaml delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/solr-service.yaml delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/supervisord-configmap.yaml delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/original/docker-compose.yml delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/original/envs/env delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/original/nginx.conf delete mode 100644 deployment/kubernetes/charts/medcat-trainer-helm/original/sites-enabled/medcattrainer diff --git 
a/.github/workflows/kubernetes-charts-build.yaml b/.github/workflows/kubernetes-charts-build.yaml index 88a372f..26a576f 100644 --- a/.github/workflows/kubernetes-charts-build.yaml +++ b/.github/workflows/kubernetes-charts-build.yaml @@ -70,7 +70,9 @@ jobs: uses: actions/checkout@v5 - name: Package Helm Charts - run: helm package ./charts/medcat-service-helm --version $CHART_VERSION + run: | + helm package ./charts/medcat-service-helm --version $CHART_VERSION + helm package ./charts/medcat-trainer-helm --version $CHART_VERSION - name: Helm OCI login to Docker Hub run: helm registry login registry-1.docker.io -u ${{ secrets.DOCKERHUB_USERNAME }} -p ${{ secrets.DOCKERHUB_TOKEN }} @@ -78,3 +80,4 @@ jobs: - name: Push Helm Chart to Docker Hub OCI run: | helm push ./medcat-service-helm-${CHART_VERSION}.tgz oci://registry-1.docker.io/cogstacksystems + helm push ./medcat-trainer-helm-${CHART_VERSION}.tgz oci://registry-1.docker.io/cogstacksystems diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/README.md b/deployment/kubernetes/charts/medcat-trainer-helm/README.md index 8f54d9b..030b943 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/README.md +++ b/deployment/kubernetes/charts/medcat-trainer-helm/README.md @@ -24,6 +24,8 @@ See these values for common configurations to change: | -------- | -------- | | `env` | Environment variables as defined in the [MedCAT Trainer docs](https://docs.cogstack.org/projects/medcat-trainer/en/latest/installation.html). 
| |`medcatConfig`|MedCAT config file as described [here](https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/medcat/config/config.py)| +| `env.CSRF_TRUSTED_ORIGINS` | The Host and Port to access the application on | + ### Use Sqlite instead of Postgres @@ -38,8 +40,10 @@ postgresql: enabled: false ``` -### Missing features +## Missing features These features are not yet existing but to be added in future: - Use a pre existing postgres db -- Use a pre existing SOLR instnace -- Migrate from supervisord to standalone pods for background tasks for scaling purposes +- Use a pre existing SOLR instance +- Migrate from supervisord to standalone deployment for background tasks for better scaling +- Support SOLR authentication from medcat trainer +- Support passing DB OPTIONS to medcat trainer for use in cloud environments diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-db-backup-persistentvolumeclaim.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-db-backup-persistentvolumeclaim.yaml deleted file mode 100644 index 54de4e9..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-db-backup-persistentvolumeclaim.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - labels: - io.kompose.service: api-db-backup - name: api-db-backup -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 100Mi diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-db-persistentvolumeclaim.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-db-persistentvolumeclaim.yaml deleted file mode 100644 index 0b90c73..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-db-persistentvolumeclaim.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - labels: - io.kompose.service: api-db - name: api-db -spec: - accessModes: - - 
ReadWriteOnce - resources: - requests: - storage: 100Mi diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-media-persistentvolumeclaim.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-media-persistentvolumeclaim.yaml deleted file mode 100644 index 9008a95..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-media-persistentvolumeclaim.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - labels: - io.kompose.service: api-media - name: api-media -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 100Mi diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-static-persistentvolumeclaim.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-static-persistentvolumeclaim.yaml deleted file mode 100644 index f11a863..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/api-static-persistentvolumeclaim.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - labels: - io.kompose.service: api-static - name: api-static -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 100Mi diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/env-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/env-configmap.yaml deleted file mode 100644 index 861c613..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/env-configmap.yaml +++ /dev/null @@ -1,27 +0,0 @@ -apiVersion: v1 -data: - CONCEPT_SEARCH_SERVICE_HOST: "medcat-trainer-solr" - CONCEPT_SEARCH_SERVICE_PORT: "8983" - CSRF_TRUSTED_ORIGINS: "http://localhost:8000" - DB_BACKUP_DIR: /home/api/db-backup - DB_DIR: /home/api/db - DB_PATH: /home/api/db/db.sqlite3 - DEBUG: "1" - EMAIL_HOST: mail.cogstack.org - EMAIL_PASS: to be changed - EMAIL_PORT: "465" - EMAIL_USER: 
example@cogstack.org - ENV: non-prod - LOAD_EXAMPLES: "1" - LOAD_NUM_DOC_PAGES: "10" - MAX_DATASET_SIZE: "10000" - MAX_MEDCAT_MODELS: "2" - MEDCAT_CONFIG_FILE: /home/configs/base.txt - OPENBLAS_NUM_THREADS: "1" - RESUBMIT_ALL_ON_STARTUP: "0" - UNIQUE_DOC_NAMES_IN_DATASETS: "True" -kind: ConfigMap -metadata: - labels: - io.kompose.service: medcattrainer-env - name: env diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcat-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcat-configmap.yaml deleted file mode 100644 index f978095..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcat-configmap.yaml +++ /dev/null @@ -1,20 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: medcat-config -data: - base.txt: | - cat.linking.optim = {'type': 'standard', 'lr': 0.1} - cat.linking.filter_before_disamb = True - # 20 - INFO; 10 - DEBUG - cat.general.log_level = 20 - # Recommended is to have this one negative - cat.linking.similarity_threshold = -5 - # And this one to be used as the real th - cat.linking.similarity_threshold_trainer = -5 - # Used for limiting the number of occ of a concept in a project - cat.general.cui_count_limit = 100000000 - # Is unlink full - cat.general.full_unlink = False - # use this spacy model - cat.general.spacy_model = 'en_core_web_md' diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-claim4-persistentvolumeclaim.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-claim4-persistentvolumeclaim.yaml deleted file mode 100644 index a112c3c..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-claim4-persistentvolumeclaim.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - labels: - io.kompose.service: medcattrainer-claim4 - name: medcattrainer-claim4 -spec: - accessModes: - - ReadWriteOnce 
- resources: - requests: - storage: 100Mi diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-claim5-persistentvolumeclaim.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-claim5-persistentvolumeclaim.yaml deleted file mode 100644 index b0a7598..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-claim5-persistentvolumeclaim.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - labels: - io.kompose.service: medcattrainer-claim5 - name: medcattrainer-claim5 -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 100Mi diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-deployment.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-deployment.yaml deleted file mode 100644 index 13d0302..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-deployment.yaml +++ /dev/null @@ -1,172 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - annotations: - kompose.cmd: kompose convert --file docker-compose.yml - kompose.version: 1.34.0 (cbf2835db) - labels: - io.kompose.service: medcattrainer - name: medcattrainer -spec: - replicas: 1 - selector: - matchLabels: - io.kompose.service: medcattrainer - strategy: - type: Recreate - template: - metadata: - annotations: - kompose.cmd: kompose convert --file docker-compose.yml - kompose.version: 1.34.0 (cbf2835db) - labels: - io.kompose.service: medcattrainer - spec: - containers: - - args: - - /usr/bin/supervisord - - -c - - /etc/supervisord.conf - env: - - - name: CONCEPT_SEARCH_SERVICE_HOST - valueFrom: - configMapKeyRef: - key: CONCEPT_SEARCH_SERVICE_HOST - name: env - - name: CONCEPT_SEARCH_SERVICE_PORT - valueFrom: - configMapKeyRef: - key: CONCEPT_SEARCH_SERVICE_PORT - name: env - - name: CSRF_TRUSTED_ORIGINS - valueFrom: - 
configMapKeyRef: - key: CSRF_TRUSTED_ORIGINS - name: env - - name: DB_BACKUP_DIR - valueFrom: - configMapKeyRef: - key: DB_BACKUP_DIR - name: env - - name: DB_DIR - valueFrom: - configMapKeyRef: - key: DB_DIR - name: env - - name: DB_PATH - valueFrom: - configMapKeyRef: - key: DB_PATH - name: env - - name: DEBUG - valueFrom: - configMapKeyRef: - key: DEBUG - name: env - - name: EMAIL_HOST - valueFrom: - configMapKeyRef: - key: EMAIL_HOST - name: env - - name: EMAIL_PASS - valueFrom: - configMapKeyRef: - key: EMAIL_PASS - name: env - - name: EMAIL_PORT - valueFrom: - configMapKeyRef: - key: EMAIL_PORT - name: env - - name: EMAIL_USER - valueFrom: - configMapKeyRef: - key: EMAIL_USER - name: env - - name: ENV - valueFrom: - configMapKeyRef: - key: ENV - name: env - - name: LOAD_EXAMPLES - valueFrom: - configMapKeyRef: - key: LOAD_EXAMPLES - name: env - - name: LOAD_NUM_DOC_PAGES - valueFrom: - configMapKeyRef: - key: LOAD_NUM_DOC_PAGES - name: env - - name: MAX_DATASET_SIZE - valueFrom: - configMapKeyRef: - key: MAX_DATASET_SIZE - name: env - - name: MAX_MEDCAT_MODELS - valueFrom: - configMapKeyRef: - key: MAX_MEDCAT_MODELS - name: env - - name: MCT_VERSION - value: v2.22.1 - - name: MEDCAT_CONFIG_FILE - valueFrom: - configMapKeyRef: - key: MEDCAT_CONFIG_FILE - name: env - - name: OPENBLAS_NUM_THREADS - valueFrom: - configMapKeyRef: - key: OPENBLAS_NUM_THREADS - name: env - - name: RESUBMIT_ALL_ON_STARTUP - valueFrom: - configMapKeyRef: - key: RESUBMIT_ALL_ON_STARTUP - name: env - - name: UNIQUE_DOC_NAMES_IN_DATASETS - valueFrom: - configMapKeyRef: - key: UNIQUE_DOC_NAMES_IN_DATASETS - name: env - image: cogstacksystems/medcat-trainer:v2.22.1 - name: medcattrainer - volumeMounts: - - mountPath: /home/api/media - name: api-media - - mountPath: /home/api/static - name: api-static - - mountPath: /home/api/db - name: api-db - - mountPath: /home/api/db-backup - name: api-db-backup - - mountPath: /home/configs - name: medcattrainer-claim4 - - name: supervisord-config - 
mountPath: /etc/supervisord.conf - subPath: supervisord.conf # <-- ensures it's a file, not a folder - restartPolicy: Always - volumes: - - name: supervisord-config - configMap: - name: supervisord-config - - name: api-media - persistentVolumeClaim: - claimName: api-media - - name: api-static - persistentVolumeClaim: - claimName: api-static - - name: api-db - persistentVolumeClaim: - claimName: api-db - - name: api-db-backup - persistentVolumeClaim: - claimName: api-db-backup - - name: medcattrainer-claim4 - persistentVolumeClaim: - claimName: medcattrainer-claim4 - - name: medcattrainer-claim5 - persistentVolumeClaim: - claimName: medcattrainer-claim5 diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-service.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-service.yaml deleted file mode 100644 index 0300133..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/medcattrainer-service.yaml +++ /dev/null @@ -1,16 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - annotations: - kompose.cmd: kompose convert --file docker-compose.yml - kompose.version: 1.34.0 (cbf2835db) - labels: - io.kompose.service: medcattrainer - name: medcattrainer -spec: - ports: - - name: "8000" - port: 8000 - targetPort: 8000 - selector: - io.kompose.service: medcattrainer diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-configmap.yaml deleted file mode 100644 index 4832a8a..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-configmap.yaml +++ /dev/null @@ -1,172 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: nginx-config -data: - nginx.conf: | - # Configuration File - Nginx Server Configs - # http://nginx.org/en/docs/dirindex.html - - # Sets the worker threads to the number of CPU cores available in the system for 
best performance. - # Should be > the number of CPU cores. - # Maximum number of connections = worker_processes * worker_connections - worker_processes auto; - - # Maximum number of open files per worker process. - # Should be > worker_connections. - worker_rlimit_nofile 8192; - - events { - # If you need more connections than this, you start optimizing your OS. - # That's probably the point at which you hire people who are smarter than you as this is *a lot* of requests. - # Should be < worker_rlimit_nofile. - worker_connections 8005; - } - - # Log errors and warnings to this file - # This is only used when you don't override it on a server{} level - #error_log logs/error.log warn; - - # The file storing the process ID of the main process - pid /var/run/nginx.pid; - - http { - - # Hide nginx version information. - server_tokens off; - - # Specify MIME types for files. - include mime.types; - default_type application/octet-stream; - - # Update charset_types to match updated mime.types. - # text/html is always included by charset module. - charset_types text/css text/plain text/vnd.wap.wml application/javascript application/json application/rss+xml application/xml; - - # Include $http_x_forwarded_for within default format used in log files - log_format main '$remote_addr - $remote_user [$time_local] "$request" ' - '$status $body_bytes_sent "$http_referer" ' - '"$http_user_agent" "$http_x_forwarded_for"'; - - # Log access to this file - # This is only used when you don't override it on a server{} level - #access_log logs/access.log main; - - # How long to allow each connection to stay idle. - # Longer values are better for each individual client, particularly for SSL, - # but means that worker connections are tied up longer. 
- keepalive_timeout 3000s; - - # Timeouts - proxy_connect_timeout 3000; - proxy_send_timeout 3000; - proxy_read_timeout 3000; - send_timeout 3000; - - # increase client body size - Model packs can be over 3G.s - client_max_body_size 4000M; - # Speed up file transfers by using sendfile() to copy directly - # between descriptors rather than using read()/write(). - # For performance reasons, on FreeBSD systems w/ ZFS - # this option should be disabled as ZFS's ARC caches - # frequently used files in RAM by default. - sendfile on; - - # Don't send out partial frames; this increases throughput - # since TCP frames are filled up before being sent out. - tcp_nopush on; - - # Enable gzip compression. - gzip on; - - # Compression level (1-9). - # 5 is a perfect compromise between size and CPU usage, offering about - # 75% reduction for most ASCII files (almost identical to level 9). - gzip_comp_level 5; - - # Don't compress anything that's already small and unlikely to shrink much - # if at all (the default is 20 bytes, which is bad as that usually leads to - # larger files after gzipping). - gzip_min_length 256; - - # Compress data even for clients that are connecting to us via proxies, - # identified by the "Via" header (required for CloudFront). - gzip_proxied any; - - # Tell proxies to cache both the gzipped and regular version of a resource - # whenever the client's Accept-Encoding capabilities header varies; - # Avoids the issue where a non-gzip capable client (which is extremely rare - # today) would display gibberish if their proxy gave them the gzipped version. - gzip_vary on; - - # Compress all output labeled with one of the following MIME-types. 
- gzip_types - application/atom+xml - application/javascript - application/json - application/ld+json - application/manifest+json - application/rss+xml - application/vnd.geo+json - application/vnd.ms-fontobject - application/x-font-ttf - application/x-web-app-manifest+json - application/xhtml+xml - application/xml - font/opentype - image/bmp - image/svg+xml - image/x-icon - text/cache-manifest - text/css - text/plain - text/vcard - text/vnd.rim.location.xloc - text/vtt - text/x-component - text/x-cross-domain-policy; - # text/html is always compressed by gzip module - - # This should be turned on if you are going to have pre-compressed copies (.gz) of - # static files available. If not it should be left off as it will cause extra I/O - # for the check. It is best if you enable this in a location{} block for - # a specific directory, or on an individual server{} level. - # gzip_static on; - - # Include files in the sites-enabled folder. server{} configuration files should be - # placed in the sites-available folder, and then the configuration should be enabled - # by creating a symlink to it in the sites-enabled folder. - # See doc/sites-enabled.md for more info. 
- include sites-enabled/*; - } - sitesenabled.medcattrainer: | - server { - listen 8000; - server_name localhost; - charset utf-8; - large_client_header_buffers 4 32k; - - location /static { - alias /home/api/static; - } - - location /media { - alias /home/api/media; - } - - location /api/concepts/ { - proxy_pass http://medcat-trainer-solr:8983/solr/; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - } - - location / { - proxy_pass http://medcattrainer:8000; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - } - - } - diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-deployment.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-deployment.yaml deleted file mode 100644 index 00b0402..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-deployment.yaml +++ /dev/null @@ -1,155 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - annotations: - kompose.cmd: kompose convert --file docker-compose.yml - kompose.version: 1.34.0 (cbf2835db) - labels: - io.kompose.service: nginx - name: nginx -spec: - replicas: 1 - selector: - matchLabels: - io.kompose.service: nginx - strategy: - type: Recreate - template: - metadata: - annotations: - kompose.cmd: kompose convert --file docker-compose.yml - kompose.version: 1.34.0 (cbf2835db) - labels: - io.kompose.service: nginx - spec: - containers: - - env: - - name: CONCEPT_SEARCH_SERVICE_HOST - valueFrom: - configMapKeyRef: - key: CONCEPT_SEARCH_SERVICE_HOST - name: env - - name: CONCEPT_SEARCH_SERVICE_PORT - valueFrom: - configMapKeyRef: - key: CONCEPT_SEARCH_SERVICE_PORT - name: env - - name: CSRF_TRUSTED_ORIGINS - valueFrom: - configMapKeyRef: - key: CSRF_TRUSTED_ORIGINS - name: env - - name: DB_BACKUP_DIR - valueFrom: - configMapKeyRef: - key: DB_BACKUP_DIR 
- name: env - - name: DB_DIR - valueFrom: - configMapKeyRef: - key: DB_DIR - name: env - - name: DB_PATH - valueFrom: - configMapKeyRef: - key: DB_PATH - name: env - - name: DEBUG - valueFrom: - configMapKeyRef: - key: DEBUG - name: env - - name: EMAIL_HOST - valueFrom: - configMapKeyRef: - key: EMAIL_HOST - name: env - - name: EMAIL_PASS - valueFrom: - configMapKeyRef: - key: EMAIL_PASS - name: env - - name: EMAIL_PORT - valueFrom: - configMapKeyRef: - key: EMAIL_PORT - name: env - - name: EMAIL_USER - valueFrom: - configMapKeyRef: - key: EMAIL_USER - name: env - - name: ENV - valueFrom: - configMapKeyRef: - key: ENV - name: env - - name: LOAD_EXAMPLES - valueFrom: - configMapKeyRef: - key: LOAD_EXAMPLES - name: env - - name: LOAD_NUM_DOC_PAGES - valueFrom: - configMapKeyRef: - key: LOAD_NUM_DOC_PAGES - name: env - - name: MAX_DATASET_SIZE - valueFrom: - configMapKeyRef: - key: MAX_DATASET_SIZE - name: env - - name: MAX_MEDCAT_MODELS - valueFrom: - configMapKeyRef: - key: MAX_MEDCAT_MODELS - name: env - - name: MEDCAT_CONFIG_FILE - valueFrom: - configMapKeyRef: - key: MEDCAT_CONFIG_FILE - name: env - - name: OPENBLAS_NUM_THREADS - valueFrom: - configMapKeyRef: - key: OPENBLAS_NUM_THREADS - name: env - - name: RESUBMIT_ALL_ON_STARTUP - valueFrom: - configMapKeyRef: - key: RESUBMIT_ALL_ON_STARTUP - name: env - - name: UNIQUE_DOC_NAMES_IN_DATASETS - valueFrom: - configMapKeyRef: - key: UNIQUE_DOC_NAMES_IN_DATASETS - name: env - image: nginx:mainline-alpine3.22-perl - imagePullPolicy: IfNotPresent - name: nginx - ports: - - containerPort: 8000 - protocol: TCP - volumeMounts: - - mountPath: /home/api/media - name: api-media - - mountPath: /home/api/static - name: api-static - - name: nginx-config - mountPath: /etc/nginx/nginx.conf - subPath: nginx.conf - - name: nginx-config - mountPath: /etc/nginx/sites-enabled/medcattrainer - subPath: sitesenabled.medcattrainer - restartPolicy: Always - volumes: - - name: nginx-config - configMap: - name: nginx-config - - name: 
api-media - persistentVolumeClaim: - claimName: api-media - - name: api-static - persistentVolumeClaim: - claimName: api-static - diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-service.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-service.yaml deleted file mode 100644 index 7238b1c..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/nginx-service.yaml +++ /dev/null @@ -1,16 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - annotations: - kompose.cmd: kompose convert --file docker-compose.yml - kompose.version: 1.34.0 (cbf2835db) - labels: - io.kompose.service: nginx - name: nginx -spec: - ports: - - name: "8001" - port: 8000 - targetPort: 8000 - selector: - io.kompose.service: nginx diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/solr-data-persistentvolumeclaim.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/solr-data-persistentvolumeclaim.yaml deleted file mode 100644 index 1b3f00e..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/solr-data-persistentvolumeclaim.yaml +++ /dev/null @@ -1,12 +0,0 @@ -apiVersion: v1 -kind: PersistentVolumeClaim -metadata: - labels: - io.kompose.service: solr-data - name: solr-data -spec: - accessModes: - - ReadWriteOnce - resources: - requests: - storage: 100Mi diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/solr-deployment.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/solr-deployment.yaml deleted file mode 100644 index 073fa5c..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/solr-deployment.yaml +++ /dev/null @@ -1,141 +0,0 @@ -apiVersion: apps/v1 -kind: Deployment -metadata: - annotations: - kompose.cmd: kompose convert --file docker-compose.yml - kompose.version: 1.34.0 (cbf2835db) - labels: - io.kompose.service: solr - name: solr -spec: - replicas: 1 - 
selector: - matchLabels: - io.kompose.service: solr - strategy: - type: Recreate - template: - metadata: - annotations: - kompose.cmd: kompose convert --file docker-compose.yml - kompose.version: 1.34.0 (cbf2835db) - labels: - io.kompose.service: solr - spec: - containers: - - args: - - -cloud - env: - - name: CONCEPT_SEARCH_SERVICE_HOST - valueFrom: - configMapKeyRef: - key: CONCEPT_SEARCH_SERVICE_HOST - name: env - - name: CONCEPT_SEARCH_SERVICE_PORT - valueFrom: - configMapKeyRef: - key: CONCEPT_SEARCH_SERVICE_PORT - name: env - - name: CSRF_TRUSTED_ORIGINS - valueFrom: - configMapKeyRef: - key: CSRF_TRUSTED_ORIGINS - name: env - - name: DB_BACKUP_DIR - valueFrom: - configMapKeyRef: - key: DB_BACKUP_DIR - name: env - - name: DB_DIR - valueFrom: - configMapKeyRef: - key: DB_DIR - name: env - - name: DB_PATH - valueFrom: - configMapKeyRef: - key: DB_PATH - name: env - - name: DEBUG - valueFrom: - configMapKeyRef: - key: DEBUG - name: env - - name: EMAIL_HOST - valueFrom: - configMapKeyRef: - key: EMAIL_HOST - name: env - - name: EMAIL_PASS - valueFrom: - configMapKeyRef: - key: EMAIL_PASS - name: env - - name: EMAIL_PORT - valueFrom: - configMapKeyRef: - key: EMAIL_PORT - name: env - - name: EMAIL_USER - valueFrom: - configMapKeyRef: - key: EMAIL_USER - name: env - - name: ENV - valueFrom: - configMapKeyRef: - key: ENV - name: env - - name: LOAD_EXAMPLES - valueFrom: - configMapKeyRef: - key: LOAD_EXAMPLES - name: env - - name: LOAD_NUM_DOC_PAGES - valueFrom: - configMapKeyRef: - key: LOAD_NUM_DOC_PAGES - name: env - - name: MAX_DATASET_SIZE - valueFrom: - configMapKeyRef: - key: MAX_DATASET_SIZE - name: env - - name: MAX_MEDCAT_MODELS - valueFrom: - configMapKeyRef: - key: MAX_MEDCAT_MODELS - name: env - - name: MEDCAT_CONFIG_FILE - valueFrom: - configMapKeyRef: - key: MEDCAT_CONFIG_FILE - name: env - - name: OPENBLAS_NUM_THREADS - valueFrom: - configMapKeyRef: - key: OPENBLAS_NUM_THREADS - name: env - - name: RESUBMIT_ALL_ON_STARTUP - valueFrom: - 
configMapKeyRef: - key: RESUBMIT_ALL_ON_STARTUP - name: env - - name: UNIQUE_DOC_NAMES_IN_DATASETS - valueFrom: - configMapKeyRef: - key: UNIQUE_DOC_NAMES_IN_DATASETS - name: env - image: solr:8 - name: mct-solr - ports: - - containerPort: 8983 - protocol: TCP - volumeMounts: - - mountPath: /var/solr - name: solr-data - restartPolicy: Always - volumes: - - name: solr-data - persistentVolumeClaim: - claimName: solr-data diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/solr-service.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/solr-service.yaml deleted file mode 100644 index 4b7b5b9..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/solr-service.yaml +++ /dev/null @@ -1,16 +0,0 @@ -apiVersion: v1 -kind: Service -metadata: - annotations: - kompose.cmd: kompose convert --file docker-compose.yml - kompose.version: 1.34.0 (cbf2835db) - labels: - io.kompose.service: solr - name: solr -spec: - ports: - - name: "8983" - port: 8983 - targetPort: 8983 - selector: - io.kompose.service: solr diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/supervisord-configmap.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/supervisord-configmap.yaml deleted file mode 100644 index 0e115de..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/manual_manifests/supervisord-configmap.yaml +++ /dev/null @@ -1,35 +0,0 @@ -apiVersion: v1 -kind: ConfigMap -metadata: - name: supervisord-config -data: - supervisord.conf: | - [supervisord] - nodaemon=true - user=root - logfile=/var/log/supervisord.log - pidfile=/var/run/supervisord.pid - - [program:medcattrainer] - command=sh -c "exec /home/scripts/run.sh 2>&1 | sed 's/^/[medcattrainer] /'" - stdout_logfile=/dev/stdout - stdout_logfile_maxbytes=0 - stderr_logfile=/dev/stderr - stderr_logfile_maxbytes=0 - autorestart=true - - [program:bg-process] - command=sh -c "exec /home/scripts/run-bg-process.sh 2>&1 | 
sed 's/^/[bg-process] /'" - stdout_logfile=/dev/stdout - stdout_logfile_maxbytes=0 - stderr_logfile=/dev/stderr - stderr_logfile_maxbytes=0 - autorestart=true - - [program:db-backup] - command=sh -c "exec cron -f -l 2 2>&1 | sed 's/^/[db-backup] /'" - stdout_logfile=/dev/stdout - stdout_logfile_maxbytes=0 - stderr_logfile=/dev/stderr - stderr_logfile_maxbytes=0 - autorestart=true \ No newline at end of file diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/original/docker-compose.yml b/deployment/kubernetes/charts/medcat-trainer-helm/original/docker-compose.yml deleted file mode 100644 index 091bef4..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/original/docker-compose.yml +++ /dev/null @@ -1,58 +0,0 @@ -# Default compose yml file - uses latest build of MedCATtrainer services. Default passwords and example -# projects are not used. - -services: - # medattrainer services - medcattrainer: - image: cogstacksystems/medcat-trainer:v2.22.1 - restart: always - volumes: - - api-media:/home/api/media - - api-static:/home/api/static - - api-db:/home/api/db - - api-db-backup:/home/api/db-backup - - ./configs:/home/configs - - ./supervisord.conf:/etc/supervisord.conf - env_file: - - ./envs/env - environment: - - MCT_VERSION=v2.22.1 - ports: - - 8000:8000 - command: /usr/bin/supervisord -c /etc/supervisord.conf - - nginx: - image: nginx - restart: always - volumes: - - api-media:/home/api/media - - api-static:/home/api/static - - ./nginx/nginx.conf:/etc/nginx/nginx.conf - - ./nginx/sites-enabled/:/etc/nginx/sites-enabled - env_file: - - ./envs/env - ports: - - ${MCTRAINER_PORT:-8001}:8000 - depends_on: - - medcattrainer - - solr - - solr: - container_name: mct_solr - image: solr:8 - restart: always - env_file: - - ./envs/env - ports: - - ${SOLR_PORT:-8983}:8983 - volumes: - - solr-data:/var/solr - command: - - -cloud - -volumes: - api-media: - api-static: - api-db: - api-db-backup: - solr-data: diff --git 
a/deployment/kubernetes/charts/medcat-trainer-helm/original/envs/env b/deployment/kubernetes/charts/medcat-trainer-helm/original/envs/env deleted file mode 100644 index b083d1e..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/original/envs/env +++ /dev/null @@ -1,46 +0,0 @@ -### Required for later numpy versions -OPENBLAS_NUM_THREADS=1 - -### MedCAT cfg ### -MEDCAT_CONFIG_FILE=/home/configs/base.txt -# number of MedCAT models that can be cached, run in bg processes at any one time -MAX_MEDCAT_MODELS=2 - -### Deployment Realm ### -ENV=non-prod - -# Complete once this is deployed -CSRF_TRUSTED_ORIGINS= - -### Django debug setting - to live-reload etc. ### -DEBUG=1 - -### Load example CDB, Vocab ### -LOAD_EXAMPLES=1 - -### Dataset conf ### -UNIQUE_DOC_NAMES_IN_DATASETS=True -MAX_DATASET_SIZE=10000 - -### Solr Concept Search Conf ### -CONCEPT_SEARCH_SERVICE_HOST=solr -CONCEPT_SEARCH_SERVICE_PORT=8983 - -### DB backup dir ### -# volume mount location, default docker host system volume location, this might be different in /etc/docker/daemon.json -DB_DIR=/home/api/db -# currently only supports sqlite3 dbs -DB_PATH=${DB_DIR}/db.sqlite3 -DB_BACKUP_DIR=/home/api/db-backup - -# Resubmit all on startup -RESUBMIT_ALL_ON_STARTUP=0 - -# Front end env vars -LOAD_NUM_DOC_PAGES=10 - -# SMTP email settings - when settings are configured go to webapp/frontend/.env and set VITE_APP_EMAIL to 1 -EMAIL_USER=example@cogstack.org -EMAIL_PASS="to be changed" -EMAIL_HOST=mail.cogstack.org -EMAIL_PORT=465 diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/original/nginx.conf b/deployment/kubernetes/charts/medcat-trainer-helm/original/nginx.conf deleted file mode 100644 index 1c8256a..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/original/nginx.conf +++ /dev/null @@ -1,135 +0,0 @@ -# Configuration File - Nginx Server Configs -# http://nginx.org/en/docs/dirindex.html - -# Sets the worker threads to the number of CPU cores available in the system for 
best performance. -# Should be > the number of CPU cores. -# Maximum number of connections = worker_processes * worker_connections -worker_processes auto; - -# Maximum number of open files per worker process. -# Should be > worker_connections. -worker_rlimit_nofile 8192; - -events { - # If you need more connections than this, you start optimizing your OS. - # That's probably the point at which you hire people who are smarter than you as this is *a lot* of requests. - # Should be < worker_rlimit_nofile. - worker_connections 8005; -} - -# Log errors and warnings to this file -# This is only used when you don't override it on a server{} level -#error_log logs/error.log warn; - -# The file storing the process ID of the main process -pid /var/run/nginx.pid; - -http { - - # Hide nginx version information. - server_tokens off; - - # Specify MIME types for files. - include mime.types; - default_type application/octet-stream; - - # Update charset_types to match updated mime.types. - # text/html is always included by charset module. - charset_types text/css text/plain text/vnd.wap.wml application/javascript application/json application/rss+xml application/xml; - - # Include $http_x_forwarded_for within default format used in log files - log_format main '$remote_addr - $remote_user [$time_local] "$request" ' - '$status $body_bytes_sent "$http_referer" ' - '"$http_user_agent" "$http_x_forwarded_for"'; - - # Log access to this file - # This is only used when you don't override it on a server{} level - #access_log logs/access.log main; - - # How long to allow each connection to stay idle. - # Longer values are better for each individual client, particularly for SSL, - # but means that worker connections are tied up longer. 
- keepalive_timeout 3000s; - - # Timeouts - proxy_connect_timeout 3000; - proxy_send_timeout 3000; - proxy_read_timeout 3000; - send_timeout 3000; - - # increase client body size - Model packs can be over 3G.s - client_max_body_size 4000M; - # Speed up file transfers by using sendfile() to copy directly - # between descriptors rather than using read()/write(). - # For performance reasons, on FreeBSD systems w/ ZFS - # this option should be disabled as ZFS's ARC caches - # frequently used files in RAM by default. - sendfile on; - - # Don't send out partial frames; this increases throughput - # since TCP frames are filled up before being sent out. - tcp_nopush on; - - # Enable gzip compression. - gzip on; - - # Compression level (1-9). - # 5 is a perfect compromise between size and CPU usage, offering about - # 75% reduction for most ASCII files (almost identical to level 9). - gzip_comp_level 5; - - # Don't compress anything that's already small and unlikely to shrink much - # if at all (the default is 20 bytes, which is bad as that usually leads to - # larger files after gzipping). - gzip_min_length 256; - - # Compress data even for clients that are connecting to us via proxies, - # identified by the "Via" header (required for CloudFront). - gzip_proxied any; - - # Tell proxies to cache both the gzipped and regular version of a resource - # whenever the client's Accept-Encoding capabilities header varies; - # Avoids the issue where a non-gzip capable client (which is extremely rare - # today) would display gibberish if their proxy gave them the gzipped version. - gzip_vary on; - - # Compress all output labeled with one of the following MIME-types. 
- gzip_types - application/atom+xml - application/javascript - application/json - application/ld+json - application/manifest+json - application/rss+xml - application/vnd.geo+json - application/vnd.ms-fontobject - application/x-font-ttf - application/x-web-app-manifest+json - application/xhtml+xml - application/xml - font/opentype - image/bmp - image/svg+xml - image/x-icon - text/cache-manifest - text/css - text/plain - text/vcard - text/vnd.rim.location.xloc - text/vtt - text/x-component - text/x-cross-domain-policy; - # text/html is always compressed by gzip module - - # This should be turned on if you are going to have pre-compressed copies (.gz) of - # static files available. If not it should be left off as it will cause extra I/O - # for the check. It is best if you enable this in a location{} block for - # a specific directory, or on an individual server{} level. - # gzip_static on; - - # Include files in the sites-enabled folder. server{} configuration files should be - # placed in the sites-available folder, and then the configuration should be enabled - # by creating a symlink to it in the sites-enabled folder. - # See doc/sites-enabled.md for more info. 
- include sites-enabled/*; -} diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/original/sites-enabled/medcattrainer b/deployment/kubernetes/charts/medcat-trainer-helm/original/sites-enabled/medcattrainer deleted file mode 100644 index 88b4c81..0000000 --- a/deployment/kubernetes/charts/medcat-trainer-helm/original/sites-enabled/medcattrainer +++ /dev/null @@ -1,30 +0,0 @@ -server { - listen 8000; - server_name localhost; - charset utf-8; - large_client_header_buffers 4 32k; - - location /static { - alias /home/api/static; - } - - location /media { - alias /home/api/media; - } - - location /api/concepts/ { - proxy_pass http://mct_solr:8983/solr/; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - } - - location / { - proxy_pass http://medcattrainer:8000; - proxy_set_header Host $host; - proxy_set_header X-Real-IP $remote_addr; - proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for; - } - -} - diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml index 86e7bab..7836be1 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml @@ -20,7 +20,7 @@ nginxImage: # Add any environment variables here that should be set in the medcat-trainer container env: - CSRF_TRUSTED_ORIGINS: "http://localhost:8000" + CSRF_TRUSTED_ORIGINS: "http://localhost:8080" DEBUG: "1" EMAIL_HOST: "mail.cogstack.org" EMAIL_PASS: "to-be-changed" @@ -56,6 +56,7 @@ postgresql: persistence: # Size of the PVC for the postgres database size: 500Mi + persistence: media: # Size of PVC for files like model packs and other media downloaded by medcat trainer @@ -70,7 +71,6 @@ persistence: backupDbSize: 300Mi storageClassName: "" - # MedCAT config as described here: 
https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/medcat/config/config.py medcatConfig: | cat.linking.optim = {'type': 'standard', 'lr': 0.1} From 5ca779a6d64fdbaafc0d89becfadb46a954ccae1 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Thu, 11 Sep 2025 09:12:59 +0000 Subject: [PATCH 11/14] ops: Created Medcat trainer helm. Add github action --- .github/workflows/kubernetes-charts-build.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/kubernetes-charts-build.yaml b/.github/workflows/kubernetes-charts-build.yaml index 26a576f..9c43a49 100644 --- a/.github/workflows/kubernetes-charts-build.yaml +++ b/.github/workflows/kubernetes-charts-build.yaml @@ -72,7 +72,7 @@ jobs: - name: Package Helm Charts run: | helm package ./charts/medcat-service-helm --version $CHART_VERSION - helm package ./charts/medcat-trainer-helm --version $CHART_VERSION + helm package ./charts/medcat-trainer-helm --version $CHART_VERSION --dependency-update - name: Helm OCI login to Docker Hub run: helm registry login registry-1.docker.io -u ${{ secrets.DOCKERHUB_USERNAME }} -p ${{ secrets.DOCKERHUB_TOKEN }} From c83483fca9eab70b64c50a73af0aacc775b72cc1 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Thu, 11 Sep 2025 09:14:43 +0000 Subject: [PATCH 12/14] ops: Created Medcat trainer helm. 
Add helm test --- .../medcat-trainer-helm/templates/tests/test-connection.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/tests/test-connection.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/tests/test-connection.yaml index cf3217b..8a9f4cb 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/tests/test-connection.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/tests/test-connection.yaml @@ -11,5 +11,5 @@ spec: - name: wget image: busybox command: ['wget'] - args: ['{{ include "medcat-trainer-helm.fullname" . }}-nginx:{{ .Values.service.port }}/healthz', '-U helm-test {{ .Chart.Name }}-v{{ .Chart.Version }}'] + args: ['{{ include "medcat-trainer-helm.fullname" . }}-nginx:{{ .Values.service.port }}/nginx/health/live', '-U helm-test {{ .Chart.Name }}-v{{ .Chart.Version }}'] restartPolicy: Never From b07cb0daf8e136821ed37b7ada9024bff9badc31 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Thu, 11 Sep 2025 11:05:08 +0000 Subject: [PATCH 13/14] ops: Created Medcat trainer helm. Add github action. Cleanup --- .../templates/medcat-trainer-deployment.yaml | 18 +++++++++++------- .../templates/nginx-deployment.yaml | 2 +- .../charts/medcat-trainer-helm/values.yaml | 10 +++++----- deployment/kubernetes/local_dev_startup.sh | 2 ++ 4 files changed, 19 insertions(+), 13 deletions(-) diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-deployment.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-deployment.yaml index 2a4112d..03cf3cc 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-deployment.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/medcat-trainer-deployment.yaml @@ -36,7 +36,7 @@ spec: {{- toYaml . 
| nindent 8 }} {{- end }} containers: - - name: {{ .Chart.Name }} + - name: medcat-trainer {{- with .Values.securityContext }} securityContext: {{- toYaml . | nindent 12 }} @@ -77,20 +77,22 @@ spec: {{- toYaml . | nindent 12 }} {{- end }} volumeMounts: + - mountPath: /home/configs/base.txt + name: medcat-trainer-config + subPath: medcat-base.txt + - mountPath: /etc/supervisord.conf + name: medcat-trainer-config + subPath: supervisord.conf - mountPath: /home/api/media name: api-media - mountPath: /home/api/static name: api-static + {{- if eq .Values.DB_ENGINE "sqlite3" }} - mountPath: /home/api/db name: api-db - mountPath: /home/api/db-backup name: api-db-backup - - mountPath: /home/configs/base.txt - name: medcat-trainer-config - subPath: medcat-base.txt - - mountPath: /etc/supervisord.conf - name: medcat-trainer-config - subPath: supervisord.conf + {{- end }} {{- with .Values.volumeMounts }} {{- toYaml . | nindent 12 }} {{- end }} @@ -104,12 +106,14 @@ spec: - name: api-static persistentVolumeClaim: claimName: {{ include "medcat-trainer-helm.fullname" . }}-api-static + {{- if eq .Values.DB_ENGINE "sqlite3" }} - name: api-db persistentVolumeClaim: claimName: {{ include "medcat-trainer-helm.fullname" . }}-api-db - name: api-db-backup persistentVolumeClaim: claimName: {{ include "medcat-trainer-helm.fullname" . }}-api-db-backup + {{- end }} {{- with .Values.volumes }} {{- toYaml . | nindent 8 }} {{- end }} diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-deployment.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-deployment.yaml index c721c30..daecee0 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-deployment.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/templates/nginx-deployment.yaml @@ -36,7 +36,7 @@ spec: {{- toYaml . | nindent 8 }} {{- end }} containers: - - name: {{ .Chart.Name }} + - name: nginx {{- with .Values.securityContext }} securityContext: {{- toYaml . 
| nindent 12 }} diff --git a/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml b/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml index 7836be1..845e579 100644 --- a/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml +++ b/deployment/kubernetes/charts/medcat-trainer-helm/values.yaml @@ -4,7 +4,7 @@ # This will set the replicaset count more information can be found here: https://kubernetes.io/docs/concepts/workloads/controllers/replicaset/ replicaCount: 1 -nginxReplicaCount: 3 +nginxReplicaCount: 1 # This sets the container image more information can be found here: https://kubernetes.io/docs/concepts/containers/images/ image: @@ -89,11 +89,11 @@ medcatConfig: | cat.general.spacy_model = 'en_core_web_md' solr: - # replicaCount: 1 - # collectionShards: 1 - # collectionReplicas: 1 + replicaCount: 1 + collectionShards: 1 + collectionReplicas: 1 zookeeper: - # replicaCount: 1 + replicaCount: 1 persistence: size: 1Gi persistence: diff --git a/deployment/kubernetes/local_dev_startup.sh b/deployment/kubernetes/local_dev_startup.sh index 7e876cb..6347bb1 100644 --- a/deployment/kubernetes/local_dev_startup.sh +++ b/deployment/kubernetes/local_dev_startup.sh @@ -23,3 +23,5 @@ helm test medcat-service --logs helm upgrade my-test ./medcat-trainer-helm --install --recreate-pods --wait --timeout 5m0s # Install if it doesnt already exist, else upgrade # kubectl port-forward svc/medcat-trainer-solr 8983:8983 + +## helm install trainer-registry oci://registry-1.docker.io/cogstacksystems/medcat-trainer-helm --wait --timeout 5m0s From d3f17f2c79ca7b09f23137fce96cfaf27ae707a3 Mon Sep 17 00:00:00 2001 From: alhendrickson <159636032+alhendrickson@users.noreply.github.com.> Date: Thu, 11 Sep 2025 11:35:08 +0000 Subject: [PATCH 14/14] ops: Created Medcat trainer helm. 
Add docs --- .../platform/deployment/helm/charts/_index.md | 3 ++ .../helm/charts/medcat-trainer-helm.md | 49 +++++++++++++++++++ 2 files changed, 52 insertions(+) create mode 100644 docs/platform/deployment/helm/charts/medcat-trainer-helm.md diff --git a/docs/platform/deployment/helm/charts/_index.md b/docs/platform/deployment/helm/charts/_index.md index fe202c8..84810a0 100644 --- a/docs/platform/deployment/helm/charts/_index.md +++ b/docs/platform/deployment/helm/charts/_index.md @@ -6,10 +6,13 @@ The Helm charts for CogStack are published to Docker Hub, which is an OCI-compli - **MedCAT Service:** https://hub.docker.com/r/cogstacksystems/medcat-service-helm +- **MedCAT Trainer:** + https://hub.docker.com/r/cogstacksystems/medcat-trainer-helm ```{toctree} :maxdepth: 1 medcat-service-helm +medcat-trainer-helm ``` ## Chart Publishing diff --git a/docs/platform/deployment/helm/charts/medcat-trainer-helm.md b/docs/platform/deployment/helm/charts/medcat-trainer-helm.md new file mode 100644 index 0000000..030b943 --- /dev/null +++ b/docs/platform/deployment/helm/charts/medcat-trainer-helm.md @@ -0,0 +1,49 @@ +# MedCAT Trainer Helm Chart + +This Helm chart deploys MedCAT Trainer and its supporting infrastructure to a Kubernetes cluster. + +By default the chart will: + +- Run the MedCAT Trainer Django server +- Run NGINX for static site hosting and routing +- Run a SOLR and Zookeeper cluster for the Concept DB +- Run a Postgres database for persistence + + +## Installation + +```sh +helm install my-medcat-trainer oci://registry-1.docker.io/cogstacksystems/medcat-trainer-helm +``` + +## Configuration + +The following values are the ones most commonly changed: + +| Setting | Description | +| -------- | -------- | +| `env` | Environment variables as defined in the [MedCAT Trainer docs](https://docs.cogstack.org/projects/medcat-trainer/en/latest/installation.html). 
| +|`medcatConfig`|MedCAT config file as described [here](https://github.com/CogStack/cogstack-nlp/blob/main/medcat-v2/medcat/config/config.py)| +| `env.CSRF_TRUSTED_ORIGINS` | The origin (scheme, host and port, e.g. `http://localhost:8080`) used to access the application | + + +### Use SQLite instead of Postgres + +SQLite can be used for smaller single-instance deployments. + +Set these values: + +```yaml +DB_ENGINE: "sqlite3" + +postgresql: + enabled: false +``` + +## Missing features +These features do not exist yet but will be added in future: +- Use a pre-existing Postgres database +- Use a pre-existing SOLR instance +- Migrate from supervisord to a standalone deployment for background tasks, for better scaling +- Support SOLR authentication from MedCAT Trainer +- Support passing DB OPTIONS to MedCAT Trainer for use in cloud environments