diff --git a/.github/workflows/medcat-trainer_ci.yml b/.github/workflows/medcat-trainer_ci.yml index ed964c66f..8e11de07c 100644 --- a/.github/workflows/medcat-trainer_ci.yml +++ b/.github/workflows/medcat-trainer_ci.yml @@ -3,8 +3,8 @@ name: medcat-trainer ci-build on: pull_request: paths: - - 'medcat-trainer/**' - - '.github/workflows/medcat-trainer**' + - "medcat-trainer/**" + - ".github/workflows/medcat-trainer**" defaults: run: @@ -23,7 +23,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v6 with: - python-version: '3.10' + python-version: "3.10" - name: Install dependencies run: | @@ -45,7 +45,6 @@ jobs: cd client python -m build - test-frontend: runs-on: ubuntu-latest steps: @@ -55,25 +54,67 @@ jobs: - name: Set up Node.js uses: actions/setup-node@v6 with: - node-version: '20' + node-version: "20" - name: Install dependencies run: | - cd webapp/frontend - npm ci + cd webapp/frontend + npm ci - name: Run frontend tests run: | cd webapp/frontend npm run test:unit - + test-backend: + runs-on: ubuntu-latest + steps: + - name: Checkout main + uses: actions/checkout@v5 + with: + ref: ${{ github.ref }} + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.11" + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y build-essential + + - name: Install Rust + run: | + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + echo "$HOME/.cargo/bin" >> $GITHUB_PATH + + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + cd webapp + pip install -r requirements.txt + + - name: Download spaCy model + run: | + python -m spacy download en_core_web_md + + - name: Run Django tests + env: + DB_ENGINE: sqlite3 + SECRET_KEY: test-secret-key + DEBUG: 1 + run: | + cd webapp/api + python manage.py test + # Build and test webapp container build-and-push: runs-on: ubuntu-latest - needs: + needs: - test-client - test-frontend + - test-backend steps: - name: Checkout main uses: actions/checkout@v5 diff --git a/.github/workflows/medcat-trainer_qa.yml b/.github/workflows/medcat-trainer_qa.yml index c5823aaa1..e90579a1f 100644 --- a/.github/workflows/medcat-trainer_qa.yml +++ b/.github/workflows/medcat-trainer_qa.yml @@ -2,7 +2,7 @@ name: medcat-trainer qa-build on: push: - branches: [ main ] + branches: [main] permissions: id-token: write @@ -19,12 +19,12 @@ jobs: - name: Checkout main uses: actions/checkout@v5 with: - ref: 'main' + ref: "main" - name: Set up Python uses: actions/setup-python@v6 with: - python-version: '3.10' + python-version: "3.10" - name: Install dependencies run: | @@ -64,29 +64,72 @@ jobs: - name: Set up Node.js uses: actions/setup-node@v6 with: - node-version: '20' + node-version: "20" - name: Install dependencies run: | - cd webapp/frontend - npm ci + cd webapp/frontend + npm ci - name: Run frontend tests run: | cd webapp/frontend npm run test:unit + test-backend: + runs-on: ubuntu-latest + steps: + - name: Checkout main + uses: actions/checkout@v5 + with: + ref: "main" + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.11" + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y build-essential + + - name: Install Rust + run: | + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + echo "$HOME/.cargo/bin" >> $GITHUB_PATH + + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + cd webapp + pip install -r requirements.txt + + - name: Download spaCy model + run: | + python -m spacy download en_core_web_md + + - name: Run Django tests + env: + DB_ENGINE: sqlite3 + SECRET_KEY: test-secret-key + DEBUG: 1 + run: | + cd webapp/api + python manage.py test + # Build and test webapp container build-and-push: runs-on: ubuntu-latest - needs: + needs: - test-client - test-frontend + - test-backend steps: - name: Checkout main uses: actions/checkout@v5 with: - ref: 'main' + ref: "main" - name: Build run: | diff --git a/.github/workflows/medcat-trainer_release.yml b/.github/workflows/medcat-trainer_release.yml index b0e9d5d13..49afac561 100644 --- a/.github/workflows/medcat-trainer_release.yml +++ b/.github/workflows/medcat-trainer_release.yml @@ -3,7 +3,7 @@ name: medcat-trainer release-build on: push: tags: - - 'medcat-trainer/v*.*.*' + - "medcat-trainer/v*.*.*" permissions: id-token: write @@ -29,7 +29,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v6 with: - python-version: '3.10' + python-version: "3.10" - name: Install dependencies run: | @@ -57,10 +57,78 @@ jobs: with: packages_dir: medcat-trainer/client/dist + test-frontend: + runs-on: ubuntu-latest + steps: + - name: Checkout main + uses: actions/checkout@v5 + with: + ref: "main" + + - name: Set up Node.js + uses: actions/setup-node@v6 + with: + node-version: "20" + + - name: Install dependencies + run: | + cd webapp/frontend + npm ci + + - name: Run frontend tests + run: | + cd webapp/frontend + npm run test:unit + + test-backend: + runs-on: ubuntu-latest + steps: + - name: Checkout main + uses: actions/checkout@v5 + with: + ref: "main" + + - name: Set up Python + uses: actions/setup-python@v6 + with: + python-version: "3.11" + + - name: Install system dependencies + run: | + sudo apt-get update + sudo apt-get install -y build-essential + + - name: Install Rust + run: | + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | sh -s -- -y + echo "$HOME/.cargo/bin" >> $GITHUB_PATH + + - name: Install Python dependencies + run: | + python -m pip install --upgrade pip + cd webapp + pip install -r requirements.txt + + - name: Download spaCy model + run: | + python -m spacy download en_core_web_md + + - name: Run Django tests + env: + DB_ENGINE: sqlite3 + SECRET_KEY: test-secret-key + DEBUG: 1 + run: | + cd webapp/api + python manage.py test + # Build and test webapp container build-and-push: runs-on: ubuntu-latest - needs: test-and-publish-client + needs: + - test-and-publish-client + - test-frontend + - test-backend steps: - name: Checkout main uses: actions/checkout@v5 diff --git a/medcat-trainer/webapp/api/api/data_utils.py b/medcat-trainer/webapp/api/api/data_utils.py index 9ecde91ee..e22e5b89b 100644 --- a/medcat-trainer/webapp/api/api/data_utils.py +++ b/medcat-trainer/webapp/api/api/data_utils.py @@ -1,10 +1,12 @@ import json import logging +import os import re from collections import defaultdict from datetime import datetime from typing import Dict, List +import pandas as pd from django.contrib.auth.models import User from django.db import transaction from django.db.models import Q @@ -19,6 +21,11 @@ logger = logging.getLogger(__name__) +class InvalidParameterError(Exception): + """Exception raised when invalid parameters are provided""" + pass + + def dataset_from_file(dataset: Dataset): if '.csv' in dataset.original_file.path: df = pd.read_csv(dataset.original_file.path, on_bad_lines='error') @@ -210,8 +217,10 @@ def upload_projects_export( m_a = MetaAnnotation() m_a.annotated_entity = a # there will be at least one or more of these available. - m_a = MetaTask.objects.filter(name=task_name).first() + m_a.meta_task = MetaTask.objects.filter(name=task_name).first() m_a.validated = meta_anno['validated'] + m_a.acc = meta_anno['acc'] + m_a.meta_task_value = MetaTaskValue.objects.filter(name=meta_anno['value']).first() m_a.save() # missing acc on the model anno_to_doc_ind = {a.start_ind: a for a in annos} diff --git a/medcat-trainer/webapp/api/api/tests.py b/medcat-trainer/webapp/api/api/tests.py deleted file mode 100644 index 7ce503c2d..000000000 --- a/medcat-trainer/webapp/api/api/tests.py +++ /dev/null @@ -1,3 +0,0 @@ -from django.test import TestCase - -# Create your tests here. diff --git a/medcat-trainer/webapp/api/api/tests/__init__.py b/medcat-trainer/webapp/api/api/tests/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/medcat-trainer/webapp/api/api/tests/fixtures/example.json b/medcat-trainer/webapp/api/api/tests/fixtures/example.json new file mode 100644 index 000000000..740ce25a8 --- /dev/null +++ b/medcat-trainer/webapp/api/api/tests/fixtures/example.json @@ -0,0 +1,60 @@ +{ + "projects": [ + { + "id": 999, + "name": "SMAll_SYNT-EXAMPLE-AD", + "cuis": "", + "tuis": "", + "documents": [ + { + "id": 1787, + "name": "Doc-iotost", + "last_modified": "2025-11-11T17:02:49.377416", + "text": "Patient reports increasing forgetfulness and difficulty recalling recent events. Family members have observed changes in behaviour and personality. Cognitive assessment revealed deficits consistent with early cognitive impairment. Differential diagnosis includes Neurodegenerative dementia and vascular dementia. Further investigations, including MRI brain and neuropsychological testing, are planned to clarify the diagnosis. The patient denies any significant head trauma.", + "annotations": [ + { + "start": 265, + "end": 291, + "cui": "26929004", + "value": "Neurodegenerative dementia", + "validated": true, + "user": "synthetic_reviewer", + "meta_anns": { + "Presence": { + "name": "Presence", + "value": "True", + "acc": 1, + "validated": true + }, + "Subject": { + "name": "Subject", + "value": "Patient", + "acc": 1, + "validated": true + }, + "Time": { + "name": "Time", + "value": "Recent", + "acc": 1, + "validated": true + } + }, + "correct": true, + "deleted": false, + "alternative": false, + "killed": false, + "irrelevant": false, + "manually_created": false, + "acc": 1.0, + "id": 182294767 + } + ] + } + ], + "project_group_id": null, + "project_group_name": null, + "project_status": "A", + "project_locked": false + } + ] +} \ No newline at end of file diff --git a/medcat-trainer/webapp/api/api/tests/test_data_utils.py b/medcat-trainer/webapp/api/api/tests/test_data_utils.py new file mode 100644 index 000000000..87d230ee4 --- /dev/null +++ b/medcat-trainer/webapp/api/api/tests/test_data_utils.py @@ -0,0 +1,379 @@ +import json +import os +import tempfile +from unittest.mock import patch + +from django.contrib.auth.models import User +from django.core.files.uploadedfile import SimpleUploadedFile +from django.test import TestCase, override_settings + +from ..data_utils import upload_projects_export, InvalidParameterError +from ..models import ( + ProjectAnnotateEntities, ConceptDB, Vocabulary, ModelPack, + Dataset, Document, Entity, AnnotatedEntity, MetaTask, MetaTaskValue, + MetaAnnotation, Relation, EntityRelation +) + + +class UploadProjectsExportTestCase(TestCase): + """Test cases for upload_projects_export function""" + + def setUp(self): + """Set up test fixtures""" + # Create a test user + self.user = User.objects.create_user( + username='synthetic_reviewer', + email='test@example.com', + password='testpass123' + ) + + # Create test ConceptDB and Vocabulary with skip_load to avoid file validation + self.cdb = ConceptDB( + name='test_cdb', + cdb_file='test_cdb.dat', + use_for_training=True + ) + self.cdb.save(skip_load=True) + + self.vocab = Vocabulary( + name='test_vocab', + vocab_file='test_vocab.dat' + ) + self.vocab.save(skip_load=True) + + # Load example JSON from test fixtures + example_json_path = os.path.join( + os.path.dirname(__file__), + 'fixtures', 'example.json' + ) + with open(example_json_path, 'r') as f: + self.medcat_export = json.load(f) + + def _patch_media_root(self): + """Helper method to patch MEDIA_ROOT in data_utils module""" + from django.conf import settings + from .. import data_utils + original_media_root = data_utils.MEDIA_ROOT + data_utils.MEDIA_ROOT = settings.MEDIA_ROOT + return original_media_root + + def _restore_media_root(self, original_media_root): + """Helper method to restore MEDIA_ROOT in data_utils module""" + from .. import data_utils + data_utils.MEDIA_ROOT = original_media_root + + @override_settings(MEDIA_ROOT=tempfile.mkdtemp()) + def test_upload_projects_export_with_cdb_vocab(self): + """Test uploading projects export with CDB and Vocabulary""" + original_media_root = self._patch_media_root() + try: + # Call the function + upload_projects_export( + medcat_export=self.medcat_export, + cdb_id=str(self.cdb.id), + vocab_id=str(self.vocab.id), + modelpack_id=None + ) + + # Verify project was created + project = ProjectAnnotateEntities.objects.get( + name="SMAll_SYNT-EXAMPLE-AD IMPORTED" + ) + self.assertIsNotNone(project) + self.assertEqual(project.concept_db.id, self.cdb.id) + self.assertEqual(project.vocab.id, self.vocab.id) + + # Verify dataset was created + dataset = project.dataset + self.assertIsNotNone(dataset) + self.assertEqual(dataset.name, "SMAll_SYNT-EXAMPLE-AD IMPORTED_dataset") + + # Verify document was created + document = Document.objects.filter(dataset=dataset).first() + self.assertIsNotNone(document) + self.assertIn("Patient reports increasing forgetfulness", document.text) + + # Verify entity was created + entity = Entity.objects.get(label="26929004") + self.assertIsNotNone(entity) + + # Verify annotated entity was created + annotated_entity = AnnotatedEntity.objects.filter( + project=project, + document=document, + entity=entity + ).first() + self.assertIsNotNone(annotated_entity) + self.assertEqual(annotated_entity.value, "Neurodegenerative dementia") + self.assertEqual(annotated_entity.start_ind, 265) + self.assertEqual(annotated_entity.end_ind, 291) + self.assertEqual(annotated_entity.user, self.user) + self.assertTrue(annotated_entity.validated) + self.assertTrue(annotated_entity.correct) + self.assertFalse(annotated_entity.deleted) + self.assertFalse(annotated_entity.alternative) + self.assertFalse(annotated_entity.killed) + self.assertFalse(annotated_entity.irrelevant) + self.assertFalse(annotated_entity.manually_created) + self.assertEqual(annotated_entity.acc, 1.0) + + # Verify meta tasks were created + meta_task_presence = MetaTask.objects.get(name="Presence") + meta_task_subject = MetaTask.objects.get(name="Subject") + meta_task_time = MetaTask.objects.get(name="Time") + self.assertIsNotNone(meta_task_presence) + self.assertIsNotNone(meta_task_subject) + self.assertIsNotNone(meta_task_time) + + # Verify meta task values were created + meta_value_true = MetaTaskValue.objects.get(name="True") + meta_value_patient = MetaTaskValue.objects.get(name="Patient") + meta_value_recent = MetaTaskValue.objects.get(name="Recent") + self.assertIsNotNone(meta_value_true) + self.assertIsNotNone(meta_value_patient) + self.assertIsNotNone(meta_value_recent) + + # Verify meta annotations were created + meta_ann_presence = MetaAnnotation.objects.filter( + annotated_entity=annotated_entity, + meta_task=meta_task_presence + ).first() + self.assertIsNotNone(meta_ann_presence) + self.assertEqual(meta_ann_presence.meta_task_value, meta_value_true) + self.assertTrue(meta_ann_presence.validated) + self.assertEqual(meta_ann_presence.acc, 1) + + meta_ann_subject = MetaAnnotation.objects.filter( + annotated_entity=annotated_entity, + meta_task=meta_task_subject + ).first() + self.assertIsNotNone(meta_ann_subject) + self.assertEqual(meta_ann_subject.meta_task_value, meta_value_patient) + + meta_ann_time = MetaAnnotation.objects.filter( + annotated_entity=annotated_entity, + meta_task=meta_task_time + ).first() + self.assertIsNotNone(meta_ann_time) + self.assertEqual(meta_ann_time.meta_task_value, meta_value_recent) + finally: + self._restore_media_root(original_media_root) + + @override_settings(MEDIA_ROOT=tempfile.mkdtemp()) + @patch('api.models.CAT.attempt_unpack') + @patch('api.models.CAT.load_cdb') + @patch('api.models.CAT.load_addons') + @patch('api.models.Vocab.load') + @patch('os.path.exists') + def test_upload_projects_export_with_modelpack(self, mock_exists, mock_vocab_load, mock_load_addons, mock_cdb_load, mock_unpack): + """Test uploading projects export with ModelPack""" + original_media_root = self._patch_media_root() + try: + # Mock all file operations + mock_exists.return_value = False + mock_load_addons.return_value = [] + # Create a model pack - the save will be mocked to avoid actual file operations + from django.core.files.uploadedfile import SimpleUploadedFile + modelpack = ModelPack( + name='test_modelpack', + model_pack=SimpleUploadedFile('test_modelpack.zip', b'fake zip') + ) + # Save with mocked file operations - it will fail on file loading but that's ok + try: + modelpack.save() + except (FileNotFoundError, Exception): + # If save fails, create it directly in the database + from django.utils import timezone + ModelPack.objects.filter(name='test_modelpack').delete() + modelpack = ModelPack.objects.create( + name='test_modelpack', + model_pack='test_modelpack.zip' + ) + # Manually set the file field to avoid save() being called again + ModelPack.objects.filter(id=modelpack.id).update(model_pack='test_modelpack.zip') + modelpack.refresh_from_db() + + # Call the function + upload_projects_export( + medcat_export=self.medcat_export, + cdb_id=None, + vocab_id=None, + modelpack_id=str(modelpack.id) + ) + + # Verify project was created with modelpack + project = ProjectAnnotateEntities.objects.get( + name="SMAll_SYNT-EXAMPLE-AD IMPORTED" + ) + self.assertIsNotNone(project) + self.assertEqual(project.model_pack.id, modelpack.id) + finally: + self._restore_media_root(original_media_root) + + @override_settings(MEDIA_ROOT=tempfile.mkdtemp()) + def test_upload_projects_export_no_cdb_vocab_modelpack(self): + """Test that InvalidParameterError is raised when no cdb/vocab/modelpack provided""" + with self.assertRaises(InvalidParameterError) as context: + upload_projects_export( + medcat_export=self.medcat_export, + cdb_id=None, + vocab_id=None, + modelpack_id=None + ) + self.assertIn("No cdb, vocab, or modelpack provided", str(context.exception)) + + @override_settings(MEDIA_ROOT=tempfile.mkdtemp()) + def test_upload_projects_export_skips_empty_projects(self): + """Test that projects with no documents are skipped""" + original_media_root = self._patch_media_root() + try: + # Create export with empty project + empty_export = { + "projects": [ + { + "name": "Empty Project", + "cuis": "", + "documents": [] + } + ] + } + + upload_projects_export( + medcat_export=empty_export, + cdb_id=str(self.cdb.id), + vocab_id=str(self.vocab.id), + modelpack_id=None + ) + + # Verify no project was created + self.assertFalse( + ProjectAnnotateEntities.objects.filter(name__contains="Empty Project").exists() + ) + finally: + self._restore_media_root(original_media_root) + + @override_settings(MEDIA_ROOT=tempfile.mkdtemp()) + def test_upload_projects_export_with_custom_suffix(self): + """Test uploading with custom project name suffix""" + original_media_root = self._patch_media_root() + try: + upload_projects_export( + medcat_export=self.medcat_export, + cdb_id=str(self.cdb.id), + vocab_id=str(self.vocab.id), + modelpack_id=None, + project_name_suffix=' - CUSTOM' + ) + + # Verify project was created with custom suffix + project = ProjectAnnotateEntities.objects.get( + name="SMAll_SYNT-EXAMPLE-AD - CUSTOM" + ) + self.assertIsNotNone(project) + finally: + self._restore_media_root(original_media_root) + + @override_settings(MEDIA_ROOT=tempfile.mkdtemp()) + def test_upload_projects_export_with_members(self): + """Test uploading with members""" + original_media_root = self._patch_media_root() + try: + user2 = User.objects.create_user( + username='user2', + email='user2@example.com', + password='testpass123' + ) + + upload_projects_export( + medcat_export=self.medcat_export, + cdb_id=str(self.cdb.id), + vocab_id=str(self.vocab.id), + modelpack_id=None, + members=[str(self.user.id), str(user2.id)] + ) + + # Verify project was created with members + project = ProjectAnnotateEntities.objects.get( + name="SMAll_SYNT-EXAMPLE-AD IMPORTED" + ) + self.assertIn(self.user, project.members.all()) + self.assertIn(user2, project.members.all()) + finally: + self._restore_media_root(original_media_root) + + @override_settings(MEDIA_ROOT=tempfile.mkdtemp()) + def test_upload_projects_export_with_set_validated_docs(self): + """Test uploading with set_validated_docs=True""" + original_media_root = self._patch_media_root() + try: + upload_projects_export( + medcat_export=self.medcat_export, + cdb_id=str(self.cdb.id), + vocab_id=str(self.vocab.id), + modelpack_id=None, + set_validated_docs=True + ) + + # Verify project was created + project = ProjectAnnotateEntities.objects.get( + name="SMAll_SYNT-EXAMPLE-AD IMPORTED" + ) + dataset = project.dataset + documents = Document.objects.filter(dataset=dataset) + + # Verify all documents are in validated_documents + for doc in documents: + self.assertIn(doc, project.validated_documents.all()) + finally: + self._restore_media_root(original_media_root) + + @override_settings(MEDIA_ROOT=tempfile.mkdtemp()) + def test_upload_projects_export_with_unavailable_user(self): + """Test that unavailable users cause KeyError when trying to create annotations""" + original_media_root = self._patch_media_root() + try: + # Create export with non-existent user + export_with_unknown_user = { + "projects": [ + { + "name": "Test Project", + "cuis": "", + "documents": [ + { + "name": "Doc1", + "text": "Test text", + "annotations": [ + { + "start": 0, + "end": 4, + "cui": "C123456", + "value": "Test", + "validated": True, + "user": "nonexistent_user", + "meta_anns": {}, + "correct": True, + "deleted": False, + "alternative": False, + "killed": False, + "irrelevant": False, + "manually_created": False, + "acc": 1.0 + } + ] + } + ] + } + ] + } + + # This should raise a KeyError when trying to access the non-existent user + with self.assertRaises(KeyError): + upload_projects_export( + medcat_export=export_with_unknown_user, + cdb_id=str(self.cdb.id), + vocab_id=str(self.vocab.id), + modelpack_id=None + ) + finally: + self._restore_media_root(original_media_root) + diff --git a/medcat-trainer/webapp/api/api/views.py b/medcat-trainer/webapp/api/api/views.py index b2e5215ae..988be0149 100644 --- a/medcat-trainer/webapp/api/api/views.py +++ b/medcat-trainer/webapp/api/api/views.py @@ -707,8 +707,8 @@ def upload_deployment(request): set_validated_docs) return Response("successfully uploaded", 200) except Exception as e: - logger.error(f"Failed to upload projects export: {str(e)}", exc_info=e) - return Response(f"Failed to upload projects export: {e.message}", 500) + logger.error(f"Failed to upload projects export: {e}", exc_info=e) + return Response(f"Failed to upload projects export: {e}", 500) @api_view(http_method_names=['GET', 'DELETE'])