From a5a6557a6b31dcb4ec17be6874054e03f811698a Mon Sep 17 00:00:00 2001
From: Tom Searle <tom@cogstack.org>
Date: Wed, 1 Oct 2025 11:06:55 +0100
Subject: [PATCH 1/5] feat(medcat-trainer): improve client api, not importing
 empty projects

---
 medcat-trainer/client/mctclient.py          |  6 ++++--
 medcat-trainer/webapp/api/api/data_utils.py | 10 +++++++---
 medcat-trainer/webapp/api/api/views.py      | 10 +++++++---
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/medcat-trainer/client/mctclient.py b/medcat-trainer/client/mctclient.py
index e119c835..27da892f 100644
--- a/medcat-trainer/client/mctclient.py
+++ b/medcat-trainer/client/mctclient.py
@@ -595,7 +595,8 @@ def get_project_annos(self, projects: List[MCTProject]):
     def upload_projects_export(self, projects: Dict[str, Any],
                                cdb: Union[MCTConceptDB, str]=None,
                                vocab: Union[MCTVocab, str]=None,
-                               modelpack: Union[MCTModelPack, str]=None):
+                               modelpack: Union[MCTModelPack, str]=None,
+                               import_project_name_suffix: str=' IMPORTED'):
         """Upload Trainer export as a list of projects to a MedCATTrainer instance.
 
         Args:
@@ -612,7 +613,8 @@ def upload_projects_export(self, projects: Dict[str, Any],
             modelpack = [m for m in self.get_model_packs() if m.name == modelpack].pop()
 
         payload = {
-            'exported_projects': projects
+            'exported_projects': projects,
+            'project_name_suffix': import_project_name_suffix
         }
 
         if cdb and vocab:
diff --git a/medcat-trainer/webapp/api/api/data_utils.py b/medcat-trainer/webapp/api/api/data_utils.py
index 27d41140..7199bd0a 100644
--- a/medcat-trainer/webapp/api/api/data_utils.py
+++ b/medcat-trainer/webapp/api/api/data_utils.py
@@ -3,7 +3,7 @@
 import re
 from collections import defaultdict
 from datetime import datetime
-from typing import Dict
+from typing import Dict, List
 
 from django.contrib.auth.models import User
 from django.db import transaction
@@ -66,10 +66,14 @@ def delete_orphan_docs(dataset: Dataset):
     Document.objects.filter(dataset__id=dataset.id).delete()
 
 
-def upload_projects_export(medcat_export: Dict, cdb_id: str, vocab_id: str, modelpack_id: str):
+def upload_projects_export(medcat_export: Dict, cdb_id: str, vocab_id: str, modelpack_id: str,
+                            project_name_suffix: str=' IMPORTED'):
     for proj in medcat_export['projects']:
+        if len(proj['documents']) == 0:
+            # don't add projects with no documents
+            continue
         p = ProjectAnnotateEntities()
-        p.name = proj['name'] + ' IMPORTED'
+        p.name = f"{proj['name']}{project_name_suffix}"
         if len(proj['cuis']) > 1000:
             # store large CUI lists in a json file.
             cuis_file_name = MEDIA_ROOT + '/' + re.sub('/|\.', '_', p.name + '_cuis_file') + '.json'
diff --git a/medcat-trainer/webapp/api/api/views.py b/medcat-trainer/webapp/api/api/views.py
index 1fe2cb99..49282f05 100644
--- a/medcat-trainer/webapp/api/api/views.py
+++ b/medcat-trainer/webapp/api/api/views.py
@@ -686,13 +686,17 @@ def upload_deployment(request):
     cdb_id = deployment_export.get('cdb_id', None)
     vocab_id = deployment_export.get('vocab_id', None)
     modelpack_id = deployment_export.get('modelpack_id', None)
+    project_name_suffix = deployment_export.get('project_name_suffix', ' IMPORTED')
 
     if all(x is None for x in [cdb_id, vocab_id, modelpack_id]):
         return Response("No cdb, vocab, or modelpack provided", 400)
 
-    upload_projects_export(deployment_upload, cdb_id, vocab_id, modelpack_id)
-    # logger.info(f'Errors encountered during previous deployment upload\n{errs}')
-    return Response("successfully uploaded", 200)
+    try:
+        upload_projects_export(deployment_upload, cdb_id, vocab_id, modelpack_id,
+                               project_name_suffix)
+        return Response("successfully uploaded", 200)
+    except Exception as e:
+        return Response(f"Failed to upload projects export: {str(e)}", 500)
 
 
 @api_view(http_method_names=['GET', 'DELETE'])

From d1aee8fc337e65ecc7a7cf7694e007ce3d44d26a Mon Sep 17 00:00:00 2001
From: Tom Searle <tom@cogstack.org>
Date: Wed, 1 Oct 2025 11:28:57 +0100
Subject: [PATCH 2/5] test(medcat-trainer): expect project_name_suffix in upload payload

---
 medcat-trainer/client/tests/test_mctclient.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/medcat-trainer/client/tests/test_mctclient.py b/medcat-trainer/client/tests/test_mctclient.py
index adea343e..e3a9bc9b 100644
--- a/medcat-trainer/client/tests/test_mctclient.py
+++ b/medcat-trainer/client/tests/test_mctclient.py
@@ -156,6 +156,7 @@ def post_side_effect(url, *args, **kwargs):
             headers=session.headers,
             json={
                 'exported_projects': projects,
+                'project_name_suffix': ' IMPORTED',
                 'cdb_id': '20',
                 'vocab_id': '30'
             }
@@ -210,6 +211,7 @@ def post_side_effect(url, *args, **kwargs):
             headers=session.headers,
             json={
                 'exported_projects': projects,
+                'project_name_suffix': ' IMPORTED',
                 'cdb_id': '20',
                 'vocab_id': '30'
             }
@@ -249,6 +251,7 @@ def post_side_effect(url, *args, **kwargs):
             headers=session.headers,
             json={
                 'exported_projects': projects,
+                'project_name_suffix': ' IMPORTED',
                 'modelpack_id': '40'
             }
         )
@@ -297,6 +300,7 @@ def post_side_effect(url, *args, **kwargs):
             headers=session.headers,
             json={
                 'exported_projects': projects,
+                'project_name_suffix': ' IMPORTED',
                 'modelpack_id': '40'
             }
         )

From d05680bb2948883aeeef86db0c75e72b72da326f Mon Sep 17 00:00:00 2001
From: Tom Searle <tom@cogstack.org>
Date: Tue, 7 Oct 2025 01:14:11 +0100
Subject: [PATCH 3/5] feat(medcat-trainer): improve client for synthetic data
 gen validation

---
 medcat-trainer/client/mctclient.py            |  57 +++++--
 .../notebook_docs/Client_API_Tutorials.ipynb  | 159 +++++-------------
 medcat-trainer/webapp/api/api/data_utils.py   |  25 ++-
 ...edproject_cdb_search_filter_id_and_more.py |  42 +++++
 medcat-trainer/webapp/api/api/models.py       |  14 +-
 medcat-trainer/webapp/api/api/views.py        |  15 +-
 6 files changed, 169 insertions(+), 143 deletions(-)
 create mode 100644 medcat-trainer/webapp/api/api/migrations/0092_exportedproject_cdb_search_filter_id_and_more.py

diff --git a/medcat-trainer/client/mctclient.py b/medcat-trainer/client/mctclient.py
index 27da892f..7a4285a5 100644
--- a/medcat-trainer/client/mctclient.py
+++ b/medcat-trainer/client/mctclient.py
@@ -65,8 +65,6 @@ class MCTConceptDB(MCTObj):
 
     def __post_init__(self):
         if self.name is not None:
-            if not self.name[0].islower():
-                raise ValueError("Name must start with a lowercase letter")
             if not self.name.replace('_', '').replace('-', '').isalnum():
                 raise ValueError("Name must contain only alphanumeric characters and underscores")
 
@@ -90,26 +88,24 @@ def __str__(self):
 
 
 @dataclass
-class MCTModelPack(MCTObj):
-    """A model pack in the MedCATTrainer instance.
+class MCTMetaTask(MCTObj):
+    """A meta task in the MedCATTrainer instance.
 
     Attributes:
-        name (str): The name of the model pack.
-        model_pack_zip (str): The path to the model pack zip file, should be a <modelpack_name>.zip file.
+        name (str): The name of the meta task.
     """
     name: str=None
-    model_pack_zip: str=None
 
     def __str__(self):
-        return f'{self.id} : {self.name} \t {self. model_pack_zip}'
+        return f'{self.id} : {self.name}'
 
 
 @dataclass
-class MCTMetaTask(MCTObj):
-    """A meta task in the MedCATTrainer instance.
+class MCTRelTask(MCTObj):
+    """A relation extraction task in the MedCATTrainer instance.
 
     Attributes:
-        name (str): The name of the meta task.
+        name (str): The name of the relation extraction task.
     """
     name: str=None
 
@@ -118,16 +114,22 @@ def __str__(self):
 
 
 @dataclass
-class MCTRelTask(MCTObj):
-    """A relation extraction task in the MedCATTrainer instance.
+class MCTModelPack(MCTObj):
+    """A model pack in the MedCATTrainer instance.
 
     Attributes:
-        name (str): The name of the relation extraction task.
+        name (str): The name of the model pack.
+        model_pack_zip (str): The path to the model pack zip file, should be a <modelpack_name>.zip file.
     """
     name: str=None
+    model_pack_zip: str=None
+    concept_db: MCTConceptDB=None
+    vocab: MCTVocab=None
+    meta_cats: List[MCTMetaTask]=None
 
     def __str__(self):
-        return f'{self.id} : {self.name}'
+        return f'{self.id} : {self.name} \t {self. model_pack_zip}'
+
 
 
 @dataclass
@@ -520,7 +522,11 @@ def get_model_packs(self) -> List[MCTModelPack]:
             List[MCTModelPack]: A list of all MedCAT model packs in the MedCATTrainer instance
         """
         resp = json.loads(requests.get(f'{self.server}/api/modelpacks/', headers=self.headers).text)['results']
-        mct_model_packs = [MCTModelPack(id=mp['id'], name=mp['name'], model_pack_zip=mp['model_pack']) for mp in resp]
+        mct_model_packs = [MCTModelPack(id=mp['id'], name=mp['name'], model_pack_zip=mp['model_pack'],
+                                        concept_db=MCTConceptDB(id=mp['concept_db']),
+                                        vocab=MCTVocab(id=mp['vocab']),
+                                        meta_cats=[MCTMetaTask(id=mt) for mt in mp['meta_cats']])
+                            for mp in resp]
         return mct_model_packs
 
     def get_meta_tasks(self) -> List[MCTMetaTask]:
@@ -596,7 +602,10 @@ def upload_projects_export(self, projects: Dict[str, Any],
                                cdb: Union[MCTConceptDB, str]=None,
                                vocab: Union[MCTVocab, str]=None,
                                modelpack: Union[MCTModelPack, str]=None,
-                               import_project_name_suffix: str=' IMPORTED'):
+                               import_project_name_suffix: str=' IMPORTED',
+                               cdb_search_filter: Union[MCTConceptDB, str]=None,
+                               members: Union[List[MCTUser], List[str]]=None,
+                               set_validated_docs: bool=False):
         """Upload Trainer export as a list of projects to a MedCATTrainer instance.
 
         Args:
@@ -604,6 +613,10 @@ def upload_projects_export(self, projects: Dict[str, Any],
             cdb (Union[MCTConceptDB, str]): The concept database to be used in the project - CDB name or the MCTCDB Object
             vocab (Union[MCTVocab, str]): The vocabulary to be used in the project - Vocab name or the MCTVocab Object
             modelpack (Union[MCTModelPack, str]): The model pack to be used in the project - ModelPack name or the MCTModelPack Object
+            import_project_name_suffix (str): The suffix to be added to the project name
+            cdb_search_filter (Union[MCTConceptDB, str]): The concept database used for concept lookup (search) in the project - CDB name or the MCTConceptDB Object
+            members (Union[List[MCTUser], List[str]]): The annotators for the project - List of MCTUser objects or list of user names
+            set_validated_docs (bool): Whether to set the validated documents, e.g. their annotation submit status.
         """
         if isinstance(cdb, str):
             cdb = [c for c in self.get_concept_dbs() if c.name == cdb].pop()
@@ -611,10 +624,18 @@ def upload_projects_export(self, projects: Dict[str, Any],
             vocab = [v for v in self.get_vocabs() if v.name == vocab].pop()
         if isinstance(modelpack, str):
             modelpack = [m for m in self.get_model_packs() if m.name == modelpack].pop()
+        if isinstance(cdb_search_filter, str):
+            cdb_search_filter = [c for c in self.get_concept_dbs() if c.name == cdb_search_filter].pop()
+        if isinstance(members, str):
+            members = [m for m in self.get_users() if m.username == members].pop()
 
         payload = {
             'exported_projects': projects,
-            'project_name_suffix': import_project_name_suffix
+            'project_name_suffix': import_project_name_suffix,
+            'cdb_search_filter': cdb_search_filter.id,
+            'members': [m.id for m in members],
+            'import_project_name_suffix': import_project_name_suffix,
+            'set_validated_docs': set_validated_docs,
         }
 
         if cdb and vocab:
diff --git a/medcat-trainer/notebook_docs/Client_API_Tutorials.ipynb b/medcat-trainer/notebook_docs/Client_API_Tutorials.ipynb
index 5220262e..c3a13172 100644
--- a/medcat-trainer/notebook_docs/Client_API_Tutorials.ipynb
+++ b/medcat-trainer/notebook_docs/Client_API_Tutorials.ipynb
@@ -50,7 +50,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": 1,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -63,7 +63,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 4,
+   "execution_count": 2,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -90,53 +90,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 3,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Users:\n",
-      "3 : annotator2\n",
-      "2 : annotator1\n",
-      "1 : admin\n",
-      "\n",
-      "Datasets:\n",
-      "1 : Example Dataset \t http://localhost:8001/media/Example_Dataset.csv\n",
-      "2 : Neurology Notes \t http://localhost:8001/media/neurology_notes.csv\n",
-      "3 : SG-example-docs \t http://localhost:8001/media/sg-sample-docs.csv\n",
-      "\n",
-      "Concept DBs:\n",
-      "1 : umls_cdb \t http://localhost:8001/media/cdb.dat\n",
-      "2 : snomed_cdb \t http://localhost:8001/media/snomed-cdb.dat\n",
-      "3 : snomed_2022_modelpack_CDB \t http://localhost:8001/media/Users/k1897038/projects/MedCATtrainer/webapp/api/media/20230227__kch_gstt_trained_model_494c3717f637bb89/cdb.dat\n",
-      "8 : medcat_full_pack_CDB \t http://localhost:8001/media/Users/k1897038/projects/MedCATtrainer/webapp/api/media/medcat_model_pack_u3fB9G5/cdb.dat\n",
-      "12 : snomed-2023-bert-metacats_CDB \t http://localhost:8001/media/Users/k1897038/projects/MedCATtrainer/webapp/api/media/20230227__kch_gstt_trained_model_bert_metacats_138689a7bb83cb0a/cdb.dat\n",
-      "13 : de_id_modelpack_CDB \t http://localhost:8001/media/Users/k1897038/projects/MedCATtrainer/webapp/api/media/medcat_deid_trained_a7120281ebb9fc9e/cdb.dat\n",
-      "\n",
-      "Vocabularies:\n",
-      "1 : http://localhost:8001/media/vocab.dat\n",
-      "3 : http://localhost:8001/media/20230227__kch_gstt_trained_model_494c3717f637bb89/vocab.dat\n",
-      "12 : http://localhost:8001/media/20230227__kch_gstt_trained_model_bert_metacats_138689a7bb83cb0a/vocab.dat\n",
-      "\n",
-      "ModelPacks:\n",
-      "1 : snomed_2022_modelpack \t http://localhost:8001/media/20230227__kch_gstt_trained_model_494c3717f637bb89.zip\n",
-      "9 : snomed-2023-bert-metacats \t http://localhost:8001/media/20230227__kch_gstt_trained_model_bert_metacats_138689a7bb83cb0a.zip\n",
-      "10 : de-id modelpack \t http://localhost:8001/media/medcat_deid_trained_a7120281ebb9fc9e.zip\n",
-      "\n",
-      "Meta Tasks:\n",
-      "1 : Experiencer\n",
-      "2 : Presence\n",
-      "3 : Subject\n",
-      "4 : Temporality\n",
-      "5 : Time\n",
-      "\n",
-      "Relation Tasks:\n",
-      "1 : Spatial\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Get users\n",
     "users = session.get_users()\n",
@@ -304,37 +260,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Users:\n",
-      "2 : annotator1\n",
-      "1 : admin\n",
-      "\n",
-      "Datasets:\n",
-      "1 : Example Annotation Project - Model pack \t http://localhost:8001/media/cardio.csv\n",
-      "2 : Example Project - SNOMED CT All IMPORTED_dataset \t http://localhost:8001/media/Users/k1897038/projects/cogstack-nlp/medcat-trainer/webapp/api/media/Example%20Project%20-%20SNOMED%20CT%20All%20IMPORTED_dataset.csv\n",
-      "3 : Example Project - SNOMED CT All - ModelPack IMPORTED_dataset \t http://localhost:8001/media/Users/k1897038/projects/cogstack-nlp/medcat-trainer/webapp/api/media/Example%20Project%20-%20SNOMED%20CT%20All%20-%20ModelPack%20IMPORTED_dataset.csv\n",
-      "\n",
-      "Concept DBs:\n",
-      "\n",
-      "Vocabularies:\n",
-      "\n",
-      "ModelPacks:\n",
-      "1 : snomed_2023_htn_modelpack \t http://localhost:8001/media/snomed_2023_base_model_dm_htn_copd_only_f86505ba72beff08.zipv2_48299cf9ff983030.zip\n",
-      "\n",
-      "Meta Tasks:\n",
-      "1 : Presence\n",
-      "2 : Subject\n",
-      "3 : Time\n",
-      "\n",
-      "Relation Tasks:\n",
-      "1 : Spatial\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Get users\n",
     "users = session.get_users()\n",
@@ -408,15 +334,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Created project with model pack: 2 : Demo General Medical Annotation \t Annotation of neurology medical conditions \t 3 : Example Project - SNOMED CT All - ModelPack IMPORTED_dataset \t http://localhost:8001/media/Users/k1897038/projects/cogstack-nlp/medcat-trainer/webapp/api/media/Example%20Project%20-%20SNOMED%20CT%20All%20-%20ModelPack%20IMPORTED_dataset.csv\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Method 2: Create a project with a modelpack\n",
     "\n",
@@ -446,17 +364,7 @@
    "cell_type": "code",
    "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Downloaded annotations for 2 projects:\n",
-      "Example Project - SNOMED CT All - ModelPack IMPORTED\n",
-      "Demo General Medical Annotation\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Get all projects\n",
     "mct_projects = session.get_projects()\n",
@@ -489,17 +397,9 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 12,
+   "execution_count": null,
    "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "Annotations saved to ./example_data/medical_annotations.json\n"
-     ]
-    }
-   ],
+   "outputs": [],
    "source": [
     "# Save MCT export / annotations to a file\n",
     "with open(\"./example_data/medical_annotations.json\", \"w\") as f:\n",
@@ -518,7 +418,7 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 5,
+   "execution_count": null,
    "metadata": {},
    "outputs": [],
    "source": [
@@ -527,20 +427,47 @@
   },
   {
    "cell_type": "code",
-   "execution_count": 6,
+   "execution_count": 4,
    "metadata": {},
    "outputs": [],
    "source": [
-    "model_pack = session.get_model_packs()"
+    "model_packs = session.get_model_packs()"
    ]
   },
   {
    "cell_type": "code",
-   "execution_count": null,
+   "execution_count": 5,
    "metadata": {},
    "outputs": [],
    "source": [
-    "session.upload_projects_export(projects, modelpack=model_pack[0])"
+    "users = session.get_users()"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 6,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/plain": [
+       "'successfully uploaded'"
+      ]
+     },
+     "execution_count": 6,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "session.upload_projects_export(\n",
+    "    projects,\n",
+    "    modelpack=model_packs[1],\n",
+    "    cdb_search_filter=model_packs[1].concept_db,\n",
+    "    members=users,\n",
+    "    import_project_name_suffix='imported4',\n",
+    "    set_validated_docs=False\n",
+    ")"
    ]
   },
   {
@@ -553,9 +480,9 @@
  ],
  "metadata": {
   "kernelspec": {
-   "display_name": "Python [conda env:cattrainer]",
+   "display_name": "cattrainer",
    "language": "python",
-   "name": "conda-env-cattrainer-py"
+   "name": "python3"
   },
   "language_info": {
    "codemirror_mode": {
diff --git a/medcat-trainer/webapp/api/api/data_utils.py b/medcat-trainer/webapp/api/api/data_utils.py
index 7199bd0a..9ecde91e 100644
--- a/medcat-trainer/webapp/api/api/data_utils.py
+++ b/medcat-trainer/webapp/api/api/data_utils.py
@@ -66,8 +66,17 @@ def delete_orphan_docs(dataset: Dataset):
     Document.objects.filter(dataset__id=dataset.id).delete()
 
 
-def upload_projects_export(medcat_export: Dict, cdb_id: str, vocab_id: str, modelpack_id: str,
-                            project_name_suffix: str=' IMPORTED'):
+def upload_projects_export(
+    medcat_export: Dict,
+    cdb_id: str,
+    vocab_id: str,
+    modelpack_id: str,
+    project_name_suffix: str = ' IMPORTED',
+    cdb_search_filter_id: str = None,
+    members: List[str] = None,
+    import_project_name_suffix: str = ' IMPORTED',
+    set_validated_docs: bool = False
+):
     for proj in medcat_export['projects']:
         if len(proj['documents']) == 0:
             # don't add projects with no documents
@@ -119,6 +128,12 @@ def upload_projects_export(medcat_export: Dict, cdb_id: str, vocab_id: str, mode
         p.dataset = ds_mod
         p.save()
 
+        if cdb_search_filter_id is not None:
+            p.cdb_search_filter.set([ConceptDB.objects.get(id=cdb_search_filter_id)])
+
+        if members is not None:
+            p.members.set(members)
+
         # create django ORM model instances that are referenced in the upload if they don't exist.
         for u in unavailable_users:
             logger.warning(f'Username: {u} - not present in this trainer deployment.')
@@ -150,7 +165,11 @@ def upload_projects_export(medcat_export: Dict, cdb_id: str, vocab_id: str, mode
                 r.label = rel
                 r.save()
 
-        p.validated_documents.set(list(Document.objects.filter(dataset=ds_mod)))
+        if set_validated_docs:
+            p.validated_documents.set(list(Document.objects.filter(dataset=ds_mod)))
+        else:
+            p.validated_documents.clear()
+
 
         for doc in proj['documents']:
             doc_mod = Document.objects.filter(Q(dataset=ds_mod) & Q(text=doc['text'])).first()
diff --git a/medcat-trainer/webapp/api/api/migrations/0092_exportedproject_cdb_search_filter_id_and_more.py b/medcat-trainer/webapp/api/api/migrations/0092_exportedproject_cdb_search_filter_id_and_more.py
new file mode 100644
index 00000000..e1b4db2b
--- /dev/null
+++ b/medcat-trainer/webapp/api/api/migrations/0092_exportedproject_cdb_search_filter_id_and_more.py
@@ -0,0 +1,42 @@
+# Generated by Django 5.1.7 on 2025-10-07 00:03
+
+import django.core.validators
+import django.db.models.deletion
+from django.conf import settings
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+
+    dependencies = [
+        ('api', '0091_exportedproject_cdb_id_exportedproject_modelpack_id_and_more'),
+        migrations.swappable_dependency(settings.AUTH_USER_MODEL),
+    ]
+
+    operations = [
+        migrations.AddField(
+            model_name='exportedproject',
+            name='cdb_search_filter_id',
+            field=models.ForeignKey(blank=True, default=None, help_text='The CDB that will be used for concept lookup. This specific CDB should have been "imported" via the CDB admin screen', null=True, on_delete=django.db.models.deletion.SET_NULL, related_name='concept_source_exported_project', to='api.conceptdb'),
+        ),
+        migrations.AddField(
+            model_name='exportedproject',
+            name='import_project_name_suffix',
+            field=models.CharField(default=' IMPORTED', help_text='The suffix to be added to the project name', max_length=100),
+        ),
+        migrations.AddField(
+            model_name='exportedproject',
+            name='members',
+            field=models.ManyToManyField(blank=True, default=None, help_text='The annotators for the project', to=settings.AUTH_USER_MODEL),
+        ),
+        migrations.AddField(
+            model_name='exportedproject',
+            name='set_validated_docs',
+            field=models.BooleanField(default=False, help_text='Whether to set the validated documents, e.g. their annotation submit status.'),
+        ),
+        migrations.AlterField(
+            model_name='conceptdb',
+            name='name',
+            field=models.CharField(blank=True, default='', max_length=100, unique=True, validators=[django.core.validators.RegexValidator('^[a-zA-Z][a-zA-Z0-9_]*$', 'a-zA-Z for first character required. Alpahanumeric or _ thereafter are allowed for CDB names')]),
+        ),
+    ]
diff --git a/medcat-trainer/webapp/api/api/models.py b/medcat-trainer/webapp/api/api/models.py
index 00055bbe..d03aa8de 100644
--- a/medcat-trainer/webapp/api/api/models.py
+++ b/medcat-trainer/webapp/api/api/models.py
@@ -29,7 +29,7 @@
         ]
 
 
-cdb_name_validator = RegexValidator(r'^[a-zA-Z][a-zA-Z0-9_]*$', 'a-z for first character required. Alpahanumeric and _ thereafter are allowed for CDB names')
+cdb_name_validator = RegexValidator(r'^[a-zA-Z][a-zA-Z0-9_]*$', 'a-zA-Z for first character required. Alpahanumeric or _ thereafter are allowed for CDB names')
 
 logger = logging.getLogger(__name__)
 
@@ -110,9 +110,7 @@ def save(self, *args, **kwargs):
         except Exception as exc:
             raise MedCATLoadException(f'Failure loading MetaCAT models - {unpacked_model_pack_path}') from exc
 
-        # Only save if this is an update (not a new instance)
-        if not is_new:
-            super().save(*args, **kwargs)
+        super().save(*args, **kwargs)
 
     def __str__(self):
         return self.name
@@ -524,9 +522,17 @@ def __str__(self):
 
 class ExportedProject(models.Model):
     trainer_export_file = models.FileField(help_text='Previously exported MedCATtrainer .json file')
+    import_project_name_suffix = models.CharField(max_length=100, default=' IMPORTED', help_text='The suffix to be added to the project name')
+    members = models.ManyToManyField(settings.AUTH_USER_MODEL, blank=True, default=None, help_text='The annotators for the project')
     cdb_id = models.ForeignKey('ConceptDB', on_delete=models.SET_NULL, blank=True, null=True, default=None, help_text='The ConceptDB to be set for this exported project')
     vocab_id = models.ForeignKey('Vocabulary', on_delete=models.SET_NULL, blank=True, null=True, default=None, help_text='The Vocabulary to be set for this exported project')
     modelpack_id = models.ForeignKey('ModelPack', on_delete=models.SET_NULL, blank=True, null=True, default=None, help_text='The ModelPack to be set for this exported project')
+    cdb_search_filter_id = models.ForeignKey('ConceptDB', on_delete=models.SET_NULL, blank=True, null=True, default=None, help_text='The CDB that will be used for concept lookup. '
+                                                         'This specific CDB should have been "imported" '
+                                                         'via the CDB admin screen', related_name='concept_source_exported_project')
+    set_validated_docs = models.BooleanField(default=False, help_text='Whether to set the validated documents, e.g. their annotation submit status.')
+
+
 
     def __str__(self):
         return f'{self.trainer_export_file.name} - {self.cdb_id} - {self.vocab_id} - {self.modelpack_id}'
diff --git a/medcat-trainer/webapp/api/api/views.py b/medcat-trainer/webapp/api/api/views.py
index 49282f05..7a06a01c 100644
--- a/medcat-trainer/webapp/api/api/views.py
+++ b/medcat-trainer/webapp/api/api/views.py
@@ -687,13 +687,24 @@ def upload_deployment(request):
     vocab_id = deployment_export.get('vocab_id', None)
     modelpack_id = deployment_export.get('modelpack_id', None)
     project_name_suffix = deployment_export.get('project_name_suffix', ' IMPORTED')
+    set_validated_docs = deployment_export.get('set_validated_docs', False)
+    cdb_search_filter_id = deployment_export.get('cdb_search_filter', None)
+    members = deployment_export.get('members', None)
+    import_project_name_suffix = deployment_export.get('import_project_name_suffix', ' IMPORTED')
 
     if all(x is None for x in [cdb_id, vocab_id, modelpack_id]):
         return Response("No cdb, vocab, or modelpack provided", 400)
 
     try:
-        upload_projects_export(deployment_upload, cdb_id, vocab_id, modelpack_id,
-                               project_name_suffix)
+        upload_projects_export(deployment_upload,
+                                cdb_id,
+                                vocab_id,
+                                modelpack_id,
+                                project_name_suffix,
+                                cdb_search_filter_id,
+                                members,
+                                import_project_name_suffix,
+                                set_validated_docs)
         return Response("successfully uploaded", 200)
     except Exception as e:
         return Response(f"Failed to upload projects export: {str(e)}", 500)

From 7f343258d5f5b65c5f25dc939539d9de0f537340 Mon Sep 17 00:00:00 2001
From: Tom Searle <tom@cogstack.org>
Date: Tue, 7 Oct 2025 01:31:17 +0100
Subject: [PATCH 4/5] fix(medcat-trainer): handle optional members/cdb_search_filter in client, add tests

---
 medcat-trainer/client/mctclient.py            |   8 +-
 medcat-trainer/client/tests/test_mctclient.py | 354 +++++++++++++++++-
 2 files changed, 353 insertions(+), 9 deletions(-)

diff --git a/medcat-trainer/client/mctclient.py b/medcat-trainer/client/mctclient.py
index 7a4285a5..59119419 100644
--- a/medcat-trainer/client/mctclient.py
+++ b/medcat-trainer/client/mctclient.py
@@ -626,14 +626,14 @@ def upload_projects_export(self, projects: Dict[str, Any],
             modelpack = [m for m in self.get_model_packs() if m.name == modelpack].pop()
         if isinstance(cdb_search_filter, str):
             cdb_search_filter = [c for c in self.get_concept_dbs() if c.name == cdb_search_filter].pop()
-        if isinstance(members, str):
-            members = [m for m in self.get_users() if m.username == members].pop()
+        if members and all(isinstance(m, str) for m in members):
+            members = [m for m in self.get_users() if m.username in members]
 
         payload = {
             'exported_projects': projects,
             'project_name_suffix': import_project_name_suffix,
-            'cdb_search_filter': cdb_search_filter.id,
-            'members': [m.id for m in members],
+            'cdb_search_filter': cdb_search_filter.id if cdb_search_filter else None,
+            'members': [m.id for m in members] if members else None,
             'import_project_name_suffix': import_project_name_suffix,
             'set_validated_docs': set_validated_docs,
         }
diff --git a/medcat-trainer/client/tests/test_mctclient.py b/medcat-trainer/client/tests/test_mctclient.py
index e3a9bc9b..455417ee 100644
--- a/medcat-trainer/client/tests/test_mctclient.py
+++ b/medcat-trainer/client/tests/test_mctclient.py
@@ -158,7 +158,11 @@ def post_side_effect(url, *args, **kwargs):
                 'exported_projects': projects,
                 'project_name_suffix': ' IMPORTED',
                 'cdb_id': '20',
-                'vocab_id': '30'
+                'vocab_id': '30',
+                'cdb_search_filter': None,
+                'members': None,
+                'import_project_name_suffix': ' IMPORTED',
+                'set_validated_docs': False
             }
         )
         self.assertEqual(result, mock_upload_response)
@@ -213,7 +217,11 @@ def post_side_effect(url, *args, **kwargs):
                 'exported_projects': projects,
                 'project_name_suffix': ' IMPORTED',
                 'cdb_id': '20',
-                'vocab_id': '30'
+                'vocab_id': '30',
+                'cdb_search_filter': None,
+                'members': None,
+                'import_project_name_suffix': ' IMPORTED',
+                'set_validated_docs': False
             }
         )
         self.assertEqual(result, mock_upload_response)
@@ -252,7 +260,11 @@ def post_side_effect(url, *args, **kwargs):
             json={
                 'exported_projects': projects,
                 'project_name_suffix': ' IMPORTED',
-                'modelpack_id': '40'
+                'modelpack_id': '40',
+                'cdb_search_filter': None,
+                'members': None,
+                'import_project_name_suffix': ' IMPORTED',
+                'set_validated_docs': False
             }
         )
         self.assertEqual(result, mock_upload_response)
@@ -266,7 +278,7 @@ def get_side_effect(url, *args, **kwargs):
             if url.endswith('/api/modelpacks/'):
                 return MagicMock(
                     status_code=200,
-                    text=json.dumps({"results": [{"id": "40", "name": "testModelPack", "model_pack": "model.zip"}]})
+                    text=json.dumps({"results": [{"id": "40", "name": "testModelPack", "model_pack": "model.zip", "concept_db": "20", "vocab": "30", "meta_cats": ["200"]}]})
                 )
             else:
                 return MagicMock(status_code=404, text='')
@@ -301,7 +313,11 @@ def post_side_effect(url, *args, **kwargs):
             json={
                 'exported_projects': projects,
                 'project_name_suffix': ' IMPORTED',
-                'modelpack_id': '40'
+                'modelpack_id': '40',
+                'cdb_search_filter': None,
+                'members': None,
+                'import_project_name_suffix': ' IMPORTED',
+                'set_validated_docs': False
             }
         )
         self.assertEqual(result, mock_upload_response)
@@ -353,5 +369,333 @@ def post_side_effect(url, *args, **kwargs):
 
         self.assertIn('Failed to upload projects export', str(context.exception))
 
+    @patch('mctclient.requests.post')
+    @patch('mctclient.requests.get')
+    def test_upload_projects_export_with_custom_suffix(self, mock_get, mock_post):
+        """Test upload_projects_export with custom import_project_name_suffix"""
+        # Mock authentication and upload responses
+        mock_upload_response = {"status": "success", "uploaded_projects": 1}
+
+        def post_side_effect(url, *args, **kwargs):
+            if url.endswith('/api/api-token-auth/'):
+                return MagicMock(status_code=200, text='{"token": "abc"}')
+            elif url.endswith('/api/upload-deployment/'):
+                return MagicMock(
+                    status_code=200,
+                    json=lambda: mock_upload_response
+                )
+            else:
+                return MagicMock(status_code=404, text='')
+
+        mock_post.side_effect = post_side_effect
+
+        session = MedCATTrainerSession(server='http://localhost', username='u', password='p')
+        cdb = MCTConceptDB(id='20', name='testCDB', conceptdb_file='cdb.dat')
+        vocab = MCTVocab(id='30', name='testVocab', vocab_file='vocab.dat')
+        projects = [{"id": 1, "name": "Project 1"}]
+
+        result = session.upload_projects_export(
+            projects,
+            cdb=cdb,
+            vocab=vocab,
+            import_project_name_suffix=' - CUSTOM SUFFIX'
+        )
+
+        # Verify the API call was made correctly
+        mock_post.assert_called_with(
+            f'{session.server}/api/upload-deployment/',
+            headers=session.headers,
+            json={
+                'exported_projects': projects,
+                'project_name_suffix': ' - CUSTOM SUFFIX',
+                'cdb_id': '20',
+                'vocab_id': '30',
+                'cdb_search_filter': None,
+                'members': None,
+                'import_project_name_suffix': ' - CUSTOM SUFFIX',
+                'set_validated_docs': False
+            }
+        )
+        self.assertEqual(result, mock_upload_response)
+
+    @patch('mctclient.requests.post')
+    @patch('mctclient.requests.get')
+    def test_upload_projects_export_with_cdb_search_filter_object(self, mock_get, mock_post):
+        """Test upload_projects_export with cdb_search_filter as MCTConceptDB object"""
+        # Mock authentication and upload responses
+        mock_upload_response = {"status": "success", "uploaded_projects": 1}
+
+        def post_side_effect(url, *args, **kwargs):
+            if url.endswith('/api/api-token-auth/'):
+                return MagicMock(status_code=200, text='{"token": "abc"}')
+            elif url.endswith('/api/upload-deployment/'):
+                return MagicMock(
+                    status_code=200,
+                    json=lambda: mock_upload_response
+                )
+            else:
+                return MagicMock(status_code=404, text='')
+
+        mock_post.side_effect = post_side_effect
+
+        session = MedCATTrainerSession(server='http://localhost', username='u', password='p')
+        cdb = MCTConceptDB(id='20', name='testCDB', conceptdb_file='cdb.dat')
+        vocab = MCTVocab(id='30', name='testVocab', vocab_file='vocab.dat')
+        cdb_search_filter = MCTConceptDB(id='25', name='searchFilterCDB', conceptdb_file='filter.dat')
+        projects = [{"id": 1, "name": "Project 1"}]
+
+        result = session.upload_projects_export(
+            projects,
+            cdb=cdb,
+            vocab=vocab,
+            cdb_search_filter=cdb_search_filter
+        )
+
+        # Verify the API call was made correctly
+        mock_post.assert_called_with(
+            f'{session.server}/api/upload-deployment/',
+            headers=session.headers,
+            json={
+                'exported_projects': projects,
+                'project_name_suffix': ' IMPORTED',
+                'cdb_id': '20',
+                'vocab_id': '30',
+                'cdb_search_filter': '25',
+                'members': None,
+                'import_project_name_suffix': ' IMPORTED',
+                'set_validated_docs': False
+            }
+        )
+        self.assertEqual(result, mock_upload_response)
+
+    @patch('mctclient.requests.post')
+    @patch('mctclient.requests.get')
+    def test_upload_projects_export_with_cdb_search_filter_string(self, mock_get, mock_post):
+        """Test upload_projects_export with cdb_search_filter as string name"""
+        # Mock get_concept_dbs response
+        def get_side_effect(url, *args, **kwargs):
+            if url.endswith('/api/concept-dbs/'):
+                return MagicMock(
+                    status_code=200,
+                    text=json.dumps({"results": [
+                        {"id": "20", "name": "testCDB", "cdb_file": "cdb.dat"},
+                        {"id": "25", "name": "searchFilterCDB", "cdb_file": "filter.dat"}
+                    ]})
+                )
+            else:
+                return MagicMock(status_code=404, text='')
+
+        mock_get.side_effect = get_side_effect
+
+        # Mock authentication and upload responses
+        mock_upload_response = {"status": "success", "uploaded_projects": 1}
+
+        def post_side_effect(url, *args, **kwargs):
+            if url.endswith('/api/api-token-auth/'):
+                return MagicMock(status_code=200, text='{"token": "abc"}')
+            elif url.endswith('/api/upload-deployment/'):
+                return MagicMock(
+                    status_code=200,
+                    json=lambda: mock_upload_response
+                )
+            else:
+                return MagicMock(status_code=404, text='')
+
+        mock_post.side_effect = post_side_effect
+
+        session = MedCATTrainerSession(server='http://localhost', username='u', password='p')
+        cdb = MCTConceptDB(id='20', name='testCDB', conceptdb_file='cdb.dat')
+        vocab = MCTVocab(id='30', name='testVocab', vocab_file='vocab.dat')
+        projects = [{"id": 1, "name": "Project 1"}]
+
+        result = session.upload_projects_export(
+            projects,
+            cdb=cdb,
+            vocab=vocab,
+            cdb_search_filter="searchFilterCDB"
+        )
+
+        # Verify the API call was made correctly
+        mock_post.assert_called_with(
+            f'{session.server}/api/upload-deployment/',
+            headers=session.headers,
+            json={
+                'exported_projects': projects,
+                'project_name_suffix': ' IMPORTED',
+                'cdb_id': '20',
+                'vocab_id': '30',
+                'cdb_search_filter': '25',
+                'members': None,
+                'import_project_name_suffix': ' IMPORTED',
+                'set_validated_docs': False
+            }
+        )
+        self.assertEqual(result, mock_upload_response)
+
+    @patch('mctclient.requests.post')
+    @patch('mctclient.requests.get')
+    def test_upload_projects_export_with_members_objects(self, mock_get, mock_post):
+        """Test upload_projects_export with members as list of MCTUser objects"""
+        # Mock authentication and upload responses
+        mock_upload_response = {"status": "success", "uploaded_projects": 1}
+
+        def post_side_effect(url, *args, **kwargs):
+            if url.endswith('/api/api-token-auth/'):
+                return MagicMock(status_code=200, text='{"token": "abc"}')
+            elif url.endswith('/api/upload-deployment/'):
+                return MagicMock(
+                    status_code=200,
+                    json=lambda: mock_upload_response
+                )
+            else:
+                return MagicMock(status_code=404, text='')
+
+        mock_post.side_effect = post_side_effect
+
+        session = MedCATTrainerSession(server='http://localhost', username='u', password='p')
+        cdb = MCTConceptDB(id='20', name='testCDB', conceptdb_file='cdb.dat')
+        vocab = MCTVocab(id='30', name='testVocab', vocab_file='vocab.dat')
+        members = [MCTUser(id='100', username='user1'), MCTUser(id='101', username='user2')]
+        projects = [{"id": 1, "name": "Project 1"}]
+
+        result = session.upload_projects_export(
+            projects,
+            cdb=cdb,
+            vocab=vocab,
+            members=members
+        )
+
+        # Verify the API call was made correctly
+        mock_post.assert_called_with(
+            f'{session.server}/api/upload-deployment/',
+            headers=session.headers,
+            json={
+                'exported_projects': projects,
+                'project_name_suffix': ' IMPORTED',
+                'cdb_id': '20',
+                'vocab_id': '30',
+                'cdb_search_filter': None,
+                'members': ['100', '101'],
+                'import_project_name_suffix': ' IMPORTED',
+                'set_validated_docs': False
+            }
+        )
+        self.assertEqual(result, mock_upload_response)
+
+    @patch('mctclient.requests.post')
+    @patch('mctclient.requests.get')
+    def test_upload_projects_export_with_members_strings(self, mock_get, mock_post):
+        """Test upload_projects_export with members as list of string usernames"""
+        # Mock get_users response
+        def get_side_effect(url, *args, **kwargs):
+            if url.endswith('/api/users/'):
+                return MagicMock(
+                    status_code=200,
+                    text=json.dumps({"results": [
+                        {"id": "100", "username": "user1"},
+                        {"id": "101", "username": "user2"}
+                    ]})
+                )
+            else:
+                return MagicMock(status_code=404, text='')
+
+        mock_get.side_effect = get_side_effect
+
+        # Mock authentication and upload responses
+        mock_upload_response = {"status": "success", "uploaded_projects": 1}
+
+        def post_side_effect(url, *args, **kwargs):
+            if url.endswith('/api/api-token-auth/'):
+                return MagicMock(status_code=200, text='{"token": "abc"}')
+            elif url.endswith('/api/upload-deployment/'):
+                return MagicMock(
+                    status_code=200,
+                    json=lambda: mock_upload_response
+                )
+            else:
+                return MagicMock(status_code=404, text='')
+
+        mock_post.side_effect = post_side_effect
+
+        session = MedCATTrainerSession(server='http://localhost', username='u', password='p')
+        cdb = MCTConceptDB(id='20', name='testCDB', conceptdb_file='cdb.dat')
+        vocab = MCTVocab(id='30', name='testVocab', vocab_file='vocab.dat')
+        projects = [{"id": 1, "name": "Project 1"}]
+
+        result = session.upload_projects_export(
+            projects,
+            cdb=cdb,
+            vocab=vocab,
+            members=["user1", "user2"]
+        )
+
+        # Verify the API call was made correctly
+        mock_post.assert_called_with(
+            f'{session.server}/api/upload-deployment/',
+            headers=session.headers,
+            json={
+                'exported_projects': projects,
+                'project_name_suffix': ' IMPORTED',
+                'cdb_id': '20',
+                'vocab_id': '30',
+                'cdb_search_filter': None,
+                'members': ['100', '101'],
+                'import_project_name_suffix': ' IMPORTED',
+                'set_validated_docs': False
+            }
+        )
+        self.assertEqual(result, mock_upload_response)
+
+    @patch('mctclient.requests.post')
+    @patch('mctclient.requests.get')
+    def test_upload_projects_export_handles_none_parameters(self, mock_get, mock_post):
+        """Explicit None for cdb_search_filter and members is forwarded as None in the upload payload."""
+        # Fake the token-auth and upload-deployment endpoints; any other POST gets a 404
+        mock_upload_response = {"status": "success", "uploaded_projects": 1}
+
+        def post_side_effect(url, *args, **kwargs):
+            if url.endswith('/api/api-token-auth/'):
+                return MagicMock(status_code=200, text='{"token": "abc"}')
+            elif url.endswith('/api/upload-deployment/'):
+                return MagicMock(
+                    status_code=200,
+                    json=lambda: mock_upload_response
+                )
+            else:
+                return MagicMock(status_code=404, text='')
+
+        mock_post.side_effect = post_side_effect
+
+        session = MedCATTrainerSession(server='http://localhost', username='u', password='p')
+        cdb = MCTConceptDB(id='20', name='testCDB', conceptdb_file='cdb.dat')
+        vocab = MCTVocab(id='30', name='testVocab', vocab_file='vocab.dat')
+        projects = [{"id": 1, "name": "Project 1"}]
+
+        # Pass both optional arguments explicitly as None; the call must complete without raising
+        result = session.upload_projects_export(
+            projects,
+            cdb=cdb,
+            vocab=vocab,
+            cdb_search_filter=None,  # expected in the payload as 'cdb_search_filter': None
+            members=None  # expected in the payload as 'members': None
+        )
+
+        # The last POST must carry None for both optional keys alongside the default suffix/flag values
+        mock_post.assert_called_with(
+            f'{session.server}/api/upload-deployment/',
+            headers=session.headers,
+            json={
+                'exported_projects': projects,
+                'project_name_suffix': ' IMPORTED',
+                'cdb_id': '20',
+                'vocab_id': '30',
+                'cdb_search_filter': None,
+                'members': None,
+                'import_project_name_suffix': ' IMPORTED',
+                'set_validated_docs': False
+            }
+        )
+        self.assertEqual(result, mock_upload_response)
+
 if __name__ == '__main__':
     unittest.main()
\ No newline at end of file

From 75d8420453cd0da221db88482c66f3cee734d27d Mon Sep 17 00:00:00 2001
From: Tom Searle <tom@cogstack.org>
Date: Tue, 7 Oct 2025 10:36:40 +0100
Subject: [PATCH 5/5] fix codeQL leak

---
 medcat-trainer/webapp/api/api/views.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/medcat-trainer/webapp/api/api/views.py b/medcat-trainer/webapp/api/api/views.py
index 7a06a01c..feeec638 100644
--- a/medcat-trainer/webapp/api/api/views.py
+++ b/medcat-trainer/webapp/api/api/views.py
@@ -707,7 +707,8 @@ def upload_deployment(request):
                                 set_validated_docs)
         return Response("successfully uploaded", 200)
     except Exception as e:
-        return Response(f"Failed to upload projects export: {str(e)}", 500)
+        logger.error(f"Failed to upload projects export: {str(e)}", exc_info=e)
+        return Response("Failed to upload projects export, see server logs for details", 500)  # generic text only: exception details stay in the log, and Python 3 exceptions have no `.message`
 
 
 @api_view(http_method_names=['GET', 'DELETE'])