diff --git a/medcat-trainer/client/mctclient.py b/medcat-trainer/client/mctclient.py index 2d4b0370b..134616e5b 100644 --- a/medcat-trainer/client/mctclient.py +++ b/medcat-trainer/client/mctclient.py @@ -1,4 +1,5 @@ from dataclasses import dataclass +from datetime import datetime import json import os from abc import ABC @@ -13,7 +14,7 @@ @dataclass class MCTObj(ABC): - id: str=None + id: Union[str, int]=None def valid(self): return self.id is not None @@ -35,6 +36,20 @@ def __str__(self): return f'{self.id} : {self.name} \t {self.dataset_file}' +@dataclass +class MCTDocument(MCTObj): + """A document in the MedCATTrainer instance. + Intentionally NOT including the text in here. + + Attributes: + name (str): The name of the document. + """ + name: str=None + + def __str__(self): + return f'{self.id} : {self.name}' + + @dataclass class MCTConceptDB(MCTObj): """A concept database in the MedCATTrainer instance. @@ -135,8 +150,14 @@ class MCTProject(MCTObj): Attributes: name (str): The name of the project. description (str): The description of the project. + create_time (Union[datetime, str]): The date and time the project was created. + last_modified (Union[datetime, str]): The date and time the project was last modified. cuis (str): The CUIs to be used in the project filter. dataset (MCTDataset): The dataset to be used in the project. + annotation_classification (bool): Whether the project is an annotation classification project. + project_locked (bool): Whether the project is locked. + project_status (str): The status of the project. Active, Discontinued (Fail) or Complete. + deid_model_annotation (bool): Whether the project is a de-identification model annotation project. concept_db (MCTConceptDB): The concept database to be used in the project. vocab (MCTVocab): The vocabulary to be used in the project. members (List[MCTUser]): The annotators for the project. 
@@ -145,8 +166,15 @@ class MCTProject(MCTObj): """ name: str=None description: str=None + create_time: Union[datetime, str]=None + last_modified: Union[datetime, str]=None cuis: str=None dataset: MCTDataset=None + validated_documents: List[MCTDocument]=None + annotation_classification: bool=None + project_locked: bool=None + project_status: str='A' + deid_model_annotation: bool=False concept_db: MCTConceptDB=None vocab: MCTVocab=None members: List[MCTUser]=None @@ -223,6 +251,8 @@ def create_project(self, name: str, dataset: Union[MCTDataset, str], cuis: List[str]=[], cuis_file: str=None, + deid_model_annotation: bool=False, + annotation_classification: bool=False, concept_db: Union[MCTConceptDB, str]=None, vocab: Union[MCTVocab, str]=None, cdb_search_filter: Union[MCTConceptDB, str]=None, @@ -244,7 +274,9 @@ def create_project(self, name: str, dataset (Union[MCTDataset, str]): The dataset to be used in the project. cuis (List[str]): The CUIs to be used in the project filter. cuis_file (str): The file containing the CUIs to be used in the project filter, will be appended to the cuis list. + annotation_classification (bool): Whether the project will contribute to a 'globally' fine-tuned model or not. Defaults to False. concept_db (Union[MCTConceptDB, str], optional): The concept database to be used in the project. Defaults to None. + deid_model_annotation (bool): Whether the project is a de-identification model annotation project. Defaults to False. vocab (Union[MCTVocab, str], optional): The vocabulary to be used in the project. Defaults to None. cdb_search_filter (Union[MCTConceptDB, str], optional): _description_. Defaults to None. modelpack (Union[MCTModelPack, str], optional): _description_. Defaults to None. 
@@ -319,7 +351,9 @@ def create_project(self, name: str, 'dataset': dataset.id, 'members': [m.id for m in members], 'tasks': [mt.id for mt in meta_tasks], - 'relations': [rt.id for rt in rel_tasks] + 'relations': [rt.id for rt in rel_tasks], + 'annotation_classification': annotation_classification, + 'deid_model_annotation': deid_model_annotation, } if concept_db and vocab: @@ -340,7 +374,9 @@ def create_project(self, name: str, resp_json = json.loads(resp.text) return MCTProject(id=resp_json['id'], name=name, description=description, cuis=cuis, dataset=dataset, concept_db=concept_db, vocab=vocab, members=members, - meta_tasks=meta_tasks, rel_tasks=rel_tasks) + meta_tasks=meta_tasks, rel_tasks=rel_tasks, + annotation_classification=annotation_classification, + deid_model_annotation=deid_model_annotation) else: raise MCTUtilsException(f'Failed to create project with name: {name}', resp.text) @@ -497,7 +533,14 @@ def get_projects(self) -> List[MCTProject]: """ resp = json.loads(requests.get(f'{self.server}/api/project-annotate-entities/', headers=self.headers).text)['results'] mct_projects = [MCTProject(id=p['id'], name=p['name'], description=p['description'], cuis=p['cuis'], + create_time=p['create_time'], + last_modified=p['last_modified'], + annotation_classification=p['annotation_classification'], + project_locked=p['project_locked'], + project_status=p['project_status'], + deid_model_annotation=p['deid_model_annotation'], dataset=MCTDataset(id=p['id']), + validated_documents=[MCTDocument(id=d) for d in p['validated_documents']], concept_db=MCTConceptDB(id=p['concept_db']), vocab=MCTVocab(id=p['vocab']), members=[MCTUser(id=u) for u in p['members']], diff --git a/medcat-trainer/client/tests/test_mctclient.py b/medcat-trainer/client/tests/test_mctclient.py index c06b8ae64..5d490c487 100644 --- a/medcat-trainer/client/tests/test_mctclient.py +++ b/medcat-trainer/client/tests/test_mctclient.py @@ -18,6 +18,13 @@ def test_session_get_projects(self, mock_get, 
mock_post): "name": "Test Project", "description": "A test project", "cuis": "C001,C002", + "create_time": "2021-01-01T00:00:00Z", + "last_modified": "2021-01-01T00:00:00Z", + "annotation_classification": False, + "project_locked": False, + "project_status": "Active", + "deid_model_annotation": False, + "validated_documents": [1000, 1010], "dataset": 10, "concept_db": 20, "vocab": 30,