From 6de343f2c531c8fff4e6337df81e4cf306948191 Mon Sep 17 00:00:00 2001 From: Tom Searle Date: Mon, 18 Aug 2025 13:42:29 +0100 Subject: [PATCH] chore(medcat-trainer-v1): CU-869a5wkcx: remove client from v1 code --- .github/workflows/medcat-trainer-v1_ci.yml | 35 -- .github/workflows/medcat-trainer-v1_qa.yml | 41 -- .../workflows/medcat-trainer-v1_release.yml | 49 +- v1/medcat-trainer/client/README.md | 88 --- v1/medcat-trainer/client/__init__.py | 0 v1/medcat-trainer/client/mctclient.py | 547 ------------------ v1/medcat-trainer/client/pyproject.toml | 18 - .../client/tests/test_mctclient.py | 119 ---- v1/medcat-trainer/docs/client.md | 88 --- .../notebook_docs/Client_API_Tutorials.ipynb | 485 ---------------- 10 files changed, 1 insertion(+), 1469 deletions(-) delete mode 100644 v1/medcat-trainer/client/README.md delete mode 100644 v1/medcat-trainer/client/__init__.py delete mode 100644 v1/medcat-trainer/client/mctclient.py delete mode 100644 v1/medcat-trainer/client/pyproject.toml delete mode 100644 v1/medcat-trainer/client/tests/test_mctclient.py delete mode 100644 v1/medcat-trainer/docs/client.md delete mode 100644 v1/medcat-trainer/notebook_docs/Client_API_Tutorials.ipynb diff --git a/.github/workflows/medcat-trainer-v1_ci.yml b/.github/workflows/medcat-trainer-v1_ci.yml index 071d2e6b0..2f170a218 100644 --- a/.github/workflows/medcat-trainer-v1_ci.yml +++ b/.github/workflows/medcat-trainer-v1_ci.yml @@ -11,44 +11,9 @@ defaults: working-directory: ./v1/medcat-trainer jobs: - # Test and build client library - test-client: - runs-on: ubuntu-latest - steps: - - name: Checkout main - uses: actions/checkout@v4 - with: - ref: ${{ github.ref }} - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.10' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install requests pytest build - - - name: Install client package in development mode - run: | - cd client - pip install -e . - - - name: Run client tests - run: | - cd client - python -m pytest tests/ -v - - - name: Build client package - run: | - cd client - python -m build - # Build and test webapp container build-and-push: runs-on: ubuntu-latest - needs: test-client steps: - name: Checkout main uses: actions/checkout@v4 diff --git a/.github/workflows/medcat-trainer-v1_qa.yml b/.github/workflows/medcat-trainer-v1_qa.yml index ebcdc95b7..1b720c398 100644 --- a/.github/workflows/medcat-trainer-v1_qa.yml +++ b/.github/workflows/medcat-trainer-v1_qa.yml @@ -9,47 +9,6 @@ defaults: working-directory: ./v1/medcat-trainer jobs: - # Test and build client library - test-client: - runs-on: ubuntu-latest - steps: - - name: Checkout main - uses: actions/checkout@v4 - with: - ref: 'main' - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.10' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install requests pytest build - - - name: Install client package in development mode - run: | - cd client - pip install -e . - - - name: Run client tests - run: | - cd client - python -m pytest tests/ -v - - - name: Build client package - run: | - cd client - python -m build - - # - name: Publish dev distribution to Test PyPI - # uses: pypa/gh-action-pypi-publish@v1.4.2 - # with: - # password: ${{ secrets.MEDCAT_TRAINER_TEST_PYPI_API_TOKEN }} - # repository_url: https://test.pypi.org/legacy/ - # packages_dir: v1/medcat-trainer/client/dist - # Build and test webapp container build-and-push: runs-on: ubuntu-latest diff --git a/.github/workflows/medcat-trainer-v1_release.yml b/.github/workflows/medcat-trainer-v1_release.yml index d98f807d2..2bbcf42d9 100644 --- a/.github/workflows/medcat-trainer-v1_release.yml +++ b/.github/workflows/medcat-trainer-v1_release.yml @@ -3,63 +3,16 @@ name: medcat-trainer-v1 release-build on: push: tags: - - 'medcat-trainer/v1.*.*' + - 'medcat-trainer/v1.*.*' defaults: run: working-directory: ./v1/medcat-trainer jobs: - # Test, build and publish client library - test-and-publish-client: - runs-on: ubuntu-latest - steps: - - name: Checkout main - uses: actions/checkout@v4 - with: - ref: "main" - - - name: Release Tag - # If GITHUB_REF=refs/tags/medcat-trainer/v0.1.2, this returns v0.1.2. Note it's including the "v" though it probably shouldnt - run: echo "RELEASE_VERSION=${GITHUB_REF##refs/*/}" >> $GITHUB_ENV - - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: '3.10' - - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install requests pytest build twine - - - name: Install client package in development mode - run: | - cd client - pip install -e . - - - name: Run client tests - run: | - cd client - python -m pytest tests/ -v - - - name: Build client package - run: | - cd client - python -m build - - - name: Publish production distribution to PyPI - if: startsWith(github.ref, 'refs/tags') && ! github.event.release.prerelease - uses: pypa/gh-action-pypi-publish@v1.4.2 - with: - # TODO CU-869a25n7e Use Trusted Platform Publisher based PyPI release - password: ${{ secrets.PYPI_API_TOKEN }} - packages_dir: v1/medcat-trainer/client/dist - # Build and test webapp container build-and-push: runs-on: ubuntu-latest - needs: test-and-publish-client steps: - name: Checkout main uses: actions/checkout@v4 diff --git a/v1/medcat-trainer/client/README.md b/v1/medcat-trainer/client/README.md deleted file mode 100644 index d5d131325..000000000 --- a/v1/medcat-trainer/client/README.md +++ /dev/null @@ -1,88 +0,0 @@ - ---- - -# MedCATtrainer Client - -A Python client for interacting with a MedCATTrainer web application instance. This package allows you to manage datasets, concept databases, vocabularies, model packs, users, projects, and more via Python code or the command line. - -## Features - -- Manage datasets, concept databases, vocabularies, and model packs -- Create and manage users and projects -- Retrieve and upload project annotations -- Command-line interface (CLI) for automation - -## Installation - -```sh -pip install mctclient -``` - -Or, if installing from source: - -```sh -cd client -python -m build -pip install dist/*.whl -``` - -## Python Usage - -```sh -export MCTRAINER_USERNAME= -export MCTRAINER_PASSWORD= -``` - -```python -from mctclient import MedCATTrainerSession, MCTDataset, MCTConceptDB, MCTVocab, MCTModelPack, MCTMetaTask, MCTRelTask, MCTUser, MCTProject - -# Connect to your MedCATTrainer instance -session = MedCATTrainerSession(server="http://localhost:8001") - -# List all projects -projects = session.get_projects() -for project in projects: - print(project) - -# Create a new dataset -dataset = session.create_dataset(name="My Dataset", dataset_file="path/to/data.csv") - -# Create a new user -user = session.create_user(username="newuser", password="password123") - -# Create a new project -project = session.create_project( - name="My Project", - description="A new annotation project", - members=[user], - dataset=dataset -) -``` - -### MedCATTrainerSession Methods - -- `create_project(name, description, members, dataset, cuis=[], cuis_file=None, concept_db=None, vocab=None, cdb_search_filter=None, modelpack=None, meta_tasks=[], rel_tasks=[])` -- `create_dataset(name, dataset_file)` -- `create_user(username, password)` -- `create_medcat_model(cdb, vocab)` -- `create_medcat_model_pack(model_pack)` -- `get_users()` -- `get_models()` -- `get_model_packs()` -- `get_meta_tasks()` -- `get_rel_tasks()` -- `get_projects()` -- `get_datasets()` -- `get_project_annos(projects)` - -Each method returns the corresponding object or a list of objects. - -## License - -This project is licensed under the Apache 2.0 License. - -## Contributing - -Pull requests are welcome! For major changes, please open an issue first to discuss what you would like to change. - - diff --git a/v1/medcat-trainer/client/__init__.py b/v1/medcat-trainer/client/__init__.py deleted file mode 100644 index e69de29bb..000000000 diff --git a/v1/medcat-trainer/client/mctclient.py b/v1/medcat-trainer/client/mctclient.py deleted file mode 100644 index 2d4b0370b..000000000 --- a/v1/medcat-trainer/client/mctclient.py +++ /dev/null @@ -1,547 +0,0 @@ -from dataclasses import dataclass -import json -import os -from abc import ABC -from typing import List, Tuple, Union - -import requests - -import logging - -logger = logging.getLogger(__name__) - - -@dataclass -class MCTObj(ABC): - id: str=None - - def valid(self): - return self.id is not None - - -@dataclass -class MCTDataset(MCTObj): - """A dataset in the MedCATTrainer instance. - - Attributes: - name (str): The name of the dataset. - dataset_file (str): The path to the dataset file, can be a csv, or excel file, with at - least 2 columns: 'name': unique identifier for each text, and 'text': the text to be annotated. - """ - name: str=None - dataset_file: str=None - - def __str__(self): - return f'{self.id} : {self.name} \t {self.dataset_file}' - - -@dataclass -class MCTConceptDB(MCTObj): - """A concept database in the MedCATTrainer instance. - - Attributes: - name (str): The name of the concept database. Name must start with a lowercase letter and contain only alphanumeric characters and underscores. - conceptdb_file (str): The path to the concept database file, should be a .dat file. - use_for_training (bool): Whether to use the concept database for training. Defaults to True as most uploaded CDBs will be used for training, unless they are used for the concept search lookup. - """ - name: str=None - conceptdb_file: str=None - use_for_training: bool=True - - def __post_init__(self): - if self.name is not None: - if not self.name[0].islower(): - raise ValueError("Name must start with a lowercase letter") - if not self.name.replace('_', '').replace('-', '').isalnum(): - raise ValueError("Name must contain only alphanumeric characters and underscores") - - def __str__(self): - return f'{getattr(self, "id", "N/A")} : {self.name} \t {self.conceptdb_file}' - - -@dataclass -class MCTVocab(MCTObj): - """A vocabulary in the MedCATTrainer instance. - - Attributes: - name (str): The name of the vocabulary. - vocab_file (str): The path to the vocabulary file, should be a .dat file. - """ - name: str=None - vocab_file: str=None - - def __str__(self): - return f'{self.id} : {self.vocab_file}' - - -@dataclass -class MCTModelPack(MCTObj): - """A model pack in the MedCATTrainer instance. - - Attributes: - name (str): The name of the model pack. - model_pack_zip (str): The path to the model pack zip file, should be a .zip file. - """ - name: str=None - model_pack_zip: str=None - - def __str__(self): - return f'{self.id} : {self.name} \t {self. model_pack_zip}' - - -@dataclass -class MCTMetaTask(MCTObj): - """A meta task in the MedCATTrainer instance. - - Attributes: - name (str): The name of the meta task. - """ - name: str=None - - def __str__(self): - return f'{self.id} : {self.name}' - - -@dataclass -class MCTRelTask(MCTObj): - """A relation extraction task in the MedCATTrainer instance. - - Attributes: - name (str): The name of the relation extraction task. - """ - name: str=None - - def __str__(self): - return f'{self.id} : {self.name}' - - -@dataclass -class MCTUser(MCTObj): - """A user in the MedCATTrainer instance. - - Attributes: - username (str): The username of the user. - """ - username: str=None - - def __str__(self): - return f'{self.id} : {self.username}' - - -@dataclass -class MCTProject(MCTObj): - """A project in the MedCATTrainer instance. - - Attributes: - name (str): The name of the project. - description (str): The description of the project. - cuis (str): The CUIs to be used in the project filter. - dataset (MCTDataset): The dataset to be used in the project. - concept_db (MCTConceptDB): The concept database to be used in the project. - vocab (MCTVocab): The vocabulary to be used in the project. - members (List[MCTUser]): The annotators for the project. - meta_tasks (List[MCTMetaTask]): The meta tasks for the project. - rel_tasks (List[MCTRelTask]): The relation extraction tasks for the project. - """ - name: str=None - description: str=None - cuis: str=None - dataset: MCTDataset=None - concept_db: MCTConceptDB=None - vocab: MCTVocab=None - members: List[MCTUser]=None - meta_tasks: List[MCTMetaTask]=None - rel_tasks: List[MCTRelTask]=None - - def __str__(self): - return f'{self.id} : {self.name} \t {self.description} \t {self.dataset}' - - - -class MedCATTrainerSession: - """Wrapper for the MedCATTrainer API. - This class provides a wrapper around the MedCATTrainer API, allowing for easy creation of projects, datasets, users, and models. - - Attributes: - server (str): The server to connect to can also be set by an ENVVAR MCTRAINER_SERVER. Defaults to http://localhost:8001. - username (str): The username to connect to can also be set by an ENVVAR MCTRAINER_USERNAME. - password (str): The password to connect to can also be set by an ENVVAR MCTRAINER_PASSWORD. - - Example: - Create a project with a concept database, vocabulary, dataset, and user. - - >>> session = MedCATTrainerSession() - >>> ds = session.create_dataset(name='Test DS', dataset_file='.csv') - >>> cdb_file = '/cdb.dat' - >>> vocab_file = '/vocab.dat' - >>> model_pack_zip = '.zip' - >>> # Create a concept database and vocabulary in the MCTrainer instance. This is the NER+L model only. - >>> cdb, vocab = session.create_medcat_model(MCTConceptDB(name='test_cdb', conceptdb_file=cdb_file), - MCTVocab(name='test_vocab', vocab_file=vocab_file)) - >>> # OR Create a model pack in the MCTrainer instance, NER+L, plus any MetaCAT or RelCAT models packaged together. - >>> session.create_medcat_model_pack(MCTModelPack(name='test_model_pack', model_pack_zip=model_pack_zip)) - >>> session.create_project(name='test-project', description='test-description', members=[MCTUser(username='test-user')], dataset=ds, concept_db=cdb, vocab=vocab) - - A common interaction would be to create a project with a new dataset but existing concept database and vocabulary or Modelpack. - >>> projects = session.get_projects() - >>> ds = session.create_dataset(name='New Test DS', dataset_file='/Users/tom/phd/MedCATtrainer/notebook_docs/example_data/cardio.csv') - >>> # MCTObjects can be referenced by name or by the wrapper object. - >>> session.create_project(name='test-project', description='test-description', members=[MCTUser(username='test-user')], dataset=ds, - concept_db=MCTConceptDB(name='test_cdb'), vocab=MCTVocab(name='test_vocab')) - - To download annotations for a project: - >>> projects = session.get_projects() - >>> annotations = session.get_project_annos(projects[0]) - """ - - def __init__(self, server=None, username=None, password=None): - """Initialize the MedCATTrainerSession. - - Args: - server (_type_, optional): _description_. Defaults to None. - - Raises: - MCTUtilsException: _description_ - """ - self.username = username or os.getenv("MCTRAINER_USERNAME") - self.password = password or os.getenv("MCTRAINER_PASSWORD") - self.server = server or 'http://localhost:8001' - - payload = {"username": self.username, "password": self.password} - resp = requests.post(f"{self.server}/api/api-token-auth/", json=payload) - if 200 <= resp.status_code < 300: - token = json.loads(resp.text)["token"] - self.headers = { - 'Authorization': f'Token {token}', - } - else: - raise MCTUtilsException(f'Failed to login to MedCATtrainer instance running at: {self.server}') - - def create_project(self, name: str, - description: str, - members: Union[List[MCTUser], List[str]], - dataset: Union[MCTDataset, str], - cuis: List[str]=[], - cuis_file: str=None, - concept_db: Union[MCTConceptDB, str]=None, - vocab: Union[MCTVocab, str]=None, - cdb_search_filter: Union[MCTConceptDB, str]=None, - modelpack: Union[MCTModelPack, str]=None, - meta_tasks: Union[List[MCTMetaTask], List[str]]=[], - rel_tasks: Union[List[MCTRelTask], List[str]]=[]): - """Create a new project in the MedCATTrainer session. - Users, models, datasets etc. can be referred to by either their client wrapper object or their name, and the ID will be retrieved - then used to create the project. Most names have a unique constraint on them so for the majority of cases will not results in an error. - - Only a concept_db and vocab pair, or a modelpack needs to be specified. - - Setting a modelpack will also eventually automatically select meta tasks and rel tasks. - - Args: - name (str): The name of the project. - description (str): The description of the project. - members (Union[List[MCTUser], List[str]]): The annotators for the project. - dataset (Union[MCTDataset, str]): The dataset to be used in the project. - cuis (List[str]): The CUIs to be used in the project filter. - cuis_file (str): The file containing the CUIs to be used in the project filter, will be appended to the cuis list. - concept_db (Union[MCTConceptDB, str], optional): The concept database to be used in the project. Defaults to None. - vocab (Union[MCTVocab, str], optional): The vocabulary to be used in the project. Defaults to None. - cdb_search_filter (Union[MCTConceptDB, str], optional): _description_. Defaults to None. - modelpack (Union[MCTModelPack, str], optional): _description_. Defaults to None. - meta_tasks (Union[List[MCTMetaTask], List[str]], optional): _description_. Defaults to None. - rel_tasks (Union[List[MCTRelTask], List[str]], optional): _description_. Defaults to None. - - Raises: - MCTUtilsException: If the project creation fails - - Returns: - MCTProject: The created project - """ - - if all(isinstance(m, str) for m in members): - mct_members = [u for u in self.get_users() if u.username in members] - if len(mct_members) != len(members): - raise MCTUtilsException(f'Not all users found in MedCATTrainer instance: {members} requested, trainer members found: {mct_members}') - else: - members = mct_members - - if isinstance(dataset, str): - try: - dataset = [d for d in self.get_datasets() if d.name == dataset].pop() - except IndexError: - raise MCTUtilsException(f'Dataset not found in MedCATTrainer instance: {dataset}') - - if isinstance(concept_db, str): - try: - concept_db = [c for c in self.get_models()[0] if c.name == concept_db].pop() - except IndexError: - raise MCTUtilsException(f'Concept DB not found in MedCATTrainer instance: {concept_db}') - - if isinstance(vocab, str): - try: - vocab = [v for v in self.get_models()[1] if v.name == vocab].pop() - except IndexError: - raise MCTUtilsException(f'Vocab not found in MedCATTrainer instance: {vocab}') - - if isinstance(cdb_search_filter, str): - try: - cdb_search_filter = [c for c in self.get_concept_dbs() if c.name == cdb_search_filter].pop() - except IndexError: - raise MCTUtilsException(f'Concept DB not found in MedCATTrainer instance: {cdb_search_filter}') - - if isinstance(modelpack, str): - try: - modelpack = [m for m in self.get_model_packs() if m.name == modelpack].pop() - except IndexError: - raise MCTUtilsException(f'Model pack not found in MedCATTrainer instance: {modelpack}') - - if all(isinstance(m, str) for m in meta_tasks): - mct_meta_tasks = [m for m in self.get_meta_tasks() if m.name in meta_tasks] - if len(mct_meta_tasks) != len(meta_tasks): - raise MCTUtilsException(f'Not all meta tasks found in MedCATTrainer instance: {meta_tasks} requested, trainer meta tasks found: {mct_meta_tasks}') - else: - meta_tasks = mct_meta_tasks - - if all(isinstance(r, str) for r in rel_tasks): - mct_rel_tasks = [r for r in self.get_rel_tasks() if r.name in rel_tasks] - if len(mct_rel_tasks) != len(rel_tasks): - raise MCTUtilsException(f'Not all rel tasks found in MedCATTrainer instance: {rel_tasks} requested, trainer rel tasks found: {mct_rel_tasks}') - else: - rel_tasks = mct_rel_tasks - - if (concept_db or vocab) and modelpack: - raise MCTUtilsException('Cannot specify both concept_db/vocab and modelpack') - - payload = { - 'name': name, - 'description': description, - 'cuis': ','.join(cuis), - 'dataset': dataset.id, - 'members': [m.id for m in members], - 'tasks': [mt.id for mt in meta_tasks], - 'relations': [rt.id for rt in rel_tasks] - } - - if concept_db and vocab: - payload['concept_db'] = concept_db.id - payload['vocab'] = vocab.id - elif modelpack: - payload['model_pack'] = modelpack.id - - if cdb_search_filter: - payload['cdb_search_filter'] = [cdb_search_filter.id] - - if cuis_file: - with open(cuis_file, 'rb') as f: - resp = requests.post(f'{self.server}/api/project-annotate-entities/', data=payload, files={'cuis_file': f}, headers=self.headers) - else: - resp = requests.post(f'{self.server}/api/project-annotate-entities/', data=payload, headers=self.headers) - if 200 <= resp.status_code < 300: - resp_json = json.loads(resp.text) - return MCTProject(id=resp_json['id'], name=name, description=description, cuis=cuis, - dataset=dataset, concept_db=concept_db, vocab=vocab, members=members, - meta_tasks=meta_tasks, rel_tasks=rel_tasks) - else: - raise MCTUtilsException(f'Failed to create project with name: {name}', resp.text) - - def create_dataset(self, name: str, dataset_file: str): - """Create a new dataset in the MedCATTrainer session. - - Args: - name (str): The name of the dataset. - dataset_file (str): The path to the dataset file. - - Raises: - MCTUtilsException: If the dataset creation fails - - Returns: - MCTDataset: The created dataset - """ - resp = requests.post(f'{self.server}/api/datasets/', headers=self.headers, - data={'name': name}, - files={'original_file': open(dataset_file, 'rb')}) - if 200 <= resp.status_code < 300: - resp_json = json.loads(resp.text) - return MCTDataset(name=name, id=resp_json['id']) - else: - raise MCTUtilsException(f'Failed to create dataset with name: {name}', resp.text) - - def create_user(self, username: str, password): - """Create a new user in the MedCATTrainer session. - - Args: - username (str): The username of the new user. - password (str): The password of the new user. - - Raises: - MCTUtilsException: If the user creation fails - - Returns: - MCTUser: The created user - """ - payload = { - 'username': username, - 'password': password - } - resp = requests.post(f'{self.server}/api/users/', json=payload, headers=self.headers) - if 200 <= resp.status_code < 300: - resp_json = json.loads(resp.text) - return MCTUser(username=username, id=resp_json['id']) - else: - raise MCTUtilsException(f'Failed to create new user with username: {username}', resp.text) - - def create_medcat_model(self, cdb:MCTConceptDB, vocab: MCTVocab): - """Create a new MedCAT cdb and vocab model in the MedCATTrainer session. - - Args: - cdb (MCTConceptDB): The concept database to be created. - vocab (MCTVocab): The vocabulary to be created. - - Raises: - MCTUtilsException: If the model creation fails - """ - resp = requests.post(f'{self.server}/api/concept-dbs/', headers=self.headers, - data={'name': cdb.name, 'use_for_training': cdb.use_for_training}, - files={'cdb_file': open(cdb.conceptdb_file, 'rb')}) - if 200 <= resp.status_code < 300: - resp_json = json.loads(resp.text) - cdb.id = resp_json['id'] - else: - raise MCTUtilsException(f'Failed uploading MedCAT cdb model: {cdb}', resp.text) - - resp = requests.post(f'{self.server}/api/vocabs/', headers=self.headers, - data={'name': vocab.name}, - files={'vocab_file': open(vocab.vocab_file, 'rb')}) - if 200 <= resp.status_code < 300: - resp_json = json.loads(resp.text) - vocab.id = resp_json['id'] - else: - raise MCTUtilsException(f'Failed uploading MedCAT vocab model: {vocab}', resp.text) - - return cdb, vocab - - def create_medcat_model_pack(self, model_pack: MCTModelPack): - """Create a new MedCAT model pack in the MedCATTrainer session. - - Args: - model_pack (MCTModelPack): The model pack to be created. - - Raises: - MCTUtilsException: If the model pack creation fails - """ - resp = requests.post(f'{self.server}/api/modelpacks/', headers=self.headers, - data={'name': model_pack.name}, - files={'model_pack': open(model_pack.model_pack_zip, 'rb')}) - if 200 <= resp.status_code < 300: - resp_json = json.loads(resp.text) - model_pack.id = resp_json['id'] - else: - raise MCTUtilsException(f'Failed uploading model pack: {model_pack.model_pack_zip}', resp.text) - - def get_users(self) -> List[MCTUser]: - """Get all users in the MedCATTrainer instance. - - Returns: - List[MCTUser]: A list of all users in the MedCATTrainer instance - """ - users = json.loads(requests.get(f'{self.server}/api/users/', headers=self.headers).text)['results'] - return [MCTUser(id=u['id'], username=u['username']) for u in users] - - def get_models(self) -> Tuple[List[str], List[str]]: - """Get all MedCAT cdb and vocab models in the MedCATTrainer instance. - - Returns: - Tuple[List[MCTConceptDB], List[MCTVocab]]: A tuple of lists of all MedCAT cdb and vocab models in the MedCATTrainer instance - """ - cdbs = json.loads(requests.get(f'{self.server}/api/concept-dbs/', headers=self.headers).text)['results'] - vocabs = json.loads(requests.get(f'{self.server}/api/vocabs/', headers=self.headers).text)['results'] - mct_cdbs = [MCTConceptDB(id=cdb['id'], name=cdb['name'], conceptdb_file=cdb['cdb_file']) for cdb in cdbs] - mct_vocabs = [MCTVocab(id=v['id'], name=v['name'], vocab_file=v['vocab_file']) for v in vocabs] - return mct_cdbs, mct_vocabs - - def get_model_packs(self) -> List[MCTModelPack]: - """Get all MedCAT model packs in the MedCATTrainer instance. - - Returns: - List[MCTModelPack]: A list of all MedCAT model packs in the MedCATTrainer instance - """ - resp = json.loads(requests.get(f'{self.server}/api/modelpacks/', headers=self.headers).text)['results'] - mct_model_packs = [MCTModelPack(id=mp['id'], name=mp['name'], model_pack_zip=mp['model_pack']) for mp in resp] - return mct_model_packs - - def get_meta_tasks(self) -> List[MCTMetaTask]: - """Get all MedCAT meta tasks that have been created in the MedCATTrainer instance. - - Returns: - List[MCTMetaTask]: A list of all MedCAT meta tasks in the MedCATTrainer instance - """ - resp = json.loads(requests.get(f'{self.server}/api/meta-tasks/', headers=self.headers).text)['results'] - mct_meta_tasks = [MCTMetaTask(name=mt['name'], id=mt['id']) for mt in resp] - return mct_meta_tasks - - def get_rel_tasks(self) -> List[MCTRelTask]: - """Get all MedCAT relation tasks that have been created in the MedCATTrainer instance. - - Returns: - List[MCTRelTask]: A list of all MedCAT relation tasks in the MedCATTrainer instance - """ - resp = json.loads(requests.get(f'{self.server}/api/relations/', headers=self.headers).text)['results'] - mct_rel_tasks = [MCTRelTask(name=rt['label'], id=rt['id']) for rt in resp] - return mct_rel_tasks - - def get_projects(self) -> List[MCTProject]: - """Get all MedCAT annotation projects that have been created in the MedCATTrainer instance. - - Returns: - List[MCTProject]: A list of all MedCAT annotation projects in the MedCATTrainer instance - """ - resp = json.loads(requests.get(f'{self.server}/api/project-annotate-entities/', headers=self.headers).text)['results'] - mct_projects = [MCTProject(id=p['id'], name=p['name'], description=p['description'], cuis=p['cuis'], - dataset=MCTDataset(id=p['id']), - concept_db=MCTConceptDB(id=p['concept_db']), - vocab=MCTVocab(id=p['vocab']), - members=[MCTUser(id=u) for u in p['members']], - meta_tasks=[MCTMetaTask(id=mt) for mt in p['tasks']], - rel_tasks=[MCTRelTask(id=rt) for rt in p['relations']]) for p in resp] - return mct_projects - - def get_datasets(self) -> List[MCTDataset]: - """Get all datasets that have been created in the MedCATTrainer instance. - - Returns: - List[MCTDataset]: A list of all datasets in the MedCATTrainer instance - """ - resp = json.loads(requests.get(f'{self.server}/api/datasets/', headers=self.headers).text)['results'] - mct_datasets = [MCTDataset(name=d['name'], dataset_file=d['original_file'], id=d['id']) for d in resp] - return mct_datasets - - def get_project_annos(self, projects: List[MCTProject]): - """Get the annotations for a list of projects. Schema is documented here: https://github.com/medcat/MedCATtrainer/blob/main/docs/api.md#download-annotations - - Args: - projects (List[MCTProject]): A list of projects to get annotations for - - Returns: - List[MCTProject]: A list of all projects with annotations - """ - if any(p.id is None for p in projects): - raise MCTUtilsException('One or more project.id are None and all are required to download annotations') - - resp = json.loads(requests.get(f'{self.server}/api/download-annos/?project_ids={",".join([str(p.id) for p in projects])}&with_text=1', - headers=self.headers).text) - return resp - - def __str__(self) -> str: - return f'{self.server} \t {self.username} \t {self.password}' - - -class MCTUtilsException(Exception): - """Base exception for MedCAT Trainer API errors""" - def __init__(self, message, original_exception=None): - self.message = message - self.original_exception = original_exception - super().__init__(self.message) - - def __str__(self): - return f'{self.message} \n {self.original_exception}' - diff --git a/v1/medcat-trainer/client/pyproject.toml b/v1/medcat-trainer/client/pyproject.toml deleted file mode 100644 index 05e562f9e..000000000 --- a/v1/medcat-trainer/client/pyproject.toml +++ /dev/null @@ -1,18 +0,0 @@ -[build-system] -requires = ["setuptools>=61.0", "wheel"] -build-backend = "setuptools.build_meta" - -[project] -name = "medcattrainer-client" -version = "1.0.0" -description = "Python client for interacting with a MedCATTrainer instance" -readme = "client/README.md" -requires-python = ">=3.10" -license = { file = "LICENSE" } -authors = [{ name = "Tom Searle", email = "tom@cogstack.org" }] -dependencies = ["requests"] - -[project.urls] -Homepage = "https://github.com/CogStack/cogstack-nlp/" -Documentation = "https://docs.cogstack.org/projects/medcat-trainer" -Source = "https://github.com/CogStack/cogstack-nlp/" diff --git a/v1/medcat-trainer/client/tests/test_mctclient.py b/v1/medcat-trainer/client/tests/test_mctclient.py deleted file mode 100644 index c06b8ae64..000000000 --- a/v1/medcat-trainer/client/tests/test_mctclient.py +++ /dev/null @@ -1,119 +0,0 @@ -import json -import unittest -from unittest.mock import patch, MagicMock -from mctclient import ( - MedCATTrainerSession, MCTDataset, MCTConceptDB, MCTVocab, MCTModelPack, MCTMetaTask, MCTRelTask, MCTUser, MCTProject -) - -class TestMCTClient(unittest.TestCase): - - @patch('mctclient.requests.post') - @patch('mctclient.requests.get') - def test_session_get_projects(self, mock_get, mock_post): - # Mock authentication - mock_post.return_value = MagicMock(status_code=200, text='{"token": "abc"}') - # Mock get_projects with a real project structure - mock_project = { - "id": 1, - "name": "Test Project", - "description": "A test project", - "cuis": "C001,C002", - "dataset": 10, - "concept_db": 20, - "vocab": 30, - "members": [100, 101], - "tasks": [200], - "relations": [300] - } - mock_get.return_value = MagicMock( - status_code=200, - text=json.dumps({"results": [mock_project]}) - ) - session = MedCATTrainerSession(server='http://localhost', username='u', password='p') - projects = session.get_projects() - self.assertIsInstance(projects, list) - self.assertEqual(len(projects), 1) - project = projects[0] - self.assertIsInstance(project, MCTProject) - self.assertEqual(project.name, "Test Project") - self.assertEqual(project.description, "A test project") - self.assertEqual(project.cuis, "C001,C002") - self.assertIsInstance(project.dataset, MCTDataset) - self.assertIsInstance(project.concept_db, MCTConceptDB) - self.assertIsInstance(project.vocab, MCTVocab) - self.assertTrue(all(isinstance(m, MCTUser) for m in project.members)) - self.assertTrue(all(isinstance(mt, MCTMetaTask) for mt in project.meta_tasks)) - self.assertTrue(all(isinstance(rt, MCTRelTask) for rt in project.rel_tasks)) - - @patch('mctclient.requests.post') - def test_create_project(self, mock_post): - # Mock authentication - def post_side_effect(url, *args, **kwargs): - if url.endswith('/api/api-token-auth/'): - return MagicMock(status_code=200, text='{"token": "abc"}') - elif url.endswith('/api/project-annotate-entities/'): - # Return a response with all fields needed for MCTProject - return MagicMock( - status_code=200, - text=json.dumps({ - 'id': '3', - 'name': 'My Project', - 'description': 'desc', - 'cuis': 'C001,C002', - 'dataset': '2', - 'concept_db': '20', - 'vocab': '30', - 'members': ['1'], - 'tasks': ['200'], - 'relations': ['300'] - }), - json=lambda: { - 'id': '3', - 'name': 'My Project', - 'description': 'desc', - 'cuis': 'C001,C002', - 'dataset': '2', - 'concept_db': '20', - 'vocab': '30', - 'members': ['1'], - 'tasks': ['200'], - 'relations': ['300'] - } - ) - else: - return MagicMock(status_code=404, text='') - - mock_post.side_effect = post_side_effect - - session = MedCATTrainerSession(server='http://localhost', username='u', password='p') - user = MCTUser(id='1', username='testuser') - dataset = MCTDataset(id='2', name='TestDS', dataset_file='file.csv') - concept_db = MCTConceptDB(id='20', name='testCDB', conceptdb_file='cdb.dat') - vocab = MCTVocab(id='30', name='testVocab', vocab_file='vocab.dat') - meta_task = MCTMetaTask(id='200', name='TestMetaTask') - rel_task = MCTRelTask(id='300', name='TestRelTask') - - project = session.create_project( - name='My Project', - description='desc', - cuis='C001,C002', - members=[user], - dataset=dataset, - concept_db=concept_db, - vocab=vocab, - meta_tasks=[meta_task], - rel_tasks=[rel_task] - ) - self.assertIsInstance(project, MCTProject) - self.assertEqual(project.name, 'My Project') - self.assertEqual(project.description, 'desc') - self.assertEqual(project.cuis, 'C001,C002') - self.assertIsInstance(project.dataset, MCTDataset) - self.assertIsInstance(project.concept_db, MCTConceptDB) - self.assertIsInstance(project.vocab, MCTVocab) - self.assertEqual(project.members, [user]) - self.assertEqual(project.meta_tasks, [meta_task]) - self.assertEqual(project.rel_tasks, [rel_task]) - -if __name__ == '__main__': - unittest.main() \ No newline at end of file diff --git a/v1/medcat-trainer/docs/client.md b/v1/medcat-trainer/docs/client.md deleted file mode 100644 index d5d131325..000000000 --- a/v1/medcat-trainer/docs/client.md +++ /dev/null @@ -1,88 +0,0 @@ - ---- - -# MedCATtrainer Client - -A Python client for interacting with a MedCATTrainer web application instance. This package allows you to manage datasets, concept databases, vocabularies, model packs, users, projects, and more via Python code or the command line. - -## Features - -- Manage datasets, concept databases, vocabularies, and model packs -- Create and manage users and projects -- Retrieve and upload project annotations -- Command-line interface (CLI) for automation - -## Installation - -```sh -pip install mctclient -``` - -Or, if installing from source: - -```sh -cd client -python -m build -pip install dist/*.whl -``` - -## Python Usage - -```sh -export MCTRAINER_USERNAME= -export MCTRAINER_PASSWORD= -``` - -```python -from mctclient import MedCATTrainerSession, MCTDataset, MCTConceptDB, MCTVocab, MCTModelPack, MCTMetaTask, MCTRelTask, MCTUser, MCTProject - -# Connect to your MedCATTrainer instance -session = MedCATTrainerSession(server="http://localhost:8001") - -# List all projects -projects = session.get_projects() -for project in projects: - print(project) - -# Create a new dataset -dataset = session.create_dataset(name="My Dataset", dataset_file="path/to/data.csv") - -# Create a new user -user = session.create_user(username="newuser", password="password123") - -# Create a new project -project = session.create_project( - name="My Project", - description="A new annotation project", - members=[user], - dataset=dataset -) -``` - -### MedCATTrainerSession Methods - -- `create_project(name, description, members, dataset, cuis=[], cuis_file=None, concept_db=None, vocab=None, cdb_search_filter=None, modelpack=None, meta_tasks=[], rel_tasks=[])` -- `create_dataset(name, dataset_file)` -- `create_user(username, password)` -- `create_medcat_model(cdb, vocab)` -- `create_medcat_model_pack(model_pack)` -- `get_users()` -- `get_models()` -- `get_model_packs()` -- `get_meta_tasks()` -- `get_rel_tasks()` -- `get_projects()` -- `get_datasets()` -- `get_project_annos(projects)` - -Each method returns the corresponding object or a list of objects. - -## License - -This project is licensed under the Apache 2.0 License. - -## Contributing - -Pull requests are welcome! For major changes, please open an issue first to discuss what you would like to change. - - diff --git a/v1/medcat-trainer/notebook_docs/Client_API_Tutorials.ipynb b/v1/medcat-trainer/notebook_docs/Client_API_Tutorials.ipynb deleted file mode 100644 index 3cecbdcbc..000000000 --- a/v1/medcat-trainer/notebook_docs/Client_API_Tutorials.ipynb +++ /dev/null @@ -1,485 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# Client API Tutorial\n", - "\n", - "This notebook demonstrates how to use the `MedCATTrainerSession` class to interact with the MedCATTrainer API. We'll cover:\n", - "\n", - "1. Setting up a MedCATTrainer session\n", - "2. Exploring available resources (users, datasets, models)\n", - "3. Creating new resources (datasets, models, users)\n", - "4. Creating annotation projects with different approaches\n", - "5. Downloading and saving annotations\n", - "\n", - "These steps provide a complete workflow for programmatically managing medical text annotation projects with MedCATTrainer." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "__SETUP:__\n", - "\n", - "You need to have [MedCATtrainer service running locally](http://localhost:8001/)\n", - "\n", - "The default credentials when setup is:\n", - "\n", - "```bash\n", - "username: admin\n", - "password: admin\n", - "```\n", - "\n", - "The administrative console can be found here: http://localhost:8001/admin/\n", - "\n", - "Within this admin console is where you can manually interact the the MedCATtrainer program and setup projects\n", - "\n", - "\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 1. Setup and Authentication\n", - "\n", - "First, let's import the necessary classes and set up our session:" - ] - }, - { - "cell_type": "code", - "execution_count": 5, - "metadata": {}, - "outputs": [], - "source": [ - "import os\n", - "import json\n", - "import sys\n", - "sys.path.append('../client')\n", - "from mctclient import MedCATTrainerSession, MCTDataset, MCTConceptDB, MCTVocab, MCTModelPack, MCTUser, MCTProject" - ] - }, - { - "cell_type": "code", - "execution_count": 6, - "metadata": {}, - "outputs": [], - "source": [ - "# Initialize the session\n", - "\n", - "# Set environment variables for authentication, These are default and are optional.\n", - "os.environ['MCTRAINER_USERNAME'] = 'admin'\n", - "os.environ['MCTRAINER_PASSWORD'] = 'admin'\n", - "mct_server = 'http://localhost:8001' # Default server is http://localhost:8001 if not specified\n", - "# session = MedCATTrainerSession()\n", - "\n", - "# Initialize the session and change explicit arguements if required.\n", - "session = MedCATTrainerSession(server=mct_server, username='admin', password='admin') # Wrapper for the MedCATTrainer API." - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 2. Explore Available Resources\n", - "\n", - "Let's check what resources are already available in the MedCATTrainer instance:" - ] - }, - { - "cell_type": "code", - "execution_count": 7, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Users:\n", - "3 : annotator2\n", - "2 : annotator1\n", - "1 : admin\n", - "\n", - "Datasets:\n", - "1 : Example Dataset \t http://localhost:8001/media/Example_Dataset.csv\n", - "2 : Neurology Notes \t http://localhost:8001/media/neurology_notes.csv\n", - "3 : SG-example-docs \t http://localhost:8001/media/sg-sample-docs.csv\n", - "\n", - "Concept DBs:\n", - "1 : umls_cdb \t http://localhost:8001/media/cdb.dat\n", - "2 : snomed_cdb \t http://localhost:8001/media/snomed-cdb.dat\n", - "3 : snomed_2022_modelpack_CDB \t http://localhost:8001/media/Users/k1897038/projects/MedCATtrainer/webapp/api/media/20230227__kch_gstt_trained_model_494c3717f637bb89/cdb.dat\n", - "8 : medcat_full_pack_CDB \t http://localhost:8001/media/Users/k1897038/projects/MedCATtrainer/webapp/api/media/medcat_model_pack_u3fB9G5/cdb.dat\n", - "12 : snomed-2023-bert-metacats_CDB \t http://localhost:8001/media/Users/k1897038/projects/MedCATtrainer/webapp/api/media/20230227__kch_gstt_trained_model_bert_metacats_138689a7bb83cb0a/cdb.dat\n", - "13 : de_id_modelpack_CDB \t http://localhost:8001/media/Users/k1897038/projects/MedCATtrainer/webapp/api/media/medcat_deid_trained_a7120281ebb9fc9e/cdb.dat\n", - "\n", - "Vocabularies:\n", - "1 : http://localhost:8001/media/vocab.dat\n", - "3 : http://localhost:8001/media/20230227__kch_gstt_trained_model_494c3717f637bb89/vocab.dat\n", - "12 : http://localhost:8001/media/20230227__kch_gstt_trained_model_bert_metacats_138689a7bb83cb0a/vocab.dat\n", - "\n", - "ModelPacks:\n", - "1 : snomed_2022_modelpack \t http://localhost:8001/media/20230227__kch_gstt_trained_model_494c3717f637bb89.zip\n", - "9 : snomed-2023-bert-metacats \t http://localhost:8001/media/20230227__kch_gstt_trained_model_bert_metacats_138689a7bb83cb0a.zip\n", - "10 : de-id modelpack \t http://localhost:8001/media/medcat_deid_trained_a7120281ebb9fc9e.zip\n", - "\n", - "Meta Tasks:\n", - "1 : Experiencer\n", - "2 : Presence\n", - "3 : Subject\n", - "4 : Temporality\n", - "5 : Time\n", - "\n", - "Relation Tasks:\n", - "1 : Spatial\n" - ] - } - ], - "source": [ - "# Get users\n", - "users = session.get_users()\n", - "print(\"Users:\")\n", - "for user in users:\n", - " print(user)\n", - "print()\n", - "\n", - "# Get datasets\n", - "datasets = session.get_datasets()\n", - "print(\"Datasets:\")\n", - "for dataset in datasets:\n", - " print(dataset)\n", - "print()\n", - "\n", - "# Get concept databases and vocabularies\n", - "concept_dbs, vocabs = session.get_models()\n", - "print(\"Concept DBs:\")\n", - "for cdb in concept_dbs:\n", - " print(cdb)\n", - "print()\n", - "print(\"Vocabularies:\")\n", - "for vocab in vocabs:\n", - " print(vocab)\n", - "print()\n", - "\n", - "# Get modelpacks\n", - "model_packs = session.get_model_packs()\n", - "print(\"ModelPacks:\")\n", - "for model_pack in model_packs:\n", - " print(model_pack)\n", - "print()\n", - "\n", - "# Get meta tasks\n", - "meta_tasks = session.get_meta_tasks()\n", - "print(\"Meta Tasks:\")\n", - "for i, task in enumerate(meta_tasks):\n", - " print(f\"{i+1} : {task.name}\")\n", - "print()\n", - "\n", - "# Get relation tasks\n", - "rel_tasks = session.get_rel_tasks()\n", - "print(\"Relation Tasks:\")\n", - "for i, task in enumerate(rel_tasks):\n", - " print(f\"{i+1} : {task.name}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 3. Upload new resources to MedCATtrainer\n", - "\n", - "Before we create a project we need to create and upload all the required resources. We'll start with a dataset:\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Create a new dataset to be annotated.\n", - "neurology_dataset = session.create_dataset(\n", - " name=\"Neurology Notes\", # Names must be unique\n", - " dataset_file=\"./example_data/neuro.csv\" # This csv should have atleast these 2 columns. [\"name\", \"text\"]\n", - ")\n", - "print(f\"Created dataset: {neurology_dataset}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3.1 Creating MedCAT Models\n", - "\n", - "We have two options for creating models:\n", - "\n", - "1. Upload separate CDB and Vocab files\n", - "2. Upload a complete model pack ZIP\n", - "\n", - "Let's explore both approaches:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# If you don't have these medcat components or modelpack. You can download an example here:\n", - "# Download vocab.dat\n", - "!wget -O ./example_data/vocab.dat https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/vocab.dat\n", - "# Download snomed-cdb-mc-v1.cdb\n", - "!wget -O ./example_data/snomed-cdb-mc-v1.cdb https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/snomed-cdb-mc-v1.cdb\n", - "# Download model pack (this is a zip file)\n", - "!wget -O ./example_data/medcat_model_pack.zip https://cogstack-medcat-example-models.s3.eu-west-2.amazonaws.com/medcat-example-models/medcat_model_pack_c4e0d25701ce4e88.zip\n", - "\n", - "# Otherwise Skip this" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Option 1: Upload separate CDB and Vocab files\n", - "example_cdb = MCTConceptDB(name=\"example_cdbv1\", conceptdb_file=\"./example_data/snomed-cdb-mc-v1.cdb\")\n", - "example_vocab = MCTVocab(name=\"example_vocabv2\", vocab_file=\"./example_data/vocab.dat\")\n", - "\n", - "# Create the model in the MedCATTrainer instance\n", - "cdb, vocab = session.create_medcat_model(example_cdb, example_vocab)\n", - "print(f\"Created CDB: {cdb}\")\n", - "print(f\"Created Vocab: {vocab}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Option 2: Upload a complete modelpack ZIP\n", - "# This contains CDB, Vocab, and potentially MetaCAT and RelCAT models\n", - "medcat_model_pack = MCTModelPack(\n", - " name=\"medcat_full_pack\",\n", - " model_pack_zip=\"./medcat_model_pack.zip\"\n", - ")\n", - "session.create_medcat_model_pack(medcat_model_pack)\n", - "print(f\"Created model pack: {medcat_model_pack}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "### 3.2 Creating a New User\n", - "\n", - "If we need to add an annotator to our project:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "new_user = session.create_user(username=\"annotator1\", password=\"secure_password\")\n", - "print(f\"Created user: {new_user}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 4. Creating Annotation Projects\n", - "\n", - "Now we can create annotation projects using our resources:\n", - "\n", - "But first, Let's check again what resources are now available in the MedCATTrainer instance after Part 3:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Get users\n", - "users = session.get_users()\n", - "print(\"Users:\")\n", - "for user in users:\n", - " print(user)\n", - "print()\n", - "\n", - "# Get datasets\n", - "datasets = session.get_datasets()\n", - "print(\"Datasets:\")\n", - "for dataset in datasets:\n", - " print(dataset)\n", - "print()\n", - "\n", - "# Get concept databases and vocabularies\n", - "concept_dbs, vocabs = session.get_models()\n", - "print(\"Concept DBs:\")\n", - "for cdb in concept_dbs:\n", - " print(cdb)\n", - "print()\n", - "print(\"Vocabularies:\")\n", - "for vocab in vocabs:\n", - " print(vocab)\n", - "print()\n", - "\n", - "# Get modelpacks\n", - "model_packs = session.get_model_packs()\n", - "print(\"ModelPacks:\")\n", - "for model_pack in model_packs:\n", - " print(model_pack)\n", - "print()\n", - "\n", - "# Get meta tasks\n", - "meta_tasks = session.get_meta_tasks()\n", - "print(\"Meta Tasks:\")\n", - "for i, task in enumerate(meta_tasks):\n", - " print(f\"{i+1} : {task.name}\")\n", - "print()\n", - "\n", - "# Get relation tasks\n", - "rel_tasks = session.get_rel_tasks()\n", - "print(\"Relation Tasks:\")\n", - "for i, task in enumerate(rel_tasks):\n", - " print(f\"{i+1} : {task.name}\")\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Method 1: Create a project with separate CDB and Vocab\n", - "neuro_project = session.create_project(\n", - " name=\"Neurology Annotation Project\",\n", - " description=\"Demo annotation project of neurology conditions, epilepsy & seizure\",\n", - " members=[user for user in users], # Add all users...\n", - " dataset=datasets[-1],\n", - " concept_db=concept_dbs[-1],\n", - " vocab=vocabs[-1],\n", - " cuis=[\"84757009\", \"91175000\"], # Whitelist Filter CUIs/concepts\n", - " #meta_tasks=[\"Temporality\", \"Certainty\"], # Can specify by name or by object\n", - " #rel_tasks=[\"Has_Finding\"] # only add this relational extraction task if absolutely required\n", - ")\n", - "\n", - "print(f\"Created project: {neuro_project}\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Method 2: Create a project with a modelpack\n", - "\n", - "# Rerun the explore resources to run the following code:\n", - "general_project = session.create_project(\n", - " name=\"Demo General Medical Annotation\",\n", - " description=\"Annotation of neurology medical conditions\",\n", - " members=[user for user in users], # All users\n", - " dataset=datasets[-1], # Use existing dataset\n", - " modelpack=model_packs[-1], # Use existing model pack\n", - " # cuis_file=\"./resources/mct_filter.json\", # Load whitelist concepts from a file [\"concept1\", \"concept2\"]\n", - ")\n", - "\n", - "print(f\"Created project with model pack: {general_project}\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 5. Retrieving Project Annotations\n", - "\n", - "After annotators have worked on the projects, we can download the annotations:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Get all projects\n", - "mct_projects = session.get_projects()\n", - "\n", - "# Download annotations for all projects\n", - "projects = session.get_project_annos(mct_projects)\n", - "\n", - "print(f\"Downloaded annotations for {len(mct_projects)} projects:\")\n", - "for p in projects['projects']:\n", - " print(p['name'])" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Inspect all details from a single export\n", - "projects['projects'][0]" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## 6. Saving Annotations for Analysis\n", - "\n", - "Finally, let's save the annotations to a file for later analysis:" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# Save MCT export / annotations to a file\n", - "with open(\"./example_data/medical_annotations.json\", \"w\") as f:\n", - " json.dump(projects, f, indent=2)\n", - "\n", - "print(\"Annotations saved to ./example_data/medical_annotations.json\")" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "# End of Tutorial" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "bioext-medcat-env", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.11.11" - } - }, - "nbformat": 4, - "nbformat_minor": 4 -}