From 75e177539988c9e2427b3977f412fedc5464c853 Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 14 Oct 2025 11:03:14 +0100 Subject: [PATCH 01/30] CU-869aujr7h: Add nightly workflow to check library stability --- .github/workflows/medcat-v2-lib-stabiliy.yml | 50 ++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 .github/workflows/medcat-v2-lib-stabiliy.yml diff --git a/.github/workflows/medcat-v2-lib-stabiliy.yml b/.github/workflows/medcat-v2-lib-stabiliy.yml new file mode 100644 index 000000000..7919b8820 --- /dev/null +++ b/.github/workflows/medcat-v2-lib-stabiliy.yml @@ -0,0 +1,50 @@ +name: MedCAT-nightly-stability-check + +on: + schedule: + - cron: "0 3 * * *" # every Monday at 3am UTC + workflow_dispatch: # allow manual runs + push: + paths: + - ".github/workflows/medcat-v2-lib-stabiliy.yml" + pull_request: + paths: + - ".github/workflows/medcat-v2-lib-stabiliy.yml" + + +jobs: + test: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: [3.9, 3.10, 3.11, 3.12] + + steps: + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v3 + with: + python-version: ${{ matrix.python-version }} + + - name: Install with latest deps + run: | + uv sync --all-extras --dev --no-lock + uv run python -m ensurepip + uv run python -m pip install --upgrade pip + + - name: Check types + run: | + uv run python -m mypy --follow-imports=normal medcat + + - name: Ruff linting + run: | + uv run ruff check medcat --preview + + - name: Test + run: | + timeout 30m uv run python -m unittest discover + + - name: Model regression + run: | + uv run bash tests/backwards_compatibility/run_current.sh From 68deffd556eafeabfca2d2895a4117b7b175443d Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 14 Oct 2025 11:07:21 +0100 Subject: [PATCH 02/30] CU-869aujr7h: Update working directory in new workflow --- .github/workflows/medcat-v2-lib-stabiliy.yml | 4 ++++ 1 file changed, 4 insertions(+) diff 
--git a/.github/workflows/medcat-v2-lib-stabiliy.yml b/.github/workflows/medcat-v2-lib-stabiliy.yml index 7919b8820..a0a7244cd 100644 --- a/.github/workflows/medcat-v2-lib-stabiliy.yml +++ b/.github/workflows/medcat-v2-lib-stabiliy.yml @@ -11,6 +11,10 @@ on: paths: - ".github/workflows/medcat-v2-lib-stabiliy.yml" +defaults: + run: + working-directory: ./medcat-v2 + jobs: test: From 63e7eea8e4ff9ebdd20d8f143fb06c428ccf206d Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 14 Oct 2025 11:08:00 +0100 Subject: [PATCH 03/30] CU-869aujr7h: Update comment in new workflow --- .github/workflows/medcat-v2-lib-stabiliy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/medcat-v2-lib-stabiliy.yml b/.github/workflows/medcat-v2-lib-stabiliy.yml index a0a7244cd..ec81005f2 100644 --- a/.github/workflows/medcat-v2-lib-stabiliy.yml +++ b/.github/workflows/medcat-v2-lib-stabiliy.yml @@ -2,7 +2,7 @@ name: MedCAT-nightly-stability-check on: schedule: - - cron: "0 3 * * *" # every Monday at 3am UTC + - cron: "0 3 * * *" # every day at 3am UTC workflow_dispatch: # allow manual runs push: paths: From aaf99069ffed16318345f1651a32bbad6cbf0887 Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 14 Oct 2025 11:21:33 +0100 Subject: [PATCH 04/30] CU-869aujr7h: Disallow incompatible transformers version --- medcat-v2/pyproject.toml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/medcat-v2/pyproject.toml b/medcat-v2/pyproject.toml index 556a76993..199cc38d4 100644 --- a/medcat-v2/pyproject.toml +++ b/medcat-v2/pyproject.toml @@ -99,12 +99,16 @@ dict_ner = [ ] deid = [ "datasets>=2.2.2,<3.0.0", + # Transformers 4.57 doesn't support 3.9 + "transformers!=4.57.0; python_version == '3.9'", "transformers>=4.41.0,<5.0", # avoid major bump "scikit-learn>=1.1.3,<2.0", "torch>=2.4.0,<3.0", "scipy", ] rel_cat = [ + # Transformers 4.57 doesn't support 3.9 + "transformers!=4.57.0; python_version == '3.9'", "transformers>=4.41.0,<5.0", # avoid major bump 
"scikit-learn>=1.1.3,<2.0", "torch>=2.4.0,<3.0", From 86698afe97b9fb6ef21c91906883715b50ad5f5e Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 14 Oct 2025 11:21:50 +0100 Subject: [PATCH 05/30] CU-869aujr7h: Fix worklflow install / sync --- .github/workflows/medcat-v2-lib-stabiliy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/medcat-v2-lib-stabiliy.yml b/.github/workflows/medcat-v2-lib-stabiliy.yml index ec81005f2..9621e822d 100644 --- a/.github/workflows/medcat-v2-lib-stabiliy.yml +++ b/.github/workflows/medcat-v2-lib-stabiliy.yml @@ -33,7 +33,7 @@ jobs: - name: Install with latest deps run: | - uv sync --all-extras --dev --no-lock + uv sync --all-extras --dev uv run python -m ensurepip uv run python -m pip install --upgrade pip From b3b955aaad78ad244eb78cc297e05e98b3c1fe6a Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 14 Oct 2025 11:27:05 +0100 Subject: [PATCH 06/30] CU-869aujr7h: Make worklflow only have read permissions --- .github/workflows/medcat-v2-lib-stabiliy.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/medcat-v2-lib-stabiliy.yml b/.github/workflows/medcat-v2-lib-stabiliy.yml index 9621e822d..343e58a0e 100644 --- a/.github/workflows/medcat-v2-lib-stabiliy.yml +++ b/.github/workflows/medcat-v2-lib-stabiliy.yml @@ -1,5 +1,6 @@ name: MedCAT-nightly-stability-check - +permissions: + contents: read on: schedule: - cron: "0 3 * * *" # every day at 3am UTC From 99042b041a9c93e79d219dc8e8e6a815056cd519 Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 14 Oct 2025 11:37:43 +0100 Subject: [PATCH 07/30] CU-869aujr7h: Install without lock --- .github/workflows/medcat-v2-lib-stabiliy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/medcat-v2-lib-stabiliy.yml b/.github/workflows/medcat-v2-lib-stabiliy.yml index 343e58a0e..472be0f25 100644 --- a/.github/workflows/medcat-v2-lib-stabiliy.yml +++ b/.github/workflows/medcat-v2-lib-stabiliy.yml @@ 
-34,7 +34,7 @@ jobs: - name: Install with latest deps run: | - uv sync --all-extras --dev + uv pip install --no-lock --system --all-extras --dev uv run python -m ensurepip uv run python -m pip install --upgrade pip From 9ad4a9e2816aadd5b93fd5359540d5b67ae5515c Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 14 Oct 2025 11:47:45 +0100 Subject: [PATCH 08/30] CU-869aujr7h: Use non-uv pip for lock-free install --- .github/workflows/medcat-v2-lib-stabiliy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/medcat-v2-lib-stabiliy.yml b/.github/workflows/medcat-v2-lib-stabiliy.yml index 472be0f25..01fea6330 100644 --- a/.github/workflows/medcat-v2-lib-stabiliy.yml +++ b/.github/workflows/medcat-v2-lib-stabiliy.yml @@ -34,9 +34,9 @@ jobs: - name: Install with latest deps run: | - uv pip install --no-lock --system --all-extras --dev uv run python -m ensurepip uv run python -m pip install --upgrade pip + uv run python -m pip install ".[spacy,deid,meta-cat,rel-cat,dict-ner,dev]" - name: Check types run: | From 07d072e2f3fdbfa53ee0ec47fdc68807221e2e55 Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 14 Oct 2025 12:10:14 +0100 Subject: [PATCH 09/30] CU-869aujr7h: Force usage of correct python version in workflow --- .github/workflows/medcat-v2-lib-stabiliy.yml | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/.github/workflows/medcat-v2-lib-stabiliy.yml b/.github/workflows/medcat-v2-lib-stabiliy.yml index 01fea6330..f99fa792e 100644 --- a/.github/workflows/medcat-v2-lib-stabiliy.yml +++ b/.github/workflows/medcat-v2-lib-stabiliy.yml @@ -34,22 +34,22 @@ jobs: - name: Install with latest deps run: | - uv run python -m ensurepip - uv run python -m pip install --upgrade pip - uv run python -m pip install ".[spacy,deid,meta-cat,rel-cat,dict-ner,dev]" + uv run --python ${{ matrix.python-version }} python -m ensurepip + uv run --python ${{ matrix.python-version }} python -m pip install --upgrade pip + uv run --python 
${{ matrix.python-version }} python -m pip install ".[spacy,deid,meta-cat,rel-cat,dict-ner,dev]" - name: Check types run: | - uv run python -m mypy --follow-imports=normal medcat + uv run --python ${{ matrix.python-version }} python -m mypy --follow-imports=normal medcat - name: Ruff linting run: | - uv run ruff check medcat --preview + uv run --python ${{ matrix.python-version }} python -m ruff check medcat --preview - name: Test run: | - timeout 30m uv run python -m unittest discover + timeout 30m uv run --python ${{ matrix.python-version }} python -m unittest discover - name: Model regression run: | - uv run bash tests/backwards_compatibility/run_current.sh + uv run --python ${{ matrix.python-version }} bash tests/backwards_compatibility/run_current.sh From 606769dc9f1f9949eb45afd55cd958b61162acf1 Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 14 Oct 2025 12:18:42 +0100 Subject: [PATCH 10/30] CU-869aujr7h: Fix versions in workflow (3.10 instead of 3.1) --- .github/workflows/medcat-v2-lib-stabiliy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/medcat-v2-lib-stabiliy.yml b/.github/workflows/medcat-v2-lib-stabiliy.yml index f99fa792e..9475ba263 100644 --- a/.github/workflows/medcat-v2-lib-stabiliy.yml +++ b/.github/workflows/medcat-v2-lib-stabiliy.yml @@ -24,7 +24,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: [3.9, 3.10, 3.11, 3.12] + python-version: [ "3.9", "3.10", "3.11", "3.12"] steps: - uses: actions/checkout@v4 From ecc18dea549bdee7e0caf5c22c03c4dd9485d6c6 Mon Sep 17 00:00:00 2001 From: mart-r Date: Fri, 10 Oct 2025 15:37:08 +0100 Subject: [PATCH 11/30] Typing fix for regression utils --- medcat-v2/medcat/utils/regression/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/medcat-v2/medcat/utils/regression/utils.py b/medcat-v2/medcat/utils/regression/utils.py index 2f793b409..0d168bc1e 100644 --- 
a/medcat-v2/medcat/utils/regression/utils.py +++ b/medcat-v2/medcat/utils/regression/utils.py @@ -212,7 +212,7 @@ def get_class_level_docstrings(cls: Type) -> list[str]: elem.value, ast.Constant): # If it's an expression node containing a constant, # extract the string - docstrings.append(elem.value.s) + docstrings.append(str(elem.value.s)) return docstrings From ad6eb74e10bb1ce7393cfc4181c96ace87e538e2 Mon Sep 17 00:00:00 2001 From: mart-r Date: Fri, 10 Oct 2025 15:36:51 +0100 Subject: [PATCH 12/30] Typing fix for modern bert RelCAT --- .../addons/relation_extraction/modernbert/tokenizer.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/medcat-v2/medcat/components/addons/relation_extraction/modernbert/tokenizer.py b/medcat-v2/medcat/components/addons/relation_extraction/modernbert/tokenizer.py index 17bff1f41..1c1aac991 100644 --- a/medcat-v2/medcat/components/addons/relation_extraction/modernbert/tokenizer.py +++ b/medcat-v2/medcat/components/addons/relation_extraction/modernbert/tokenizer.py @@ -33,5 +33,5 @@ def load(cls, tokenizer_path: str, relcat_config: ConfigRelCAT, **kwargs relcat_config.general.model_name = ( cls.pretrained_model_name_or_path) tokenizer.hf_tokenizers = PreTrainedTokenizerFast.from_pretrained( - path=relcat_config.general.model_name) + relcat_config.general.model_name) return tokenizer From 3dd38f485d32a620908c7a0c8a2db26cdc0a31aa Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 14 Oct 2025 12:43:07 +0100 Subject: [PATCH 13/30] CU-869aujr7h: Change the way tests timeout is set up --- .github/workflows/medcat-v2-lib-stabiliy.yml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/medcat-v2-lib-stabiliy.yml b/.github/workflows/medcat-v2-lib-stabiliy.yml index 9475ba263..1a805d0db 100644 --- a/.github/workflows/medcat-v2-lib-stabiliy.yml +++ b/.github/workflows/medcat-v2-lib-stabiliy.yml @@ -48,7 +48,8 @@ jobs: - name: Test run: | - timeout 30m uv run --python ${{ matrix.python-version }} 
python -m unittest discover + uv run --python ${{ matrix.python-version }} python -m unittest discover + timeout-minutes: 30 - name: Model regression run: | From 4cc196f4db9825320fac13c13c1a3caea5aa3457 Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 14 Oct 2025 13:44:59 +0100 Subject: [PATCH 14/30] CU-869aujr7h: Attempt to fix builds on Windows by ignoring Windows + python 3.9 --- .github/workflows/medcat-v2-lib-stabiliy.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/medcat-v2-lib-stabiliy.yml b/.github/workflows/medcat-v2-lib-stabiliy.yml index 1a805d0db..07ac7e298 100644 --- a/.github/workflows/medcat-v2-lib-stabiliy.yml +++ b/.github/workflows/medcat-v2-lib-stabiliy.yml @@ -25,6 +25,10 @@ jobs: matrix: os: [ubuntu-latest, macos-latest, windows-latest] python-version: [ "3.9", "3.10", "3.11", "3.12"] + exclude: + # NOTE: On Windows, prior to 3.9 builds fail due to blis not providing wheels + - os: windows-latest + python-version: 3.9 steps: - uses: actions/checkout@v4 From aeabbaa78e80de0a51767dbb6cdb222ca9b6f7c7 Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 14 Oct 2025 21:05:15 +0100 Subject: [PATCH 15/30] CU-869aujr7h: Remove python 3.9 from matrix --- .github/workflows/medcat-v2-lib-stabiliy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/medcat-v2-lib-stabiliy.yml b/.github/workflows/medcat-v2-lib-stabiliy.yml index 07ac7e298..bfa9e9669 100644 --- a/.github/workflows/medcat-v2-lib-stabiliy.yml +++ b/.github/workflows/medcat-v2-lib-stabiliy.yml @@ -24,7 +24,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: [ "3.9", "3.10", "3.11", "3.12"] + python-version: [ "3.10", "3.11", "3.12"] exclude: # NOTE: On Windows, prior to 3.9 builds fail due to blis not providing wheels - os: windows-latest From 112d3f965f57e467b2a4942736d6e100780d8884 Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 14 Oct 2025 21:56:15 +0100 Subject: [PATCH 16/30] 
CU-869aujr7h: Attempt fix mock for Windows --- medcat-v2/tests/utils/test_cdb_state.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/medcat-v2/tests/utils/test_cdb_state.py b/medcat-v2/tests/utils/test_cdb_state.py index 4883c1ce5..091c9ccba 100644 --- a/medcat-v2/tests/utils/test_cdb_state.py +++ b/medcat-v2/tests/utils/test_cdb_state.py @@ -123,7 +123,8 @@ def saved_name_temp_file(cls): @classmethod def setUpClass(cls) -> None: - with mock.patch("builtins.open", side_effect=open) as cls.popen: + with mock.patch("medcat.utils.cdb_state.open", side_effect=open + ) as cls.popen: with mock.patch("tempfile.NamedTemporaryFile", side_effect=cls.saved_name_temp_file) as cls.pntf: return super().setUpClass() From 53eee068f1b7f5f01069800388d085bdbc8dd0db Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 14 Oct 2025 22:08:47 +0100 Subject: [PATCH 17/30] CU-869aujr7h: Use CPU-only torch for MacOS in workflow to avoid MPS issues --- .github/workflows/medcat-v2-lib-stabiliy.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/workflows/medcat-v2-lib-stabiliy.yml b/.github/workflows/medcat-v2-lib-stabiliy.yml index bfa9e9669..b6d6516a3 100644 --- a/.github/workflows/medcat-v2-lib-stabiliy.yml +++ b/.github/workflows/medcat-v2-lib-stabiliy.yml @@ -40,6 +40,10 @@ jobs: run: | uv run --python ${{ matrix.python-version }} python -m ensurepip uv run --python ${{ matrix.python-version }} python -m pip install --upgrade pip + # install cpu-only torch for MacOS + if [[ "$RUNNER_OS" == "macOS" ]]; then + uv run --python ${{ matrix.python-version }} python -m pip install torch --index-url https://download.pytorch.org/whl/cpu + fi uv run --python ${{ matrix.python-version }} python -m pip install ".[spacy,deid,meta-cat,rel-cat,dict-ner,dev]" - name: Check types From 7b84d9ab15eb78f9b56c51a5f7a5ca6f4ba58716 Mon Sep 17 00:00:00 2001 From: mart-r Date: Tue, 14 Oct 2025 22:48:58 +0100 Subject: [PATCH 18/30] CU-869aujr7h: Force installation to happen through 
bash so IF works on Windows --- .github/workflows/medcat-v2-lib-stabiliy.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/medcat-v2-lib-stabiliy.yml b/.github/workflows/medcat-v2-lib-stabiliy.yml index b6d6516a3..4186bd260 100644 --- a/.github/workflows/medcat-v2-lib-stabiliy.yml +++ b/.github/workflows/medcat-v2-lib-stabiliy.yml @@ -37,6 +37,7 @@ jobs: python-version: ${{ matrix.python-version }} - name: Install with latest deps + shell: bash run: | uv run --python ${{ matrix.python-version }} python -m ensurepip uv run --python ${{ matrix.python-version }} python -m pip install --upgrade pip From 8817043b0ca97e259f684ac3106a39c1df823472 Mon Sep 17 00:00:00 2001 From: mart-r Date: Wed, 15 Oct 2025 11:00:57 +0100 Subject: [PATCH 19/30] CU-869aujr7h: Add 3.13 for lib stability workflow --- .github/workflows/medcat-v2-lib-stabiliy.yml | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/.github/workflows/medcat-v2-lib-stabiliy.yml b/.github/workflows/medcat-v2-lib-stabiliy.yml index 4186bd260..0ba20954f 100644 --- a/.github/workflows/medcat-v2-lib-stabiliy.yml +++ b/.github/workflows/medcat-v2-lib-stabiliy.yml @@ -24,11 +24,7 @@ jobs: fail-fast: false matrix: os: [ubuntu-latest, macos-latest, windows-latest] - python-version: [ "3.10", "3.11", "3.12"] - exclude: - # NOTE: On Windows, prior to 3.9 builds fail due to blis not providing wheels - - os: windows-latest - python-version: 3.9 + python-version: [ "3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 From f201270f31bd16e3bbd18e751df140cffefba2bb Mon Sep 17 00:00:00 2001 From: mart-r Date: Wed, 15 Oct 2025 11:07:15 +0100 Subject: [PATCH 20/30] CU-869aujr7h: [NEEDS TO BE REVERTED] Only run on MacOS and Windows on 3.10. This commit TEMPORARILY (while the workflows are failing) makes them only run on Windows and MacOS (which are the workflows that are failing) and on python 3.10 so as to lower the overall number of workflow runners. 
--- .github/workflows/medcat-v2-lib-stabiliy.yml | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/.github/workflows/medcat-v2-lib-stabiliy.yml b/.github/workflows/medcat-v2-lib-stabiliy.yml index 0ba20954f..96559fa86 100644 --- a/.github/workflows/medcat-v2-lib-stabiliy.yml +++ b/.github/workflows/medcat-v2-lib-stabiliy.yml @@ -23,8 +23,11 @@ jobs: strategy: fail-fast: false matrix: - os: [ubuntu-latest, macos-latest, windows-latest] - python-version: [ "3.10", "3.11", "3.12", "3.13"] + # TODO: Revert before merge + # os: [ubuntu-latest, macos-latest, windows-latest] + # python-version: [ "3.10", "3.11", "3.12", "3.13"] + os: [macos-latest, windows-latest] + python-version: [ "3.10", ] steps: - uses: actions/checkout@v4 From ea89d14d16f582d5ee329e88ddb708d38a6ef259 Mon Sep 17 00:00:00 2001 From: mart-r Date: Wed, 15 Oct 2025 11:09:56 +0100 Subject: [PATCH 21/30] CU-869aujr7h: Allow 45 minutes for tests so tests on MacOS don't time out --- .github/workflows/medcat-v2-lib-stabiliy.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/medcat-v2-lib-stabiliy.yml b/.github/workflows/medcat-v2-lib-stabiliy.yml index 96559fa86..cb8d2b58c 100644 --- a/.github/workflows/medcat-v2-lib-stabiliy.yml +++ b/.github/workflows/medcat-v2-lib-stabiliy.yml @@ -57,7 +57,7 @@ jobs: - name: Test run: | uv run --python ${{ matrix.python-version }} python -m unittest discover - timeout-minutes: 30 + timeout-minutes: 45 - name: Model regression run: | From 2347a6c6f67c102635919d92717ee767d0125a3d Mon Sep 17 00:00:00 2001 From: mart-r Date: Wed, 15 Oct 2025 11:17:53 +0100 Subject: [PATCH 22/30] CU-869aujr7h: Use temporary directory instead of named temp file for Windows compatibility --- medcat-v2/medcat/utils/cdb_state.py | 10 +++++++--- medcat-v2/tests/utils/test_cdb_state.py | 8 ++++---- 2 files changed, 11 insertions(+), 7 deletions(-) diff --git a/medcat-v2/medcat/utils/cdb_state.py b/medcat-v2/medcat/utils/cdb_state.py index 
b5390f398..c280999ba 100644 --- a/medcat-v2/medcat/utils/cdb_state.py +++ b/medcat-v2/medcat/utils/cdb_state.py @@ -3,6 +3,7 @@ from typing import TypedDict, cast import tempfile import dill +import os from copy import deepcopy @@ -216,7 +217,10 @@ def on_disk_memory_capture(cdb): Yields: None """ - with tempfile.NamedTemporaryFile() as tf: - save_cdb_state(cdb, tf.name) + # NOTE: using temporary directory so that it also works on Windows + # otherwise you can't reopen a temporary file in Windows (apparently) + with tempfile.TemporaryDirectory() as temp_dir: + temp_file_name = os.path.join(temp_dir, "cdb_state.dat") + save_cdb_state(cdb, temp_file_name) yield - load_and_apply_cdb_state(cdb, tf.name) + load_and_apply_cdb_state(cdb, temp_file_name) diff --git a/medcat-v2/tests/utils/test_cdb_state.py b/medcat-v2/tests/utils/test_cdb_state.py index 091c9ccba..a1013aa22 100644 --- a/medcat-v2/tests/utils/test_cdb_state.py +++ b/medcat-v2/tests/utils/test_cdb_state.py @@ -113,19 +113,19 @@ def test_state_restored(self): class StateSavedOnDiskTests(StateSavedTests): on_disk = True - _named_tempory_file = tempfile.NamedTemporaryFile + _named_tempory_directory = tempfile.TemporaryDirectory @classmethod def saved_name_temp_file(cls): - tf = cls._named_tempory_file() - cls.temp_file_name = tf.name + tf = cls._named_tempory_directory() + cls.temp_file_name = os.path.join(tf.name, "cdb_state.dat") return tf @classmethod def setUpClass(cls) -> None: with mock.patch("medcat.utils.cdb_state.open", side_effect=open ) as cls.popen: - with mock.patch("tempfile.NamedTemporaryFile", + with mock.patch("tempfile.TemporaryDirectory", side_effect=cls.saved_name_temp_file) as cls.pntf: return super().setUpClass() From 2dbce7f90d132766d2d4956e7f166f81c99b24c7 Mon Sep 17 00:00:00 2001 From: mart-r Date: Wed, 15 Oct 2025 14:29:46 +0100 Subject: [PATCH 23/30] CU-869aujr7h: Avoid heavy RAM tests (DeID) on MacOS during CI --- medcat-v2/tests/utils/ner/test_deid.py | 11 ++++++++++- 1 file 
changed, 10 insertions(+), 1 deletion(-) diff --git a/medcat-v2/tests/utils/ner/test_deid.py b/medcat-v2/tests/utils/ner/test_deid.py index f11cc2dec..bd09efea6 100644 --- a/medcat-v2/tests/utils/ner/test_deid.py +++ b/medcat-v2/tests/utils/ner/test_deid.py @@ -36,6 +36,10 @@ cnf.general.nlp.provider = 'spacy' +def should_do_test_ci() -> bool: + return os.getenv("RUNNER_OS", "None").lower() != "macos" + + def _get_def_cdb(): return CDB(config=cnf) @@ -112,13 +116,16 @@ def _train_model_once() -> tuple[tuple[Any, Any, Any], deid.DeIdModel]: return retval, model -_TRAINED_MODEL_AND_INFO = _train_model_once() +if should_do_test_ci(): + _TRAINED_MODEL_AND_INFO = _train_model_once() def train_model_once() -> tuple[tuple[Any, Any, Any], deid.DeIdModel]: return _TRAINED_MODEL_AND_INFO +@unittest.skipIf(not should_do_test_ci(), + "MacOS on workflow doesn't have enough memory") class DeIDModelTests(unittest.TestCase): save_folder = os.path.join("results", "final_model") @@ -171,6 +178,8 @@ def test_add_new_concepts(self): ''' # noqa +@unittest.skipIf(not should_do_test_ci(), + "MacOS on workflow doesn't have enough memory") class DeIDModelWorks(unittest.TestCase): save_folder = os.path.join("results", "final_model") From 3976a51d8f40e161116291509c08a3ffb77e091a Mon Sep 17 00:00:00 2001 From: mart-r Date: Wed, 15 Oct 2025 15:13:33 +0100 Subject: [PATCH 24/30] CU-869aujr7h: Ignore further tests for MacOS runner --- medcat-v2/tests/components/ner/trf/test_transformers_ner.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/medcat-v2/tests/components/ner/trf/test_transformers_ner.py b/medcat-v2/tests/components/ner/trf/test_transformers_ner.py index 5fc11906e..432b7c75f 100644 --- a/medcat-v2/tests/components/ner/trf/test_transformers_ner.py +++ b/medcat-v2/tests/components/ner/trf/test_transformers_ner.py @@ -14,12 +14,13 @@ from medcat.model_creation.cdb_maker import CDBMaker from transformers import TrainerCallback -from unittest import TestCase +from 
unittest import TestCase, skipIf import unittest.mock from ...addons.meta_cat.test_meta_cat import FakeTokenizer from ....pipeline.test_pipeline import FakeCDB, Config from .... import RESOURCES_PATH +from ....utils.ner.test_deid import should_do_test_ci class TransformersNERTests(TestCase): @@ -280,6 +281,8 @@ def test_ignore_extra_labels(self): ) +@skipIf(not should_do_test_ci, + "MacOS on workflow doesn't have enough memory") class AdditionalTransfromersNERTests(TestCase): TOKENIZER = FakeTokenizer() CNF = ConfigTransformersNER() From 663a20cbf18947dfcb89d5cb57de3fd7d77d6a57 Mon Sep 17 00:00:00 2001 From: mart-r Date: Wed, 15 Oct 2025 16:49:33 +0100 Subject: [PATCH 25/30] CU-869aujr7h: Make component tests more flexible --- medcat-v2/tests/components/helper.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/medcat-v2/tests/components/helper.py b/medcat-v2/tests/components/helper.py index 368a91473..9513f3c43 100644 --- a/medcat-v2/tests/components/helper.py +++ b/medcat-v2/tests/components/helper.py @@ -40,13 +40,32 @@ def setUpClass(cls): cls.vtokenizer = FTokenizer() cls.comp_cnf: ComponentConfig = getattr( cls.cnf.components, cls.comp_type.name) + if isinstance(cls.default_creator, Type): + cls._def_creator_name_opts = (cls.default_creator.__name__,) + else: + # classmethod + cls._def_creator_name_opts = (".".join(( + # either class.method_name + cls.default_creator.__self__.__name__, + cls.default_creator.__name__)), + # or just method_name + cls.default_creator.__name__ + ) def test_has_default(self): avail_components = types.get_registered_components(self.comp_type) self.assertEqual(len(avail_components), self.expected_def_components) name, cls_name = avail_components[0] - self.assertEqual(name, self.default) - self.assertIs(cls_name, self.default_creator.__name__) + # 1 name / cls name + eq_name = [name == self.default for name, _ in avail_components] + eq_cls = [cls_name in self._def_creator_name_opts + for _, 
cls_name in avail_components] + self.assertEqual(sum(eq_name), 1) + # NOTE: for NER both the default as well as the Dict based NER + # have the same class name, so there may be more than 1 + self.assertGreaterEqual(sum(eq_cls), 1) + # needs to have the same class where name is equal + self.assertTrue(eq_cls[eq_name.index(True)]) def test_can_create_def_component(self): component = types.create_core_component( From ff56c7561555faf52459c94df5a68dee69478186 Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 16 Oct 2025 09:10:37 +0100 Subject: [PATCH 26/30] CU-869aujr7h: Fix test skip method call --- medcat-v2/tests/components/ner/trf/test_transformers_ner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/medcat-v2/tests/components/ner/trf/test_transformers_ner.py b/medcat-v2/tests/components/ner/trf/test_transformers_ner.py index 432b7c75f..cb3bae019 100644 --- a/medcat-v2/tests/components/ner/trf/test_transformers_ner.py +++ b/medcat-v2/tests/components/ner/trf/test_transformers_ner.py @@ -281,7 +281,7 @@ def test_ignore_extra_labels(self): ) -@skipIf(not should_do_test_ci, +@skipIf(not should_do_test_ci(), "MacOS on workflow doesn't have enough memory") class AdditionalTransfromersNERTests(TestCase): TOKENIZER = FakeTokenizer() CNF = ConfigTransformersNER() From 242a02fa2b70aad997a38b59c3a66eb62ffe5289 Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 16 Oct 2025 09:58:39 +0100 Subject: [PATCH 27/30] Revert "CU-869aujr7h: [NEEDS TO BE REVERTED] Only run on MacOS and Windows on 3.10." This reverts commit f201270f31bd16e3bbd18e751df140cffefba2bb. 
--- .github/workflows/medcat-v2-lib-stabiliy.yml | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/.github/workflows/medcat-v2-lib-stabiliy.yml b/.github/workflows/medcat-v2-lib-stabiliy.yml index cb8d2b58c..ad7058717 100644 --- a/.github/workflows/medcat-v2-lib-stabiliy.yml +++ b/.github/workflows/medcat-v2-lib-stabiliy.yml @@ -23,11 +23,8 @@ jobs: strategy: fail-fast: false matrix: - # TODO: Revert before merge - # os: [ubuntu-latest, macos-latest, windows-latest] - # python-version: [ "3.10", "3.11", "3.12", "3.13"] - os: [macos-latest, windows-latest] - python-version: [ "3.10", ] + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: [ "3.10", "3.11", "3.12", "3.13"] steps: - uses: actions/checkout@v4 From c337428cec42362413154d026665f8400c16b062 Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 16 Oct 2025 10:01:10 +0100 Subject: [PATCH 28/30] CU-869aujr7h: Remove push-specific workflow triggers --- .github/workflows/medcat-v2-lib-stabiliy.yml | 3 --- 1 file changed, 3 deletions(-) diff --git a/.github/workflows/medcat-v2-lib-stabiliy.yml b/.github/workflows/medcat-v2-lib-stabiliy.yml index ad7058717..21f8491ba 100644 --- a/.github/workflows/medcat-v2-lib-stabiliy.yml +++ b/.github/workflows/medcat-v2-lib-stabiliy.yml @@ -5,9 +5,6 @@ on: schedule: - cron: "0 3 * * *" # every day at 3am UTC workflow_dispatch: # allow manual runs - push: - paths: - - ".github/workflows/medcat-v2-lib-stabiliy.yml" pull_request: paths: - ".github/workflows/medcat-v2-lib-stabiliy.yml" From 62da42b1add8768763966b7f09bbef8fdd71a0a3 Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 16 Oct 2025 10:41:19 +0100 Subject: [PATCH 29/30] CU-869avau57: Require numpy 2.1 or above for python 3.13 --- medcat-v2/pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/medcat-v2/pyproject.toml b/medcat-v2/pyproject.toml index 810cfa43d..581d5df66 100644 --- a/medcat-v2/pyproject.toml +++ b/medcat-v2/pyproject.toml @@ -57,7 +57,8 @@ 
classifiers = [ # For an analysis of this field vs pip's requirements files see: # https://packaging.python.org/discussions/install-requires-vs-requirements/ dependencies = [ # Optional - "numpy>2.0", + "numpy>=2.1; python_version >= '3.13'", + "numpy>=2.0; python_version < '3.13'", "dill", "pandas>=2.2,<3.0", "tqdm>=4.64,<5.0", From ed8426e5d8e35bb3dde162bae57a91127226713a Mon Sep 17 00:00:00 2001 From: mart-r Date: Thu, 16 Oct 2025 13:22:52 +0100 Subject: [PATCH 30/30] CU-869aujr7h: Rename helper method to avoid heavy RAM tests on MacOS in CI --- .../tests/components/ner/trf/test_transformers_ner.py | 4 ++-- medcat-v2/tests/utils/ner/test_deid.py | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/medcat-v2/tests/components/ner/trf/test_transformers_ner.py b/medcat-v2/tests/components/ner/trf/test_transformers_ner.py index cb3bae019..399ec50d2 100644 --- a/medcat-v2/tests/components/ner/trf/test_transformers_ner.py +++ b/medcat-v2/tests/components/ner/trf/test_transformers_ner.py @@ -20,7 +20,7 @@ from ...addons.meta_cat.test_meta_cat import FakeTokenizer from ....pipeline.test_pipeline import FakeCDB, Config from .... 
import RESOURCES_PATH -from ....utils.ner.test_deid import should_do_test_ci +from ....utils.ner.test_deid import is_macos_on_ci class TransformersNERTests(TestCase): @@ -281,7 +281,7 @@ def test_ignore_extra_labels(self): ) -@skipIf(not should_do_test_ci(), +@skipIf(not is_macos_on_ci(), "MacOS on workflow doesn't have enough memory") class AdditionalTransfromersNERTests(TestCase): TOKENIZER = FakeTokenizer() diff --git a/medcat-v2/tests/utils/ner/test_deid.py b/medcat-v2/tests/utils/ner/test_deid.py index bd09efea6..df684d9de 100644 --- a/medcat-v2/tests/utils/ner/test_deid.py +++ b/medcat-v2/tests/utils/ner/test_deid.py @@ -36,7 +36,7 @@ cnf.general.nlp.provider = 'spacy' -def should_do_test_ci() -> bool: +def is_macos_on_ci() -> bool: return os.getenv("RUNNER_OS", "None").lower() != "macos" @@ -116,7 +116,7 @@ def _train_model_once() -> tuple[tuple[Any, Any, Any], deid.DeIdModel]: return retval, model -if should_do_test_ci(): +if is_macos_on_ci(): _TRAINED_MODEL_AND_INFO = _train_model_once() @@ -124,7 +124,7 @@ def train_model_once() -> tuple[tuple[Any, Any, Any], deid.DeIdModel]: return _TRAINED_MODEL_AND_INFO -@unittest.skipIf(not should_do_test_ci(), +@unittest.skipIf(not is_macos_on_ci(), "MacOS on workflow doesn't have enough memory") class DeIDModelTests(unittest.TestCase): save_folder = os.path.join("results", "final_model") @@ -178,7 +178,7 @@ def test_add_new_concepts(self): ''' # noqa -@unittest.skipIf(not should_do_test_ci(), +@unittest.skipIf(not is_macos_on_ci(), "MacOS on workflow doesn't have enough memory") class DeIDModelWorks(unittest.TestCase): save_folder = os.path.join("results", "final_model")