diff --git a/.github/workflows/medcat-v2-lib-stabiliy.yml b/.github/workflows/medcat-v2-lib-stabiliy.yml new file mode 100644 index 000000000..21f8491ba --- /dev/null +++ b/.github/workflows/medcat-v2-lib-stabiliy.yml @@ -0,0 +1,58 @@ +name: MedCAT-nightly-stability-check +permissions: + contents: read +on: + schedule: + - cron: "0 3 * * *" # every day at 3am UTC + workflow_dispatch: # allow manual runs + pull_request: + paths: + - ".github/workflows/medcat-v2-lib-stabiliy.yml" + +defaults: + run: + working-directory: ./medcat-v2 + + +jobs: + test: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + os: [ubuntu-latest, macos-latest, windows-latest] + python-version: [ "3.10", "3.11", "3.12", "3.13"] + + steps: + - uses: actions/checkout@v4 + - uses: astral-sh/setup-uv@v3 + with: + python-version: ${{ matrix.python-version }} + + - name: Install with latest deps + shell: bash + run: | + uv run --python ${{ matrix.python-version }} python -m ensurepip + uv run --python ${{ matrix.python-version }} python -m pip install --upgrade pip + # install cpu-only torch for MacOS + if [[ "$RUNNER_OS" == "macOS" ]]; then + uv run --python ${{ matrix.python-version }} python -m pip install torch --index-url https://download.pytorch.org/whl/cpu + fi + uv run --python ${{ matrix.python-version }} python -m pip install ".[spacy,deid,meta-cat,rel-cat,dict-ner,dev]" + + - name: Check types + run: | + uv run --python ${{ matrix.python-version }} python -m mypy --follow-imports=normal medcat + + - name: Ruff linting + run: | + uv run --python ${{ matrix.python-version }} python -m ruff check medcat --preview + + - name: Test + run: | + uv run --python ${{ matrix.python-version }} python -m unittest discover + timeout-minutes: 45 + + - name: Model regression + run: | + uv run --python ${{ matrix.python-version }} bash tests/backwards_compatibility/run_current.sh diff --git a/medcat-v2/medcat/utils/cdb_state.py b/medcat-v2/medcat/utils/cdb_state.py index 
b5390f398..c280999ba 100644 --- a/medcat-v2/medcat/utils/cdb_state.py +++ b/medcat-v2/medcat/utils/cdb_state.py @@ -3,6 +3,7 @@ from typing import TypedDict, cast import tempfile import dill +import os from copy import deepcopy @@ -216,7 +217,10 @@ def on_disk_memory_capture(cdb): Yields: None """ - with tempfile.NamedTemporaryFile() as tf: - save_cdb_state(cdb, tf.name) + # NOTE: using temporary directory so that it also works on Windows + # otherwise you can't reopen a temporary file in Windows (apparently) + with tempfile.TemporaryDirectory() as temp_dir: + temp_file_name = os.path.join(temp_dir, "cdb_state.dat") + save_cdb_state(cdb, temp_file_name) yield - load_and_apply_cdb_state(cdb, tf.name) + load_and_apply_cdb_state(cdb, temp_file_name) diff --git a/medcat-v2/pyproject.toml b/medcat-v2/pyproject.toml index 466a398cc..581d5df66 100644 --- a/medcat-v2/pyproject.toml +++ b/medcat-v2/pyproject.toml @@ -57,7 +57,8 @@ classifiers = [ # For an analysis of this field vs pip's requirements files see: # https://packaging.python.org/discussions/install-requires-vs-requirements/ dependencies = [ # Optional - "numpy>2.0", + "numpy>=2.1; python_version >= '3.13'", + "numpy>=2.0; python_version < '3.13'", "dill", "pandas>=2.2,<3.0", "tqdm>=4.64,<5.0", @@ -102,6 +103,8 @@ dict_ner = [ ] deid = [ "datasets>=2.2.2,<3.0.0", + # Transformers 4.57 doesn't support 3.9 + "transformers!=4.57.0; python_version == '3.9'", "transformers>=4.41.0,<5.0", # avoid major bump # Transformers 4.57 doesn't support 3.9 "transformers!=4.57.0; python_version == '3.9'", @@ -112,6 +115,8 @@ deid = [ "scipy>=1.14; python_version >= '3.13'", ] rel_cat = [ + # Transformers 4.57 doesn't support 3.9 + "transformers!=4.57.0; python_version == '3.9'", "transformers>=4.41.0,<5.0", # avoid major bump # Transformers 4.57 doesn't support 3.9 "transformers!=4.57.0; python_version == '3.9'", diff --git a/medcat-v2/tests/components/helper.py b/medcat-v2/tests/components/helper.py index 368a91473..9513f3c43 
100644 --- a/medcat-v2/tests/components/helper.py +++ b/medcat-v2/tests/components/helper.py @@ -40,13 +40,32 @@ def setUpClass(cls): cls.vtokenizer = FTokenizer() cls.comp_cnf: ComponentConfig = getattr( cls.cnf.components, cls.comp_type.name) + if isinstance(cls.default_creator, Type): + cls._def_creator_name_opts = (cls.default_creator.__name__,) + else: + # classmethod + cls._def_creator_name_opts = (".".join(( + # either class.method_name + cls.default_creator.__self__.__name__, + cls.default_creator.__name__)), + # or just method_name + cls.default_creator.__name__ + ) def test_has_default(self): avail_components = types.get_registered_components(self.comp_type) self.assertEqual(len(avail_components), self.expected_def_components) name, cls_name = avail_components[0] - self.assertEqual(name, self.default) - self.assertIs(cls_name, self.default_creator.__name__) + # 1 name / cls name + eq_name = [name == self.default for name, _ in avail_components] + eq_cls = [cls_name in self._def_creator_name_opts + for _, cls_name in avail_components] + self.assertEqual(sum(eq_name), 1) + # NOTE: for NER both the default as well as the Dict based NER + # have the same class name, so may be more than 1 + self.assertGreaterEqual(sum(eq_cls), 1) + # needs to have the same class where name is equal + self.assertTrue(eq_cls[eq_name.index(True)]) def test_can_create_def_component(self): component = types.create_core_component( diff --git a/medcat-v2/tests/components/ner/trf/test_transformers_ner.py b/medcat-v2/tests/components/ner/trf/test_transformers_ner.py index 5fc11906e..399ec50d2 100644 --- a/medcat-v2/tests/components/ner/trf/test_transformers_ner.py +++ b/medcat-v2/tests/components/ner/trf/test_transformers_ner.py @@ -14,12 +14,13 @@ from medcat.model_creation.cdb_maker import CDBMaker from transformers import TrainerCallback -from unittest import TestCase +from unittest import TestCase, skipIf import unittest.mock from ...addons.meta_cat.test_meta_cat import 
FakeTokenizer from ....pipeline.test_pipeline import FakeCDB, Config from .... import RESOURCES_PATH +from ....utils.ner.test_deid import is_macos_on_ci class TransformersNERTests(TestCase): @@ -280,6 +281,8 @@ def test_ignore_extra_labels(self): ) +@skipIf(not is_macos_on_ci(), + "MacOS on workflow doesn't have enough memory") class AdditionalTransfromersNERTests(TestCase): TOKENIZER = FakeTokenizer() CNF = ConfigTransformersNER() diff --git a/medcat-v2/tests/utils/ner/test_deid.py b/medcat-v2/tests/utils/ner/test_deid.py index f11cc2dec..df684d9de 100644 --- a/medcat-v2/tests/utils/ner/test_deid.py +++ b/medcat-v2/tests/utils/ner/test_deid.py @@ -36,6 +36,10 @@ cnf.general.nlp.provider = 'spacy' +def is_macos_on_ci() -> bool: + return os.getenv("RUNNER_OS", "None").lower() != "macos" + + def _get_def_cdb(): return CDB(config=cnf) @@ -112,13 +116,16 @@ def _train_model_once() -> tuple[tuple[Any, Any, Any], deid.DeIdModel]: return retval, model -_TRAINED_MODEL_AND_INFO = _train_model_once() +if is_macos_on_ci(): + _TRAINED_MODEL_AND_INFO = _train_model_once() def train_model_once() -> tuple[tuple[Any, Any, Any], deid.DeIdModel]: return _TRAINED_MODEL_AND_INFO +@unittest.skipIf(not is_macos_on_ci(), + "MacOS on workflow doesn't have enough memory") class DeIDModelTests(unittest.TestCase): save_folder = os.path.join("results", "final_model") @@ -171,6 +178,8 @@ def test_add_new_concepts(self): ''' # noqa +@unittest.skipIf(not is_macos_on_ci(), + "MacOS on workflow doesn't have enough memory") class DeIDModelWorks(unittest.TestCase): save_folder = os.path.join("results", "final_model") diff --git a/medcat-v2/tests/utils/test_cdb_state.py b/medcat-v2/tests/utils/test_cdb_state.py index 4883c1ce5..a1013aa22 100644 --- a/medcat-v2/tests/utils/test_cdb_state.py +++ b/medcat-v2/tests/utils/test_cdb_state.py @@ -113,18 +113,19 @@ def test_state_restored(self): class StateSavedOnDiskTests(StateSavedTests): on_disk = True - _named_tempory_file = tempfile.NamedTemporaryFile + 
_named_tempory_directory = tempfile.TemporaryDirectory @classmethod def saved_name_temp_file(cls): - tf = cls._named_tempory_file() - cls.temp_file_name = tf.name + tf = cls._named_tempory_directory() + cls.temp_file_name = os.path.join(tf.name, "cdb_state.dat") return tf @classmethod def setUpClass(cls) -> None: - with mock.patch("builtins.open", side_effect=open) as cls.popen: - with mock.patch("tempfile.NamedTemporaryFile", + with mock.patch("medcat.utils.cdb_state.open", side_effect=open + ) as cls.popen: + with mock.patch("tempfile.TemporaryDirectory", side_effect=cls.saved_name_temp_file) as cls.pntf: return super().setUpClass()