diff --git a/medcat-v2/README.md b/medcat-v2/README.md index d791212d4..8aa596c31 100644 --- a/medcat-v2/README.md +++ b/medcat-v2/README.md @@ -86,6 +86,24 @@ pip install "medcat[deid]~=2.0.0" # for DeID models pip install "medcat[spacy,meta-cat,deid,rel-cat,dict-ner]~=2.0.0" # for all of the above ``` +### Version / update checking + +MedCAT now has the ability to check for newer versions of itself on PyPI (or a local mirror of it). +This is so users don't fall too far behind with older versions of our software. +This is configurable through environment variables so that sysadmins (e.g. for JupyterHub) can specify the settings they wish. +Version checks are done at most once a week; the time of the last check is cached. + +Below is a table of the environment variables that govern the version checking and their defaults. + +| Variable | Default | Description | +|-----------|----------|-------------| +| **`MEDCAT_DISABLE_VERSION_CHECK`** | *(unset)* | When set to `true`, `yes` or `disable`, disables the version update check entirely. Useful for CI environments, offline setups, or deployments where external network access is restricted. | +| **`MEDCAT_PYPI_URL`** | `https://pypi.org/pypi` | Base URL used to query package metadata. Can be changed to a PyPI mirror or internal repository that exposes the `/pypi/{pkg}/json` API. | +| **`MEDCAT_MINOR_UPDATE_THRESHOLD`** | `3` | Number of newer **minor** versions (e.g. `1.4.x`, `1.5.x`) that must exist before MedCAT emits a “newer version available” log message. | +| **`MEDCAT_PATCH_UPDATE_THRESHOLD`** | `3` | Number of newer **patch** versions (e.g. `1.3.1`, `1.3.2`, `1.3.3`) on the same minor line required before emitting an informational update message. | +| **`MEDCAT_VERSION_UPDATE_LOG_LEVEL`** | `INFO` | Logging level used when reporting available newer versions (minor/patch thresholds). Accepts any valid `logging` level string (`DEBUG`, `INFO`, `WARNING`, `ERROR`, `CRITICAL`). 
|
+| **`MEDCAT_VERSION_UPDATE_YANKED_LOG_LEVEL`** | `WARNING` | Logging level used when reporting that the current version has been **yanked** on PyPI. Accepts the same values as above. |
+
 ## Demo
 
 The MedCAT v2 demo web app is available [here](https://medcat.sites.er.kcl.ac.uk/).
diff --git a/medcat-v2/medcat/__init__.py b/medcat-v2/medcat/__init__.py
index 10b1a22b9..9e44fd23b 100644
--- a/medcat-v2/medcat/__init__.py
+++ b/medcat-v2/medcat/__init__.py
@@ -1,7 +1,15 @@
 from importlib.metadata import version as __version_method
 from importlib.metadata import PackageNotFoundError as __PackageNotFoundError
 
+from medcat.utils.check_for_updates import (
+    check_for_updates as __check_for_updates)
+
 try:
     __version__ = __version_method("medcat")
 except __PackageNotFoundError:
     __version__ = "0.0.0-dev"
+
+
+# NOTE: this will not always actually do the check
+# it will only (by default) check once a week
+__check_for_updates("medcat", __version__)
diff --git a/medcat-v2/medcat/utils/check_for_updates.py b/medcat-v2/medcat/utils/check_for_updates.py
new file mode 100644
index 000000000..4f3a14bd1
--- /dev/null
+++ b/medcat-v2/medcat/utils/check_for_updates.py
@@ -0,0 +1,252 @@
+from typing import TypedDict
+import json
+import os
+import time
+import urllib.request
+from pathlib import Path
+from packaging.version import Version, InvalidVersion
+import logging
+
+from medcat.utils.defaults import (
+    MEDCAT_DISABLE_VERSION_CHECK_ENVIRON, MEDCAT_PYPI_URL_ENVIRON,
+    MEDCAT_MINOR_UPDATE_THRESHOLD_ENVIRON,
+    MEDCAT_PATCH_UPDATE_THRESHOLD_ENVIRON,
+    MEDCAT_VERSION_UPDATE_LOG_LEVEL_ENVIRON,
+    MEDCAT_VERSION_UPDATE_YANKED_LOG_LEVEL_ENVIRON,
+)
+from medcat.utils.defaults import (
+    DEFAULT_PYPI_URL, DEFAULT_MINOR_FOR_INFO, DEFAULT_PATCH_FOR_INFO,
+    DEFAULT_VERSION_INFO_LEVEL, DEFAULT_VERSION_INFO_YANKED_LEVEL)
+
+
+DEFAULT_CACHE_PATH = (
+    Path.home() / ".cache" / "cogstack" / "medcat_version.json")
+# 1 week
+DEFAULT_CHECK_INTERVAL = 7 * 24 * 3600
+
+
+logger = logging.getLogger(__name__)
+
+
+def log_info(msg: str, *args, yanked: bool = False, **kwargs):
+    """Log an update-check message at the env-configured level.
+
+    The level name is read from the environment on every call: the
+    "yanked" variable when `yanked` is True, the regular one otherwise.
+    Unrecognised level names fall back to `logging.INFO`.
+
+    Args:
+        msg (str): The message to log.
+        *args: Positional arguments passed through to `logger.log`.
+        yanked (bool): Whether the message is about a yanked release.
+        **kwargs: Keyword arguments passed through to `logger.log`.
+    """
+    if yanked:
+        lvl = os.environ.get(MEDCAT_VERSION_UPDATE_YANKED_LOG_LEVEL_ENVIRON,
+                             DEFAULT_VERSION_INFO_YANKED_LEVEL).upper()
+    else:
+        lvl = os.environ.get(MEDCAT_VERSION_UPDATE_LOG_LEVEL_ENVIRON,
+                             DEFAULT_VERSION_INFO_LEVEL).upper()
+    _level_map = {
+        "NOTSET": logging.NOTSET,
+        "DEBUG": logging.DEBUG,
+        "INFO": logging.INFO,
+        "WARN": logging.WARNING,
+        "WARNING": logging.WARNING,
+        "ERROR": logging.ERROR,
+        "CRITICAL": logging.CRITICAL,
+        "FATAL": logging.FATAL,
+    }
+    level = _level_map.get(lvl, logging.INFO)
+    logger.log(level, msg, *args, **kwargs)
+
+
+def _get_env_int(name: str, default: int) -> int:
+    """Read env variable `name` as an int; use `default` if unset/invalid."""
+    try:
+        return int(os.getenv(name, default))
+    except ValueError:
+        return default
+
+
+def _should_check(cache_path: Path, check_interval: int) -> bool:
+    """Tell whether the last recorded check is older than `check_interval`.
+
+    A missing, unreadable or malformed cache file counts as "check now".
+    """
+    if not cache_path.exists():
+        return True
+    try:
+        with open(cache_path) as f:
+            last_check = json.load(f)["last_check"]
+        return time.time() - last_check > check_interval
+    except Exception:
+        return True
+
+
+class UpdateCheckConfig(TypedDict):
+    """Settings for a single update check, as built by `_get_config`."""
+    pkg_name: str
+    cache_path: Path
+    url: str
+    enabled: bool
+    minor_threshold: int
+    patch_threshold: int
+    timeout: float
+    check_interval: int
+
+
+def _get_config(pkg_name: str) -> UpdateCheckConfig:
+    """Build the update-check settings from the environment.
+
+    When the disable variable is set to `true`, `yes` or `disable`
+    (case-insensitive) only `enabled=False` is meaningful; the other
+    fields are placeholders.
+    """
+    if os.getenv(MEDCAT_DISABLE_VERSION_CHECK_ENVIRON,
+                 "False").lower() in ("true", "yes", "disable"):
+        return {
+            "pkg_name": pkg_name,
+            "enabled": False,
+            "cache_path": Path("."),
+            "url": "-1",
+            "minor_threshold": -1,
+            "patch_threshold": -1,
+            "timeout": -1.0,
+            "check_interval": -1,
+        }
+    base_url = os.getenv(MEDCAT_PYPI_URL_ENVIRON, DEFAULT_PYPI_URL).rstrip("/")
+    url = f"{base_url}/{pkg_name}/json"
+    minor_thresh = _get_env_int(MEDCAT_MINOR_UPDATE_THRESHOLD_ENVIRON,
+                                DEFAULT_MINOR_FOR_INFO)
+    patch_thresh = _get_env_int(MEDCAT_PATCH_UPDATE_THRESHOLD_ENVIRON,
+                                DEFAULT_PATCH_FOR_INFO)
+    # TODO: add env variables for timeout and default cache?
+    return {
+        "pkg_name": pkg_name,
+        "enabled": True,
+        "cache_path": DEFAULT_CACHE_PATH,
+        "url": url,
+        "minor_threshold": minor_thresh,
+        "patch_threshold": patch_thresh,
+        "timeout": 3.0,
+        "check_interval": DEFAULT_CHECK_INTERVAL,
+    }
+
+
+def _record_check_time(cache_path: Path) -> None:
+    """Store the current time in the cache file (best effort).
+
+    The check runs when the package is imported, so a read-only or
+    missing home directory must not turn into an import failure.
+    """
+    try:
+        cache_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(cache_path, "w") as f:
+            json.dump({"last_check": time.time()}, f)
+    except OSError as e:
+        logger.debug("Unable to write version check cache %s: %s",
+                     cache_path, e)
+
+
+def check_for_updates(pkg_name: str, current_version: str):
+    """Check the package index for newer or yanked releases of a package.
+
+    Does nothing when the check is disabled via the environment or when
+    the cached timestamp is newer than the check interval. Network and
+    parsing failures are logged and never raised.
+
+    Args:
+        pkg_name (str): Name of the package to look up.
+        current_version (str): The currently installed version.
+    """
+    cnf = _get_config(pkg_name)
+    if not cnf["enabled"]:
+        return
+
+    if not _should_check(cnf["cache_path"], cnf["check_interval"]):
+        return
+
+    try:
+        with urllib.request.urlopen(cnf["url"],
+                                    timeout=cnf["timeout"]) as r:
+            data = json.load(r)
+        releases = {
+            v: files for v, files in data.get("releases", {}).items()
+            if files  # skip empty entries
+        }
+    except Exception as e:
+        log_info("Unable to check for update", exc_info=e)
+        return
+
+    _record_check_time(cnf["cache_path"])
+
+    _do_check(cnf, releases, current_version)
+
+
+def _do_check(cnf: UpdateCheckConfig, releases: dict,
+              current_version: str):
+    """Log notices about newer releases and about a yanked current version.
+
+    Only non-yanked releases within the current major version count as
+    "newer". Newer major versions are not reported.
+
+    Args:
+        cnf (UpdateCheckConfig): The update-check settings.
+        releases (dict): Version string -> list of file-info dicts.
+        current_version (str): The currently installed version; nothing
+            is logged if it cannot be parsed.
+    """
+    try:
+        current = Version(current_version)
+    except InvalidVersion:
+        return
+    pkg_name = cnf["pkg_name"]
+    patch_thresh = cnf["patch_threshold"]
+    minor_thresh = cnf["minor_threshold"]
+
+    newer_minors, newer_patches = [], []
+    yanked = False
+    for v_str, files in releases.items():
+        try:
+            v = Version(v_str)
+        except InvalidVersion:
+            continue
+        if v <= current:
+            continue
+        if any(f.get("yanked") for f in files):
+            continue  # don’t count yanked releases in comparisons
+        if v.major == current.major and v.minor == current.minor:
+            newer_patches.append(v)
+        elif v.major == current.major and v.minor > current.minor:
+            newer_minors.append(v)
+
+    # detect if current version is yanked
+    for f in releases.get(current_version, []):
+        if f.get("yanked"):
+            # treat a null/absent reason as empty instead of printing "None"
+            reason = f.get("yanked_reason") or ""
+            msg = (f"⚠️ You are using a yanked version ({pkg_name} "
+                   f"{current_version}). {reason}").rstrip()
+            log_info(msg, yanked=True)
+            yanked = True
+            break
+
+    # report newer versions
+    # NOTE: the emptiness checks keep max() from raising when a
+    #       threshold is configured as 0 or negative
+    if newer_patches and len(newer_patches) >= patch_thresh:
+        latest_patch = max(newer_patches)
+        msg = (f"ℹ️ {pkg_name} {current_version} → {latest_patch} "
+               f"({len(newer_patches)} newer patch releases available)")
+        log_info(msg)
+    elif newer_minors and len(newer_minors) >= minor_thresh:
+        latest_minor = max(newer_minors)
+        msg = (f"⚠️ {pkg_name} {current_version} → {latest_minor} "
+               f"({len(newer_minors)} newer minor releases available)")
+        log_info(msg)
+
+    if yanked and not (newer_minors or newer_patches):
+        msg = (f"⚠️ Your installed version {current_version} was yanked and "
+               "has no newer stable releases yet.")
+        log_info(msg, yanked=True)
diff --git a/medcat-v2/medcat/utils/defaults.py b/medcat-v2/medcat/utils/defaults.py
index c1ce9002e..ba4171f63 100644
--- a/medcat-v2/medcat/utils/defaults.py
+++ b/medcat-v2/medcat/utils/defaults.py
@@ -10,6 +10,20 @@ COMPONENTS_FOLDER = "saved_components"
 
 AVOID_LEGACY_CONVERSION_ENVIRON = "MEDCAT_AVOID_LECACY_CONVERSION"
 
+# version check
+MEDCAT_DISABLE_VERSION_CHECK_ENVIRON = "MEDCAT_DISABLE_VERSION_CHECK"
+MEDCAT_PYPI_URL_ENVIRON = "MEDCAT_PYPI_URL"
+DEFAULT_PYPI_URL = "https://pypi.org/pypi"
+MEDCAT_MINOR_UPDATE_THRESHOLD_ENVIRON = "MEDCAT_MINOR_UPDATE_THRESHOLD"
+DEFAULT_MINOR_FOR_INFO = 3
+MEDCAT_PATCH_UPDATE_THRESHOLD_ENVIRON = "MEDCAT_PATCH_UPDATE_THRESHOLD"
+DEFAULT_PATCH_FOR_INFO = 3
+MEDCAT_VERSION_UPDATE_LOG_LEVEL_ENVIRON = "MEDCAT_VERSION_UPDATE_LOG_LEVEL"
+DEFAULT_VERSION_INFO_LEVEL = "INFO"
+MEDCAT_VERSION_UPDATE_YANKED_LOG_LEVEL_ENVIRON = (
+    "MEDCAT_VERSION_UPDATE_YANKED_LOG_LEVEL")
+DEFAULT_VERSION_INFO_YANKED_LEVEL = "WARNING"
+
 
 def avoid_legacy_conversion() -> bool:
     return os.environ.get(
diff --git a/medcat-v2/tests/utils/test_check_for_updates.py b/medcat-v2/tests/utils/test_check_for_updates.py
new file mode 100644
index 000000000..a5977861a
--- /dev/null
+++ b/medcat-v2/tests/utils/test_check_for_updates.py
@@ -0,0 +1,148 @@
+import io
+import json
+import logging
+import os
+import tempfile
+import time
+import unittest
+from unittest.mock import patch
+from pathlib import Path
+from medcat.utils import check_for_updates
+
+
+class TestVersionCheck(unittest.TestCase):
+
+    def setUp(self):
+        self.pkg = "medcat"
+        self.current_version = "1.3.0"
+        # per-test temp dir: portable and safe for parallel runs
+        tmp_dir = tempfile.TemporaryDirectory()
+        self.addCleanup(tmp_dir.cleanup)
+        self.cache_path = Path(tmp_dir.name) / "fake_cache.json"
+        # isolate from ambient MEDCAT_* settings, e.g. a CI that sets
+        # MEDCAT_DISABLE_VERSION_CHECK would make tests 1 and 2 fail
+        env_patch = patch.dict("os.environ")
+        env_patch.start()
+        self.addCleanup(env_patch.stop)
+        for name in [n for n in os.environ if n.startswith("MEDCAT_")]:
+            del os.environ[name]
+
+    # --- helpers ---
+    def _make_releases(self, versions, yanked=None):
+        """Return a fake releases dict."""
+        yanked = yanked or {}
+        return {
+            v: [{"yanked": yanked.get(v, False)}]
+            for v in versions
+        }
+
+    def _make_cnf(self, minor_threshold=99, patch_threshold=99):
+        """Return a config for calling `_do_check` directly."""
+        return {
+            "pkg_name": self.pkg,
+            "enabled": True,
+            "cache_path": self.cache_path,
+            "url": "",
+            "minor_threshold": minor_threshold,
+            "patch_threshold": patch_threshold,
+            "timeout": 0,
+            "check_interval": 0,
+        }
+
+    def _write_cache(self, last_check):
+        """Write a cache file recording `last_check` as the check time."""
+        with open(self.cache_path, "w") as f:
+            json.dump({"last_check": last_check}, f)
+
+    def _run_check(self):
+        """Run the public entry point against the temporary cache file."""
+        with patch("medcat.utils.check_for_updates.DEFAULT_CACHE_PATH",
+                   self.cache_path):
+            check_for_updates.check_for_updates(self.pkg, self.current_version)
+
+    # 1. runs if cache missing
+    @patch("medcat.utils.check_for_updates._do_check")
+    @patch("medcat.utils.check_for_updates.urllib.request.urlopen")
+    def test_runs_without_cache(self, mock_urlopen, mock_do_check):
+        data = {"releases": self._make_releases(["1.3.1", "1.3.2", "1.4.0"])}
+        mock_urlopen.return_value.__enter__.return_value = io.StringIO(
+            json.dumps(data))
+        self._run_check()
+        mock_do_check.assert_called_once()
+
+    # 2. runs if cache interval expired
+    @patch("medcat.utils.check_for_updates._do_check")
+    @patch("medcat.utils.check_for_updates.urllib.request.urlopen")
+    def test_runs_if_interval_expired(self, mock_urlopen, mock_do_check):
+        data = {"releases": self._make_releases(["1.3.1"])}
+        mock_urlopen.return_value.__enter__.return_value = io.StringIO(
+            json.dumps(data))
+        # create old cache
+        self._write_cache(
+            time.time() - (check_for_updates.DEFAULT_CHECK_INTERVAL + 1))
+        self._run_check()
+        mock_do_check.assert_called_once()
+
+    # 3. doesn't run if cache still valid
+    @patch("medcat.utils.check_for_updates._do_check")
+    @patch("medcat.utils.check_for_updates.urllib.request.urlopen")
+    def test_does_not_run_if_interval_not_expired(self, mock_urlopen,
+                                                  mock_do_check):
+        # recent cache
+        self._write_cache(time.time())
+        self._run_check()
+        # urlopen is mocked so a regression can never hit the network
+        mock_urlopen.assert_not_called()
+        mock_do_check.assert_not_called()
+
+    # 4. info for 3+ patch versions
+    @patch("medcat.utils.check_for_updates.log_info")
+    def test_patch_threshold_triggered(self, mock_log):
+        releases = self._make_releases(["1.3.1", "1.3.2", "1.3.3", "1.3.4"])
+        cnf = self._make_cnf(patch_threshold=3)
+        check_for_updates._do_check(cnf, releases, self.current_version)
+        self.assertTrue(any("patch releases available" in c[0][0]
+                            for c in mock_log.call_args_list))
+
+    # 5. info for 3+ minor versions
+    @patch("medcat.utils.check_for_updates.log_info")
+    def test_minor_threshold_triggered(self, mock_log):
+        releases = self._make_releases(["1.4.0", "1.5.0", "1.6.0", "1.7.0"])
+        cnf = self._make_cnf(minor_threshold=3)
+        check_for_updates._do_check(cnf, releases, self.current_version)
+        self.assertTrue(any("minor releases available" in c[0][0]
+                            for c in mock_log.call_args_list))
+
+    # 6. env variable changes log level (regular)
+    @patch.dict("os.environ", {
+        "MEDCAT_VERSION_UPDATE_LOG_LEVEL": "ERROR"})
+    def test_env_log_level_regular(self):
+        with patch.object(check_for_updates.logger, "log") as mock_log:
+            check_for_updates.log_info("Test")
+        self.assertEqual(mock_log.call_args[0][0], logging.ERROR)
+
+    # 7. env variable changes log level (yanked)
+    @patch.dict("os.environ", {
+        "MEDCAT_VERSION_UPDATE_YANKED_LOG_LEVEL": "CRITICAL"})
+    def test_env_log_level_yanked(self):
+        with patch.object(check_for_updates.logger, "log") as mock_log:
+            check_for_updates.log_info("Yanked", yanked=True)
+        self.assertEqual(mock_log.call_args[0][0], logging.CRITICAL)
+
+    # 8. yanked version triggers warning
+    @patch("medcat.utils.check_for_updates.log_info")
+    def test_yanked_version_logs(self, mock_log):
+        releases = self._make_releases(["1.3.0"], yanked={"1.3.0": True})
+        cnf = self._make_cnf()
+        check_for_updates._do_check(cnf, releases, self.current_version)
+        self.assertTrue(any("yanked version" in c[0][0]
+                            for c in mock_log.call_args_list))
+
+    # 9. invalid current version handled gracefully
+    @patch("medcat.utils.check_for_updates.log_info")
+    def test_invalid_current_version_does_not_raise(self, mock_log):
+        releases = self._make_releases(["1.2.0"])
+        cnf = self._make_cnf()
+        # an exception raised here fails the test without extra handling
+        check_for_updates._do_check(cnf, releases, "not_a_version")
+        mock_log.assert_not_called()