Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 18 additions & 0 deletions medcat-v2/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -86,6 +86,24 @@ pip install "medcat[deid]~=2.0.0" # for DeID models
pip install "medcat[spacy,meta-cat,deid,rel-cat,dict-ner]~=2.0.0" # for all of the above
```

### Version / update checking

MedCAT now has the ability to check for newer versions of itself on PyPI (or a local mirror of it).
This helps users avoid falling too far behind on older versions of our software.
The behaviour is configurable via environment variables so that sys admins (e.g. for JupyterHub) can specify the settings they wish.
Version checks are done at most once a week; the time of the last successful check is cached in `~/.cache/cogstack/medcat_version.json`.

Below is a table of the environment variables that govern the version checking and their defaults.

| Variable | Default | Description |
|-----------|----------|-------------|
| **`MEDCAT_DISABLE_VERSION_CHECK`** | *(unset)* | When set to `true`, `yes` or `disable`, disables the version update check entirely. Useful for CI environments, offline setups, or deployments where external network access is restricted. |
| **`MEDCAT_PYPI_URL`** | `https://pypi.org/pypi` | Base URL used to query package metadata. MedCAT requests `{MEDCAT_PYPI_URL}/{pkg}/json`, so this can be changed to a PyPI mirror or internal repository that serves the PyPI JSON API at that path. |
| **`MEDCAT_MINOR_UPDATE_THRESHOLD`** | `3` | Number of newer **minor** versions (e.g. `1.4.x`, `1.5.x`) that must exist before MedCAT emits a “newer version available” log message. |
| **`MEDCAT_PATCH_UPDATE_THRESHOLD`** | `3` | Number of newer **patch** versions (e.g. `1.3.1`, `1.3.2`, `1.3.3`) on the same minor line required before emitting an informational update message. |
| **`MEDCAT_VERSION_UPDATE_LOG_LEVEL`** | `INFO` | Logging level used when reporting available newer versions (minor/patch thresholds). Accepts any valid `logging` level string (`DEBUG`, `INFO`, `WARNING`, `ERROR`, `CRITICAL`). |
| **`MEDCAT_VERSION_UPDATE_YANKED_LOG_LEVEL`** | `WARNING` | Logging level used when reporting that the current version has been **yanked** on PyPI. Accepts the same values as above. |

## Demo

The MedCAT v2 demo web app is available [here](https://medcat.sites.er.kcl.ac.uk/).
Expand Down
8 changes: 8 additions & 0 deletions medcat-v2/medcat/__init__.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,15 @@
from importlib.metadata import version as __version_method
from importlib.metadata import PackageNotFoundError as __PackageNotFoundError

from medcat.utils.check_for_updates import (
check_for_updates as __check_for_updates)

# Resolve the version of the installed "medcat" distribution; if no
# distribution metadata can be found, fall back to a placeholder version.
try:
    __version__ = __version_method("medcat")
except __PackageNotFoundError:
    __version__ = "0.0.0-dev"


# NOTE: this does not always perform an actual check;
# by default the check is only done once a week
# (and it can be disabled through an environment variable)
__check_for_updates("medcat", __version__)
192 changes: 192 additions & 0 deletions medcat-v2/medcat/utils/check_for_updates.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,192 @@
from typing import TypedDict
import json
import os
import time
import urllib.request
from pathlib import Path
from packaging.version import Version, InvalidVersion
import logging

from medcat.utils.defaults import (
MEDCAT_DISABLE_VERSION_CHECK_ENVIRON, MEDCAT_PYPI_URL_ENVIRON,
MEDCAT_MINOR_UPDATE_THRESHOLD_ENVIRON,
MEDCAT_PATCH_UPDATE_THRESHOLD_ENVIRON,
MEDCAT_VERSION_UPDATE_LOG_LEVEL_ENVIRON,
MEDCAT_VERSION_UPDATE_YANKED_LOG_LEVEL_ENVIRON,
)
from medcat.utils.defaults import (
DEFAULT_PYPI_URL, DEFAULT_MINOR_FOR_INFO, DEFAULT_PATCH_FOR_INFO,
DEFAULT_VERSION_INFO_LEVEL, DEFAULT_VERSION_INFO_YANKED_LEVEL)


# JSON file in which the time of the last update check is recorded
DEFAULT_CACHE_PATH = (
    Path.home() / ".cache" / "cogstack" / "medcat_version.json")
# minimum time between two update checks, in seconds (1 week)
DEFAULT_CHECK_INTERVAL = 7 * 24 * 3600


logger = logging.getLogger(__name__)


def log_info(msg: str, *args, yanked: bool = False, **kwargs):
    """Log a version-check message at the level configured in the environment.

    The level name is read from the environment variable for yanked-version
    reports or for regular update reports, depending on ``yanked``. If the
    configured name is not recognised, the default level of that same
    channel is used, so a typo cannot silently demote a yanked-version
    warning to INFO.

    Args:
        msg: The message (or %-style format string) to log.
        *args: Positional arguments forwarded to ``logger.log``.
        yanked: Whether the message reports a yanked version.
        **kwargs: Keyword arguments forwarded to ``logger.log``
            (e.g. ``exc_info``).
    """
    if yanked:
        env_name = MEDCAT_VERSION_UPDATE_YANKED_LOG_LEVEL_ENVIRON
        default_name = DEFAULT_VERSION_INFO_YANKED_LEVEL
    else:
        env_name = MEDCAT_VERSION_UPDATE_LOG_LEVEL_ENVIRON
        default_name = DEFAULT_VERSION_INFO_LEVEL
    _level_map = {
        "NOTSET": logging.NOTSET,
        "DEBUG": logging.DEBUG,
        "INFO": logging.INFO,
        "WARN": logging.WARNING,
        "WARNING": logging.WARNING,
        "ERROR": logging.ERROR,
        "CRITICAL": logging.CRITICAL,
        "FATAL": logging.FATAL,
    }
    # fall back to the channel's own default rather than always to INFO
    fallback = _level_map.get(default_name.upper(), logging.INFO)
    # tolerate stray whitespace around the configured value
    requested = os.environ.get(env_name, default_name).strip().upper()
    level = _level_map.get(requested, fallback)
    logger.log(level, msg, *args, **kwargs)


def _get_env_int(name: str, default: int) -> int:
    """Read an integer setting from the environment.

    Args:
        name: Name of the environment variable.
        default: Value used when the variable is unset or when its value
            cannot be parsed as an integer.

    Returns:
        The parsed integer, or ``default``.
    """
    raw_value = os.getenv(name, default)
    try:
        parsed = int(raw_value)
    except ValueError:
        # not a number (or empty) -> silently use the default
        parsed = default
    return parsed


def _should_check(cache_path: Path, check_interval: int) -> bool:
    """Decide whether enough time has passed to run a new update check.

    Args:
        cache_path: JSON file holding the ``"last_check"`` timestamp.
        check_interval: Minimum number of seconds between two checks.

    Returns:
        True if the cache file is missing, unreadable or malformed, or if
        more than ``check_interval`` seconds have passed since the recorded
        check; False otherwise.
    """
    if cache_path.exists():
        try:
            with open(cache_path) as cache_file:
                stamp = json.load(cache_file)["last_check"]
            return time.time() - stamp > check_interval
        except Exception:
            # any problem with the cache simply means we check again
            pass
    return True


class UpdateCheckConfig(TypedDict):
    """Resolved settings for one run of the update check.

    When ``enabled`` is False the remaining values (other than
    ``pkg_name``) are placeholders and are not meant to be used.
    """
    # name of the package whose releases are looked up
    pkg_name: str
    # JSON file that records the time of the last check
    cache_path: Path
    # full URL of the package's JSON metadata
    url: str
    # whether the check should run at all
    enabled: bool
    # number of newer minor / patch releases needed before a message is logged
    minor_threshold: int
    patch_threshold: int
    # timeout (in seconds) for the metadata request
    timeout: float
    # minimum number of seconds between two checks
    check_interval: int


def _get_config(pkg_name: str) -> UpdateCheckConfig:
    """Build the update-check configuration from environment variables.

    Args:
        pkg_name: Name of the package to check.

    Returns:
        The resolved configuration. If the check is disabled through the
        environment, ``enabled`` is False and all other settings are
        placeholder values.
    """
    disable_flag = os.getenv(MEDCAT_DISABLE_VERSION_CHECK_ENVIRON, "False")
    if disable_flag.lower() in ("true", "yes", "disable"):
        # disabled: nothing below is used, so fill in placeholders
        return UpdateCheckConfig(
            pkg_name=pkg_name,
            enabled=False,
            cache_path=Path("."),
            url="-1",
            minor_threshold=-1,
            patch_threshold=-1,
            timeout=-1.0,
            check_interval=-1,
        )
    pypi_base = os.getenv(MEDCAT_PYPI_URL_ENVIRON, DEFAULT_PYPI_URL)
    pypi_base = pypi_base.rstrip("/")
    # TODO: add env variables for timeout and default cache?
    return UpdateCheckConfig(
        pkg_name=pkg_name,
        enabled=True,
        cache_path=DEFAULT_CACHE_PATH,
        url=f"{pypi_base}/{pkg_name}/json",
        minor_threshold=_get_env_int(
            MEDCAT_MINOR_UPDATE_THRESHOLD_ENVIRON, DEFAULT_MINOR_FOR_INFO),
        patch_threshold=_get_env_int(
            MEDCAT_PATCH_UPDATE_THRESHOLD_ENVIRON, DEFAULT_PATCH_FOR_INFO),
        timeout=3.0,
        check_interval=DEFAULT_CHECK_INTERVAL,
    )


def check_for_updates(pkg_name: str, current_version: str):
    """Check the package index for newer releases and log the outcome.

    Nothing is done when the check is disabled through the environment or
    when the previous check happened less than the configured interval ago.
    The check is best effort: neither a failing metadata request nor a
    failing cache write raises to the caller, both are only logged.

    Args:
        pkg_name: Name of the package to look up.
        current_version: The currently installed version string.
    """
    cnf = _get_config(pkg_name)
    if not cnf["enabled"]:
        return

    if not _should_check(cnf["cache_path"], cnf["check_interval"]):
        return

    try:
        with urllib.request.urlopen(cnf["url"],
                                    timeout=cnf["timeout"]) as r:
            data = json.load(r)
        releases = {
            v: files for v, files in data.get("releases", {}).items()
            if files  # skip empty entries
        }
    except Exception as e:
        log_info("Unable to check for update", exc_info=e)
        return

    # cache update time; this runs while the package is being imported,
    # so an unwritable cache location (e.g. a read-only home directory)
    # must not raise - the only cost is that the check is repeated next time
    try:
        cnf["cache_path"].parent.mkdir(parents=True, exist_ok=True)
        with open(cnf["cache_path"], "w") as f:
            json.dump({"last_check": time.time()}, f)
    except OSError as e:
        log_info("Unable to cache the time of the update check", exc_info=e)

    _do_check(cnf, releases, current_version)


def _do_check(cnf: UpdateCheckConfig, releases: dict,
              current_version: str):
    """Compare the current version against known releases and log findings.

    Logs a message when the current version has been yanked, and when the
    number of newer patch releases (same major and minor) or newer minor
    releases (same major) reaches the configured threshold. Yanked releases
    and pre-releases are not counted as newer releases. Nothing is done if
    ``current_version`` is not a valid version string.

    Args:
        cnf: The update-check configuration (package name and thresholds).
        releases: Mapping of version string to its list of release files,
            each a dict that may carry ``yanked`` / ``yanked_reason``.
        current_version: The currently installed version string.
    """
    try:
        current = Version(current_version)
    except InvalidVersion:
        return
    pkg_name = cnf["pkg_name"]
    patch_thresh = cnf["patch_threshold"]
    minor_thresh = cnf["minor_threshold"]

    newer_minors, newer_patches = [], []
    yanked = False
    for v_str, files in releases.items():
        try:
            v = Version(v_str)
        except InvalidVersion:
            continue
        if v <= current:
            continue
        if v.is_prerelease:
            continue  # only stable releases count as available updates
        if any(f.get("yanked") for f in files):
            continue  # don't count yanked releases in comparisons
        if v.major == current.major and v.minor == current.minor:
            newer_patches.append(v)
        elif v.major == current.major and v.minor > current.minor:
            newer_minors.append(v)

    # detect if current version is yanked
    for f in releases.get(current_version, []):
        if f.get("yanked"):
            reason = f.get("yanked_reason", "")
            msg = (f"⚠️ You are using a yanked version ({pkg_name} "
                   f"{current_version}). {reason}")
            log_info(msg, yanked=True)
            yanked = True
            break

    # report newer versions; the lists must be non-empty as well, since a
    # threshold of 0 (or below) would otherwise call max() on an empty list
    if newer_patches and len(newer_patches) >= patch_thresh:
        latest_patch = max(newer_patches)
        msg = (f"ℹ️ {pkg_name} {current_version} → {latest_patch} "
               f"({len(newer_patches)} newer patch releases available)")
        log_info(msg)
    elif newer_minors and len(newer_minors) >= minor_thresh:
        latest_minor = max(newer_minors)
        msg = (f"⚠️ {pkg_name} {current_version} → {latest_minor} "
               f"({len(newer_minors)} newer minor releases available)")
        log_info(msg)

    if yanked and not (newer_minors or newer_patches):
        msg = (f"⚠️ Your installed version {current_version} was yanked and "
               "has no newer stable releases yet.")
        log_info(msg, yanked=True)
14 changes: 14 additions & 0 deletions medcat-v2/medcat/utils/defaults.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,20 @@
COMPONENTS_FOLDER = "saved_components"
# NOTE(review): the value below spells "LECACY" - presumably a typo for
#               "LEGACY"; renaming it would change the variable users have
#               to set, so confirm before touching it
AVOID_LEGACY_CONVERSION_ENVIRON = "MEDCAT_AVOID_LECACY_CONVERSION"

# version check
# name of the environment variable that disables the version check
MEDCAT_DISABLE_VERSION_CHECK_ENVIRON = "MEDCAT_DISABLE_VERSION_CHECK"
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you add this to the docs as well?

The whole .md table in this PR would be great to add there.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I've added it to the README.

However, there's a separate docs/main.md that seems to be (again) limping behind the README, but (mostly) mirrors it.
I think we should find a way to have it just automatically mirror the README.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is pretty pedantic - but can you enforce true/false here instead? Just feels like it will save the question "I set it to False but it is somehow disabled"

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yeah I'd make it the same as AVOID_LEGACY_CONVERSION_ENVIRON just above

# base URL used to look up package metadata
# (environment variable name and default value)
MEDCAT_PYPI_URL_ENVIRON = "MEDCAT_PYPI_URL"
DEFAULT_PYPI_URL = "https://pypi.org/pypi"
# number of newer minor releases needed before an update message is logged
MEDCAT_MINOR_UPDATE_THRESHOLD_ENVIRON = "MEDCAT_MINOR_UPDATE_THRESHOLD"
DEFAULT_MINOR_FOR_INFO = 3
# number of newer patch releases needed before an update message is logged
MEDCAT_PATCH_UPDATE_THRESHOLD_ENVIRON = "MEDCAT_PATCH_UPDATE_THRESHOLD"
DEFAULT_PATCH_FOR_INFO = 3
# logging level name used when reporting newer versions
MEDCAT_VERSION_UPDATE_LOG_LEVEL_ENVIRON = "MEDCAT_VERSION_UPDATE_LOG_LEVEL"
DEFAULT_VERSION_INFO_LEVEL = "INFO"
# logging level name used when reporting that the current version is yanked
MEDCAT_VERSION_UPDATE_YANKED_LOG_LEVEL_ENVIRON = (
    "MEDCAT_VERSION_UPDATE_YANKED_LOG_LEVEL")
DEFAULT_VERSION_INFO_YANKED_LEVEL = "WARNING"


def avoid_legacy_conversion() -> bool:
return os.environ.get(
Expand Down
Loading
Loading