diff --git a/.github/workflows/medcat-v2_release.yml b/.github/workflows/medcat-v2_release.yml
index b50c1286d..1e333b0ea 100644
--- a/.github/workflows/medcat-v2_release.yml
+++ b/.github/workflows/medcat-v2_release.yml
@@ -14,14 +14,17 @@ defaults:
 
 jobs:
   build:
-    name: Build and release
+    name: Build medcat-v2 wheel
     runs-on: ubuntu-latest
-
+    outputs:  # read by the bundle and release jobs as needs.build.outputs.*
+      version_tag: ${{ steps.extract.outputs.version_tag }}
+      version_only: ${{ steps.extract.outputs.version_only }}
     steps:
       - name: Checkout repository
         uses: actions/checkout@v4
 
-      - name: Checkout release branch
+      - name: Extract version tag and checkout release branch
+        id: extract
         run: |
           # Fetch all branches to ensure we can access the one we need
           git fetch --all
@@ -31,16 +34,24 @@ jobs:
           # NOTE: branch name is in line with version tag, except for the patch version
           BRANCH_NAME="${VERSION_TAG%.*}" # This removes the patch version (everything after the second dot)
 
+          # set version tag as output for later use
+          echo "version_tag=$VERSION_TAG" >> "$GITHUB_OUTPUT"
+
+          # only the version (no medcat/v prefix)
+          VERSION_ONLY="${VERSION_TAG#medcat/v}"
+          echo "version_only=$VERSION_ONLY" >> "$GITHUB_OUTPUT"
+
           # Check out the corresponding release branch (e.g., medcat/v0.1)
           git checkout $BRANCH_NAME
 
           # Ensure the branch is up-to-date with the remote
           git pull origin $BRANCH_NAME
 
-      - name: Set up Python
+      # NOTE: building with the lowest python version supported by the package
+      - name: Set up Python 3.9
         uses: actions/setup-python@v5
         with:
-          python-version: "3.11"
+          python-version: "3.9"
 
       - name: Install build dependencies
         run: pip install --upgrade build
@@ -48,11 +59,144 @@ jobs:
       - name: Build package
         run: python -m build
 
+      - name: Upload wheel artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: medcat-v2-wheel
+          path: |
+            medcat-v2/dist/*.whl
+            medcat-v2/dist/*.tar.gz
+
+  bundle:
+    name: Build install bundles
+    needs: build  # uses the medcat-v2-wheel artifact and the version_only output
+    runs-on: ubuntu-latest
+    strategy:
+      matrix:
+        python-version: ["3.9", "3.10", "3.11", "3.12"]
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
+      - name: Install uv
+        run: pip install uv
+
+      - name: Generate requirements and download (CPU)
+        run: |
+          if [[ "${{ matrix.python-version }}" == "3.9" ]]; then
+            echo "Running unsafe index strategy for Python 3.9 to avoid issues with torch / numpy compatibility"
+            # NOTE: for python 3.9 it will otherwise look for `numpy>2` in torch's index
+            # but there's (as of writing on 2025-07-02) none there that support 3.9
+            # (though there are ones that support 3.10+) and because of that this
+            # step would fail without the unsafe index match
+            # for some documentation on dependency confusion attacks, can reference:
+            # https://docs.astral.sh/uv/reference/settings/#pip_index-strategy
+            uv pip compile pyproject.toml --only-binary=:all: \
+              --extra spacy --extra deid --extra meta-cat --extra rel-cat \
+              --extra-index-url https://download.pytorch.org/whl/cpu \
+              --index-strategy unsafe-best-match \
+              > req-cpu.txt
+          else
+            uv pip compile pyproject.toml --only-binary=:all: \
+              --extra spacy --extra deid --extra meta-cat --extra rel-cat \
+              --extra-index-url https://download.pytorch.org/whl/cpu \
+              > req-cpu.txt
+          fi
+          uv venv .venv
+          .venv/bin/python -m ensurepip
+          .venv/bin/python -m pip download --only-binary=:all: --dest bundle-cpu -r req-cpu.txt --extra-index-url https://download.pytorch.org/whl/cpu
+
+#      - name: Generate requirements and download (GPU)
+#        run: |
+#          uv pip compile pyproject.toml --only-binary=:all: \
+#            --extra spacy --extra deid --extra meta-cat --extra rel-cat \
+#            > req-gpu.txt
+#          .venv/bin/python -m pip download --only-binary=:all: --dest bundle-gpu -r req-gpu.txt
+
+      - name: Run sanity check / integration tests on cpu-only bundle
+        run: |
+          .venv/bin/python -m pip install --no-index --find-links=bundle-cpu -r req-cpu.txt
+          uv run bash tests/backwards_compatibility/run_current.sh
+
+      - name: Clear virtual environment
+        run: |
+          rm -rf .venv
+
+      - name: Add README to bundles
+        run: |
+          cp .release/install_bundle_readme.md bundle-cpu/README.md
+          cp req-cpu.txt bundle-cpu/requirements.txt
+#          cp .release/install_bundle_readme.md bundle-gpu/README.md
+#          cp req-gpu.txt bundle-gpu/requirements.txt
+
+      - name: Download built medcat wheel for inclusion in bundles
+        uses: actions/download-artifact@v4
+        with:
+          name: medcat-v2-wheel
+          path: medcat-v2/built-wheel
+
+      - name: List downloaded artifacts
+        run: ls -lh built-wheel
+
+      - name: Copy built wheel to CPU bundle
+        run: |
+          cp built-wheel/medcat*.whl bundle-cpu/.
+#          cp built-wheel/medcat*.whl bundle-gpu/.
+
+      - name: Archive CPU bundle
+        run: |
+          tar -czf medcat-v${{ needs.build.outputs.version_only }}-${{ matrix.python-version }}-cpu.tar.gz -C bundle-cpu .
+#          tar -czf medcat-v${{ needs.build.outputs.version_only }}-${{ matrix.python-version }}-gpu.tar.gz -C bundle-gpu .
+
+      - name: Upload bundles as artifacts
+        uses: actions/upload-artifact@v4
+        with:
+          name: bundles-${{ matrix.python-version }}
+          path: |
+            medcat-v2/medcat-v${{ needs.build.outputs.version_only }}-${{ matrix.python-version }}-cpu.tar.gz
+#            medcat-v2/medcat-v${{ needs.build.outputs.version_only }}-${{ matrix.python-version }}-gpu.tar.gz
+
+  release:
+    name: Create GitHub Release
+    needs: [build, bundle]
+    runs-on: ubuntu-latest
+    steps:
+      - name: Download all artifacts
+        uses: actions/download-artifact@v4
+        with:
+          path: medcat-v2/artifacts
+
+      - name: Move all bundles to dist/
+        run: |
+          ls -l artifacts
+          mkdir -p dist
+          find artifacts -name '*.tar.gz' -exec mv {} dist/ \;
+          ls -l dist/
+
+      - name: Download built wheel
+        uses: actions/download-artifact@v4
+        with:
+          name: medcat-v2-wheel
+          path: medcat-v2/dist-wheel
+
+      - name: Move wheels to dist/
+        run: |
+          mv dist-wheel/*.whl dist/.
+          mv dist-wheel/*.tar.gz dist/.
+
+      - name: Show files in dist/ for sanity check
+        run: ls -l dist/
+
       - name: Create GitHub Release
         id: create_release
         uses: softprops/action-gh-release@v2
         with:
-          tag_name: ${{ github.ref_name }}
+          tag_name: ${{ needs.build.outputs.version_tag }}
           draft: true
           # softprops/action-gh-release v2 doesnt support the working-directory field, so put the path in files
           files: |
diff --git a/medcat-v2/.release/install_bundle_readme.md b/medcat-v2/.release/install_bundle_readme.md
new file mode 100644
index 000000000..e9f69a575
--- /dev/null
+++ b/medcat-v2/.release/install_bundle_readme.md
@@ -0,0 +1,79 @@
+# What are install bundles?
+
+An install bundle (at least in this context) is a collection of dependencies needed to use `medcat`.
+This includes all direct and transitive dependencies as well as the `medcat` package itself.
+
+The install bundle:
+- Is a `.tar.gz`
+- Has the naming scheme `medcat-v<major>.<minor>.<patch>-3.<py_minor>-cpu.tar.gz`
+  - The `<major>`, `<minor>`, and `<patch>` placeholders indicate the major, minor and patch release numbers for `medcat`
+  - The `<py_minor>` placeholder indicates the Python version it was built for
+- It contains
+  - A collection of `.whl` files
+    - These are installation files for packages
+    - There's one for `medcat` itself
+    - And there's one for each direct and transitive dependency
+  - A `requirements.txt` file specifying the requirements installed
+  - This README file
+
+# Who are install bundles for?
+
+Most of the time, when installing python packages, `pip` (or another similar tool) is used to install them.
+It (generally) uses the Python Package Index ([PyPI](https://pypi.org)) to do those installs.
+However, sometimes another index / mirror can be set up internally within an organisation instead.
+
+An install bundle is designed to simplify the installation in air-gapped or semi air-gapped environments where:
+- The installation environment does not have access to PyPI
+- If there is an organisation-specific index / mirror, it does not include all the dependencies
+
+# What are some other benefits of install bundles?
+
+The main purpose is to help the people described in the section above.
+However, there are a few other benefits:
+- Using an install bundle provides a better guarantee of compatibility
+  - Since we've done some (albeit limited) tests during release
+  - There's a higher chance that the combination of dependencies just works
+- Install bundles live forever (or at least as long as GitHub)
+  - One can go back and install an older version of `medcat`
+  - Even if some newer dependencies would be allowed by requirements, but those are (retroactively) incompatible
+  - Even if/when some dependencies cease to exist on PyPI (are removed / deprecated)
+
+# Who are install bundles NOT for?
+
+Install bundles are not for
+- First time users trying out `medcat`
+  - You should use `pip install` (or similar) instead
+- Users with full internet access
+  - You should use `pip install` (or similar) instead
+- Users building a service / docker image
+  - Use other existing tooling
+
+The main reason you would normally want to use existing tooling for installing `medcat` is so that it is compatible with the rest of your existing ecosystem.
+If you rely too heavily on the install bundle, you might find yourself with incompatible dependencies.
+
+# What install bundles do we provide as part of a release?
+
+Currently we provide an install bundle for each supported Python version (3.9, 3.10, 3.11, and 3.12).
+These are targeting `x86_64` (think Intel and AMD CPUs) based Linux (think Ubuntu, Debian) machines.
+They **do not** provide GPU-enabled `torch` because the bundle would become too large to handle for a GitHub release if they did.
+Users who need GPU-enabled `torch` will need to install it separately.
+
+**The included release bundles are unlikely to work in other environments (e.g. on macOS, or Windows, or on an ARM-based CPU architecture).**
+
+# How to install an install bundle?
+
+Once you've downloaded the install bundle on a computer with internet / PyPI access, you need to:
+- Move the archive (a `.tar.gz` file) to the target machine
+- Unarchive using `tar -xvzf medcat-v2.*-cpu.tar.gz`
+  - Probably best to specify your exact file path
+  - This will extract the contents (both the `.whl` files and this README) in the current folder
+- Activate your virtual environment (`venv`, `conda`, etc).
+  - You generally don't want to install packages for your system `python`
+- Install all the wheels
+  - `pip install /path/to/unarchived/bundle/*.whl`
+  - NOTE: If there are other `.whl` files in the folder, this will attempt to install these as well
+- Now everything should work as expected
+  - You can run this to verify:
+    ```
+    python -c "from medcat import __version__ as v;print(f'Installed medcat v{v}')"
+    ```