diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 0d3e9e4..c67d875 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -33,43 +33,65 @@ jobs: with: dotnet-version: '8.0.x' - - name: Install dependencies + - name: Determine runtime identifier + id: rid + shell: bash run: | - python -m pip install --upgrade pip - pip install hatch + case "${{ runner.os }}" in + Linux) echo "rid=linux-x64" >> "$GITHUB_OUTPUT" ;; + Windows) echo "rid=win-x64" >> "$GITHUB_OUTPUT" ;; + macOS) echo "rid=osx-arm64" >> "$GITHUB_OUTPUT" ;; + esac - - name: Build engine binaries - run: python build_differ.py + - name: Build engine binaries for this platform + run: python build_differ.py ${{ steps.rid.outputs.rid }} + + - name: Install packages (editable) + run: pip install -e packages/core -e packages/ooxmlpowertools -e packages/docxodus pytest - name: Run tests - run: hatch run test + run: python -m pytest tests/ -v - build: + build-core: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 with: - submodules: recursive + python-version: '3.11' + - run: pip install build twine + - name: Build core sdist + wheel + run: python -m build packages/core --outdir dist + - name: Check distributions + run: twine check dist/* - - name: Set up Python - uses: actions/setup-python@v5 + build-engine-wheels: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - { os: ubuntu-latest, rids: "linux-x64 linux-arm64" } + - { os: windows-latest, rids: "win-x64 win-arm64" } + - { os: macos-latest, rids: "osx-x64 osx-arm64" } + steps: + - uses: actions/checkout@v4 + with: + submodules: recursive + - uses: actions/setup-python@v5 with: python-version: '3.11' - - - name: Set up .NET - uses: actions/setup-dotnet@v4 + - uses: actions/setup-dotnet@v4 with: dotnet-version: '8.0.x' - - - name: Install build dependencies - run: | - python -m pip install --upgrade pip - pip install hatch hatchling - - - name: Build 
package - run: hatch build - - - name: Check package + - run: pip install build hatchling twine + - name: Build per-platform engine wheels + shell: bash run: | - pip install twine - twine check dist/* + for rid in ${{ matrix.rids }}; do + python build_differ.py "$rid" + python -m build --wheel --no-isolation packages/ooxmlpowertools --outdir dist + python -m build --wheel --no-isolation packages/docxodus --outdir dist + done + - name: Check wheels + run: twine check dist/* diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index ecf8fd7..9b425b5 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -1,5 +1,10 @@ name: Upload Python Package +# Builds and publishes all three packages on a tagged release: +# python-redlines (core, pure-Python sdist + wheel) +# python-redlines-ooxmlpowertools (per-platform engine wheels) +# python-redlines-docxodus (per-platform engine wheels) + on: release: types: [published] @@ -8,28 +13,70 @@ permissions: contents: read jobs: - deploy: - + build-core: runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - run: pip install build + - name: Build core sdist + wheel + run: python -m build packages/core --outdir dist + - uses: actions/upload-artifact@v4 + with: + name: dist-core + path: dist/* + build-engine-wheels: + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + include: + - { os: ubuntu-latest, rids: "linux-x64 linux-arm64" } + - { os: windows-latest, rids: "win-x64 win-arm64" } + - { os: macos-latest, rids: "osx-x64 osx-arm64" } steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 with: submodules: recursive - - name: Set up Python - uses: actions/setup-python@v3 + - uses: actions/setup-python@v5 with: - python-version: '3.x' - - name: Setup .NET - uses: actions/setup-dotnet@v3 + python-version: '3.11' + - uses: actions/setup-dotnet@v4 with: 
dotnet-version: '8.0.x' - - name: Install dependencies - run: | - python -m pip install --upgrade pip - pip install hatch hatchling - - name: Build package - run: hatch build - - name: Publish package + - run: pip install build hatchling + - name: Build per-platform engine wheels + shell: bash run: | - hatch publish -u "__token__" -a ${{ secrets.PYPI_API_TOKEN }} + for rid in ${{ matrix.rids }}; do + python build_differ.py "$rid" + python -m build --wheel --no-isolation packages/ooxmlpowertools --outdir dist + python -m build --wheel --no-isolation packages/docxodus --outdir dist + done + - uses: actions/upload-artifact@v4 + with: + name: dist-${{ matrix.os }} + path: dist/* + + publish: + needs: [build-core, build-engine-wheels] + runs-on: ubuntu-latest + steps: + - uses: actions/download-artifact@v4 + with: + path: dist + merge-multiple: true + - uses: actions/setup-python@v5 + with: + python-version: '3.11' + - run: pip install twine + - name: Check distributions + run: twine check dist/* + - name: Publish to PyPI + env: + TWINE_USERNAME: __token__ + TWINE_PASSWORD: ${{ secrets.PYPI_API_TOKEN }} + run: twine upload dist/* diff --git a/.gitignore b/.gitignore index c7a2b3f..5d501c3 100644 --- a/.gitignore +++ b/.gitignore @@ -9,6 +9,10 @@ csproj/obj/* docxodus/**/bin/* docxodus/**/obj/* +# Engine binary archives (built by build_differ.py, embedded in wheels by CI) +packages/*/src/*/_binaries/*.tar.gz +packages/*/src/*/_binaries/*.zip + # C extensions *.so diff --git a/CLAUDE.md b/CLAUDE.md index a4e1792..ac58114 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,68 +4,102 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -Python-Redlines is a Python wrapper around compiled C# binaries that generate `.docx` redline/tracked-changes documents by comparing two Word files. The Python layer handles platform detection, binary extraction, temp file management, and subprocess execution. 
+Python-Redlines generates `.docx` redline/tracked-changes documents by comparing two Word files. A pure-Python wrapper drives compiled C# (.NET 8) engine binaries; the Python layer handles platform detection, binary extraction, temp file management, and subprocess execution. Two comparison engines are available: - **XmlPowerToolsEngine** — wraps Open-XML-PowerTools WmlComparer (original engine) - **DocxodusEngine** — wraps Docxodus, a modernized .NET 8.0 fork with better move detection -## Commands - -```bash -# Run tests -hatch run test +## Monorepo structure — three published packages -# Run a single test -hatch run test tests/test_openxml_differ.py::test_run_redlines_with_real_files +This repo publishes **three** PyPI packages, each with its own `pyproject.toml` under `packages/`: -# Run tests with coverage -hatch run cov +| Directory | PyPI name | Contents | Wheel | +|---|---|---|---| +| `packages/core` | `python-redlines` | Pure-Python wrapper (`engines.py`) | `py3-none-any` | +| `packages/ooxmlpowertools` | `python-redlines-ooxmlpowertools` | Open-XML-PowerTools binary | per-platform | +| `packages/docxodus` | `python-redlines-docxodus` | Docxodus binary | per-platform | -# Type checking -hatch run types:check +Engine binaries are **optional dependencies**. Users install an engine via an extra: +`pip install python-redlines[docxodus]`, `[ooxmlpowertools]`, or `[all]`. The core +package has no binaries; each binary package ships one platform's compiled binary as a +prebuilt wheel, so end users never compile anything. -# Build C# binaries for all platforms (requires .NET 8.0 SDK) -hatch run build +The repo root is **not** an installable project — its `pyproject.toml` holds only +shared pytest/coverage config. 
-# Build Python package (triggers C# build via custom hook) -hatch build +## Commands -# Initialize Docxodus submodule (required before building) +```bash +# Initialize the Docxodus submodule (required before building its engine) git submodule update --init --recursive -``` -## Architecture +# Build engine binaries for one or more platforms (requires .NET 8.0 SDK). +# RIDs: linux-x64 linux-arm64 win-x64 win-arm64 osx-x64 osx-arm64 +python build_differ.py linux-x64 +python build_differ.py --all + +# Install all three packages editable for development +pip install -e packages/core -e packages/ooxmlpowertools -e packages/docxodus pytest -The system uses a two-layer wrapper pattern with a shared base class: +# Run tests (from repo root) +python -m pytest tests/ +python -m pytest tests/test_openxml_differ.py::test_run_redlines_with_real_files -1. **Python layer** (`src/python_redlines/engines.py`): - - `BaseEngine` — shared logic for binary extraction, subprocess invocation, and temp file management - - `XmlPowerToolsEngine(BaseEngine)` — sets constants for the Open-XML-PowerTools binary (`dist/`, `bin/`, `redlines`) - - `DocxodusEngine(BaseEngine)` — sets constants for the Docxodus binary (`dist_docxodus/`, `bin_docxodus/`, `redline`) +# Build a package wheel +python -m build packages/core +python -m build --wheel packages/docxodus # needs an archive in _binaries/ first +``` - Both engines expose `run_redline(author_tag, original, modified, **kwargs)`. `DocxodusEngine` overrides `_build_command()` to translate kwargs (e.g. `detect_moves`, `detail_threshold`) into CLI flags for the Docxodus binary. `XmlPowerToolsEngine` uses the legacy 4-positional-arg format and ignores kwargs. +## Architecture -2. **C# binaries**: +1. 
**Core Python layer** (`packages/core/src/python_redlines/engines.py`): + - `BaseEngine` — locates the engine binary in its companion package via + `importlib.resources`, extracts the platform archive once into a writable + user cache dir (`platformdirs.user_cache_dir`), and runs it via subprocess. + - `XmlPowerToolsEngine` / `DocxodusEngine` — subclasses declaring `BINARY_PACKAGE`, + `BINARY_BASE_NAME`, and `EXTRA_NAME`. + - `EngineNotInstalledError` — raised on instantiation if the companion binary + package is missing, with the `pip install` command to fix it. + + Both engines expose `run_redline(author_tag, original, modified, **kwargs)`. + `DocxodusEngine` overrides `_build_command()` to translate kwargs (e.g. `detect_moves`, + `detail_threshold`) into CLI flags. `XmlPowerToolsEngine` uses the legacy + 4-positional-arg format and ignores kwargs. + +2. **Binary packages** ship one platform archive under + `src/<package>/_binaries/<rid>.tar.gz` (or `.zip` for Windows). The archive is + gitignored; CI builds it. The hatchling build hook `hatch_build.py` reads which + RID archive is present and stamps the wheel's platform tag accordingly. + +3. **C# sources**: - `csproj/Program.cs` — Open-XML-PowerTools CLI tool - `docxodus/tools/redline/Program.cs` — Docxodus CLI tool (git submodule) -Pre-compiled binaries for 6 platform targets (linux/win/osx x x64/arm64) are stored as archives in `src/python_redlines/dist/` and `src/python_redlines/dist_docxodus/`, included in the wheel. The build script `build_differ.py` compiles both engines using `dotnet publish`. + `build_differ.py` compiles an engine for a given RID with `dotnet publish` and + writes a single flat archive into the corresponding binary package's `_binaries/`. + +## Build & release flow + +- A binary-package wheel must contain **exactly one** platform archive. Each + `build_differ.py <rid>` invocation clears old archives, so build one RID, build + the wheel, repeat. 
+- `.github/workflows/ci.yml` — tests on each OS (native RID) + builds all wheels. +- `.github/workflows/python-publish.yml` — on release, builds per-platform engine + wheels across 3 OS runners, the core sdist+wheel, and publishes all three packages. -## Key Files +## Version management -- `src/python_redlines/engines.py` — BaseEngine, XmlPowerToolsEngine, and DocxodusEngine classes -- `src/python_redlines/__init__.py` — Exports all engine classes -- `src/python_redlines/__about__.py` — Single source of truth for package version -- `csproj/Program.cs` — Open-XML-PowerTools C# comparison utility -- `docxodus/` — Docxodus git submodule (tools/redline/ contains the CLI) -- `build_differ.py` — Cross-platform C# build orchestration for both engines -- `hatch_run_build_hook.py` — Hatch build hook that triggers C# compilation -- `tests/fixtures/` — Test `.docx` files (original, modified, expected_redline) +`packages/core/src/python_redlines/__about__.py` is the single source of truth. +The two binary packages read it via `[tool.hatch.version] path = "../core/..."`, +so all three always share one version. Bump only that file. ## Testing Notes -Tests must be run from the project root (fixtures use relative paths like `tests/fixtures/original.docx`). The XmlPowerToolsEngine integration test validates that comparing the fixture documents produces exactly 9 revisions. Docxodus uses a different stdout format (`"revision(s) found"` vs `"Revisions found: 9"`). +Tests live in repo-root `tests/` and must be run from the repo root (fixtures use +relative paths like `tests/fixtures/original.docx`). They require all three packages +installed and the binaries built for the current platform. The XmlPowerToolsEngine +integration test validates exactly 9 revisions on the fixture documents. 
## Stdout Format Differences diff --git a/README.md b/README.md index bd5648c..eec3ff7 100644 --- a/README.md +++ b/README.md @@ -1,22 +1,49 @@ # Python-Redlines: Docx Redlines (Tracked Changes) for the Python Ecosystem -## Project Goal - Democratizing DOCX Comparisons +Generate tracked-change "redline" `.docx` documents from Python — compare two Word files +and get back a third document showing every insertion, deletion, and (optionally) move as +native Word tracked changes. -The main goal of this project is to address the significant gap in the open-source ecosystem around `.docx` document -comparison tools. Currently, the process of comparing and generating redline documents (documents that highlight -changes between versions) is complex and largely dominated by commercial software. These -tools, while effective, often come with cost barriers and limitations in terms of accessibility and integration -flexibility. +Comparing `.docx` documents has long been dominated by commercial software, with cost +barriers and little integration flexibility. Python-Redlines brings open-source `.docx` +redlining to the Python ecosystem so legal hackers, hobbyists, and product teams can build +on it freely: two documents in, one redline out. -`Python-redlines` aims to democratize the ability to run tracked change redlines for .docx, providing the -open-source community with a tool to create `.docx` redlines without the need for commercial software. This will let -more legal hackers and hobbyist innovators experiment and create tooling for enterprise and legal. +## Quick Start + +The **default engine is [Docxodus](https://github.com/JSv4/Docxodus)** — a modernized, +actively-maintained .NET 8 comparison engine (detailed below). 
Install it and you're +running; the engine binary is prebuilt and embedded in the wheel, so there is **no .NET +SDK to install and nothing to compile**: + +```commandline +pip install python-redlines[docxodus] +``` + +```python +from python_redlines import DocxodusEngine + +with open("original.docx", "rb") as f: + original = f.read() +with open("modified.docx", "rb") as f: + modified = f.read() + +engine = DocxodusEngine() +redline_bytes, stdout, stderr = engine.run_redline("Reviewer", original, modified) + +with open("redline.docx", "wb") as f: + f.write(redline_bytes) +``` + +That's the whole thing. The rest of this README covers the second (legacy) engine, +comparison settings, and how the packages are built and distributed. ## Comparison Engines -Python-Redlines ships with **two comparison engines** — choose the one that best fits your needs: +Python-Redlines provides **two comparison engines**. `DocxodusEngine` is the default and +recommended choice; `XmlPowerToolsEngine` remains available as a legacy option. -### `DocxodusEngine` — Recommended +### `DocxodusEngine` — Default (Recommended) **[Docxodus](https://github.com/JSv4/Docxodus)** is a modernized .NET 8.0 fork of Open-XML-PowerTools with significant improvements: @@ -56,41 +83,28 @@ Both engines share the same API — the only difference is the class you instant ### Install the Library -```commandline -pip install git+https://github.com/JSv4/Python-Redlines -``` +The comparison engines are compiled .NET binaries, but they are **prebuilt and embedded +in the published wheels** — you do not need the .NET SDK (or any local compilation) to +install or use `python-redlines`. -You can add this as a dependency like so: +Each engine ships in its own optional companion package. 
Install the engine(s) you need +as extras: -```requirements -python_redlines @ git+https://github.com/JSv4/Python-Redlines@v0.0.4 +```commandline +pip install python-redlines[docxodus] # Docxodus engine +pip install python-redlines[ooxmlpowertools] # Open-XML-PowerTools engine +pip install python-redlines[all] # both engines ``` -### Use the Library - -If you just want to use the tool, jump into our [quickstart guide](docs/quickstart.md). - -### Quick Example - -```python -from python_redlines import DocxodusEngine - -# Load your documents as bytes -with open("original.docx", "rb") as f: - original = f.read() -with open("modified.docx", "rb") as f: - modified = f.read() - -# Generate a redline document -engine = DocxodusEngine() -redline_bytes, stdout, stderr = engine.run_redline("Reviewer", original, modified) +Prebuilt wheels are available for Linux, macOS, and Windows (x64 and arm64); `pip` +selects the wheel matching your platform automatically. Instantiating an engine whose +companion package is not installed raises `EngineNotInstalledError` telling you which +extra to install. -# Save the result -with open("redline.docx", "wb") as f: - f.write(redline_bytes) +### Use the Library -print(stdout) # e.g. "Redline complete: 9 revision(s) found" -``` +See the [Quick Start](#quick-start) above for a minimal example, or the +[quickstart guide](docs/quickstart.md) for a step-by-step walkthrough. ## Comparison Settings (DocxodusEngine only) @@ -131,43 +145,42 @@ redline_bytes, stdout, stderr = engine.run_redline( ## Architecture Overview Both engines follow the same pattern: a Python wrapper class invokes a self-contained C# binary via subprocess. -The binary takes four arguments: ` `. 
+ +The repository is a **monorepo of three separately-published packages**: + +| Package | PyPI name | Contents | +|---|---|---| +| `packages/core` | `python-redlines` | Pure-Python wrapper; no binaries | +| `packages/ooxmlpowertools` | `python-redlines-ooxmlpowertools` | Open-XML-PowerTools engine binary | +| `packages/docxodus` | `python-redlines-docxodus` | Docxodus engine binary | + +The core package's `[docxodus]` / `[ooxmlpowertools]` / `[all]` extras pull in the +binary packages. Each binary package is published as **per-platform wheels** (Linux, +macOS, Windows × x64/arm64), each embedding one prebuilt, self-contained .NET binary. ``` python-redlines/ │ ├── csproj/ # XmlPowerTools C# source -│ ├── Program.cs -│ └── redlines.csproj -│ -├── docxodus/ # Docxodus git submodule -│ └── tools/redline/ -│ ├── Program.cs -│ └── redline.csproj +├── docxodus/ # Docxodus git submodule (tools/redline/) │ -├── src/ -│ └── python_redlines/ -│ ├── engines.py # BaseEngine, XmlPowerToolsEngine, DocxodusEngine -│ ├── dist/ # XmlPowerTools compressed binaries -│ ├── dist_docxodus/ # Docxodus compressed binaries -│ ├── bin/ # XmlPowerTools extracted binaries (runtime) -│ ├── bin_docxodus/ # Docxodus extracted binaries (runtime) -│ ├── __about__.py -│ └── __init__.py +├── packages/ +│ ├── core/ # -> python-redlines +│ │ └── src/python_redlines/ # engines.py, __init__.py, __about__.py +│ ├── ooxmlpowertools/ # -> python-redlines-ooxmlpowertools +│ │ ├── hatch_build.py # stamps the wheel platform tag +│ │ └── src/python_redlines_ooxmlpowertools/_binaries/ +│ └── docxodus/ # -> python-redlines-docxodus +│ ├── hatch_build.py +│ └── src/python_redlines_docxodus/_binaries/ │ -├── tests/ -│ ├── fixtures/ -│ ├── test_openxml_differ.py # XmlPowerTools integration test -│ ├── test_docxodus_engine.py # Docxodus integration test -│ └── test_engine_contract.py # Shared contract tests for both engines -│ -├── build_differ.py # Builds both engines for all platforms -├── pyproject.toml -└── 
README.md +├── tests/ # integration + contract tests (run from root) +├── build_differ.py # compiles engines into each package's _binaries/ +└── pyproject.toml # shared pytest/coverage config only ``` -Pre-compiled binaries for 6 platform targets (linux/win/osx x x64/arm64) are bundled in the wheel for each engine. -On first use, the appropriate binary is extracted and cached. +At runtime the wrapper finds its companion binary package via `importlib.resources`, +extracts the platform archive once into the user cache directory, and runs it. ### Stdout Differences @@ -182,8 +195,8 @@ The two engines produce slightly different stdout messages: ### Prerequisites -- Python 3.8+ -- .NET 8.0 SDK (for building C# binaries) +- Python 3.9+ +- .NET 8.0 SDK (only for building the engine binaries locally) ### Setup @@ -194,22 +207,29 @@ cd Python-Redlines # If you already cloned without submodules git submodule update --init --recursive + +# Build the engine binaries for your platform (RIDs: linux-x64, win-x64, osx-arm64, ...) 
+python build_differ.py linux-x64 + +# Install all three packages editable +pip install -e packages/core -e packages/ooxmlpowertools -e packages/docxodus pytest ``` ### Commands ```bash -# Run tests -hatch run test +# Run tests (from the repo root) +python -m pytest tests/ # Run a single test -hatch run test tests/test_openxml_differ.py::test_run_redlines_with_real_files +python -m pytest tests/test_openxml_differ.py::test_run_redlines_with_real_files -# Build C# binaries for all platforms -hatch run build +# Build engine binaries for one or more platforms +python build_differ.py linux-x64 win-x64 +python build_differ.py --all -# Build Python package -hatch build +# Build a package wheel +python -m build packages/core ``` ### Detailed Dev Setup diff --git a/build_differ.py b/build_differ.py index 8c98dfc..17c5315 100644 --- a/build_differ.py +++ b/build_differ.py @@ -1,112 +1,131 @@ -import subprocess +"""Build the C# redline engine binaries and package them for the wheels. + +Each engine's binary ships in its own companion package; this script compiles +an engine for a given platform (.NET runtime identifier) and writes a single +flat archive into that package's ``_binaries/`` directory. A binary-package +wheel must contain exactly one such archive, so each invocation clears any +existing archives first. + +Usage: + python build_differ.py [ ...] 
+ python build_differ.py --all + +Valid RIDs: linux-x64, linux-arm64, win-x64, win-arm64, osx-x64, osx-arm64 +""" import os +import subprocess +import sys import tarfile import zipfile - -RIDS = [ - ("linux-x64", ".tar.gz"), - ("linux-arm64", ".tar.gz"), - ("win-x64", ".zip"), - ("win-arm64", ".zip"), - ("osx-x64", ".tar.gz"), - ("osx-arm64", ".tar.gz"), +RIDS = ["linux-x64", "linux-arm64", "win-x64", "win-arm64", "osx-x64", "osx-arm64"] + +ENGINES = [ + { + "name": "ooxmlpowertools", + "csproj": os.path.join("csproj"), + "csproj_file": os.path.join("csproj", "redlines.csproj"), + "binaries_dir": os.path.join( + "packages", "ooxmlpowertools", "src", + "python_redlines_ooxmlpowertools", "_binaries", + ), + }, + { + "name": "docxodus", + "csproj": os.path.join("docxodus", "tools", "redline"), + "csproj_file": os.path.join("docxodus", "tools", "redline", "redline.csproj"), + "binaries_dir": os.path.join( + "packages", "docxodus", "src", + "python_redlines_docxodus", "_binaries", + ), + }, ] -def get_version(): - """ - Extracts the version from the specified __about__.py file. - """ - about = {} - with open('./src/python_redlines/__about__.py') as f: - exec(f.read(), about) - return about['__version__'] - - def run_command(command): - """ - Runs a shell command and prints its output. Raises on non-zero exit code. - """ + """Run a shell command, streaming output. Raises on a non-zero exit code.""" process = subprocess.Popen(command, shell=True, stdout=subprocess.PIPE, stderr=subprocess.STDOUT) for line in process.stdout: - print(line.decode().strip()) + print(line.decode().rstrip()) process.wait() if process.returncode != 0: raise RuntimeError(f"Command failed with exit code {process.returncode}: {command}") -def compress_files(source_dir, target_file): - """ - Compresses files in the specified directory into a tar.gz or zip file. 
- """ - if target_file.endswith('.tar.gz'): +def archive_name(rid): + return f"{rid}.zip" if rid.startswith("win-") else f"{rid}.tar.gz" + + +def compress_dir(source_dir, target_file): + """Compress the contents of source_dir (flat, no parent prefix) into target_file.""" + files = [] + for root, _, names in os.walk(source_dir): + for name in names: + full = os.path.join(root, name) + files.append((full, os.path.relpath(full, source_dir))) + + if target_file.endswith(".tar.gz"): with tarfile.open(target_file, "w:gz") as tar: - tar.add(source_dir, arcname=os.path.basename(source_dir)) - elif target_file.endswith('.zip'): - with zipfile.ZipFile(target_file, 'w', zipfile.ZIP_DEFLATED) as zipf: - for root, dirs, files in os.walk(source_dir): - for file in files: - zipf.write(os.path.join(root, file), - os.path.relpath(os.path.join(root, file), - os.path.join(source_dir, '..'))) - - -def cleanup_old_builds(dist_dir, current_version): - """ - Deletes any build files ending in .zip or .tar.gz in the dist_dir with a different version tag. - """ - for file in os.listdir(dist_dir): - if not file.endswith((f'{current_version}.zip', f'{current_version}.tar.gz', '.gitignore')): - file_path = os.path.join(dist_dir, file) - os.remove(file_path) - print(f"Deleted old build file: {file}") - - -def build_engine(csproj_path, dist_dir, version): - """ - Builds a C# engine for all platform targets, compresses the output, and cleans up old builds. - - :param csproj_path: Path to the .csproj directory (e.g. 
'./csproj' or './docxodus/tools/redline') - :param dist_dir: Path to the distribution directory for compressed binaries - :param version: Version string for archive naming - """ - # Build for each RID - for rid, _ in RIDS: - print(f"Building {csproj_path} for {rid}...") - run_command(f'dotnet publish {csproj_path} -c Release -r {rid} --self-contained') - - # Determine the build output base directory - # dotnet publish outputs to /bin/Release/net8.0/ - build_base = os.path.join(csproj_path, 'bin', 'Release', 'net8.0') - - # Compress each build - for rid, ext in RIDS: - build_dir = os.path.join(build_base, rid) - archive_path = os.path.join(dist_dir, f"{rid}-{version}{ext}") - print(f"Compressing {rid} to {archive_path}...") - compress_files(build_dir, archive_path) - - cleanup_old_builds(dist_dir, version) - - -def main(): - version = get_version() - print(f"Version: {version}") - - # Build the XmlPowerTools engine (original) - build_engine('./csproj', './src/python_redlines/dist/', version) - - # Build the Docxodus engine (if submodule is available) - docxodus_csproj = './docxodus/tools/redline' - if os.path.exists(os.path.join(docxodus_csproj, 'redline.csproj')): - build_engine(docxodus_csproj, './src/python_redlines/dist_docxodus/', version) + for full, arcname in files: + tar.add(full, arcname=arcname) + elif target_file.endswith(".zip"): + with zipfile.ZipFile(target_file, "w", zipfile.ZIP_DEFLATED) as zf: + for full, arcname in files: + zf.write(full, arcname=arcname) else: - print("WARNING: Docxodus submodule not found at docxodus/tools/redline/redline.csproj — skipping Docxodus build.") - print("Run 'git submodule update --init --recursive' to initialize the submodule.") + raise ValueError(f"Unsupported archive format: {target_file}") + + +def clean_binaries_dir(binaries_dir): + """Remove existing archives so each wheel ships exactly one platform's binary.""" + for name in os.listdir(binaries_dir): + if name.endswith((".tar.gz", ".zip")): + 
os.remove(os.path.join(binaries_dir, name)) + + +def build_engine_for_rid(engine, rid): + csproj = engine["csproj"] + print(f"[{engine['name']}] Building {rid} ...") + run_command(f"dotnet publish {csproj} -c Release -r {rid} --self-contained") + + publish_dir = os.path.join(csproj, "bin", "Release", "net8.0", rid, "publish") + if not os.path.isdir(publish_dir): + raise RuntimeError(f"Expected publish output not found: {publish_dir}") + + target = os.path.join(engine["binaries_dir"], archive_name(rid)) + print(f"[{engine['name']}] Compressing -> {target}") + compress_dir(publish_dir, target) + + +def main(argv): + if not argv or argv[0] in ("-h", "--help"): + print(__doc__) + return 0 + + rids = RIDS if argv[0] == "--all" else argv + unknown = [r for r in rids if r not in RIDS] + if unknown: + print(f"ERROR: unknown RID(s): {', '.join(unknown)}") + print(f"Valid RIDs: {', '.join(RIDS)}") + return 1 + + for engine in ENGINES: + if not os.path.exists(engine["csproj_file"]): + print(f"WARNING: {engine['name']} project not found at " + f"{engine['csproj_file']} — skipping.") + if engine["name"] == "docxodus": + print("Run 'git submodule update --init --recursive' to initialize " + "the Docxodus submodule.") + continue + + os.makedirs(engine["binaries_dir"], exist_ok=True) + clean_binaries_dir(engine["binaries_dir"]) + for rid in rids: + build_engine_for_rid(engine, rid) - print("Build and compression complete.") + print("Build complete.") + return 0 if __name__ == "__main__": - main() + sys.exit(main(sys.argv[1:])) diff --git a/dist/.gitignore b/dist/.gitignore deleted file mode 100644 index c96a04f..0000000 --- a/dist/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore \ No newline at end of file diff --git a/docs/developer-guide.md b/docs/developer-guide.md index 3dee299..175beeb 100644 --- a/docs/developer-guide.md +++ b/docs/developer-guide.md @@ -1,103 +1,95 @@ -# Developer Guide for RedlinesWrapper +# Developer Guide ## Prerequisites -- Python 3.7 or higher 
installed -- .NET SDK for building C# binaries or .NET Runtime to run them -- Hatch for Python environment and package management +- Python 3.9 or higher +- .NET 8.0 SDK (to compile the C# engine binaries) +- `git` (the Docxodus engine is a submodule) -## Setting Up the Project +## Repository layout -### Step 1: Clone the Repository +This repository is a monorepo that publishes three PyPI packages, each with its own +`pyproject.toml` under `packages/`: -Clone the Python-Docx-Redlines repository to your local +| Directory | PyPI name | Contents | +|---|---|---| +| `packages/core` | `python-redlines` | Pure-Python wrapper | +| `packages/ooxmlpowertools` | `python-redlines-ooxmlpowertools` | Open-XML-PowerTools engine binary | +| `packages/docxodus` | `python-redlines-docxodus` | Docxodus engine binary | -machine. Use Git to clone the repository using the following command: +The repo root is not itself installable; its `pyproject.toml` only holds shared +pytest/coverage configuration. -```bash -git clone https://github.com/JSv4/Python-Docx-Redlines -cd python-docx-redlines -``` - -### Step 2: Install Hatch - -If Hatch is not already installed, install it using pip: - -```bash -pip install hatch hatchling -``` - -### Step 3: Create and Activate the Virtual Environment - -Inside the project directory, create a virtual environment using Hatch: - -```bash -hatch env create -``` - -Activate the virtual environment: +## Setting up ```bash -hatch shell -``` +# Clone with submodules +git clone --recurse-submodules https://github.com/JSv4/Python-Redlines +cd Python-Redlines -### Step 4: Install Dependencies +# Or, if already cloned: +git submodule update --init --recursive -Install the necessary Python dependencies: +# Build the engine binaries for your platform +python build_differ.py linux-x64 # or win-x64 / osx-arm64 / ... 
-```bash -pip install .[dev] +# Install all three packages editable, plus pytest +pip install -e packages/core -e packages/ooxmlpowertools -e packages/docxodus pytest ``` -## Building the C# Binaries +## Building the C# binaries -You can use the binaries distributed with the project, or, if you want to build new binaries for some reason, you can -use our build script, integrated as a hatch tool. +`build_differ.py` compiles an engine for one or more .NET runtime identifiers (RIDs) +and writes a single flat archive into each binary package's `_binaries/` directory. ```bash -hatch run build +python build_differ.py linux-x64 # one platform +python build_differ.py linux-x64 win-x64 # several +python build_differ.py --all # all six RIDs ``` -### Under the Hood +Valid RIDs: `linux-x64`, `linux-arm64`, `win-x64`, `win-arm64`, `osx-x64`, `osx-arm64`. -We're just using dotnet to build binaries for [Program.cs](csproj/Program.cs), a command line utility that exposes -`WmlComparer's` redlining capabilities. We are currently target win-x64 and linux-x64 builds, but any runtime -[supported by .NET](https://learn.microsoft.com/en-us/dotnet/core/rid-catalog) is theoretically supported. +Under the hood this runs `dotnet publish -c Release -r <rid> --self-contained` for +`csproj/` (Open-XML-PowerTools) and `docxodus/tools/redline/` (Docxodus), then +compresses each `publish/` output into `<rid>.tar.gz` (or `<rid>.zip` on Windows). -**Our build script does the following:** +## Building wheels -1. Build a binary for Linux: +A binary-package wheel must contain **exactly one** platform archive, so build one RID +at a time: ```bash -dotnet publish -c Release -r linux-x64 --self-contained +python build_differ.py linux-x64 +python -m build --wheel packages/ooxmlpowertools +python -m build --wheel packages/docxodus ``` -2. 
Build a binary for Windows: +The core package is pure Python and platform-independent: ```bash -dotnet publish -c Release -r win-x64 --self-contained +python -m build packages/core ``` -3. Build a binary for MacOS: - -```bash -dotnet publish -c Release -r osx-x64 --self-contained -``` - -4. Archive and package binaries into `./dist/`: - +Each binary package's `hatch_build.py` build hook detects the RID of the archive in +`_binaries/` and stamps the wheel with the matching platform tag (e.g. +`manylinux2014_x86_64`, `macosx_11_0_arm64`, `win_amd64`). -## Running Tests +## Running tests -To ensure everything is set up correctly and working as expected, run the tests included in the `tests/` directory. -Execute the tests using pytest: +Run from the repository root (test fixtures use relative paths): ```bash -pytest +python -m pytest tests/ ``` -This will run all the test cases defined in your test files. +Tests require all three packages installed and the engine binaries built for the +current platform. -## Conclusion +## Releasing -You've now set up the Python-Docx-Redlines project, built the necessary C# binaries, and learned how to use the Python wrapper to compare `.docx` files. Running the tests ensures that your setup is correct and the wrapper functions as expected. +`.github/workflows/python-publish.yml` runs on a published GitHub release: it builds +per-platform engine wheels across three OS runners, builds the core sdist + wheel, and +publishes all three packages to PyPI. Bump the version in +`packages/core/src/python_redlines/__about__.py` only — the binary packages read it +from there, so all three always release in lockstep. diff --git a/docs/quickstart.md b/docs/quickstart.md index 2c74b80..3441876 100644 --- a/docs/quickstart.md +++ b/docs/quickstart.md @@ -1,7 +1,17 @@ # Python-Redlines Quickstart -As discussed in the main README, the initial version is a wrapper for the C# api provided by Open-XML-PowerTools and -`WmlComparer`. 
This readme will show you how to use the XmlPowerToolsEngine to run a redline. +`python-redlines` wraps a C# comparison engine to produce tracked-change redline `.docx` +files. This guide uses the `XmlPowerToolsEngine` (Open-XML-PowerTools); `DocxodusEngine` +works the same way. + +### Step 0: Install + +Install the core package plus the engine you want as an extra. No .NET SDK is needed — +the engine binary is prebuilt and embedded in the wheel. + +```commandline +pip install python-redlines[ooxmlpowertools] +``` ### Step 1: Import and Initialize the Wrapper diff --git a/extract_version.py b/extract_version.py deleted file mode 100644 index 71226d3..0000000 --- a/extract_version.py +++ /dev/null @@ -1,12 +0,0 @@ -# extract_version.py -def get_version(): - """ - Extracts the version from the specified __about__.py file. - """ - about = {} - with open('./src/python_redlines/__about__.py') as f: - exec(f.read(), about) - return about['__version__'] - -if __name__ == "__main__": - print(get_version()) diff --git a/hatch_run_build_hook.py b/hatch_run_build_hook.py deleted file mode 100644 index f20b2b1..0000000 --- a/hatch_run_build_hook.py +++ /dev/null @@ -1,9 +0,0 @@ -import subprocess -from hatchling.builders.hooks.plugin.interface import BuildHookInterface - -class HatchRunBuildHook(BuildHookInterface): - PLUGIN_NAME = 'hatch-run-build' - - def initialize(self, version, build_data): - # Run the 'hatch run build' command - subprocess.run(["python", "-m", "build_differ"], check=True) \ No newline at end of file diff --git a/packages/core/README.md b/packages/core/README.md new file mode 100644 index 0000000..f75be0b --- /dev/null +++ b/packages/core/README.md @@ -0,0 +1,34 @@ +# python-redlines + +Generate tracked-change "redline" `.docx` documents by comparing two Word files. + +`python-redlines` is the pure-Python core. 
The comparison engines themselves are +compiled .NET binaries shipped in separate, optional companion packages — install +the one(s) you need as extras: + +```bash +pip install python-redlines[docxodus] # Docxodus engine +pip install python-redlines[ooxmlpowertools] # Open-XML-PowerTools engine +pip install python-redlines[all] # both +``` + +Binaries are prebuilt for each platform and embedded in the companion package's +wheel — no .NET SDK and no local compilation are needed to install or use it. + +## Usage + +```python +from python_redlines import DocxodusEngine + +engine = DocxodusEngine() +redline_bytes, stdout, stderr = engine.run_redline( + "Author Name", + original=open("original.docx", "rb").read(), + modified=open("modified.docx", "rb").read(), +) +``` + +If an engine's companion package is not installed, instantiating the engine +raises `EngineNotInstalledError` with the `pip install` command to fix it. + +See the [project repository](https://github.com/JSv4/Python-Redlines) for details. diff --git a/packages/core/pyproject.toml b/packages/core/pyproject.toml new file mode 100644 index 0000000..62d0bee --- /dev/null +++ b/packages/core/pyproject.toml @@ -0,0 +1,47 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "python-redlines" +dynamic = ["version"] +description = "Generate tracked-change redline .docx documents by comparing Word files." 
+readme = "README.md" +requires-python = ">=3.9" +license = "MIT" +keywords = ["docx", "redline", "diff", "tracked-changes", "openxml", "word"] +authors = [ + { name = "John Scrudato IV" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python", + "Programming Language :: Python :: 3.9", + "Programming Language :: Python :: 3.10", + "Programming Language :: Python :: 3.11", + "Programming Language :: Python :: 3.12", + "Programming Language :: Python :: Implementation :: CPython", +] +dependencies = [ + "platformdirs>=3.0", +] + +# Engine binaries ship in separate companion packages. Install an engine via +# its extra, e.g. `pip install python-redlines[docxodus]`. All three packages +# are released together from the same repository on each tagged release. +[project.optional-dependencies] +ooxmlpowertools = ["python-redlines-ooxmlpowertools"] +docxodus = ["python-redlines-docxodus"] +all = ["python-redlines-ooxmlpowertools", "python-redlines-docxodus"] + +[project.urls] +Homepage = "https://github.com/JSv4/Python-Redlines" +Issues = "https://github.com/JSv4/Python-Redlines/issues" +Source = "https://github.com/JSv4/Python-Redlines" + +[tool.hatch.version] +path = "src/python_redlines/__about__.py" + +[tool.hatch.build.targets.wheel] +packages = ["src/python_redlines"] diff --git a/src/python_redlines/__about__.py b/packages/core/src/python_redlines/__about__.py similarity index 81% rename from src/python_redlines/__about__.py rename to packages/core/src/python_redlines/__about__.py index b83c8b5..dbcffe0 100644 --- a/src/python_redlines/__about__.py +++ b/packages/core/src/python_redlines/__about__.py @@ -1,4 +1,4 @@ # SPDX-FileCopyrightText: 2024-present U.N. 
Owen # # SPDX-License-Identifier: MIT -__version__ = "0.0.4" +__version__ = "0.1.0" diff --git a/packages/core/src/python_redlines/__init__.py b/packages/core/src/python_redlines/__init__.py new file mode 100644 index 0000000..56014f6 --- /dev/null +++ b/packages/core/src/python_redlines/__init__.py @@ -0,0 +1,19 @@ +# SPDX-FileCopyrightText: 2024-present U.N. Owen +# +# SPDX-License-Identifier: MIT + +from .__about__ import __version__ +from .engines import ( + BaseEngine, + DocxodusEngine, + EngineNotInstalledError, + XmlPowerToolsEngine, +) + +__all__ = [ + "BaseEngine", + "XmlPowerToolsEngine", + "DocxodusEngine", + "EngineNotInstalledError", + "__version__", +] diff --git a/src/python_redlines/engines.py b/packages/core/src/python_redlines/engines.py similarity index 55% rename from src/python_redlines/engines.py rename to packages/core/src/python_redlines/engines.py index c2ca8dd..0a9f346 100644 --- a/src/python_redlines/engines.py +++ b/packages/core/src/python_redlines/engines.py @@ -1,104 +1,129 @@ -import subprocess -import tempfile +import importlib.metadata +import importlib.resources +import logging import os import platform -import logging -import zipfile +import subprocess import tarfile +import tempfile +import zipfile from pathlib import Path -from typing import Union, Tuple, Optional +from typing import Optional, Tuple, Union + +import platformdirs from .__about__ import __version__ logger = logging.getLogger(__name__) +class EngineNotInstalledError(ImportError): + """Raised when an engine is used but its binary package is not installed.""" + + +def _detect_rid() -> str: + """Return the .NET-style runtime identifier for the current platform.""" + os_name = platform.system().lower() + machine = platform.machine().lower() + + if machine in ('x86_64', 'amd64'): + arch = 'x64' + elif machine in ('arm64', 'aarch64'): + arch = 'arm64' + else: + raise EnvironmentError(f"Unsupported architecture: {machine}") + + if os_name == 'linux': + return 
f'linux-{arch}' + if os_name == 'windows': + return f'win-{arch}' + if os_name == 'darwin': + return f'osx-{arch}' + raise EnvironmentError(f"Unsupported OS: {os_name}") + + class BaseEngine(object): """ - Base class for redline comparison engines. Subclasses must define: - - DIST_DIR_NAME: directory name under src/python_redlines/ holding compressed binaries - - BIN_DIR_NAME: directory name under src/python_redlines/ for extracted binaries - - BINARY_BASE_NAME: the name of the executable (without .exe extension) + Base class for redline comparison engines. Each engine ships its compiled + binary in a separate, optional companion package; subclasses declare: + - BINARY_PACKAGE: importable package name that ships the binary archives + - BINARY_BASE_NAME: the executable name (without .exe extension) + - EXTRA_NAME: the python-redlines extra that installs the companion package """ - DIST_DIR_NAME: str = NotImplemented - BIN_DIR_NAME: str = NotImplemented + BINARY_PACKAGE: str = NotImplemented BINARY_BASE_NAME: str = NotImplemented + EXTRA_NAME: str = NotImplemented def __init__(self, target_path: Optional[str] = None): self.target_path = target_path - self.extracted_binaries_path = self._unzip_binary() + self.extracted_binaries_path = self._resolve_binary() - def _unzip_binary(self): + def _resolve_binary(self) -> str: """ - Unzips the appropriate C# binary for the current platform. + Locate the platform binary inside the companion package, extracting it + once into a writable cache directory. Returns the path to the executable. 
""" - base_path = os.path.dirname(__file__) - binaries_path = os.path.join(base_path, self.DIST_DIR_NAME) - target_path = self.target_path if self.target_path else os.path.join(base_path, self.BIN_DIR_NAME) + rid = _detect_rid() + is_windows = rid.startswith('win-') + archive_name = f'{rid}.zip' if is_windows else f'{rid}.tar.gz' + binary_name = f'{self.BINARY_BASE_NAME}.exe' if is_windows else self.BINARY_BASE_NAME - if not os.path.exists(target_path): - os.makedirs(target_path) + try: + package_root = importlib.resources.files(self.BINARY_PACKAGE) + except ModuleNotFoundError as exc: + raise EngineNotInstalledError( + f"{type(self).__name__} requires the '{self.BINARY_PACKAGE}' package. " + f"Install it with: pip install python-redlines[{self.EXTRA_NAME}]" + ) from exc - # Get the binary name and zip name based on the OS and architecture - binary_name, zip_name = self._get_binaries_info() + archive = package_root / '_binaries' / archive_name + if not archive.is_file(): + raise EngineNotInstalledError( + f"{type(self).__name__}: '{self.BINARY_PACKAGE}' is installed but contains " + f"no binary for platform '{rid}'. The wheel may target a different platform." + ) - # Check if the binary already exists. If not, extract it. - full_binary_path = os.path.join(target_path, binary_name) + extract_root = self._extraction_root() / rid + binary_path = extract_root / binary_name - if not os.path.exists(full_binary_path): - zip_path = os.path.join(binaries_path, zip_name) - self._extract_binary(zip_path, target_path) + if not binary_path.exists(): + self._extract_archive(archive, extract_root) - return os.path.join(target_path, binary_name) + if not is_windows: + os.chmod(binary_path, 0o755) - def _extract_binary(self, zip_path: str, target_path: str): - """ - Extracts the binary from the zip file based on the extension. 
Supports .zip and .tar.gz files - :parameter - zip_path: str - The path to the zip file - target_path: str - The path to extract the binary to - """ - if zip_path.endswith('.zip'): - with zipfile.ZipFile(zip_path, 'r') as zip_ref: - zip_ref.extractall(target_path) + return str(binary_path) - elif zip_path.endswith('.tar.gz'): - with tarfile.open(zip_path, 'r:gz') as tar_ref: - tar_ref.extractall(target_path) + def _extraction_root(self) -> Path: + """Directory the binary is extracted into (writable, outside site-packages).""" + if self.target_path: + return Path(self.target_path) - def _get_binaries_info(self): - """ - Returns the binary name and zip name based on the OS and architecture - :return - binary_name: str - The name of the binary file - zip_name: str - The name of the zip file - """ - os_name = platform.system().lower() - arch = platform.machine().lower() - - if arch in ('x86_64', 'amd64'): - arch = 'x64' - elif arch in ('arm64', 'aarch64'): - arch = 'arm64' - else: - raise EnvironmentError(f"Unsupported architecture: {arch}") - - if os_name == 'linux': - zip_name = f"linux-{arch}-{__version__}.tar.gz" - binary_name = f'linux-{arch}/{self.BINARY_BASE_NAME}' - - elif os_name == 'windows': - zip_name = f"win-{arch}-{__version__}.zip" - binary_name = f'win-{arch}/{self.BINARY_BASE_NAME}.exe' - - elif os_name == 'darwin': - zip_name = f"osx-{arch}-{__version__}.tar.gz" - binary_name = f'osx-{arch}/{self.BINARY_BASE_NAME}' + try: + pkg_version = importlib.metadata.version(self.BINARY_PACKAGE.replace('_', '-')) + except importlib.metadata.PackageNotFoundError: + pkg_version = __version__ - else: - raise EnvironmentError("Unsupported OS") + return Path(platformdirs.user_cache_dir('python-redlines')) / self.EXTRA_NAME / pkg_version - return binary_name, zip_name + @staticmethod + def _extract_archive(archive, target_path: Path): + """Extract a .zip or .tar.gz archive (a Traversable) into target_path.""" + target_path.mkdir(parents=True, exist_ok=True) + name = 
archive.name + + with importlib.resources.as_file(archive) as archive_path: + if name.endswith('.zip'): + with zipfile.ZipFile(archive_path, 'r') as zip_ref: + zip_ref.extractall(target_path) + elif name.endswith('.tar.gz'): + with tarfile.open(archive_path, 'r:gz') as tar_ref: + try: + tar_ref.extractall(target_path, filter='data') + except TypeError: + tar_ref.extractall(target_path) + else: + raise ValueError(f"Unsupported archive format: {name}") def _build_command(self, author_tag: str, original_path, modified_path, target_path, **kwargs): """ @@ -159,15 +184,15 @@ def _write_to_temp_file(self, data): class XmlPowerToolsEngine(BaseEngine): - DIST_DIR_NAME = 'dist' - BIN_DIR_NAME = 'bin' + BINARY_PACKAGE = 'python_redlines_ooxmlpowertools' BINARY_BASE_NAME = 'redlines' + EXTRA_NAME = 'ooxmlpowertools' class DocxodusEngine(BaseEngine): - DIST_DIR_NAME = 'dist_docxodus' - BIN_DIR_NAME = 'bin_docxodus' + BINARY_PACKAGE = 'python_redlines_docxodus' BINARY_BASE_NAME = 'redline' + EXTRA_NAME = 'docxodus' # Boolean flags (default False — presence enables) _BOOL_FLAGS = [ diff --git a/packages/docxodus/README.md b/packages/docxodus/README.md new file mode 100644 index 0000000..d94c0f3 --- /dev/null +++ b/packages/docxodus/README.md @@ -0,0 +1,11 @@ +# python-redlines-docxodus + +Compiled Docxodus redline engine binary for +[`python-redlines`](https://pypi.org/project/python-redlines/). + +This package only contains the platform-specific engine binary. Install it via +the `python-redlines` extra rather than directly: + +```bash +pip install python-redlines[docxodus] +``` diff --git a/packages/docxodus/hatch_build.py b/packages/docxodus/hatch_build.py new file mode 100644 index 0000000..120046d --- /dev/null +++ b/packages/docxodus/hatch_build.py @@ -0,0 +1,42 @@ +"""Wheel build hook: stamp the platform tag from the bundled binary archive. + +Each binary package wheel must target exactly one platform. 
The archive placed +in src//_binaries/ by build_differ.py determines the wheel's platform tag. +""" +import pathlib + +from hatchling.builders.hooks.plugin.interface import BuildHookInterface + +# .NET runtime identifier -> wheel platform tag +PLATFORM_TAGS = { + "linux-x64": "manylinux2014_x86_64", + "linux-arm64": "manylinux2014_aarch64", + "win-x64": "win_amd64", + "win-arm64": "win_arm64", + "osx-x64": "macosx_11_0_x86_64", + "osx-arm64": "macosx_11_0_arm64", +} + + +class RedlinesBinaryBuildHook(BuildHookInterface): + PLUGIN_NAME = "custom" + + def initialize(self, version, build_data): + archives = sorted( + p for p in (pathlib.Path(self.root) / "src").glob("*/_binaries/*") + if p.name.endswith((".tar.gz", ".zip")) + ) + if len(archives) != 1: + raise ValueError( + f"Expected exactly one binary archive under src/*/_binaries/, " + f"found {len(archives)}: {[a.name for a in archives]}. " + f"Run `python build_differ.py ` to populate it before building." + ) + + rid = archives[0].name.split(".", 1)[0] + if rid not in PLATFORM_TAGS: + raise ValueError(f"Unknown runtime identifier '{rid}' from archive {archives[0].name}") + + build_data["pure_python"] = False + build_data["infer_tag"] = False + build_data["tag"] = f"py3-none-{PLATFORM_TAGS[rid]}" diff --git a/packages/docxodus/pyproject.toml b/packages/docxodus/pyproject.toml new file mode 100644 index 0000000..b1a4543 --- /dev/null +++ b/packages/docxodus/pyproject.toml @@ -0,0 +1,35 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "python-redlines-docxodus" +dynamic = ["version"] +description = "Docxodus redline engine binary for python-redlines." 
+readme = "README.md" +requires-python = ">=3.9" +license = "MIT" +authors = [ + { name = "John Scrudato IV" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", +] + +[project.urls] +Homepage = "https://github.com/JSv4/Python-Redlines" +Source = "https://github.com/JSv4/Python-Redlines" + +# Version is single-sourced from the core package so all three release together. +[tool.hatch.version] +path = "../core/src/python_redlines/__about__.py" + +[tool.hatch.build.targets.wheel] +packages = ["src/python_redlines_docxodus"] +artifacts = ["*.tar.gz", "*.zip"] + +# Stamps the wheel with the correct platform tag based on the archive in _binaries/. +[tool.hatch.build.targets.wheel.hooks.custom] +path = "hatch_build.py" diff --git a/packages/docxodus/src/python_redlines_docxodus/__init__.py b/packages/docxodus/src/python_redlines_docxodus/__init__.py new file mode 100644 index 0000000..7bb3506 --- /dev/null +++ b/packages/docxodus/src/python_redlines_docxodus/__init__.py @@ -0,0 +1,10 @@ +"""Docxodus redline engine binary for python-redlines. + +This package ships only the compiled engine binary for a single platform, +under ``_binaries/``. It is installed automatically via:: + + pip install python-redlines[docxodus] + +It is not meant to be imported directly; ``python_redlines.DocxodusEngine`` +locates the binary here at runtime. +""" diff --git a/packages/docxodus/src/python_redlines_docxodus/_binaries/.gitkeep b/packages/docxodus/src/python_redlines_docxodus/_binaries/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/packages/ooxmlpowertools/README.md b/packages/ooxmlpowertools/README.md new file mode 100644 index 0000000..5231bbf --- /dev/null +++ b/packages/ooxmlpowertools/README.md @@ -0,0 +1,11 @@ +# python-redlines-ooxmlpowertools + +Compiled Open-XML-PowerTools redline engine binary for +[`python-redlines`](https://pypi.org/project/python-redlines/). 
+ +This package only contains the platform-specific engine binary. Install it via +the `python-redlines` extra rather than directly: + +```bash +pip install python-redlines[ooxmlpowertools] +``` diff --git a/packages/ooxmlpowertools/hatch_build.py b/packages/ooxmlpowertools/hatch_build.py new file mode 100644 index 0000000..120046d --- /dev/null +++ b/packages/ooxmlpowertools/hatch_build.py @@ -0,0 +1,42 @@ +"""Wheel build hook: stamp the platform tag from the bundled binary archive. + +Each binary package wheel must target exactly one platform. The archive placed +in src/<package>/_binaries/ by build_differ.py determines the wheel's platform tag. +""" +import pathlib + +from hatchling.builders.hooks.plugin.interface import BuildHookInterface + +# .NET runtime identifier -> wheel platform tag +PLATFORM_TAGS = { + "linux-x64": "manylinux2014_x86_64", + "linux-arm64": "manylinux2014_aarch64", + "win-x64": "win_amd64", + "win-arm64": "win_arm64", + "osx-x64": "macosx_11_0_x86_64", + "osx-arm64": "macosx_11_0_arm64", +} + + +class RedlinesBinaryBuildHook(BuildHookInterface): + PLUGIN_NAME = "custom" + + def initialize(self, version, build_data): + archives = sorted( + p for p in (pathlib.Path(self.root) / "src").glob("*/_binaries/*") + if p.name.endswith((".tar.gz", ".zip")) + ) + if len(archives) != 1: + raise ValueError( + f"Expected exactly one binary archive under src/*/_binaries/, " + f"found {len(archives)}: {[a.name for a in archives]}. " + f"Run `python build_differ.py ` to populate it before building." 
+ ) + + rid = archives[0].name.split(".", 1)[0] + if rid not in PLATFORM_TAGS: + raise ValueError(f"Unknown runtime identifier '{rid}' from archive {archives[0].name}") + + build_data["pure_python"] = False + build_data["infer_tag"] = False + build_data["tag"] = f"py3-none-{PLATFORM_TAGS[rid]}" diff --git a/packages/ooxmlpowertools/pyproject.toml b/packages/ooxmlpowertools/pyproject.toml new file mode 100644 index 0000000..4b09428 --- /dev/null +++ b/packages/ooxmlpowertools/pyproject.toml @@ -0,0 +1,35 @@ +[build-system] +requires = ["hatchling"] +build-backend = "hatchling.build" + +[project] +name = "python-redlines-ooxmlpowertools" +dynamic = ["version"] +description = "Open-XML-PowerTools redline engine binary for python-redlines." +readme = "README.md" +requires-python = ">=3.9" +license = "MIT" +authors = [ + { name = "John Scrudato IV" }, +] +classifiers = [ + "Development Status :: 4 - Beta", + "License :: OSI Approved :: MIT License", + "Programming Language :: Python :: 3", +] + +[project.urls] +Homepage = "https://github.com/JSv4/Python-Redlines" +Source = "https://github.com/JSv4/Python-Redlines" + +# Version is single-sourced from the core package so all three release together. +[tool.hatch.version] +path = "../core/src/python_redlines/__about__.py" + +[tool.hatch.build.targets.wheel] +packages = ["src/python_redlines_ooxmlpowertools"] +artifacts = ["*.tar.gz", "*.zip"] + +# Stamps the wheel with the correct platform tag based on the archive in _binaries/. +[tool.hatch.build.targets.wheel.hooks.custom] +path = "hatch_build.py" diff --git a/packages/ooxmlpowertools/src/python_redlines_ooxmlpowertools/__init__.py b/packages/ooxmlpowertools/src/python_redlines_ooxmlpowertools/__init__.py new file mode 100644 index 0000000..b6e35e2 --- /dev/null +++ b/packages/ooxmlpowertools/src/python_redlines_ooxmlpowertools/__init__.py @@ -0,0 +1,10 @@ +"""Open-XML-PowerTools redline engine binary for python-redlines. 
+ +This package ships only the compiled engine binary for a single platform, +under ``_binaries/``. It is installed automatically via:: + + pip install python-redlines[ooxmlpowertools] + +It is not meant to be imported directly; ``python_redlines.XmlPowerToolsEngine`` +locates the binary here at runtime. +""" diff --git a/packages/ooxmlpowertools/src/python_redlines_ooxmlpowertools/_binaries/.gitkeep b/packages/ooxmlpowertools/src/python_redlines_ooxmlpowertools/_binaries/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/pyproject.toml b/pyproject.toml index 0ccfd85..8df8197 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,101 +1,19 @@ -[build-system] -requires = ["hatchling"] -build-backend = "hatchling.build" - -[tool.hatch.build.targets.wheel.force-include] -"dist" = "python_redlines/dist" - -[tool.hatch.build.targets.wheel] -artifacts = [ - "*.zip", - "*.tar.gz", -] -[tool.hatch.build.targets.sdist] -include = [ - "python_redlines/dist", - "python_redlines/dist_docxodus", - "python_redlines/bin", - "python_redlines/bin_docxodus", -] - -# Build hook to build the binaries for distribution... 
-[tool.hatch.build.hooks.custom] -path = "hatch_run_build_hook.py" - -[project] -name = "python-redlines" -dynamic = ["version"] -description = '' -readme = "README.md" -requires-python = ">=3.8" -license = "MIT" -keywords = [] -authors = [ - { name = "John Scrudato IV" }, -] -classifiers = [ - "Development Status :: 4 - Beta", - "Programming Language :: Python", - "Programming Language :: Python :: 3.8", - "Programming Language :: Python :: 3.9", - "Programming Language :: Python :: 3.10", - "Programming Language :: Python :: 3.11", - "Programming Language :: Python :: 3.12", - "Programming Language :: Python :: Implementation :: CPython", - "Programming Language :: Python :: Implementation :: PyPy", -] -dependencies = [ - "hatch", - "hatchling" -] - -[project.urls] -Documentation = "https://github.com/unknown/python-redlines#readme" -Issues = "https://github.com/unknown/python-redlines/issues" -Source = "https://github.com/unknown/python-redlines" - -[tool.hatch.version] -path = "src/python_redlines/__about__.py" - -[tool.hatch.envs.default] -dependencies = [ - "coverage[toml]>=6.5", - "pytest", -] -[tool.hatch.envs.default.scripts] -test = "pytest {args:tests}" -test-cov = "coverage run -m pytest {args:tests}" -cov-report = [ - "- coverage combine", - "coverage report", -] -cov = [ - "test-cov", - "cov-report", -] -build = "python -m build_differ" - -[[tool.hatch.envs.all.matrix]] -python = ["3.8", "3.9", "3.10", "3.11", "3.12"] - -[tool.hatch.envs.types] -dependencies = [ - "mypy>=1.0.0", -] -[tool.hatch.envs.types.scripts] -check = "mypy --install-types --non-interactive {args:src/python_redlines tests}" +# This repository is a monorepo of three separately-published packages, each +# under packages/ with its own pyproject.toml: +# +# packages/core -> python-redlines (pure Python) +# packages/ooxmlpowertools -> python-redlines-ooxmlpowertools (engine binary) +# packages/docxodus -> python-redlines-docxodus (engine binary) +# +# The repo root is not itself an 
installable project. This file holds only the +# shared tooling configuration (pytest, coverage) used when running the tests. + +[tool.pytest.ini_options] +testpaths = ["tests"] [tool.coverage.run] -source_pkgs = ["python_redlines", "tests"] +source_pkgs = ["python_redlines"] branch = true -parallel = true -omit = [ - "src/python_redlines/__about__.py", -] - -[tool.coverage.paths] -python_redlines = ["src/python_redlines", "*/python-redlines/src/python_redlines"] -tests = ["tests", "*/python-redlines/tests"] [tool.coverage.report] exclude_lines = [ diff --git a/src/python_redlines/__init__.py b/src/python_redlines/__init__.py deleted file mode 100644 index 824b82e..0000000 --- a/src/python_redlines/__init__.py +++ /dev/null @@ -1,7 +0,0 @@ -# SPDX-FileCopyrightText: 2024-present U.N. Owen -# -# SPDX-License-Identifier: MIT - -from .engines import XmlPowerToolsEngine, DocxodusEngine, BaseEngine - -__all__ = ["XmlPowerToolsEngine", "DocxodusEngine", "BaseEngine"] diff --git a/src/python_redlines/bin/.gitignore b/src/python_redlines/bin/.gitignore deleted file mode 100644 index c96a04f..0000000 --- a/src/python_redlines/bin/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore \ No newline at end of file diff --git a/src/python_redlines/bin_docxodus/.gitignore b/src/python_redlines/bin_docxodus/.gitignore deleted file mode 100644 index d6b7ef3..0000000 --- a/src/python_redlines/bin_docxodus/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore diff --git a/src/python_redlines/dist/.gitignore b/src/python_redlines/dist/.gitignore deleted file mode 100644 index c96a04f..0000000 --- a/src/python_redlines/dist/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore \ No newline at end of file diff --git a/src/python_redlines/dist_docxodus/.gitignore b/src/python_redlines/dist_docxodus/.gitignore deleted file mode 100644 index d6b7ef3..0000000 --- a/src/python_redlines/dist_docxodus/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -* -!.gitignore