Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
111 changes: 111 additions & 0 deletions .github/workflows/update-python-pkg-index.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,111 @@
name: Updates the Python package index upon an event.

on:
  # Remote trigger: fired through the `repository_dispatch` API with the
  # event type listed below.
  repository_dispatch:
    types:
      - update_package_index

  # Manual trigger. According to the
  # [documentation](https://docs.github.com/en/actions/how-tos/manage-workflow-runs/manually-run-a-workflow#configuring-a-workflow-to-run-manually)
  # a workflow can only be started manually if its file is located in the
  # default branch.
  workflow_dispatch:
    inputs:
      source_repo:
        type: string
        required: true
        description: 'Name of the repo that contains the dependency.'
      source_org:
        type: string
        required: true
        description: 'Name of the organization/user that owns the dependency repo.'
      dependency_ref:
        type: string
        required: true
        description: 'Reference that in the dependency repo that should be checked out and turned into a dependency.'

  # Bootstrap helper: as long as this file has not landed in `main`, the web
  # interface does not pick the workflow up. Temporarily enabling the trigger
  # below works around that, see https://stackoverflow.com/a/71057825
  #pull_request:

jobs:
  update-index:
    runs-on: ubuntu-latest

    # The last step pushes a commit to `main`, so the job token needs write
    # access to the repository contents.
    permissions:
      contents: write

    # Resolve the three parameters exactly once. For a manual run they come
    # from `inputs`, for a `repository_dispatch` run `inputs.*` is empty and the
    # values are taken from the client payload instead.
    #
    # The values are handed to the scripts as environment variables rather than
    # being interpolated with `${{ ... }}` directly into the `run:` blocks: an
    # interpolated value becomes part of the script text, so a crafted payload
    # could inject arbitrary shell commands.
    env:
      SOURCE_REPO: ${{ inputs.source_repo == '' && github.event.client_payload.source_repo || inputs.source_repo }}
      SOURCE_ORG: ${{ inputs.source_org == '' && github.event.client_payload.source_org || inputs.source_org }}
      DEPENDENCY_REF: ${{ inputs.dependency_ref == '' && github.event.client_payload.dependency_ref || inputs.dependency_ref }}

    steps:
      - name: Print all variables
        shell: bash
        env:
          # The JSON contains double quotes, which would terminate the quoted
          # `echo` argument if it were pasted into the script text.
          CLIENT_PAYLOAD: ${{ toJson(github.event.client_payload) }}
        run: |
          echo "source_repo: ${SOURCE_REPO}"
          echo "source_org: ${SOURCE_ORG}"
          echo "dep_ref: ${DEPENDENCY_REF}"
          echo "payload: ${CLIENT_PAYLOAD}"

      - name: Checkout the `main` branch of the Python package index.
        uses: actions/checkout@v4
        with:
          path: index_repo
          ref: main  # We always work on main!

      - name: Checkout the repo of the dependency that should be added to the index.
        uses: actions/checkout@v4
        with:
          repository: ${{ env.SOURCE_ORG }}/${{ env.SOURCE_REPO }}
          path: ${{ env.SOURCE_REPO }}
          submodules: 'recursive'
          ref: ${{ env.DEPENDENCY_REF }}

      - name: Build the distribution file.
        shell: bash
        run: |
          DEPENDENCY_REPO="${PWD}/${SOURCE_REPO}"
          PACKAGE_BUILD_FOLDER="${PWD}/index_repo/build"

          cd "${DEPENDENCY_REPO}"
          python -m pip install build --user
          python -m build --wheel --outdir "${PACKAGE_BUILD_FOLDER}"

      - name: Test the distribution file.
        shell: bash
        run: |
          PACKAGE_BUILD_FOLDER="${PWD}/index_repo/build"
          DESTINATION_FOLDER="${PWD}/index_repo/${SOURCE_REPO}"
          mkdir -p "${DESTINATION_FOLDER}"

          readarray -t -d "" PACKAGE_FILES < <(find "${PACKAGE_BUILD_FOLDER}" -type f -print0)
          for PACKAGE_FILE in "${PACKAGE_FILES[@]}"
          do
            # The step runs with `bash -e`, so a failing command that is not
            # part of a condition aborts the script before `$?` could be
            # inspected. Testing the command directly keeps the error message.
            if ! pip install --force-reinstall --upgrade --no-deps "${PACKAGE_FILE}"
            then
              echo "Failed to install package '${PACKAGE_FILE}'"
              exit 3
            fi
            echo "Successfully tested '${PACKAGE_FILE}'"
            cp -t "${DESTINATION_FOLDER}" "${PACKAGE_FILE}"
          done

      - name: Rescan the package index and update the static `index.html` files.
        shell: bash
        env:
          CI_COMMIT_MESSAGE: updated dependency "${{ env.SOURCE_ORG }}/${{ env.SOURCE_REPO }}"
          CI_COMMIT_AUTHOR: github-actions[bot]
          CI_COMMIT_EMAIL: username@users.noreply.github.com
        run: |
          cd ./index_repo

          # Not fully sure if this check is useful, because it seems that creating a wheel is not reproducible.
          # I.e. creating a wheel from a commit and then generating another wheel will result in a "different",
          # in terms of its hash, file than the first time.
          #
          # `git status` exits with 0 no matter whether there are changes, thus its
          # output has to be inspected. Untracked files must be included, because a
          # newly built wheel is untracked at this point.
          if [[ -z "$(git status --porcelain)" ]]; then
            # There are no changes.
            echo "There were no changes!"
            exit 0
          fi

          # Update all the packages.
          python generator.py

          # We directly push to main!
          git config --global user.name "${CI_COMMIT_AUTHOR}"
          git config --global user.email "${CI_COMMIT_EMAIL}"
          git add .
          git commit --no-verify -m "${CI_COMMIT_MESSAGE}"
          git push origin main
20 changes: 2 additions & 18 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,18 +1,2 @@
# This .gitignore is appropriate for repositories deployed to GitHub Pages and using
# a Gemfile as specified at https://github.com/github/pages-gem#conventional

# Basic Jekyll gitignores (synchronize to Jekyll.gitignore)
_site/
.sass-cache/
.jekyll-cache/
.jekyll-metadata

# Additional Ruby/bundler ignore for when you run: bundle install
/vendor

# Specific ignore for GitHub Pages
# GitHub Pages will always use its own deployed version of pages-gem
# This means GitHub Pages will NOT use your Gemfile.lock and therefore it is
# counterproductive to check this file into the repository.
# Details at https://github.com/github/pages-gem/issues/768
Gemfile.lock
.token*
build/
84 changes: 82 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,82 @@
# pypi-index
Python packages index
# Custom GT4Py Python Package Index Server
This repo hosts the custom packages that are needed to use GT4Py; these currently include:
- [GridTools/dace](https://github.com/GridTools/dace), currently only for the `next`.
- [ghex-org/GHEX](https://github.com/ghex-org/GHEX)


# Usage
The repo is intended to work fully automatically, orchestrated by GitHub actions.

## Workflow `update-python-pkg-index.yml`
This is the main workflow, in short it does:
- Pulls the repo, whose package should be updated.
- Creates a wheel from the repo that has been pulled.
- Tests if the wheel can be installed.
- Updates the package index, i.e. regenerates the `index.html` files; `generator.py` is used for this.
- Creates a commit containing the updated indexes and the generated wheel.
- Pushes the new commit directly to `main`.

The workflow can be started manually, either through the GitHub web interface or through the `issue_update.sh` script.
In either case some information has to be provided:
- The name of the repo on GitHub, generally referred to as "source repo".
- The owner (user or organization) that owns the repo, generally referred to as "source owner".
- The branch of the repo from which a Python package should be created, generally referred to as "dependency ref".

> According to the [documentation](https://docs.github.com/en/actions/reference/workflows-and-actions/events-that-trigger-workflows#repository_dispatch) the
> `repository_dispatch` trigger (the one that is used such that _other_ repo can start the update) only works when
> the workflow file is located on the default branch!


## `generator.py`
Script for updating the static pages.
It works by scanning subfolders, currently `dace` and `ghex`, and creates an index based on all Python packages it finds in them.
It is usually run automatically by the workflow.


## `issue_update.sh`
A simple script that allows to issue a manual remote update of the index.
For more information please see its help output.


## `update_workflows`
This folder contains the workflows that must be installed into the repos containing the dependency; these workflows then trigger the update chain.
Here are the steps that are needed to install them.


### Token
The first step is to create an access token for the package index.
It is recommended that a [fine grained access token](https://docs.github.com/en/authentication/keeping-your-account-and-data-secure/managing-your-personal-access-tokens#fine-grained-personal-access-tokens) is used.
The token should only grant access to the index repo and must have the '"Contents" repository permissions (write)' permission.

Then you must install the token in the depending repo; the updater workflow expects the name `PKG_UPDATE_TOKEN`.


### General Process of Installing the Workflow
The installation of workflows is not straightforward.
First you must activate (uncomment) the `pull_request` trigger and push it.
The net effect is that it will run once and GitHub will pick it up then.
Afterwards you have to disable that trigger again.


### DaCe
For DaCe the `dace-updater.yml` must be added to the DaCe repo.
Follow the steps above and place it in its [own dedicated PR](https://github.com/GridTools/dace/pull/12).
Note that it only works if this PR is included in the `gt4py-next-integration` branch, see [these instructions](https://github.com/GridTools/dace/pull/1).

The workflow listens for pushes to tags of the form `__gt4py-next-integration_*`; if such a push is detected, it will then inform the index repo about the new version.


# Design and Working
The index works currently in "pull mode".
This means that the dependent repos, i.e. DaCe or GHEX, inform the index (this repo) that a new version is available.
The index will then download the depending repo, build the Python package and update the html pages.

However, it would be conceptually simpler if the index were passive, i.e. if the dependent repos built the Python package themselves and pushed it to the index.
This design, "push mode", should become the new operation mode in the future.


# TODO:
- Install in DaCe
- Install in GHEX
- Configure the page to use `main` as source.

Empty file added dace/.gitkeep
Empty file.
121 changes: 121 additions & 0 deletions generator.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
"""Regenerates the index based on the specified folders.
"""
from typing import Final, Sequence

import hashlib
import pathlib
import re
import sys

#: The header for a html page.
#: It contains the opening `<body>` tag and has the `Title` interpolation, which
#: is used for both the `<title>` element and the top level `<h1>` heading.
HTML_HEADER: Final[str] = """\
<!DOCTYPE HTML>
<html>
<head>
<title>{Title}</title>
<meta charset="UTF-8" />
</head>

<body>
<h1>{Title}</h1>
"""

#: Contains the footer of an html page.
#: This includes the closing `</body>` tag.
HTML_FOOTER: Final[str] = """\
</body>
</html>
"""


def normalize_name(name: str) -> str:
    """Normalize the project name according to the rules in PEP503.

    Every run of `-`, `_` and `.` is collapsed into a single `-` and the result
    is converted to lower case.

    Args:
        name: The project name to normalize.

    Returns:
        The normalized project name.
    """
    return re.sub(r"[-_.]+", "-", name).lower()


def write_project_index(
    base_folder: pathlib.Path,
    project_name: str,
) -> int:
    """Write the `index.html` file of a single project.

    The function scans the folder `base_folder / project_name` and lists every
    file ending in `.zip`, `.tar.gz` or `.whl` in the `index.html` file that is
    (re)created inside that folder. Each link carries the SHA256 digest of the
    file as URL fragment.

    Args:
        base_folder: Folder that contains the project folders.
        project_name: Name of the project, which is also the name of its folder.

    Returns:
        The number of packages that were listed in the index.

    Raises:
        NotADirectoryError: If the project folder does not exist.
    """
    # Project folder must exist because we assume that the files are located inside.
    project_folder = base_folder / project_name
    if not project_folder.is_dir():
        raise NotADirectoryError(
            f"Expected that the project folder `{project_folder}` for project `{project_name}` exists."
        )

    found_packages = 0
    normalized_project_name = normalize_name(project_name)
    # The header declares the page as UTF-8, so do not rely on the locale's default encoding.
    with open(project_folder / "index.html", "wt", encoding="utf-8") as index:
        index.write(HTML_HEADER.format(Title=f"Custom Package for '{project_name}'"))

        # `iterdir()` yields the entries in arbitrary order, sorting them makes the
        # generated page reproducible and avoids spurious diffs in the repo.
        for file in sorted(project_folder.iterdir()):
            filename = file.name
            if filename.startswith(".") or not any(filename.endswith(ext) for ext in [".zip", ".tar.gz", ".whl"]):
                print(
                    f"While building the index for project '{project_name}' found non Python package file '{filename}', which will be ignored.",
                    file=sys.stderr,
                    flush=True,
                )
                continue
            assert filename.startswith(normalized_project_name + "-"), (
                f"Package file '{filename}' does not start with '{normalized_project_name}-'."
            )

            # Compute the hash such that we can append it to the link.
            with open(file, "rb") as F:
                digest = hashlib.file_digest(F, "sha256")

            # PEP503 says that the text of the anchor element must be the filename, so there
            # is no need for fancy processing of the file name. Furthermore, we assume that
            # the file names have the correct normalized name and version.
            # NOTE: The line break is `<br/>`, a closing `</br>` tag is not valid HTML.
            index.write(
                f'\t\t<a href="{filename}#sha256={digest.hexdigest()}">{filename}</a> <br/>\n'.replace("\t", " ")
            )
            found_packages += 1
        index.write(HTML_FOOTER)

    return found_packages


def write_package_index(
    base_folder: pathlib.Path,
    packages: Sequence[str],
) -> None:
    """Write the root `index.html` and the index of every listed project.

    For every name in `packages` that has a folder inside `base_folder` the
    project index is regenerated through `write_project_index()`. Projects
    that have no folder, or whose folder contains no package, are reported on
    `stderr` and are not linked from the root index.

    Args:
        base_folder: Folder in which the root `index.html` is written and that
            contains the project folders.
        packages: Names of the projects (and their folders) to process.
    """
    # The header declares the page as UTF-8, so do not rely on the locale's default encoding.
    with open(base_folder / "index.html", "wt", encoding="utf-8") as index:
        index.write(HTML_HEADER.format(Title="Custom Package Index for GT4Py"))

        for project_name in packages:
            project_folder = base_folder / project_name
            normalized_project_name = normalize_name(project_name)
            if not project_folder.is_dir():
                print(
                    f"There is not folder associated to the project `{project_name}`, skipping it.",
                    flush=True,
                    file=sys.stderr,
                )
                continue

            # Now generate the index for that project.
            found_packages = write_project_index(base_folder, project_name)

            if found_packages == 0:
                # Consider no packages not as an error, only output a warning.
                # TODO: Consider removing the folder.
                print(
                    f"No packages for project `{project_name}` could be located.",
                    flush=True,
                    file=sys.stderr,
                )
                continue

            # PEP503 requires that URLs that respond with an HTML page end with a `/`.
            index.write(f'\t\t<a href="{project_name}/">{normalized_project_name}</a>\n'.replace("\t", " "))

        index.write(HTML_FOOTER)


if __name__ == "__main__":
    # The index is generated in the folder that contains this script; the
    # project folders are expected to live next to it.
    script_folder = pathlib.Path(__file__).parent
    known_projects = ["dace", "ghex"]
    write_package_index(base_folder=script_folder, packages=known_projects)
Empty file added ghex/.gitkeep
Empty file.
11 changes: 11 additions & 0 deletions index.html
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
<!DOCTYPE HTML>
<html>
<head>
<title>Custom Python Package Index for the GridTools organization</title>
<meta charset="UTF-8" />
</head>

<body>
<h1>Custom Python Package Index for the GridTools organization</h1>
</body>
</html>
Loading