diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index bbed4a471..292e8d698 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -2,3 +2,5 @@ 3bc18907354a40f1d89dca1833a2719ba7fb0933 # Reorder import statements with isort 68a72c5a603283f70abce2651dcde9c6f0177c41 +# Migrate code style to Black 24 +d4dbd73fe6a91964af82fbf6e6cb8d70b77569a3 diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..2390d8c80 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,10 @@ +version: 2 +updates: + - package-ecosystem: "github-actions" + directory: "/" + schedule: + interval: "monthly" + groups: + github-actions: + patterns: + - "*" diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 77a16e4a1..f7c8a9966 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -10,18 +10,20 @@ on: env: PIPX_HOME: "/home/runner/.cache/pipx" PIPX_BIN_DIR: "/home/runner/.local/bin" - POETRY_VERSION: "1.5.1" + POETRY_VERSION: "1.8.2" +permissions: + contents: read jobs: lint: runs-on: ubuntu-22.04 name: lint with isort, Black & flake8 steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 - name: "Prepare: restore caches, install Poetry, set up Python" uses: ./.github/actions/prepare with: - python-version: "3.10" + python-version: "3.11" poetry-version: ${{ env.POETRY_VERSION }} - name: Install Python dev dependencies run: | @@ -40,12 +42,12 @@ jobs: runs-on: ubuntu-22.04 name: check CLI startup time steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 - name: "Prepare: restore caches, install Poetry, set up Python" id: prepare uses: ./.github/actions/prepare with: - python-version: "3.9" + python-version: "3.10" poetry-version: ${{ env.POETRY_VERSION }} - name: Install Python dependencies run: | @@ -59,10 +61,10 @@ jobs: timeout-minutes: 15 strategy: matrix: - python-version: ["3.8", "3.9", "3.10"] + python-version: ["3.9", "3.10", "3.11", "3.12"] name: test on Python ${{ matrix.python-version }} steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 - name: Install system packages run: | sudo apt-get install \ @@ -77,32 +79,42 @@ jobs: - name: Install Python dependencies run: | # Selectively install the optional dependencies for some Python versions - # For Python 3.8: - if [[ ${{ matrix.python-version }} == '3.8' ]]; then - poetry install -E "nn omikuji yake voikko stwfsa"; - fi # For Python 3.9: if [[ ${{ matrix.python-version }} == '3.9' ]]; then + poetry install -E "nn omikuji yake voikko stwfsa"; + fi + # For Python 3.10: + if [[ ${{ matrix.python-version }} == '3.10' ]]; then poetry install -E "fasttext spacy"; # download the small English pretrained spaCy model needed by spacy analyzer poetry run python -m spacy download en_core_web_sm --upgrade-strategy only-if-needed fi - # For Python 3.10: - if [[ ${{ matrix.python-version }} == '3.10' ]]; then - poetry install -E "nn omikuji yake stwfsa"; + # For Python 3.11: + if [[ ${{ matrix.python-version }} == '3.11' ]]; then + poetry install -E "nn fasttext yake stwfsa voikko spacy"; + # download the small English pretrained spaCy model needed by spacy analyzer + poetry run python -m spacy download en_core_web_sm --upgrade-strategy only-if-needed + fi + # For Python 3.12: + if [[ ${{ matrix.python-version }} == '3.12' ]]; then + poetry install -E "fasttext yake voikko spacy"; + # download the small English pretrained spaCy model needed by spacy analyzer + poetry run python -m spacy download en_core_web_sm --upgrade-strategy only-if-needed fi poetry run python -m nltk.downloader punkt - name: Test with pytest run: | poetry run pytest --cov=./ --cov-report xml - if [[ ${{ matrix.python-version }} == '3.9' ]]; then + if [[ ${{ matrix.python-version }} == '3.10' ]]; then poetry run pytest --cov=./ --cov-report xml --cov-append -m slow fi - name: Upload coverage to Codecov - uses: codecov/codecov-action@81cd2dc8148241f03f5839d295e000b8f761e378 # v3.1.0 + uses: codecov/codecov-action@c16abc29c95fcf9174b58eb7e1abf4c866893bc8 # v4.1.1 + env: + CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }} - name: Save cache if: steps.prepare.outputs.cache-matched-key != format('poetry-installation-and-cache-{0}-{1}-{2}', matrix.python-version, env.POETRY_VERSION, hashFiles('**/poetry.lock')) - uses: actions/cache/save@v3 + uses: actions/cache/save@0c45773b623bea8c8e75f6c82b208c3cf94ea4f9 # v4.0.2 with: path: | ~/.cache/pipx/venvs @@ -117,7 +129,7 @@ jobs: timeout-minutes: 15 steps: - name: "Build image for testing" - uses: docker/build-push-action@c56af957549030174b10d6867f20e78cfd7debc5 # v3.2.0 + uses: docker/build-push-action@2cdde995de11925a030ce8070c3d77a52ffcf1c0 # v5.3.0 with: push: false tags: test-image @@ -133,20 +145,20 @@ jobs: if: github.event_name == 'push' && github.ref == 'refs/heads/main' steps: - name: Login to Quay.io - uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a # v2.1.0 + uses: docker/login-action@e92390c5fb421da1463c202d546fed0ec5c39f20 # v3.1.0 with: registry: quay.io username: ${{ secrets.YHTEENTOIMIVUUSPALVELUT_QUAY_IO_USERNAME }} password: ${{ secrets.YHTEENTOIMIVUUSPALVELUT_QUAY_IO_PASSWORD }} - name: Docker meta id: meta - uses: docker/metadata-action@57396166ad8aefe6098280995947635806a0e6ea # v4.1.1 + uses: docker/metadata-action@8e5442c4ef9f78752691e2d8f8d19755c6f78e81 # v5.5.1 with: images: quay.io/natlibfi/annif tags: | latest - name: Build and push to Quay.io - uses: docker/build-push-action@c56af957549030174b10d6867f20e78cfd7debc5 # v3.2.0 + uses: docker/build-push-action@2cdde995de11925a030ce8070c3d77a52ffcf1c0 # v5.3.0 with: push: true tags: ${{ steps.meta.outputs.tags }} @@ -158,7 +170,7 @@ jobs: runs-on: ubuntu-22.04 if: github.event_name == 'push' && contains(github.ref, 'refs/tags/') steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 - name: "Prepare: restore caches, install Poetry, set up Python" uses: ./.github/actions/prepare with: @@ -172,14 +184,14 @@ jobs: poetry publish --build - name: Login to Quay.io - uses: docker/login-action@f4ef78c080cd8ba55a85445d5b36e214a81df20a # v2.1.0 + uses: docker/login-action@e92390c5fb421da1463c202d546fed0ec5c39f20 # v3.1.0 with: registry: quay.io username: ${{ secrets.YHTEENTOIMIVUUSPALVELUT_QUAY_IO_USERNAME }} password: ${{ secrets.YHTEENTOIMIVUUSPALVELUT_QUAY_IO_PASSWORD }} - name: Docker meta id: meta - uses: docker/metadata-action@57396166ad8aefe6098280995947635806a0e6ea # v4.1.1 + uses: docker/metadata-action@8e5442c4ef9f78752691e2d8f8d19755c6f78e81 # v5.5.1 with: images: quay.io/natlibfi/annif tags: | @@ -187,7 +199,7 @@ jobs: type=semver,pattern={{version}} type=semver,pattern={{major}}.{{minor}} - name: Build and push to Quay.io - uses: docker/build-push-action@c56af957549030174b10d6867f20e78cfd7debc5 # v3.2.0 + uses: docker/build-push-action@2cdde995de11925a030ce8070c3d77a52ffcf1c0 # v5.3.0 with: push: true tags: ${{ steps.meta.outputs.tags }} diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 05cc39b7e..6eacc6fdd 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -8,6 +8,9 @@ on: schedule: - cron: "22 11 * * 6" +permissions: + contents: read + jobs: analyze: name: Analyze @@ -24,18 +27,18 @@ jobs: steps: - name: Checkout - uses: actions/checkout@v3 + uses: actions/checkout@9bb56186c3b09b4f86b1c65136769dd318469633 # v4.1.2 - name: Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@1b1aada464948af03b950897e5eb522f92603cc2 # v3.24.9 with: languages: ${{ matrix.language }} queries: +security-and-quality - name: Autobuild - uses: github/codeql-action/autobuild@v2 + uses: github/codeql-action/autobuild@1b1aada464948af03b950897e5eb522f92603cc2 # v3.24.9 - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@1b1aada464948af03b950897e5eb522f92603cc2 # v3.24.9 with: category: "/language:${{ matrix.language }}" diff --git a/.github/workflows/docker-rebuild.yml b/.github/workflows/docker-rebuild.yml index 2a3f53b79..2b4301bb0 100644 --- a/.github/workflows/docker-rebuild.yml +++ b/.github/workflows/docker-rebuild.yml @@ -1,5 +1,8 @@ name: "Docker rebuild" on: workflow_dispatch +permissions: + contents: read + jobs: rebuild-docker-images: name: "Docker rebuild" @@ -7,7 +10,7 @@ jobs: timeout-minutes: 15 steps: - name: "Build for testing" - uses: docker/build-push-action@c56af957549030174b10d6867f20e78cfd7debc5 # v3.2.0 + uses: docker/build-push-action@2cdde995de11925a030ce8070c3d77a52ffcf1c0 # v5.3.0 with: push: false tags: test-image @@ -15,14 +18,14 @@ jobs: run: | docker run --rm --workdir /Annif test-image pytest -p no:cacheprovider - name: Login to Quay.io - uses: docker/login-action@465a07811f14bebb1938fbed4728c6a1ff8901fc # v2.2.0 + uses: docker/login-action@e92390c5fb421da1463c202d546fed0ec5c39f20 # v3.1.0 with: registry: quay.io username: ${{ secrets.YHTEENTOIMIVUUSPALVELUT_QUAY_IO_USERNAME }} password: ${{ secrets.YHTEENTOIMIVUUSPALVELUT_QUAY_IO_PASSWORD }} - name: Docker meta id: meta - uses: docker/metadata-action@2c0bd771b40637d97bf205cbccdd294a32112176 # v4.5.0 + uses: docker/metadata-action@8e5442c4ef9f78752691e2d8f8d19755c6f78e81 # v5.5.1 with: images: quay.io/natlibfi/annif flavor: | @@ -32,7 +35,7 @@ jobs: type=semver,pattern={{version}} type=semver,pattern={{major}}.{{minor}} - name: Build and push to Quay.io - uses: docker/build-push-action@44ea916f6c540f9302d50c2b1e5a8dc071f15cdf # v4.1.0 + uses: docker/build-push-action@2cdde995de11925a030ce8070c3d77a52ffcf1c0 # v5.3.0 with: push: true tags: ${{ steps.meta.outputs.tags }} diff --git a/.scrutinizer.yml b/.scrutinizer.yml index ebf755aa4..095b59f43 100644 --- a/.scrutinizer.yml +++ b/.scrutinizer.yml @@ -4,7 +4,7 @@ checks: duplicate_code: true build: environment: - python: 3.8.12 + python: 3.9.17 dependencies: override: - pip install .[dev] diff --git a/CITATION.cff b/CITATION.cff index 71a2bd3fd..69c88f335 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -33,11 +33,11 @@ authors: affiliation: "National Library of Finland" title: "Annif" abstract: "Annif is an automatic indexing software." -version: 1.0.0-dev +version: 1.2.0-dev license: - Apache-2.0 - GPL-3.0 -date-released: 2023-04-18 +date-released: 2024-04-25 doi: 10.5281/zenodo.2578948 repository-code: "https://github.com/NatLibFi/Annif" contact: diff --git a/Dockerfile b/Dockerfile index dbd98cb07..5ea240ed9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -FROM python:3.10-slim-bullseye +FROM python:3.10-slim-bookworm LABEL org.opencontainers.image.authors="grp-natlibfi-annif@helsinki.fi" SHELL ["/bin/bash", "-c"] @@ -50,8 +50,11 @@ RUN annif completion --bash >> /etc/bash.bashrc # Enable tab completion RUN groupadd -g 998 annif_user && \ useradd -r -u 998 -g annif_user annif_user && \ chmod -R a+rX /Annif && \ - mkdir -p /Annif/tests/data && \ + mkdir -p /Annif/tests/data /Annif/projects.d && \ chown -R annif_user:annif_user /annif-projects /Annif/tests/data USER annif_user +ENV HF_HOME="/tmp" + +ENV GUNICORN_CMD_ARGS="--worker-class uvicorn.workers.UvicornWorker" CMD annif diff --git a/LICENSE.txt b/LICENSE.txt index 861a227e5..6ac606f41 100644 --- a/LICENSE.txt +++ b/LICENSE.txt @@ -1,4 +1,4 @@ -Copyright (c) 2017-2022 University Of Helsinki (The National Library Of Finland) +Copyright (c) 2017-2024 University Of Helsinki (The National Library Of Finland) Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. diff --git a/README.md b/README.md index 6d0eb5f2e..70b35a3f8 100644 --- a/README.md +++ b/README.md @@ -2,10 +2,12 @@ [![DOI](https://zenodo.org/badge/100936800.svg)](https://zenodo.org/badge/latestdoi/100936800) [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0) +[![Container image](https://img.shields.io/badge/container_image-quay.io-blue.svg)](https://quay.io/repository/natlibfi/annif) [![CI/CD](https://github.com/NatLibFi/Annif/actions/workflows/cicd.yml/badge.svg)](https://github.com/NatLibFi/Annif/actions/workflows/cicd.yml) [![codecov](https://codecov.io/gh/NatLibFi/Annif/branch/main/graph/badge.svg)](https://codecov.io/gh/NatLibFi/Annif) -[![Code Climate](https://codeclimate.com/github/NatLibFi/Annif/badges/gpa.svg)](https://codeclimate.com/github/NatLibFi/Annif) [![Scrutinizer Code Quality](https://scrutinizer-ci.com/g/NatLibFi/Annif/badges/quality-score.png?b=main)](https://scrutinizer-ci.com/g/NatLibFi/Annif/?branch=main) +[![Code Climate](https://codeclimate.com/github/NatLibFi/Annif/badges/gpa.svg)](https://codeclimate.com/github/NatLibFi/Annif) +[![OpenSSF Scorecard](https://api.securityscorecards.dev/projects/github.com/NatLibFi/Annif/badge)](https://securityscorecards.dev/viewer/?uri=github.com/NatLibFi/Annif) [![codebeat badge](https://codebeat.co/badges/7a8ef539-0094-48b8-84c2-c413b4a50d57)](https://codebeat.co/projects/github-com-natlibfi-annif-main) [![CodeQL](https://github.com/NatLibFi/Annif/actions/workflows/codeql.yml/badge.svg)](https://github.com/NatLibFi/Annif/actions/workflows/codeql.yml) [![Quality Gate Status](https://sonarcloud.io/api/project_badges/measure?project=NatLibFi_Annif&metric=alert_status)](https://sonarcloud.io/dashboard?id=NatLibFi_Annif) @@ -17,16 +19,16 @@ a statistical automated indexing tool that used metadata from the [Finna.fi](https://finna.fi) discovery interface as a training corpus. This repo contains a rewritten production version of Annif based on the -[prototype](https://github.com/osma/annif). It is a work in progress, but -already functional for many common tasks. +[prototype](https://github.com/osma/annif). -[Finto AI](https://ai.finto.fi/) is a service based on Annif; see the [source code for Finto AI](https://github.com/NatLibFi/FintoAI). +[Finto AI](https://ai.finto.fi/) is a service based on Annif; see the [source code of Finto AI](https://github.com/NatLibFi/FintoAI) +and the [🤗 Hugging Face Hub collection](https://huggingface.co/collections/NatLibFi/annif-models-65b35fb98b7c508c8e8a1570) containing the models Finto AI uses. # Basic install Annif is developed and tested on Linux. If you want to run Annif on Windows or Mac OS, the recommended way is to use Docker (see below) or a Linux virtual machine. -You will need Python 3.8+ to install Annif. +You will need Python 3.9-3.12 to install Annif. The recommended way is to install Annif from [PyPI](https://pypi.org/project/annif/) into a virtual environment. @@ -71,7 +73,7 @@ For details and usage for other shells see [Click documentation](https://click.palletsprojects.com/en/8.1.x/shell-completion/). # Docker install -You can use Annif as a pre-built Docker container. Please see the +You can use Annif as a pre-built Docker container image from [quay.io/natlibfi/annif](https://quay.io/repository/natlibfi/annif) repository. Please see the [wiki documentation](https://github.com/NatLibFi/Annif/wiki/Usage-with-Docker) for details. @@ -130,7 +132,7 @@ Many resources are available: # Publications / How to cite -Two articles about Annif have been published in peer-reviewed Open Access +See below for some articles about Annif in peer-reviewed Open Access journals. The software itself is also archived on Zenodo and has a [citable DOI](https://doi.org/10.5281/zenodo.5654173). @@ -141,6 +143,24 @@ See "Cite this repository" in the details of the repository. ## Annif articles