diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index bbed4a471..292e8d698 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -2,3 +2,5 @@ 3bc18907354a40f1d89dca1833a2719ba7fb0933 # Reorder import statements with isort 68a72c5a603283f70abce2651dcde9c6f0177c41 +# Migrate code style to Black 24 +d4dbd73fe6a91964af82fbf6e6cb8d70b77569a3 diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 716bccddc..3b2e1aa92 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -10,7 +10,7 @@ on: env: PIPX_HOME: "/home/runner/.cache/pipx" PIPX_BIN_DIR: "/home/runner/.local/bin" - POETRY_VERSION: "1.5.1" + POETRY_VERSION: "1.8.0" permissions: contents: read jobs: diff --git a/annif/analyzer/__init__.py b/annif/analyzer/__init__.py index a0f93ced3..27a2cd792 100644 --- a/annif/analyzer/__init__.py +++ b/annif/analyzer/__init__.py @@ -1,4 +1,5 @@ """Collection of language-specific analyzers and analyzer registry for Annif""" + from __future__ import annotations import re diff --git a/annif/analyzer/analyzer.py b/annif/analyzer/analyzer.py index 5ba876f9d..25bdb6b57 100644 --- a/annif/analyzer/analyzer.py +++ b/annif/analyzer/analyzer.py @@ -1,4 +1,5 @@ """Common functionality for analyzers.""" + from __future__ import annotations import abc diff --git a/annif/analyzer/simple.py b/annif/analyzer/simple.py index 4cc35e6f1..c3ff7240a 100644 --- a/annif/analyzer/simple.py +++ b/annif/analyzer/simple.py @@ -1,4 +1,5 @@ """Simple analyzer for Annif. Only folds words to lower case.""" + from __future__ import annotations from . import analyzer diff --git a/annif/analyzer/simplemma.py b/annif/analyzer/simplemma.py index e535b25de..e549a2585 100644 --- a/annif/analyzer/simplemma.py +++ b/annif/analyzer/simplemma.py @@ -1,4 +1,5 @@ """Simplemma analyzer for Annif, based on simplemma lemmatizer.""" + from __future__ import annotations import simplemma diff --git a/annif/analyzer/snowball.py b/annif/analyzer/snowball.py index 57990c2a1..1a17702eb 100644 --- a/annif/analyzer/snowball.py +++ b/annif/analyzer/snowball.py @@ -1,4 +1,5 @@ """Snowball analyzer for Annif, based on nltk Snowball stemmer.""" + from __future__ import annotations import functools diff --git a/annif/analyzer/spacy.py b/annif/analyzer/spacy.py index b5e9cbc55..184f03ffc 100644 --- a/annif/analyzer/spacy.py +++ b/annif/analyzer/spacy.py @@ -1,4 +1,5 @@ """spaCy analyzer for Annif which uses spaCy for lemmatization""" + from __future__ import annotations import annif.util diff --git a/annif/analyzer/voikko.py b/annif/analyzer/voikko.py index e6e693d65..b3e7d5007 100644 --- a/annif/analyzer/voikko.py +++ b/annif/analyzer/voikko.py @@ -1,4 +1,5 @@ """Voikko analyzer for Annif, based on libvoikko library.""" + from __future__ import annotations import functools diff --git a/annif/backend/__init__.py b/annif/backend/__init__.py index cbeeb648e..7be1264b4 100644 --- a/annif/backend/__init__.py +++ b/annif/backend/__init__.py @@ -1,4 +1,5 @@ """Registry of backend types for Annif""" + from __future__ import annotations from typing import TYPE_CHECKING, Type diff --git a/annif/backend/backend.py b/annif/backend/backend.py index be49e79fa..69f730d5d 100644 --- a/annif/backend/backend.py +++ b/annif/backend/backend.py @@ -1,4 +1,5 @@ """Common functionality for backends.""" + from __future__ import annotations import abc diff --git a/annif/backend/dummy.py b/annif/backend/dummy.py index bb52e22e6..d10ce8d16 100644 --- a/annif/backend/dummy.py +++ b/annif/backend/dummy.py @@ -1,4 +1,5 @@ """Dummy backend for testing basic interaction of projects and backends""" + from __future__ import annotations from typing import TYPE_CHECKING, Any diff --git a/annif/backend/ensemble.py b/annif/backend/ensemble.py index 9953c9e6c..a25e8a03f 100644 --- a/annif/backend/ensemble.py +++ b/annif/backend/ensemble.py @@ -1,4 +1,5 @@ """Ensemble backend that combines results from multiple projects""" + from __future__ import annotations from typing import TYPE_CHECKING, Any diff --git a/annif/backend/fasttext.py b/annif/backend/fasttext.py index 23c33539a..e102b02ba 100644 --- a/annif/backend/fasttext.py +++ b/annif/backend/fasttext.py @@ -1,4 +1,5 @@ """Annif backend using the fastText classifier""" + from __future__ import annotations import collections diff --git a/annif/backend/http.py b/annif/backend/http.py index 0fce7f8e4..9036ec152 100644 --- a/annif/backend/http.py +++ b/annif/backend/http.py @@ -1,5 +1,6 @@ """HTTP/REST client backend that makes calls to a web service and returns the results""" + from __future__ import annotations import importlib diff --git a/annif/backend/hyperopt.py b/annif/backend/hyperopt.py index 2c2e7422c..efbc10513 100644 --- a/annif/backend/hyperopt.py +++ b/annif/backend/hyperopt.py @@ -1,4 +1,5 @@ """Hyperparameter optimization functionality for backends""" + from __future__ import annotations import abc diff --git a/annif/backend/mixins.py b/annif/backend/mixins.py index 2bb094641..e4af03d26 100644 --- a/annif/backend/mixins.py +++ b/annif/backend/mixins.py @@ -1,4 +1,5 @@ """Annif backend mixins that can be used to implement features""" + from __future__ import annotations import abc diff --git a/annif/backend/mllm.py b/annif/backend/mllm.py index cbcef11b1..cd274cf65 100644 --- a/annif/backend/mllm.py +++ b/annif/backend/mllm.py @@ -1,4 +1,5 @@ """Maui-like Lexical Matching backend""" + from __future__ import annotations import os.path diff --git a/annif/backend/nn_ensemble.py b/annif/backend/nn_ensemble.py index d4bc09058..e1daab00a 100644 --- a/annif/backend/nn_ensemble.py +++ b/annif/backend/nn_ensemble.py @@ -1,5 +1,6 @@ """Neural network based ensemble backend that combines results from multiple projects.""" + from __future__ import annotations import os.path diff --git a/annif/backend/omikuji.py b/annif/backend/omikuji.py index 7a2e6a1bb..89d8f0ea9 100644 --- a/annif/backend/omikuji.py +++ b/annif/backend/omikuji.py @@ -1,4 +1,5 @@ """Annif backend using the Omikuji classifier""" + from __future__ import annotations import os.path diff --git a/annif/backend/pav.py b/annif/backend/pav.py index 61f4362d1..2ee30337a 100644 --- a/annif/backend/pav.py +++ b/annif/backend/pav.py @@ -2,6 +2,7 @@ learns which concept suggestions from each backend are trustworthy using the PAV algorithm, a.k.a. isotonic regression, to turn raw scores returned by individual backends into probabilities.""" + from __future__ import annotations import os.path @@ -69,13 +70,15 @@ def _merge_source_batches( reg_models = self._get_model(project_id) pav_batch = [ [ - SubjectSuggestion( - subject_id=sugg.subject_id, - score=reg_models[sugg.subject_id].predict([sugg.score])[0], - ) - if sugg.subject_id in reg_models - else SubjectSuggestion( - subject_id=sugg.subject_id, score=sugg.score + ( + SubjectSuggestion( + subject_id=sugg.subject_id, + score=reg_models[sugg.subject_id].predict([sugg.score])[0], + ) + if sugg.subject_id in reg_models + else SubjectSuggestion( + subject_id=sugg.subject_id, score=sugg.score + ) ) # default to raw score for sugg in result ] diff --git a/annif/backend/svc.py b/annif/backend/svc.py index 30fb23eb4..c18f1410b 100644 --- a/annif/backend/svc.py +++ b/annif/backend/svc.py @@ -1,4 +1,5 @@ """Annif backend using a SVM classifier""" + from __future__ import annotations import os.path diff --git a/annif/backend/tfidf.py b/annif/backend/tfidf.py index 1cca639ca..a77704446 100644 --- a/annif/backend/tfidf.py +++ b/annif/backend/tfidf.py @@ -1,5 +1,6 @@ """Backend that returns most similar subjects based on similarity in sparse TF-IDF normalized bag-of-words vector space""" + from __future__ import annotations import os.path diff --git a/annif/backend/yake.py b/annif/backend/yake.py index 2eeef9a9c..c8b933c9b 100644 --- a/annif/backend/yake.py +++ b/annif/backend/yake.py @@ -1,4 +1,5 @@ """Annif backend using Yake keyword extraction""" + # For license remarks of this backend see README.md: # https://github.com/NatLibFi/Annif#license. from __future__ import annotations diff --git a/annif/cli.py b/annif/cli.py index 73f18f02e..d8ca1ea56 100644 --- a/annif/cli.py +++ b/annif/cli.py @@ -1,7 +1,6 @@ """Definitions for command-line (Click) commands for invoking Annif operations and printing the results to console.""" - import collections import importlib import json diff --git a/annif/cli_util.py b/annif/cli_util.py index bbfa96df4..9f33f8153 100644 --- a/annif/cli_util.py +++ b/annif/cli_util.py @@ -1,4 +1,5 @@ """Utility functions for Annif CLI commands""" + from __future__ import annotations import collections diff --git a/annif/config.py b/annif/config.py index 810249bd6..8cdc7d04f 100644 --- a/annif/config.py +++ b/annif/config.py @@ -1,4 +1,5 @@ """Configuration file handling""" + from __future__ import annotations import configparser diff --git a/annif/corpus/__init__.py b/annif/corpus/__init__.py index 4200d2f87..fb0ceef86 100644 --- a/annif/corpus/__init__.py +++ b/annif/corpus/__init__.py @@ -1,6 +1,5 @@ """Annif corpus operations""" - from .combine import CombinedCorpus from .document import ( DocumentDirectory, diff --git a/annif/corpus/combine.py b/annif/corpus/combine.py index 75fcc7f55..a0ad02147 100644 --- a/annif/corpus/combine.py +++ b/annif/corpus/combine.py @@ -1,4 +1,5 @@ """Class for combining multiple corpora so they behave like a single corpus""" + from __future__ import annotations import itertools diff --git a/annif/corpus/document.py b/annif/corpus/document.py index 09a80a309..8d4ef5319 100644 --- a/annif/corpus/document.py +++ b/annif/corpus/document.py @@ -1,4 +1,5 @@ """Clases for supporting document corpora""" + from __future__ import annotations import glob diff --git a/annif/corpus/skos.py b/annif/corpus/skos.py index 462a35241..82a8c5c13 100644 --- a/annif/corpus/skos.py +++ b/annif/corpus/skos.py @@ -1,4 +1,5 @@ """Support for subjects loaded from a SKOS/RDF file""" + from __future__ import annotations import collections @@ -83,11 +84,15 @@ def languages(self) -> set[str]: def _concept_labels(self, concept: URIRef) -> dict[str, str]: by_lang = self.get_concept_labels(concept, self.PREF_LABEL_PROPERTIES) return { - lang: by_lang[lang][0] - if by_lang[lang] # correct lang - else by_lang[None][0] - if by_lang[None] # no language - else self.graph.namespace_manager.qname(concept) + lang: ( + by_lang[lang][0] + if by_lang[lang] # correct lang + else ( + by_lang[None][0] + if by_lang[None] # no language + else self.graph.namespace_manager.qname(concept) + ) + ) for lang in self.languages } diff --git a/annif/corpus/subject.py b/annif/corpus/subject.py index a9ee06397..2e3ffe5de 100644 --- a/annif/corpus/subject.py +++ b/annif/corpus/subject.py @@ -1,4 +1,5 @@ """Classes for supporting subject corpora expressed as directories or files""" + from __future__ import annotations import csv diff --git a/annif/corpus/types.py b/annif/corpus/types.py index e6cd4b252..e94a8c1d9 100644 --- a/annif/corpus/types.py +++ b/annif/corpus/types.py @@ -1,4 +1,5 @@ """Basic types for document and subject corpora""" + from __future__ import annotations import abc diff --git a/annif/datadir.py b/annif/datadir.py index 752da32dd..02987490d 100644 --- a/annif/datadir.py +++ b/annif/datadir.py @@ -1,4 +1,5 @@ """Mixin class for types that need a data directory""" + from __future__ import annotations import os diff --git a/annif/eval.py b/annif/eval.py index 5ec5bd17a..95cdf449d 100644 --- a/annif/eval.py +++ b/annif/eval.py @@ -1,4 +1,5 @@ """Evaluation metrics for Annif""" + from __future__ import annotations import warnings @@ -63,10 +64,10 @@ def ndcg_score(y_true: csr_array, y_pred: csr_array, limit: int | None = None) - scores = np.ones(y_true.shape[0], dtype=np.float32) for i in range(y_true.shape[0]): - true = y_true.getrow(i) + true = y_true[[i]] idcg = dcg_score(true, true, limit) if idcg > 0: - pred = y_pred.getrow(i) + pred = y_pred[[i]] dcg = dcg_score(true, pred, limit) scores[i] = dcg / idcg @@ -86,9 +87,9 @@ def __init__(self, subject_index: SubjectIndex) -> None: def evaluate_many( self, - suggestion_batch: list[list[SubjectSuggestion]] - | SuggestionBatch - | list[Iterator], + suggestion_batch: ( + list[list[SubjectSuggestion]] | SuggestionBatch | list[Iterator] + ), gold_subject_batch: Sequence[SubjectSet], ) -> None: if not isinstance(suggestion_batch, SuggestionBatch): diff --git a/annif/exception.py b/annif/exception.py index b4b9c6552..9132d0134 100644 --- a/annif/exception.py +++ b/annif/exception.py @@ -1,4 +1,5 @@ """Custom exceptions used by Annif""" + from __future__ import annotations from click import ClickException diff --git a/annif/lexical/mllm.py b/annif/lexical/mllm.py index ff8bc5894..b475d2975 100644 --- a/annif/lexical/mllm.py +++ b/annif/lexical/mllm.py @@ -1,4 +1,5 @@ """MLLM (Maui-like Lexical Matchin) model for Annif""" + from __future__ import annotations import collections diff --git a/annif/lexical/tokenset.py b/annif/lexical/tokenset.py index 07c15705d..d577527ad 100644 --- a/annif/lexical/tokenset.py +++ b/annif/lexical/tokenset.py @@ -1,4 +1,5 @@ """Index for fast matching of token sets.""" + from __future__ import annotations import collections diff --git a/annif/lexical/util.py b/annif/lexical/util.py index 28d21a141..ca3cb8fb3 100644 --- a/annif/lexical/util.py +++ b/annif/lexical/util.py @@ -1,4 +1,5 @@ """Utility methods for lexical algorithms""" + from __future__ import annotations import collections diff --git a/annif/openapi/validation.py b/annif/openapi/validation.py index 7f920b35d..e57d6830d 100644 --- a/annif/openapi/validation.py +++ b/annif/openapi/validation.py @@ -1,4 +1,5 @@ """Custom validator for the Annif API.""" + from __future__ import annotations import logging diff --git a/annif/parallel.py b/annif/parallel.py index c6b293f8e..2fead01b9 100644 --- a/annif/parallel.py +++ b/annif/parallel.py @@ -1,4 +1,5 @@ """Parallel processing functionality for Annif""" + from __future__ import annotations import multiprocessing diff --git a/annif/project.py b/annif/project.py index 83f7eda7c..3a4aa5566 100644 --- a/annif/project.py +++ b/annif/project.py @@ -1,4 +1,5 @@ """Project management functionality for Annif""" + from __future__ import annotations import enum diff --git a/annif/registry.py b/annif/registry.py index a97dc366e..e12b30e72 100644 --- a/annif/registry.py +++ b/annif/registry.py @@ -1,4 +1,5 @@ """Registry that keeps track of Annif projects""" + from __future__ import annotations import re diff --git a/annif/rest.py b/annif/rest.py index f848117c8..f8d9f6d7e 100644 --- a/annif/rest.py +++ b/annif/rest.py @@ -1,5 +1,6 @@ """Definitions for REST API operations. These are wired via Connexion to methods defined in the OpenAPI specification.""" + from __future__ import annotations import importlib diff --git a/annif/suggestion.py b/annif/suggestion.py index ddf3ec2e5..fd9eb48cc 100644 --- a/annif/suggestion.py +++ b/annif/suggestion.py @@ -1,4 +1,5 @@ """Representing suggested subjects.""" + from __future__ import annotations import collections @@ -38,7 +39,7 @@ def filter_suggestion( data, rows, cols = [], [], [] for row in range(preds.shape[0]): - arow = preds.getrow(row) + arow = preds[[row]] if limit is not None and limit < len(arow.data): topk_idx = arow.data.argpartition(-limit)[-limit:] else: diff --git a/annif/transform/__init__.py b/annif/transform/__init__.py index 716d874a2..680b02cc0 100644 --- a/annif/transform/__init__.py +++ b/annif/transform/__init__.py @@ -1,4 +1,5 @@ """Functionality for obtaining text transformation from string specification""" + from __future__ import annotations import re diff --git a/annif/transform/inputlimiter.py b/annif/transform/inputlimiter.py index 229766864..d57c6a56c 100644 --- a/annif/transform/inputlimiter.py +++ b/annif/transform/inputlimiter.py @@ -1,5 +1,6 @@ """A simple transformation that truncates the text of input documents to a given character length.""" + from __future__ import annotations from typing import TYPE_CHECKING diff --git a/annif/transform/langfilter.py b/annif/transform/langfilter.py index 6794eb370..f7c985485 100644 --- a/annif/transform/langfilter.py +++ b/annif/transform/langfilter.py @@ -1,5 +1,6 @@ """Transformation filtering out parts of a text that are in a language different from the language of the project.""" + from __future__ import annotations from typing import TYPE_CHECKING diff --git a/annif/transform/transform.py b/annif/transform/transform.py index db71fef37..9a96f2877 100644 --- a/annif/transform/transform.py +++ b/annif/transform/transform.py @@ -1,4 +1,5 @@ """Common functionality for transforming text of input documents.""" + from __future__ import annotations import abc diff --git a/annif/util.py b/annif/util.py index 1702c2e4b..b03c63ec2 100644 --- a/annif/util.py +++ b/annif/util.py @@ -1,4 +1,5 @@ """Utility functions for Annif""" + from __future__ import annotations import glob diff --git a/annif/vocab.py b/annif/vocab.py index 333fa0d69..7f7e125c4 100644 --- a/annif/vocab.py +++ b/annif/vocab.py @@ -1,4 +1,5 @@ """Vocabulary management functionality for Annif""" + from __future__ import annotations import os.path diff --git a/pyproject.toml b/pyproject.toml index 6a06219b4..ca2a30d1a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,19 +18,19 @@ keywords = [ "text-classification", "rest-api", "code4lib", - "subject-indexing" + "subject-indexing", ] -classifiers=[ +classifiers = [ "Programming Language :: Python :: 3", "License :: OSI Approved :: Apache Software License", - "Operating System :: OS Independent" + "Operating System :: OS Independent", ] [tool.poetry.dependencies] python = ">=3.9,<3.12" -connexion = {version = "2.14.2", extras = ["swagger-ui"]} +connexion = { version = "2.14.2", extras = ["swagger-ui"] } flask = "2.2.*" flask-cors = "4.0.*" click = "8.1.*" @@ -38,36 +38,36 @@ click-log = "0.4.*" joblib = "1.3.*" nltk = "3.8.*" gensim = "4.3.*" -scikit-learn = "1.3.*" -scipy = "1.10.*" -rdflib = "6.3.*" +scikit-learn = "1.4.*" +scipy = "1.12.*" +rdflib = "7.0.*" gunicorn = "21.2.*" -numpy = "1.24.*" -optuna = "3.3.*" +numpy = "1.26.*" +optuna = "3.6.*" python-dateutil = "2.8.*" tomli = { version = "2.0.*", python = "<3.11" } simplemma = "0.9.*" -jsonschema = "4.17.*" +jsonschema = "4.21.*" -fasttext-wheel = {version = "0.9.2", optional = true} -voikko = {version = "0.5.*", optional = true} -tensorflow-cpu = {version = "2.13.*", optional = true} -lmdb = {version = "1.4.1", optional = true} -omikuji = {version = "0.5.*", optional = true} -yake = {version = "0.4.5", optional = true} -spacy = {version = "3.6.*", optional = true} -stwfsapy = {version="0.3.*", optional = true} +fasttext-wheel = { version = "0.9.2", optional = true } +voikko = { version = "0.5.*", optional = true } +tensorflow-cpu = { version = "2.15.*", optional = true } +lmdb = { version = "1.4.1", optional = true } +omikuji = { version = "0.5.*", optional = true } +yake = { version = "0.4.8", optional = true } +spacy = { version = "3.7.*", optional = true } +stwfsapy = { version = "0.4.*", optional = true } [tool.poetry.dev-dependencies] py = "*" -pytest = "7.*" +pytest = "8.*" requests = "*" pytest-cov = "*" pytest-watch = "*" pytest-flask = "*" flake8 = "*" bumpversion = "*" -black = "23.*" +black = "24.*" isort = "*" schemathesis = "3.*.*" @@ -93,7 +93,5 @@ line_length = "88" skip_gitignore = true [tool.pytest.ini_options] -markers = [ - "slow: marks tests as slow (deselect with '-m \"not slow\"')", -] +markers = ["slow: marks tests as slow (deselect with '-m \"not slow\"')"] addopts = "-m 'not slow'" diff --git a/tests/test_vocab_csv.py b/tests/test_vocab_csv.py index 0bc3820dd..bdb66e25c 100644 --- a/tests/test_vocab_csv.py +++ b/tests/test_vocab_csv.py @@ -1,6 +1,5 @@ """Unit tests for CSV vocabulary functionality in Annif""" - from annif.corpus import SubjectFileCSV, SubjectIndex diff --git a/tests/test_vocab_skos.py b/tests/test_vocab_skos.py index a3c1e508c..fd93ca6d1 100644 --- a/tests/test_vocab_skos.py +++ b/tests/test_vocab_skos.py @@ -1,6 +1,5 @@ """Unit tests for SKOS vocabulary functionality in Annif""" - import os.path from annif.corpus.skos import SubjectFileSKOS diff --git a/tests/test_vocab_tsv.py b/tests/test_vocab_tsv.py index 1fc318e0f..4d0956d5a 100644 --- a/tests/test_vocab_tsv.py +++ b/tests/test_vocab_tsv.py @@ -1,6 +1,5 @@ """Unit tests for TSV vocabulary functionality in Annif""" - from annif.corpus import SubjectFileTSV, SubjectIndex