From 092aeceaa1ec0fba2e5243955d8b691a146606fe Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Fri, 23 Feb 2024 12:11:20 +0200 Subject: [PATCH 01/17] Upgrade to jsonschema 4.21.* --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 6a06219b4..270202c14 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -47,7 +47,7 @@ optuna = "3.3.*" python-dateutil = "2.8.*" tomli = { version = "2.0.*", python = "<3.11" } simplemma = "0.9.*" -jsonschema = "4.17.*" +jsonschema = "4.21.*" fasttext-wheel = {version = "0.9.2", optional = true} voikko = {version = "0.5.*", optional = true} From f37d601ec1d07561e6015768acb7546c4761f075 Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Fri, 23 Feb 2024 12:15:27 +0200 Subject: [PATCH 02/17] Upgrade to optuna 3.5.* --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 270202c14..f836f1c83 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ scipy = "1.10.*" rdflib = "6.3.*" gunicorn = "21.2.*" numpy = "1.24.*" -optuna = "3.3.*" +optuna = "3.5.*" python-dateutil = "2.8.*" tomli = { version = "2.0.*", python = "<3.11" } simplemma = "0.9.*" From 14b320d2c4bc64fdb5462b395e79334c6db80a17 Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Fri, 23 Feb 2024 14:38:36 +0200 Subject: [PATCH 03/17] Upgrade to numpy 1.26.* --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f836f1c83..e7a14619e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -42,7 +42,7 @@ scikit-learn = "1.3.*" scipy = "1.10.*" rdflib = "6.3.*" gunicorn = "21.2.*" -numpy = "1.24.*" +numpy = "1.26.*" optuna = "3.5.*" python-dateutil = "2.8.*" tomli = { version = "2.0.*", python = "<3.11" } From 4268d7668ea55b4383d55b316fee57c6d7a80ba0 Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Fri, 23 Feb 2024 14:39:24 +0200 Subject: [PATCH 04/17] Upgrade to tensorflow 2.15.* --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e7a14619e..f55e03256 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -51,7 +51,7 @@ jsonschema = "4.21.*" fasttext-wheel = {version = "0.9.2", optional = true} voikko = {version = "0.5.*", optional = true} -tensorflow-cpu = {version = "2.13.*", optional = true} +tensorflow-cpu = {version = "2.15.*", optional = true} lmdb = {version = "1.4.1", optional = true} omikuji = {version = "0.5.*", optional = true} yake = {version = "0.4.5", optional = true} From f5fa9492723c5bfe3d65eed5e053e03f782b4a65 Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Fri, 23 Feb 2024 14:42:39 +0200 Subject: [PATCH 05/17] Upgrade to scipy 1.12.* --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index f55e03256..bd5750ead 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -39,7 +39,7 @@ joblib = "1.3.*" nltk = "3.8.*" gensim = "4.3.*" scikit-learn = "1.3.*" -scipy = "1.10.*" +scipy = "1.12.*" rdflib = "6.3.*" gunicorn = "21.2.*" numpy = "1.26.*" From d50ee8231533da25acc4fbea0de354b0fcdfb10b Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Fri, 23 Feb 2024 14:46:53 +0200 Subject: [PATCH 06/17] Upgrade to spacy 3.7.* --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index bd5750ead..3ad90f1d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -55,7 +55,7 @@ tensorflow-cpu = {version = "2.15.*", optional = true} lmdb = {version = "1.4.1", optional = true} omikuji = {version = "0.5.*", optional = true} yake = {version = "0.4.5", optional = true} -spacy = {version = "3.6.*", optional = true} +spacy = {version = "3.7.*", optional = true} stwfsapy = {version="0.3.*", optional = true} [tool.poetry.dev-dependencies] From 3b4a22ec9cee1d4150a87d5a55476917886e1cea Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Fri, 23 Feb 2024 14:51:30 +0200 Subject: [PATCH 07/17] Upgrade to pytest 8.* --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3ad90f1d3..b72380ac8 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -60,7 +60,7 @@ stwfsapy = {version="0.3.*", optional = true} [tool.poetry.dev-dependencies] py = "*" -pytest = "7.*" +pytest = "8.*" requests = "*" pytest-cov = "*" pytest-watch = "*" From 71b79b0d21e6dcb271344fadd99e86a266b3eb7a Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Fri, 23 Feb 2024 16:04:05 +0200 Subject: [PATCH 08/17] Upgrade to yake 0.4.8 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index b72380ac8..dcfb6edb3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -54,7 +54,7 @@ voikko = {version = "0.5.*", optional = true} tensorflow-cpu = {version = "2.15.*", optional = true} lmdb = {version = "1.4.1", optional = true} omikuji = {version = "0.5.*", optional = true} -yake = {version = "0.4.5", optional = true} +yake = {version = "0.4.8", optional = true} spacy = {version = "3.7.*", optional = true} stwfsapy = {version="0.3.*", optional = true} From 888a17e0aeea0bf92b9fd04cb585383b51c8d35c Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Fri, 23 Feb 2024 17:32:06 +0200 Subject: [PATCH 09/17] Resolve DeprecationWarning by Scipy DeprecationWarning: "getrow" is deprecated and will be removed in v1.14.0; use "X[[0]]"" instead. --- annif/eval.py | 4 ++-- annif/suggestion.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/annif/eval.py b/annif/eval.py index 5ec5bd17a..eb6d80f4a 100644 --- a/annif/eval.py +++ b/annif/eval.py @@ -63,10 +63,10 @@ def ndcg_score(y_true: csr_array, y_pred: csr_array, limit: int | None = None) - scores = np.ones(y_true.shape[0], dtype=np.float32) for i in range(y_true.shape[0]): - true = y_true.getrow(i) + true = y_true[[i]] idcg = dcg_score(true, true, limit) if idcg > 0: - pred = y_pred.getrow(i) + pred = y_pred[[i]] dcg = dcg_score(true, pred, limit) scores[i] = dcg / idcg diff --git a/annif/suggestion.py b/annif/suggestion.py index ddf3ec2e5..fd7c0c12e 100644 --- a/annif/suggestion.py +++ b/annif/suggestion.py @@ -38,7 +38,7 @@ def filter_suggestion( data, rows, cols = [], [], [] for row in range(preds.shape[0]): - arow = preds.getrow(row) + arow = preds[[row]] if limit is not None and limit < len(arow.data): topk_idx = arow.data.argpartition(-limit)[-limit:] else: From 14b8ed70e6ba955606df8961eead41ddb607ca9d Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Mon, 26 Feb 2024 14:17:08 +0200 Subject: [PATCH 10/17] Upgrade to poetry 1.8.0 in CI/CD --- .github/workflows/cicd.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/cicd.yml b/.github/workflows/cicd.yml index 716bccddc..3b2e1aa92 100644 --- a/.github/workflows/cicd.yml +++ b/.github/workflows/cicd.yml @@ -10,7 +10,7 @@ on: env: PIPX_HOME: "/home/runner/.cache/pipx" PIPX_BIN_DIR: "/home/runner/.local/bin" - POETRY_VERSION: "1.5.1" + POETRY_VERSION: "1.8.0" permissions: contents: read jobs: From 0d620dc4b81a3cb252746ea4bbaa8d131b6c9c65 Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Mon, 26 Feb 2024 14:24:08 +0200 Subject: [PATCH 11/17] Upgrade to black 24.* --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index dcfb6edb3..2d0d29db0 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -67,7 +67,7 @@ pytest-watch = "*" pytest-flask = "*" flake8 = "*" bumpversion = "*" -black = "23.*" +black = "24.*" isort = "*" schemathesis = "3.*.*" From d4dbd73fe6a91964af82fbf6e6cb8d70b77569a3 Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Mon, 26 Feb 2024 14:39:39 +0200 Subject: [PATCH 12/17] Migrate codestyle to Black v24 --- annif/analyzer/__init__.py | 1 + annif/analyzer/analyzer.py | 1 + annif/analyzer/simple.py | 1 + annif/analyzer/simplemma.py | 1 + annif/analyzer/snowball.py | 1 + annif/analyzer/spacy.py | 1 + annif/analyzer/voikko.py | 1 + annif/backend/__init__.py | 1 + annif/backend/backend.py | 1 + annif/backend/dummy.py | 1 + annif/backend/ensemble.py | 1 + annif/backend/fasttext.py | 1 + annif/backend/http.py | 1 + annif/backend/hyperopt.py | 1 + annif/backend/mixins.py | 1 + annif/backend/mllm.py | 1 + annif/backend/nn_ensemble.py | 1 + annif/backend/omikuji.py | 1 + annif/backend/pav.py | 17 ++++++++++------- annif/backend/svc.py | 1 + annif/backend/tfidf.py | 1 + annif/backend/yake.py | 1 + annif/cli.py | 1 - annif/cli_util.py | 1 + annif/config.py | 1 + annif/corpus/__init__.py | 1 - annif/corpus/combine.py | 1 + annif/corpus/document.py | 1 + annif/corpus/skos.py | 15 ++++++++++----- annif/corpus/subject.py | 1 + annif/corpus/types.py | 1 + annif/datadir.py | 1 + annif/eval.py | 7 ++++--- annif/exception.py | 1 + annif/lexical/mllm.py | 1 + annif/lexical/tokenset.py | 1 + annif/lexical/util.py | 1 + annif/openapi/validation.py | 1 + annif/parallel.py | 1 + annif/project.py | 1 + annif/registry.py | 1 + annif/rest.py | 1 + annif/suggestion.py | 1 + annif/transform/__init__.py | 1 + annif/transform/inputlimiter.py | 1 + annif/transform/langfilter.py | 1 + annif/transform/transform.py | 1 + annif/util.py | 1 + annif/vocab.py | 1 + pyproject.toml | 28 +++++++++++++--------------- tests/test_vocab_csv.py | 1 - tests/test_vocab_skos.py | 1 - tests/test_vocab_tsv.py | 1 - 53 files changed, 81 insertions(+), 35 deletions(-) diff --git a/annif/analyzer/__init__.py b/annif/analyzer/__init__.py index a0f93ced3..27a2cd792 100644 --- a/annif/analyzer/__init__.py +++ b/annif/analyzer/__init__.py @@ -1,4 +1,5 @@ """Collection of language-specific analyzers and analyzer registry for Annif""" + from __future__ import annotations import re diff --git a/annif/analyzer/analyzer.py b/annif/analyzer/analyzer.py index 5ba876f9d..25bdb6b57 100644 --- a/annif/analyzer/analyzer.py +++ b/annif/analyzer/analyzer.py @@ -1,4 +1,5 @@ """Common functionality for analyzers.""" + from __future__ import annotations import abc diff --git a/annif/analyzer/simple.py b/annif/analyzer/simple.py index 4cc35e6f1..c3ff7240a 100644 --- a/annif/analyzer/simple.py +++ b/annif/analyzer/simple.py @@ -1,4 +1,5 @@ """Simple analyzer for Annif. Only folds words to lower case.""" + from __future__ import annotations from . import analyzer diff --git a/annif/analyzer/simplemma.py b/annif/analyzer/simplemma.py index e535b25de..e549a2585 100644 --- a/annif/analyzer/simplemma.py +++ b/annif/analyzer/simplemma.py @@ -1,4 +1,5 @@ """Simplemma analyzer for Annif, based on simplemma lemmatizer.""" + from __future__ import annotations import simplemma diff --git a/annif/analyzer/snowball.py b/annif/analyzer/snowball.py index 57990c2a1..1a17702eb 100644 --- a/annif/analyzer/snowball.py +++ b/annif/analyzer/snowball.py @@ -1,4 +1,5 @@ """Snowball analyzer for Annif, based on nltk Snowball stemmer.""" + from __future__ import annotations import functools diff --git a/annif/analyzer/spacy.py b/annif/analyzer/spacy.py index b5e9cbc55..184f03ffc 100644 --- a/annif/analyzer/spacy.py +++ b/annif/analyzer/spacy.py @@ -1,4 +1,5 @@ """spaCy analyzer for Annif which uses spaCy for lemmatization""" + from __future__ import annotations import annif.util diff --git a/annif/analyzer/voikko.py b/annif/analyzer/voikko.py index e6e693d65..b3e7d5007 100644 --- a/annif/analyzer/voikko.py +++ b/annif/analyzer/voikko.py @@ -1,4 +1,5 @@ """Voikko analyzer for Annif, based on libvoikko library.""" + from __future__ import annotations import functools diff --git a/annif/backend/__init__.py b/annif/backend/__init__.py index cbeeb648e..7be1264b4 100644 --- a/annif/backend/__init__.py +++ b/annif/backend/__init__.py @@ -1,4 +1,5 @@ """Registry of backend types for Annif""" + from __future__ import annotations from typing import TYPE_CHECKING, Type diff --git a/annif/backend/backend.py b/annif/backend/backend.py index 6a63c86b2..84465e460 100644 --- a/annif/backend/backend.py +++ b/annif/backend/backend.py @@ -1,4 +1,5 @@ """Common functionality for backends.""" + from __future__ import annotations import abc diff --git a/annif/backend/dummy.py b/annif/backend/dummy.py index bb52e22e6..d10ce8d16 100644 --- a/annif/backend/dummy.py +++ b/annif/backend/dummy.py @@ -1,4 +1,5 @@ """Dummy backend for testing basic interaction of projects and backends""" + from __future__ import annotations from typing import TYPE_CHECKING, Any diff --git a/annif/backend/ensemble.py b/annif/backend/ensemble.py index 9953c9e6c..a25e8a03f 100644 --- a/annif/backend/ensemble.py +++ b/annif/backend/ensemble.py @@ -1,4 +1,5 @@ """Ensemble backend that combines results from multiple projects""" + from __future__ import annotations from typing import TYPE_CHECKING, Any diff --git a/annif/backend/fasttext.py b/annif/backend/fasttext.py index 23c33539a..e102b02ba 100644 --- a/annif/backend/fasttext.py +++ b/annif/backend/fasttext.py @@ -1,4 +1,5 @@ """Annif backend using the fastText classifier""" + from __future__ import annotations import collections diff --git a/annif/backend/http.py b/annif/backend/http.py index 0fce7f8e4..9036ec152 100644 --- a/annif/backend/http.py +++ b/annif/backend/http.py @@ -1,5 +1,6 @@ """HTTP/REST client backend that makes calls to a web service and returns the results""" + from __future__ import annotations import importlib diff --git a/annif/backend/hyperopt.py b/annif/backend/hyperopt.py index 2c2e7422c..efbc10513 100644 --- a/annif/backend/hyperopt.py +++ b/annif/backend/hyperopt.py @@ -1,4 +1,5 @@ """Hyperparameter optimization functionality for backends""" + from __future__ import annotations import abc diff --git a/annif/backend/mixins.py b/annif/backend/mixins.py index 2bb094641..e4af03d26 100644 --- a/annif/backend/mixins.py +++ b/annif/backend/mixins.py @@ -1,4 +1,5 @@ """Annif backend mixins that can be used to implement features""" + from __future__ import annotations import abc diff --git a/annif/backend/mllm.py b/annif/backend/mllm.py index cbcef11b1..cd274cf65 100644 --- a/annif/backend/mllm.py +++ b/annif/backend/mllm.py @@ -1,4 +1,5 @@ """Maui-like Lexical Matching backend""" + from __future__ import annotations import os.path diff --git a/annif/backend/nn_ensemble.py b/annif/backend/nn_ensemble.py index d4bc09058..e1daab00a 100644 --- a/annif/backend/nn_ensemble.py +++ b/annif/backend/nn_ensemble.py @@ -1,5 +1,6 @@ """Neural network based ensemble backend that combines results from multiple projects.""" + from __future__ import annotations import os.path diff --git a/annif/backend/omikuji.py b/annif/backend/omikuji.py index 7a2e6a1bb..89d8f0ea9 100644 --- a/annif/backend/omikuji.py +++ b/annif/backend/omikuji.py @@ -1,4 +1,5 @@ """Annif backend using the Omikuji classifier""" + from __future__ import annotations import os.path diff --git a/annif/backend/pav.py b/annif/backend/pav.py index 61f4362d1..2ee30337a 100644 --- a/annif/backend/pav.py +++ b/annif/backend/pav.py @@ -2,6 +2,7 @@ learns which concept suggestions from each backend are trustworthy using the PAV algorithm, a.k.a. isotonic regression, to turn raw scores returned by individual backends into probabilities.""" + from __future__ import annotations import os.path @@ -69,13 +70,15 @@ def _merge_source_batches( reg_models = self._get_model(project_id) pav_batch = [ [ - SubjectSuggestion( - subject_id=sugg.subject_id, - score=reg_models[sugg.subject_id].predict([sugg.score])[0], - ) - if sugg.subject_id in reg_models - else SubjectSuggestion( - subject_id=sugg.subject_id, score=sugg.score + ( + SubjectSuggestion( + subject_id=sugg.subject_id, + score=reg_models[sugg.subject_id].predict([sugg.score])[0], + ) + if sugg.subject_id in reg_models + else SubjectSuggestion( + subject_id=sugg.subject_id, score=sugg.score + ) ) # default to raw score for sugg in result ] diff --git a/annif/backend/svc.py b/annif/backend/svc.py index 30fb23eb4..c18f1410b 100644 --- a/annif/backend/svc.py +++ b/annif/backend/svc.py @@ -1,4 +1,5 @@ """Annif backend using a SVM classifier""" + from __future__ import annotations import os.path diff --git a/annif/backend/tfidf.py b/annif/backend/tfidf.py index 1cca639ca..a77704446 100644 --- a/annif/backend/tfidf.py +++ b/annif/backend/tfidf.py @@ -1,5 +1,6 @@ """Backend that returns most similar subjects based on similarity in sparse TF-IDF normalized bag-of-words vector space""" + from __future__ import annotations import os.path diff --git a/annif/backend/yake.py b/annif/backend/yake.py index 2eeef9a9c..c8b933c9b 100644 --- a/annif/backend/yake.py +++ b/annif/backend/yake.py @@ -1,4 +1,5 @@ """Annif backend using Yake keyword extraction""" + # For license remarks of this backend see README.md: # https://github.com/NatLibFi/Annif#license. from __future__ import annotations diff --git a/annif/cli.py b/annif/cli.py index 73f18f02e..d8ca1ea56 100644 --- a/annif/cli.py +++ b/annif/cli.py @@ -1,7 +1,6 @@ """Definitions for command-line (Click) commands for invoking Annif operations and printing the results to console.""" - import collections import importlib import json diff --git a/annif/cli_util.py b/annif/cli_util.py index bbfa96df4..9f33f8153 100644 --- a/annif/cli_util.py +++ b/annif/cli_util.py @@ -1,4 +1,5 @@ """Utility functions for Annif CLI commands""" + from __future__ import annotations import collections diff --git a/annif/config.py b/annif/config.py index 810249bd6..8cdc7d04f 100644 --- a/annif/config.py +++ b/annif/config.py @@ -1,4 +1,5 @@ """Configuration file handling""" + from __future__ import annotations import configparser diff --git a/annif/corpus/__init__.py b/annif/corpus/__init__.py index 4200d2f87..fb0ceef86 100644 --- a/annif/corpus/__init__.py +++ b/annif/corpus/__init__.py @@ -1,6 +1,5 @@ """Annif corpus operations""" - from .combine import CombinedCorpus from .document import ( DocumentDirectory, diff --git a/annif/corpus/combine.py b/annif/corpus/combine.py index 75fcc7f55..a0ad02147 100644 --- a/annif/corpus/combine.py +++ b/annif/corpus/combine.py @@ -1,4 +1,5 @@ """Class for combining multiple corpora so they behave like a single corpus""" + from __future__ import annotations import itertools diff --git a/annif/corpus/document.py b/annif/corpus/document.py index 09a80a309..8d4ef5319 100644 --- a/annif/corpus/document.py +++ b/annif/corpus/document.py @@ -1,4 +1,5 @@ """Clases for supporting document corpora""" + from __future__ import annotations import glob diff --git a/annif/corpus/skos.py b/annif/corpus/skos.py index 462a35241..82a8c5c13 100644 --- a/annif/corpus/skos.py +++ b/annif/corpus/skos.py @@ -1,4 +1,5 @@ """Support for subjects loaded from a SKOS/RDF file""" + from __future__ import annotations import collections @@ -83,11 +84,15 @@ def languages(self) -> set[str]: def _concept_labels(self, concept: URIRef) -> dict[str, str]: by_lang = self.get_concept_labels(concept, self.PREF_LABEL_PROPERTIES) return { - lang: by_lang[lang][0] - if by_lang[lang] # correct lang - else by_lang[None][0] - if by_lang[None] # no language - else self.graph.namespace_manager.qname(concept) + lang: ( + by_lang[lang][0] + if by_lang[lang] # correct lang + else ( + by_lang[None][0] + if by_lang[None] # no language + else self.graph.namespace_manager.qname(concept) + ) + ) for lang in self.languages } diff --git a/annif/corpus/subject.py b/annif/corpus/subject.py index a9ee06397..2e3ffe5de 100644 --- a/annif/corpus/subject.py +++ b/annif/corpus/subject.py @@ -1,4 +1,5 @@ """Classes for supporting subject corpora expressed as directories or files""" + from __future__ import annotations import csv diff --git a/annif/corpus/types.py b/annif/corpus/types.py index e6cd4b252..e94a8c1d9 100644 --- a/annif/corpus/types.py +++ b/annif/corpus/types.py @@ -1,4 +1,5 @@ """Basic types for document and subject corpora""" + from __future__ import annotations import abc diff --git a/annif/datadir.py b/annif/datadir.py index 752da32dd..02987490d 100644 --- a/annif/datadir.py +++ b/annif/datadir.py @@ -1,4 +1,5 @@ """Mixin class for types that need a data directory""" + from __future__ import annotations import os diff --git a/annif/eval.py b/annif/eval.py index eb6d80f4a..95cdf449d 100644 --- a/annif/eval.py +++ b/annif/eval.py @@ -1,4 +1,5 @@ """Evaluation metrics for Annif""" + from __future__ import annotations import warnings @@ -86,9 +87,9 @@ def __init__(self, subject_index: SubjectIndex) -> None: def evaluate_many( self, - suggestion_batch: list[list[SubjectSuggestion]] - | SuggestionBatch - | list[Iterator], + suggestion_batch: ( + list[list[SubjectSuggestion]] | SuggestionBatch | list[Iterator] + ), gold_subject_batch: Sequence[SubjectSet], ) -> None: if not isinstance(suggestion_batch, SuggestionBatch): diff --git a/annif/exception.py b/annif/exception.py index b4b9c6552..9132d0134 100644 --- a/annif/exception.py +++ b/annif/exception.py @@ -1,4 +1,5 @@ """Custom exceptions used by Annif""" + from __future__ import annotations from click import ClickException diff --git a/annif/lexical/mllm.py b/annif/lexical/mllm.py index ff8bc5894..b475d2975 100644 --- a/annif/lexical/mllm.py +++ b/annif/lexical/mllm.py @@ -1,4 +1,5 @@ """MLLM (Maui-like Lexical Matchin) model for Annif""" + from __future__ import annotations import collections diff --git a/annif/lexical/tokenset.py b/annif/lexical/tokenset.py index 07c15705d..d577527ad 100644 --- a/annif/lexical/tokenset.py +++ b/annif/lexical/tokenset.py @@ -1,4 +1,5 @@ """Index for fast matching of token sets.""" + from __future__ import annotations import collections diff --git a/annif/lexical/util.py b/annif/lexical/util.py index 28d21a141..ca3cb8fb3 100644 --- a/annif/lexical/util.py +++ b/annif/lexical/util.py @@ -1,4 +1,5 @@ """Utility methods for lexical algorithms""" + from __future__ import annotations import collections diff --git a/annif/openapi/validation.py b/annif/openapi/validation.py index 7f920b35d..e57d6830d 100644 --- a/annif/openapi/validation.py +++ b/annif/openapi/validation.py @@ -1,4 +1,5 @@ """Custom validator for the Annif API.""" + from __future__ import annotations import logging diff --git a/annif/parallel.py b/annif/parallel.py index c6b293f8e..2fead01b9 100644 --- a/annif/parallel.py +++ b/annif/parallel.py @@ -1,4 +1,5 @@ """Parallel processing functionality for Annif""" + from __future__ import annotations import multiprocessing diff --git a/annif/project.py b/annif/project.py index 83f7eda7c..3a4aa5566 100644 --- a/annif/project.py +++ b/annif/project.py @@ -1,4 +1,5 @@ """Project management functionality for Annif""" + from __future__ import annotations import enum diff --git a/annif/registry.py b/annif/registry.py index a97dc366e..e12b30e72 100644 --- a/annif/registry.py +++ b/annif/registry.py @@ -1,4 +1,5 @@ """Registry that keeps track of Annif projects""" + from __future__ import annotations import re diff --git a/annif/rest.py b/annif/rest.py index f848117c8..f8d9f6d7e 100644 --- a/annif/rest.py +++ b/annif/rest.py @@ -1,5 +1,6 @@ """Definitions for REST API operations. These are wired via Connexion to methods defined in the OpenAPI specification.""" + from __future__ import annotations import importlib diff --git a/annif/suggestion.py b/annif/suggestion.py index fd7c0c12e..fd9eb48cc 100644 --- a/annif/suggestion.py +++ b/annif/suggestion.py @@ -1,4 +1,5 @@ """Representing suggested subjects.""" + from __future__ import annotations import collections diff --git a/annif/transform/__init__.py b/annif/transform/__init__.py index 716d874a2..680b02cc0 100644 --- a/annif/transform/__init__.py +++ b/annif/transform/__init__.py @@ -1,4 +1,5 @@ """Functionality for obtaining text transformation from string specification""" + from __future__ import annotations import re diff --git a/annif/transform/inputlimiter.py b/annif/transform/inputlimiter.py index 229766864..d57c6a56c 100644 --- a/annif/transform/inputlimiter.py +++ b/annif/transform/inputlimiter.py @@ -1,5 +1,6 @@ """A simple transformation that truncates the text of input documents to a given character length.""" + from __future__ import annotations from typing import TYPE_CHECKING diff --git a/annif/transform/langfilter.py b/annif/transform/langfilter.py index 6794eb370..f7c985485 100644 --- a/annif/transform/langfilter.py +++ b/annif/transform/langfilter.py @@ -1,5 +1,6 @@ """Transformation filtering out parts of a text that are in a language different from the language of the project.""" + from __future__ import annotations from typing import TYPE_CHECKING diff --git a/annif/transform/transform.py b/annif/transform/transform.py index db71fef37..9a96f2877 100644 --- a/annif/transform/transform.py +++ b/annif/transform/transform.py @@ -1,4 +1,5 @@ """Common functionality for transforming text of input documents.""" + from __future__ import annotations import abc diff --git a/annif/util.py b/annif/util.py index 1702c2e4b..b03c63ec2 100644 --- a/annif/util.py +++ b/annif/util.py @@ -1,4 +1,5 @@ """Utility functions for Annif""" + from __future__ import annotations import glob diff --git a/annif/vocab.py b/annif/vocab.py index 333fa0d69..7f7e125c4 100644 --- a/annif/vocab.py +++ b/annif/vocab.py @@ -1,4 +1,5 @@ """Vocabulary management functionality for Annif""" + from __future__ import annotations import os.path diff --git a/pyproject.toml b/pyproject.toml index 2d0d29db0..e83921c3b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -18,19 +18,19 @@ keywords = [ "text-classification", "rest-api", "code4lib", - "subject-indexing" + "subject-indexing", ] -classifiers=[ +classifiers = [ "Programming Language :: Python :: 3", "License :: OSI Approved :: Apache Software License", - "Operating System :: OS Independent" + "Operating System :: OS Independent", ] [tool.poetry.dependencies] python = ">=3.9,<3.12" -connexion = {version = "2.14.2", extras = ["swagger-ui"]} +connexion = { version = "2.14.2", extras = ["swagger-ui"] } flask = "2.2.*" flask-cors = "4.0.*" click = "8.1.*" @@ -49,14 +49,14 @@ tomli = { version = "2.0.*", python = "<3.11" } simplemma = "0.9.*" jsonschema = "4.21.*" -fasttext-wheel = {version = "0.9.2", optional = true} -voikko = {version = "0.5.*", optional = true} -tensorflow-cpu = {version = "2.15.*", optional = true} -lmdb = {version = "1.4.1", optional = true} -omikuji = {version = "0.5.*", optional = true} -yake = {version = "0.4.8", optional = true} -spacy = {version = "3.7.*", optional = true} -stwfsapy = {version="0.3.*", optional = true} +fasttext-wheel = { version = "0.9.2", optional = true } +voikko = { version = "0.5.*", optional = true } +tensorflow-cpu = { version = "2.15.*", optional = true } +lmdb = { version = "1.4.1", optional = true } +omikuji = { version = "0.5.*", optional = true } +yake = { version = "0.4.8", optional = true } +spacy = { version = "3.7.*", optional = true } +stwfsapy = { version = "0.3.*", optional = true } [tool.poetry.dev-dependencies] py = "*" @@ -93,7 +93,5 @@ line_length = "88" skip_gitignore = true [tool.pytest.ini_options] -markers = [ - "slow: marks tests as slow (deselect with '-m \"not slow\"')", -] +markers = ["slow: marks tests as slow (deselect with '-m \"not slow\"')"] addopts = "-m 'not slow'" diff --git a/tests/test_vocab_csv.py b/tests/test_vocab_csv.py index 0bc3820dd..bdb66e25c 100644 --- a/tests/test_vocab_csv.py +++ b/tests/test_vocab_csv.py @@ -1,6 +1,5 @@ """Unit tests for CSV vocabulary functionality in Annif""" - from annif.corpus import SubjectFileCSV, SubjectIndex diff --git a/tests/test_vocab_skos.py b/tests/test_vocab_skos.py index a3c1e508c..fd93ca6d1 100644 --- a/tests/test_vocab_skos.py +++ b/tests/test_vocab_skos.py @@ -1,6 +1,5 @@ """Unit tests for SKOS vocabulary functionality in Annif""" - import os.path from annif.corpus.skos import SubjectFileSKOS diff --git a/tests/test_vocab_tsv.py b/tests/test_vocab_tsv.py index 1fc318e0f..4d0956d5a 100644 --- a/tests/test_vocab_tsv.py +++ b/tests/test_vocab_tsv.py @@ -1,6 +1,5 @@ """Unit tests for TSV vocabulary functionality in Annif""" - from annif.corpus import SubjectFileTSV, SubjectIndex From df20982b7b6ea48609a3ef9cb132a9d01a99527f Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Mon, 26 Feb 2024 14:46:45 +0200 Subject: [PATCH 13/17] Ignore modifications due to migration to Black v24 codestyle in git blame --- .git-blame-ignore-revs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index bbed4a471..292e8d698 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -2,3 +2,5 @@ 3bc18907354a40f1d89dca1833a2719ba7fb0933 # Reorder import statements with isort 68a72c5a603283f70abce2651dcde9c6f0177c41 +# Migrate code style to Black 24 +d4dbd73fe6a91964af82fbf6e6cb8d70b77569a3 From 95639ba70effce7dcb0ea74644862a3bb26aad0e Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Wed, 20 Mar 2024 11:30:49 +0200 Subject: [PATCH 14/17] Upgrade to optuna 3.6.* --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index e83921c3b..636f99329 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -43,7 +43,7 @@ scipy = "1.12.*" rdflib = "6.3.*" gunicorn = "21.2.*" numpy = "1.26.*" -optuna = "3.5.*" +optuna = "3.6.*" python-dateutil = "2.8.*" tomli = { version = "2.0.*", python = "<3.11" } simplemma = "0.9.*" From d3b39e1b3151d1899e9ec770aaef13062f718a73 Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Wed, 20 Mar 2024 16:35:18 +0200 Subject: [PATCH 15/17] Upgrade to stwfsapy 0.4.* --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 636f99329..50c2f2b0d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,7 +56,7 @@ lmdb = { version = "1.4.1", optional = true } omikuji = { version = "0.5.*", optional = true } yake = { version = "0.4.8", optional = true } spacy = { version = "3.7.*", optional = true } -stwfsapy = { version = "0.3.*", optional = true } +stwfsapy = { version = "0.4.*", optional = true } [tool.poetry.dev-dependencies] py = "*" From a8a4fc3b1d2bbc4e33519235101c5dd13f4b2495 Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Wed, 20 Mar 2024 16:35:50 +0200 Subject: [PATCH 16/17] Upgrade to scikit-learn 1.4.* --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 50c2f2b0d..008b1f35e 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -38,7 +38,7 @@ click-log = "0.4.*" joblib = "1.3.*" nltk = "3.8.*" gensim = "4.3.*" -scikit-learn = "1.3.*" +scikit-learn = "1.4.*" scipy = "1.12.*" rdflib = "6.3.*" gunicorn = "21.2.*" From 44b47347f8f0721de0f7bdc9973519ee118a50a7 Mon Sep 17 00:00:00 2001 From: Juho Inkinen <34240031+juhoinkinen@users.noreply.github.com> Date: Wed, 20 Mar 2024 16:36:13 +0200 Subject: [PATCH 17/17] Upgrade to rdflib 7.0.* --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 008b1f35e..ca2a30d1a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,7 +40,7 @@ nltk = "3.8.*" gensim = "4.3.*" scikit-learn = "1.4.*" scipy = "1.12.*" -rdflib = "6.3.*" +rdflib = "7.0.*" gunicorn = "21.2.*" numpy = "1.26.*" optuna = "3.6.*"