From f11797a32158df9d4f4fb3c12efc5700788eb92e Mon Sep 17 00:00:00 2001 From: Juho Inkinen Date: Thu, 6 Jan 2022 17:00:25 +0200 Subject: [PATCH 01/15] Upgrade to omikuji 0.4.x --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index da10e5e3f..ccb7fc406 100644 --- a/setup.py +++ b/setup.py @@ -43,7 +43,7 @@ def read(fname): 'fasttext': ['fasttext==0.9.2'], 'voikko': ['voikko'], 'nn': ['tensorflow-cpu==2.5.0', 'lmdb==1.2.1'], - 'omikuji': ['omikuji==0.3.*'], + 'omikuji': ['omikuji==0.4.*'], 'yake': ['yake==0.4.5'], 'pycld3': ['pycld3'], 'dev': [ From b2eac420a96296a8383cc77de5754cc9341dba8b Mon Sep 17 00:00:00 2001 From: Juho Inkinen Date: Thu, 6 Jan 2022 17:24:33 +0200 Subject: [PATCH 02/15] Upgrade to Gensim 4.1.x --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index ccb7fc406..62ea32674 100644 --- a/setup.py +++ b/setup.py @@ -28,7 +28,7 @@ def read(fname): 'click-log', 'joblib==1.0.1', 'nltk', - 'gensim==4.0.*', + 'gensim==4.1.*', 'scikit-learn==0.24.2', 'scipy==1.5.4', 'rdflib>=4.2,<7.0', From 8225fe2f6a9cc865a543d15779ddaa800470bc10 Mon Sep 17 00:00:00 2001 From: Juho Inkinen Date: Thu, 6 Jan 2022 17:26:09 +0200 Subject: [PATCH 03/15] Remove now unnecessary filtering of warnings by older Gensim --- annif/backend/tfidf.py | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/annif/backend/tfidf.py b/annif/backend/tfidf.py index ae5e51c29..8b9603c37 100644 --- a/annif/backend/tfidf.py +++ b/annif/backend/tfidf.py @@ -4,18 +4,13 @@ import os.path import tempfile import annif.util +import gensim.similarities +from gensim.matutils import Sparse2Corpus from annif.suggestion import VectorSuggestionResult from annif.exception import NotInitializedException, NotSupportedException from . import backend from . 
import mixins -# Filter UserWarnings due to not-installed python-Levenshtein package -import warnings -with warnings.catch_warnings(): - warnings.simplefilter('ignore') - import gensim.similarities - from gensim.matutils import Sparse2Corpus - class SubjectBuffer: """A file-backed buffer to store and retrieve subject text.""" From d728bf20c70a4716b91d4669e94fd5e2f7be44f1 Mon Sep 17 00:00:00 2001 From: Juho Inkinen Date: Thu, 6 Jan 2022 18:28:42 +0200 Subject: [PATCH 04/15] Upgrade to Optuna 2.10.x --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 62ea32674..a796c71b4 100644 --- a/setup.py +++ b/setup.py @@ -34,7 +34,7 @@ def read(fname): 'rdflib>=4.2,<7.0', 'gunicorn', 'numpy==1.19.*', - 'optuna==2.8.0', + 'optuna==2.10.*', 'stwfsapy==0.3.*', 'python-dateutil', ], From 243b0c42273b67248e2eaf4e60e9c47b0be8d9e7 Mon Sep 17 00:00:00 2001 From: Juho Inkinen Date: Thu, 6 Jan 2022 18:29:10 +0200 Subject: [PATCH 05/15] Upgrade to joblib 1.1.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a796c71b4..3f9ef7de7 100644 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ def read(fname): 'flask-cors', 'click==7.1.*', 'click-log', - 'joblib==1.0.1', + 'joblib==1.1.0', 'nltk', 'gensim==4.1.*', 'scikit-learn==0.24.2', From 3d432d46338e417cc6943fc7fc44ed7ad3e3660a Mon Sep 17 00:00:00 2001 From: Juho Inkinen Date: Thu, 6 Jan 2022 18:30:43 +0200 Subject: [PATCH 06/15] Upgrade to TensorFlow 2.6.2 & lmdb 1.3.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 3f9ef7de7..7dba81266 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ def read(fname): extras_require={ 'fasttext': ['fasttext==0.9.2'], 'voikko': ['voikko'], - 'nn': ['tensorflow-cpu==2.5.0', 'lmdb==1.2.1'], + 'nn': ['tensorflow-cpu==2.6.2', 'lmdb==1.3.0'], 'omikuji': ['omikuji==0.4.*'], 'yake': ['yake==0.4.5'], 'pycld3': ['pycld3'], From
300f828944450a31fa326c7bf5550df5d89ba041 Mon Sep 17 00:00:00 2001 From: Juho Inkinen Date: Mon, 10 Jan 2022 16:13:58 +0200 Subject: [PATCH 07/15] Upgrade to SciPy 1.7.* --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 7dba81266..a933c1c7b 100644 --- a/setup.py +++ b/setup.py @@ -30,7 +30,7 @@ def read(fname): 'nltk', 'gensim==4.1.*', 'scikit-learn==0.24.2', - 'scipy==1.5.4', + 'scipy==1.7.*', 'rdflib>=4.2,<7.0', 'gunicorn', 'numpy==1.19.*', From cec1bdafaa56ef03dc4376d23a627fb617b2bd29 Mon Sep 17 00:00:00 2001 From: Juho Inkinen Date: Mon, 10 Jan 2022 16:14:32 +0200 Subject: [PATCH 08/15] Upgrade to NumPy 1.21.* --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index a933c1c7b..bad96e097 100644 --- a/setup.py +++ b/setup.py @@ -33,7 +33,7 @@ def read(fname): 'scipy==1.7.*', 'rdflib>=4.2,<7.0', 'gunicorn', - 'numpy==1.19.*', + 'numpy==1.21.*', 'optuna==2.10.*', 'stwfsapy==0.3.*', 'python-dateutil', From ea568744c74c6443f2a9fb645fbd40c5faa5df04 Mon Sep 17 00:00:00 2001 From: Juho Inkinen Date: Mon, 10 Jan 2022 16:15:11 +0200 Subject: [PATCH 09/15] Upgrade to TensorFlow 2.7.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index bad96e097..70f045f50 100644 --- a/setup.py +++ b/setup.py @@ -42,7 +42,7 @@ def read(fname): extras_require={ 'fasttext': ['fasttext==0.9.2'], 'voikko': ['voikko'], - 'nn': ['tensorflow-cpu==2.6.2', 'lmdb==1.3.0'], + 'nn': ['tensorflow-cpu==2.7.0', 'lmdb==1.3.0'], 'omikuji': ['omikuji==0.4.*'], 'yake': ['yake==0.4.5'], 'pycld3': ['pycld3'], From 59b8911f5c31d85384d001f3874e001416dce935 Mon Sep 17 00:00:00 2001 From: Juho Inkinen Date: Mon, 10 Jan 2022 16:50:55 +0200 Subject: [PATCH 10/15] More informative error msg for trying to load incompatible Omikuji model --- annif/backend/omikuji.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/annif/backend/omikuji.py 
b/annif/backend/omikuji.py index 73812d000..f92dbb6ae 100644 --- a/annif/backend/omikuji.py +++ b/annif/backend/omikuji.py @@ -5,7 +5,8 @@ import shutil import annif.util from annif.suggestion import SubjectSuggestion, ListSuggestionResult -from annif.exception import NotInitializedException, NotSupportedException +from annif.exception import NotInitializedException, NotSupportedException, \ + OperationFailedException from . import backend from . import mixins @@ -40,7 +41,12 @@ def _initialize_model(self): path = os.path.join(self.datadir, self.MODEL_FILE) self.debug('loading model from {}'.format(path)) if os.path.exists(path): - self._model = omikuji.Model.load(path) + try: + self._model = omikuji.Model.load(path) + except RuntimeError: + raise OperationFailedException( + "Omikuji models trained on Annif versions older than " + "0.56 cannot be loaded. Please retrain your project.") else: raise NotInitializedException( 'model {} not found'.format(path), From 3e36832ba0dda5c01e9314aeec05bc7b6b68313b Mon Sep 17 00:00:00 2001 From: Juho Inkinen Date: Mon, 10 Jan 2022 17:41:06 +0200 Subject: [PATCH 11/15] Resolve DeprecationWarnings of np.bool by using builtin bool --- annif/backend/pav.py | 4 ++-- annif/eval.py | 2 +- annif/lexical/mllm.py | 2 +- annif/lexical/util.py | 5 ++--- annif/suggestion.py | 4 ++-- 5 files changed, 8 insertions(+), 9 deletions(-) diff --git a/annif/backend/pav.py b/annif/backend/pav.py index eddac16c4..d9e0fc6ff 100644 --- a/annif/backend/pav.py +++ b/annif/backend/pav.py @@ -95,9 +95,9 @@ def _suggest_train_corpus(source_project, corpus): scores = coo_matrix((data, (row, col)), shape=(ndocs, len(source_project.subjects)), dtype=np.float32) - true = coo_matrix((np.ones(len(trow), dtype=np.bool), (trow, tcol)), + true = coo_matrix((np.ones(len(trow), dtype=bool), (trow, tcol)), shape=(ndocs, len(source_project.subjects)), - dtype=np.bool) + dtype=bool) return csc_matrix(scores), csc_matrix(true) def _create_pav_model(self, source_project_id, 
min_docs, corpus): diff --git a/annif/eval.py b/annif/eval.py index aeb41ff68..edb4a80da 100644 --- a/annif/eval.py +++ b/annif/eval.py @@ -15,7 +15,7 @@ def filter_pred_top_k(preds, limit): masks = [] for pred in preds: - mask = np.zeros_like(pred, dtype=np.bool) + mask = np.zeros_like(pred, dtype=bool) top_k = np.argsort(pred)[::-1][:limit] mask[top_k] = True masks.append(mask) diff --git a/annif/lexical/mllm.py b/annif/lexical/mllm.py index 339460dbc..ff4e4a4c5 100644 --- a/annif/lexical/mllm.py +++ b/annif/lexical/mllm.py @@ -82,7 +82,7 @@ def candidates_to_features(candidates, mdata): matrix = np.zeros((len(candidates), len(Feature)), dtype=np.float32) c_ids = [c.subject_id for c in candidates] - c_vec = np.zeros(mdata.related.shape[0], dtype=np.bool) + c_vec = np.zeros(mdata.related.shape[0], dtype=bool) c_vec[c_ids] = True broader = mdata.broader.multiply(c_vec).sum(axis=1) narrower = mdata.narrower.multiply(c_vec).sum(axis=1) diff --git a/annif/lexical/util.py b/annif/lexical/util.py index 4351c9c21..19312b7f5 100644 --- a/annif/lexical/util.py +++ b/annif/lexical/util.py @@ -3,7 +3,6 @@ import collections from rdflib import URIRef from rdflib.namespace import SKOS -import numpy as np from scipy.sparse import lil_matrix, csc_matrix @@ -16,7 +15,7 @@ def get_subject_labels(graph, uri, properties, language): def make_relation_matrix(graph, vocab, property): n_subj = len(vocab.subjects) - matrix = lil_matrix((n_subj, n_subj), dtype=np.bool) + matrix = lil_matrix((n_subj, n_subj), dtype=bool) for subj, obj in graph.subject_objects(property): subj_id = vocab.subjects.by_uri(str(subj), warnings=False) @@ -36,7 +35,7 @@ def make_collection_matrix(graph, vocab): c_members[str(coll)].append(member_id) c_matrix = lil_matrix((len(c_members), len(vocab.subjects)), - dtype=np.bool) + dtype=bool) # populate the matrix for collection -> subject_id for c_id, members in enumerate(c_members.values()): diff --git a/annif/suggestion.py b/annif/suggestion.py index 
a8416fbf8..f7a0a2684 100644 --- a/annif/suggestion.py +++ b/annif/suggestion.py @@ -135,7 +135,7 @@ def filter(self, subject_index, limit=None, threshold=0.0): mask = (self._vector > threshold) deprecated_ids = subject_index.deprecated_ids() if limit is not None: - limit_mask = np.zeros_like(self._vector, dtype=np.bool) + limit_mask = np.zeros_like(self._vector, dtype=bool) deprecated_set = set(deprecated_ids) top_k_subjects = itertools.islice( (subj for subj in self.subject_order @@ -143,7 +143,7 @@ def filter(self, subject_index, limit=None, threshold=0.0): limit_mask[list(top_k_subjects)] = True mask = mask & limit_mask else: - deprecated_mask = np.ones_like(self._vector, dtype=np.bool) + deprecated_mask = np.ones_like(self._vector, dtype=bool) deprecated_mask[deprecated_ids] = False mask = mask & deprecated_mask vsr = VectorSuggestionResult(self._vector * mask) From 4736426150db4479e32a5808131407e2259582e6 Mon Sep 17 00:00:00 2001 From: Juho Inkinen Date: Mon, 10 Jan 2022 18:32:37 +0200 Subject: [PATCH 12/15] Upgrade scikit-learn to 1.0.2 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 70f045f50..5ad454271 100644 --- a/setup.py +++ b/setup.py @@ -29,7 +29,7 @@ def read(fname): 'joblib==1.1.0', 'nltk', 'gensim==4.1.*', - 'scikit-learn==0.24.2', + 'scikit-learn==1.0.2', 'scipy==1.7.*', 'rdflib>=4.2,<7.0', 'gunicorn', From 33b65763b0bffff1adf283ad55c3106bae558181 Mon Sep 17 00:00:00 2001 From: Juho Inkinen Date: Tue, 11 Jan 2022 10:38:28 +0200 Subject: [PATCH 13/15] Update Connexion to use new PyPI project offering v2.10.0 --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 5ad454271..f486b1d99 100644 --- a/setup.py +++ b/setup.py @@ -20,7 +20,7 @@ def read(fname): zip_safe=False, python_requires='>=3.7', install_requires=[ - 'connexion[swagger-ui]', + 'connexion2[swagger-ui]', 'swagger_ui_bundle', 'flask>=1.0.4,<2', 'flask-cors', From 
c5641a9fb2609abf7c461ff9a12c0ce386674fee Mon Sep 17 00:00:00 2001 From: Juho Inkinen Date: Tue, 11 Jan 2022 10:40:20 +0200 Subject: [PATCH 14/15] Update Flask version requirement to allow 2.x.x --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index f486b1d99..cd7c16f95 100644 --- a/setup.py +++ b/setup.py @@ -22,7 +22,7 @@ def read(fname): install_requires=[ 'connexion2[swagger-ui]', 'swagger_ui_bundle', - 'flask>=1.0.4,<2', + 'flask>=1.0.4,<3', 'flask-cors', 'click==7.1.*', 'click-log', From f6cf926063dd56d9b19f509f8b81689229f27692 Mon Sep 17 00:00:00 2001 From: Juho Inkinen Date: Tue, 11 Jan 2022 10:41:24 +0200 Subject: [PATCH 15/15] Upgrade Click to 8.0.x --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index cd7c16f95..1f23d0549 100644 --- a/setup.py +++ b/setup.py @@ -24,7 +24,7 @@ def read(fname): 'swagger_ui_bundle', 'flask>=1.0.4,<3', 'flask-cors', - 'click==7.1.*', + 'click==8.0.*', 'click-log', 'joblib==1.1.0', 'nltk',