Use Python type hints #708

Merged · 28 commits · Jun 5, 2023

Changes shown below are from 16 of the 28 commits.

Commits
f5f32dd  Run "monkeytype apply --pep_563" on all modules (juhoinkinen, May 23, 2023)
c8a04cb  Use dict instead of OrderedDict (juhoinkinen, May 23, 2023)
7ed97a5  Make imports that were too eagerly made conditional default again (juhoinkinen, May 23, 2023)
1a36c09  Fix flake8 errors (juhoinkinen, May 23, 2023)
1660e56  Turn forward references non-strings (juhoinkinen, May 23, 2023)
61decb7  Use less specific types (juhoinkinen, May 23, 2023)
ba77e84  Remove "Union[Any," in hints (juhoinkinen, May 24, 2023)
2f9ee10  Move comment back to its original place (juhoinkinen, May 24, 2023)
d2c5e53  Fix some omissions and errors by monkeytype (juhoinkinen, May 24, 2023)
f242a98  Simplify hints using float for Union[int, float] (juhoinkinen, May 24, 2023)
7c3c5dc  Simplify hints using Sequence for Union[Tuple, List] (juhoinkinen, May 24, 2023)
961dd09  Remove too wide usage of Any (e.g. in Unions, Lists, Iterators) (juhoinkinen, May 25, 2023)
33cdcf2  Unify type of params to Dict[str, Any] or DefaultDict[str, Dict (juhoinkinen, May 25, 2023)
bb9951f  Simplify overly complex types (juhoinkinen, May 25, 2023)
c405d83  Fix erroneously passing whole Error obj to ClickException instead of j… (juhoinkinen, May 25, 2023)
b74d869  Annotate (manually) annif/backend/hyperopt.py (juhoinkinen, May 25, 2023)
4eb904e  Manually annotate annif/backend/mixins.py (juhoinkinen, May 26, 2023)
6987c05  Manually annotate annif/corpus/document.py (juhoinkinen, May 26, 2023)
76022aa  Upgrade to PEP 585 and PEP 604 typing features/syntax (juhoinkinen, May 29, 2023)
5f84a56  Manually annotate annif/corpus/parallel.py (juhoinkinen, May 30, 2023)
b9cfacf  Manually annotate annif/util.py (juhoinkinen, May 30, 2023)
c49fee9  Fix easily fixable errors noted by Mypy (juhoinkinen, May 30, 2023)
8092605  Exclude TYPE_CHECKING blocks from test coverage (juhoinkinen, Jun 1, 2023)
7ec0b73  Narrow down TokenSet tokens type to np.ndarray only (juhoinkinen, Jun 1, 2023)
fd19c67  Use int instead of int | np.int32 (juhoinkinen, Jun 1, 2023)
486b48f  Move imports for type checking only to TYPE_CHECKING blocks (juhoinkinen, Jun 1, 2023)
6e92123  Restore accidentally removed annif.suggestion import (juhoinkinen, Jun 2, 2023)
252c75f  Make type optional as it should be (juhoinkinen, Jun 2, 2023)
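
The commit sequence shows the overall workflow: monkeytype generates draft annotations from recorded call traces (f5f32dd), the drafts are hand-corrected and simplified, and commit 76022aa then upgrades everything to PEP 585 (builtin generics) and PEP 604 (`|` unions). As a rough before/after illustration of that upgrade — the function below is invented for this example, not taken from the PR:

```python
from __future__ import annotations  # PEP 563: annotations are evaluated lazily

# Draft style, as monkeytype emits it:
#     from typing import Dict, List, Optional
#     def count_tokens(text: Optional[str]) -> Dict[str, List[int]]: ...


def count_tokens(text: str | None) -> dict[str, list[int]]:
    """Same signature after the PEP 585/604 upgrade: builtin generics, | unions."""
    if text is None:
        return {}
    return {word: [len(word)] for word in text.split()}
```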
13 changes: 10 additions & 3 deletions annif/__init__.py
@@ -1,17 +1,24 @@
 #!/usr/bin/env python3

+from __future__ import annotations
+
 import logging
 import os
 import os.path
+from typing import TYPE_CHECKING, Optional

 logging.basicConfig()
 logger = logging.getLogger("annif")
 logger.setLevel(level=logging.INFO)


 import annif.backend  # noqa

+if TYPE_CHECKING:
+    from flask.app import Flask
+
+
-def create_flask_app(config_name=None):
+def create_flask_app(config_name: Optional[str] = None) -> Flask:
     """Create a Flask app to be used by the CLI."""
     from flask import Flask
@@ -23,7 +30,7 @@ def create_flask_app(config_name=None):
     return app


-def create_app(config_name=None):
+def create_app(config_name: Optional[str] = None) -> Flask:
     """Create a Connexion app to be used for the API."""
     # 'cxapp' here is the Connexion application that has a normal Flask app
     # as a property (cxapp.app)
@@ -60,7 +67,7 @@ def create_app(config_name=None):
     return cxapp.app


-def _get_config_name(config_name):
+def _get_config_name(config_name: Optional[str]) -> str:
     if config_name is None:
         config_name = os.environ.get("ANNIF_CONFIG")
     if config_name is None:
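
The annif/__init__.py hunks above are the template the rest of the PR follows: `from __future__ import annotations` makes every annotation a lazily evaluated string (PEP 563), so Flask only needs to be imported inside an `if TYPE_CHECKING:` block for the checker's benefit, while the runtime import stays deferred inside the function. A minimal self-contained sketch of the idiom, with a hypothetical heavy dependency standing in for Flask:

```python
from __future__ import annotations  # PEP 563: hints below are never evaluated at runtime

from typing import TYPE_CHECKING

if TYPE_CHECKING:
    # Seen by mypy only; at runtime this block is skipped entirely,
    # so importing this module does not drag in the heavy dependency.
    from heavylib import HeavyApp  # hypothetical package


def create_app(name: str | None = None) -> HeavyApp:
    from heavylib import HeavyApp  # real import deferred until first call

    return HeavyApp(name or "default")
```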
7 changes: 6 additions & 1 deletion annif/analyzer/__init__.py
@@ -1,20 +1,25 @@
 """Collection of language-specific analyzers and analyzer registry for Annif"""
+from __future__ import annotations

 import re
+from typing import TYPE_CHECKING

 import annif
 from annif.util import parse_args

 from . import simple, simplemma, snowball

+if TYPE_CHECKING:
+    from annif.analyzer.analyzer import Analyzer
+
 _analyzers = {}


 def register_analyzer(analyzer):
     _analyzers[analyzer.name] = analyzer


-def get_analyzer(analyzerspec):
+def get_analyzer(analyzerspec: str) -> Analyzer:
     match = re.match(r"(\w+)(\((.*)\))?", analyzerspec)
     if match is None:
         raise ValueError("Invalid analyzer specification {}".format(analyzerspec))
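
get_analyzer accepts specs of the form name or name(args), split by the regex shown above. A quick standalone demonstration of how that pattern decomposes a spec string (the spec value is illustrative):

```python
import re

# Mirrors the pattern in get_analyzer: group 1 is the analyzer name,
# group 3 the optional argument string inside the parentheses.
match = re.match(r"(\w+)(\((.*)\))?", "snowball(english)")
assert match is not None
name, _, args = match.groups()
print(name, args)  # -> snowball english
```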
10 changes: 6 additions & 4 deletions annif/analyzer/analyzer.py
@@ -1,8 +1,10 @@
 """Common functionality for analyzers."""
+from __future__ import annotations

 import abc
 import functools
 import unicodedata
+from typing import List

 _KEY_TOKEN_MIN_LENGTH = "token_min_length"

@@ -15,18 +17,18 @@ class Analyzer(metaclass=abc.ABCMeta):
     name = None
     token_min_length = 3  # default value, can be overridden in instances

-    def __init__(self, **kwargs):
+    def __init__(self, **kwargs) -> None:
         if _KEY_TOKEN_MIN_LENGTH in kwargs:
             self.token_min_length = int(kwargs[_KEY_TOKEN_MIN_LENGTH])

-    def tokenize_sentences(self, text):
+    def tokenize_sentences(self, text: str) -> List[str]:
         """Tokenize a piece of text (e.g. a document) into sentences."""
         import nltk.tokenize

         return nltk.tokenize.sent_tokenize(text)

     @functools.lru_cache(maxsize=50000)
-    def is_valid_token(self, word):
+    def is_valid_token(self, word: str) -> bool:
         """Return True if the word is an acceptable token."""
         if len(word) < self.token_min_length:
             return False
@@ -36,7 +38,7 @@ def is_valid_token(self, word):
             return True
         return False

-    def tokenize_words(self, text, filter=True):
+    def tokenize_words(self, text: str, filter: bool = True) -> List[str]:
         """Tokenize a piece of text (e.g. a sentence) into words. If
         filter=True (default), only return valid tokens (e.g. not
         punctuation, numbers or very short words)"""
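
A side note on the `is_valid_token` hunk: `@functools.lru_cache` on an instance method keys the cache on the `(self, word)` argument pair, so each word is validated once per analyzer instance (at the cost of the cache holding a reference to `self`). A toy sketch of that behaviour, unrelated to the Annif classes:

```python
import functools


class Validator:
    min_length = 3

    @functools.lru_cache(maxsize=1000)
    def is_valid(self, word: str) -> bool:
        # The cache key is the (self, word) argument pair.
        return len(word) >= self.min_length


v = Validator()
v.is_valid("token")
v.is_valid("token")                      # second call is served from the cache
print(Validator.is_valid.cache_info())   # hits=1, misses=1
```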
5 changes: 3 additions & 2 deletions annif/analyzer/simple.py
@@ -1,14 +1,15 @@
 """Simple analyzer for Annif. Only folds words to lower case."""
+from __future__ import annotations

 from . import analyzer


 class SimpleAnalyzer(analyzer.Analyzer):
     name = "simple"

-    def __init__(self, param, **kwargs):
+    def __init__(self, param: None, **kwargs) -> None:
         self.param = param
         super().__init__(**kwargs)

-    def _normalize_word(self, word):
+    def _normalize_word(self, word: str) -> str:
         return word.lower()
5 changes: 3 additions & 2 deletions annif/analyzer/simplemma.py
@@ -1,4 +1,5 @@
 """Simplemma analyzer for Annif, based on simplemma lemmatizer."""
+from __future__ import annotations

 import simplemma

@@ -8,9 +9,9 @@
 class SimplemmaAnalyzer(analyzer.Analyzer):
     name = "simplemma"

-    def __init__(self, param, **kwargs):
+    def __init__(self, param: str, **kwargs) -> None:
         self.lang = param
         super().__init__(**kwargs)

-    def _normalize_word(self, word):
+    def _normalize_word(self, word: str) -> str:
         return simplemma.lemmatize(word, lang=self.lang)
5 changes: 3 additions & 2 deletions annif/analyzer/snowball.py
@@ -1,4 +1,5 @@
 """Snowball analyzer for Annif, based on nltk Snowball stemmer."""
+from __future__ import annotations

 import functools

@@ -8,13 +9,13 @@
 class SnowballAnalyzer(analyzer.Analyzer):
     name = "snowball"

-    def __init__(self, param, **kwargs):
+    def __init__(self, param: str, **kwargs) -> None:
         self.param = param
         import nltk.stem.snowball

         self.stemmer = nltk.stem.snowball.SnowballStemmer(param)
         super().__init__(**kwargs)

     @functools.lru_cache(maxsize=500000)
-    def _normalize_word(self, word):
+    def _normalize_word(self, word: str) -> str:
         return self.stemmer.stem(word.lower())
7 changes: 5 additions & 2 deletions annif/analyzer/spacy.py
@@ -1,4 +1,7 @@
 """spaCy analyzer for Annif which uses spaCy for lemmatization"""
+from __future__ import annotations
+
+from typing import List

 import annif.util
 from annif.exception import OperationFailedException
@@ -11,7 +14,7 @@
 class SpacyAnalyzer(analyzer.Analyzer):
     name = "spacy"

-    def __init__(self, param, **kwargs):
+    def __init__(self, param: str, **kwargs) -> None:
         import spacy

         self.param = param
@@ -28,7 +31,7 @@ def __init__(self, param, **kwargs):
         self.lowercase = False
         super().__init__(**kwargs)

-    def tokenize_words(self, text, filter=True):
+    def tokenize_words(self, text: str, filter: bool = True) -> List[str]:
         lemmas = [
             lemma
             for lemma in (token.lemma_ for token in self.nlp(text.strip()))
8 changes: 5 additions & 3 deletions annif/analyzer/voikko.py
@@ -1,6 +1,8 @@
 """Voikko analyzer for Annif, based on libvoikko library."""
+from __future__ import annotations

 import functools
+from typing import Dict, Optional

 import voikko.libvoikko

@@ -10,20 +12,20 @@
 class VoikkoAnalyzer(analyzer.Analyzer):
     name = "voikko"

-    def __init__(self, param, **kwargs):
+    def __init__(self, param: str, **kwargs) -> None:
         self.param = param
         self.voikko = None
         super().__init__(**kwargs)

-    def __getstate__(self):
+    def __getstate__(self) -> Dict[str, Optional[str]]:
         """Return the state of the object for pickling purposes. The Voikko
         instance is set to None because as a ctypes object it cannot be
         pickled."""

         return {"param": self.param, "voikko": None}

     @functools.lru_cache(maxsize=500000)
-    def _normalize_word(self, word):
+    def _normalize_word(self, word: str) -> str:
         if self.voikko is None:
             self.voikko = voikko.libvoikko.Voikko(self.param)
         result = self.voikko.analyze(word)
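
The `__getstate__` hunk pairs with the lazy re-creation in `_normalize_word`: pickling drops the ctypes-backed Voikko handle, and the first call after unpickling rebuilds it. The same idiom in a self-contained sketch, where the Resource class is a hypothetical stand-in for the unpicklable object:

```python
from __future__ import annotations

import pickle


class Resource:
    """Stands in for an unpicklable handle such as a ctypes object."""


class Wrapper:
    def __init__(self, param: str) -> None:
        self.param = param
        self.resource: Resource | None = None

    def __getstate__(self) -> dict[str, str | None]:
        # Persist only the constructor argument; never the live handle.
        return {"param": self.param, "resource": None}

    def use(self) -> str:
        if self.resource is None:  # rebuilt lazily after unpickling
            self.resource = Resource()
        return self.param


clone = pickle.loads(pickle.dumps(Wrapper("fi")))
print(clone.use())  # -> fi
```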
32 changes: 19 additions & 13 deletions annif/backend/__init__.py
@@ -1,20 +1,26 @@
 """Registry of backend types for Annif"""
+from __future__ import annotations
+
+from typing import TYPE_CHECKING, Type
+
+if TYPE_CHECKING:
+    from annif.backend.backend import AnnifBackend


 # define functions for lazily importing each backend (alphabetical order)
-def _dummy():
+def _dummy() -> Type[AnnifBackend]:
     from . import dummy

     return dummy.DummyBackend


-def _ensemble():
+def _ensemble() -> Type[AnnifBackend]:
     from . import ensemble

     return ensemble.EnsembleBackend


-def _fasttext():
+def _fasttext() -> Type[AnnifBackend]:
     try:
         from . import fasttext

@@ -23,19 +29,19 @@ def _fasttext():
         raise ValueError("fastText not available, cannot use fasttext backend")


-def _http():
+def _http() -> Type[AnnifBackend]:
     from . import http

     return http.HTTPBackend


-def _mllm():
+def _mllm() -> Type[AnnifBackend]:
     from . import mllm

     return mllm.MLLMBackend


-def _nn_ensemble():
+def _nn_ensemble() -> Type[AnnifBackend]:
     try:
         from . import nn_ensemble

@@ -46,7 +52,7 @@ def _nn_ensemble():
         )


-def _omikuji():
+def _omikuji() -> Type[AnnifBackend]:
     try:
         from . import omikuji

@@ -55,13 +61,13 @@ def _omikuji():
         raise ValueError("Omikuji not available, cannot use omikuji backend")


-def _pav():
+def _pav() -> Type[AnnifBackend]:
     from . import pav

     return pav.PAVBackend


-def _stwfsa():
+def _stwfsa() -> Type[AnnifBackend]:
     try:
         from . import stwfsa

@@ -70,19 +76,19 @@ def _stwfsa():
         raise ValueError("STWFSA not available, cannot use stwfsa backend")


-def _svc():
+def _svc() -> Type[AnnifBackend]:
     from . import svc

     return svc.SVCBackend


-def _tfidf():
+def _tfidf() -> Type[AnnifBackend]:
     from . import tfidf

     return tfidf.TFIDFBackend


-def _yake():
+def _yake() -> Type[AnnifBackend]:
     try:
         from . import yake

@@ -108,7 +114,7 @@ def _yake():
 }


-def get_backend(backend_id):
+def get_backend(backend_id: str) -> Type[AnnifBackend]:
     if backend_id in _backend_fns:
         return _backend_fns[backend_id]()
     else:
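
The backend registry above combines two of the PR's ideas: each `_xxx` function defers its import until that backend is actually requested (so optional dependencies stay optional), while the `Type[AnnifBackend]` return hints rely on a TYPE_CHECKING-only import of the base class. A condensed sketch of the pattern with one hypothetical backend (mypkg and BaseBackend are invented names):

```python
from __future__ import annotations

from typing import TYPE_CHECKING, Callable, Type

if TYPE_CHECKING:
    from mypkg.base import BaseBackend  # hypothetical base class, hints only


def _heavy() -> Type[BaseBackend]:
    try:
        # Imported only when this backend is actually requested.
        from mypkg import heavy

        return heavy.HeavyBackend
    except ImportError:
        raise ValueError("heavy extra not installed, cannot use heavy backend")


_backend_fns: dict[str, Callable[[], Type[BaseBackend]]] = {"heavy": _heavy}


def get_backend(backend_id: str) -> Type[BaseBackend]:
    if backend_id in _backend_fns:
        return _backend_fns[backend_id]()
    raise ValueError(f"No such backend type {backend_id}")


# Here get_backend("heavy") raises the "not installed" ValueError,
# because the hypothetical mypkg is absent.
```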