Run "monkeytype apply --pep_563" on all modules
juhoinkinen committed May 23, 2023
1 parent 4f6994b commit f5f32dd
Showing 44 changed files with 1,254 additions and 386 deletions.
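
MonkeyType infers these annotations from types observed while the code runs, and the --pep_563 flag applies them in PEP 563 style: every rewritten module gains a from __future__ import annotations line, so annotations are stored as strings and never evaluated at runtime. A rough sketch of the workflow behind a commit like this (a minimal sketch; the pytest entry point and the example module are assumptions, not taken from the commit):

    pip install MonkeyType
    monkeytype run `which pytest`                      # trace calls; writes monkeytype.sqlite3
    monkeytype apply --pep_563 annif.analyzer.simple   # rewrite one module with inferred types

A caveat visible throughout the diff: MonkeyType can only annotate what it observed, so some inferred types are narrower than the intended contract; for example config_name: None = None below, where Optional[str] would better match the sibling functions.
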
13 changes: 10 additions & 3 deletions annif/__init__.py

@@ -1,5 +1,7 @@
 #!/usr/bin/env python3

+from __future__ import annotations

 import logging
 import os
 import os.path
@@ -8,10 +10,15 @@
 logger = logging.getLogger("annif")
 logger.setLevel(level=logging.INFO)

+from typing import TYPE_CHECKING, Optional

 import annif.backend  # noqa

+if TYPE_CHECKING:
+    from flask.app import Flask


-def create_flask_app(config_name=None):
+def create_flask_app(config_name: None = None) -> flask.app.Flask:
     """Create a Flask app to be used by the CLI."""
     from flask import Flask

@@ -23,7 +30,7 @@ def create_flask_app(config_name=None):
     return app


-def create_app(config_name=None):
+def create_app(config_name: Optional[str] = None) -> flask.app.Flask:
     """Create a Connexion app to be used for the API."""
     # 'cxapp' here is the Connexion application that has a normal Flask app
     # as a property (cxapp.app)
@@ -60,7 +67,7 @@ def create_app(config_name=None):
     return cxapp.app


-def _get_config_name(config_name):
+def _get_config_name(config_name: Optional[str]) -> str:
     if config_name is None:
         config_name = os.environ.get("ANNIF_CONFIG")
     if config_name is None:

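The TYPE_CHECKING-guarded import together with postponed evaluation of annotations is what lets the new signatures reference Flask without importing flask at module load time. A minimal standalone sketch of the pattern (assembled for illustration, not taken from the commit):

    from __future__ import annotations  # PEP 563: annotations are stored as strings

    from typing import TYPE_CHECKING

    if TYPE_CHECKING:
        # Seen only by static type checkers; never executed at runtime.
        from flask.app import Flask

    def create_flask_app() -> Flask:  # string annotation, so no NameError at import
        from flask import Flask  # the real import happens lazily, inside the function

        return Flask(__name__)

Because annotations are never evaluated, even the flask.app.Flask spelling MonkeyType produced above is harmless at runtime, although a static checker would expect it to match the guarded import.
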
15 changes: 14 additions & 1 deletion annif/analyzer/__init__.py

@@ -1,20 +1,33 @@
 """Collection of language-specific analyzers and analyzer registry for Annif"""
+from __future__ import annotations

 import re
+from typing import TYPE_CHECKING, Union

 import annif
 from annif.util import parse_args

 from . import simple, simplemma, snowball

+if TYPE_CHECKING:
+    from annif.analyzer.simple import SimpleAnalyzer
+    from annif.analyzer.simplemma import SimplemmaAnalyzer
+    from annif.analyzer.snowball import SnowballAnalyzer
+    from annif.analyzer.spacy import SpacyAnalyzer
+    from annif.analyzer.voikko import VoikkoAnalyzer

 _analyzers = {}


 def register_analyzer(analyzer):
     _analyzers[analyzer.name] = analyzer


-def get_analyzer(analyzerspec):
+def get_analyzer(
+    analyzerspec: str,
+) -> Union[
+    SimplemmaAnalyzer, SimpleAnalyzer, SnowballAnalyzer, SpacyAnalyzer, VoikkoAnalyzer
+]:
     match = re.match(r"(\w+)(\((.*)\))?", analyzerspec)
     if match is None:
         raise ValueError("Invalid analyzer specification {}".format(analyzerspec))

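The regex in get_analyzer accepts specs of the form "name" or "name(args)", with the argument string handed to parse_args and on to the analyzer's constructor. A hedged usage sketch (the snowball(english) spec follows Annif's analyzer syntax; the sample sentence is illustrative, and NLTK's punkt tokenizer data is needed for tokenize_words):

    import annif.analyzer

    # "snowball" selects SnowballAnalyzer; "english" becomes its param argument
    analyzer = annif.analyzer.get_analyzer("snowball(english)")
    tokens = analyzer.tokenize_words("Stemming reduces words to stems")
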
10 changes: 6 additions & 4 deletions annif/analyzer/analyzer.py

@@ -1,8 +1,10 @@
 """Common functionality for analyzers."""
+from __future__ import annotations

 import abc
 import functools
 import unicodedata
+from typing import TYPE_CHECKING, Any, List, Union

 _KEY_TOKEN_MIN_LENGTH = "token_min_length"

@@ -15,18 +17,18 @@ class Analyzer(metaclass=abc.ABCMeta):
     name = None
     token_min_length = 3  # default value, can be overridden in instances

-    def __init__(self, **kwargs):
+    def __init__(self, **kwargs) -> None:
         if _KEY_TOKEN_MIN_LENGTH in kwargs:
             self.token_min_length = int(kwargs[_KEY_TOKEN_MIN_LENGTH])

-    def tokenize_sentences(self, text):
+    def tokenize_sentences(self, text: str) -> List[Union[Any, str]]:
         """Tokenize a piece of text (e.g. a document) into sentences."""
         import nltk.tokenize

         return nltk.tokenize.sent_tokenize(text)

     @functools.lru_cache(maxsize=50000)
-    def is_valid_token(self, word):
+    def is_valid_token(self, word: str) -> bool:
         """Return True if the word is an acceptable token."""
         if len(word) < self.token_min_length:
             return False
@@ -36,7 +38,7 @@ def is_valid_token(self, word):
             return True
         return False

-    def tokenize_words(self, text, filter=True):
+    def tokenize_words(self, text: str, filter: bool = True) -> List[Union[Any, str]]:
         """Tokenize a piece of text (e.g. a sentence) into words. If
         filter=True (default), only return valid tokens (e.g. not
         punctuation, numbers or very short words)"""

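The token_min_length keyword consumed in __init__ above can be overridden per analyzer instance. A small sketch using the SimpleAnalyzer subclass from this same commit (values are illustrative):

    from annif.analyzer.simple import SimpleAnalyzer

    analyzer = SimpleAnalyzer(param=None, token_min_length=5)
    analyzer.is_valid_token("cat")    # False: shorter than five characters
    analyzer.is_valid_token("12345")  # False: no alphabetic characters
    analyzer.is_valid_token("mouse")  # True: long enough and alphabetic

Note that functools.lru_cache on an instance method keys the cache on (self, word), so the cache also holds a reference to every analyzer instance it has seen.
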
7 changes: 5 additions & 2 deletions annif/analyzer/simple.py

@@ -1,14 +1,17 @@
 """Simple analyzer for Annif. Only folds words to lower case."""
+from __future__ import annotations

+from typing import TYPE_CHECKING

 from . import analyzer


 class SimpleAnalyzer(analyzer.Analyzer):
     name = "simple"

-    def __init__(self, param, **kwargs):
+    def __init__(self, param: None, **kwargs) -> None:
         self.param = param
         super().__init__(**kwargs)

-    def _normalize_word(self, word):
+    def _normalize_word(self, word: str) -> str:
         return word.lower()

7 changes: 5 additions & 2 deletions annif/analyzer/simplemma.py

@@ -1,4 +1,7 @@
 """Simplemma analyzer for Annif, based on simplemma lemmatizer."""
+from __future__ import annotations

+from typing import TYPE_CHECKING

 import simplemma

@@ -8,9 +11,9 @@
 class SimplemmaAnalyzer(analyzer.Analyzer):
     name = "simplemma"

-    def __init__(self, param, **kwargs):
+    def __init__(self, param: str, **kwargs) -> None:
         self.lang = param
         super().__init__(**kwargs)

-    def _normalize_word(self, word):
+    def _normalize_word(self, word: str) -> str:
         return simplemma.lemmatize(word, lang=self.lang)

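For reference, the simplemma call wrapped by _normalize_word is a plain function of a word and a language code. A minimal sketch (outputs are expected values, not verified against a specific simplemma version):

    import simplemma

    simplemma.lemmatize("masks", lang="en")   # expected: "mask"
    simplemma.lemmatize("taloja", lang="fi")  # expected: "talo"
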
6 changes: 4 additions & 2 deletions annif/analyzer/snowball.py

@@ -1,20 +1,22 @@
 """Snowball analyzer for Annif, based on nltk Snowball stemmer."""
+from __future__ import annotations

 import functools
+from typing import TYPE_CHECKING

 from . import analyzer


 class SnowballAnalyzer(analyzer.Analyzer):
     name = "snowball"

-    def __init__(self, param, **kwargs):
+    def __init__(self, param: str, **kwargs) -> None:
         self.param = param
         import nltk.stem.snowball

         self.stemmer = nltk.stem.snowball.SnowballStemmer(param)
         super().__init__(**kwargs)

     @functools.lru_cache(maxsize=500000)
-    def _normalize_word(self, word):
+    def _normalize_word(self, word: str) -> str:
         return self.stemmer.stem(word.lower())

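The underlying NLTK stemmer is created once per analyzer, and lru_cache memoizes the per-word results on top of it. A minimal sketch of the same call:

    import nltk.stem.snowball

    stemmer = nltk.stem.snowball.SnowballStemmer("english")
    stemmer.stem("running")  # -> "run"
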
7 changes: 5 additions & 2 deletions annif/analyzer/spacy.py

@@ -1,4 +1,7 @@
 """spaCy analyzer for Annif which uses spaCy for lemmatization"""
+from __future__ import annotations

+from typing import TYPE_CHECKING, List

 import annif.util
 from annif.exception import OperationFailedException
@@ -11,7 +14,7 @@
 class SpacyAnalyzer(analyzer.Analyzer):
     name = "spacy"

-    def __init__(self, param, **kwargs):
+    def __init__(self, param: str, **kwargs) -> None:
         import spacy

         self.param = param
@@ -28,7 +31,7 @@ def __init__(self, param, **kwargs):
         self.lowercase = False
         super().__init__(**kwargs)

-    def tokenize_words(self, text, filter=True):
+    def tokenize_words(self, text: str, filter: bool = True) -> List[str]:
         lemmas = [
             lemma
             for lemma in (token.lemma_ for token in self.nlp(text.strip()))
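tokenize_words is overridden here because spaCy produces lemmas as part of its pipeline rather than through a separate _normalize_word step. A hedged sketch of the underlying spaCy calls (the model name is an assumption and must be downloaded separately, e.g. with python -m spacy download):

    import spacy

    nlp = spacy.load("en_core_web_sm")  # hypothetical model choice
    [token.lemma_ for token in nlp("The cats were running")]
    # expected: ["the", "cat", "be", "run"]
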
8 changes: 5 additions & 3 deletions annif/analyzer/voikko.py

@@ -1,6 +1,8 @@
 """Voikko analyzer for Annif, based on libvoikko library."""
+from __future__ import annotations

 import functools
+from typing import TYPE_CHECKING, Dict, Optional

 import voikko.libvoikko

@@ -10,20 +12,20 @@
 class VoikkoAnalyzer(analyzer.Analyzer):
     name = "voikko"

-    def __init__(self, param, **kwargs):
+    def __init__(self, param: str, **kwargs) -> None:
         self.param = param
         self.voikko = None
         super().__init__(**kwargs)

-    def __getstate__(self):
+    def __getstate__(self) -> Dict[str, Optional[str]]:
         """Return the state of the object for pickling purposes. The Voikko
         instance is set to None because as a ctypes object it cannot be
         pickled."""

         return {"param": self.param, "voikko": None}

     @functools.lru_cache(maxsize=500000)
-    def _normalize_word(self, word):
+    def _normalize_word(self, word: str) -> str:
         if self.voikko is None:
             self.voikko = voikko.libvoikko.Voikko(self.param)
         result = self.voikko.analyze(word)
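__getstate__ exists so analyzer instances can be pickled, for example when handed to worker processes, even though the ctypes-backed Voikko handle cannot be. A generic standalone sketch of the same drop-and-recreate pattern (stdlib only; names are illustrative, no libvoikko needed):

    import pickle

    class LazyResource:
        def __init__(self, param: str) -> None:
            self.param = param
            self.handle = None  # stands in for an unpicklable ctypes object

        def __getstate__(self) -> dict:
            # Drop the live handle; it is recreated lazily after unpickling.
            return {"param": self.param, "handle": None}

        def ensure_handle(self) -> None:
            if self.handle is None:
                self.handle = object()  # stand-in for voikko.libvoikko.Voikko(...)

    restored = pickle.loads(pickle.dumps(LazyResource("fi")))
    assert restored.handle is None  # the handle is rebuilt on demand, not unpickled
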
43 changes: 30 additions & 13 deletions annif/backend/__init__.py

@@ -1,20 +1,37 @@
 """Registry of backend types for Annif"""
+from __future__ import annotations

+from typing import TYPE_CHECKING, Any, Type

+if TYPE_CHECKING:
+    from annif.backend.dummy import DummyBackend
+    from annif.backend.ensemble import EnsembleBackend
+    from annif.backend.fasttext import FastTextBackend
+    from annif.backend.http import HTTPBackend
+    from annif.backend.mllm import MLLMBackend
+    from annif.backend.nn_ensemble import NNEnsembleBackend
+    from annif.backend.omikuji import OmikujiBackend
+    from annif.backend.pav import PAVBackend
+    from annif.backend.stwfsa import StwfsaBackend
+    from annif.backend.svc import SVCBackend
+    from annif.backend.tfidf import TFIDFBackend
+    from annif.backend.yake import YakeBackend


 # define functions for lazily importing each backend (alphabetical order)
-def _dummy():
+def _dummy() -> Type[DummyBackend]:
     from . import dummy

     return dummy.DummyBackend


-def _ensemble():
+def _ensemble() -> Type[EnsembleBackend]:
     from . import ensemble

     return ensemble.EnsembleBackend


-def _fasttext():
+def _fasttext() -> Type[FastTextBackend]:
     try:
         from . import fasttext

@@ -23,19 +40,19 @@ def _fasttext():
         raise ValueError("fastText not available, cannot use fasttext backend")


-def _http():
+def _http() -> Type[HTTPBackend]:
     from . import http

     return http.HTTPBackend


-def _mllm():
+def _mllm() -> Type[MLLMBackend]:
     from . import mllm

     return mllm.MLLMBackend


-def _nn_ensemble():
+def _nn_ensemble() -> Type[NNEnsembleBackend]:
     try:
         from . import nn_ensemble

@@ -46,7 +63,7 @@ def _nn_ensemble():
         )


-def _omikuji():
+def _omikuji() -> Type[OmikujiBackend]:
     try:
         from . import omikuji

@@ -55,13 +72,13 @@ def _omikuji():
         raise ValueError("Omikuji not available, cannot use omikuji backend")


-def _pav():
+def _pav() -> Type[PAVBackend]:
     from . import pav

     return pav.PAVBackend


-def _stwfsa():
+def _stwfsa() -> Type[StwfsaBackend]:
     try:
         from . import stwfsa

@@ -70,19 +87,19 @@ def _stwfsa():
         raise ValueError("STWFSA not available, cannot use stwfsa backend")


-def _svc():
+def _svc() -> Type[SVCBackend]:
     from . import svc

     return svc.SVCBackend


-def _tfidf():
+def _tfidf() -> Type[TFIDFBackend]:
     from . import tfidf

     return tfidf.TFIDFBackend


-def _yake():
+def _yake() -> Type[YakeBackend]:
     try:
         from . import yake

@@ -108,7 +125,7 @@ def _yake():
 }


-def get_backend(backend_id):
+def get_backend(backend_id: str) -> Any:
     if backend_id in _backend_fns:
         return _backend_fns[backend_id]()
     else:
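The Type[...] return annotations exist only for type checkers, since the backend classes are deliberately not imported until a loader runs; get_backend itself is annotated Any because its concrete return type depends on the id looked up at runtime. A hedged usage sketch (the "dummy" id is an assumption based on the _dummy loader above; the registry table itself is collapsed in this diff):

    import annif.backend

    backend_class = annif.backend.get_backend("dummy")  # triggers the lazy import
    # backend_class is now dummy.DummyBackend, ready to be instantiated
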