Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Optimize startup time using local & lazy imports (take 2) #544

Merged
merged 5 commits into from
Dec 20, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion annif/analyzer/analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@
import abc
import functools
import unicodedata
import nltk.tokenize

_KEY_TOKEN_MIN_LENGTH = 'token_min_length'

Expand All @@ -22,6 +21,7 @@ def __init__(self, **kwargs):

def tokenize_sentences(self, text):
    """Split *text* (e.g. a whole document) into a list of sentences."""
    # nltk is imported lazily so that merely loading this module stays cheap
    from nltk.tokenize import sent_tokenize
    return sent_tokenize(text)

@functools.lru_cache(maxsize=50000)
Expand All @@ -37,6 +37,7 @@ def is_valid_token(self, word):

def tokenize_words(self, text):
    """Split *text* (e.g. a sentence) into normalized, valid word tokens."""
    # lazy import keeps application startup fast
    from nltk.tokenize import word_tokenize
    tokens = []
    for word in word_tokenize(text):
        if self.is_valid_token(word):
            tokens.append(self.normalize_word(word))
    return tokens
Expand Down
2 changes: 1 addition & 1 deletion annif/analyzer/snowball.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
"""Snowball analyzer for Annif, based on nltk Snowball stemmer."""

import functools
import nltk.stem.snowball
from . import analyzer


Expand All @@ -10,6 +9,7 @@ class SnowballAnalyzer(analyzer.Analyzer):

def __init__(self, param, **kwargs):
    """Create a Snowball analyzer; *param* names the stemming language."""
    self.param = param
    # defer importing nltk until an analyzer is actually instantiated
    from nltk.stem.snowball import SnowballStemmer
    self.stemmer = SnowballStemmer(param)
    super().__init__(**kwargs)

Expand Down
138 changes: 88 additions & 50 deletions annif/backend/__init__.py
Original file line number Diff line number Diff line change
@@ -1,61 +1,99 @@
"""Registry of backend types for Annif"""

from . import dummy
from . import ensemble
from . import http
from . import tfidf
from . import pav
from . import stwfsa
from . import mllm
from . import svc
import annif

# define functions for lazily importing each backend (alphabetical order)
def _dummy():
    """Lazily import and return the dummy backend class."""
    from .dummy import DummyBackend
    return DummyBackend

_backend_types = {}

def _ensemble():
    """Lazily import and return the ensemble backend class."""
    from .ensemble import EnsembleBackend
    return EnsembleBackend

def register_backend(backend):
_backend_types[backend.name] = backend

def _fasttext():
    """Lazily import and return the fastText backend class.

    Raises ValueError when the optional fastText dependency is not
    installed.
    """
    try:
        from . import fasttext
        return fasttext.FastTextBackend
    except ImportError as err:
        # chain the ImportError so the real cause stays in the traceback
        raise ValueError(
            "fastText not available, cannot use fasttext backend") from err

def get_backend(backend_id):

def _http():
    """Lazily import and return the HTTP backend class."""
    from .http import HTTPBackend
    return HTTPBackend


def _mllm():
    """Lazily import and return the MLLM backend class."""
    from .mllm import MLLMBackend
    return MLLMBackend


def _nn_ensemble():
    """Lazily import and return the neural-network ensemble backend class.

    Raises ValueError when the optional Keras/TensorFlow dependencies are
    not installed.
    """
    try:
        from . import nn_ensemble
        return nn_ensemble.NNEnsembleBackend
    except ImportError as err:
        # chain the ImportError so the real cause stays in the traceback
        raise ValueError("Keras and TensorFlow not available, cannot use " +
                         "nn_ensemble backend") from err


def _omikuji():
    """Lazily import and return the Omikuji backend class.

    Raises ValueError when the optional Omikuji dependency is not
    installed.
    """
    try:
        from . import omikuji
        return omikuji.OmikujiBackend
    except ImportError as err:
        # chain the ImportError so the real cause stays in the traceback
        raise ValueError(
            "Omikuji not available, cannot use omikuji backend") from err


def _pav():
    """Lazily import and return the PAV backend class."""
    from .pav import PAVBackend
    return PAVBackend


register_backend(dummy.DummyBackend)
register_backend(ensemble.EnsembleBackend)
register_backend(http.HTTPBackend)
register_backend(tfidf.TFIDFBackend)
register_backend(pav.PAVBackend)
register_backend(stwfsa.StwfsaBackend)
register_backend(mllm.MLLMBackend)
register_backend(svc.SVCBackend)

# Optional backends
try:
from . import fasttext
register_backend(fasttext.FastTextBackend)
except ImportError:
annif.logger.debug("fastText not available, not enabling fasttext backend")

try:
from . import nn_ensemble
register_backend(nn_ensemble.NNEnsembleBackend)
except ImportError:
annif.logger.debug("Keras and TensorFlow not available, not enabling " +
"nn_ensemble backend")

try:
from . import omikuji
register_backend(omikuji.OmikujiBackend)
except ImportError:
annif.logger.debug("Omikuji not available, not enabling omikuji backend")

try:
from . import yake
register_backend(yake.YakeBackend)
except ImportError:
annif.logger.debug("YAKE not available, not enabling yake backend")
def _stwfsa():
    """Lazily import and return the STWFSA backend class."""
    from .stwfsa import StwfsaBackend
    return StwfsaBackend


def _svc():
    """Lazily import and return the SVC backend class."""
    from .svc import SVCBackend
    return SVCBackend


def _tfidf():
    """Lazily import and return the TF-IDF backend class."""
    from .tfidf import TFIDFBackend
    return TFIDFBackend


def _yake():
    """Lazily import and return the YAKE backend class.

    Raises ValueError when the optional YAKE dependency is not installed.
    """
    try:
        from . import yake
        return yake.YakeBackend
    except ImportError as err:
        # chain the ImportError so the real cause stays in the traceback
        raise ValueError(
            "YAKE not available, cannot use yake backend") from err


# Registry mapping backend type identifiers to zero-argument loader
# functions. Each loader imports its backend module only on first use
# (keeping startup fast) and raises ValueError when an optional
# dependency is missing. Entries are kept in alphabetical order.
_backend_fns = {
    'dummy': _dummy,
    'ensemble': _ensemble,
    'fasttext': _fasttext,
    'http': _http,
    'mllm': _mllm,
    'nn_ensemble': _nn_ensemble,
    'omikuji': _omikuji,
    'pav': _pav,
    'stwfsa': _stwfsa,
    'svc': _svc,
    'tfidf': _tfidf,
    'yake': _yake
}


def get_backend(backend_id):
    """Return the backend class registered under *backend_id*.

    Raises ValueError for unknown backend types; the loader itself raises
    ValueError when an optional dependency is missing.
    """
    loader = _backend_fns.get(backend_id)
    if loader is None:
        raise ValueError("No such backend type {}".format(backend_id))
    return loader()
3 changes: 2 additions & 1 deletion annif/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,6 @@
from flask.cli import FlaskGroup, ScriptInfo
import annif
import annif.corpus
import annif.eval
import annif.parallel
import annif.project
import annif.registry
Expand Down Expand Up @@ -89,6 +88,7 @@ def validate_backend_params(backend, beparam, project):


def generate_filter_batches(subjects):
import annif.eval
filter_batches = collections.OrderedDict()
for limit in range(1, BATCH_MAX_LIMIT + 1):
for threshold in [i * 0.05 for i in range(20)]:
Expand Down Expand Up @@ -347,6 +347,7 @@ def run_eval(
project = get_project(project_id)
backend_params = parse_backend_params(backend_param, project)

import annif.eval
eval_batch = annif.eval.EvaluationBatch(project.subjects)

if results_file:
Expand Down
33 changes: 33 additions & 0 deletions tests/test_backend.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
import annif
import annif.backend
import annif.corpus
import importlib.util


def test_get_backend_nonexistent():
Expand Down Expand Up @@ -50,3 +51,35 @@ def test_fill_params_with_defaults(project):
project=project)
expected_default_params = {'limit': 100}
assert expected_default_params == dummy.params


@pytest.mark.skipif(importlib.util.find_spec("fasttext") is not None,
                    reason="test requires that fastText is NOT installed")
def test_get_backend_fasttext_not_installed():
    # requesting the fasttext backend without the package must fail clearly
    with pytest.raises(ValueError, match='fastText not available'):
        annif.backend.get_backend('fasttext')


@pytest.mark.skipif(importlib.util.find_spec("tensorflow") is not None,
                    reason="test requires that TensorFlow is NOT installed")
def test_get_backend_nn_ensemble_not_installed():
    # requesting nn_ensemble without TensorFlow must fail clearly
    with pytest.raises(ValueError, match='TensorFlow not available'):
        annif.backend.get_backend('nn_ensemble')


@pytest.mark.skipif(importlib.util.find_spec("omikuji") is not None,
                    reason="test requires that Omikuji is NOT installed")
def test_get_backend_omikuji_not_installed():
    # requesting the omikuji backend without the package must fail clearly
    with pytest.raises(ValueError, match='Omikuji not available'):
        annif.backend.get_backend('omikuji')


@pytest.mark.skipif(importlib.util.find_spec("yake") is not None,
                    reason="test requires that YAKE is NOT installed")
def test_get_backend_yake_not_installed():
    # requesting the yake backend without the package must fail clearly
    with pytest.raises(ValueError, match='YAKE not available'):
        annif.backend.get_backend('yake')