Lazy import of NLTK
osma committed Dec 20, 2021
1 parent 3619ee1 commit da6a93f
Showing 1 changed file with 2 additions and 1 deletion.
annif/analyzer/analyzer.py: 3 changes (2 additions & 1 deletion)
@@ -3,7 +3,6 @@
 import abc
 import functools
 import unicodedata
-import nltk.tokenize

 _KEY_TOKEN_MIN_LENGTH = 'token_min_length'

@@ -22,6 +21,7 @@ def __init__(self, **kwargs):

     def tokenize_sentences(self, text):
         """Tokenize a piece of text (e.g. a document) into sentences."""
+        import nltk.tokenize
         return nltk.tokenize.sent_tokenize(text)

     @functools.lru_cache(maxsize=50000)
@@ -37,6 +37,7 @@ def is_valid_token(self, word):

     def tokenize_words(self, text):
         """Tokenize a piece of text (e.g. a sentence) into words."""
+        import nltk.tokenize
         return [self.normalize_word(word)
                 for word in nltk.tokenize.word_tokenize(text)
                 if self.is_valid_token(word)]
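
This change turns the module-level `import nltk.tokenize` into function-local imports, so importing annif.analyzer.analyzer no longer pays NLTK's startup cost; NLTK is loaded only the first time a tokenizer method is actually called. Below is a minimal, illustrative sketch of that lazy-import pattern, not Annif's actual code: the class name and the `__main__` check are hypothetical, and running it assumes the nltk package and its Punkt sentence tokenizer data are installed.

import sys


class LazySentenceTokenizer:
    """Illustrative stand-in for an analyzer class: defers the NLTK import."""

    def tokenize_sentences(self, text):
        # Importing here keeps the enclosing module cheap to import.
        # Python caches nltk.tokenize in sys.modules, so only the first
        # call pays the import cost; later calls reuse the cached module.
        import nltk.tokenize
        return nltk.tokenize.sent_tokenize(text)


if __name__ == "__main__":
    print("nltk" in sys.modules)   # False: defining the class loads nothing heavy
    tokenizer = LazySentenceTokenizer()
    print(tokenizer.tokenize_sentences("First sentence. Second sentence."))
    print("nltk" in sys.modules)   # True: NLTK was loaded on first use

The trade-off is a small delay on the first tokenization call in exchange for a faster package import, which helps commands that never tokenize any text.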
