Skip to content
This repository has been archived by the owner on Jul 4, 2023. It is now read-only.

Commit

Permalink
Lazy-load six and requests for production
Browse files Browse the repository at this point in the history
  • Loading branch information
PetrochukM committed Aug 19, 2019
1 parent 4e84780 commit 38619d9
Show file tree
Hide file tree
Showing 4 changed files with 18 additions and 13 deletions.
4 changes: 3 additions & 1 deletion torchnlp/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,15 @@

import logging
import os
import requests
import subprocess
import urllib.request
import zipfile

from third_party.lazy_loader import LazyLoader
from tqdm import tqdm

requests = LazyLoader('requests', globals(), 'requests')

logger = logging.getLogger(__name__)


Expand Down
18 changes: 9 additions & 9 deletions torchnlp/encoders/text/subword_text_tokenizer.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,16 +20,16 @@
import unicodedata

# Dependency imports
from third_party.lazy_loader import LazyLoader

import six
from six.moves import xrange # pylint: disable=redefined-builtin
six = LazyLoader('six', globals(), 'six')

logger = logging.getLogger(__name__)

# This set contains all letter and number characters.
_ALPHANUMERIC_CHAR_SET = set(
six.unichr(i)
for i in xrange(sys.maxunicode)
for i in six.moves.xrange(sys.maxunicode)
if (unicodedata.category(six.unichr(i)).startswith("L") or
unicodedata.category(six.unichr(i)).startswith("N")))

Expand Down Expand Up @@ -70,7 +70,7 @@ def encode(text):
token_start = 0
# Classify each character in the input string
is_alnum = [c in _ALPHANUMERIC_CHAR_SET for c in text]
for pos in xrange(1, len(text)):
for pos in six.moves.xrange(1, len(text)):
if is_alnum[pos] != is_alnum[pos - 1]:
token = text[token_start:pos]
if token != u" " or token_start == 0:
Expand Down Expand Up @@ -242,7 +242,7 @@ def _escaped_token_to_subtoken_strings(self, escaped_token):
start = 0
token_len = len(escaped_token)
while start < token_len:
for end in xrange(min(token_len, start + self._max_subtoken_len), start, -1):
for end in six.moves.xrange(min(token_len, start + self._max_subtoken_len), start, -1):
subtoken = escaped_token[start:end]
if subtoken in self._all_subtoken_strings:
ret.append(subtoken)
Expand Down Expand Up @@ -356,7 +356,7 @@ def build_from_token_counts(self, token_counts, min_count, num_iterations=4):
# with high enough counts for our new vocabulary.
if min_count < 1:
min_count = 1
for i in xrange(num_iterations):
for i in six.moves.xrange(num_iterations):

# Collect all substrings of the encoded token that break along current
# subtoken boundaries.
Expand All @@ -366,7 +366,7 @@ def build_from_token_counts(self, token_counts, min_count, num_iterations=4):
subtokens = self._escaped_token_to_subtoken_strings(escaped_token)
start = 0
for subtoken in subtokens:
for end in xrange(start + 1, len(escaped_token) + 1):
for end in six.moves.xrange(start + 1, len(escaped_token) + 1):
new_subtoken = escaped_token[start:end]
subtoken_counts[new_subtoken] += count
start += len(subtoken)
Expand All @@ -384,7 +384,7 @@ def build_from_token_counts(self, token_counts, min_count, num_iterations=4):
# a longer subtoken string, we can decrement the counts of its
# prefixes.
new_subtoken_strings = []
for lsub in xrange(len(len_to_subtoken_strings) - 1, 0, -1):
for lsub in six.moves.xrange(len(len_to_subtoken_strings) - 1, 0, -1):
subtoken_strings = len_to_subtoken_strings[lsub]
for subtoken_string in subtoken_strings:
count = subtoken_counts[subtoken_string]
Expand All @@ -393,7 +393,7 @@ def build_from_token_counts(self, token_counts, min_count, num_iterations=4):
# explicitly, regardless of count.
if subtoken_string not in self._alphabet:
new_subtoken_strings.append((count, subtoken_string))
for l in xrange(1, lsub):
for l in six.moves.xrange(1, lsub):
subtoken_counts[subtoken_string[:l]] -= count

# Include the alphabet explicitly to guarantee all strings are
Expand Down
6 changes: 4 additions & 2 deletions torchnlp/metrics/bleu.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,9 +20,11 @@
import tempfile
import logging

from third_party.lazy_loader import LazyLoader

import numpy as np

from six.moves import urllib
six = LazyLoader('six', globals(), 'six')

logger = logging.getLogger(__name__)

Expand Down Expand Up @@ -64,7 +66,7 @@ def get_moses_multi_bleu(hypotheses, references, lowercase=False):

# Get MOSES multi-bleu script
try:
multi_bleu_path, _ = urllib.request.urlretrieve(
multi_bleu_path, _ = six.moves.urllib.request.urlretrieve(
"https://raw.githubusercontent.com/moses-smt/mosesdecoder/"
"master/scripts/generic/multi-bleu.perl")
os.chmod(multi_bleu_path, 0o755)
Expand Down
3 changes: 2 additions & 1 deletion torchnlp/word_to_vector/pretrained_word_vectors.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,11 @@
import logging
import os

from third_party.lazy_loader import LazyLoader
from tqdm import tqdm

import six
import torch
six = LazyLoader('six', globals(), 'six')

from torchnlp.download import download_file_maybe_extract

Expand Down

0 comments on commit 38619d9

Please sign in to comment.