This repository has been archived by the owner on Jun 14, 2018. It is now read-only.

Commit 9d2b9cb: cleanup

msarahan committed Sep 28, 2015
1 parent 924bd9b

Showing 9 changed files with 18 additions and 35 deletions.
6 changes: 0 additions & 6 deletions topik/importers.py

This file was deleted.

4 changes: 1 addition & 3 deletions topik/intermediaries/digested_document_collection.py
@@ -1,7 +1,5 @@
-from itertools import tee
-
-from gensim.interfaces import CorpusABC
 from gensim.corpora.dictionary import Dictionary
+from gensim.interfaces import CorpusABC
 
 from .raw_data import load_persisted_corpus

4 changes: 2 additions & 2 deletions topik/intermediaries/raw_data.py
@@ -3,12 +3,12 @@
 Elasticsearch. The class(es) defined here are fed into the preprocessing step.
 """
 
+from abc import ABCMeta, abstractmethod, abstractproperty
 import logging
 import time
-from abc import ABCMeta, abstractmethod, abstractproperty
 
-from six import with_metaclass
 from elasticsearch import Elasticsearch, helpers
+from six import with_metaclass
 
 from topik.intermediaries.persistence import Persistor

Empty file removed: topik/tests/test_intermediaries.py
5 changes: 1 addition & 4 deletions topik/tests/test_models.py
@@ -1,8 +1,5 @@
 import os
 import unittest
-from abc import ABCMeta, abstractmethod
-
-from six import with_metaclass
 
 from topik.readers import read_input
 from topik.preprocessing import preprocess
@@ -16,7 +13,7 @@ def setUp(self):
 MODEL_SAVE_FILENAME = os.path.join(module_path, 'test.model')
 
 
-class _ModelBase(with_metaclass(ABCMeta)):
+class _ModelBase(object):
     def setUp(self):
         raw_data = read_input(
             source=os.path.join(module_path, 'data/test_data_json_stream.json'),
1 change: 0 additions & 1 deletion topik/tests/test_readers.py
@@ -1,5 +1,4 @@
 import unittest
-from functools import partial
 
 import nose.tools as nt
 import elasticsearch
26 changes: 10 additions & 16 deletions topik/tests/test_tokenizers.py
@@ -2,12 +2,12 @@
 import unittest
 
 from topik.readers import read_input
-from topik.intermediaries.raw_data import ElasticSearchCorpus, _get_hash_identifier
 from topik.tokenizers import tokenizer_methods, find_entities, collect_bigrams_and_trigrams
 
 # sample data files are located in the same folder
 module_path = os.path.dirname(__file__)
 
+
 class TestTokenizers(unittest.TestCase):
     def setUp(self):
         self.solution_simple_tokenizer_test_data_1 = [
@@ -81,10 +81,8 @@ def setUp(self):
             u'properties', u'sol', u'gel', u'method', u'dna', u'easy',
             u'method', u'biomedical', u'applications']
 
-        self.data_json_stream_path = os.path.join(module_path,
-                                                  'data/test_data_json_stream.json')
-        self.data_large_json_path = os.path.join(module_path,
-                                                 'data/test_data_large_json.json')
+        self.data_json_stream_path = os.path.join(module_path, 'data/test_data_json_stream.json')
+        self.data_large_json_path = os.path.join(module_path, 'data/test_data_large_json.json')
         assert os.path.exists(self.data_json_stream_path)
         assert os.path.exists(self.data_large_json_path)

@@ -93,10 +91,9 @@ def test_simple_tokenizer(self):
             source=self.data_json_stream_path,
             content_field="abstract",
             output_type="dictionary")
-        id, text = next(iter(raw_data))
+        _, text = next(iter(raw_data))
        doc_tokens = tokenizer_methods["simple"](text)
-        self.assertEqual(doc_tokens,
-                         self.solution_simple_tokenizer_test_data_json_stream)
+        self.assertEqual(doc_tokens, self.solution_simple_tokenizer_test_data_json_stream)
 
     def test_collocations_tokenizer(self):
         raw_data = read_input(
@@ -106,21 +103,19 @@ def test_collocations_tokenizer(self):
         bigrams, trigrams = collect_bigrams_and_trigrams(raw_data,
                                                          min_bigram_freq=2,
                                                          min_trigram_freq=2)
-        id, text = next(iter(raw_data))
+        _, text = next(iter(raw_data))
         doc_tokens = tokenizer_methods["collocation"](text, bigrams, trigrams)
-        self.assertEqual(doc_tokens,
-                         self.solution_collocations_tokenizer_test_data_json_stream)
+        self.assertEqual(doc_tokens, self.solution_collocations_tokenizer_test_data_json_stream)
 
     def test_entities_tokenizer_json_stream(self):
         raw_data = read_input(
             source=self.data_json_stream_path,
             content_field="abstract",
             output_type="dictionary")
         entities = find_entities(raw_data, freq_min=1)
-        id, text = next(iter(raw_data))
+        _, text = next(iter(raw_data))
         doc_tokens = tokenizer_methods["entities"](text, entities)
-        self.assertEqual(doc_tokens,
-                         self.solution_entities_tokenizer_test_data_json_stream)
+        self.assertEqual(doc_tokens, self.solution_entities_tokenizer_test_data_json_stream)
 
     def test_mixed_tokenizer(self):
         raw_data = read_input(
@@ -130,8 +125,7 @@ def test_mixed_tokenizer(self):
         entities = find_entities(raw_data)
         id, text = next(iter(raw_data))
         doc_tokens = tokenizer_methods["mixed"](text, entities)
-        self.assertEqual(doc_tokens,
-                         self.solution_mixed_tokenizer_test_data_json_stream)
+        self.assertEqual(doc_tokens, self.solution_mixed_tokenizer_test_data_json_stream)
 
 
 if __name__ == '__main__':
1 change: 1 addition & 0 deletions topik/tests/test_viz.py
@@ -5,6 +5,7 @@
 
 module_path = os.path.dirname(__file__)
 
+
 class TestTokenizers(unittest.TestCase):
 
     def test_termite(self):
6 changes: 3 additions & 3 deletions topik/tokenizers.py
@@ -1,14 +1,14 @@
 from __future__ import absolute_import, print_function
 
-import logging
 import itertools
+import logging
 import re
 
+import gensim
+from gensim.parsing.preprocessing import STOPWORDS
 from nltk.collocations import TrigramCollocationFinder
 from nltk.metrics import BigramAssocMeasures, TrigramAssocMeasures
 from textblob import TextBlob
-import gensim
-from gensim.parsing.preprocessing import STOPWORDS
 
 # imports used only for doctests
 from topik.tests import test_data_path
