pep8

DARIAH-DE · Dec 21, 2018 · 30cfe21 · 30cfe21
1 parent 3fddeae
commit 30cfe21
Show file tree

Hide file tree

Showing 6 changed files with 80 additions and 55 deletions.
diff --git a/application/database.py b/application/database.py
@@ -30,15 +30,15 @@ def _insert_into_textfiles(db, data):
         if content:
             logging.info("Insert '{}' into database...".format(title))
             db.execute("""
-                    INSERT INTO textfiles (title, content) 
+                    INSERT INTO textfiles (title, content)
                     VALUES(?, ?);
                     """, [title, content])
 
 
 def _insert_into_token_freqs(db, data):
     logging.info("Insert token frequencies into database...")
     db.execute("""
-               INSERT INTO token_freqs (content) 
+               INSERT INTO token_freqs (content)
                VALUES(?);
                """, [data])
 
@@ -75,9 +75,9 @@ def _update_textfile_sizes(db, data):
     logging.info("Update textfile sizes in database...")
     for title, size in data.items():
         db.execute("""
-                   UPDATE textfiles 
-                   SET size = ? 
-                   WHERE title = ?; 
+                   UPDATE textfiles
+                   SET size = ?
+                   WHERE title = ?;
                    """,
                    [size, title])
 
@@ -148,71 +148,71 @@ def _select_textfile_sizes(cursor):
 def _select_parameters(cursor):
     logging.info("Select parameters from database...")
     return cursor.execute("""
-                           SELECT content 
+                           SELECT content
                            FROM parameters;
                            """).fetchone()
 
 
 def _select_stopwords(cursor):
     logging.info("Select stopwords from database...")
     return cursor.execute("""
-                          SELECT content 
+                          SELECT content
                           FROM stopwords;
                           """).fetchone()[0]
 
 
 def _select_document_similarities(cursor):
     logging.info("Select document similarity matrix from database...")
     return cursor.execute("""
-                          SELECT document_similarities 
+                          SELECT document_similarities
                           FROM model;
                           """).fetchone()[0]
 
 
 def _select_topic_similarities(cursor):
     logging.info("Select topic similarity matrix from database...")
     return cursor.execute("""
-                          SELECT topic_similarities 
+                          SELECT topic_similarities
                           FROM model;
                           """).fetchone()[0]
 
 
 def _select_token_freqs(cursor):
     logging.info("Select token frequencies from database...")
     return cursor.execute("""
-                          SELECT content 
+                          SELECT content
                           FROM token_freqs;
                           """).fetchone()[0]
 
 
 def _select_textfiles(cursor):
     logging.info("Select textfiles from database...")
     return cursor.execute("""
-                   SELECT title, content 
+                   SELECT title, content
                    FROM textfiles;
                    """).fetchall()
 
 
 def _select_document_topic_distributions(cursor):
     logging.info("Select document-topic distributions from database...")
     return cursor.execute("""
-                          SELECT document_topic 
+                          SELECT document_topic
                           FROM model;
                           """).fetchone()[0]
 
 
 def _select_topics(cursor):
     logging.info("Select topics from database...")
     return cursor.execute("""
-                              SELECT topics 
+                              SELECT topics
                               FROM model;
                               """).fetchone()[0]
 
 
 def _select_textfile(cursor, title):
     logging.info("Select '{}' from database...".format(title))
     return cursor.execute("""
-                          SELECT content 
+                          SELECT content
                           FROM textfiles
                           WHERE title = ?;
                           """, [title]).fetchone()[0]
@@ -223,7 +223,7 @@ def _select_data_export(cursor):
 
     logging.info("Select model output from database...")
     model = cursor.execute("""
-                           SELECT document_topic, topics, document_similarities, topic_similarities 
+                           SELECT document_topic, topics, document_similarities, topic_similarities
                            FROM model;
                            """).fetchone()
     return model, stopwords
diff --git a/application/gui.py b/application/gui.py
@@ -16,30 +16,31 @@
 
 # This is for high DPI scaling:
 if hasattr(QtCore.Qt, "AA_EnableHighDpiScaling"):
-    QtWidgets.QApplication.setAttribute(QtCore.Qt.AA_EnableHighDpiScaling, True)
+    QtWidgets.QApplication.setAttribute(
+        QtCore.Qt.AA_EnableHighDpiScaling, True)
 if hasattr(QtCore.Qt, "AA_UseHighDpiPixmaps"):
     QtWidgets.QApplication.setAttribute(QtCore.Qt.AA_UseHighDpiPixmaps, True)
 
 
 def download_request(item):
-        """Opens a file dialog to save the ZIP archive.
-        """
-        mimetype = item.mimeType()
-        if "octet-stream" in mimetype:
-            ext = ".png"
-        elif "svg" in item.mimeType():
-            ext = ".svg"
-        elif "zip" in mimetype:
-            ext = ".zip"
-        else:
-            ext = ""
-
-        path = QtWidgets.QFileDialog.getSaveFileName(None,
-                                                    "Select destination folder and file name",
-                                                    "",
-                                                    "")[0]
-        item.setPath("{path}{ext}".format(path=path, ext=ext))
-        item.accept()
+    """Opens a file dialog to save the ZIP archive.
+    """
+    mimetype = item.mimeType()
+    if "octet-stream" in mimetype:
+        ext = ".png"
+    elif "svg" in item.mimeType():
+        ext = ".svg"
+    elif "zip" in mimetype:
+        ext = ".zip"
+    else:
+        ext = ""
+
+    path = QtWidgets.QFileDialog.getSaveFileName(None,
+                                                 "Select destination folder and file name",
+                                                 "",
+                                                 "")[0]
+    item.setPath("{path}{ext}".format(path=path, ext=ext))
+    item.accept()
 
 
 class ApplicationThread(QtCore.QThread):
@@ -83,15 +84,15 @@ def init_gui(application, port=PORT, argv=None, title=TITLE, icon=ICON):
     qtapp = QtWidgets.QApplication(argv)
     web = ApplicationThread(application, port)
     web.start()
-    
+
     def kill(application=web):
         """Kill the Flask process.
         """
         application.terminate()
 
     qtapp.aboutToQuit.connect(kill)
 
-    # Setting width and height individually based on the 
+    # Setting width and height individually based on the
     # screen resolution: 93% of the screen for width,
     # 80% for height:
     screen = qtapp.primaryScreen()
@@ -104,11 +105,11 @@ def kill(application=web):
     webview.resize(width, height)
     webview.setWindowTitle(title)
     webview.setWindowIcon(QtGui.QIcon(icon))
-    
+
     page = WebPage('http://localhost:{}'.format(port))
     page.home()
     webview.setPage(page)
-    
+
     # If the user clicks a download button, a window pops up:
     webview.page().profile().downloadRequested.connect(download_request)
 

diff --git a/application/utils.py b/application/utils.py
@@ -159,7 +159,8 @@ def get_topics(model, vocabulary, maximum=100):
     """
     logging.info("Fetching topics from topic model...")
     for distribution in model.topic_word_:
-        words = list(np.array(vocabulary)[np.argsort(distribution)][:-maximum-1:-1])
+        words = list(np.array(vocabulary)[
+                     np.argsort(distribution)][:-maximum - 1:-1])
         yield "{}, ...".format(", ".join(words[:3])), words
 
 
@@ -203,17 +204,20 @@ def export_data():
 
     logging.info("Preparing document-topic distributions...")
     document_topic = pd.read_json(document_topic, orient="index")
-    document_topic.columns = [col.replace(",", "").replace(" ...", "") for col in document_topic.columns]
+    document_topic.columns = [col.replace(",", "").replace(
+        " ...", "") for col in document_topic.columns]
 
     logging.info("Preparing topics...")
     topics = pd.read_json(topics, orient="index")
     topics.index = ["Topic {}".format(n) for n in range(topics.shape[0])]
-    topics.columns = ["Word {}".format(n) for n in  range(topics.shape[1])]
+    topics.columns = ["Word {}".format(n) for n in range(topics.shape[1])]
 
     logging.info("Preparing topic similarity matrix...")
     topic_similarities = pd.read_json(topic_similarities)
-    topic_similarities.columns = [col.replace(",", "").replace(" ...", "") for col in topic_similarities.columns]
-    topic_similarities.index = [ix.replace(",", "").replace(" ...", "") for ix in topic_similarities.index]
+    topic_similarities.columns = [col.replace(",", "").replace(
+        " ...", "") for col in topic_similarities.columns]
+    topic_similarities.index = [ix.replace(",", "").replace(
+        " ...", "") for ix in topic_similarities.index]
 
     logging.info("Preparing document similarity matrix...")
     document_similarities = pd.read_json(document_similarities)

diff --git a/application/views.py b/application/views.py
@@ -172,18 +172,22 @@ def topics(topic):
     logging.info("Get topics...")
     topics = json.loads(get_topics())
     logging.info("Get document-topic distributions...")
-    document_topic = pd.read_json(get_document_topic_distributions(), orient="index")
+    document_topic = pd.read_json(
+        get_document_topic_distributions(), orient="index")
     logging.info("Get topic similarity matrix...")
     topic_similarites = pd.read_json(get_topic_similarities())
 
     logging.info("Get related documents...")
     related_docs = document_topic[topic].sort_values(ascending=False)[:10]
     related_docs_proportions = utils.scale(related_docs, minimum=70)
-    related_docs_proportions = pd.Series(related_docs_proportions, index=related_docs.index)
-    related_docs_proportions = related_docs_proportions.sort_values(ascending=False)
+    related_docs_proportions = pd.Series(
+        related_docs_proportions, index=related_docs.index)
+    related_docs_proportions = related_docs_proportions.sort_values(
+        ascending=False)
 
     # Convert pandas.Series to a 2-D array:
-    related_docs_proportions = list(utils.series2array(related_docs_proportions))
+    related_docs_proportions = list(
+        utils.series2array(related_docs_proportions))
 
     logging.info("Get related words...")
     related_words = topics[topic][:15]
@@ -213,19 +217,24 @@ def documents(title):
     logging.info("Get textfiles...")
     text = get_textfile(title)
     logging.info("Get document-topics distributions...")
-    document_topic = pd.read_json(get_document_topic_distributions(), orient="index")
+    document_topic = pd.read_json(
+        get_document_topic_distributions(), orient="index")
     logging.info("Get document similarity matrix...")
     document_similarites = pd.read_json(get_document_similarities())
 
     logging.info("Get related topics...")
-    related_topics = document_topic.loc[title].sort_values(ascending=False) * 100
+    related_topics = document_topic.loc[title].sort_values(
+        ascending=False) * 100
     distribution = list(related_topics.to_dict().items())
 
     logging.info("Get similar documents...")
-    similar_docs = document_similarites[title].sort_values(ascending=False)[1:4]
+    similar_docs = document_similarites[title].sort_values(ascending=False)[
+        1:4]
 
-    logging.debug("Use only the first 10000 characters (or less) from document...")
-    text = text if len(text) < 10000 else "{}... This was an excerpt of the original text.".format(text[:10000])
+    logging.debug(
+        "Use only the first 10000 characters (or less) from document...")
+    text = text if len(
+        text) < 10000 else "{}... This was an excerpt of the original text.".format(text[:10000])
 
     logging.debug("Split paragraphs...")
     text = text.split("\n\n")
@@ -379,6 +388,7 @@ def handle_http_exception(e):
     """
     return error()
 
+
 for code in werkzeug.exceptions.default_exceptions:
     web.errorhandler(code)(handle_http_exception)
 

diff --git a/application/workflow.py b/application/workflow.py
@@ -43,8 +43,10 @@ def wrapper():
         topics, descriptors, document_topic = get_model_output(model, dtm)
         logging.info("Got model output.")
         # 4. Calculate similarities:
-        topic_similarities, document_similarities = get_similarities(document_topic)
-        logging.info("Successfully calculated topic and document similarities.")
+        topic_similarities, document_similarities = get_similarities(
+            document_topic)
+        logging.info(
+            "Successfully calculated topic and document similarities.")
 
         data = {"document_topic": document_topic.to_json(orient="index", force_ascii=False),
                 "topics": json.dumps(topics, ensure_ascii=False),

diff --git a/tests/test_utils.py b/tests/test_utils.py
@@ -19,14 +19,17 @@ def test_init_app():
     app = utils.init_app(TEST_STRING)
     assert app.name == TEST_STRING
 
+
 def test_init_logging():
     # TODO
     pass
 
+
 def test_init_db():
     # TODO
     pass
 
+
 def test_format_logging():
     a = "n_documents: 1"
     b = "vocab_size: 1"
@@ -55,32 +58,37 @@ def test_remove_markup():
         text = "<tag>{}</anothertag>".format(TEST_STRING)
         utils.remove_markup(text)
 
+
 def test_get_documents():
     textfiles = [("A", "This is a document.")]
     documents = list(utils.get_documents(textfiles))
     for document in documents:
         assert document.title == "A"
         assert document.text == "This is a document."
 
+
 def test_get_stopwords():
     # TODO
     pass
 
+
 def test_get_data():
     # TODO
     pass
 
+
 def test_get_topics():
     # TODO
     pass
 
+
 def test_get_document_topic():
     # TODO
     pass
 
+
 def test_get_cosine():
     matrix = np.array([[1, 2], [1, 3]])
     descriptors = ["A", "B"]
     similarites = utils.get_cosine(matrix, descriptors)
     assert similarites.sum().sum() == 3.9611613513818402
-