Skip to content

Commit

Permalink
doc: extend docstrings
Browse files Browse the repository at this point in the history
  • Loading branch information
severinsimmler committed Oct 24, 2018
1 parent 71e4db0 commit 2fa32e5
Show file tree
Hide file tree
Showing 6 changed files with 351 additions and 236 deletions.
7 changes: 4 additions & 3 deletions application/constants.py
@@ -1,6 +1,7 @@
from pathlib import Path
import tempfile
import pathlib


# Directory returned by tempfile.gettempdir(); every path below lives in it.
TEMPDIR = tempfile.gettempdir()
# NOTE(review): DATABASE_URI and DATABASE (below) resolve to the same
# "topicsexplorer.db" path — confirm which name callers are meant to use.
DATABASE_URI = str(pathlib.Path(TEMPDIR, "topicsexplorer.db"))
# NOTE(review): LOGFILE is assigned twice with the same resulting value.
LOGFILE = str(pathlib.Path(TEMPDIR, "topicsexplorer.log"))
# Path of the SQLite database file, as a plain string.
DATABASE = str(Path(TEMPDIR, "topicsexplorer.db"))
# Path of the log file, as a plain string.
LOGFILE = str(Path(TEMPDIR, "topicsexplorer.log"))
117 changes: 117 additions & 0 deletions application/database.py
@@ -0,0 +1,117 @@
import io
import json
import sqlite3

import flask
import pandas as pd

import constants
import utils



def get_db():
    """Return the SQLite connection cached on ``flask.g``.

    The connection to ``constants.DATABASE`` is opened the first time this
    is called and stored as ``flask.g.db``; later calls return that same
    object until ``close_db`` removes it.
    """
    if "db" in flask.g:
        return flask.g.db
    flask.g.db = sqlite3.connect(constants.DATABASE)
    return flask.g.db


def close_db(e=None):
    """Close the SQLite connection cached on ``flask.g``, if there is one.

    Args:
        e: Not used by this function.
    """
    connection = flask.g.pop("db", None)
    if connection is None:
        # Nothing was opened, so there is nothing to close.
        return
    connection.close()


def _insert_into_textfiles(db, data):
    """Insert one row per text file into the ``textfiles`` table.

    Args:
        db: Open SQLite connection; the caller is responsible for committing.
        data: Iterable of items accepted by ``utils.load_textfile``, which
            is expected to yield a ``(title, content)`` pair for each.
    """
    statement = """
        INSERT INTO textfiles (title, content)
        VALUES(?, ?);
        """
    for source in data:
        title, content = utils.load_textfile(source)
        db.execute(statement, [title, content])


def insert_into(table, data):
    """Insert data into a database table, commit, and close the connection.

    Args:
        table: Name of the target table. ``"textfiles"`` and ``"model"``
            are handled; for any other value nothing is inserted.
        data: Payload for the table-specific helper: an iterable of items
            for ``utils.load_textfile`` when ``table`` is ``"textfiles"``,
            or a mapping with the keys ``doc_topic``, ``topics``,
            ``doc_sim`` and ``topic_sim`` when it is ``"model"``.
    """
    db = get_db()
    try:
        if table == "textfiles":
            _insert_into_textfiles(db, data)
        elif table == "model":
            _insert_into_model(db, data)
        db.commit()
    finally:
        # Always release the connection. If an insert raised, the commit
        # above was skipped, and closing an sqlite3 connection without
        # committing discards the pending changes.
        close_db()


def _insert_into_model(db, data):
    """Insert a single row into the ``model`` table.

    Args:
        db: Open SQLite connection; the caller is responsible for committing.
        data: Mapping providing the keys ``doc_topic``, ``topics``,
            ``doc_sim`` and ``topic_sim``.
    """
    columns = ("doc_topic", "topics", "doc_sim", "topic_sim")
    values = [data[column] for column in columns]
    db.execute("""
        INSERT INTO model (doc_topic, topics, doc_sim, topic_sim)
        VALUES(?, ?, ?, ?);
        """, values)


def select(value, **kwargs):
    """Select values from the database.

    Args:
        value: What to fetch. One of ``"textfiles"``, ``"doc_topic"``,
            ``"topic-overview"`` or ``"document-overview"``.
        **kwargs: Forwarded to the helper for ``"document-overview"``
            (which takes ``title``); ignored for the other values.

    Returns:
        Whatever the matching helper returns, or ``None`` when ``value``
        is not one of the names above.
    """
    cursor = get_db().cursor()
    # Helpers that only need the cursor.
    handlers = {
        "textfiles": _select_textfiles,
        "doc_topic": _select_doc_topic,
        "topic-overview": _select_topic_overview,
    }
    if value in handlers:
        return handlers[value](cursor)
    if value == "document-overview":
        return _select_document_overview(cursor, **kwargs)
    return None


def _select_textfiles(cursor):
    """Fetch every row of the ``textfiles`` table.

    Args:
        cursor: SQLite cursor used to run the query.

    Returns:
        list: One ``(title, content)`` tuple per stored text file.
    """
    query = """
        SELECT title, content
        FROM textfiles;
        """
    return cursor.execute(query).fetchall()


def _select_doc_topic(cursor):
    """Select the document-topic matrix from the database.

    Args:
        cursor: SQLite cursor used to run the query.

    Returns:
        pandas.DataFrame: Parsed from the JSON stored in the ``doc_topic``
        column of the first row of the ``model`` table.

    Raises:
        TypeError: If the ``model`` table is empty (``fetchone`` returns
            ``None``, which cannot be indexed).
    """
    response = cursor.execute("""
        SELECT doc_topic
        FROM model;
        """).fetchone()[0]
    # Hand pandas a file-like object: passing a literal JSON string to
    # read_json is deprecated in recent pandas releases.
    return pd.read_json(io.StringIO(response))


def _select_topic_overview(cursor):
    """Select the values needed for the topic overview page.

    Args:
        cursor: SQLite cursor used to run the query.

    Returns:
        tuple: ``(doc_topic, topics, topic_sim)`` read from the first row
        of the ``model`` table, where ``doc_topic`` and ``topic_sim`` are
        ``pandas.DataFrame`` objects parsed from JSON and ``topics`` is
        the result of ``json.loads`` on the stored text.

    Raises:
        TypeError: If the ``model`` table is empty (``fetchone`` returns
            ``None``, which cannot be unpacked).
    """
    doc_topic, topics, topic_sim = cursor.execute("""
        SELECT doc_topic, topics, topic_sim
        FROM model;
        """).fetchone()
    # read_json gets file-like objects: passing literal JSON strings is
    # deprecated in recent pandas releases.
    return (pd.read_json(io.StringIO(doc_topic)),
            json.loads(topics),
            pd.read_json(io.StringIO(topic_sim)))


def _select_document_overview(cursor, title):
    """Select the values needed for the document overview page.

    Args:
        cursor: SQLite cursor used to run the queries.
        title: Title of the text file whose content is fetched.

    Returns:
        tuple: ``(text, doc_topic, topics, doc_sim)`` where ``text`` is the
        stored content for ``title``, ``doc_topic`` is the transposed
        ``pandas.DataFrame`` parsed from the ``doc_topic`` JSON, ``topics``
        is the result of ``json.loads`` on the stored text, and ``doc_sim``
        is a ``pandas.DataFrame`` parsed from the ``doc_sim`` JSON.

    Raises:
        TypeError: If no text file has the given title or the ``model``
            table is empty (``fetchone`` returns ``None``).
    """
    text = cursor.execute("""
        SELECT content
        FROM textfiles
        WHERE title is ?;
        """, [title]).fetchone()[0]
    doc_topic, topics, doc_sim = cursor.execute("""
        SELECT doc_topic, topics, doc_sim
        FROM model;
        """).fetchone()
    # read_json gets file-like objects: passing literal JSON strings is
    # deprecated in recent pandas releases.
    return (text,
            pd.read_json(io.StringIO(doc_topic)).T,
            json.loads(topics),
            pd.read_json(io.StringIO(doc_sim)))
8 changes: 4 additions & 4 deletions application/schema.sql
Expand Up @@ -2,15 +2,15 @@ DROP TABLE IF EXISTS textfiles;
DROP TABLE IF EXISTS model;

-- One row per imported text file; title must be unique.
CREATE TABLE textfiles (
    id INTEGER PRIMARY KEY,
    title TEXT UNIQUE,
    content TEXT
);

-- Serialized model output; column names match the INSERT and SELECT
-- statements in application/database.py (doc_sim, topic_sim).
CREATE TABLE model (
    id INTEGER PRIMARY KEY,
    doc_topic TEXT,
    topics TEXT,
    doc_sim TEXT,
    topic_sim TEXT
);
105 changes: 53 additions & 52 deletions application/testing.py
@@ -1,86 +1,87 @@
#!/usr/bin/env python3

import operator
import pathlib
import logging
import json
import sqlite3
import multiprocessing

import flask
import pandas as pd
import numpy as np
import lda

import utils
import workflow
import database

import flask

# NOTE(review): utils.init_app is called twice in a row; the values from the
# second call replace those from the first — confirm the first is still needed.
app, process = utils.init_app()

app, process = utils.init_app("topicsexplorer")

@app.route("/")
def index():
    """Home page.

    Terminates the module-level ``process`` if it is still alive,
    initialises logging at DEBUG level and the database, then renders
    ``index.html``.
    """
    if process.is_alive():
        process.terminate()
    # Logging is initialised once, with the explicit level.
    utils.init_logging(logging.DEBUG)
    utils.init_db(app)
    return flask.render_template("index.html")

@app.route("/help")
def help():
    """Render the help page from the ``help.html`` template."""
    page = flask.render_template("help.html")
    return page

@app.route("/api/status")
def status():
    """API: return the current modeling status from ``utils.get_status``."""
    current_status = utils.get_status()
    return current_status

@app.route("/modeling", methods=["POST"])
def modeling():
    """Modeling page.

    Starts ``workflow.wrapper`` in a separate process and renders
    ``modeling.html`` without waiting for it to finish.
    """
    # Rebind the module-level handle so that ``index`` can see (and
    # terminate) the worker started here; a plain assignment would only
    # create a local that is lost when this view returns.
    global process
    process = multiprocessing.Process(target=workflow.wrapper)
    process.start()
    return flask.render_template("modeling.html")

@app.route("/help")
def help():
return flask.render_template("help.html")

@app.route("/topic-presence")
def topic_presence():
    """Topic presence page.

    Materialises ``utils.get_topic_presence()`` into a list and passes it
    to the ``topic-presence.html`` template as ``presence``.
    """
    return flask.render_template(
        "topic-presence.html",
        presence=list(utils.get_topic_presence()),
    )

@app.route("/topics/<topic>")
def topics(topic):
    """Topic overview page.

    Args:
        topic: Column label of the topic in the document-topic matrix,
            taken from the URL.

    Renders ``topic.html`` with the 30 highest-weighted documents, the
    first 20 words and three similar topics for ``topic``.
    """
    doc_topic, topics, topic_sim = database.select("topic-overview")

    # Get related documents:
    related_docs = doc_topic[topic].sort_values(ascending=False)[:30]
    related_docs = list(related_docs.index)

    # Get related words:
    loc = doc_topic.columns.get_loc(topic)
    related_words = topics[loc][:20]

    # Get similar topics:
    # The top-ranked entry is skipped — presumably the topic itself.
    similar_topics = topic_sim[topic].sort_values(ascending=False)[1:4]
    similar_topics = list(similar_topics.index)
    return flask.render_template("topic.html",
                                 topic=topic,
                                 similar_topics=similar_topics,
                                 related_words=related_words,
                                 related_documents=related_docs)

@app.route("/documents/<title>")
def documents(title):
    """Document overview page.

    Args:
        title: Title of the document, taken from the URL.

    Renders ``document.html`` with the document's text split into
    paragraphs, its topic distribution in percent, the 20
    highest-weighted topics and three similar documents.
    """
    text, doc_topic, _, doc_sim = database.select("document-overview",
                                                  title=title)
    # TODO: how to deal with this?
    text = text.split("\n\n")

    # Get related topics:
    related_topics = doc_topic[title].sort_values(ascending=False) * 100
    distribution = list(related_topics.to_dict().items())
    related_topics = related_topics[:20].index

    # Get similar documents:
    # The top-ranked entry is skipped — presumably the document itself.
    similar_docs = doc_sim[title].sort_values(ascending=False)[1:4]
    similar_docs = list(similar_docs.index)
    return flask.render_template("document.html",
                                 title=title,
                                 text=text,
                                 distribution=distribution,
                                 similar_documents=similar_docs,
                                 related_topics=related_topics)

@app.after_request
def add_header(r):
Expand Down

0 comments on commit 2fa32e5

Please sign in to comment.