Skip to content

Commit

Permalink
Remove temp folders after kill
Browse files Browse the repository at this point in the history
  • Loading branch information
severinsimmler committed May 31, 2018
1 parent 6c6dda6 commit 89ad9ec
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 26 deletions.
7 changes: 2 additions & 5 deletions application/config.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
import application
import pathlib
import sys
import flask
Expand All @@ -9,11 +10,7 @@ def create_app(**kwargs):
PyInstaller, the paths to the template and static folder are adjusted
accordingly.
"""
tempdir = tempfile.gettempdir()
dumpdir = pathlib.Path(tempdir, "topicsexplorerdump")
archivedir = pathlib.Path(tempdir, "topicsexplorerdata")
dumpdir.mkdir(exist_ok=True)
archivedir.mkdir(exist_ok=True)
dumpdir, archivedir = application.utils.get_tempdirs(make=True)

if getattr(sys, "frozen", False):
root = pathlib.Path(sys._MEIPASS)
Expand Down
19 changes: 16 additions & 3 deletions application/gui.py
Original file line number Diff line number Diff line change
Expand Up @@ -64,7 +64,7 @@ def acceptNavigationRequest(self, url, kind, is_main_frame):
return super(WebPage, self).acceptNavigationRequest(url, kind, is_main_frame)
"""

def init_gui(application, port=PORT, argv=None, title=TITLE, icon=ICON):
def init_gui(flask_app, port=PORT, argv=None, title=TITLE, icon=ICON):
"""
Initializes the Qt web engine, starts the web application, and loads the
main page.
Expand All @@ -74,9 +74,22 @@ def init_gui(application, port=PORT, argv=None, title=TITLE, icon=ICON):

# Starting the Flask application.
qtapp = PyQt5.QtWidgets.QApplication(argv)
webapp = ApplicationThread(application, port)
webapp = ApplicationThread(flask_app, port)
webapp.start()
qtapp.aboutToQuit.connect(webapp.terminate)

def cleanup(webapp=webapp):
    """
    Kills the Flask process and removes the temporary folders after
    the user closed the window.

    Args:
        webapp: The running application thread; it is bound as a default
            argument so the slot can be connected without arguments.

    The folder removal is deliberately best-effort: this function runs
    as a slot while the application is quitting, so a folder that is
    already gone (or still contains subfolders) must not raise here.
    """
    import shutil

    webapp.terminate()
    dumpdir, archivedir = application.utils.get_tempdirs()
    for directory in (dumpdir, archivedir):
        # ``unlink_content`` only unlinks files, so a following ``rmdir()``
        # would fail on leftover subfolders or on a missing folder.
        # ``rmtree`` removes the whole tree and tolerates both cases.
        shutil.rmtree(str(directory), ignore_errors=True)

qtapp.aboutToQuit.connect(cleanup)

# Setting width and height individually based on the
# screen resolution: 93% of the screen for width,
Expand Down
37 changes: 21 additions & 16 deletions application/modeling.py
Original file line number Diff line number Diff line change
Expand Up @@ -66,20 +66,25 @@ def workflow(tempdir, archive_dir):
for file in user_input["files"]:
filename = pathlib.Path(werkzeug.utils.secure_filename(file.filename))
percent += 1
yield "running", "Reading {0} ...".format(filename.stem), percent / full * 100, "", "", "", "", ""
yield "running", "Reading {0} ...".format(filename.stem[:20]), percent / full * 100, "", "", "", "", ""
text = file.read().decode("utf-8")
if filename.suffix != ".txt":
yield "running", "Removing markup from text ...", percent / full * 100, "", "", "", "", ""
text = application.utils.remove_markup(text)
yield "running", "Tokenizing {0} ...".format(filename.stem), percent / full * 100, "", "", "", "", ""
yield "running", "Tokenizing {0} ...".format(filename.stem[:20]), percent / full * 100, "", "", "", "", ""
tokens = list(dariah_topics.preprocessing.tokenize(text))
tokenized_corpus[filename.stem] = tokens
parameter["Corpus size (raw), in tokens"] += len(tokens)

excerpt_int = random.randint(0, len(tokenized_corpus) - 1)
excerpt = tokenized_corpus.iloc[excerpt_int]
token_int = random.randint(1, len(excerpt) - 61)
excerpt = "..." + " ".join(excerpt[token_int:token_int + 60]) + "..."
text = text.replace("\n", "")
text = text.replace("\r", "")
text = text.replace("\'", "")
text = text.replace("\"", "")
token_int = random.randint(0, len(text) - 251)
try:
excerpt = "...{}...".format(text[token_int:token_int + 250])
except IndexError:
excerpt = ""

percent += 1
yield "running", "Creating document-term matrix ...", percent / full * 100, excerpt, "", "", "", ""
Expand Down Expand Up @@ -108,7 +113,7 @@ def workflow(tempdir, archive_dir):
cleaning = "removed <b>{0} words</b>, based on an external stopwords list".format(len(stopwords))

percent += 1
yield "running", "Determining hapax legomena from corpus ...", percent / full * 100, "", "", "", "", ""
yield "running", "Determining hapax legomena ...", percent / full * 100, "", "", "", "", ""
hapax_legomena = dariah_topics.preprocessing.find_hapax_legomena(document_term_matrix)
features = set(stopwords).union(hapax_legomena)
features = [token for token in features if token in document_term_matrix.columns]
Expand Down Expand Up @@ -165,7 +170,7 @@ def workflow(tempdir, archive_dir):
topics.index = ["Topic {0}".format(i) for i in range(1, user_input["num_topics"] + 1)]

percent += 1
yield "running", "Accessing document-topics distribution ...", percent / full * 100, "", "", "", "", ""
yield "running", "Accessing distributions ...", percent / full * 100, "", "", "", "", ""
document_topics = dariah_topics.postprocessing.show_document_topics(model=model,
topics=topics,
document_labels=document_labels)
Expand All @@ -191,17 +196,17 @@ def workflow(tempdir, archive_dir):
heatmap = fig.interactive_heatmap(height=height,
sizing_mode="scale_width",
tools="hover, pan, reset, wheel_zoom, zoom_in, zoom_out")
#bokeh.plotting.output_file(str(pathlib.Path(tempdir, "heatmap.html")))
#bokeh.plotting.save(heatmap)
bokeh.plotting.output_file(str(pathlib.Path(tempdir, "heatmap.html")))
bokeh.plotting.save(heatmap)

heatmap_script, heatmap_div = bokeh.embed.components(heatmap)

percent += 1
yield "running", "Creating boxplot ...", percent / full * 100, "", "", "", "", ""
corpus_boxplot = application.utils.boxplot(corpus_stats)
corpus_boxplot_script, corpus_boxplot_div = bokeh.embed.components(corpus_boxplot)
#bokeh.plotting.output_file(str(pathlib.Path(tempdir, "corpus_statistics.html")))
#bokeh.plotting.save(corpus_boxplot)
bokeh.plotting.output_file(str(pathlib.Path(tempdir, "corpus_statistics.html")))
bokeh.plotting.save(corpus_boxplot)

if document_topics.shape[1] < 15:
height = 580
Expand All @@ -212,17 +217,17 @@ def workflow(tempdir, archive_dir):
yield "running", "Creating barcharts ...", percent / full * 100, "", "", "", "", ""
topics_barchart = application.utils.barchart(document_topics, height=height, topics=topics)
topics_script, topics_div = bokeh.embed.components(topics_barchart)
#bokeh.plotting.output_file(str(pathlib.Path(tempdir, "topics_barchart.html")))
#bokeh.plotting.save(topics_barchart)
bokeh.plotting.output_file(str(pathlib.Path(tempdir, "topics_barchart.html")))
bokeh.plotting.save(topics_barchart)

if document_topics.shape[0] < 15:
height = 580
else:
height = document_topics.shape[0] * 25
documents_barchart = application.utils.barchart(document_topics.T, height=height)
documents_script, documents_div = bokeh.embed.components(documents_barchart)
#bokeh.plotting.output_file(str(pathlib.Path(tempdir, "document_topics_barchart.html")))
#bokeh.plotting.save(documents_barchart)
bokeh.plotting.output_file(str(pathlib.Path(tempdir, "document_topics_barchart.html")))
bokeh.plotting.save(documents_barchart)

end = time.time()
passed_time = round((end - start) / 60)
Expand Down
2 changes: 1 addition & 1 deletion application/templates/modeling.html
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@
'“With recent scientific advances in support of unsupervised machine learning topic models promise to be an important component for summarizing and understanding our growing digitized archive of information.” (<a href="http://www.cs.columbia.edu/~blei/papers/Blei2012.pdf">Blei 2012</a>)'
]

var randInt = Math.floor(Math.random() * (6 - 0 + 1)) + 0;
var randInt = Math.floor(Math.random() * 7);
document.getElementById('right-box-content').innerHTML = blei[randInt];
</script>

Expand Down
16 changes: 15 additions & 1 deletion application/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import lxml
import queue
import socket
import tempfile
import random


Expand Down Expand Up @@ -196,7 +197,7 @@ def barchart(document_topics, height, topics=None, script=JAVASCRIPT, tools=TOOL
else:
what = "document"
textfield = bokeh.models.widgets.AutocompleteInput(completions=document_topics.index.tolist(),
placeholder="Type a {} name".format(what),
placeholder="Type a {} and press enter".format(what),
css_classes=["customTextInput"],
callback=callback)
return bokeh.layouts.row(fig, textfield, sizing_mode="scale_width")
Expand Down Expand Up @@ -275,3 +276,16 @@ def unlink_content(directory, pattern="*"):
for p in pathlib.Path(directory).rglob(pattern):
if p.is_file():
p.unlink()


def get_tempdirs(make=False):
    """
    Returns the paths of the dump and the archive folder, both located
    in the system's temporary directory.

    Args:
        make: If True, both folders are created; folders that already
            exist are left untouched.

    Returns:
        A tuple ``(dumpdir, archivedir)`` of ``pathlib.Path`` objects.
    """
    root = pathlib.Path(tempfile.gettempdir())
    folders = (root / "topicsexplorerdump", root / "topicsexplorerdata")
    if make:
        for folder in folders:
            folder.mkdir(exist_ok=True)
    return folders

0 comments on commit 89ad9ec

Please sign in to comment.