Skip to content

Commit

Permalink
Merge branch 'testing' of https://github.com/DARIAH-DE/Topics into testing
Browse files Browse the repository at this point in the history
  • Loading branch information
sinabock committed Feb 22, 2017
2 parents 40606a8 + bff3303 commit b2054b0
Show file tree
Hide file tree
Showing 13 changed files with 406 additions and 217,206 deletions.
321 changes: 166 additions & 155 deletions Introduction.ipynb

Large diffs are not rendered by default.

573 changes: 232 additions & 341 deletions Mallet.ipynb

Large diffs are not rendered by default.

317 changes: 0 additions & 317 deletions dariah_topics/mallet.py

This file was deleted.

2 changes: 1 addition & 1 deletion dariah_topics/visualization.py
Original file line number Diff line number Diff line change
Expand Up @@ -262,7 +262,7 @@ def doc_topic_heatmap(data_frame):
Returns:
"""
data_frame = data_frame.transpose()
data_frame = data_frame.transpose().sort_index()
doc_labels = list(data_frame.index)
topic_labels = list(data_frame)
if len(doc_labels) > 20 or len(topic_labels) > 20: plt.figure(figsize=(20,20)) # if many items, enlarge figure
Expand Down
15 changes: 6 additions & 9 deletions demonstrator/demonstrator.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,9 +13,11 @@
https://github.com/DARIAH-DE
"""

import matplotlib
matplotlib.use('Agg')
from dariah_topics import preprocessing
from dariahs_topics import visualization
from dariahs_topics import mallet
from dariah_topics import visualization
from dariah_topics import mallet
from flask import Flask, request, render_template, send_file
from gensim.models import LdaModel
from gensim.corpora import MmCorpus
Expand Down Expand Up @@ -68,9 +70,9 @@ def upload_file():
corpus[label] = tokens
if 'mallet' in lda:
print("Creating MALLET binary ...")
mallet.create_mallet_model("./mallet_output", "./tmp_files", './mallet/bin/mallet')
mallet.create_mallet_model("./mallet_output", "./tmp_files", 'mallet')
print("Training MALLET LDA model ...")
mallet.create_mallet_output('./mallet_output/malletModel.mallet', './mallet_output', './mallet/bin/mallet')
mallet.create_mallet_output('./mallet_output/malletModel.mallet', './mallet_output', 'mallet')
shutil.rmtree('./tmp_files')
df = mallet.show_topics_keys('./mallet_output')
doc_topic = mallet.show_docTopicMatrix('./mallet_output')
Expand Down Expand Up @@ -127,11 +129,6 @@ def upload_file():
# Todo: replace by DataFrame.to_html():
print("Accessing topics for HTML table ...")
df = visualization.topicwords_in_df(model)
import regex
pattern = regex.compile(r'\p{L}+\p{P}?\p{L}+')
topics = []
for n, topic in enumerate(model.show_topics()):
topics.append((n+1, pattern.findall(topic[1])))
print("Rendering result.html ...")

return render_template('result.html', tables=[df.to_html(classes='df')])
Expand Down
2 changes: 1 addition & 1 deletion demonstrator/templates/result.html
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@
<div class="span10 offset1 main-content-wrapper no-margin">
<div id="content" class="primary-area">
<h1>Demonstrator: Topic Modeling</h1>
<div id="contentInner" style="text-align:center; margin:0 auto;">
<div id="contentInner" style="text-align:justify;">
{% for table in tables %}
{{ table|safe }}
{% endfor %}
Expand Down
18 changes: 0 additions & 18 deletions tutorial_supplementals/mallet_output/doc_topics.txt

This file was deleted.

0 comments on commit b2054b0

Please sign in to comment.