Skip to content

Commit

Permalink
Merge branch 'testing' of https://github.com/DARIAH-DE/Topics into testing
Browse files (browse the repository at this point in the history)
  • Loading branch information
Zelassay committed Jan 26, 2017
2 parents 73f32cf + 7d060f9 commit e2766d4
Show file tree
Hide file tree
Showing 3 changed files with 23 additions and 10 deletions.
14 changes: 11 additions & 3 deletions test/integration_test.py
Original file line number Diff line number Diff line change
@@ -1,14 +1,22 @@
from subprocess import check_call
from subprocess import check_output, STDOUT, CalledProcessError
from nose.plugins.skip import SkipTest
from pathlib import Path
import logging

project_path = Path(__file__).absolute().parent.parent


def jupyter_integration_test():
"""
Tries to run the integration test notebook using jupyter.

Executes ``IntegrationTest_v01.ipynb`` through the ``jupyter-nbconvert``
command with ``--execute``. A FileNotFoundError is converted into a
SkipTest, so the test is skipped rather than failed in that case. On a
CalledProcessError the captured output is logged and the error is
re-raised.
"""
try:
check_call(["jupyter-nbconvert", "--execute",
"IntegrationTest_v01.ipynb"])
# The notebook path is built from project_path; stderr is merged into the
# captured output (stderr=STDOUT) so it is available as e.output below.
check_output(["jupyter-nbconvert", "--execute",
str(Path(project_path, "IntegrationTest_v01.ipynb"))],
stderr=STDOUT, universal_newlines=True)
except FileNotFoundError as e:
# Skip instead of failing; the original error text is appended to the message.
raise SkipTest("jupyter-nbconvert not found. Cannot run integration test. "
+ str(e))
except CalledProcessError as e:
# Log the captured output of the failed command before propagating the error.
logging.error(e.output)
raise
5 changes: 4 additions & 1 deletion test/preprocessing_test.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
from dariah_topics import preprocessing as pre
from pathlib import Path

project_path = Path(__file__).absolute().parent.parent

# The function name has to contain "test" in some way
def test_document_list():

# call the function under test

doclist = pre.create_document_list('corpus_txt')
doclist = pre.create_document_list(str(Path(project_path, 'corpus_txt')))

# check conditions on the result:
assert len(doclist) == 17
Expand Down
14 changes: 8 additions & 6 deletions test/testing.py
Original file line number Diff line number Diff line change
@@ -1,9 +1,11 @@
import preprocessing
from dariah_topics import preprocessing
import glob
import os.path
from pathlib import Path

project_path = Path(__file__).absolute().parent.parent
basepath = str(project_path)

basepath = os.path.abspath(os.path.join(".", os.pardir))

#path_txt = "grenzbote_plain/*/"

path_txt = "corpus_txt"
Expand All @@ -16,11 +18,11 @@

#doc_tokens = preprocessing.tokenizer(corpus_txt)

with open(os.path.join(basepath, "tutorial_supplementals/stopwords/en"), 'r', encoding = 'utf-8') as f:
with open(os.path.join(basepath, "tutorial_supplementals/stopwords/en"), 'r', encoding = 'utf-8') as f:
stopword_list = f.read().split('\n')

stopword_list = set(stopword_list)

doc_tokens = [list(preprocessing.tokenize(txt)) for txt in list(corpus_txt)]

#print(list(doc_tokens[0]))
Expand All @@ -34,7 +36,7 @@
with open("gb_all.mm", 'a', encoding = "utf-8") as f:
f.write("%%MatrixMarket matrix coordinate real general\n")
sparse_df.to_csv( f, sep = ' ', header = None)

sparse_df_stopwords_removed = preprocessing.remove_features(sparse_df, id_types, stopword_list)

with open("gb_all_features_removed.mm", 'a', encoding = "utf-8") as f:
Expand Down

0 comments on commit e2766d4

Please sign in to comment.