Merge branch 'feature/package' into testing
Packaging and test framework.
thvitt committed Jan 23, 2017
2 parents df78fba + a9eef3c, commit 7184211
Showing 13 changed files with 122 additions and 34 deletions.
8 changes: 8 additions & 0 deletions .gitignore
@@ -5,3 +5,11 @@ visualizations
.project
.settings/
.pydevproject
.eggs
*.egg-info
*.pyc
build
dist
.coverage
coverage.xml
nosetests.xml
8 changes: 4 additions & 4 deletions IntegrationTest_v01.ipynb
@@ -9,9 +9,9 @@
},
"outputs": [],
"source": [
"import preprocessing as pre\n",
"import visualization as visual\n",
"import mallet as mal\n",
"from dariah_topics import preprocessing as pre\n",
"from dariah_topics import visualization as visual\n",
"from dariah_topics import mallet as mal\n",
"# Warning is Gensim related"
]
},
@@ -790,7 +790,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
"version": "3.5.3rc1"
}
},
"nbformat": 4,
7 changes: 7 additions & 0 deletions dariah_topics/__init__.py
@@ -0,0 +1,7 @@
"""
The `dariah_topics` package currently offers three modules:
* `dariah_topics.preprocessing` contains preprocessing code
* `dariah_topics.visualization` offers visualization stuff that depends on PyLDAvis
* `dariah_topics.mallet` provides a wrapper that calls mallet
"""
File renamed without changes.
File renamed without changes.
42 changes: 23 additions & 19 deletions preprocessing.py → dariah_topics/preprocessing.py
@@ -100,7 +100,7 @@ def read_from_csv(doclist, columns=['ParagraphId', 'TokenId', 'Lemma', 'CPOS', '
log.info("Accessing CSV documents ...")
doc_csv = df[columns]
yield doc_csv

def get_labels(doclist):
"""Creates a list of document labels.
@@ -112,9 +112,9 @@ def get_labels(doclist):
Yields:
Iterable: Document labels.
ToDo:
Replace this function with function from Toolbox
Replace this function with function from Toolbox
"""
log.info("Creating document labels ...")
for doc in doclist:
@@ -158,6 +158,10 @@ def tokenize(doc_txt, expression=regular_expression, simple=False):
Yields:
Tokens
Example:
>>> list(tokenize("I am an example text."))
['i', 'am', 'an', 'example', 'text']
"""
doc_txt = regex.sub("\.", "", doc_txt.lower())
if simple == False:
@@ -255,14 +259,14 @@ def create_large_TF_matrix(doc_labels, doc_tokens):
"""create_large_TF_matrix
Note:
Args:
Returns:
ToDo:
ToDo:
"""

typeset = set()
@@ -293,14 +297,14 @@ def create_large_counter(doc_labels, doc_tokens, termdoc_matrix):
"""create_large_TF_matrix
Note:
Args:
Returns:
ToDo:
ToDo:
"""

largecounter = defaultdict(dict)
@@ -314,14 +318,14 @@ def create_sparse_index(largecounter):
"""create_large_TF_matrix
Note:
Args:
Returns:
ToDo:
ToDo:
"""

#tuples = list(zip(largecounter.keys(), largecounter.values().keys()))
@@ -359,14 +363,14 @@ def populate_two(sparse_index, largecounter):
"""create_large_TF_matrix
Note:
Args:
Returns:
ToDo:
ToDo:
"""

#sparse_df_filled_test = pd.Series(index=sparse_index).fillna(int(0))
File renamed without changes.
4 changes: 4 additions & 0 deletions requirements-dev.txt
@@ -0,0 +1,4 @@
-r requirements.txt
nose
nosexcover
jupyter
12 changes: 1 addition & 11 deletions requirements.txt
@@ -1,11 +1 @@
-pandas>=0.19.2
-regex>=2017.01.14
-gensim>=0.13.2
-matplotlib==1.5.3
-numpy>=1.3
-scipy>=0.7
-werkzeug>=0.11.15
-flask>=0.11.1
-pyLDAvis>=2.0.0
-
--e git+https://github.com/thvitt/cophi-toolbox#egg=cophi_toolbox
+-e .[vis,demonstrator]
5 changes: 5 additions & 0 deletions setup.cfg
@@ -0,0 +1,5 @@
[nosetests]
with-doctest=1
with-xunit=1
with-xcoverage=true
cover-package=dariah_topics
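
With this configuration, a plain nosetests run also collects the doctests in dariah_topics (such as the tokenize example above), writes an xunit report, and writes Cobertura-style coverage XML restricted to the dariah_topics package; by default these land in nosetests.xml and coverage.xml, the very files added to .gitignore above.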
38 changes: 38 additions & 0 deletions setup.py
@@ -0,0 +1,38 @@
#!/usr/bin/env python3

from setuptools import setup, find_packages

setup(
name='dariah_topics',
version='0.2.0dev0',
description='DARIAH Topic Modelling',
# url
author="DARIAH-DE Wuerzburg Group",
author_email="pielstroem@biozentrum.uni-wuerzburg.de",
# license
classifiers=[
'Development Status :: 3 - Alpha',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5'
],
# keywords
packages=find_packages(exclude=['corpus_*', 'docs', 'tests']),
install_requires=[
'pandas>=0.19.2',
'regex>=2017.01.14',
'gensim>=0.13.2',
'matplotlib==1.5.3',
'numpy>=1.3',
'scipy>=0.7',
],
# pip install -e .[demonstrator,vis]
extras_require={
'demonstrator': [
'werkzeug>=0.11.15',
'flask>=0.11.1'
],
'vis': [
'pyLDAvis>=2.0.0', # to feature 'pyldavis'
]
}
)
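
Together with these extras, the single remaining requirements.txt line (-e .[vis,demonstrator]) installs the package in editable mode along with the pyLDAvis and Flask/Werkzeug dependencies, while a bare pip install -e . skips both optional groups.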
14 changes: 14 additions & 0 deletions test/integration_test.py
@@ -0,0 +1,14 @@
from subprocess import check_call
from nose.plugins.skip import SkipTest


def jupyter_integration_test():
"""
Tries to run the integration test notebook using jupyter.
"""
try:
check_call(["jupyter-nbconvert", "--execute",
"IntegrationTest_v01.ipynb"])
except FileNotFoundError as e:
raise SkipTest("jupyter-nbconvert not found. Cannot run integration test. "
+ str(e))
18 changes: 18 additions & 0 deletions test/preprocessing_test.py
@@ -0,0 +1,18 @@
from dariah_topics import preprocessing as pre

# the function name must contain 'test' so that nose picks it up
def test_document_list():

# call the function under test

doclist = pre.create_document_list('corpus_txt')

# check conditions on the result:
assert len(doclist) == 17

return doclist

def test_document_labels():
doclist = test_document_list()
labels = pre.get_labels(doclist)
assert len(list(labels)) == len(doclist)
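
A further test in the same style could cover the tokenize doctest added in preprocessing.py above; a sketch (not part of this commit), assuming tokenize lower-cases and strips the trailing period exactly as that doctest shows:

def test_tokenize():
    # expected tokens taken from the doctest added to dariah_topics.preprocessing
    tokens = list(pre.tokenize("I am an example text."))
    assert tokens == ['i', 'am', 'an', 'example', 'text']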
