Merge branch 'feature/package' into testing
Packaging and test framework.
thvitt committed Jan 23, 2017
2 parents df78fba + a9eef3c, commit 7184211
Showing 13 changed files with 122 additions and 34 deletions.
8 changes: 8 additions & 0 deletions .gitignore
@@ -5,3 +5,11 @@ visualizations
.project
.settings/
.pydevproject
.eggs
*.egg-info
*.pyc
build
dist
.coverage
coverage.xml
nosetests.xml
8 changes: 4 additions & 4 deletions IntegrationTest_v01.ipynb
@@ -9,9 +9,9 @@
},
"outputs": [],
"source": [
"import preprocessing as pre\n",
"import visualization as visual\n",
"import mallet as mal\n",
"from dariah_topics import preprocessing as pre\n",
"from dariah_topics import visualization as visual\n",
"from dariah_topics import mallet as mal\n",
"# Warning is Gensim related"
]
},
@@ -790,7 +790,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.5.2"
"version": "3.5.3rc1"
}
},
"nbformat": 4,
7 changes: 7 additions & 0 deletions dariah_topics/__init__.py
@@ -0,0 +1,7 @@
"""
The `dariah_topics` package currently offers three modules:
* `dariah_topics.preprocessing` contains preprocessing code
* `dariah_topics.visualization` offers visualization stuff that depends on PyLDAvis
* `dariah_topics.mallet` provides a wrapper that calls mallet
"""
File renamed without changes.
File renamed without changes.
42 changes: 23 additions & 19 deletions preprocessing.py → dariah_topics/preprocessing.py
@@ -100,7 +100,7 @@ def read_from_csv(doclist, columns=['ParagraphId', 'TokenId', 'Lemma', 'CPOS', '
log.info("Accessing CSV documents ...")
doc_csv = df[columns]
yield doc_csv

def get_labels(doclist):
"""Creates a list of document labels.
@@ -112,9 +112,9 @@ def get_labels(doclist):
Yields:
Iterable: Document labels.
ToDo:
Replace this function with function from Toolbox
Replace this function with function from Toolbox
"""
log.info("Creating document labels ...")
for doc in doclist:
@@ -158,6 +158,10 @@ def tokenize(doc_txt, expression=regular_expression, simple=False):
Yields:
Tokens
Example:
>>> list(tokenize("I am an example text."))
['i', 'am', 'an', 'example', 'text']
"""
doc_txt = regex.sub("\.", "", doc_txt.lower())
if simple == False:
@@ -255,14 +259,14 @@ def create_large_TF_matrix(doc_labels, doc_tokens):
"""create_large_TF_matrix
Note:
Args:
Returns:
ToDo:
ToDo:
"""

typeset = set()
@@ -293,14 +297,14 @@ def create_large_counter(doc_labels, doc_tokens, termdoc_matrix):
"""create_large_TF_matrix
Note:
Args:
Returns:
ToDo:
ToDo:
"""

largecounter = defaultdict(dict)
@@ -314,14 +318,14 @@ def create_sparse_index(largecounter):
"""create_large_TF_matrix
Note:
Args:
Returns:
ToDo:
ToDo:
"""

#tuples = list(zip(largecounter.keys(), largecounter.values().keys()))
@@ -359,14 +363,14 @@ def populate_two(sparse_index, largecounter):
"""create_large_TF_matrix
Note:
Args:
Returns:
ToDo:
ToDo:
"""

#sparse_df_filled_test = pd.Series(index=sparse_index).fillna(int(0))
File renamed without changes.
4 changes: 4 additions & 0 deletions requirements-dev.txt
@@ -0,0 +1,4 @@
-r requirements.txt
nose
nosexcover
jupyter
12 changes: 1 addition & 11 deletions requirements.txt
@@ -1,11 +1 @@
-pandas>=0.19.2
-regex>=2017.01.14
-gensim>=0.13.2
-matplotlib==1.5.3
-numpy>=1.3
-scipy>=0.7
-werkzeug>=0.11.15
-flask>=0.11.1
-pyLDAvis>=2.0.0
-
--e git+https://github.com/thvitt/cophi-toolbox#egg=cophi_toolbox
+-e .[vis,demonstrator]
5 changes: 5 additions & 0 deletions setup.cfg
@@ -0,0 +1,5 @@
[nosetests]
with-doctest=1
with-xunit=1
with-xcoverage=true
cover-package=dariah_topics
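
With this configuration, a plain nosetests run also collects the doctests in dariah_topics (such as the tokenize example above), writes an xunit report, and writes Cobertura-style coverage XML restricted to the dariah_topics package; by default these land in nosetests.xml and coverage.xml, the very files added to .gitignore above.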
38 changes: 38 additions & 0 deletions setup.py
@@ -0,0 +1,38 @@
#!/usr/bin/env python3

from setuptools import setup, find_packages

setup(
name='dariah_topics',
version='0.2.0dev0',
description='DARIAH Topic Modelling',
# url
author="DARIAH-DE Wuerzburg Group",
author_email="pielstroem@biozentrum.uni-wuerzburg.de",
# license
classifiers=[
'Development Status :: 3 - Alpha',
'Programming Language :: Python :: 3.4',
'Programming Language :: Python :: 3.5'
],
# keywords
packages=find_packages(exclude=['corpus_*', 'docs', 'tests']),
install_requires=[
'pandas>=0.19.2',
'regex>=2017.01.14',
'gensim>=0.13.2',
'matplotlib==1.5.3',
'numpy>=1.3',
'scipy>=0.7',
],
# pip install -e .[demonstrator,vis]
extras_require={
'demonstrator': [
'werkzeug>=0.11.15',
'flask>=0.11.1'
],
'vis': [
'pyLDAvis>=2.0.0', # to feature 'pyldavis'
]
}
)
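
Together with these extras, the single remaining requirements.txt line (-e .[vis,demonstrator]) installs the package in editable mode along with the pyLDAvis and Flask/Werkzeug dependencies, while a bare pip install -e . skips both optional groups.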
14 changes: 14 additions & 0 deletions test/integration_test.py
@@ -0,0 +1,14 @@
from subprocess import check_call
from nose.plugins.skip import SkipTest


def jupyter_integration_test():
"""
Tries to run the integration test notebook using jupyter.
"""
try:
check_call(["jupyter-nbconvert", "--execute",
"IntegrationTest_v01.ipynb"])
except FileNotFoundError as e:
raise SkipTest("jupyter-nbconvert not found. Cannot run integration test. "
+ str(e))
18 changes: 18 additions & 0 deletions test/preprocessing_test.py
@@ -0,0 +1,18 @@
from dariah_topics import preprocessing as pre

# the function name must contain 'test' so that nose picks it up
def test_document_list():

# call the function under test

doclist = pre.create_document_list('corpus_txt')

# check conditions on the result:
assert len(doclist) == 17

return doclist

def test_document_labels():
doclist = test_document_list()
labels = pre.get_labels(doclist)
assert len(list(labels)) == len(doclist)
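
A further test in the same style could cover the tokenize doctest added in preprocessing.py above; a sketch (not part of this commit), assuming tokenize lower-cases and strips the trailing period exactly as that doctest shows:

def test_tokenize():
    # expected tokens taken from the doctest added to dariah_topics.preprocessing
    tokens = list(pre.tokenize("I am an example text."))
    assert tokens == ['i', 'am', 'an', 'example', 'text']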
