added Python component

ConfusionFlow · Oct 1, 2018 · a96ac4f · a96ac4f
1 parent 6b8a95a
commit a96ac4f
Show file tree

Hide file tree

Showing 45 changed files with 1,998 additions and 0 deletions.
diff --git a/.editorconfig b/.editorconfig
@@ -0,0 +1,15 @@
+# http://editorconfig.org
+
+root = true
+
+[*]
+indent_style = space
+indent_size = 4
+insert_final_newline = true
+trim_trailing_whitespace = true
+end_of_line = lf
+charset = utf-8
+
+# Docstrings and comments use max_line_length = 79
+[*.py]
+max_line_length = 119
diff --git a/.github/ISSUE_TEMPLATE/bug_report.md b/.github/ISSUE_TEMPLATE/bug_report.md
@@ -0,0 +1,29 @@
+---
+name: Bug report
+about: Create a report to help us improve
+
+---
+
+**Describe the bug**
+A clear and concise description of what the bug is.
+
+**To Reproduce**
+Steps to reproduce the behavior:
+1. Go to '...'
+2. Click on '....'
+3. Scroll down to '....'
+4. See error
+
+**Expected behavior**
+A clear and concise description of what you expected to happen.
+
+**Screenshots**
+If applicable, add screenshots to help explain your problem.
+
+**System (please complete the following information):**
+ - OS: [e.g. Linux, macOS]
+ - Python Version: [e.g. Python2.7, Python3.6]
+ - Version [e.g. v0.1.0, master]
+
+**Additional context**
+Add any other context about the problem here.
diff --git a/.github/ISSUE_TEMPLATE/feature_request.md b/.github/ISSUE_TEMPLATE/feature_request.md
@@ -0,0 +1,17 @@
+---
+name: Feature request
+about: Suggest an idea for this project
+
+---
+
+**Is your feature request related to a problem? Please describe.**
+A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
+
+**Describe the solution you'd like**
+A clear and concise description of what you want to happen.
+
+**Describe alternatives you've considered**
+A clear and concise description of any alternative solutions or features you've considered.
+
+**Additional context**
+Add any other context or screenshots about the feature request here.
diff --git a/.github/ISSUE_TEMPLATE/feedback.md b/.github/ISSUE_TEMPLATE/feedback.md
@@ -0,0 +1,5 @@
+---
+name: Feedback
+about: Add some feedback
+
+---
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,114 @@
+# Byte-compiled / optimized / DLL files
+__pycache__/
+*.py[cod]
+*$py.class
+
+# C extensions
+*.so
+
+# Distribution / packaging
+.Python
+build/
+develop-eggs/
+dist/
+downloads/
+eggs/
+.eggs/
+lib/
+lib64/
+parts/
+sdist/
+var/
+wheels/
+*.egg-info/
+.installed.cfg
+*.egg
+MANIFEST
+
+# PyInstaller
+#  Usually these files are written by a python script from a template
+#  before PyInstaller builds the exe, so as to inject date/other infos into it.
+*.manifest
+*.spec
+
+# Installer logs
+pip-log.txt
+pip-delete-this-directory.txt
+
+# Unit test / coverage reports
+htmlcov/
+.tox/
+.nox/
+.coverage
+.coverage.*
+.cache
+nosetests.xml
+coverage.xml
+*.cover
+.hypothesis/
+.pytest_cache/
+
+# Translations
+*.mo
+*.pot
+
+# Django stuff:
+*.log
+local_settings.py
+db.sqlite3
+
+# Flask stuff:
+instance/
+.webassets-cache
+
+# Scrapy stuff:
+.scrapy
+
+# Sphinx documentation
+docs/_build/
+
+# PyBuilder
+target/
+
+# Jupyter Notebook
+.ipynb_checkpoints
+
+# IPython
+profile_default/
+ipython_config.py
+
+# pyenv
+.python-version
+
+# celery beat schedule file
+celerybeat-schedule
+
+# SageMath parsed files
+*.sage.py
+
+# Environments
+.env
+.venv
+env/
+venv/
+ENV/
+env.bak/
+venv.bak/
+
+# Spyder project settings
+.spyderproject
+.spyproject
+
+# Rope project settings
+.ropeproject
+
+# mkdocs documentation
+/site
+
+# mypy
+.mypy_cache/
+.dmypy.json
+dmypy.json
+
+# ConfusionFlow
+confusionflow/static
diff --git a/MANIFEST.in b/MANIFEST.in
@@ -0,0 +1,3 @@
+include README.md
+include LICENSE
+recursive-include confusionflow/static *.*
diff --git a/README.md b/README.md
@@ -0,0 +1,106 @@
+![ConfusionFlow Banner](https://github.com/confusionflow/confusionflow/blob/master/docs/_static/img/confusionflow-banner.png)
+
+ConfusionFlow is a visualization tool distributed as a Python package that enables more nuanced monitoring of a neural network's training process.
+- track and visualize the model performance of different timepoints (i.e., epochs) and dataset folds and compare different runs (e.g., different hyperparameter configurations).
+- we provide wrappers for exporting performance logs in the required format.
+
+ConfusionFlow is in an early-preview alpha. Expect some bugs and rough edges.
+
+
+## Additional Information
+ConfusionFlow was developed as a visualization tool to provide users with more feedback while developing or tuning neural network based classifiers. Performance monitoring often only utilizes simple line charts (one would for example plot the model loss and accuracy) which might easily miss many details and changes in the model's error structure.
+
+While the errors for a specific model state (e.g., at a certain epoch) can be represented as a confusion matrix, it is difficult to compare multiple confusion matrices or track changes over time.
+ConfusionFlow visualizes model confusion over multiple model states and lets users compare different folds (e.g., train vs test set) or different hyperparameter configurations.
+
+To the best of our knowledge there are still no other tools with similar functionality.
+
+
+### Limitations
+#### Number of classes
+Due to screen space limitations the system is currently limited to around 10 classes. We are aware that we will not be able to handle datasets at *ImageNet* scale, but such datasets are not very common, as they are usually very expensive to obtain. A large percentage of classification problems involve 10 or fewer classes, where ConfusionFlow can provide additional feedback.
+
+#### Runtime overhead
+Logging the performance for multiple folds every epoch might severely increase the time to convergence (especially when creating logs for the complete train and test sets). While logging on a minibatch level might provide additional information, it slows down the training even further. It is possible to alleviate this problem by persisting model checkpoints and creating the performance logs on different machines. However, this also involves a lot of engineering effort and will not be supported by ConfusionFlow in the near future.
+
+## Installation
+ConfusionFlow can be installed either directly from PyPI via `pip install confusionflow` or by cloning the repository and installing it from source:
+
+### From Source
+Make sure you have [node](https://nodejs.org/en/) installed (required for building the UI component).
+
+Clone and install the repository:
+```
+git clone https://github.com/confusionflow/confusionflow
+cd confusionflow
+python setup.py install
+```
+
+## Getting started
+As a first step you must create some logs before you can start analyzing. Have a look at the `examples` folder and run one of the demos.
+The examples will create a new subdirectory `logs` where the performance logs will be stored.
+
+You then can start the ConfusionFlow UI via:
+```
+confusionflow --logdir <path_to_logdir>
+```
+
+### Usage Example
+```
+## tensorflow.keras
+cd examples/tf.keras/fashion-mnist
+python fashion-mnist_demo.py
+confusionflow --logdir logs
+
+## pytorch
+cd examples/torch/mnist
+python mnist_demo.py
+confusionflow --logdir logs
+```
+
+If you are using your own datasets you must create a `dataset-configuration` first. We provide example configurations for some popular datasets in `examples/dataset-templates` which should help you get started.
+
+
+## Logging
+We provide simple wrappers for `TensorFlow + Keras` and `PyTorch` for logging confusion matrices and exporting them in the required ConfusionFlow format.
+
+
+### Log Directory Layout
+```
+<logdir>
+├── datasets                    <--- dataset config files
+│   ├── mnist.json
+│   └── index.yml
+├── foldlogdata                 <--- foldlog data
+│   ├── example_log_mnist_train_data.json
+│   └── example_log_mnist_test_data.json
+├── foldlog                     <--- foldlog specifications
+│   ├── example_log_mnist_train.json
+│   └── example_log_mnist_test.json
+├── runs                        <--- run specifications
+│   ├── example_log.json
+│   └── index.json
+└── views                       <--- view specifications (currently unused)
+```
+
+
+## Documentation
+The latest documentation can be viewed on [docs.confusionflow.org](https://docs.confusionflow.org)
+
+
+### API
+The Python package includes a simple Flask based server that implements the current API.
+The current API definition can be found in `tools/swagger/api.yml` and can be viewed using the [Swagger Editor](https://editor.swagger.io/).
+
+
+## ConfusionFlow UI
+
+The ConfusionFlow UI is currently developed at [Caleydo/confusionflow-ui](https://github.com/Caleydo/confusionflow-ui).
+
+
+## The Team
+ConfusionFlow is a research project of the [Institute of Computer Graphics](https://www.jku.at/cg) at [Johannes Kepler University Linz](https://www.jku.at/) in collaboration with the [IBM Visual AI Lab](https://researcher.watson.ibm.com/researcher/view_group.php?id=5948).
+ConfusionFlow is currently maintained by [Peter Ruch](https://github.com/gfrogat) and [Holger Stitz](https://github.com/thinkh).
+
+## Feedback
+We would be really grateful for any [feedback](https://github.com/confusionflow/confusionflow/issues/new?template=feedback.md) via the repository's issues section.
diff --git a/confusionflow/VERSION b/confusionflow/VERSION
@@ -0,0 +1 @@
+0.1.0
diff --git a/confusionflow/__init__.py b/confusionflow/__init__.py
@@ -0,0 +1,34 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import argparse
+
+from gevent import pywsgi
+
+from confusionflow.server import create_app
+
+
+def main():
+    parser = argparse.ArgumentParser(description="ConfusionFlow CLI")
+    parser.add_argument("--host", type=str, default="localhost")
+    parser.add_argument("--logdir", type=str, required=True)
+    parser.add_argument("--port", type=int, default=8080)
+
+    FLAGS = parser.parse_args()
+
+    confusionflow_app = create_app(FLAGS.logdir)
+
+    http_server = pywsgi.WSGIServer(
+        (FLAGS.host, FLAGS.port), confusionflow_app)
+
+    try:
+        print("Starting ConfusionFlow Server on http://{}:{}".format(
+            FLAGS.host, FLAGS.port))
+        http_server.serve_forever()
+    except KeyboardInterrupt:
+        print("Server received KeyboardInterrupt. Shutting down ...")
+
+
+if __name__ == "__main__":
+    main()
diff --git a/confusionflow/logging/__init__.py b/confusionflow/logging/__init__.py
@@ -0,0 +1,4 @@
+from .foldlog import FoldLog
+from .foldlogdata import FoldLogData
+from .run import Run
+from .fold import Fold
diff --git a/confusionflow/logging/callbacks.py b/confusionflow/logging/callbacks.py
@@ -0,0 +1,39 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import numpy as np
+import tensorflow as tf
+from tensorflow.keras.callbacks import Callback
+from tensorflow import confusion_matrix, Session
+
+
+class RunLogger(Callback):
+    """
+    A Runlogger is a simple wrapper for logging confusion matrices when using Keras
+    """
+
+    def __init__(self, run, loss):
+        self.run = run
+        self.loss = loss
+        self.session = tf.Session()
+
+    def on_epoch_begin(self, epoch, logs={}):
+        for fold, foldlog in zip(self.run.folds, self.run.foldlogs):
+            assert fold.foldId == foldlog.foldId
+
+            self.log_performance(fold, foldlog, epoch)
+
+    def log_performance(self, fold, foldlog, epoch):
+        x, y = fold.dataset
+        predictions = np.argmax(self.model.predict(x, verbose=0), axis=1)
+        if self.loss == "categorical_crossentropy":
+            targets = np.argmax(y, axis=1)
+        elif self.loss == "sparse_categorical_crossentropy":
+            targets = y
+        else:
+            raise ValueError("loss `{}` is not supported".format(self.loss))
+        confmat = confusion_matrix(targets, predictions)
+        # transform tf.Tensor to list
+        confmat = confmat.eval(session=self.session).flatten().tolist()
+        foldlog.add_epochdata(epochId=epoch, confmat=confmat)
diff --git a/confusionflow/logging/fold.py b/confusionflow/logging/fold.py
@@ -0,0 +1,20 @@
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import json
+import os
+
+from confusionflow.logging.utils import check_folderpath
+
+
+class Fold:
+    """
+    A Fold is a subset of your dataset
+    """
+
+    def __init__(self, dataset, foldId, dataset_config):
+        self.dataset = dataset
+        self.foldId = foldId
+        self.description = ""
+        self.dataset_config = dataset_config