diff --git a/renga/cli/_graph.py b/renga/cli/_graph.py
index 128bc339c0..2a1c5a3874 100644
--- a/renga/cli/_graph.py
+++ b/renga/cli/_graph.py
@@ -25,6 +25,7 @@
 from renga._compat import Path
 from renga.models.cwl.command_line_tool import CommandLineTool
+from renga.models.cwl.workflow import Workflow
 
 
 @attr.s
@@ -72,7 +73,8 @@ def find_latest(self, start, path):
     def iter_file_inputs(self, tool, basedir):
         """Yield path of tool file inputs."""
         if tool.stdin:
-            raise NotImplemented(tool.stdin)
+            if tool.stdin[0] != '$':  # pragma: no cover
+                raise NotImplementedError(tool.stdin)
 
         for input_ in tool.inputs:
             if input_.type == 'File' and input_.default:
                 yield os.path.relpath(
@@ -105,9 +107,85 @@ def add_file(self, path, revision='HEAD'):
             file_key = self.add_node(commit, path)
             tool_key = self.add_tool(commit, cwl)
             #: Edge from a tool to the output.
-            self.G.add_edge(tool_key, file_key)
+            tool = self.G.nodes[tool_key]['tool']
+            output_id = tool.get_output_id(path)
+            self.G.add_edge(tool_key, file_key, id=output_id)
             return file_key
 
         if file_commits:
             #: Does not have a parent CWL.
             return self.add_node(file_commits[0], path)
+
+    @property
+    def _output_keys(self):
+        """Return a list of the output keys."""
+        return [n for n, d in self.G.out_degree() if d == 0]
+
+    def _source_name(self, key):
+        """Find source name for a node."""
+        if self.G.in_degree(key) == 0:
+            return None
+
+        assert self.G.in_degree(key) == 1
+
+        tool_key, attr = list(self.G.pred[key].items())[0]
+        step = self.G.nodes[tool_key]['step']['id']
+        return '{0}/{1}'.format(step, attr['id'])
+
+    @property
+    def _tool_nodes(self):
+        """Yield topologically sorted tools."""
+        for key in nx.topological_sort(self.G):
+            node = self.G.nodes[key]
+            tool = node.get('tool')
+            if tool is not None:
+                yield key, node
+
+    def ascwl(self):
+        """Serialize graph to CWL workflow."""
+        workflow = Workflow()
+
+        input_index = 1
+
+        for tool_index, (key, node) in enumerate(self._tool_nodes, 1):
+            _, path = key
+            tool = node['tool']
+            step_id = 'step_{0}'.format(tool_index)
+            node['step'] = {'id': step_id}
+
+            ins = {
+                edge_id: self._source_name(target_id)
+                for target_id, _, edge_id in self.G.in_edges(key, data='id')
+            }
+            outs = [
+                edge_id for _, _, edge_id in self.G.out_edges(key, data='id')
+            ]
+
+            for input_ in tool.inputs:
+                input_mapping = ins.get(input_.id)
+                if input_mapping is None:
+                    input_id = 'input_{0}'.format(input_index)
+                    workflow.inputs.append({
+                        'id': input_id,
+                        'type': input_.type,
+                        # 'default': input_.default,
+                    })
+                    input_index += 1
+                    ins[input_.id] = input_id
+
+            workflow.add_step(
+                run=Path(path),
+                id=step_id,
+                in_=ins,
+                out=outs,
+            )
+
+        for index, key in enumerate(self._output_keys):
+            output_id = 'output_{0}'.format(index)
+            workflow.outputs.append({
+                'id': output_id,
+                'type': 'File',
+                'outputSource': self._source_name(key),
+            })
+
+        return workflow
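The ascwl() method above leans on two graph properties: sink nodes (out-degree
zero) become the workflow outputs, and nx.topological_sort() visits producers
before consumers, so every step's input sources are already named when the
step is emitted. A minimal, self-contained sketch of that idea with plain
networkx — the node and id names below are illustrative, not part of the patch:

    import networkx as nx

    # Hypothetical chain: a tool writes data.csv, a second tool turns it
    # into counted.txt.
    G = nx.DiGraph()
    G.add_edge('step1.cwl', 'data.csv', id='output_1')
    G.add_edge('data.csv', 'step2.cwl', id='input_1')
    G.add_edge('step2.cwl', 'counted.txt', id='output_1')

    # Sinks (out-degree zero) become the workflow outputs.
    assert [n for n, d in G.out_degree() if d == 0] == ['counted.txt']

    # Topological order visits producers before consumers, so by the time
    # step2.cwl is serialized its input source 'step1/output_1' is known.
    order = list(nx.topological_sort(G))
    assert order.index('step1.cwl') < order.index('step2.cwl')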
diff --git a/renga/cli/run.py b/renga/cli/run.py
index 70a370809b..65c811ab9c 100644
--- a/renga/cli/run.py
+++ b/renga/cli/run.py
@@ -18,6 +18,7 @@
 """Track provenance of data created by executing programs."""
 
 import os
+import sys
 from subprocess import call
 
 import click
@@ -39,11 +40,20 @@ def run(repo, no_output, command_line):
     """Tracking work on a specific problem."""
     candidates = [x[0] for x in repo.git.index.entries] + \
         repo.git.untracked_files
+    mapped_std = _mapped_std_streams(candidates)
     factory = CommandLineToolFactory(
         command_line=command_line,
-        **_mapped_std_streams(candidates))
+        **mapped_std)
     with repo.with_workflow_storage() as wf:
         with factory.watch(repo.git,
                            no_output=no_output) as tool:
-            call(factory.command_line, cwd=os.getcwd())
+            call(
+                factory.command_line,
+                cwd=os.getcwd(),
+                **{key: getattr(sys, key) for key in mapped_std.keys()},
+            )
+
+            sys.stdout.flush()
+            sys.stderr.flush()
+
             wf.add_step(run=tool)
diff --git a/renga/cli/workflow.py b/renga/cli/workflow.py
new file mode 100644
index 0000000000..304aeb5118
--- /dev/null
+++ b/renga/cli/workflow.py
@@ -0,0 +1,52 @@
+# -*- coding: utf-8 -*-
+#
+# Copyright 2018 - Swiss Data Science Center (SDSC)
+# A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
+# Eidgenössische Technische Hochschule Zürich (ETHZ).
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Workflow operations."""
+
+import os
+
+import click
+import yaml
+
+from renga.models.cwl._ascwl import ascwl
+
+from ._graph import Graph
+from ._repo import pass_repo
+
+
+@click.group()
+def workflow():
+    """Workflow operations."""
+
+
+@workflow.command()
+@click.option('--revision', default='HEAD')
+@click.argument('path', type=click.Path(exists=True, dir_okay=False), nargs=-1)
+@pass_repo
+def create(repo, revision, path):
+    """Create a workflow description for a file."""
+    graph = Graph(repo)
+    for p in path:
+        graph.add_file(p, revision=revision)
+
+    click.echo(
+        yaml.dump(ascwl(
+            graph.ascwl(),
+            filter=lambda _, x: x is not None,
+            # basedir=repo.workflow_path,
+            basedir='.',
+        ), default_flow_style=False))
diff --git a/renga/models/cwl/_ascwl.py b/renga/models/cwl/_ascwl.py
index dd05a87dd3..e6e47ce85e 100644
--- a/renga/models/cwl/_ascwl.py
+++ b/renga/models/cwl/_ascwl.py
@@ -78,17 +78,18 @@ def convert_value(v):
         return v
 
     for a in attrs:
+        a_name = a.name.rstrip('_')
         v = getattr(inst, a.name)
         if filter is not None and not filter(a, v):
             continue
         if recurse is True:
             if has(v.__class__):
-                rv[a.name] = ascwl(v, recurse=True, filter=filter,
+                rv[a_name] = ascwl(v, recurse=True, filter=filter,
                                    dict_factory=dict_factory, basedir=basedir)
             elif isinstance(v, (tuple, list, set)):
                 cf = v.__class__ if retain_collection_types is True else list
-                rv[a.name] = cf([
+                rv[a_name] = cf([
                     ascwl(i, recurse=True, filter=filter,
                           dict_factory=dict_factory, basedir=basedir)
                     if has(i.__class__) else i
@@ -99,23 +100,23 @@ def convert_value(v):
                     k = a.metadata['jsonldPredicate'].get('mapSubject')
                     if k:
                         vv = dict_factory()
-                        for i in rv[a.name]:
+                        for i in rv[a_name]:
                             kk = i.pop(k)
                             vv[kk] = i
-                        rv[a.name] = vv
+                        rv[a_name] = vv
             elif isinstance(v, dict):
                 df = dict_factory
-                rv[a.name] = df((
+                rv[a_name] = df((
                     ascwl(kk, dict_factory=df, basedir=basedir)
                     if has(kk.__class__) else kk,
                     ascwl(vv, dict_factory=df, basedir=basedir)
                     if has(vv.__class__) else vv)
                     for kk, vv in iteritems(v))
             else:
-                rv[a.name] = convert_value(v)
+                rv[a_name] = convert_value(v)
         else:
-            rv[a.name] = convert_value(v)
+            rv[a_name] = convert_value(v)
 
     if isinstance(inst, CWLClass):
         rv['class'] = inst.__class__.__name__
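The a_name = a.name.rstrip('_') change in _ascwl.py is what lets attribute
names that collide with Python keywords serialize under their CWL spelling,
e.g. WorkflowStep.in_ becoming an ``in`` key. Reduced to a self-contained
sketch with attrs (the Step class is made up for illustration):

    import attr

    @attr.s
    class Step(object):
        in_ = attr.ib(default=None)  # serialized CWL key is ``in``
        out = attr.ib(default=None)

    # Mirrors the renaming performed once per attribute in ascwl().
    names = [a.name.rstrip('_') for a in attr.fields(Step)]
    assert names == ['in', 'out']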
diff --git a/renga/models/cwl/command_line_tool.py b/renga/models/cwl/command_line_tool.py
index 29df38d477..7629e31f6c 100644
--- a/renga/models/cwl/command_line_tool.py
+++ b/renga/models/cwl/command_line_tool.py
@@ -17,6 +17,7 @@
 # limitations under the License.
 """Represent a ``CommandLineTool`` from the Common Workflow Language."""
 
+import fnmatch
 import re
 import shlex
 from contextlib import contextmanager
@@ -48,8 +49,7 @@ class CommandLineTool(Process, CWLClass):
         cmd, (list, tuple)) else shlex.split(cmd),
     )  # list(string, Expression, CommandLineBinding)
 
-    stdin = attr.ib(default=None, converter=attr.converters.optional(Path))
-    # null, str, Expression
+    stdin = attr.ib(default=None)  # null, str, Expression
     stdout = attr.ib(default=None)
     stderr = attr.ib(default=None)
 
@@ -60,6 +60,24 @@ class CommandLineTool(Process, CWLClass):
     temporaryFailCodes = attr.ib(default=attr.Factory(list))  # list(int)
     permanentFailCodes = attr.ib(default=attr.Factory(list))  # list(int)
 
+    def get_output_id(self, path):
+        """Return an id of the matching path from default values."""
+        for output in self.outputs:
+            if output.type in {'stdout', 'stderr'}:
+                stream = getattr(self, output.type)
+                if stream == path:
+                    return output.id
+            elif output.type == 'File':
+                glob = output.outputBinding.glob
+                # TODO better support for Expression
+                if glob.startswith('$(inputs.'):
+                    input_id = glob[len('$(inputs.'):-1]
+                    for input_ in self.inputs:
+                        if input_.id == input_id and input_.default == path:
+                            return output.id
+                elif fnmatch.fnmatch(path, glob):
+                    return output.id
+
 
 @attr.s
 class CommandLineToolFactory(object):
@@ -94,6 +112,13 @@ def __attrs_post_init__(self):
         self.inputs = []
         self.outputs = []
 
+        if self.stdin:
+            input_ = next(self.guess_inputs(self.stdin))
+            assert input_.type == 'File'
+            input_.id = 'input_stdin'
+            self.inputs.append(input_)
+            self.stdin = '$(inputs.{0}.path)'.format(input_.id)
+
         for stream_name in ('stdout', 'stderr'):
             stream = getattr(self, stream_name)
             if stream and self.file_candidate(stream):
@@ -151,7 +176,7 @@ def watch(self, git=None, no_output=False):
                     'Output file was not created or changed.'
                 )
 
-            if not tool.outputs:
+            if not outputs:
                 raise RuntimeError('No output was detected')
 
             tool.inputs = list(inputs.values())
@@ -184,6 +209,10 @@ def split_command_and_args(self):
         cmd = [self.command_line[0]]
         args = list(self.command_line[1:])
 
+        if len(args) < 2:
+            # only guess subcommand for more arguments
+            return cmd, args
+
         while args and re.match(self._RE_SUBCOMMAND, args[0]) \
                 and not self.file_candidate(args[0]):
             cmd.append(args.pop(0))
@@ -210,7 +239,7 @@ def guess_type(self, value):
             # TODO suggest that the file should be imported to the repo
             pass
 
-        if ',' in value:
+        if len(value) > 1 and ',' in value:
             return value.split(','), 'string[]', ','
 
         return value, 'string', None
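get_output_id() resolves an output path back to an output id in one of three
ways: as a redirected stdout/stderr stream, through a ``$(inputs.<id>)``
reference, or via an fnmatch glob. The two non-stream branches, isolated
(paths and ids here are made up for illustration):

    import fnmatch

    # Glob branch: the produced path is matched against outputBinding.glob.
    assert fnmatch.fnmatch('counted.txt', 'counted.txt')
    assert fnmatch.fnmatch('results/output.csv', 'results/*.csv')

    # Expression branch: '$(inputs.<id>)' is unwrapped to the input id,
    # which is then compared against each input's default value.
    glob = '$(inputs.output_file)'
    assert glob[len('$(inputs.'):-1] == 'output_file'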
"""Represent workflows from the Common Workflow Language.""" +import uuid + import attr +from ._ascwl import CWLClass, mapped +from .process import Process + @attr.s class WorkflowStep(object): """Define an executable element of a workflow.""" run = attr.ib() # string, Process + id = attr.ib(default=attr.Factory(uuid.uuid4)) + + in_ = attr.ib(default=None) + out = attr.ib(default=None) @attr.s -class Workflow(object): +class Workflow(Process, CWLClass): """Define a workflow representation.""" - steps = attr.ib(default=attr.Factory(list)) + steps = mapped(WorkflowStep) def add_step(self, **kwargs): """Add a workflow step.""" diff --git a/setup.py b/setup.py index 4d3621d852..61385968d0 100644 --- a/setup.py +++ b/setup.py @@ -107,6 +107,7 @@ 'log=renga.cli.log:log', 'run=renga.cli.run:run', 'workon=renga.cli.workon:workon', + 'workflow=renga.cli.workflow:workflow', ], }, extras_require=extras_require, diff --git a/tests/conftest.py b/tests/conftest.py index 8d7cdf9d20..68945458d6 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -105,21 +105,6 @@ def request_callback(request): yield rsps -@pytest.fixture() -def runner(base_runner): - """Return runner with a new project.""" - from renga import cli - - runner = base_runner - - os.mkdir('test-project') - os.chdir('test-project') - - result = runner.invoke(cli.cli, ['init']) - assert result.exit_code == 0 - yield base_runner - - @pytest.fixture() def graph_mutation_responses(auth_responses, graph_mutation_client): """Monkeypatch requests to immitate the KnowledgeGraph.""" @@ -652,31 +637,37 @@ def add_client(doctest_namespace, renga_client, storage_responses, @pytest.fixture() -def test_file(tmpdir): +def data_file(tmpdir): """Create a sample data file.""" - p = tmpdir.mkdir('data').join('test_file') + p = tmpdir.mkdir('data').join('file') p.write('1234') return p @pytest.fixture() -def test_project(base_runner): +def project(base_runner): """Create a test project.""" from renga import cli - os.makedirs('test-project/data') - os.chdir('test-project') + with base_runner.isolated_filesystem() as project_path: + os.makedirs('data') + result = base_runner.invoke(cli.cli, ['init', '.']) + yield project_path + - result = base_runner.invoke(cli.cli, ['init', '.']) +@pytest.fixture() +def runner(base_runner, project): + """Return runner with a new project.""" + yield base_runner @pytest.fixture() -def test_dataset(test_project): +def dataset(project): """Create a dataset.""" from renga.models import dataset return dataset.Dataset.create( 'dataset', - datadir='./data', + datadir='data', authors={'name': 'me', 'email': 'me@example.com'}) @@ -691,20 +682,20 @@ def request_callback(request): rsps.add_callback( responses.GET, - 'http://example.com/test_file', + 'http://example.com/file', callback=request_callback) rsps.add_callback( responses.GET, - 'https://example.com/test_file', + 'https://example.com/file', callback=request_callback) yield rsps @pytest.fixture() -def test_dir(tmpdir): +def directory_tree(tmpdir): """Create a test directory tree.""" # initialize - p = tmpdir.mkdir('test_dir') + p = tmpdir.mkdir('directory_tree') p.join('file').write('1234') p.join('dir2').mkdir() p.join('dir2/file2').write('5678') @@ -712,23 +703,23 @@ def test_dir(tmpdir): @pytest.fixture() -def test_repo(test_dir): +def data_repository(directory_tree): """Create a test repo.""" from git import Repo, Actor # initialize - repo = Repo.init(test_dir.strpath) + repo = Repo.init(directory_tree.strpath) # add a file - repo.index.add([test_dir.join('file').strpath]) + 
diff --git a/tests/conftest.py b/tests/conftest.py
index 8d7cdf9d20..68945458d6 100644
--- a/tests/conftest.py
+++ b/tests/conftest.py
@@ -105,21 +105,6 @@ def request_callback(request):
     yield rsps
 
 
-@pytest.fixture()
-def runner(base_runner):
-    """Return runner with a new project."""
-    from renga import cli
-
-    runner = base_runner
-
-    os.mkdir('test-project')
-    os.chdir('test-project')
-
-    result = runner.invoke(cli.cli, ['init'])
-    assert result.exit_code == 0
-    yield base_runner
-
-
 @pytest.fixture()
 def graph_mutation_responses(auth_responses, graph_mutation_client):
     """Monkeypatch requests to immitate the KnowledgeGraph."""
@@ -652,31 +637,37 @@ def add_client(doctest_namespace, renga_client, storage_responses,
 
 
 @pytest.fixture()
-def test_file(tmpdir):
+def data_file(tmpdir):
     """Create a sample data file."""
-    p = tmpdir.mkdir('data').join('test_file')
+    p = tmpdir.mkdir('data').join('file')
     p.write('1234')
     return p
 
 
 @pytest.fixture()
-def test_project(base_runner):
+def project(base_runner):
     """Create a test project."""
     from renga import cli
 
-    os.makedirs('test-project/data')
-    os.chdir('test-project')
+    with base_runner.isolated_filesystem() as project_path:
+        os.makedirs('data')
+        result = base_runner.invoke(cli.cli, ['init', '.'])
+        yield project_path
+
 
-    result = base_runner.invoke(cli.cli, ['init', '.'])
+@pytest.fixture()
+def runner(base_runner, project):
+    """Return runner with a new project."""
+    yield base_runner
 
 
 @pytest.fixture()
-def test_dataset(test_project):
+def dataset(project):
     """Create a dataset."""
     from renga.models import dataset
 
     return dataset.Dataset.create(
         'dataset',
-        datadir='./data',
+        datadir='data',
         authors={'name': 'me', 'email': 'me@example.com'})
@@ -691,20 +682,20 @@ def request_callback(request):
 
     rsps.add_callback(
         responses.GET,
-        'http://example.com/test_file',
+        'http://example.com/file',
         callback=request_callback)
     rsps.add_callback(
         responses.GET,
-        'https://example.com/test_file',
+        'https://example.com/file',
        callback=request_callback)
     yield rsps
 
 
 @pytest.fixture()
-def test_dir(tmpdir):
+def directory_tree(tmpdir):
     """Create a test directory tree."""
     # initialize
-    p = tmpdir.mkdir('test_dir')
+    p = tmpdir.mkdir('directory_tree')
     p.join('file').write('1234')
     p.join('dir2').mkdir()
     p.join('dir2/file2').write('5678')
@@ -712,23 +703,23 @@
 
 @pytest.fixture()
-def test_repo(test_dir):
+def data_repository(directory_tree):
     """Create a test repo."""
     from git import Repo, Actor
 
     # initialize
-    repo = Repo.init(test_dir.strpath)
+    repo = Repo.init(directory_tree.strpath)
 
     # add a file
-    repo.index.add([test_dir.join('file').strpath])
+    repo.index.add([directory_tree.join('file').strpath])
     repo.index.commit('test commit', author=Actor('me', 'me@example.com'))
 
     # commit changes to the same file with a different user
-    test_dir.join('file').write('5678')
-    repo.index.add([test_dir.join('file').strpath])
+    directory_tree.join('file').write('5678')
+    repo.index.add([directory_tree.join('file').strpath])
     repo.index.commit('test commit', author=Actor('me2', 'me2@example.com'))
 
     # commit a second file
-    repo.index.add([test_dir.join('dir2/file2').strpath])
+    repo.index.add([directory_tree.join('dir2/file2').strpath])
     repo.index.commit('test commit', author=Actor('me', 'me@example.com'))
 
     # return the repo
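The rewritten project fixture builds on Click's isolated_filesystem() instead
of a hand-made test-project directory, so each test runs in a throwaway
working directory; the runner fixture then simply reuses it. The core pattern,
stripped of the renga-specific calls:

    import os

    from click.testing import CliRunner

    runner = CliRunner()
    with runner.isolated_filesystem() as path:
        # The CWD is a fresh temporary directory for the whole block.
        assert os.path.realpath(path) == os.path.realpath(os.getcwd())
        os.makedirs('data')

    # ... and it is cleaned up again on exit.
    assert not os.path.exists(path)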
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 3aaef34417..bf00362dd7 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -1,6 +1,6 @@
 # -*- coding: utf-8 -*-
 #
-# Copyright 2017 - Swiss Data Science Center (SDSC)
+# Copyright 2017, 2018 - Swiss Data Science Center (SDSC)
 # A partnership between École Polytechnique Fédérale de Lausanne (EPFL) and
 # Eidgenössische Technische Hochschule Zürich (ETHZ).
 #
@@ -19,8 +19,11 @@
 
 from __future__ import absolute_import, print_function
 
+import contextlib
 import os
+import sys
 
+import git
 import pytest
 import responses
 
@@ -100,10 +103,59 @@ def test_run_simple(runner):
     assert result.exit_code == 0
 
 
-def test_datasets(base_runner, test_file, test_project, test_repo):
-    """Test importing data into a dataset."""
-    runner = base_runner
+def test_workflow(runner):
+    """Test workflow command."""
+    result = runner.invoke(cli.cli, ['run', 'touch', 'data.csv'])
+    assert result.exit_code == 0
+
+    with open('counted.txt', 'w') as stdout:
+        with contextlib.redirect_stdout(stdout):
+            try:
+                cli.cli.main(
+                    args=('run', 'wc', 'data.csv'),
+                    prog_name=runner.get_default_prog_name(cli.cli),
+                )
+            except SystemExit as e:
+                assert e.code in {None, 0}
+
+    result = runner.invoke(cli.cli, ['workflow', 'create', 'counted.txt'])
+    assert result.exit_code == 0
+
+
+def test_streams(runner, capsys):
+    """Test redirection of std streams."""
+    repo = git.Repo('.')
+
+    with open('source.txt', 'w') as source:
+        source.write('first,second,third')
+    repo.git.add('--all')
+    repo.index.commit('Added source.txt')
+
+    with capsys.disabled():
+        with open('source.txt', 'rb') as stdin:
+            with open('result.txt', 'wb') as stdout:
+                try:
+                    old_stdin, old_stdout = sys.stdin, sys.stdout
+                    sys.stdin, sys.stdout = stdin, stdout
+                    try:
+                        cli.cli.main(
+                            args=('run', 'cut', '-d,', '-f', '2', '-s'),
+                        )
+                    except SystemExit as e:
+                        assert e.code in {None, 0}
+                finally:
+                    sys.stdin, sys.stdout = old_stdin, old_stdout
+
+    with open('result.txt', 'r') as f:
+        assert f.read().strip() == 'second'
+
+    result = runner.invoke(cli.cli, ['workflow', 'create', 'result.txt'])
+    assert result.exit_code == 0
+
 
+def test_datasets(data_file, data_repository, runner):
+    """Test importing data into a dataset."""
 
     # create a dataset
     result = runner.invoke(cli.cli, ['datasets', 'create', 'dataset'])
     assert result.exit_code == 0
@@ -112,8 +164,10 @@ def test_datasets(base_runner, test_file, test_project, test_repo):
 
     # add data
     result = runner.invoke(cli.cli, ['datasets', 'add', 'dataset',
-                                     str(test_file)])
-    assert os.stat('data/dataset/test_file')
+                                     str(data_file)])
+    assert os.stat(os.path.join(
+        'data', 'dataset', os.path.basename(data_file)
+    ))
 
     # add data from a git repo via http
     result = runner.invoke(cli.cli, [
@@ -126,4 +180,4 @@ def test_datasets(base_runner, test_file, test_project, test_repo):
     # add data from local git repo
     result = runner.invoke(cli.cli, [
         'datasets', 'add', 'dataset', '-t', 'file', '-t', 'file2',
-        os.path.dirname(test_repo.git_dir)])
+        os.path.dirname(data_repository.git_dir)])
diff --git a/tests/test_cwl.py b/tests/test_cwl.py
index d6ce9d640e..3b24213ee4 100644
--- a/tests/test_cwl.py
+++ b/tests/test_cwl.py
@@ -76,6 +76,14 @@ def test_base_command_detection(instance_path):
     assert tool.inputs[0].inputBinding.separate is True
 
 
+def test_short_base_command_detection():
+    """Test base command detection without arguments."""
+    tool = CommandLineToolFactory(('echo', 'A')).generate_tool()
+    assert tool.cwlVersion == 'v1.0'
+    assert tool.__class__.__name__ == 'CommandLineTool'
+    assert tool.inputs[0].default == 'A'
+
+
 def test_04_output(instance_path):
     """Test describtion of outputs from a command."""
     hello = Path(instance_path) / 'hello.tar'
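test_streams checks that ``renga run`` now forwards redirected standard
streams to the executed command. The subprocess mechanics it ultimately
exercises are plain stdin/stdout keyword arguments to call(), roughly as
below (a sketch that assumes a POSIX ``cut`` on the PATH):

    from subprocess import call

    with open('source.txt', 'w') as f:
        f.write('first,second,third\n')

    # run.py passes the detected stream mapping straight through to
    # subprocess.call(); with redirected files this boils down to:
    with open('source.txt', 'rb') as stdin, open('result.txt', 'wb') as stdout:
        call(['cut', '-d,', '-f', '2'], stdin=stdin, stdout=stdout)

    with open('result.txt') as f:
        assert f.read().strip() == 'second'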
diff --git a/tests/test_dataset.py b/tests/test_dataset.py
index f8ea719593..d7306ee6a2 100644
--- a/tests/test_dataset.py
+++ b/tests/test_dataset.py
@@ -46,7 +46,7 @@ def not_raises():
     return not_raises()
 
 
-def test_dataset_creation(test_project):
+def dataset_creation(project):
     """Test dataset directory tree creation."""
     # creating a dataset without an author fails
     with pytest.raises(RuntimeError):
@@ -67,31 +67,31 @@
 @pytest.mark.parametrize('scheme, path, error',
                          [('', 'temp', None), ('file://', 'temp', None),
                           ('', 'tempp', git.NoSuchPathError),
-                          ('http://', 'example.com/test_file',
-                           None), ('https://', 'example.com/test_file',
+                          ('http://', 'example.com/file',
+                           None), ('https://', 'example.com/file',
                            None), ('bla://', 'file', NotImplementedError)])
-def test_data_add(scheme, path, error, test_project, test_file, test_dir,
+def test_data_add(scheme, path, error, project, data_file, directory_tree,
                   dataset_responses):
     """Test data import."""
     with raises(error):
         if path == 'temp':
-            path = str(test_file)
+            path = str(data_file)
         elif path == 'tempdir':
-            path = str(test_dir)
+            path = str(directory_tree)
 
         d = dataset.Dataset.create(
             'dataset',
             datadir='./data',
             authors={'name': 'me', 'email': 'me@example.com'})
         d.add_data('{}{}'.format(scheme, path))
-        with open('data/dataset/test_file') as f:
+        with open('data/dataset/file') as f:
             assert f.read() == '1234'
 
-        assert d.files.get('test_file')
+        assert d.files.get('file')
 
         # check that the imported file is read-only
-        assert not os.access('data/dataset/test_file',
+        assert not os.access('data/dataset/file',
                              stat.S_IXUSR | stat.S_IXGRP | stat.S_IXOTH)
 
         assert os.stat('data/dataset/metadata.json')
@@ -104,68 +104,68 @@
             authors={'name': 'me', 'email': 'me@example.com'})
         d.add_data('{}{}'.format(scheme, path), nocopy=True)
 
-        assert os.path.exists('data/dataset/test_file')
+        assert os.path.exists('data/dataset/file')
 
 
-def test_data_add_recursive(test_dir, test_project):
+def test_data_add_recursive(directory_tree, project):
     """Test recursive data imports."""
     d = dataset.Dataset.create(
         'dataset', authors={'name': 'me', 'email': 'me@example.com'})
-    d.add_data(test_dir.join('dir2').strpath)
+    d.add_data(directory_tree.join('dir2').strpath)
 
     assert 'dir2/file2' in d.files
 
 
-def test_dataset_serialization(test_dataset, test_file):
+def dataset_serialization(dataset, data_file):
    """Test deserializing a dataset object."""
     # deserialize from json on disk
-    d = dataset.Dataset.from_json(test_dataset.path.joinpath('metadata.json'))
-    assert d.path == test_dataset.path
+    d = dataset.Dataset.from_json(dataset.path.joinpath('metadata.json'))
+    assert d.path == dataset.path
 
     d_dict = d.to_dict()
 
     assert all([key in d_dict for key in ('name', 'identifier', 'files')])
     assert not len(d_dict['files'].values())
-    d.add_data(str(test_file))
+    d.add_data(str(data_file))
     d_dict = d.to_dict()
     assert len(d_dict['files'].values())
 
 
-def test_repo_commit(test_dataset, test_file):
+def data_repository_commit(dataset, data_file):
     """Test that files get commited to the git repository properly."""
     from git import Repo
     r = Repo('.')
 
-    test_dataset.repo = r
-    test_dataset.add_data(str(test_file))
-    test_dataset.write_metadata()
-    test_dataset.commit_to_repo()
+    dataset.repo = r
+    dataset.add_data(str(data_file))
+    dataset.write_metadata()
+    dataset.commit_to_repo()
 
     assert all([
         f not in r.untracked_files
-        for f in ['data/dataset/metadata.json', 'data/dataset/test_file']
+        for f in ['data/dataset/metadata.json', 'data/dataset/file']
     ])
 
 
-def test_git_repo_import(test_dataset, tmpdir, test_repo):
+def test_git_repo_import(dataset, tmpdir, data_repository):
     """Test an import from a git repository."""
     from git import Repo
     r = Repo('.')
-    test_dataset.repo = r
+    dataset.repo = r
 
     # add data from local repo
-    test_dataset.add_data(
-        os.path.join(os.path.dirname(test_repo.git_dir), 'dir2'))
-    assert os.stat('data/dataset/test_dir/dir2/file2')
-    assert 'test_dir/dir2/file2' in test_dataset.files
+    dataset.add_data(
+        os.path.join(os.path.dirname(data_repository.git_dir), 'dir2'))
+    assert os.stat('data/dataset/directory_tree/dir2/file2')
+    assert 'directory_tree/dir2/file2' in dataset.files
     assert os.stat('.renga/vendors/local')
 
     # check that the authors are properly parsed from commits
-    test_dataset.add_data(os.path.dirname(test_repo.git_dir), target='file')
-    assert len(test_dataset.files['test_dir/file'].authors) == 2
+    dataset.add_data(os.path.dirname(data_repository.git_dir), target='file')
+    assert len(dataset.files['directory_tree/file'].authors) == 2
     assert all(
         x.name in ('me', 'me2')
-        for x in test_dataset.files['test_dir/file'].authors)
+        for x in dataset.files['directory_tree/file'].authors)
 
 
 @pytest.mark.parametrize('authors', [
@@ -176,10 +176,10 @@
         'email': 'me@example.com'
     }
 ])
-def test_author_parse(authors, test_file):
+def test_author_parse(authors, data_file):
     """Test that different options for specifying authors work."""
     f = dataset.DatasetFile(
-        'test_file', origin=str(test_file), authors=authors)
+        'file', origin=str(data_file), authors=authors)
     assert dataset.Author(name='me', email='me@example.com') in f.authors
 
     # email check
@@ -189,4 +189,4 @@ def test_author_parse(authors, test_file):
     # authors must be a set or list of dicts or Author
     with pytest.raises(ValueError):
         f = dataset.DatasetFile(
-            'test_file', origin=str(test_file), authors=['name'])
+            'file', origin=str(data_file), authors=['name'])
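Taken together, the pieces allow reconstructing a CWL workflow from git
history. A rough end-to-end sketch using Click's test runner — it assumes a
checkout with these changes installed and git configured, so treat it as an
outline rather than a guaranteed recipe:

    from click.testing import CliRunner

    from renga import cli

    runner = CliRunner()
    with runner.isolated_filesystem():
        assert runner.invoke(cli.cli, ['init', '.']).exit_code == 0
        assert runner.invoke(
            cli.cli, ['run', 'touch', 'data.csv']).exit_code == 0

        # Reconstruct a CWL description of how data.csv came to be.
        result = runner.invoke(cli.cli, ['workflow', 'create', 'data.csv'])
        assert result.exit_code == 0
        # The YAML dump is expected to carry the CWL class marker.
        assert 'class: Workflow' in result.output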