From ada98f815adf252e1f8f74a1c0448fa584abe27f Mon Sep 17 00:00:00 2001
From: Aaron Virshup <avirshup@gmail.com>
Date: Mon, 4 Jun 2018 13:56:47 -0700
Subject: [PATCH 1/6] Split out general engine tests for dependency injection
 tests

---
 pyccc/tests/test_engine_features.py           | 102 +++++++++++++++
 .../{test_job_types.py => test_engines.py}    | 120 ++++--------------
 2 files changed, 125 insertions(+), 97 deletions(-)
 create mode 100644 pyccc/tests/test_engine_features.py
 rename pyccc/tests/{test_job_types.py => test_engines.py} (79%)

diff --git a/pyccc/tests/test_engine_features.py b/pyccc/tests/test_engine_features.py
new file mode 100644
index 0000000..c18e627
--- /dev/null
+++ b/pyccc/tests/test_engine_features.py
@@ -0,0 +1,102 @@
+import os
+import pytest
+from .engine_fixtures import subprocess_engine, local_docker_engine
+
+"""
+Tests of features and quirks specific to engines
+"""
+
+@pytest.fixture
+def set_env_var():
+    import os
+    assert 'NULL123' not in os.environ, "Bleeding environment"
+    os.environ['NULL123'] = 'nullabc'
+    yield
+    del os.environ['NULL123']
+
+
+def test_subprocess_environment_preserved(subprocess_engine, set_env_var):
+    job = subprocess_engine.launch(command='echo $NULL123', image='python:2.7-slim')
+    job.wait()
+    assert job.stdout.strip() == 'nullabc'
+
+
+def test_readonly_docker_volume_mount(local_docker_engine):
+    engine = local_docker_engine
+    mountdir = '/tmp'
+    job = engine.launch(image='docker',
+                        command='echo blah > /mounted/blah',
+                        engine_options={'volumes':
+                                            {mountdir: ('/mounted', 'ro')}})
+    job.wait()
+    assert isinstance(job.exitcode, int)
+    assert job.exitcode != 0
+
+
+def test_set_workingdir_docker(local_docker_engine):
+    engine = local_docker_engine
+    job = engine.launch(image='docker', command='pwd', workingdir='/testtest-dir-test')
+    job.wait()
+    assert job.stdout.strip() == '/testtest-dir-test'
+
+
+def test_set_workingdir_subprocess(subprocess_engine, tmpdir):
+    engine = subprocess_engine
+    job = engine.launch(image=None, command='pwd', workingdir=str(tmpdir))
+    job.wait()
+    assert job.stdout.strip() == str(tmpdir)
+
+
+def test_docker_volume_mount(local_docker_engine):
+    """
+    Note:
+        The test context is not necessarily the same as the bind mount context!
+        These tests will run in containers themselves, so we can't assume
+        that any directories accessible to the tests are bind-mountable.
+
+        Therefore we just test a named volume here.
+    """
+    import subprocess, uuid
+    engine = local_docker_engine
+    key = uuid.uuid4()
+    volname = 'my-mounted-volume-%s' % key
+
+    # Create a named volume with a file named "keyfile" containing a random uuid4
+    subprocess.check_call(('docker volume rm {vn}; docker volume create {vn}; '
+                          'docker run -v {vn}:/mounted alpine sh -c "echo {k} > /mounted/keyfile"')
+                          .format(vn=volname, k=key),
+                          shell=True)
+
+    job = engine.launch(image='docker',
+                        command='cat /mounted/keyfile',
+                        engine_options={'volumes': {volname: '/mounted'}})
+    job.wait()
+    result = job.stdout.strip()
+    assert result == str(key)
+
+
+@pytest.mark.skipif('CI_PROJECT_ID' in os.environ,
+                    reason="Can't mount docker socket in codeship")
+def test_docker_socket_mount_with_engine_option(local_docker_engine):
+    engine = local_docker_engine
+
+    job = engine.launch(image='docker',
+                        command='docker ps -q --no-trunc',
+                        engine_options={'mount_docker_socket': True})
+    job.wait()
+    running = job.stdout.strip().splitlines()
+    assert job.jobid in running
+
+
+@pytest.mark.skipif('CI_PROJECT_ID' in os.environ,
+                    reason="Can't mount docker socket in codeship")
+def test_docker_socket_mount_withdocker_option(local_docker_engine):
+    engine = local_docker_engine
+
+    job = engine.launch(image='docker',
+                        command='docker ps -q --no-trunc',
+                        withdocker=True)
+    job.wait()
+    running = job.stdout.strip().splitlines()
+    assert job.jobid in running
+
diff --git a/pyccc/tests/test_job_types.py b/pyccc/tests/test_engines.py
similarity index 79%
rename from pyccc/tests/test_job_types.py
rename to pyccc/tests/test_engines.py
index f136b7a..41f0257 100644
--- a/pyccc/tests/test_job_types.py
+++ b/pyccc/tests/test_engines.py
@@ -6,7 +6,28 @@
 from .engine_fixtures import *
 from . import function_tests
 
-"""Basic test battery for regular and python jobs on all underlying engines"""
+"""
+Basic test battery for regular and python jobs on all underlying engines
+
+This can be used to test external engines (in a hacky way right now):
+
+```python
+import pytest
+from pyccc.tests import engine_fixtures
+
+@pytest.fixture
+def my_engine_fixture():
+    return MyCustomEngine()
+
+engine_fixtures['engine'] = my_engine_fixture
+
+from pyccc.tests.test_engines import *
+```
+
+A less hacky way to do this would be via pytest-testscenarios or similar test generation strategy
+see https://docs.pytest.org/en/latest/example/parametrize.html#a-quick-port-of-testscenarios
+
+"""
 
 PYVERSION = '%s.%s' % (sys.version_info.major, sys.version_info.minor)
 PYIMAGE = 'python:%s-slim' % PYVERSION
@@ -176,21 +197,6 @@ def test_bash_exitcode(fixture, request):
     assert job.exitcode == 35
 
 
-@pytest.fixture
-def set_env_var():
-    import os
-    assert 'NULL123' not in os.environ, "Bleeding environment"
-    os.environ['NULL123'] = 'nullabc'
-    yield
-    del os.environ['NULL123']
-
-
-def test_subprocess_environment_preserved(subprocess_engine, set_env_var):
-    job = subprocess_engine.launch(command='echo $NULL123', image='python:2.7-slim')
-    job.wait()
-    assert job.stdout.strip() == 'nullabc'
-
-
 @pytest.mark.parametrize('fixture', fixture_types['engine'])
 def test_python_exitcode(fixture, request):
     engine = request.getfuncargvalue(fixture)
@@ -348,86 +354,6 @@ def _runcall(fixture, request, function, *args, **kwargs):
     return job.result
 
 
-@pytest.mark.skipif('CI_PROJECT_ID' in os.environ,
-                    reason="Can't mount docker socket in codeship")
-def test_docker_socket_mount_with_engine_option(local_docker_engine):
-    engine = local_docker_engine
-
-    job = engine.launch(image='docker',
-                        command='docker ps -q --no-trunc',
-                        engine_options={'mount_docker_socket': True})
-    job.wait()
-    running = job.stdout.strip().splitlines()
-    assert job.jobid in running
-
-
-@pytest.mark.skipif('CI_PROJECT_ID' in os.environ,
-                    reason="Can't mount docker socket in codeship")
-def test_docker_socket_mount_withdocker_option(local_docker_engine):
-    engine = local_docker_engine
-
-    job = engine.launch(image='docker',
-                        command='docker ps -q --no-trunc',
-                        withdocker=True)
-    job.wait()
-    running = job.stdout.strip().splitlines()
-    assert job.jobid in running
-
-
-def test_docker_volume_mount(local_docker_engine):
-    """
-    Note:
-        The test context is not necessarily the same as the bind mount context!
-        These tests will run in containers themselves, so we can't assume
-        that any directories accessible to the tests are bind-mountable.
-
-        Therefore we just test a named volume here.
-    """
-    import subprocess, uuid
-    engine = local_docker_engine
-    key = uuid.uuid4()
-    volname = 'my-mounted-volume-%s' % key
-
-    # Create a named volume with a file named "keyfile" containing a random uuid4
-    subprocess.check_call(('docker volume rm {vn}; docker volume create {vn}; '
-                          'docker run -v {vn}:/mounted alpine sh -c "echo {k} > /mounted/keyfile"')
-                          .format(vn=volname, k=key),
-                          shell=True)
-
-    job = engine.launch(image='docker',
-                        command='cat /mounted/keyfile',
-                        engine_options={'volumes': {volname: '/mounted'}})
-    job.wait()
-    result = job.stdout.strip()
-    assert result == str(key)
-
-
-def test_readonly_docker_volume_mount(local_docker_engine):
-    engine = local_docker_engine
-    mountdir = '/tmp'
-    job = engine.launch(image='docker',
-                        command='echo blah > /mounted/blah',
-                        engine_options={'volumes':
-                                            {mountdir: ('/mounted', 'ro')}})
-    job.wait()
-    assert isinstance(job.exitcode, int)
-    assert job.exitcode != 0
-
-
-def test_set_workingdir_docker(local_docker_engine):
-    engine = local_docker_engine
-    job = engine.launch(image='docker', command='pwd', workingdir='/testtest-dir-test')
-    job.wait()
-    assert job.stdout.strip() == '/testtest-dir-test'
-
-
-def test_set_workingdir_subprocess(subprocess_engine, tmpdir):
-    engine = subprocess_engine
-    job = engine.launch(image=None, command='pwd', workingdir=str(tmpdir))
-    job.wait()
-    assert job.stdout.strip() == str(tmpdir)
-
-
 @pytest.mark.parametrize('fixture', fixture_types['engine'])
 def test_clean_working_dir(fixture, request):
     """ Because of some weird results that seemed to indicate the wrong run dir
@@ -438,7 +364,7 @@ def test_clean_working_dir(fixture, request):
     assert job.stdout.strip() == ''
 
 
-class no_context():  # context manager that does nothing that can be used conditionaly
+class no_context():  # context manager that does nothing -- can be used conditionally
     def __enter__(self):
         return None
     def __exit__(self, exc_type, exc_value, traceback):

From 6c25e378d08b2b5ee9942fdbea836d24fc0b5df5 Mon Sep 17 00:00:00 2001
From: Aaron Virshup <avirshup@gmail.com>
Date: Mon, 4 Jun 2018 14:32:34 -0700
Subject: [PATCH 2/6] Add engine metadata for better testing

---
 .gitignore                    |  2 ++
 pyccc/engines/base.py         |  8 +++++++
 pyccc/engines/dockerengine.py |  3 +++
 pyccc/engines/subproc.py      |  2 ++
 pyccc/tests/test_engines.py   | 39 ++++++++++++++++-------------------
 5 files changed, 33 insertions(+), 21 deletions(-)

diff --git a/.gitignore b/.gitignore
index fc07282..6cbfbde 100644
--- a/.gitignore
+++ b/.gitignore
@@ -13,3 +13,5 @@ codeship.aes
 .cache
 __pycache__
 README.rst
+.pytest_cache
+.remote
diff --git a/pyccc/engines/base.py b/pyccc/engines/base.py
index 55db4df..71693af 100644
--- a/pyccc/engines/base.py
+++ b/pyccc/engines/base.py
@@ -25,6 +25,14 @@ class EngineBase(object):
     This class defines the implementation only - you intantiate one of its subclasses
     """
 
+    USES_IMAGES = None
+    "bool: subclasses should set this to indicate whether they use the `job.image` field"
+
+    ABSPATHS = None
+    """bool: subclasses should set this to indicate whether files can
+             be referenced via absolute path"""
+
+
     hostname = 'not specified'  # this should be overidden in subclass init methods
 
     def __call__(self, *args, **kwargs):
diff --git a/pyccc/engines/dockerengine.py b/pyccc/engines/dockerengine.py
index d7120c3..91b80aa 100644
--- a/pyccc/engines/dockerengine.py
+++ b/pyccc/engines/dockerengine.py
@@ -28,6 +28,9 @@
 class Docker(EngineBase):
     """ A compute engine - uses a docker server to run jobs
     """
+    USES_IMAGES = True
+    ABSPATHS = True
+
     def __init__(self, client=None, workingdir='/workingdir'):
         """ Initialization:
 
diff --git a/pyccc/engines/subproc.py b/pyccc/engines/subproc.py
index de51d2c..5bb78f0 100644
--- a/pyccc/engines/subproc.py
+++ b/pyccc/engines/subproc.py
@@ -29,6 +29,8 @@ class Subprocess(EngineBase):
     For now, don't return anything until job completes"""
 
     hostname = 'local'
+    USES_IMAGES = False
+    ABSPATHS = False
 
     def __init__(self, *args, **kwargs):
         super().__init__(*args, **kwargs)
diff --git a/pyccc/tests/test_engines.py b/pyccc/tests/test_engines.py
index 41f0257..d35d08e 100644
--- a/pyccc/tests/test_engines.py
+++ b/pyccc/tests/test_engines.py
@@ -1,39 +1,35 @@
 # -*- coding: utf-8 -*-
-import os
-import sys
-import pytest
-import pyccc
-from .engine_fixtures import *
-from . import function_tests
-
 """
 Basic test battery for regular and python jobs on all underlying engines
 
-This can be used to test external engines (in a hacky way right now):
+This can be used to test external engines (in a hacky, somewhat brittle way right now):
 
 ```python
 import pytest
 from pyccc.tests import engine_fixtures
 
-@pytest.fixture
-def my_engine_fixture():
-    return MyCustomEngine()
+@pytest.fixture(scope='module')
+def my_engine():
+    return MyEngine()
 
-engine_fixtures['engine'] = my_engine_fixture
-
-from pyccc.tests.test_engines import *
+engine_fixtures.fixture_types['engine'] = ['my_engine']
+from pyccc.tests.test_engines import *  # imports all the tests
 ```
 
-A less hacky way to do this would be via pytest-testscenarios or similar test generation strategy
-see https://docs.pytest.org/en/latest/example/parametrize.html#a-quick-port-of-testscenarios
-
+A less hacky way to via a parameterized test strategy similar to testscenarios:
+https://docs.pytest.org/en/latest/example/parametrize.html#a-quick-port-of-testscenarios
 """
+import os
+import sys
+import pytest
+import pyccc
+from .engine_fixtures import *
+from . import function_tests
 
 PYVERSION = '%s.%s' % (sys.version_info.major, sys.version_info.minor)
 PYIMAGE = 'python:%s-slim' % PYVERSION
 THISDIR = os.path.dirname(__file__)
 
-
 ########################
 # Python test objects  #
 ########################
@@ -364,7 +360,9 @@ def test_clean_working_dir(fixture, request):
     assert job.stdout.strip() == ''
 
 
-class no_context():  # context manager that does nothing -- can be used conditionally
+class no_context(): 
+    """context manager that does nothing -- useful if we need to conditionally apply a context
+    """
     def __enter__(self):
         return None
     def __exit__(self, exc_type, exc_value, traceback):
@@ -374,8 +372,7 @@ def __exit__(self, exc_type, exc_value, traceback):
 @pytest.mark.parametrize('fixture', fixture_types['engine'])
 def test_abspath_input_files(fixture, request):
     engine = request.getfuncargvalue(fixture)
-    with pytest.raises(ValueError) if isinstance(engine, pyccc.Subprocess) else no_context():
-        # this is OK with docker but should fail with a subprocess
+    with no_context() if engine.ABSPATHS else pytest.raises(ValueError):
         job = engine.launch(image='alpine', command='cat /opt/a',
                             inputs={'/opt/a': pyccc.LocalFile(os.path.join(THISDIR, 'data', 'a'))})
     if not isinstance(engine, pyccc.Subprocess):

From df154ba688d1d860e6a1cf9265761a9ea7bd8373 Mon Sep 17 00:00:00 2001
From: Aaron Virshup <avirshup@gmail.com>
Date: Tue, 5 Jun 2018 13:33:34 -0700
Subject: [PATCH 3/6] Move engine-specific data to the job.rundata DotDict

---
 pyccc/engines/dockerengine.py       | 25 +++++++++--------
 pyccc/engines/subproc.py            | 42 ++++++++++++++---------------
 pyccc/exceptions.py                 |  6 ++++-
 pyccc/job.py                        | 12 +++++----
 pyccc/tests/test_engine_features.py |  7 -----
 pyccc/tests/test_engines.py         |  4 +--
 requirements.txt                    |  1 +
 7 files changed, 47 insertions(+), 50 deletions(-)

diff --git a/pyccc/engines/dockerengine.py b/pyccc/engines/dockerengine.py
index 91b80aa..d9c4344 100644
--- a/pyccc/engines/dockerengine.py
+++ b/pyccc/engines/dockerengine.py
@@ -79,10 +79,10 @@ def submit(self, job):
 
         container_args = self._generate_container_args(job)
 
-        job.container = self.client.create_container(job.imageid, **container_args)
-        self.client.start(job.container)
-        job.containerid = job.container['Id']
-        job.jobid = job.containerid
+        job.rundata.container = self.client.create_container(job.imageid, **container_args)
+        self.client.start(job.rundata.container)
+        job.rundata.containerid = job.rundata.container['Id']
+        job.jobid = job.rundata.containerid
 
     def _generate_container_args(self, job):
         container_args = dict(command="sh -c '%s'" % job.command,
@@ -107,7 +107,6 @@ def _generate_container_args(self, job):
                 bind = '%s:%s:%s' % (volume, mountpoint, mode)
             else:
                 mountpoint = mount
-                mode = None
                 bind = '%s:%s' % (volume, mountpoint)
 
             volumes.append(mountpoint)
@@ -120,17 +119,17 @@ def _generate_container_args(self, job):
         return container_args
 
     def wait(self, job):
-        stat = self.client.wait(job.container)
+        stat = self.client.wait(job.rundata.container)
         if isinstance(stat, int):  # i.e., docker<3
             return stat
         else:  # i.e., docker>=3
             return stat['StatusCode']
 
     def kill(self, job):
-        self.client.kill(job.container)
+        self.client.kill(job.rundata.container)
 
     def get_status(self, job):
-        inspect = self.client.inspect_container(job.containerid)
+        inspect = self.client.inspect_container(job.rundata.containerid)
         if inspect['State']['Running']:
             return status.RUNNING
         else:
@@ -138,11 +137,11 @@ def get_status(self, job):
 
     def get_directory(self, job, path):
         docker_host = du.kwargs_from_client(self.client)
-        remotedir = files.DockerArchive(docker_host, job.containerid, path)
+        remotedir = files.DockerArchive(docker_host, job.rundata.containerid, path)
         return remotedir
 
     def _list_output_files(self, job):
-        docker_diff = self.client.diff(job.container)
+        docker_diff = self.client.diff(job.rundata.container)
         if docker_diff is None:
             return {}
 
@@ -162,11 +161,11 @@ def _list_output_files(self, job):
             else:
                 relative_path = filename
 
-            remotefile = files.LazyDockerCopy(docker_host, job.containerid, filename)
+            remotefile = files.LazyDockerCopy(docker_host, job.rundata.containerid, filename)
             output_files[relative_path] = remotefile
         return output_files
 
     def _get_final_stds(self, job):
-        stdout = self.client.logs(job.container, stdout=True, stderr=False)
-        stderr = self.client.logs(job.container, stdout=False, stderr=True)
+        stdout = self.client.logs(job.rundata.container, stdout=True, stderr=False)
+        stderr = self.client.logs(job.rundata.container, stdout=False, stderr=True)
         return stdout.decode('utf-8'), stderr.decode('utf-8')
diff --git a/pyccc/engines/subproc.py b/pyccc/engines/subproc.py
index 5bb78f0..9482d5c 100644
--- a/pyccc/engines/subproc.py
+++ b/pyccc/engines/subproc.py
@@ -20,7 +20,7 @@
 import subprocess
 import locale
 
-from pyccc import utils as utils, files
+from pyccc import utils as utils, files, exceptions
 from . import EngineBase, status
 
 
@@ -37,7 +37,7 @@ def __init__(self, *args, **kwargs):
         self.term_encoding = locale.getpreferredencoding()
 
     def get_status(self, job):
-        if job.subproc.poll() is None:
+        if job.rundata.subproc.poll() is None:
             return status.RUNNING
         else:
             return status.FINISHED
@@ -52,7 +52,7 @@ def get_engine_description(self, job):
         """
         Return a text description for the UI
         """
-        return 'Local subprocess %s' % job.subproc.pid
+        return 'Local subprocess %s' % job.rundata.subproc
 
     def launch(self, image=None, command=None,  **kwargs):
         if command is None:
@@ -61,28 +61,26 @@ def launch(self, image=None, command=None,  **kwargs):
 
     def submit(self, job):
         self._check_job(job)
-        if job.workingdir is None:
-            job.workingdir = utils.make_local_temp_dir()
+        job.rundata.localdir = utils.make_local_temp_dir()
 
-        assert os.path.isabs(job.workingdir)
+        assert os.path.isabs(job.rundata.localdir)
         if job.inputs:
             for filename, f in job.inputs.items():
-                targetpath = self._check_file_is_under_workingdir(filename, job.workingdir)
+                targetpath = self._check_file_is_under_workingdir(filename, job.rundata.localdir)
                 f.put(targetpath)
 
         subenv = os.environ.copy()
         subenv['PYTHONIOENCODING'] = 'utf-8'
         if job.env:
             subenv.update(job.env)
-        job.subproc = subprocess.Popen(job.command,
-                                       shell=True,
-                                       cwd=job.workingdir,
-                                       stdout=subprocess.PIPE,
-                                       stderr=subprocess.PIPE,
-                                       env=subenv)
-        job.jobid = job.subproc.pid
-        job._started = True
-        return job.subproc.pid
+        job.rundata.subproc = subprocess.Popen(job.command,
+                                               shell=True,
+                                               cwd=job.rundata.localdir,
+                                               stdout=subprocess.PIPE,
+                                               stderr=subprocess.PIPE,
+                                               env=subenv)
+        job.jobid = job.rundata.subproc.pid
+        return job.rundata.subproc.pid
 
     @staticmethod
     def _check_file_is_under_workingdir(filename, wdir):
@@ -95,22 +93,22 @@ def _check_file_is_under_workingdir(filename, wdir):
         wdir = os.path.realpath(wdir)
         common = os.path.commonprefix([wdir, targetpath])
         if len(common) < len(wdir):
-            raise ValueError(
+            raise exceptions.PathError(
                     "The subprocess engine does not support input files with absolute paths")
         return p
 
     def kill(self, job):
-        job.subproc.terminate()
+        job.rundata.subproc.terminate()
 
     def wait(self, job):
-        return job.subproc.wait()
+        return job.rundata.subproc.wait()
 
     def get_directory(self, job, path):
-        targetpath = self._check_file_is_under_workingdir(path, job.workingdir)
+        targetpath = self._check_file_is_under_workingdir(path, job.rundata.localdir)
         return files.LocalDirectoryReference(targetpath)
 
     def _list_output_files(self, job, dir=None):
-        if dir is None: dir = job.workingdir
+        if dir is None: dir = job.rundata.localdir
         filenames = {}
         for fname in os.listdir(dir):
             abs_path = '%s/%s' % (dir, fname)
@@ -126,6 +124,6 @@ def _list_output_files(self, job, dir=None):
 
     def _get_final_stds(self, job):
         strings = []
-        for fileobj in (job.subproc.stdout, job.subproc.stderr):
+        for fileobj in (job.rundata.subproc.stdout, job.rundata.subproc.stderr):
             strings.append(fileobj.read().decode('utf-8'))
         return strings
diff --git a/pyccc/exceptions.py b/pyccc/exceptions.py
index a0bb244..ea49c05 100644
--- a/pyccc/exceptions.py
+++ b/pyccc/exceptions.py
@@ -76,4 +76,8 @@ def __init__(self, engine):
 
 class DockerMachineError(Exception):
     """ Failures related to connecting to docker machines
-    """
\ No newline at end of file
+    """
+
+class PathError(Exception):
+    """ The engine can't fulfill the requested input or output filesystem path
+    """
diff --git a/pyccc/job.py b/pyccc/job.py
index 1525bb6..e576c39 100644
--- a/pyccc/job.py
+++ b/pyccc/job.py
@@ -22,6 +22,8 @@
 
 import fnmatch
 
+from mdtcollections import DotDict
+
 import pyccc
 from pyccc import files, status
 from pyccc.utils import *
@@ -95,10 +97,11 @@ def __init__(self, engine=None,
         self.command = if_not_none(command, '')
         self.engine_options = if_not_none(engine_options, {})
         self.workingdir = workingdir
-        self.env = env
+        self.rundata = DotDict()
+        self.env = if_not_none(env, {})
 
-        self.inputs = inputs
-        if self.inputs is not None:  # translate strings into file objects
+        self.inputs = if_not_none(inputs, {})
+        if self.inputs:  # translate strings into file objects
             for filename, fileobj in inputs.items():
                 if isinstance(fileobj, basestring):
                     self.inputs[filename] = files.StringContainer(fileobj)
@@ -118,7 +121,6 @@ def __init__(self, engine=None,
 
     def _reset(self):
         self._submitted = False
-        self._started = False
         self._final_stdout = None
         self._final_stderr = None
         self._finished = False
@@ -136,7 +138,7 @@ def _reset(self):
 
     def __str__(self):
         desc = ['Job "%s" status:%s' % (self.name, self.status)]
-        if self.jobid: desc.append('jobid:%s' % self.jobid)
+        if self.jobid: desc.append('jobid:%s' % (self.jobid,) )
         if self.engine: desc.append('engine:%s' % type(self.engine).__name__)
         return ' '.join(desc)
 
diff --git a/pyccc/tests/test_engine_features.py b/pyccc/tests/test_engine_features.py
index c18e627..84d03a9 100644
--- a/pyccc/tests/test_engine_features.py
+++ b/pyccc/tests/test_engine_features.py
@@ -40,13 +40,6 @@ def test_set_workingdir_docker(local_docker_engine):
     assert job.stdout.strip() == '/testtest-dir-test'
 
 
-def test_set_workingdir_subprocess(subprocess_engine, tmpdir):
-    engine = subprocess_engine
-    job = engine.launch(image=None, command='pwd', workingdir=str(tmpdir))
-    job.wait()
-    assert job.stdout.strip() == str(tmpdir)
-
-
 def test_docker_volume_mount(local_docker_engine):
     """
     Note:
diff --git a/pyccc/tests/test_engines.py b/pyccc/tests/test_engines.py
index d35d08e..e4b7cbc 100644
--- a/pyccc/tests/test_engines.py
+++ b/pyccc/tests/test_engines.py
@@ -372,10 +372,10 @@ def __exit__(self, exc_type, exc_value, traceback):
 @pytest.mark.parametrize('fixture', fixture_types['engine'])
 def test_abspath_input_files(fixture, request):
     engine = request.getfuncargvalue(fixture)
-    with no_context() if engine.ABSPATHS else pytest.raises(ValueError):
+    with no_context() if engine.ABSPATHS else pytest.raises(pyccc.PathError):
         job = engine.launch(image='alpine', command='cat /opt/a',
                             inputs={'/opt/a': pyccc.LocalFile(os.path.join(THISDIR, 'data', 'a'))})
-    if not isinstance(engine, pyccc.Subprocess):
+    if engine.ABSPATHS:
         job.wait()
         assert job.exitcode == 0
         assert job.stdout.strip() == 'a'
diff --git a/requirements.txt b/requirements.txt
index 01f03e9..fdae1d6 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -3,4 +3,5 @@ docker >=3.2.1
 funcsigs ; python_version < '3.3'
 future
 requests
+mdtcollections
 tblib

From c48fac19b8246bc94cbb91eb7acd3a9ac898d5cf Mon Sep 17 00:00:00 2001
From: Aaron Virshup <avirshup@gmail.com>
Date: Tue, 5 Jun 2018 16:21:42 -0700
Subject: [PATCH 4/6] Extra logging in engine tests for easier debugging

---
 pyccc/tests/test_engines.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/pyccc/tests/test_engines.py b/pyccc/tests/test_engines.py
index e4b7cbc..292bdd4 100644
--- a/pyccc/tests/test_engines.py
+++ b/pyccc/tests/test_engines.py
@@ -45,6 +45,7 @@ def _raise_valueerror(msg):
 def test_hello_world(fixture, request):
     engine = request.getfuncargvalue(fixture)
     job = engine.launch('alpine', 'echo hello world')
+    print(job.rundata)
     job.wait()
     assert job.stdout.strip() == 'hello world'
 
@@ -55,6 +56,7 @@ def test_job_status(fixture, request):
     job = engine.launch('alpine', 'sleep 3', submit=False)
     assert job.status.lower() == 'unsubmitted'
     job.submit()
+    print(job.rundata)
     assert job.status.lower() in ('queued', 'running', 'downloading')
     job.wait()
     assert job.status.lower() == 'finished'
@@ -64,6 +66,7 @@ def test_job_status(fixture, request):
 def test_file_glob(fixture, request):
     engine = request.getfuncargvalue(fixture)
     job = engine.launch('alpine', 'touch a.txt b c d.txt e.gif')
+    print(job.rundata)
     job.wait()
 
     assert set(job.get_output().keys()) <= set('a.txt b c d.txt e.gif'.split())
@@ -77,6 +80,7 @@ def test_input_ouput_files(fixture, request):
                         command='cat a.txt b.txt > out.txt',
                         inputs={'a.txt': 'a',
                                 'b.txt': pyccc.StringContainer('b')})
+    print(job.rundata)
     job.wait()
     assert job.get_output('out.txt').read().strip() == 'ab'
 
@@ -85,6 +89,7 @@ def test_input_ouput_files(fixture, request):
 def test_sleep_raises_jobstillrunning(fixture, request):
     engine = request.getfuncargvalue(fixture)
     job = engine.launch('alpine', 'sleep 5; echo done')
+    print(job.rundata)
     with pytest.raises(pyccc.JobStillRunning):
         job.stdout
     job.wait()
@@ -96,6 +101,7 @@ def test_python_function(fixture, request):
     engine = request.getfuncargvalue(fixture)
     pycall = pyccc.PythonCall(function_tests.fn, 5)
     job = engine.launch(PYIMAGE, pycall, interpreter=PYVERSION)
+    print(job.rundata)
     job.wait()
     assert job.result == 6
 
@@ -106,6 +112,7 @@ def test_python_instance_method(fixture, request):
     obj = function_tests.Cls()
     pycall = pyccc.PythonCall(obj.increment, by=2)
     job = engine.launch(PYIMAGE, pycall, interpreter=PYVERSION)
+    print(job.rundata)
     job.wait()
 
     assert job.result == 2
@@ -117,6 +124,7 @@ def test_python_reraises_exception(fixture, request):
     engine = request.getfuncargvalue(fixture)
     pycall = pyccc.PythonCall(_raise_valueerror, 'this is my message')
     job = engine.launch(PYIMAGE, pycall, interpreter=PYVERSION)
+    print(job.rundata)
     job.wait()
 
     with pytest.raises(ValueError):
@@ -129,6 +137,7 @@ def test_builtin_imethod(fixture, request):
     mylist = [3, 2, 1]
     fn = pyccc.PythonCall(mylist.sort)
     job = engine.launch(image=PYIMAGE, command=fn, interpreter=PYVERSION)
+    print(job.rundata)
     job.wait()
 
     assert job.result is None  # since sort doesn't return anything
@@ -186,6 +195,7 @@ def test_bash_exitcode(fixture, request):
                     command='sleep 5 && exit 35',
                     engine=engine,
                     submit=True)
+    print(job.rundata)
     with pytest.raises(pyccc.JobStillRunning):
         job.exitcode
     job.wait()
@@ -198,6 +208,7 @@ def test_python_exitcode(fixture, request):
     engine = request.getfuncargvalue(fixture)
     fn = pyccc.PythonCall(function_tests.sleep_then_exit_38)
     job = engine.launch(image=PYIMAGE, command=fn, interpreter=PYVERSION)
+    print(job.rundata)
 
     with pytest.raises(pyccc.JobStillRunning):
         job.exitcode
@@ -231,6 +242,7 @@ def test_persistence_assumptions(fixture, request):
 
     # First the control experiment - references are NOT persisted
     job = engine.launch(PYIMAGE, pycall, interpreter=PYVERSION)
+    print(job.rundata)
     job.wait()
     result = job.result
     assert result is not testobj
@@ -249,6 +261,7 @@ def test_persist_references_flag(fixture, request):
 
     # With the right flag, references ARE now persisted
     job = engine.launch(PYIMAGE, pycall, interpreter=PYVERSION, persist_references=True)
+    print(job.rundata)
     job.wait()
     result = job.result
     assert result is testobj
@@ -270,6 +283,7 @@ def test_persistent_and_nonpersistent_mixture(fixture, request):
     pycall = pyccc.PythonCall(testobj.identity)
 
     job = engine.launch(PYIMAGE, pycall, interpreter=PYVERSION, persist_references=True)
+    print(job.rundata)
     job.wait()
     result = job.result
     assert result is not testobj
@@ -287,6 +301,7 @@ def _callback(job):
     job = engine.launch(image=PYIMAGE,
                         command='echo hello world > out.txt',
                         when_finished=_callback)
+    print(job.rundata)
     job.wait()
 
     assert job.result == 'hello world'
@@ -297,6 +312,7 @@ def test_unicode_stdout_and_return(fixture, request):
     engine = request.getfuncargvalue(fixture)
     fn = pyccc.PythonCall(function_tests.fn_prints_unicode)
     job = engine.launch(image=PYIMAGE, command=fn, interpreter=PYVERSION)
+    print(job.rundata)
     job.wait()
     assert job.result == u'¶'
     assert job.stdout.strip() == u'Å'
@@ -311,6 +327,7 @@ def _callback(job):
     fn = pyccc.PythonCall(function_tests.fn, 3.0)
     engine = request.getfuncargvalue(fixture)
     job = engine.launch(image=PYIMAGE, command=fn, interpreter=PYVERSION, when_finished=_callback)
+    print(job.rundata)
     job.wait()
 
     assert job.function_result == 4.0
@@ -329,6 +346,7 @@ def _callback(job):
     engine = request.getfuncargvalue(fixture)
     job = engine.launch(image=PYIMAGE, command=fn,
                         interpreter=PYVERSION, when_finished=_callback, persist_references=True)
+    print(job.rundata)
     job.wait()
 
     assert job.function_result is testobj
@@ -346,6 +364,7 @@ def _runcall(fixture, request, function, *args, **kwargs):
     engine = request.getfuncargvalue(fixture)
     fn = pyccc.PythonCall(function, *args, **kwargs)
     job = engine.launch(image=PYIMAGE, command=fn, interpreter=PYVERSION)
+    print(job.rundata)
     job.wait()
     return job.result
 
@@ -356,6 +375,7 @@ def test_clean_working_dir(fixture, request):
     """
     engine = request.getfuncargvalue(fixture)
     job = engine.launch(image='alpine', command='ls')
+    print(job.rundata)
     job.wait()
     assert job.stdout.strip() == ''
 
@@ -365,6 +385,7 @@ class no_context():
     """
     def __enter__(self):
         return None
+
     def __exit__(self, exc_type, exc_value, traceback):
         return False
 
@@ -376,6 +397,7 @@ def test_abspath_input_files(fixture, request):
         job = engine.launch(image='alpine', command='cat /opt/a',
                             inputs={'/opt/a': pyccc.LocalFile(os.path.join(THISDIR, 'data', 'a'))})
     if engine.ABSPATHS:
+        print(job.rundata)
         job.wait()
         assert job.exitcode == 0
         assert job.stdout.strip() == 'a'
@@ -388,6 +410,7 @@ def test_directory_input(fixture, request):
     job = engine.launch(image='alpine', command='cat data/a data/b',
                         inputs={'data':
                                     pyccc.LocalDirectoryReference(os.path.join(THISDIR, 'data'))})
+    print(job.rundata)
     job.wait()
     assert job.exitcode == 0
     assert job.stdout.strip() == 'a\nb'
@@ -398,11 +421,13 @@ def test_passing_files_between_jobs(fixture, request):
     engine = request.getfuncargvalue(fixture)
 
     job1 = engine.launch(image='alpine', command='echo hello > world')
+    print('job1:', job1.rundata)
     job1.wait()
     assert job1.exitcode == 0
 
     job2 = engine.launch(image='alpine', command='cat helloworld',
                          inputs={'helloworld': job1.get_output('world')})
+    print('job2:', job2.rundata)
     job2.wait()
     assert job2.exitcode == 0
     assert job2.stdout.strip() == 'hello'
@@ -415,6 +440,7 @@ def test_job_env_vars(fixture, request):
     job = engine.launch(image='alpine',
                         command='echo ${AA} ${BB}',
                         env={'AA': 'hello', 'BB':'world'})
+    print(job.rundata)
     job.wait()
     assert job.exitcode == 0
     assert job.stdout.strip() == 'hello world'

From d3eec1cb74d7f74a7aca060a20e596605851e3e5 Mon Sep 17 00:00:00 2001
From: Aaron Virshup <avirshup@gmail.com>
Date: Mon, 11 Jun 2018 16:46:50 -0700
Subject: [PATCH 5/6] Add engine.get_job feature to get jobs by id (when
 supported)

---
 pyccc/engines/base.py               | 17 ++++++++++
 pyccc/engines/dockerengine.py       | 49 +++++++++++++++++++++++++++--
 pyccc/engines/subproc.py            |  3 ++
 pyccc/exceptions.py                 |  4 +++
 pyccc/job.py                        |  4 ++-
 pyccc/tests/test_engine_features.py |  1 -
 pyccc/tests/test_engines.py         | 20 ++++++++++++
 7 files changed, 94 insertions(+), 4 deletions(-)

diff --git a/pyccc/engines/base.py b/pyccc/engines/base.py
index 71693af..773063f 100644
--- a/pyccc/engines/base.py
+++ b/pyccc/engines/base.py
@@ -67,6 +67,23 @@ def launch(self, image, command, **kwargs):
         else:
             return Job(self, image, command, **kwargs)
 
+    def get_job(self, jobid):
+        """ Return a Job object for this job.
+
+        The returned object will be suitable for retrieving output, but depending on the engine,
+        may not populate all fields used at launch time (such as `job.inputs`, `job.commands`, etc.)
+
+        Args:
+            jobid (Any): job id object
+
+        Returns:
+            pyccc.job.Job: job object for this job id
+
+        Raises:
+            pyccc.exceptions.JobNotFound: if no job could be located for this jobid
+        """
+        raise NotImplementedError()
+
     def submit(self, job):
         """
         submit job to engine
diff --git a/pyccc/engines/dockerengine.py b/pyccc/engines/dockerengine.py
index d9c4344..1a36a1c 100644
--- a/pyccc/engines/dockerengine.py
+++ b/pyccc/engines/dockerengine.py
@@ -19,9 +19,10 @@
 
 import subprocess
 
-import docker
+import docker.errors
+
 from .. import docker_utils as du, DockerMachineError
-from .. import utils, files, status
+from .. import utils, files, status, exceptions
 from . import EngineBase
 
 
@@ -64,6 +65,50 @@ def test_connection(self):
         version = self.client.version()
         return version
 
+    def get_job(self, jobid):
+        """ Return a Job object for the requested job id.
+
+        The returned object will be suitable for retrieving output, but depending on the engine,
+        may not populate all fields used at launch time (such as `job.inputs`, `job.commands`, etc.)
+
+        Args:
+            jobid (str): container id
+
+        Returns:
+            pyccc.job.Job: job object for this container
+
+        Raises:
+            pyccc.exceptions.JobNotFound: if no job could be located for this jobid
+        """
+        import shlex
+        from pyccc.job import Job
+
+        job = Job(engine=self)
+        job.jobid = job.rundata.containerid = jobid
+        try:
+            jobdata = self.client.containers.inspect_container(job.jobid)
+        except docker.errors.NotFound:
+            raise exceptions.JobNotFound(
+                    'The daemon could not find containter "%s"' % job.jobid)
+
+        cmd = jobdata['Config']['Cmd']
+        entrypoint = jobdata['Config']['Entrypoint']
+
+        if len(cmd) == 3 and cmd[0:2] == ['sh', '-c']:
+            cmd = cmd[2]
+        elif entrypoint is not None:
+            cmd = entrypoint + cmd
+
+        if isinstance(cmd, list):
+            cmd = ' '.join(shlex.quote(x) for x in cmd)
+
+        job.command = cmd
+        job.env = jobdata['Env']
+        job.workingdir = jobdata['WorkingDir']
+
+        return job
+
+
     def submit(self, job):
         """ Submit job to the engine
 
diff --git a/pyccc/engines/subproc.py b/pyccc/engines/subproc.py
index 9482d5c..6c28dc8 100644
--- a/pyccc/engines/subproc.py
+++ b/pyccc/engines/subproc.py
@@ -59,6 +59,9 @@ def launch(self, image=None, command=None,  **kwargs):
             command = image
         return super(Subprocess, self).launch('no_image', command, **kwargs)
 
+    def get_job(self, jobid):
+        raise NotImplementedError("Cannot retrieve jobs with the subprocess engine")
+
     def submit(self, job):
         self._check_job(job)
         job.rundata.localdir = utils.make_local_temp_dir()
diff --git a/pyccc/exceptions.py b/pyccc/exceptions.py
index ea49c05..0828701 100644
--- a/pyccc/exceptions.py
+++ b/pyccc/exceptions.py
@@ -81,3 +81,7 @@ class DockerMachineError(Exception):
 class PathError(Exception):
     """ The engine can't fulfill the requested input or output filesystem path
     """
+
+class JobNotFound(Exception):
+    """ The requested job was not found
+    """
\ No newline at end of file
diff --git a/pyccc/job.py b/pyccc/job.py
index e576c39..80fa377 100644
--- a/pyccc/job.py
+++ b/pyccc/job.py
@@ -32,7 +32,9 @@
 def exports(o):
     __all__.append(o.__name__)
     return o
-__all__ = []
+
+
+__all__ = ['Job']
 
 
 class EngineFunction(object):
diff --git a/pyccc/tests/test_engine_features.py b/pyccc/tests/test_engine_features.py
index 84d03a9..b93180a 100644
--- a/pyccc/tests/test_engine_features.py
+++ b/pyccc/tests/test_engine_features.py
@@ -92,4 +92,3 @@ def test_docker_socket_mount_withdocker_option(local_docker_engine):
     job.wait()
     running = job.stdout.strip().splitlines()
     assert job.jobid in running
-
diff --git a/pyccc/tests/test_engines.py b/pyccc/tests/test_engines.py
index 292bdd4..9d40fbb 100644
--- a/pyccc/tests/test_engines.py
+++ b/pyccc/tests/test_engines.py
@@ -444,3 +444,23 @@ def test_job_env_vars(fixture, request):
     job.wait()
     assert job.exitcode == 0
     assert job.stdout.strip() == 'hello world'
+
+
+@pytest.mark.parametrize('fixture', fixture_types['engine'])
+def test_get_job(fixture, request):
+    engine = request.getfuncargvalue(fixture)
+    job = engine.launch(image='alpine',
+                        command='sleep 1 && echo nice nap')
+
+    try:
+        newjob = engine.get_job(job.jobid)
+    except NotImplementedError:
+        pytest.skip('get_job raised NotImplementedError for %s' % fixture)
+
+    assert job.jobid == newjob.jobid
+    job.wait()
+    assert newjob.status == job.status
+
+    assert newjob.stdout == job.stdout
+    assert newjob.stderr == job.stderr
+

From 721659356e085a5225c3e49de657abc2a20aecb4 Mon Sep 17 00:00:00 2001
From: Aaron Virshup <avirshup@gmail.com>
Date: Mon, 11 Jun 2018 16:50:05 -0700
Subject: [PATCH 6/6] get_job test is passing for docker engine

---
 pyccc/engines/dockerengine.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/pyccc/engines/dockerengine.py b/pyccc/engines/dockerengine.py
index 1a36a1c..d8d2fe0 100644
--- a/pyccc/engines/dockerengine.py
+++ b/pyccc/engines/dockerengine.py
@@ -86,7 +86,7 @@ def get_job(self, jobid):
         job = Job(engine=self)
         job.jobid = job.rundata.containerid = jobid
         try:
-            jobdata = self.client.containers.inspect_container(job.jobid)
+            jobdata = self.client.inspect_container(job.jobid)
         except docker.errors.NotFound:
             raise exceptions.JobNotFound(
                     'The daemon could not find containter "%s"' % job.jobid)
@@ -103,8 +103,9 @@ def get_job(self, jobid):
             cmd = ' '.join(shlex.quote(x) for x in cmd)
 
         job.command = cmd
-        job.env = jobdata['Env']
-        job.workingdir = jobdata['WorkingDir']
+        job.env = jobdata['Config']['Env']
+        job.workingdir = jobdata['Config']['WorkingDir']
+        job.rundata.container = jobdata
 
         return job