Skip to content

Commit

Permalink
Merge pull request #24 from Autodesk/directory-refs
Browse files Browse the repository at this point in the history
Directory refs
  • Loading branch information
avirshup authored Mar 5, 2018
2 parents 83c4852 + 83c9c51 commit 812da57
Show file tree
Hide file tree
Showing 13 changed files with 371 additions and 85 deletions.
24 changes: 5 additions & 19 deletions pyccc/engines/dockerengine.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,11 @@ def get_status(self, job):
else:
return status.FINISHED

def get_directory(self, job, path):
docker_host = du.kwargs_from_client(self.client)
remotedir = files.DockerArchive(docker_host, job.containerid, path)
return remotedir

def _list_output_files(self, job):
docker_diff = self.client.diff(job.container)
if docker_diff is None:
Expand Down Expand Up @@ -156,22 +161,3 @@ def _get_final_stds(self, job):
stdout = self.client.logs(job.container, stdout=True, stderr=False)
stderr = self.client.logs(job.container, stdout=False, stderr=True)
return stdout.decode('utf-8'), stderr.decode('utf-8')


class DockerMachine(Docker):
""" Convenience class for connecting to a docker machine.
"""
def __init__(self, machinename):
self.machinename = machinename
client = du.docker_machine_client(machine_name=machinename)

machstat = subprocess.check_output(['docker-machine', 'status', machinename]).strip()
if machstat != 'Running':
raise DockerMachineError('WARNING: docker-machine %s returned status: "%s"' %
(machinename, status))

super(DockerMachine, self).__init__(client)

def __str__(self):
return "%s engine on '%s' at %s" % (type(self).__name__, self.machinename,
self.hostname)
11 changes: 8 additions & 3 deletions pyccc/engines/subproc.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,8 +65,8 @@ def submit(self, job):
assert os.path.isabs(job.workingdir)
if job.inputs:
for filename, f in job.inputs.items():
self._check_input_target_location(filename, job.workingdir)
f.put(os.path.join(job.workingdir, filename))
targetpath = self._check_file_is_under_workingdir(filename, job.workingdir)
f.put(targetpath)

subenv = os.environ.copy()
subenv['PYTHONIOENCODING'] = 'utf-8'
Expand All @@ -81,7 +81,7 @@ def submit(self, job):
return job.subproc.pid

@staticmethod
def _check_input_target_location(filename, wdir):
def _check_file_is_under_workingdir(filename, wdir):
""" Raise error if input is being staged to a location not underneath the working dir
"""
p = filename
Expand All @@ -93,13 +93,18 @@ def _check_input_target_location(filename, wdir):
if len(common) < len(wdir):
raise ValueError(
"The subprocess engine does not support input files with absolute paths")
return p

def kill(self, job):
job.subproc.terminate()

def wait(self, job):
return job.subproc.wait()

def get_directory(self, job, path):
targetpath = self._check_file_is_under_workingdir(path, job.workingdir)
return files.LocalDirectoryReference(targetpath)

def _list_output_files(self, job, dir=None):
if dir is None: dir = job.workingdir
filenames = {}
Expand Down
43 changes: 36 additions & 7 deletions pyccc/files/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function, unicode_literals, absolute_import, division

import shutil

from future import standard_library
standard_library.install_aliases()
from future.builtins import *
Expand All @@ -28,7 +31,7 @@
ENCODING = 'utf-8'


def get_tempfile():
def get_tempfile(**kwargs):
if not os.path.exists(CACHEDIR):
if PY2:
try:
Expand All @@ -38,10 +41,37 @@ def get_tempfile():
raise
else:
os.makedirs(CACHEDIR, exist_ok=True)
tmpfile = tempfile.NamedTemporaryFile(dir=CACHEDIR, delete=False)
tmpfile = tempfile.NamedTemporaryFile(dir=CACHEDIR, delete=False, **kwargs)
return tmpfile


def get_target_path(destination, origname):
""" Implements the directory/path semantics of linux mv/cp etc.
Examples:
>>> import os
>>> os.makedirs('./a')
>>> _get_target_path('./a', '/tmp/myfile')
'./myfile'
>>> _get_target_path('./a/b', '/tmp/myfile')
'./a/b'
Raises:
OSError: if neither destination NOR destination's parent exists OR it already exists
"""
if os.path.exists(destination):
if not os.path.isdir(destination):
raise OSError('Cannot write to requested destination %s - file exists' % destination)
return os.path.join(destination, os.path.basename(origname))
else:
destdir = os.path.abspath(os.path.join(destination, os.path.pardir))
if not os.path.isdir(destdir):
raise OSError(
'Cannot write to requested destination %s - parent directory does not exist' %
destination)
return os.path.join(destination)


class FileReferenceBase(object):
""" The base class for tracking files.
Expand Down Expand Up @@ -80,11 +110,10 @@ def put(self, filename):
LocalFile: reference to the copy of the file stored at ``filename``
"""
from . import LocalFile

with self.open('rb') as infile, open(filename, 'wb') as outfile:
for line in infile:
outfile.write(infile.read())
return LocalFile(filename)
target = get_target_path(filename, self.source)
with self.open('rb') as infile, open(target, 'wb') as outfile:
shutil.copyfileobj(infile, outfile)
return LocalFile(target)

def __iter__(self):
# This is the worst file-reader ever
Expand Down
23 changes: 16 additions & 7 deletions pyccc/files/bytecontainer.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,9 @@

import io
import warnings
import os

from . import FileReferenceBase
from . import FileReferenceBase, get_target_path


class BytesContainer(FileReferenceBase):
Expand All @@ -35,11 +36,12 @@ class BytesContainer(FileReferenceBase):
contents (bytes): contents of the file
encoded_with (str): encoding of the file (default: system default)
"""
def __init__(self, contents, encoded_with=None, name='byte string'):
def __init__(self, contents, encoded_with=None, name=None):
self._contents = contents
self.encoded_with = encoded_with
self.name = name
self.source = 'script'
self.source = name
self.localpath = None
self.sourcetype = 'runtime'

def put(self, filename, encoding=None):
"""Write the file to the given path
Expand All @@ -52,12 +54,19 @@ def put(self, filename, encoding=None):
"""
from . import LocalFile

if os.path.isdir(filename) and self.source is None:
raise ValueError("Cannot write this object to "
"directory %s without an explicit filename." % filename)

target = get_target_path(filename, self.source)

if (encoding is not None) and (encoding != self.encoded_with):
raise ValueError('%s is already encoded as "%s"' % self, self.encoded_with)

with self.open('rb') as infile, open(filename, 'wb') as outfile:
for line in infile: outfile.write(line)
return LocalFile(filename)
with self.open('rb') as infile, open(target, 'wb') as outfile:
for line in infile:
outfile.write(line)
return LocalFile(target)

def open(self, mode='r', encoding=None):
"""Return file-like object
Expand Down
108 changes: 104 additions & 4 deletions pyccc/files/directory.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,15 +5,25 @@
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
#s
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import tarfile
import shutil

from .remotefiles import LazyDockerCopy
from . import get_target_path


class DirectoryReference(object):
pass


class LocalDirectoryReference(object):
class LocalDirectoryReference(DirectoryReference):
""" This is a reference to a specific directory on the local filesystem.
This allows entire directories to be staged into the dockerfile as input
Expand All @@ -22,5 +32,95 @@ def __init__(self, localpath):
self.localpath = localpath

def put(self, destination):
import shutil
shutil.copytree(self.localpath, destination)
""" Copy the referenced directory to this path
The semantics of this command are similar to unix ``cp``: if ``destination`` already
exists, the copied directory will be put at ``[destination] // [basename(localpath)]``. If
it does not already exist, the directory will be renamed to this path (the parent directory
must exist).
Args:
destination (str): path to put this directory
"""
target = get_target_path(destination, self.localpath)
shutil.copytree(self.localpath, target)


class DirectoryArchive(DirectoryReference):
"""A tar (or tar.gz) archive of a directory
All files in this directory must be under a directory named "dirname". No other files
will be expanded
Args:
archive_path (str): path to the existing archive
dirname (str): name that this directory reference expands to (this is not checked!)
"""
def __init__(self, archive_path, dirname):
self.archive_path = archive_path
self.dirname = dirname

def put(self, destination):
""" Copy the referenced directory to this path
Note:
This ignores anything not in the desired directory, given by ``self.dirname``.
Args:
destination (str): path to put this directory (which must NOT already exist)
References:
https://stackoverflow.com/a/8261083/1958900
"""
target = get_target_path(destination, self.dirname)
valid_paths = (self.dirname, './%s' % self.dirname)

with tarfile.open(self.archive_path, 'r:*') as tf:
members = []
for tarinfo in tf:
# Get only files under the directory `self.dirname`
pathsplit = os.path.split(tarinfo.path)
if pathsplit[0] not in valid_paths:
print('WARNING: skipped file "%s" in archive; not in directory "%s"' %
(tarinfo.path, self.dirname))
continue
tarinfo.name = os.path.join(*pathsplit[1:])
members.append(tarinfo)

if not members:
raise ValueError("No files under path directory '%s' in this tarfile")

tf.extractall(target, members)


class DockerArchive(DirectoryArchive, LazyDockerCopy):
"""
Reference to an archived directory from a docker container.
Notes:
- This is currently a bit of a frankenclass
- Because it requires access to a docker daemon, this object is not particularly portable.
"""
def __init__(self, *args, **kwargs):
LazyDockerCopy.__init__(self, *args, **kwargs)
self.archive_path = None
self.dirname = self.basename

def put(self, destination):
""" Copy the referenced directory to this path
Args:
destination (str): path to put this directory (which must NOT already exist)
"""
if not self._fetched:
self._fetch()
DirectoryArchive.put(self, destination)

put.__doc__ = DirectoryArchive.put.__doc__

def _fetch(self):
self.archive_path = self._open_tmpfile()
stream = self._get_tarstream()
shutil.copyfileobj(stream, self.tmpfile)
stream.close()
self.tmpfile.close()
16 changes: 9 additions & 7 deletions pyccc/files/localfiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,7 @@
import shutil
import socket

from . import BytesContainer, StringContainer, get_tempfile
from . import BytesContainer, StringContainer, get_tempfile, get_target_path


class FileContainer(BytesContainer):
Expand Down Expand Up @@ -71,10 +71,11 @@ def __init__(self, path, encoded_with=None, check_exists=True):
self.encoded_with = encoded_with

def put(self, filename, encoding=None):
target = get_target_path(filename, self.source)
if encoding is not None:
raise ValueError('Cannot encode as %s - this file is already encoded')
shutil.copy(self.localpath, filename)
return LocalFile(filename)
shutil.copy(self.localpath, target)
return LocalFile(target)

def open(self, mode='r', encoding=None):
"""Return file-like object (actually opens the file for this class)"""
Expand Down Expand Up @@ -105,14 +106,15 @@ def __init__(self, filecontainer):
self.localpath = self._open_tmpfile()
filecontainer.put(self.localpath)

def _open_tmpfile(self):
def _open_tmpfile(self, **kwargs):
"""
Open a temporary, unique file in CACHEDIR (/tmp/cyborgcache) by default.
Leave it open, assign file handle to self.tmpfile
**kwargs are passed to tempfile.NamedTemporaryFile
"""
tmpfile = get_tempfile()
path = tmpfile.name
self.tmpfile = tmpfile
self.tmpfile = get_tempfile(**kwargs)
path = self.tmpfile.name
return path

def __str__(self):
Expand Down
Loading

0 comments on commit 812da57

Please sign in to comment.