Skip to content

Commit

Permalink
Review fixes lsst#9
Browse files Browse the repository at this point in the history
Updated formatters to use fileDescriptors as attributes.

Updated S3Datastores to match the new formatters. Fixed
mistakes in S3Datastore ingest functionality. Removed
hardcoded path manipulations.

Fixes to S3Datastore put functionality, missing
constraints checks added in.  Prettification and
correction of comments in get functionality.
Additional checks added. Fixes to getUri
functionality, somehow it seems it never got updated
when ButlerURI class was written.

Implemented review requests in ButlerURI.
  • Loading branch information
DinoBektesevic committed Aug 6, 2019
1 parent 38c518a commit 743401f
Show file tree
Hide file tree
Showing 8 changed files with 135 additions and 121 deletions.
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/core/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -268,7 +268,7 @@ def __initFromS3YamlFile(self, url):
try:
response = s3.get_object(Bucket=uri.netloc, Key=uri.relativeToPathRoot)
except (s3.exceptions.NoSuchKey, s3.exceptions.NoSuchBucket) as err:
raise FileNotFoundError(f"No such file or directory: {path}") from err
raise FileNotFoundError(f"No such file or directory: {uri}") from err

# boto3 response is a `StreamingBody`, but not a valid Python IOStream.
# Loader will raise an error that the stream has no name. A hackish
Expand Down
4 changes: 2 additions & 2 deletions python/lsst/daf/butler/core/formatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,7 @@ def write(self, inMemoryDataset):
"""
raise NotImplementedError("Type does not support writing")

def fromBytes(self, serializedDataset, fileDescriptor, component=None):
def fromBytes(self, serializedDataset, component=None):
"""Reads serialized data into a Dataset or its component.
Parameters
Expand All @@ -129,7 +129,7 @@ def fromBytes(self, serializedDataset, fileDescriptor, component=None):
"""
raise NotImplementedError("Type does not support reading from bytes.")

def toBytes(self, inMemoryDataset, fileDescriptor):
def toBytes(self, inMemoryDataset):
"""Serialize the Dataset to bytes based on formatter.
Parameters
Expand Down
14 changes: 10 additions & 4 deletions python/lsst/daf/butler/core/location.py
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,10 @@ def relativeToPathRoot(self):
Effectively, this is the path property with posix separator stripped
from the left hand side of the path.
"""
p = PurePosixPath(self._uri.path)
if not self.scheme:
p = PurePath(self.path)
else:
p = PurePosixPath(self.path)
return str(p.relative_to(p.root))

@property
Expand Down Expand Up @@ -292,7 +295,7 @@ def _fixupFileUri(parsed, root=None, forceAbsolute=False):
replacements["path"] += sep

elif parsed.scheme == "file":
# file URI implies POSIX path separators so split as posix,
# file URI implies POSIX path separators so split as POSIX,
# then join as os, and convert to abspath. Do not handle
# home directories since "file" scheme is explicitly documented
# to not do tilde expansion.
Expand Down Expand Up @@ -395,10 +398,13 @@ def relativeToPathRoot(self):
"""Returns the path component of the URI relative to the network
location.
Effectively, this is the path property with posix separator stipped
Effectively, this is the path property with POSIX separator stripped
from the left hand side of the path.
"""
p = PurePosixPath(os2posix(self.path))
if self._datastoreRootUri.scheme == 'file' or not self._datastoreRootUri.scheme:
p = PurePath(os2posix(self.path))
else:
p = PurePosixPath(self.path)
stripped = p.relative_to(p.root)
return str(posix2os(stripped))

Expand Down
201 changes: 110 additions & 91 deletions python/lsst/daf/butler/datastores/s3Datastore.py

Large diffs are not rendered by default.

30 changes: 10 additions & 20 deletions python/lsst/daf/butler/formatters/fileFormatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -77,21 +77,15 @@ def _writeFile(self, inMemoryDataset):
"""
pass

def _assembleDataset(self, data, fileDescriptor, component):
def _assembleDataset(self, data, component=None):
"""Assembles and coerces the dataset, or one of its components,
into an appropriate python type and returns it.
Parameters
----------
fileDescriptor : `FileDescriptor`
Identifies the file to read, type to read it into and parameters
to be used for reading.
data : `dict` or `object`
Composite or a dict that, or which component, needs to be
coerced to the python type specified in "fileDescriptor"
fileDescriptor : `FileDescriptor`
Identifies the file to read, type to read it into and parameters
to be used for reading.
component : `str`, optional
Component to read from the file. Only used if the `StorageClass`
for reading differed from the `StorageClass` used to write the
Expand All @@ -105,10 +99,6 @@ def _assembleDataset(self, data, fileDescriptor, component):
"""
fileDescriptor = self.fileDescriptor

# Read the file naively
path = fileDescriptor.location.path
data = self._readFile(path, fileDescriptor.storageClass.pytype)

# if read and write storage classes differ, more work is required
readStorageClass = fileDescriptor.readStorageClass
if readStorageClass != fileDescriptor.storageClass:
Expand Down Expand Up @@ -181,19 +171,19 @@ def read(self, component=None):
"""

# Read the file naively
path = fileDescriptor.location.path
data = self._readFile(path, fileDescriptor.storageClass.pytype)
path = self.fileDescriptor.location.path
data = self._readFile(path, self.fileDescriptor.storageClass.pytype)

# Assemble the requested dataset and potentially return only its
# component coercing it to its appropriate pytype
data = self._assembleDataset(data, fileDescriptor, component)
data = self._assembleDataset(data, component)

if data is None:
raise ValueError(f"Unable to read data with URI {fileDescriptor.location.uri}")
raise ValueError(f"Unable to read data with URI {self.fileDescriptor.location.uri}")

return data

def fromBytes(self, serializedDataset, fileDescriptor, component=None):
def fromBytes(self, serializedDataset, component=None):
"""Reads serialized data into a Dataset or its component.
Parameters
Expand Down Expand Up @@ -222,14 +212,14 @@ def fromBytes(self, serializedDataset, fileDescriptor, component=None):
raise NotImplementedError("Type does not support reading from bytes.")

data = self._fromBytes(serializedDataset,
fileDescriptor.storageClass.pytype)
self.fileDescriptor.storageClass.pytype)

# Assemble the requested dataset and potentially return only its
# component coercing it to its appropriate ptype
data = self._assembleDataset(data, fileDescriptor, component)
data = self._assembleDataset(data, component)

if data is None:
raise ValueError(f"Unable to read data with URI {fileDescriptor.location.uri}")
raise ValueError(f"Unable to read data with URI {self.fileDescriptor.location.uri}")

return data

Expand All @@ -254,7 +244,7 @@ def write(self, inMemoryDataset):

return fileDescriptor.location.pathInStore

def toBytes(self, inMemoryDataset, fileDescriptor):
def toBytes(self, inMemoryDataset):
"""Serialize the Dataset to bytes based on formatter.
Parameters
Expand Down
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/formatters/jsonFormatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -73,7 +73,7 @@ def _writeFile(self, inMemoryDataset):
Exception
The file could not be written.
"""
with open(self.fileDescriptor.location.path, "w") as fd:
with open(self.fileDescriptor.location.path, "wb") as fd:
if hasattr(inMemoryDataset, "_asdict"):
inMemoryDataset = inMemoryDataset._asdict()
fd.write(self._toBytes(inMemoryDataset))
Expand Down
2 changes: 1 addition & 1 deletion python/lsst/daf/butler/formatters/yamlFormatter.py
Original file line number Diff line number Diff line change
Expand Up @@ -105,7 +105,7 @@ def _writeFile(self, inMemoryDataset):
Exception
The file could not be written.
"""
with open(self.fileDescriptor.location.path, "w") as fd:
with open(self.fileDescriptor.location.path, "wb") as fd:
if hasattr(inMemoryDataset, "_asdict"):
inMemoryDataset = inMemoryDataset._asdict()
fd.write(self._toBytes(inMemoryDataset))
Expand Down
1 change: 0 additions & 1 deletion tests/config/basic/s3Datastore.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,6 @@ datastore:
ThingOne: lsst.daf.butler.formatters.yamlFormatter.YamlFormatter
datasetType.component: lsst.daf.butler.formatters.yamlFormatter.YamlFormatter
pvi: lsst.daf.butler.formatters.pickleFormatter.PickleFormatter
visit+physical_filter+instrument: lsst.daf.butler.formatters.yamlFormatter.YamlFormatter
instrument<DummyHSC>:
pvi: lsst.daf.butler.formatters.jsonFormatter.JsonFormatter
StructuredData: lsst.daf.butler.formatters.pickleFormatter.PickleFormatter
Expand Down

0 comments on commit 743401f

Please sign in to comment.