Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fillvalue #241

Merged
merged 7 commits on Aug 9, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
2 changes: 1 addition & 1 deletion hsds/basenode.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@
from .util.k8sClient import getDnLabelSelector, getPodIps
from . import hsds_logger as log

HSDS_VERSION = "0.8.0"
HSDS_VERSION = "0.8.1"


def getVersion():
Expand Down
15 changes: 11 additions & 4 deletions hsds/chunk_crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,11 +27,11 @@
from .util.httpUtil import isUnixDomainUrl
from .util.idUtil import getDataNodeUrl, getNodeCount
from .util.hdf5dtype import createDataType
from .util.dsetUtil import getFillValue, getSliceQueryParam
from .util.dsetUtil import getSliceQueryParam
from .util.dsetUtil import getSelectionShape, getChunkLayout
from .util.chunkUtil import getChunkCoverage, getDataCoverage
from .util.chunkUtil import getChunkIdForPartition, getQueryDtype
from .util.arrayUtil import jsonToArray, getShapeDims
from .util.arrayUtil import jsonToArray, getShapeDims, getNumpyValue
from .util.arrayUtil import getNumElements, arrayToBytes, bytesToArray
from . import config
from . import hsds_logger as log
Expand Down Expand Up @@ -401,11 +401,18 @@ async def read_point_sel(
params["action"] = "get"
params["count"] = num_points

fill_value = getFillValue(dset_json)

np_arr_rsp = None
dt = np_arr.dtype

fill_value = None
# initialize to fill_value if specified
if "creationProperties" in dset_json:
cprops = dset_json["creationProperties"]
if "fillValue" in cprops:
fill_value_prop = cprops["fillValue"]
encoding = cprops.get("fillValue_encoding")
fill_value = getNumpyValue(fill_value_prop, dt=dt, encoding=encoding)

def defaultArray():
# no data, return zero array
if fill_value:
Expand Down
19 changes: 14 additions & 5 deletions hsds/chunk_sn.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,7 @@
from .util.domainUtil import getBucketForDomain
from .util.hdf5dtype import getItemSize, createDataType
from .util.dsetUtil import getSelectionList, isNullSpace, getDatasetLayoutClass
from .util.dsetUtil import getFillValue, isExtensible, getSelectionPagination
from .util.dsetUtil import isExtensible, getSelectionPagination
from .util.dsetUtil import getSelectionShape, getDsetMaxDims, getChunkLayout
from .util.dsetUtil import getDatasetCreationPropertyLayout
from .util.chunkUtil import getNumChunks, getChunkIds, getChunkId
Expand All @@ -40,7 +40,7 @@
from .util.chunkUtil import getQueryDtype, get_chunktable_dims
from .util.arrayUtil import bytesArrayToList, jsonToArray, getShapeDims
from .util.arrayUtil import getNumElements, arrayToBytes, bytesToArray
from .util.arrayUtil import squeezeArray
from .util.arrayUtil import squeezeArray, getNumpyValue
from .util.authUtil import getUserPasswordFromRequest, validateUserPassword
from .util.boolparser import BooleanParser
from .servicenode_lib import getObjectJson, validateAction
Expand Down Expand Up @@ -1372,10 +1372,19 @@ async def doReadSelection(
log.error(msg)
raise HTTPBadRequest(reason=msg)

arr = np.zeros(np_shape, dtype=dset_dtype, order="C")
fill_value = getFillValue(dset_json)
if fill_value is not None:
# initialize to fill_value if specified
fill_value = None
if "creationProperties" in dset_json:
cprops = dset_json["creationProperties"]
if "fillValue" in cprops:
fill_value_prop = cprops["fillValue"]
encoding = cprops.get("fillValue_encoding")
fill_value = getNumpyValue(fill_value_prop, dt=dset_dtype, encoding=encoding)
if fill_value:
arr = np.empty(np_shape, dtype=dset_dtype, order="C")
arr[...] = fill_value
else:
arr = np.zeros(np_shape, dtype=dset_dtype, order="C")

crawler = ChunkCrawler(
app,
Expand Down
15 changes: 9 additions & 6 deletions hsds/datanode_lib.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,10 +29,10 @@
from .util.domainUtil import isValidDomain, getBucketForDomain
from .util.attrUtil import getRequestCollectionName
from .util.httpUtil import http_post
from .util.dsetUtil import getChunkLayout, getFilterOps, getFillValue
from .util.dsetUtil import getChunkLayout, getFilterOps
from .util.dsetUtil import getChunkInitializer, getSliceQueryParam
from .util.chunkUtil import getDatasetId, getChunkSelection, getChunkIndex
from .util.arrayUtil import arrayToBytes, bytesToArray, getShapeDims, jsonToArray
from .util.arrayUtil import arrayToBytes, bytesToArray, getShapeDims, jsonToArray, getNumpyValue
from .util.hdf5dtype import createDataType, getItemSize
from .util.rangegetUtil import ChunkLocation, chunkMunge

Expand Down Expand Up @@ -1119,11 +1119,14 @@ async def get_chunk(

if chunk_arr is None:
# normal fill value based init or initializer failed
fill_value = getFillValue(dset_json)
fill_value = None
if "creationProperties" in dset_json:
cprops = dset_json["creationProperties"]
if "fillValue" in cprops:
fill_value_prop = cprops["fillValue"]
encoding = cprops.get("fillValue_encoding")
fill_value = getNumpyValue(fill_value_prop, dt=dt, encoding=encoding)
if fill_value:
# need to convert list to tuples for numpy broadcast
if isinstance(fill_value, list):
fill_value = tuple(fill_value)
chunk_arr = np.empty(dims, dtype=dt, order="C")
chunk_arr[...] = fill_value
else:
Expand Down
42 changes: 22 additions & 20 deletions hsds/dset_sn.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,15 +15,14 @@
#

import math
import numpy as np
from json import JSONDecodeError
from aiohttp.web_exceptions import HTTPBadRequest, HTTPNotFound, HTTPConflict

from .util.httpUtil import http_post, http_put, http_delete, getHref, respJsonAssemble
from .util.httpUtil import jsonResponse
from .util.idUtil import isValidUuid, getDataNodeUrl, createObjId, isSchema2Id
from .util.dsetUtil import getPreviewQuery, getFilterItem
from .util.arrayUtil import getNumElements, getShapeDims
from .util.arrayUtil import getNumElements, getShapeDims, getNumpyValue
from .util.chunkUtil import getChunkSize, guessChunk, expandChunk, shrinkChunk
from .util.chunkUtil import getContiguousLayout
from .util.authUtil import getUserPasswordFromRequest, aclCheck
Expand Down Expand Up @@ -1031,26 +1030,29 @@ async def POST_Dataset(request):
# validate fill value compatible with type
dt = createDataType(datatype)
fill_value = creationProperties["fillValue"]
is_nan = False
if dt.kind == "f":
if isinstance(fill_value, str) and fill_value == "nan":
is_nan = True

if is_nan:
# use np.nan as fill value
# TBD: this needs to be fixed up for compound types
log.debug("converting 'nan' to np.nan for fillValue")
creationProperties["fillValue"] = np.nan
else:
if isinstance(fill_value, list):
fill_value = tuple(fill_value)
try:
np.asarray(fill_value, dtype=dt)
except (TypeError, ValueError):
msg = f"Fill value {fill_value} not compatible with "
msg += f"dataset type: {datatype}"
if "fillValue_encoding" in creationProperties:
fill_value_encoding = creationProperties["fillValue_encoding"]

if fill_value_encoding not in ("None", "base64"):
msg = f"unexpected value for fill_value_encoding: {fill_value_encoding}"
log.warn(msg)
raise HTTPBadRequest(reason=msg)
else:
# should see a string in this case
if not isinstance(fill_value, str):
msg = f"unexpected fill value: {fill_value} "
msg += f"for encoding: {fill_value_encoding}"
log.warn(msg)
raise HTTPBadRequest(reason=msg)
else:
fill_value_encoding = None

try:
getNumpyValue(fill_value, dt=dt, encoding=fill_value_encoding)
except ValueError:
msg = f"invalid fill value: {fill_value}"
log.warn(msg)
raise HTTPBadRequest(reason=msg)

if "filters" in creationProperties:
# convert to standard representation
Expand Down