Skip to content

Commit

Permalink
Using chunk_sizes correctly
Browse files Browse the repository at this point in the history
  • Loading branch information
forman committed May 14, 2018
1 parent 02a2f8b commit 13fc73e
Show file tree
Hide file tree
Showing 3 changed files with 25 additions and 1 deletion.
12 changes: 11 additions & 1 deletion cablab/cube_access.py
Original file line number Diff line number Diff line change
Expand Up @@ -304,11 +304,21 @@ def _get_or_open_dataset(self, cube_var):

def _open_dataset(self, variable):
    """
    Open all ``*.nc`` files found in the variable's directory as a single
    multi-file dataset and store the result in ``variable.dataset``.

    If the cube configuration provides ``chunk_sizes``, the three values are
    unpacked as (time, lat, lon) and handed to xarray as a mapping from
    dimension name to chunk size. If no chunk sizes are configured,
    ``chunks=None`` is passed.

    :param variable: object providing ``dir_path``; its ``dataset``
        attribute is assigned by this method.
    """
    file_pattern = os.path.join(variable.dir_path, '*.nc')
    chunk_sizes = self._cube_config.chunk_sizes
    dask_chunks = None
    if chunk_sizes:
        time_size, lat_size, lon_size = chunk_sizes
        # TODO (forman): use multiples of chunk sizes in certain dimensions
        #                until some constraint is fulfilled,
        #                e.g. a hint that allows using max. XYZ MB per variable;
        #                only have multiples in the time dimension, because
        #                users want time-series analysis...
        dask_chunks = dict(time=time_size, lat=lat_size, lon=lon_size)
    # Pass the per-dimension mapping, not the raw chunk_sizes sequence.
    variable.dataset = xr.open_mfdataset(file_pattern,
                                         concat_dim='time',
                                         preprocess=self._preprocess_dataset,
                                         engine='h5netcdf',
                                         chunks=dask_chunks)

def _preprocess_dataset(self, ds: Dataset):
# Convert specific data variables to coordinate variables
Expand Down
4 changes: 4 additions & 0 deletions cablab/cube_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,10 @@ def __init__(self,
static_data=False,
model_version=CUBE_MODEL_VERSION):
self.model_version = model_version
if chunk_sizes is not None and len(chunk_sizes) != 3:
raise ValueError('chunk_sizes must be a sequence of three integers: <time-size>, <lat-size>, <lon-size>')
if comp_level is not None and (comp_level < 1 or comp_level > 9):
raise ValueError('comp_level must be an integer in the range 1 to 9')
self.spatial_res = spatial_res
self.grid_x0 = grid_x0
self.grid_y0 = grid_y0
Expand Down
10 changes: 10 additions & 0 deletions test/test_cube_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,16 @@ def test_validate(self):
CubeConfig(grid_x0=-1)
with self.assertRaises(ValueError):
CubeConfig(grid_y0=-1)
with self.assertRaises(ValueError):
CubeConfig(chunk_sizes=tuple())
with self.assertRaises(ValueError):
CubeConfig(chunk_sizes=[256])
with self.assertRaises(ValueError):
CubeConfig(chunk_sizes=(256, 256))
with self.assertRaises(ValueError):
CubeConfig(comp_level=0)
with self.assertRaises(ValueError):
CubeConfig(comp_level=10)

def test_model_version_is_current(self):
config = CubeConfig()
Expand Down

0 comments on commit 13fc73e

Please sign in to comment.