From f292ac325dd9a9b876357f42669d86093f46867a Mon Sep 17 00:00:00 2001 From: cnavacch Date: Mon, 18 Nov 2019 12:54:34 +0100 Subject: [PATCH] fixed temporal dimension bug, when loading data --- src/yeoda/datacube.py | 52 ++++++++++++++++++++----------------- tests/test_loading.py | 60 +++++++++++++++++++++---------------------- 2 files changed, 59 insertions(+), 53 deletions(-) diff --git a/src/yeoda/datacube.py b/src/yeoda/datacube.py index 2802974..c212b6d 100644 --- a/src/yeoda/datacube.py +++ b/src/yeoda/datacube.py @@ -592,8 +592,8 @@ def split_yearly(self, name='time', years=None): return self.split_by_dimension(values, expressions, name=name) - def load_by_geom(self, geom, sref=None, dimension_name="tile", band='1', apply_mask=False, dtype="xarray", - origin='c'): + def load_by_geom(self, geom, sref=None, band='1', spatial_dim_name="tile", temporal_dim_name="time", + apply_mask=False, dtype="xarray", origin='ur'): """ Loads data according to a given geometry. @@ -607,8 +607,10 @@ def load_by_geom(self, geom, sref=None, dimension_name="tile", band='1', apply_m Spatial reference of the given region of interest `geom`. band : int or str, optional Band number or name (default is 1). - dimension_name : str, optional + spatial_dim_name : str, optional Name of the spatial dimension (default: 'tile'). + temporal_dim_name : str, optional + Name of the temporal dimension (default: 'time'). apply_mask : bool, optional If true, a numpy mask array with a mask excluding (=1) all pixels outside `geom` (=0) will be created (default is True). @@ -635,12 +637,12 @@ def load_by_geom(self, geom, sref=None, dimension_name="tile", band='1', apply_m return None if self.grid: - if dimension_name not in self.dimensions: - raise DimensionUnkown(dimension_name) + if spatial_dim_name not in self.dimensions: + raise DimensionUnkown(spatial_dim_name) this_sref = None if sref is not None: this_sref = self.grid.core.projection.osr_spref - tilenames = list(self.inventory[dimension_name]) + tilenames = list(self.inventory[spatial_dim_name]) if len(list(set(tilenames))) > 1: raise Exception('Data can be loaded only from one tile. Please filter the data cube before.') tilename = tilenames[0] @@ -708,10 +710,10 @@ def load_by_geom(self, geom, sref=None, dimension_name="tile", band='1', apply_m else: raise FileTypeUnknown(file_type) - return self.__convert_dtype(data, dtype=dtype, xs=xs, ys=ys, band=band) + return self.__convert_dtype(data, dtype=dtype, xs=xs, ys=ys, band=band, temporal_dim_name=temporal_dim_name) - def load_by_pixels(self, rows, cols, row_size=1, col_size=1, band='1', dimension_name="tile", dtype="xarray", - origin="ur"): + def load_by_pixels(self, rows, cols, row_size=1, col_size=1, band='1', spatial_dim_name="tile", + temporal_dim_name="time", dtype="xarray", origin="ur"): """ Loads data according to given pixel numbers, i.e. the row and column numbers and optionally a certain pixel window (`row_size` and `col_size`). @@ -728,8 +730,10 @@ def load_by_pixels(self, rows, cols, row_size=1, col_size=1, band='1', dimension Number of columns to read (counts from input argument `cols`, default is 1). band : int or str, optional Band number or name (default is 1). - dimension_name : str, optional + spatial_dim_name : str, optional Name of the spatial dimension (default: 'tile'). + temporal_dim_name : str, optional + Name of the temporal dimension (default: 'time'). dtype : str Data type of the returned array-like structure (default is 'xarray'). It can be: - 'xarray': loads data as an xarray.DataSet @@ -758,9 +762,9 @@ def load_by_pixels(self, rows, cols, row_size=1, col_size=1, band='1', dimension cols = [cols] if self.grid: - if dimension_name not in self.dimensions: - raise DimensionUnkown(dimension_name) - tilenames = list(self.inventory[dimension_name]) + if spatial_dim_name not in self.dimensions: + raise DimensionUnkown(spatial_dim_name) + tilenames = list(self.inventory[spatial_dim_name]) if len(list(set(tilenames))) > 1: raise Exception('Data can be loaded only from one tile. Please filter the data cube before.') tilename = tilenames[0] @@ -821,9 +825,10 @@ def load_by_pixels(self, rows, cols, row_size=1, col_size=1, band='1', dimension else: raise FileTypeUnknown(file_type) - return self.__convert_dtype(data, dtype, xs=xs, ys=ys, band=band) + return self.__convert_dtype(data, dtype, xs=xs, ys=ys, band=band, temporal_dim_name=temporal_dim_name) - def load_by_coords(self, xs, ys, sref=None, band='1', dimension_name="tile", dtype="xarray", origin="ur"): + def load_by_coords(self, xs, ys, sref=None, band='1', spatial_dim_name="tile", temporal_dim_name="time", + dtype="xarray", origin="ur"): """ Loads data as a 1-D array according to a given coordinate. @@ -837,8 +842,10 @@ def load_by_coords(self, xs, ys, sref=None, band='1', dimension_name="tile", dty Spatial reference referring to the world system coordinates `x` and `y`. band : int or str, optional Band number or name (default is 1). - dimension_name : str, optional + spatial_dim_name : str, optional Name of the spatial dimension (default: 'tile'). + temporal_dim_name : str, optional + Name of the temporal dimension (default: 'time'). dtype : str Data type of the returned array-like structure (default is 'xarray'). It can be: - 'xarray': loads data as an xarray.DataSet @@ -867,12 +874,12 @@ def load_by_coords(self, xs, ys, sref=None, band='1', dimension_name="tile", dty ys = [ys] if self.grid is not None: - if dimension_name not in self.dimensions: - raise DimensionUnkown(dimension_name) + if spatial_dim_name not in self.dimensions: + raise DimensionUnkown(spatial_dim_name) this_sref = None if sref is not None: this_sref = self.grid.core.projection.osr_spref - tilenames = list(self.inventory[dimension_name]) + tilenames = list(self.inventory[spatial_dim_name]) if len(list(set(tilenames))) > 1: raise Exception('Data can be loaded only from one tile. Please filter the data cube before.') tilename = tilenames[0] @@ -925,7 +932,7 @@ def load_by_coords(self, xs, ys, sref=None, band='1', dimension_name="tile", dty else: raise FileTypeUnknown(file_type) - return self.__convert_dtype(data, dtype, xs=xs, ys=ys, band=band) + return self.__convert_dtype(data, dtype, xs=xs, ys=ys, band=band, temporal_dim_name=temporal_dim_name) def __convert_dtype(self, data, dtype, xs=None, ys=None, temporal_dim_name='time', band=1): """ @@ -945,7 +952,7 @@ def __convert_dtype(self, data, dtype, xs=None, ys=None, temporal_dim_name='time ys : list, optional List of world system coordinates in Y direction. temporal_dim_name : str, optional - Name of the temporal dimension (default: 'tile'). + Name of the temporal dimension (default: 'time'). band : int or str, optional Band number or name (default is 1). @@ -955,9 +962,8 @@ def __convert_dtype(self, data, dtype, xs=None, ys=None, temporal_dim_name='time Data as an array-like object. """ - timestamps = self[temporal_dim_name] - if dtype == "xarray": + timestamps = self[temporal_dim_name] if isinstance(data, list) and isinstance(data[0], np.ndarray): ds = [] for i, entry in enumerate(data): diff --git a/tests/test_loading.py b/tests/test_loading.py index 8d26368..170ab16 100644 --- a/tests/test_loading.py +++ b/tests/test_loading.py @@ -124,12 +124,12 @@ def test_load_gt2numpy_by_coord(self): dc = self._create_loadable_dc(self.gt_filepaths) - data = dc.load_by_coords(self.lon, self.lat, sref=self.sref, dimension_name='tile_name', dtype='numpy') + data = dc.load_by_coords(self.lon, self.lat, sref=self.sref, spatial_dim_name='tile_name', dtype='numpy') assert (self.ref_np_ar == data).all() ref_data_list = [self.ref_np_ar, self.ref_np_ar] data = dc.load_by_coords([self.lon, self.lon], [self.lat, self.lat], sref=self.sref, - dimension_name='tile_name', dtype='numpy') + spatial_dim_name='tile_name', dtype='numpy') assert (ref_data_list[0] == data[0]).all() & (ref_data_list[1] == data[1]).all() def test_load_gt2xarray_by_coord(self): @@ -137,14 +137,14 @@ def test_load_gt2xarray_by_coord(self): dc = self._create_loadable_dc(self.gt_filepaths) - data = dc.load_by_coords(self.lon, self.lat, sref=self.sref, dimension_name='tile_name', dtype='xarray', + data = dc.load_by_coords(self.lon, self.lat, sref=self.sref, spatial_dim_name='tile_name', dtype='xarray', origin='c') data['1'].data = data['1'].data.astype(float) # convert to float for comparison assert self.ref_xr_ds.equals(data) ref_data_list = [self.ref_xr_ds, self.ref_xr_ds] data = dc.load_by_coords([self.lon, self.lon], [self.lat, self.lat], sref=self.sref, - dimension_name='tile_name', dtype='xarray', origin='c') + spatial_dim_name='tile_name', dtype='xarray', origin='c') assert ref_data_list[0].equals(data) & ref_data_list[1].equals(data) def test_load_gt2dataframe_by_coord(self): @@ -152,14 +152,14 @@ def test_load_gt2dataframe_by_coord(self): dc = self._create_loadable_dc(self.gt_filepaths) - data = dc.load_by_coords(self.lon, self.lat, sref=self.sref, dimension_name='tile_name', dtype='dataframe', + data = dc.load_by_coords(self.lon, self.lat, sref=self.sref, spatial_dim_name='tile_name', dtype='dataframe', origin='c') data['1'] = data['1'].astype(float) # convert to float for comparison assert self.ref_pd_df.equals(data) ref_data_list = [self.ref_pd_df, self.ref_pd_df] data = dc.load_by_coords([self.lon, self.lon], [self.lat, self.lat], sref=self.sref, - dimension_name='tile_name', dtype='dataframe', origin='c') + spatial_dim_name='tile_name', dtype='dataframe', origin='c') data['1'] = data['1'].astype(float) # convert to float for comparison assert ref_data_list[0].equals(data) & ref_data_list[1].equals(data) @@ -168,12 +168,12 @@ def test_load_nc2numpy_by_coord(self): dc = self._create_loadable_dc(self.nc_filepaths) - data = dc.load_by_coords(self.lon, self.lat, sref=self.sref, dimension_name='tile_name', dtype='numpy') + data = dc.load_by_coords(self.lon, self.lat, sref=self.sref, spatial_dim_name='tile_name', dtype='numpy') assert (self.ref_np_ar == data).all() ref_data_list = [self.ref_np_ar, self.ref_np_ar] data = dc.load_by_coords([self.lon, self.lon], [self.lat, self.lat], sref=self.sref, - dimension_name='tile_name', dtype='numpy') + spatial_dim_name='tile_name', dtype='numpy') assert (ref_data_list[0] == data[0]).all() & (ref_data_list[1] == data[1]).all() def test_load_nc2xarray_by_coord(self): @@ -181,13 +181,13 @@ def test_load_nc2xarray_by_coord(self): dc = self._create_loadable_dc(self.nc_filepaths) - data = dc.load_by_coords(self.lon, self.lat, sref=self.sref, dimension_name='tile_name', dtype='xarray', + data = dc.load_by_coords(self.lon, self.lat, sref=self.sref, spatial_dim_name='tile_name', dtype='xarray', origin='c') assert self.ref_xr_ds.equals(data) ref_data_list = [self.ref_xr_ds, self.ref_xr_ds] data = dc.load_by_coords([self.lon, self.lon], [self.lat, self.lat], sref=self.sref, - dimension_name='tile_name', dtype='xarray', origin='c') + spatial_dim_name='tile_name', dtype='xarray', origin='c') assert ref_data_list[0].equals(data) & ref_data_list[1].equals(data) def test_load_nc2dataframe_by_coord(self): @@ -195,13 +195,13 @@ def test_load_nc2dataframe_by_coord(self): dc = self._create_loadable_dc(self.nc_filepaths) - data = dc.load_by_coords(self.lon, self.lat, sref=self.sref, dimension_name='tile_name', dtype='dataframe', + data = dc.load_by_coords(self.lon, self.lat, sref=self.sref, spatial_dim_name='tile_name', dtype='dataframe', origin='c') assert self.ref_pd_df.equals(data) ref_data_list = [self.ref_pd_df, self.ref_pd_df] data = dc.load_by_coords([self.lon, self.lon], [self.lat, self.lat], sref=self.sref, - dimension_name='tile_name', dtype='dataframe', origin='c') + spatial_dim_name='tile_name', dtype='dataframe', origin='c') assert ref_data_list[0].equals(data) & ref_data_list[1].equals(data) def test_load_singlenc2xarray_by_coord(self): @@ -257,11 +257,11 @@ def test_load_gt2numpy_by_pixels(self): dc = self._create_loadable_dc(self.gt_filepaths) - data = dc.load_by_pixels(self.row, self.col, dimension_name='tile_name', dtype='numpy') + data = dc.load_by_pixels(self.row, self.col, spatial_dim_name='tile_name', dtype='numpy') assert (self.ref_np_ar == data).all() data = dc.load_by_pixels(self.row, self.col, row_size=self.row_size, col_size=self.col_size, - dimension_name='tile_name', dtype='numpy') + spatial_dim_name='tile_name', dtype='numpy') assert (self.ref_np_ar_area == data).all() def test_load_gt2xarray_by_pixels(self): @@ -269,12 +269,12 @@ def test_load_gt2xarray_by_pixels(self): dc = self._create_loadable_dc(self.gt_filepaths) - data = dc.load_by_pixels(self.row, self.col, dimension_name='tile_name', dtype='xarray', origin='c') + data = dc.load_by_pixels(self.row, self.col, spatial_dim_name='tile_name', dtype='xarray', origin='c') data['1'].data = data['1'].data.astype(float) # convert to float for comparison assert self.ref_xr_ds.equals(data) data = dc.load_by_pixels(self.row, self.col, row_size=self.row_size, col_size=self.col_size, - dimension_name='tile_name', dtype='xarray', origin='c') + spatial_dim_name='tile_name', dtype='xarray', origin='c') data['1'].data = data['1'].data.astype(float) # convert to float for comparison assert self.ref_xr_ds_area.equals(data) @@ -283,13 +283,13 @@ def test_load_gt2dataframe_by_pixels(self): dc = self._create_loadable_dc(self.gt_filepaths) - data = dc.load_by_pixels(self.row, self.col, dimension_name='tile_name', dtype='dataframe', origin='c') + data = dc.load_by_pixels(self.row, self.col, spatial_dim_name='tile_name', dtype='dataframe', origin='c') # convert to float data['1'] = data['1'].astype(float) # convert to float for comparison assert self.ref_pd_df.equals(data) data = dc.load_by_pixels(self.row, self.col, row_size=self.row_size, col_size=self.col_size, - dimension_name='tile_name', dtype='dataframe', origin='c') + spatial_dim_name='tile_name', dtype='dataframe', origin='c') data['1'] = data['1'].astype(float) # convert to float for comparison assert self.ref_pd_df_area.equals(data) @@ -298,11 +298,11 @@ def test_load_nc2numpy_by_pixels(self): dc = self._create_loadable_dc(self.nc_filepaths) - data = dc.load_by_pixels(self.row, self.col, dimension_name='tile_name', dtype='numpy') + data = dc.load_by_pixels(self.row, self.col, spatial_dim_name='tile_name', dtype='numpy') assert (self.ref_np_ar == data).all() data = dc.load_by_pixels(self.row, self.col, row_size=self.row_size, col_size=self.col_size, - dimension_name='tile_name', dtype='numpy') + spatial_dim_name='tile_name', dtype='numpy') assert (self.ref_np_ar_area == data).all() def test_load_nc2xarray_by_pixels(self): @@ -310,12 +310,12 @@ def test_load_nc2xarray_by_pixels(self): dc = self._create_loadable_dc(self.nc_filepaths) - data = dc.load_by_pixels(self.row, self.col, dimension_name='tile_name', dtype='xarray', origin='c') + data = dc.load_by_pixels(self.row, self.col, spatial_dim_name='tile_name', dtype='xarray', origin='c') data['1'].data = data['1'].data.astype(float) # convert to float for comparison assert self.ref_xr_ds.equals(data) data = dc.load_by_pixels(self.row, self.col, row_size=self.row_size, col_size=self.col_size, - dimension_name='tile_name', dtype='xarray', origin='c') + spatial_dim_name='tile_name', dtype='xarray', origin='c') data['1'].data = data['1'].data.astype(float) # convert to float for comparison assert self.ref_xr_ds_area.equals(data) @@ -324,12 +324,12 @@ def test_load_nc2dataframe_by_pixels(self): dc = self._create_loadable_dc(self.nc_filepaths) - data = dc.load_by_pixels(self.row, self.col, dimension_name='tile_name', dtype='dataframe', origin='c') + data = dc.load_by_pixels(self.row, self.col, spatial_dim_name='tile_name', dtype='dataframe', origin='c') data['1'] = data['1'].astype(float) # convert to float for comparison assert self.ref_pd_df.equals(data) data = dc.load_by_pixels(self.row, self.col, row_size=self.row_size, col_size=self.col_size, - dimension_name='tile_name', dtype='dataframe', origin='c') + spatial_dim_name='tile_name', dtype='dataframe', origin='c') data['1'] = data['1'].astype(float) # convert to float for comparison assert self.ref_pd_df_area.equals(data) @@ -385,14 +385,14 @@ def test_load_gt2numpy_by_geom(self): """ Tests loading of a Numpy array from GeoTIFF files by a bounding box. """ dc = self._create_loadable_dc(self.gt_filepaths) - data = dc.load_by_geom(self.bbox, dimension_name='tile_name', dtype='numpy') + data = dc.load_by_geom(self.bbox, spatial_dim_name='tile_name', dtype='numpy') assert (self.ref_np_ar_area == data).all() def test_load_gt2xarray_by_geom(self): """ Tests loading of an xarray array from GeoTIFF files by a bounding box. """ dc = self._create_loadable_dc(self.gt_filepaths) - data = dc.load_by_geom(self.bbox, dimension_name='tile_name', dtype='xarray', origin='c') + data = dc.load_by_geom(self.bbox, spatial_dim_name='tile_name', dtype='xarray', origin='c') data['1'].data = data['1'].data.astype(float) # convert to float for comparison assert self.ref_xr_ds_area.equals(data) @@ -400,7 +400,7 @@ def test_load_gt2dataframe_by_geom(self): """ Tests loading of a Pandas data frame from GeoTIFF files by a bounding box. """ dc = self._create_loadable_dc(self.gt_filepaths) - data = dc.load_by_geom(self.bbox, dimension_name='tile_name', dtype='dataframe', origin='c') + data = dc.load_by_geom(self.bbox, spatial_dim_name='tile_name', dtype='dataframe', origin='c') data['1'] = data['1'].astype(float) # convert to float for comparison assert self.ref_pd_df_area.equals(data) @@ -408,21 +408,21 @@ def test_load_nc2numpy_by_geom(self): """ Tests loading of a Numpy array from NetCDF files by a bounding box. """ dc = self._create_loadable_dc(self.nc_filepaths) - data = dc.load_by_geom(self.bbox, dimension_name='tile_name', dtype='numpy') + data = dc.load_by_geom(self.bbox, spatial_dim_name='tile_name', dtype='numpy') assert (self.ref_np_ar_area == data).all() def test_load_nc2xarray_by_geom(self): """ Tests loading of an xarray array from NetCDF files by a bounding box. """ dc = self._create_loadable_dc(self.nc_filepaths) - data = dc.load_by_geom(self.bbox, dimension_name='tile_name', dtype='xarray', origin='c') + data = dc.load_by_geom(self.bbox, spatial_dim_name='tile_name', dtype='xarray', origin='c') assert self.ref_xr_ds_area.equals(data) def test_load_nc2dataframe_by_geom(self): """ Tests loading of a Pandas data frame from NetCDF files by a bounding box. """ dc = self._create_loadable_dc(self.nc_filepaths) - data = dc.load_by_geom(self.bbox, dimension_name='tile_name', dtype='dataframe', origin='c') + data = dc.load_by_geom(self.bbox, spatial_dim_name='tile_name', dtype='dataframe', origin='c') assert self.ref_pd_df_area.equals(data) def test_load_singlenc2xarray_by_pixels(self):