Skip to content

Commit

Permalink
Merge branch 'master' of github.com:DHI-GRAS/terracotta
Browse files Browse the repository at this point in the history
  • Loading branch information
dionhaefner committed Nov 26, 2018
2 parents 3c755a4 + 9e8d729 commit 7aa0751
Show file tree
Hide file tree
Showing 6 changed files with 106 additions and 64 deletions.
101 changes: 56 additions & 45 deletions terracotta/drivers/raster_base.py
Original file line number Diff line number Diff line change
Expand Up @@ -119,12 +119,16 @@ def _compute_image_stats_chunked(dataset: 'DatasetReader') -> Optional[Dict[str,

valid_data_count += int(valid_data.size)

hull_candidates = RasterDriver._hull_candidate_mask(~block_data.mask)
hull_shapes = (geometry.shape(s) for s, _ in features.shapes(
np.ones(hull_candidates.shape, 'uint8'),
mask=hull_candidates,
transform=windows.transform(w, dataset.transform)
))
if np.any(block_data.mask):
hull_candidates = RasterDriver._hull_candidate_mask(~block_data.mask)
hull_shapes = [geometry.shape(s) for s, _ in features.shapes(
np.ones(hull_candidates.shape, 'uint8'),
mask=hull_candidates,
transform=windows.transform(w, dataset.transform)
)]
else:
w, s, e, n = windows.bounds(w, dataset.transform)
hull_shapes = [geometry.Polygon([(w, s), (e, s), (e, n), (w, n)])]
convex_hull = geometry.MultiPolygon([convex_hull, *hull_shapes]).convex_hull

tdigest.update(valid_data)
Expand Down Expand Up @@ -165,22 +169,28 @@ def _compute_image_stats(dataset: 'DatasetReader',
)
raster_data = dataset.read(1, out_shape=out_shape, masked=True)

if dataset.profile['nodata'] is not None:
if dataset.nodata is not None:
# nodata values might slip into output array if out_shape < dataset.shape
raster_data[raster_data == dataset.profile['nodata']] = np.ma.masked
raster_data[raster_data == dataset.nodata] = np.ma.masked

valid_data = raster_data.compressed()

if valid_data.size == 0:
return None

hull_candidates = RasterDriver._hull_candidate_mask(~raster_data.mask)
hull_shapes = (geometry.shape(s) for s, _ in features.shapes(
np.ones(hull_candidates.shape, 'uint8'),
mask=hull_candidates,
transform=data_transform
))
convex_hull = geometry.MultiPolygon(hull_shapes).convex_hull
if np.any(raster_data.mask):
hull_candidates = RasterDriver._hull_candidate_mask(~raster_data.mask)
hull_shapes = (geometry.shape(s) for s, _ in features.shapes(
np.ones(hull_candidates.shape, 'uint8'),
mask=hull_candidates,
transform=data_transform
))
convex_hull = geometry.MultiPolygon(hull_shapes).convex_hull
else:
# no masked entries -> convex hull == dataset bounds
w, s, e, n = dataset.bounds
convex_hull = geometry.Polygon([(w, s), (e, s), (e, n), (w, n)])

convex_hull_wgs = warp.transform_geom(
dataset.crs, 'epsg:4326', geometry.mapping(convex_hull)
)
Expand Down Expand Up @@ -227,6 +237,12 @@ def compute_metadata(cls, raster_path: str, *,
)

with rasterio.open(raster_path) as src:
if src.nodata is None and not cls._has_alpha_band(src):
warnings.warn(
f'Raster file {raster_path} does not have a valid nodata value, '
'and does not contain an alpha band. No data will be masked.'
)

bounds = warp.transform_bounds(
src.crs, 'epsg:4326', *src.bounds, densify_pts=21
)
Expand Down Expand Up @@ -329,6 +345,11 @@ def _calculate_default_transform(src_crs: Union[Dict[str, str], str],

return dst_transform, dst_width, dst_height

@staticmethod
def _has_alpha_band(src: 'DatasetReader') -> bool:
from rasterio.enums import MaskFlags
return any([MaskFlags.per_dataset in flags for flags in src.mask_flag_enums])

@cachedmethod(operator.attrgetter('_raster_cache'))
@trace('get_raster_tile')
def _get_raster_tile(self, path: str, *,
Expand All @@ -344,7 +365,7 @@ def _get_raster_tile(self, path: str, *,
import rasterio
from rasterio import transform, windows, warp
from rasterio.vrt import WarpedVRT
from rasterio.enums import MaskFlags
from affine import Affine

dst_bounds: Tuple[float, float, float, float]

Expand All @@ -354,9 +375,6 @@ def _get_raster_tile(self, path: str, *,
upsampling_enum = self._get_resampling_enum(upsampling_method)
downsampling_enum = self._get_resampling_enum(downsampling_method)

def has_alpha_band(src: rasterio.DatasetReader) -> bool:
return any([MaskFlags.per_dataset in flags for flags in src.mask_flag_enums])

with contextlib.ExitStack() as es:
es.enter_context(rasterio.Env(**self.RIO_ENV_KEYS))
try:
Expand All @@ -369,48 +387,42 @@ def has_alpha_band(src: rasterio.DatasetReader) -> bool:
dst_transform, _, _ = self._calculate_default_transform(
src.crs, self.TARGET_CRS, src.width, src.height, *src.bounds
)
dst_res = (dst_transform.a, dst_transform.e)
dst_res = (abs(dst_transform.a), abs(dst_transform.e))
dst_bounds = warp.transform_bounds(src.crs, self.TARGET_CRS, *src.bounds)

if bounds is None:
bounds = dst_bounds

# pad tile bounds by 2 pixels to prevent interpolation artefacts
vrt_bounds = [
bounds[0] - 2 * dst_res[0],
bounds[1] + 2 * dst_res[1],
bounds[2] + 2 * dst_res[0],
bounds[3] - 2 * dst_res[1]
]
# pad tile bounds to prevent interpolation artefacts
num_pad_pixels = 2

# compute tile VRT shape and transform
vrt_width = max(1, round((vrt_bounds[2] - vrt_bounds[0]) / dst_res[0]))
vrt_height = max(1, round((vrt_bounds[1] - vrt_bounds[3]) / dst_res[1]))
vrt_transform = transform.from_bounds(*vrt_bounds, width=vrt_width, height=vrt_height)
dst_width = max(1, round((bounds[2] - bounds[0]) / dst_res[0]))
dst_height = max(1, round((bounds[3] - bounds[1]) / dst_res[1]))
vrt_transform = (
transform.from_bounds(*bounds, width=dst_width, height=dst_height)
* Affine.translation(-num_pad_pixels, -num_pad_pixels)
)
vrt_height, vrt_width = dst_height + 2 * num_pad_pixels, dst_width + 2 * num_pad_pixels

# remove padding in output
out_window = windows.Window(
col_off=2, row_off=2, width=vrt_width - 4, height=vrt_height - 4
col_off=num_pad_pixels, row_off=num_pad_pixels, width=dst_width, height=dst_height
)

# construct VRT
vrt_args: Dict[str, Any] = {}
if has_alpha_band(src):
vrt_args.update(add_alpha=True)

vrt = es.enter_context(
WarpedVRT(
src, crs=self.TARGET_CRS, resampling=upsampling_enum,
transform=vrt_transform, width=vrt_width, height=vrt_height,
**vrt_args
src, crs=self.TARGET_CRS, resampling=upsampling_enum, add_alpha=True,
transform=vrt_transform, width=vrt_width, height=vrt_height
)
)

# prevent loads of very sparse data
out_window_bounds = windows.bounds(out_window, vrt_transform)
cover_ratio = (
(dst_bounds[2] - dst_bounds[0]) / (out_window_bounds[2] - out_window_bounds[0])
* (dst_bounds[1] - dst_bounds[3]) / (out_window_bounds[1] - out_window_bounds[3])
* (dst_bounds[3] - dst_bounds[1]) / (out_window_bounds[3] - out_window_bounds[1])
)

if cover_ratio < 0.01:
Expand All @@ -429,12 +441,11 @@ def has_alpha_band(src: rasterio.DatasetReader) -> bool:
tile_data = vrt.read(
1, resampling=resampling_enum, window=out_window, out_shape=tile_size
)
mask = False
if has_alpha_band(src):
mask_idx = src.count + 1
mask |= vrt.read(mask_idx, window=out_window, out_shape=tile_size) == 0
if vrt.nodata is not None:
mask |= tile_data == vrt.nodata
# read alpha mask
mask_idx = src.count + 1
mask = vrt.read(mask_idx, window=out_window, out_shape=tile_size) == 0
if src.nodata is not None:
mask |= tile_data == src.nodata

return np.ma.masked_array(tile_data, mask=mask)

Expand Down
11 changes: 7 additions & 4 deletions terracotta/server/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,14 +28,17 @@ class Meta:


class DatasetSchema(Schema):
class Meta:
ordered = True

page = fields.Integer(description='Current page', required=True)
limit = fields.Integer(description='Maximum number of returned items', required=True)
datasets = fields.List(
fields.Dict(values=fields.String(example='value1'),
keys=fields.String(example='key1')),
required=True,
description='Array containing all available key combinations'
)
limit = fields.Integer(description='Maximum number of returned items', required=True)
page = fields.Integer(description='Current page', required=True)


@metadata_api.route('/datasets', methods=['GET'])
Expand Down Expand Up @@ -70,9 +73,9 @@ def get_datasets() -> str:
keys = options or None

payload = {
'datasets': datasets(keys, page=page, limit=limit),
'limit': limit,
'page': page
'page': page,
'datasets': datasets(keys, page=page, limit=limit)
}

schema = DatasetSchema()
Expand Down
1 change: 1 addition & 0 deletions tests/benchmarks.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@

ZOOM_XYZ = {
'birds-eye': 8,
'balanced': 12,
'subpixel': 21,
'preview': None
}
Expand Down
17 changes: 15 additions & 2 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,7 +42,7 @@ def mysql_server(request):
return request.config.getoption('mysql_server')


def cloud_optimize(raster_file, outfile, create_mask=False):
def cloud_optimize(raster_file, outfile, create_mask=False, remove_nodata=False):
import math
import contextlib
import rasterio
Expand Down Expand Up @@ -71,6 +71,9 @@ def cloud_optimize(raster_file, outfile, create_mask=False):
profile = src.profile.copy()
profile.update(COG_PROFILE)

if remove_nodata:
profile['nodata'] = None

memfile = es.enter_context(rasterio.io.MemoryFile())
dst = es.enter_context(memfile.open(**profile))

Expand Down Expand Up @@ -166,7 +169,17 @@ def big_raster_file_nodata(tmpdir_factory):
def big_raster_file_mask(tmpdir_factory, big_raster_file_nodata):
outpath = tmpdir_factory.mktemp('raster')
optimized_raster = outpath.join('img-alpha.tif')
cloud_optimize(big_raster_file_nodata, optimized_raster, create_mask=True)
cloud_optimize(big_raster_file_nodata, optimized_raster,
create_mask=True, remove_nodata=False)
return optimized_raster


@pytest.fixture(scope='session')
def big_raster_file_nomask(tmpdir_factory, big_raster_file_nodata):
outpath = tmpdir_factory.mktemp('raster')
optimized_raster = outpath.join('img-alpha.tif')
cloud_optimize(big_raster_file_nodata, optimized_raster,
create_mask=False, remove_nodata=True)
return optimized_raster


Expand Down
29 changes: 20 additions & 9 deletions tests/drivers/test_raster_drivers.py
Original file line number Diff line number Diff line change
Expand Up @@ -386,18 +386,24 @@ def test_default_transform():

def geometry_mismatch(shape1, shape2):
"""Compute relative mismatch of two shapes"""
print(shape1.area, shape2.area)
return shape1.symmetric_difference(shape2).area / shape1.union(shape2).area


@pytest.mark.parametrize('use_chunks', [True, False])
def test_compute_metadata(big_raster_file_nodata, use_chunks):
@pytest.mark.parametrize('nodata_type', ['nodata', 'nomask'])
def test_compute_metadata(big_raster_file_nodata, big_raster_file_nomask,
nodata_type, use_chunks):
from terracotta.drivers.raster_base import RasterDriver

if nodata_type == 'nodata':
raster_file = big_raster_file_nodata
elif nodata_type == 'nomask':
raster_file = big_raster_file_nomask

if use_chunks:
pytest.importorskip('crick')

with rasterio.open(str(big_raster_file_nodata)) as src:
with rasterio.open(str(raster_file)) as src:
data = src.read(1, masked=True)
valid_data = data.compressed()
dataset_shape = list(rasterio.features.dataset_features(
Expand All @@ -407,7 +413,12 @@ def test_compute_metadata(big_raster_file_nodata, use_chunks):
convex_hull = MultiPolygon([shape(s['geometry']) for s in dataset_shape]).convex_hull

# compare
mtd = RasterDriver.compute_metadata(str(big_raster_file_nodata), use_chunks=use_chunks)
if nodata_type == 'nomask':
with pytest.warns(UserWarning) as record:
mtd = RasterDriver.compute_metadata(str(raster_file), use_chunks=use_chunks)
assert 'does not have a valid nodata value' in str(record[0].message)
else:
mtd = RasterDriver.compute_metadata(str(raster_file), use_chunks=use_chunks)

np.testing.assert_allclose(mtd['valid_percentage'], 100 * valid_data.size / data.size)
np.testing.assert_allclose(mtd['range'], (valid_data.min(), valid_data.max()))
Expand All @@ -418,19 +429,19 @@ def test_compute_metadata(big_raster_file_nodata, use_chunks):
np.testing.assert_allclose(
mtd['percentiles'],
np.percentile(valid_data, np.arange(1, 100)),
rtol=2e-2
rtol=2e-2, atol=valid_data.max() / 100
)

assert geometry_mismatch(shape(mtd['convex_hull']), convex_hull) < 1e-8


@pytest.mark.parametrize('raster_type', ['nodata', 'masked'])
def test_compute_metadata_approximate(raster_type, big_raster_file_nodata, big_raster_file_mask):
@pytest.mark.parametrize('nodata_type', ['nodata', 'masked'])
def test_compute_metadata_approximate(nodata_type, big_raster_file_nodata, big_raster_file_mask):
from terracotta.drivers.raster_base import RasterDriver

if raster_type == 'nodata':
if nodata_type == 'nodata':
raster_file = big_raster_file_nodata
elif raster_type == 'masked':
elif nodata_type == 'masked':
raster_file = big_raster_file_mask

with rasterio.open(str(raster_file)) as src:
Expand Down
11 changes: 7 additions & 4 deletions tests/scripts/test_optimize_rasters.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def test_optimize_rasters(unoptimized_raster_file, tmpdir, in_memory, reproject,

runner = CliRunner()

flags = ['--compression', compression]
flags = ['--compression', compression, '-q']

if in_memory is not None:
flags.append('--in-memory' if in_memory else '--no-in-memory')
Expand Down Expand Up @@ -52,7 +52,7 @@ def test_optimize_rasters_nofiles(tmpdir):

input_pattern = str(tmpdir.dirpath('*.tif'))
runner = CliRunner()
result = runner.invoke(cli.cli, ['optimize-rasters', input_pattern, '-o', str(tmpdir)])
result = runner.invoke(cli.cli, ['optimize-rasters', input_pattern, '-o', str(tmpdir), '-q'])

assert result.exit_code == 0
assert 'No files given' in result.output
Expand All @@ -62,7 +62,7 @@ def test_optimize_rasters_invalid(tmpdir):
from terracotta.scripts import cli

runner = CliRunner()
result = runner.invoke(cli.cli, ['optimize-rasters', str(tmpdir), '-o', str(tmpdir)])
result = runner.invoke(cli.cli, ['optimize-rasters', str(tmpdir), '-o', str(tmpdir), '-q'])

assert result.exit_code != 0
assert 'not a file' in result.output
Expand All @@ -88,7 +88,10 @@ def test_optimize_rasters_multiband(tmpdir, unoptimized_raster_file):
outfile = tmpdir / 'co' / unoptimized_raster_file.basename

runner = CliRunner()
result = runner.invoke(cli.cli, ['optimize-rasters', input_pattern, '-o', str(tmpdir / 'co')])
result = runner.invoke(
cli.cli,
['optimize-rasters', input_pattern, '-o', str(tmpdir / 'co')]
)

assert result.exit_code == 0
assert 'has more than one band' in result.output
Expand Down

0 comments on commit 7aa0751

Please sign in to comment.