# Zonal Stats over time

Inputs:
* Product: `rainfall_grids_1998_2017`
* Variable: `rainfall`
* Aggregate Function: `mean`
* Zones: `KHM_Catch8_m_del.shp` or `KHM_Basin_Simple_A.shp` 

In [1]:
import fiona
import rasterio.features
import xarray as xr
import rasterio.features
import xarray
import datacube
# Open a Data Cube handle using the explicit configuration file below;
# `dc` is the entry point used later for product lookup, dataset search and loading.
dc = datacube.Datacube(config='/g/data/u46/users/ext547/ewater/cambodia_cube/cambodia.conf')


from shapely.geometry import asShape
from shapely.geometry import MultiPolygon, Polygon

import pandas as pd
import dask
from distributed import Client, LocalCluster

# Specify location and name of catchment shapefile

In [2]:
# Path of the vector file whose features define the zones; it is read by get_shapes().
# The commented-out lines are alternative inputs kept for quick switching.
shape_file = '/g/data/u46/users/ext547/ewater/input_data/Cambodia_boundary/KHM_Basin_Simple_A.shp'
# shape_file = '/g/data/u46/users/adh547/cambodia/vector/catchments/KHM_Catch8_m_del.shp'
# shape_file = '/g/data/u46/users/ext547/ewater/input_data/Cambodia_boundary/test.shp'

# define functions

In [3]:
def geometry_mask(geoms, geobox, all_touched=False, invert=False):
    """
    Rasterize geometries onto the grid of a geobox and return a labelled mask.

    With the default ``invert=False`` the result follows the numpy mask
    convention: pixels that overlap a shape are False.

    :param list[Geometry] geoms: geometries to be rasterized; each one is
                                 reprojected to the CRS of ``geobox`` first
    :param datacube.utils.GeoBox geobox: grid supplying the CRS, output shape,
                                         affine transform and coordinates
    :param bool all_touched: If True, all pixels touched by geometries will be burned in. If
                             false, only pixels whose center is within the polygon or that
                             are selected by Bresenham's line algorithm will be burned in.
    :param bool invert: If True, mask will be True for pixels that overlap shapes.
    :return: xarray.DataArray holding the mask, with one coordinate per geobox
             dimension (each carrying a ``units`` attribute)
    """
    # Bring every geometry into the grid's CRS before burning it in.
    reprojected = [shape.to_crs(geobox.crs) for shape in geoms]
    burned = rasterio.features.geometry_mask(
        reprojected,
        out_shape=geobox.shape,
        transform=geobox.affine,
        all_touched=all_touched,
        invert=invert,
    )

    # Wrap each geobox axis as a named 1-D coordinate so the mask can be
    # combined with other labelled arrays on the same grid.
    axes = []
    for axis_name, axis in geobox.coords.items():
        axes.append(
            xr.DataArray(data=axis.values, name=axis_name, dims=[axis_name], attrs={'units': axis.units})
        )
    return xarray.DataArray(burned, coords=axes)

In [4]:
def get_shapes(shape_file):
    """
    Iterate over the features stored in a vector file.

    This is a generator: it yields one ``(geometry, properties)`` pair per
    feature, where ``geometry`` is a datacube Geometry tagged with the file's
    CRS and ``properties`` is the feature's ``'properties'`` record. The file is
    opened on first iteration and closed when the generator finishes.

    :param shape_file: path of the vector file, passed to ``fiona.open``
    """
    geometry_api = datacube.utils.geometry
    with fiona.open(shape_file) as source:
        # One CRS object, built from the file's WKT, is shared by all features.
        source_crs = geometry_api.CRS(source.crs_wkt)
        for feature in source:
            zone = geometry_api.Geometry(feature['geometry'], crs=source_crs)
            yield zone, feature['properties']

# Set up dask

In [5]:
# Start a local dask.distributed cluster and connect a client to it.
# local_dir is presumably the workers' scratch directory — confirm it exists and is writable.
cluster = LocalCluster(local_dir='/local/u46/adh547/tmp')
client = Client(cluster)
# Send dask computations through the distributed client.
# NOTE(review): newer dask releases deprecate the `get=` keyword in favour of `scheduler=` —
# confirm against the installed dask version before upgrading the environment.
dask.config.set(get=client.get)
# Bare expression: displays the client summary as the cell output.
client

tornado.application - ERROR - Multiple exceptions in yield list
Traceback (most recent call last):
  File "/g/data/v10/public/modules/dea-env/20180629/lib/python3.6/site-packages/tornado/gen.py", line 1069, in run
    yielded = self.gen.send(value)
  File "/g/data/v10/public/modules/dea-env/20180629/lib/python3.6/site-packages/distributed/deploy/local.py", line 212, in _start_worker
    raise gen.TimeoutError("Worker failed to start")
tornado.gen.TimeoutError: Worker failed to start

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/g/data/v10/public/modules/dea-env/20180629/lib/python3.6/site-packages/tornado/gen.py", line 828, in callback
    result_list.append(f.result())
  File "/g/data/v10/public/modules/dea-env/20180629/lib/python3.6/site-packages/tornado/concurrent.py", line 238, in result
    raise_exc_info(self._exc_info)
  File "<string>", line 4, in raise_exc_info
  File "/g/data/v10/public/modules/dea-env/201806

tornado.application - ERROR - Multiple exceptions in yield list
Traceback (most recent call last):
  File "/g/data/v10/public/modules/dea-env/20180629/lib/python3.6/site-packages/tornado/gen.py", line 828, in callback
    result_list.append(f.result())
  File "/g/data/v10/public/modules/dea-env/20180629/lib/python3.6/site-packages/tornado/concurrent.py", line 238, in result
    raise_exc_info(self._exc_info)
  File "<string>", line 4, in raise_exc_info
  File "/g/data/v10/public/modules/dea-env/20180629/lib/python3.6/site-packages/tornado/gen.py", line 1069, in run
    yielded = self.gen.send(value)
  File "/g/data/v10/public/modules/dea-env/20180629/lib/python3.6/site-packages/distributed/deploy/local.py", line 212, in _start_worker
    raise gen.TimeoutError("Worker failed to start")
tornado.gen.TimeoutError: Worker failed to start
tornado.application - ERROR - Multiple exceptions in yield list
Traceback (most recent call last):
  File "/g/data/v10/public/modules/dea-env/20180629/lib

TimeoutError: Worker failed to start

# Loop through catchments

## Set up catchment data

In [16]:
# Look up the product definition and collect the datasets found for that product.
product = dc.index.products.get_by_name('rainfall_grids_1998_2017')
datasets = dc.find_datasets(product='rainfall_grids_1998_2017')
# Grid parameters taken from the product's grid spec; reused below to build per-zone grids.
crs = product.grid_spec.crs
resolution = product.grid_spec.resolution
align = product.grid_spec.alignment

# Bare expression: displayed as the cell output for a quick sanity check.
crs, resolution, align

(CRS('EPSG:4326'), [-0.25, 0.25], (0.0, 0.0))

In [17]:
# Scale factor applied to each component of the product resolution:
# 0.01 turns the [-0.25, 0.25] grid into a [-0.0025, 0.0025] grid, i.e. 100x finer cells.
upsample = 0.01
hi_resolution = [r * upsample for r in resolution]
# Bare expression: displayed as the cell output.
hi_resolution

[-0.0025, 0.0025]

In [18]:
# get_shapes() is a generator, so `shapes` can be iterated only once;
# re-run this line before iterating over the zones again.
shapes = get_shapes(shape_file)

## load data for catchments

In [None]:
# Compute the spatial-mean rainfall time series of every zone in `shapes`.
# Results are keyed by the zone's 'SCID' property and then assembled into a
# DataFrame with one column per zone and one row per time step.
# NOTE: `shapes` is a generator; once this loop has consumed it, it must be
# re-created with get_shapes(shape_file) before the loop is run again.
loaded_xr = {}
loaded = None  # reset so an empty/exhausted `shapes` cannot reuse a stale result
for geometry, properties in shapes:
    # Grid covering this zone at the upsampled resolution; used to rasterize the mask.
    # Assumes dc.load derives the same grid from geopolygon + resolution — TODO confirm.
    geobox = datacube.utils.geometry.GeoBox.from_geopolygon(geometry, hi_resolution, crs, align)
    # `measurements` (plural, a list) is the load parameter that restricts the
    # variables read; the former `measurement=` keyword was not applied as a filter.
    data = dc.load(product='rainfall_grids_1998_2017',
                   measurements=['rainfall'],
                   datasets=datasets,
                   dask_chunks={'time': 1},
                   geopolygon=geometry,
                   resolution=hi_resolution)
    SCID = properties['SCID']
    print(SCID)
    # invert=True: mask is True inside the zone, so .where(mask) keeps zone pixels only.
    mask = geometry_mask([geometry], geobox, all_touched=True, invert=True)
    # Drop values <= -1 (presumably a nodata sentinel — verify against the product
    # definition), then drop everything outside the zone.
    rain_array = data.rainfall.where(data.rainfall > -1).where(mask)
    # .load() triggers the dask computation and keeps only the 1-D time series.
    loaded = rain_array.mean(dim=['latitude', 'longitude']).load()
    loaded_xr[SCID] = loaded
print(loaded_xr)

loaded_pd = pd.DataFrame.from_dict(loaded_xr)

# Label the rows with the time axis of the last zone processed (all zones are
# loaded from the same `datasets`). Skipped when no zone was processed.
if loaded is not None:
    col = loaded.time.values
    loaded_pd.index = col

1


In [None]:
# Show the per-zone rainfall table. print() must be called as a function:
# the statement form is a SyntaxError under Python 3, which this notebook runs on.
print(loaded_pd)

In [None]:
# Write the per-zone rainfall table to CSV at the path below.
csv_out = '/g/data/u46/users/ext547/ewater/input_data/Cambodia_boundary/csv_out.csv'
loaded_pd.to_csv(csv_out)

In [None]:
# %matplotlib inline

In [None]:
# import pandas as pd
# import dask
# from distributed import Client, LocalCluster

In [None]:
# crs, resolution, align

In [None]:
# datasets = dc.find_datasets(product='rainfall_grids_1998_2017')
# len(datasets)

In [None]:
# geometry, properties = next(shapes)

In [None]:
# properties

In [None]:
# mp = MultiPolygon([asShape(geometry) for geometry, _ in get_shapes(shape_file)])
# mp

In [None]:
# geobox = datacube.utils.geometry.GeoBox.from_geopolygon(geometry, hi_resolution, crs, align)

In [None]:
# mask = geometry_mask([geometry], geobox, all_touched=True, invert=True)

In [None]:
# data = dc.load(product='rainfall_grids_1998_2017', 
#                measurement='rainfall', 
#                datasets=datasets, 
#                dask_chunks={'time': 1}, 
#                geopolygon=geometry,
#                resolution=hi_resolution)

In [None]:
# [str(d.center_time.date()) for d in datasets]

In [None]:
# geometry, properties = next(shapes)
# print(f"{int(geometry.area / (1000*1000))} km^2")
# asShape(geometry)

In [None]:
# geometry, properties = next(shapes)
# print(f"{int(geometry.area / (1000*1000))} km^2")
# asShape(geometry)

In [None]:
# import fiona
# import rasterio.features
# import xarray as xr

# import datacube

In [None]:
# from shapely.geometry import asShape
# from shapely.geometry import MultiPolygon, Polygon

In [None]:
# dc = datacube.Datacube(config='/g/data/u46/users/ext547/ewater/cambodia_cube/cambodia.conf')
# dc.list_products()

In [None]:
# properties

In [None]:
# mask

In [None]:
# mask.plot(size=6, aspect=(mask.shape[1]/mask.shape[0]), add_colorbar=False);

In [None]:
# asShape(geometry)

In [None]:
# data = dc.load(product='rainfall_grids_1998_2017', 
#                measurement='rainfall', 
#                datasets=datasets, 
#                dask_chunks={'time': 1}, 
#                geopolygon=geometry,
#                resolution=hi_resolution)

In [None]:
# data.rainfall[0].plot()

In [None]:
# rain_array = data.rainfall.where(data.rainfall > -1).where(mask)

In [None]:
# rain_array.isel(time=0).plot()

In [None]:
# loaded = rain_array.mean(dim=['latitude', 'longitude']).load();

In [None]:
# loaded.isel(time=4)

In [None]:
# mean_rain.isel(time=4)

In [None]:
# loaded.plot()

In [None]:
# mean_rain = rain_array.mean(dim=['latitude', 'longitude'])
# mean_rain

In [None]:
# mean_rain.isel(time=4).values

In [None]:
# from dask import dataframe as dd

In [None]:
# df = dd.from_dask_array(mean_rain.data, columns=[properties['SC_ID']])

In [None]:
# df.join?

In [None]:
# rain_array[:20].load().plot(col='time', col_wrap=5, size=5, aspect=(mask.shape[1]/mask.shape[0]), add_colorbar=False);