# Zonal Stats over time

Inputs:
* Product: `rainfall_grids_1998_2017`
* Variable: `rainfall`
* Aggregate Function: `mean`
* Zones: `KHM_Catch8_m_del.shp` or `KHM_Basin_Simple_A.shp` 

In [1]:
%matplotlib inline
import fiona
import rasterio.features
import xarray as xr
import rasterio.features
import xarray
import datacube
dc = datacube.Datacube(config='/g/data/u46/users/ext547/ewater/cambodia_cube/cambodia.conf')


from shapely.geometry import asShape
from shapely.geometry import MultiPolygon, Polygon

import pandas as pd
import dask
from distributed import Client, LocalCluster

# Specify location and name of catchment shapefile

In [2]:
# shape_file = '/g/data/u46/users/ext547/ewater/input_data/Cambodia_boundary/KHM_Basin_Simple_A.shp'
# shape_file = '/g/data/u46/users/adh547/cambodia/vector/catchments/KHM_Catch8_m_del.shp'
shape_file = '/g/data/u46/users/ext547/ewater/input_data/Cambodia_boundary/KHM_Catch8_subset.shp'
# shape_file = '/g/data/u46/users/ext547/ewater/input_data/Cambodia_boundary/KHM_Simple_subset.shp'
# shape_file = '/g/data/u46/users/ext547/ewater/input_data/Cambodia_boundary/KHM_Simple_subset_v2.shp'

# define functions

In [3]:
def geometry_mask(geoms, geobox, all_touched=False, invert=False):
    """
    Create a mask from shapes.

    By default, mask is intended for use as a
    numpy mask, where pixels that overlap shapes are False.
    :param list[Geometry] geoms: geometries to be rasterized
    :param datacube.utils.GeoBox geobox:
    :param bool all_touched: If True, all pixels touched by geometries will be burned in. If
                             false, only pixels whose center is within the polygon or that
                             are selected by Bresenham's line algorithm will be burned in.
    :param bool invert: If True, mask will be True for pixels that overlap shapes.
    """
    data = rasterio.features.geometry_mask([geom.to_crs(geobox.crs) for geom in geoms],
                                           out_shape=geobox.shape,
                                           transform=geobox.affine,
                                           all_touched=all_touched,
                                           invert=invert)
    coords = [xr.DataArray(data=coord.values, name=dim, dims=[dim], attrs={'units': coord.units}) 
              for dim, coord in geobox.coords.items()]
    return xarray.DataArray(data, coords=coords)

In [4]:
def get_shapes(shape_file):
    with fiona.open(shape_file) as shapes:
        crs = datacube.utils.geometry.CRS(shapes.crs_wkt)
        for shape in shapes:
            geom = datacube.utils.geometry.Geometry(shape['geometry'], crs=crs)
            yield geom, shape['properties']

# Set up dask

In [5]:
# cluster = LocalCluster(local_dir='/local/u46/adh547/tmp')
cluster = LocalCluster(local_dir='/g/data/u46/users/ext547/ewater/working')

client = Client(cluster)
dask.config.set(get=client.get)
client

0,1
Client  Scheduler: tcp://127.0.0.1:38362  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 8  Cores: 8  Memory: 33.67 GB


#  Loop through catchments

## Set up catchment data

In [6]:
dc.list_products()
# dc.list_measurements()

Unnamed: 0_level_0,name,description,platform,lon,label,creation_time,lat,product_type,format,instrument,time,crs,resolution,tile_size,spatial_dimensions
id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
10,dem_3sec_hydrosheds,three sec hydrologically conditioned Digital E...,elevation,,,,,elevation,NETCDF,shuttle radar,,EPSG:4326,"[-0.00083, 0.00083]",,"(latitude, longitude)"
6,ls5_usgs_sr_scene,Landsat 5 USGS Collection 1 Level2 Surface Ref...,LANDSAT_5,,,,,LEDAPS,GeoTiff,TM,,"PROJCS[""WGS 84"",GEOGCS[""WGS 84"",DATUM[""WGS_198...","[-30, 30]",,"(y, x)"
2,ls7_usgs_sr_scene,Landsat 7 USGS Collection 1 Level2 Surface Ref...,LANDSAT_7,,,,,LEDAPS,GeoTiff,ETM,,"PROJCS[""WGS 84"",GEOGCS[""WGS 84"",DATUM[""WGS_198...","[-30, 30]",,"(y, x)"
1,ls8_usgs_sr_scene,Landsat 8 USGS Collection 1 Level2 Surface Ref...,LANDSAT_8,,,,,LaSRC,GeoTiff,OLI_TIRS,,"PROJCS[""WGS 84"",GEOGCS[""WGS 84"",DATUM[""WGS_198...","[-30, 30]",,"(y, x)"
5,ls_level2_geomedian_annual,Landsat Level-2 Annual GeoMedian,"LANDSAT_5,LANDSAT_7,LANDSAT_8",,,,,surface_reflectance_statistical_summary,GeoTiff,"TM,ETM,OLI_TIRS",,EPSG:32648,"[-30, 30]","[30720.0, 30720.0]","(y, x)"
9,pet_grids_1979_2016,Daily potential evapo-transpiration (PET) for ...,PET,,,,,PET,NETCDF,modelled,,EPSG:4326,"[-0.5, 0.5]",,"(latitude, longitude)"
7,rainfall_grids_1998_2017,Interpolated Rain Precipitation 3-hour values ...,rain,,,,,rainfall,NETCDF,rain gauge,,EPSG:4326,"[-0.25, 0.25]",,"(latitude, longitude)"
14,spei_monthly_grids_1998_2017,The Standardised Precipitation-Evapotranspirat...,spei,,,,,spei,NETCDF,modelled,,EPSG:4326,"[-0.5, 0.5]",,"(latitude, longitude)"
8,temperature_grids_1979_2016,Near surface air temperature at 2 m at three h...,temperature,,,,,temperature,NETCDF,modelled,,EPSG:4326,"[-0.5, 0.5]",,"(latitude, longitude)"
11,wofs_grids_1987_2017,Water Observations from Space (WOfS) for Cambo...,,,,,,water_classifier_statistical_summary,GeoTiff,,,EPSG:32648,"[-30, 30]",,"(y, x)"


In [7]:
product_id = 'ls5_usgs_sr_scene'
measurement_id = ['nir','red']

In [8]:
product = dc.index.products.get_by_name(product_id)
datasets = dc.find_datasets(product=product_id)
crs = product.grid_spec.crs
resolution = product.grid_spec.resolution
align = product.grid_spec.alignment

crs, resolution, align

(CRS('PROJCS["WGS 84",GEOGCS["WGS 84",DATUM["WGS_1984",SPHEROID["WGS 84",6378137,298.257223563,AUTHORITY["EPSG","7030"]],AUTHORITY["EPSG","6326"]],PRIMEM["Greenwich",0,AUTHORITY["EPSG","8901"]],UNIT["degree",0.0174532925199433,AUTHORITY["EPSG","9122"]],AUTHORITY["EPSG","4326"]],PROJECTION["Transverse_Mercator"],PARAMETER["latitude_of_origin",0],PARAMETER["central_meridian",105],PARAMETER["scale_factor",0.9996],PARAMETER["false_easting",500000],PARAMETER["false_northing",0],UNIT["metre",1,AUTHORITY["EPSG","9001"]],AXIS["Easting",EAST],AXIS["Northing",NORTH]]'),
 [-30, 30],
 (0.0, 0.0))

In [9]:
upsample = 1
hi_resolution = [r * upsample for r in resolution]
hi_resolution

[-30, 30]

In [10]:
shapes = get_shapes(shape_file)

## load data for catchments

In [11]:
from datacube.storage import masking
from datacube.storage.masking import mask_to_dict
import numpy as np

In [12]:
# #define temporal range ()
# start_of_epoch = '2010-01-01'
# end_of_epoch =  '2015-01-01'
# query = {'time': (start_of_epoch, end_of_epoch),}

# print(query)


In [13]:
loaded_xr = {}
for geometry, properties in shapes:
    geobox = datacube.utils.geometry.GeoBox.from_geopolygon(geometry, hi_resolution, crs, align)
    sensor_nbar = dc.load(product=product_id, 
               measurement=measurement_id, 
               datasets=datasets, 
               dask_chunks={'time': 1}, 
               geopolygon=geometry,
               group_by='solar_day', 
               resolution=hi_resolution,)
    #retrieve the projection information before masking/sorting
    crs = sensor_nbar.crs
    crswkt = sensor_nbar.crs.wkt
    affine = sensor_nbar.affine
    #assign pq data variable
    sensor_pq= sensor_nbar.pixel_qa
    mask_components = {'cloud_shadow': 'no_cloud_shadow',
                           'cloud': 'no_cloud',}
    good_data = masking.make_mask(sensor_pq, **mask_components)
#     good_data = quality_mask.loc[start_of_epoch:end_of_epoch]
    sensor_nbar2 = sensor_nbar.where(good_data)
    del sensor_nbar
    del good_data
    SCID = properties['SCID']    

    print(SCID)
    
    mask = geometry_mask([geometry], geobox, all_touched=True, invert=True)
    data_array = None
    data_array = (sensor_nbar2.nir - sensor_nbar2.red)/(sensor_nbar2.nir+sensor_nbar2.red)
    data_array = data_array.where(mask)
    loaded_array = None
    loaded_array = data_array.mean(dim=['y', 'x'])
    col = loaded_array.time.values
#     print(col)
    loaded_array02 = None
    loaded_array02 = loaded_array.load()
    loaded_array02['ID'] = ('time', np.repeat(SCID, loaded_array02.time.size))
    
    del data_array
    
    loaded_xr[SCID] = loaded_array02

    
    del loaded_array
    del loaded_array02

# loaded_clean = xr.concat(loaded_xr.values(), 'time')
# loaded_clean = loaded_clean.sortby('time')
# loaded_clean.attrs['crs'] = crs
# loaded_clean.attrs['affin|e'] = affine   
    
# 
#     loaded_pd = pd.DataFrame.from_dict(loaded_xr)
#     loaded_pd.index = col.astype('datetime64[D]')

17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75




76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93




94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127




KeyboardInterrupt: 

In [14]:
loaded_pd = pd.DataFrame.from_dict(loaded_xr)#, orient='index')
loaded_pd.index = col.astype('datetime64[D]')

masked_pd = loaded_pd.where(loaded_pd != 0.000000)

In [15]:
loaded_pd.min()

17     0.000000e+00
18    -2.065186e-01
19    -1.881070e-07
20    -3.201579e-01
21     0.000000e+00
22             -inf
23    -3.887064e-01
24    -2.450082e-01
25    -1.875195e-01
26    -5.225484e-02
27    -3.337842e-02
28    -1.050758e-01
29    -3.708788e-01
30    -3.933411e-01
31    -3.739552e-01
32    -2.846465e-01
33    -1.622867e-01
34    -1.134261e-01
35    -5.029398e-02
36     0.000000e+00
37    -3.609331e-01
38    -2.531238e-01
39    -2.427903e-01
40    -1.696390e-01
41    -2.069141e-01
42    -1.228837e-01
43    -1.482246e-05
44     0.000000e+00
45             -inf
46    -3.558588e-01
           ...     
97    -5.770559e-02
98    -2.031520e-01
99    -1.456386e-01
100    0.000000e+00
101            -inf
102   -3.966546e-01
103   -3.054159e-01
104   -1.772641e-01
105            -inf
106   -3.812390e-01
107            -inf
108   -3.436576e-01
109   -1.343124e-01
110   -1.982192e-01
111   -2.178633e-01
112   -6.959244e-02
113   -2.790760e-01
114   -1.272558e-01
115   -2.213610e-03


In [16]:
csv_out = '/g/data/u46/users/ext547/ewater/input_data/Cambodia_boundary/'+str(product_id)+'_smallcatch_v2_out.csv'
masked_pd.to_csv(csv_out)

In [None]:
loaded_xr

In [None]:
# loaded_pd = pd.DataFrame.from_dict(loaded_xr)
# loaded_pd.head()

In [None]:
# ls7_masked_pd = loaded_pd.where(loaded_pd != 0.000000)
# ls7_masked_pd

In [None]:
ls5_masked_pd = loaded_pd.where(loaded_pd != 0.000000)
ls5_masked_pd.head()

In [None]:
from matplotlib import pyplot as plt

In [None]:
csv_out = '/g/data/u46/users/ext547/ewater/input_data/Cambodia_boundary/'+str(start_of_epoch)+'_'+str(product_id)+'_out.csv'
ls5_masked_pd.to_csv(csv_out)

In [None]:
datacube.model.__file__

In [None]:
for k, v in loaded_array.data.dask.items():
    v.__getstate__()
    

In [None]:
stop

In [None]:
loaded_xr = {}
for geometry, properties in shapes:
    geobox = datacube.utils.geometry.GeoBox.from_geopolygon(geometry, hi_resolution, crs, align)
    data = dc.load(product='spei_monthly_grids_1998_2017', 
               measurement='data.spei_gamma_03', 
               datasets=datasets, 
               dask_chunks={'time': 1}, 
               geopolygon=geometry,
               resolution=hi_resolution)
    SCID = properties['SCID']
    print(SCID)
    mask = geometry_mask([geometry], geobox, all_touched=True, invert=True)
    rain_array = data.spei_gamma_03.where(data.spei_gamma_03 > -3.1).where(mask)
    loaded = rain_array.mean(dim=['latitude', 'longitude']).load();
    loaded_xr[SCID] = loaded
print(loaded_xr)

loaded_pd = pd.DataFrame.from_dict(loaded_xr)

col = loaded.time.values
loaded_pd.index = col.astype('datetime64[D]')

In [None]:
loaded_pd

In [None]:
csv_out = '/g/data/u46/users/ext547/ewater/input_data/Cambodia_boundary/'+str(measurement_id)+'_287_csv_out.csv'
loaded_pd.to_csv(csv_out)

In [None]:
shapes = get_shapes(shape_file)

In [None]:
geometry, properties = next(shapes)
print(f"{int(geometry.area / (1000*1000))} km^2")
asShape(geometry)

In [None]:
geobox = datacube.utils.geometry.GeoBox.from_geopolygon(geometry, hi_resolution, crs, align)
mask = geometry_mask([geometry], geobox, all_touched=True, invert=True)

In [None]:
properties

In [None]:
mask

In [None]:
mask.plot(size=6, aspect=(mask.shape[1]/mask.shape[0]), add_colorbar=False);

In [None]:
asShape(geometry)

In [None]:
data = dc.load(product=product_id, 
               measurement=measurement_id, 
               datasets=datasets, 
               dask_chunks={'time': 1}, 
               geopolygon=geometry,
               resolution=hi_resolution)

In [None]:
data

In [None]:
rain_array = data.spei_gamma_03 .where(data.spei_gamma_03  > -3.1).where(mask)

In [None]:
rain_array

In [None]:
rain_array.values.max()

In [None]:
loaded = rain_array.mean(dim=['latitude', 'longitude']).load();

In [None]:
loaded

In [None]:
mean_rain = rain_array.mean(dim=['latitude', 'longitude'])
mean_rain

In [None]:
mean_rain.values.max()

In [None]:
mean_rain.isel(time=4).values

In [None]:
mean_rain.isel(time=slice(10,15)).plot()

In [None]:
rain_array[:20].load().plot(col='time', col_wrap=5, size=5, aspect=(mask.shape[1]/mask.shape[0]), add_colorbar=True)