## Extracting seasonal rainfall anomalies

This script takes as input a `shapefile` containing any number of uniquely identifiable polygons and exports a NetCDF of per-pixel seasonal rainfall anomalies, and a CSV of zonally averaged seasonal rainfall anomalies. Go to the `User Inputs` section and enter the relevant information, then run the script. The code is set up to be multiprocessed (see `ncpus`), although the polygon loop currently runs on a single CPU.



In [None]:
import datacube
from datacube.utils import geometry
from datacube.utils.geometry import CRS
from dask.distributed import Client
import fiona
import rasterio.mask
import rasterio.features
import xarray as xr
import os
import pandas as pd
import warnings
from multiprocessing import Pool

# Connection details for the datacube database; these are handed to
# `datacube.Datacube` below to open the datacube used by the rest of the
# notebook.
config = dict(
    db_hostname='agdcdev-db.nci.org.au',
    db_port=6432,
    db_database='dg6911',
)

dc = datacube.Datacube(config=config)

### Set up local dask cluster

In [None]:
# Re-running this cell should not leave an earlier dask client open: close and
# drop any existing `client` before starting a fresh local cluster.
if locals().get('client') is not None:
    client.close()
    del client

client = Client(n_workers=4, threads_per_worker=1, memory_limit='6GB')
client

### User Inputs

In [None]:
# Path to the shapefile holding the polygons to process
shp_path = "data/northern_basins.shp"

# Time range of rainfall data to extract from the datacube
time_range = ('1990-06-01', '2019-02-28')

# Time range over which the climatology (anomaly baseline) is calculated
anom_range = ('1990-06-01', '2011-02-28')

# Attribute column in the shapefile that identifies each polygon
columnName = 'DNAME'

# Projection of the output
projection = 'EPSG:3577'

# Resolution of the output, passed to the datacube query as-is
resolution = (-750, 750)

# Directory the results are written to
output_dir = 'results/'

# Number of CPUs the analysis should run on
ncpus = 1

### extract data and export to disk

In [None]:
def RainfallFromShape(feat, crs, time_range, anom_range, colummName, output_dir, anomaly_type='standardised'):
    """Write per-pixel seasonal rainfall anomalies for one polygon to NetCDF.

    Loads the `accum_prcp_monthly` product for the polygon's footprint, masks
    it to the polygon, sums it into quarterly seasons and compares every
    season in `time_range` against a per-season climatology calculated over
    `anom_range`.

    Relies on the notebook-level names `dc` (datacube connection),
    `projection` and `resolution`.

    Parameters
    ----------
    feat : mapping
        A shapefile feature exposing `feat['geometry']` and
        `feat['properties']` (as yielded by iterating a `fiona` collection).
    crs : datacube.utils.geometry.CRS
        Coordinate reference system of the feature's geometry.
    time_range : tuple of str
        (start, end) dates of the rainfall data to load.
    anom_range : tuple of str
        (start, end) dates of the baseline period used for the climatology.
    colummName : str
        Name of the attribute in `feat['properties']` that identifies the
        polygon; its value is used in log output and output file names.
        (The spelling of the parameter name is kept for backward
        compatibility.)
    output_dir : str
        Directory the NetCDF file is written to.
    anomaly_type : str, optional
        'standardised' (default) writes (value - mean) / std to
        `<name>_rainfall_STDanomaly.nc`; any other value writes
        (value - mean) to `<name>_rainfall_anomaly.nc`.

    Returns
    -------
    None
        The result is written to disk.
    """
    polygon = geometry.Geometry(feat['geometry'], crs=crs)
    # Read the identifier from the column the caller asked for (not from a
    # notebook global); str() so that numeric identifiers can be used in the
    # log message and file name as well.
    poly_name = str(feat['properties'][colummName])

    # Build the datacube query; dask chunks keep the load lazy until compute().
    query = {
        'geopolygon': polygon,
        'time': time_range,
        'output_crs': CRS(projection),
        'resolution': resolution,
        'dask_chunks': {'x': 250, 'y': 250},
    }

    print(f'Working on polygon: {poly_name}')
    accum_prcp = dc.load(product='accum_prcp_monthly', **query)

    # Rasterise the polygon onto the loaded grid. With invert=False the mask
    # is True *outside* the polygon, so it is negated before use.
    outside = rasterio.features.geometry_mask(
        [polygon.to_crs(accum_prcp.geobox.crs)],
        out_shape=accum_prcp.geobox.shape,
        transform=accum_prcp.geobox.affine,
        all_touched=False,
        invert=False)
    accum_prcp = accum_prcp.where(~xr.DataArray(outside, dims=('y', 'x')))

    # Resample monthly totals to quarterly totals.
    # NOTE(review): `sum` presumably skips NaN, so pixels masked out above
    # may come back as 0 rather than NaN - confirm this is intended.
    prcp_quarterly = accum_prcp.resample(time='QS-JUN').sum('time')

    # Per-season climatology over the baseline period only.
    baseline = prcp_quarterly.sel(time=slice(anom_range[0], anom_range[1]))
    climatology_mean = baseline.groupby('time.season').mean('time')

    if anomaly_type == 'standardised':
        climatology_std = baseline.groupby('time.season').std('time')
        standard_anom = xr.apply_ufunc(
            lambda x, m, s: (x - m) / s,
            prcp_quarterly.groupby('time.season'),
            climatology_mean, climatology_std,
            dask='allowed')
        # Trigger the dask computation, then export.
        standard_anom = standard_anom.compute()
        standard_anom.to_netcdf(
            os.path.join(output_dir, f"{poly_name}_rainfall_STDanomaly.nc"))

    else:
        # Anomalies cover the full time range (as in the standardised
        # branch), measured against the baseline climatology.
        anomalies = prcp_quarterly.groupby('time.season') - climatology_mean
        # Trigger the dask computation, then export.
        anomalies = anomalies.compute()
        anomalies.to_netcdf(
            os.path.join(output_dir, f"{poly_name}_rainfall_anomaly.nc"))

    # CSV export of the zonal mean is currently disabled:
    #     df = anomalies.accum_prcp.mean(['x', 'y']).to_pandas()
    #     df.to_csv(output_dir + poly_name + "_rainfall_anomaly.csv")


In [None]:
# Read the shapefile's coordinate reference system once; it is passed to
# RainfallFromShape for every polygon. The handle is named `src` so the
# builtin `input` is not shadowed for the rest of the session.
with fiona.open(shp_path) as src:
    crs = geometry.CRS(src.crs_wkt)

In [None]:
warnings.filterwarnings("ignore")

# Process the polygons one at a time (single CPU). The shapefile is opened in
# a context manager so it is closed once every feature has been handled. No
# worker pool is created here because nothing in this loop would use it.
with fiona.open(shp_path) as features:
    for feat in features:
        RainfallFromShape(feat, crs, time_range, anom_range, columnName,
                          output_dir, anomaly_type='standardised')

# MULTIPROCESS alternative (untested - confirm before enabling), to be used
# in place of the direct call above:
#     with Pool(ncpus) as p:
#         p.apply_async(RainfallFromShape,
#                       [feat, crs, time_range, anom_range, columnName, output_dir])

In [None]:
import xarray as xr

In [None]:

# NOTE(review): `x` is only assigned in a later cell, so this cell appears to
# fail with a NameError when the notebook is run top to bottom - confirm
# whether the cell is still needed.
x

In [None]:
# Open the standardised anomaly file for one polygon, keep the slice at
# time index 114 and plot it as a map.
anomaly_path = 'results/MURRAY-DARLING_rainfall_STDanomaly.nc'
x = xr.open_dataarray(anomaly_path).isel(time=114)
x.plot(figsize=(10, 10), cmap='RdBu')

In [None]:
# Average over both spatial dimensions, then over whatever remains
# (the time-series plot is left disabled).
spatial_mean = x.mean(['x', 'y'])
spatial_mean.mean()  # plot(figsize=(15,4))

In [None]:
# Facet plot of 20 consecutive time steps (indices 63-82), four panels per row.
# The full series is reopened here because `x` was reduced to a single time
# step when it was loaded, so it no longer has a `time` dimension to index.
anom_series = xr.open_dataarray('results/MURRAY-DARLING_rainfall_STDanomaly.nc')
anom_series.isel(time=range(63, 83)).plot(col='time', col_wrap=4, cmap='BrBG', vmin=-2.0, vmax=2.0)