## Extracting seasonal rainfall anomalies

This script takes as input a `shapefile` containing any number of uniquely identifiable polygons and, for each polygon, exports a netCDF of per-pixel seasonal rainfall anomalies and a CSV of zonally averaged seasonal rainfall anomalies. Go to the `User Inputs` section, enter the relevant information, then run the script. The code is multiprocessed.



In [None]:
import datacube
from datacube.utils import geometry
from datacube.utils.geometry import CRS
import fiona
import rasterio.mask
import rasterio.features
import xarray as xr
import os
import pandas
import warnings
from multiprocessing import Pool

# A datacube config is required; these settings identify the database to connect to:
config = dict(
    db_hostname='agdcdev-db.nci.org.au',
    db_port=6432,
    db_database='dg6911',
)

dc = datacube.Datacube(config=config)

### User Inputs

In [None]:
# Shapefile holding the polygons to process.
shp_path = 'data/IrrigationDistrict.shp'

# (start, end) dates of the period to extract from the datacube.
time_range = ('1988-12-01', '2019-02-28')

# Shapefile attribute column whose value identifies each polygon.
columnName = 'Scheme'

# Coordinate reference system of the output.
projection = 'EPSG:3577'

# Output resolution, passed to the datacube query as-is.
resolution = (-500, 500)

# Prefix for every output file; the polygon name is appended directly to it.
output_dir = 'data/rainfall/tas_irrigation_'

# Number of worker processes (CPUs) to run the analysis on.
ncpus = 4

### extract data and export to disk

In [None]:
def RainfallFromShape(feat, crs, time_range, colummName, output_dir):
    """Export seasonal rainfall anomalies for a single polygon feature.

    Loads the ``accum_prcp_monthly`` product over the polygon, blanks out the
    pixels that fall outside it, averages the data into 'QS-DEC' quarters and
    subtracts the per-season mean of the whole record. Two files are written:
    ``<output_dir><name>_rainfall_anomaly.nc`` (per-pixel anomalies) and
    ``<output_dir><name>_rainfall_anomaly.csv`` (anomalies averaged over
    ``x`` and ``y``).

    The module-level ``dc``, ``projection`` and ``resolution`` are read
    directly rather than being passed in.

    Parameters
    ----------
    feat : mapping
        Feature record providing ``feat['geometry']`` and
        ``feat['properties']``.
    crs : CRS
        Coordinate reference system of the feature geometry.
    time_range : tuple
        Passed unchanged as the ``time`` term of the datacube query.
    colummName : str
        Key in ``feat['properties']`` whose value names the polygon. The
        misspelt parameter name is retained so keyword callers keep working.
    output_dir : str
        Prefix for the output filenames; the polygon name is appended by
        plain string concatenation, not a path join.

    Returns
    -------
    None
    """
    # Use the column passed in by the caller (not a global), and coerce to
    # str so numeric attribute values do not break the concatenations below.
    poly_name = str(feat['properties'][colummName])
    geom = geometry.Geometry(feat['geometry'], crs=crs)

    # generate query object
    query = {
        'geopolygon': geom,
        'time': time_range,
        'output_crs': CRS(projection),
        'resolution': resolution,
    }

    # get rainfall data
    print('Working on polygon: ' + poly_name)
    accum_prcp = dc.load(product='accum_prcp_monthly', **query)

    # An empty result has no data variables and therefore no geobox to build
    # the mask from; skip the polygon instead of failing on `.geobox`.
    if len(accum_prcp.data_vars) == 0:
        print('No rainfall data found for polygon: ' + poly_name)
        return

    # Rasterise the polygon onto the loaded grid. With invert=False the mask
    # is True for pixels OUTSIDE the polygon.
    outside = rasterio.features.geometry_mask(
        [geom.to_crs(accum_prcp.geobox.crs)],
        out_shape=accum_prcp.geobox.shape,
        transform=accum_prcp.geobox.affine,
        all_touched=False,
        invert=False)
    outside_xr = xr.DataArray(outside, dims=('y', 'x'))
    # keep only the pixels inside the polygon; the rest become NaN
    accum_prcp = accum_prcp.where(~outside_xr)

    # resample to quarterly and groupby seasons
    prcp_seasonal = accum_prcp.resample(time='QS-DEC').mean('time')
    prcp_seasonal = prcp_seasonal.groupby('time.season')
    # calculate climatologies
    climatology_mean = accum_prcp.groupby('time.season').mean('time')
    # Anomaly = seasonal value minus that season's climatological mean.
    # NOTE: there is no division by a standard deviation, so these are plain
    # (not standardised) anomalies.
    anomalies = xr.apply_ufunc(lambda x, m: x - m,
                               prcp_seasonal, climatology_mean,
                               dask='allowed')
    # export
    anomalies.to_netcdf(output_dir + poly_name + "_rainfall_anomaly.nc")
    df = anomalies.accum_prcp.mean(['x', 'y']).to_pandas()
    df.to_csv(output_dir + poly_name + "_rainfall_anomaly.csv")


In [None]:
# Read the shapefile's CRS once up front; it is the same for every feature.
# The handle is deliberately not called `input`, which would shadow the
# builtin for the rest of the session.
with fiona.open(shp_path) as shapefile:
    crs = geometry.CRS(shapefile.crs_wkt)

In [None]:
warnings.filterwarnings("ignore")

# Submit one task per shapefile feature, then wait for all of them to finish.
p = Pool(ncpus)
jobs = []
try:
    with fiona.open(shp_path) as features:
        for feat in features:
            # To run on a single CPU instead, call the function directly:
            #     RainfallFromShape(feat, crs, time_range, columnName, output_dir) # single-cpu
            jobs.append(
                p.apply_async(RainfallFromShape, [feat, crs, time_range, columnName, output_dir])  # MULTIPROCESS
            )
    # No more tasks will be submitted; block until every worker is done so
    # the outputs are complete before execution continues.
    p.close()
    p.join()
finally:
    # Release the worker processes even if submission failed part-way.
    p.terminate()

# apply_async keeps worker exceptions inside the result object; fetch each
# result so failures are reported instead of silently dropped. One failed
# polygon does not stop the others from being reported.
for job in jobs:
    try:
        job.get()
    except Exception as err:
        print('Polygon task failed: ' + repr(err))