# BARRA <> DEA Generate Cropped Monthly Summaries

In [1]:
import datacube
import rasterio
import time
import os
import numpy as np
from datacube.helpers import write_geotiff
from matplotlib import pyplot

## Optional helper functions

In [2]:
def write_barra_data(filename, target_dataset, source_affine, nodata=-1073741824):
    """Write ``target_dataset`` to ``filename`` through ``write_geotiff``.

    The profile passed to ``write_geotiff`` is overridden so that the output
    uses ``nodata`` as its nodata value and ``source_affine`` as its
    transform.
    """
    write_geotiff(
        filename,
        target_dataset,
        profile_override=dict(nodata=nodata, transform=source_affine),
    )

## Before loading BARRA data...

In [3]:
# Because GDAL struggles with the BARRA NetCDF format we have two choices:
# we leave everything untouched but excruciatingly slow
# or
# we use the following directive. This helps GDAL read the file
# metadata correctly but it has the unfortunate side effect of flipping our data.
# Depending on what other data sources you are reading from, you may need to
# explicitly set this flag back to YES to avoid any unforeseen consequences
#os.environ['GDAL_NETCDF_BOTTOMUP'] = 'NO'

# You will need a datacube config:
config = dict(
    db_hostname='agdcdev-db.nci.org.au',
    db_port=6432,
    db_database='dg6911',
)
# Datacube handle used by the load calls further down.
dc = datacube.Datacube(config=config)

In [4]:
# Spatial extent passed to every load call: (min, max) latitude and longitude.
query = {
    'latitude': (-44, -9),
    'longitude': (113, 154),
}

## Load BARRA faster using DASK

In [5]:
import dask
import dask.distributed

# Start a dask.distributed client on localhost: 8 single-threaded workers,
# each limited to 3G of memory.
client = dask.distributed.Client(
    ip='127.0.0.1',
    n_workers=8,
    threads_per_worker=1,
    memory_limit='3G',
)
client  # last expression of the cell, so the notebook displays the client

0,1
Client  Scheduler: tcp://127.0.0.1:46880  Dashboard: http://127.0.0.1:8787/status,Cluster  Workers: 8  Cores: 8  Memory: 24.00 GB


In [7]:
def generate_monthly_summary(month, year,
                             output_dir='/g/data/u46/users/dg6911/BARRA_Monthly/'):
    """Sum one month of ``accum_prcp`` and write the total to a GeoTIFF.

    Loads the ``accum_prcp`` product through the module-level ``dc`` for the
    extent in the module-level ``query``, masks out negative values, sums the
    remainder over the ``time`` dimension and writes the result with
    ``write_barra_data``.

    Parameters
    ----------
    month : str
        Month as text (e.g. ``'01'``); used in the time query and in the
        output file name.
    year : str
        Year as text (e.g. ``'1990'``); used in the time query and in the
        output file name.
    output_dir : str, optional
        Directory that receives ``<year><month>_total_accum_prcp.tiff``.
        Defaults to the location that was previously hard-coded.
    """
    accum = dc.load(product='accum_prcp',
                    dask_chunks={'time': 6},
                    time=year + '-' + month,
                    skip_broken_datasets=True,
                    **query)
    band = accum['accum_prcp']

    # Keep the transform and metadata of the loaded band; both are re-applied
    # to the summed result below.
    affine = band.affine
    attrs = band.attrs

    # The load was requested with dask chunks, so compute() is what actually
    # materialises the values.
    loaded_accum = band.compute()

    # Negative values are masked out before summing over time.
    # NOTE(review): drop=True can also trim coordinate labels where nothing
    # passes the mask, while the GeoTIFF is still written with the affine of
    # the untrimmed load -- confirm the grid is never shrunk here.
    filtered_sum = loaded_accum.where(loaded_accum >= 0, drop=True).sum(dim='time')

    ## turn back into dataset
    total_day_accumprcp_dataset = filtered_sum.to_dataset()
    total_day_accumprcp_dataset['accum_prcp'].attrs = attrs
    total_day_accumprcp_dataset.attrs = attrs

    ## write to disk
    output_path = os.path.join(output_dir, year + month + '_total_accum_prcp.tiff')
    write_barra_data(output_path, total_day_accumprcp_dataset, affine)

In [None]:
for year in range(1990, 2020):
    for month in range(1, 13):
        %time generate_monthly_summary(str(month).zfill(2),str(year))

In [None]:
# Months to run again; each entry is a two-digit month followed by a
# four-digit year.
months_to_reprocess = [
    '012010', '032014', '022010', '122013', '082012',
    '072011', '112014', '042014', '032010', '072014',
]
for stamp in months_to_reprocess:
    month, year = stamp[:2], stamp[2:]
    generate_monthly_summary(month, year)