Authors: Bex Dunn, Neil Symington, Claire Krause, Leo Lymburner

### get some modules

In [47]:
%load_ext autoreload
%autoreload 2

from datetime import datetime
import matplotlib.pyplot as plt

#dealing with system commands
import sys
import os.path

#modules for datacube
import datacube
from datacube.utils import geometry
from datacube.storage.storage import write_dataset_to_netcdf
from datacube.helpers import write_geotiff

# Import external functions from dea-notebooks
sys.path.append(os.path.expanduser('~/dea-notebooks/Scripts/'))
from RainfallTools import load_rainfall, calculate_residual_mass_curve

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Choose a time period and location to calculate residual mass curve.
 - Note: please select whole months here, not days, as a month with only two days may negatively affect your mass residual curve

In [48]:
## Choose a query area and time
##note: datacube is inclusive of both ends of the interval.


### choose only whole months please
query = {
        'lat': (-35.25, -35.35),
        'lon': (149.05, 149.17),
        'time':('2010-01-01', '2013-03-01')
        }

### load rainfall data

In [49]:
#using the BoM grids to 2013. These are out by 2.5km, be aware.

In [50]:
#rainfall = load_rainfall(query)
dc = datacube.Datacube(app='dc-BOMrainfall')
rainfall = dc.load(product = 'bom_rainfall_grids', **query)

## calculate rainfall mass residual

In [51]:
'''This function calculates the residual mass rainfall curve ##FIXME
:param a: '''
import datacube
import numpy as np
import xarray as xr

## resample rainfall data to month start ##FIXME, daily option

In [52]:
rainfall

<xarray.Dataset>
Dimensions:    (latitude: 2, longitude: 3, time: 1156)
Coordinates:
  * time       (time) datetime64[ns] 2010-01-01 2010-01-02 2010-01-03 ...
  * latitude   (latitude) float64 -35.27 -35.32
  * longitude  (longitude) float64 149.1 149.1 149.2
Data variables:
    rainfall   (time, latitude, longitude) float32 1.0589818 1.0485023 ...
Attributes:
    crs:      EPSG:4326

In [53]:
#cumulative_rainfall = rainfall.cumsum(dim='time',keep_attrs=True)

In [96]:
monthly_mean_rainfall = rainfall.groupby('time.month').mean(dim='time', keep_attrs='True')

In [55]:
#monthly_mean_rainfall.rainfall.plot(col='month',col_wrap=3)

In [56]:
#monthly_mean_rainfall

In [57]:
#rainfall.rainfall.plot(col='time')

In [58]:
#cumulative_rainfall.rainfall.plot(col='time')

In [59]:
#monthly_mean_rainfall.rainfall[-1]

In [60]:
#cumulative_rainfall.rainfall[-1]

In [61]:
## Bex version

In [62]:
#resample rainfall data to month start
monthly_rainfall = rainfall.resample(time='MS').sum('time')

In [95]:
monthly_rainfall

<xarray.Dataset>
Dimensions:    (latitude: 2, longitude: 3, time: 39)
Coordinates:
  * time       (time) datetime64[ns] 2010-01-01 2010-02-01 2010-03-01 ...
  * latitude   (latitude) float64 -35.27 -35.32
  * longitude  (longitude) float64 149.1 149.1 149.2
Data variables:
    rainfall   (time, latitude, longitude) float32 6.088235 7.4755325 ...

In [64]:
monthly_rainfall.time.dt.month

<xarray.DataArray 'month' (time: 39)>
array([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,  1,  2,  3,  4,  5,  6,
        7,  8,  9, 10, 11, 12,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12,
        1,  2,  3])
Coordinates:
  * time     (time) datetime64[ns] 2010-01-01 2010-02-01 2010-03-01 ...

In [65]:
# find the number of years in the data
years_in_data = len(monthly_rainfall.rainfall.time)/12

In [66]:
years_in_data

3.25

In [67]:
monthly_rainfall

<xarray.Dataset>
Dimensions:    (latitude: 2, longitude: 3, time: 39)
Coordinates:
  * time       (time) datetime64[ns] 2010-01-01 2010-02-01 2010-03-01 ...
  * latitude   (latitude) float64 -35.27 -35.32
  * longitude  (longitude) float64 149.1 149.1 149.2
Data variables:
    rainfall   (time, latitude, longitude) float32 6.088235 7.4755325 ...

In [68]:
cumulative_rainfall = monthly_rainfall.cumsum(dim='time')

In [69]:
monthly_rainfall

<xarray.Dataset>
Dimensions:    (latitude: 2, longitude: 3, time: 39)
Coordinates:
  * time       (time) datetime64[ns] 2010-01-01 2010-02-01 2010-03-01 ...
  * latitude   (latitude) float64 -35.27 -35.32
  * longitude  (longitude) float64 149.1 149.1 149.2
Data variables:
    rainfall   (time, latitude, longitude) float32 6.088235 7.4755325 ...

In [70]:
cumulative_rainfall

<xarray.Dataset>
Dimensions:    (latitude: 2, longitude: 3, time: 39)
Coordinates:
  * latitude   (latitude) float64 -35.27 -35.32
  * longitude  (longitude) float64 149.1 149.1 149.2
Dimensions without coordinates: time
Data variables:
    rainfall   (time, latitude, longitude) float32 6.088235 7.4755325 ...

In [71]:
#cumulative rainfall has lost its time coordinates. can we reattach?

In [72]:
cumulative_rainfall=cumulative_rainfall.assign_coords(time=monthly_rainfall.coords['time'])

In [73]:
cumulative_rainfall

<xarray.Dataset>
Dimensions:    (latitude: 2, longitude: 3, time: 39)
Coordinates:
  * latitude   (latitude) float64 -35.27 -35.32
  * longitude  (longitude) float64 149.1 149.1 149.2
  * time       (time) datetime64[ns] 2010-01-01 2010-02-01 2010-03-01 ...
Data variables:
    rainfall   (time, latitude, longitude) float32 6.088235 7.4755325 ...

In [74]:

#arr = monthly_rainfall.rainfall.values

#cum_rf = np.cumsum(arr, axis = 0)

In [75]:
##cum_rf_xr = xr.DataArray(cum_rf, dims = ('time', 'latitude', 'longitude'),
      #                  coords = [monthly_rainfall.time, monthly_rainfall.latitude, monthly_rainfall.longitude])

In [76]:
#cum_rf_xr

In [77]:
# NOw we will calculate a cumulative rainfall assuming average rainfall on a month by month basis
# Find the average of all months
ave_months = rainfall.rainfall.groupby('time.month').mean('time').values

In [78]:
ave_months.shape

(12, 2, 3)

In [79]:
#d#aily_mean_rainfall.plot(rainfall, time.dayofyear)

In [80]:
# In the case that we are not starting from January we will need to reorder the array

start_month = rainfall.time[0].dt.month.values - 1

ave_month = np.concatenate((ave_months[start_month:,:,:], ave_months[0:start_month,:,:]), axis = 0)

In [81]:
ave_month.shape

(12, 2, 3)

In [82]:
print(round(years_in_data))

3


In [88]:
# Tile an array so that we can run a cumulative sum on it
tiled_ave = np.tile(ave_months, (round(years_in_data), 1, 1))

In [89]:
tiled_ave.shape

(36, 2, 3)

In [90]:
tiled_ave.shape

(36, 2, 3)

In [91]:
# Generate the cumulative sum of rainfall one would get assuming average rainfall every month
cum_ave = np.cumsum(tiled_ave, axis = 0)

In [92]:
cum_ave.shape

(36, 2, 3)

In [93]:
monthly_rainfall

<xarray.Dataset>
Dimensions:    (latitude: 2, longitude: 3, time: 39)
Coordinates:
  * time       (time) datetime64[ns] 2010-01-01 2010-02-01 2010-03-01 ...
  * latitude   (latitude) float64 -35.27 -35.32
  * longitude  (longitude) float64 149.1 149.1 149.2
Data variables:
    rainfall   (time, latitude, longitude) float32 6.088235 7.4755325 ...

In [94]:
cum_ave_xr = xr.DataArray(cum_ave, dims = ('time', 'latitude', 'longitude'),
                          coords = [monthly_rainfall.time, monthly_rainfall.latitude, monthly_rainfall.longitude])

ValueError: conflicting sizes for dimension 'time': length 36 on the data but length 39 on coordinate 'time'

In [None]:
# The mass residual curve is the difference between the cumulative rainfall data and the cumulative
# rainfall one would get iff the average always occured
mass_res_curve = cum_rf_xr - cum_ave_xr

mass_res_curve

In [None]:
mass_res_curve.plot(col='time', col_wrap=6)

In [None]:
mass_res_curve.mean(dim=('latitude','longitude')).plot()


In [None]:
# def calculate_residual_mass_curve(a):
#     '''This function calculates the residual mass rainfall curve
#     :param a: '''
    
#     #resample rainfall data to month start
#     a = a.resample('MS', dim='time', how='sum', keep_attrs=True) 
#     # find the number of time steps (ie. years)
#     n = len(a.rainfall.time)/12
    
#     # First calculate a cumulative rainfall xarray from the rainfall data
    
#     arr = a.rainfall.values
    
#     cum_rf = np.cumsum(arr, axis = 0)
    
#     cum_rf_xr = xr.DataArray(cum_rf, dims = ('time', 'latitude', 'longitude'),
#                             coords = [a.time, a.latitude, a.longitude])
    
#     # NOw we will calculate a cumulative rainfall assuming average rainfall on a month by month basis
#     # Find the average of all months
#     ave_months = a.rainfall.groupby('time.month').mean('time').values
   
#     # In the case that we are not starting from January we will need to reorder the array
    
#     start_month = a.time[0].dt.month.values - 1
    
#     ave_month = np.concatenate((ave_months[start_month:,:,:], ave_months[0:start_month,:,:]), axis = 0)

    
#     # Tile an array so that we can run a cumulative sum on it
#     tiled_ave = np.tile(ave_months, (round(n), 1, 1))
    
#     # In the case that we have residual months remove them from the tiled array
#     if (n).is_integer() == False:
#         month_remainder = int(round((n%1) * 12))

#         tiled_ave = tiled_ave[:int(-month_remainder),:,:]
        
#     # Generate the cumulative sum of rainfall one would get assuming average rainfall every month
#     cum_ave = np.cumsum(tiled_ave, axis = 0)
    
#     cum_ave_xr = xr.DataArray(cum_ave, dims = ('time', 'latitude', 'longitude'),
#                               coords = [a.time, a.latitude, a.longitude])
    
#     # The mass residual curve is the difference between the cumulative rainfall data and the cumulative
#     # rainfall one would get iff the average always occured
#     mass_res_curve = cum_rf_xr - cum_ave_xr
    
#     return mass_res_curve


## calculate metrics based on rainfall
- increasing and decreasing periods
- wet year after dry years
- dry year after wet years
-

## calculate metrics based on residual rainfall
- increasing and decreasing periods
- wet year after dry years
- dry year after wet years


## calculate seasonality
- Koppen climate region (use notebook to follow decision tree - requires climate data)
- Summer dominant, Summer, Uniform, Winter, Winter dominant #
- TROP SOI value
- IOD /SAM indices
"During El Niño, rainfall in eastern Australian is typically below average during winter and spring. A neutral ENSO phase has little effect on Australian climate."Bureau of meteorology http://www.bom.gov.au/climate/enso/  

In [None]:
# Calculate failed wet seasons - calculate total wet season + dry season rainfalls
# (end of wet season rainfalls?)