## Initial Setup

In [9]:
%matplotlib inline
from IPython.display import display
import matplotlib.pyplot as plt
import sys
sys.path.append('../Scripts')
import os
import pickle
from pathlib import Path
import dea_bom
import pandas as pd
import numpy as np
import xarray as xr
import datacube
from datacube.utils import geometry 
from datacube.utils.geometry import CRS
from datacube.storage import masking
from datacube.helpers import ga_pq_fuser, write_geotiff
import warnings
warnings.filterwarnings('ignore', module='datacube')
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## List of All Available Stations

In [10]:
%%time
# Local cache of the station list so the (slow) BoM request is only made once.
stations_pkl = Path('stations.pkl')

# if cache exists, get it from cache; otherwise fetch from BoM and write the cache
if stations_pkl.exists():
    print('Loading from cache')
    # NOTE(review): unpickling can execute arbitrary code -- only load a
    # stations.pkl that this notebook wrote itself.
    # The `with` block closes the file handle once the load has finished.
    with stations_pkl.open('rb') as cache_file:
        stations = pickle.load(cache_file)
else:
    print('Fetching from BoM')
    stations = dea_bom.get_stations()
    # Closing the handle on exit guarantees the cache is flushed to disk.
    with stations_pkl.open('wb') as cache_file:
        pickle.dump(stations, cache_file)

# Show how many stations were loaded and a sample of the first five.
len(stations), stations[:5]

Loading from cache
CPU times: user 11.7 ms, sys: 5.64 ms, total: 17.3 ms
Wall time: 13.8 ms


(6027,
 [namespace(name='15 MILE @ GRETA STH', pos=(-36.61945775, 146.24407214), url='http://bom.gov.au/waterdata/services/stations/403213'),
  namespace(name='15 MILE @ WANGARATTA', pos=(-36.36666667, 146.2833333), url='http://bom.gov.au/waterdata/services/stations/403239'),
  namespace(name='15 MILE CK GLENROWAN', pos=(-36.47080718, 146.246199), url='http://bom.gov.au/waterdata/services/stations/403251'),
  namespace(name='16 Mile Waterhole', pos=(-18.876921, 139.360487), url='http://bom.gov.au/waterdata/services/stations/913010A'),
  namespace(name='163 Clifton Rd', pos=(-32.97808, 115.90111), url='http://bom.gov.au/waterdata/services/stations/6131318')])

## Prune the list of stations for faster plotting

This step can be safely omitted.

In [11]:
# Stations without a recorded position cannot be placed on the map.
stations_all = [candidate for candidate in stations if candidate.pos is not None]

# Plotting window; a station's pos is a (lat, lon) pair and both bounds are exclusive.
lat_min, lat_max = -39, -31
lon_min, lon_max = 137, 152

stations = [
    candidate
    for candidate in stations_all
    if (lat_min < candidate.pos[0] < lat_max) and (lon_min < candidate.pos[1] < lon_max)
]
len(stations)

3085

In [13]:
# Hand the current station list to the dea_bom station picker. Its two return
# values are used by the cells below as the gauge readings table (gauge_data)
# and the chosen station (station, whose .pos is read next).
# NOTE(review): the following cells assume a station has been picked in the
# widget before they run -- confirm what is returned when nothing is selected.
gauge_data, station = dea_bom.ui_select_station(stations);

VBox(children=(HBox(children=(Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_z…

In [None]:
# Position of the selected station; unpacked into (lat, lon) further down.
pos = station.pos
# Display the position together with the first five gauge readings as a sanity
# check that the selected gauge actually has data.
pos, gauge_data.head(5)

## Select a gauge from the map
Make sure it has data in it, as some gauges are empty.

In [None]:
# Half-width of the square query window around the gauge: it is added to and
# subtracted from the gauge's projected x/y when the datacube query is built.
# NOTE(review): presumably metres, since the query uses EPSG:3577 -- confirm.
buffer = 8000

# The station position is a (lat, lon) pair.
lat, lon = pos

# Echo the selection so the user can check it against the map.
for message, value in (("You have selected: lat = {}", lat),
                       ("You have selected: lon = {}", lon)):
    print(message.format(value))

In [None]:
# Rearrange the gauge readings into a flow duration curve (FDC): drop missing
# readings, order the rest from the smallest to the largest 'Value', and turn
# each position in that order into the percentage of readings at or above it.
gauge_data = gauge_data.dropna().sort_values('Value')
n_readings = len(gauge_data)
gauge_data['rownumber'] = np.arange(n_readings)
gauge_data['Exceedence'] = (1 - (gauge_data['rownumber'] / n_readings)) * 100

# Plot flow against exceedence on a linear scale.
fdc_axes = gauge_data.plot(x='Exceedence', y='Value', figsize=(11, 7))
fdc_axes.set_ylabel('cubic meters per second')
fdc_axes.grid(True)
fdc_axes.set_title('FDC');

In [None]:
# What part of the Flow Duration Curve do you want to look at?
# Lower and upper bound of the flow band of interest, in the units of the
# gauge 'Value' column. The lower bound must be smaller than the upper bound.
yaxis_lower_parameter = 200  # alternative: 600
yaxis_higher_parameter = 400  # alternative: 1750

# Let's look at it on a log scale, with the selected band shaded in red.
# ax2 holds the Axes returned by the pandas plot and every decoration is applied
# to it directly, instead of rebinding ax2 to the return value of each pyplot
# call and relying on pyplot's implicit "current axes".
ax2 = gauge_data.plot(x='Exceedence', y='Value', figsize=(11, 7))
ax2.axhspan(yaxis_lower_parameter, yaxis_higher_parameter, color='red', alpha=0.2)
ax2.set_title('This is the range you selected displayed as a log')
ax2.set_ylabel('cubic meters per second (log)')
ax2.set_xlabel('Exceedence')
ax2.set_yscale('log')

In [None]:
# Convert the gauge table to xarray so it can be interpolated onto the
# satellite pass timestamps.
gauge_data_xr = gauge_data.to_xarray()

# Project the gauge position from WGS84 into EPSG:3577 and build a square query
# window of +/- `buffer` around it.
gauge_point = geometry.point(lon, lat, CRS('WGS84'))
x, y = gauge_point.to_crs(CRS('EPSG:3577')).points[0]
query = dict(
    x=(x - buffer, x + buffer),
    y=(y - buffer, y + buffer),
    time=('1988-01-01', '2019-08-22'),  # You might want to change the end date to today's date
    crs='EPSG:3577',
)

# Dask loading of wofs_albers data: with dask_chunks set, only the description
# of the dataset is built here; the satellite data itself is read on .compute().
dc = datacube.Datacube(app='dc-WOfS')
wofs_albers = dc.load(product='wofs_albers', dask_chunks={}, group_by='solar_day', **query)

# Merge satellite data with gauge data by interpolating the gauge readings onto
# the satellite pass timestamps.
merged_data = gauge_data_xr.interp(Timestamp=wofs_albers.time)

# Apply the user's FDC selection: keep only the passes whose interpolated flow
# lies strictly inside the chosen band.
flow = merged_data.Value
within_band = (flow > yaxis_lower_parameter) & (flow < yaxis_higher_parameter)
specified_level = merged_data.where(within_band, drop=True)
date_list = specified_level.time.values

# Tell the user how many passes the next cell is going to load.
n_passes = specified_level.time.shape[0]
pass_warning = (
    "WARNING: You are about to load satellite pass data from the Geoscience datacube.\n"
    "If you load more than 300 passes it will take a long time to load.\n"
    "To change the amount of passes, change the yaxis parameters.\n"
    "If you got 0 passes unexpectedly, check that you got the lower and higher yaxis parameters around the right way.\n"
    "You are about to load this many passes (check that it's under 300): {}"
)
print(pass_warning.format(n_passes))

In [None]:
%%time
# Select the passes listed in date_list and pull their data into memory:
# wofs_albers was loaded with dask_chunks, so .compute() is what triggers the
# actual read. This is the slow step the warning above refers to.
specified_passes = wofs_albers.sel(time=date_list).compute()

In [None]:
# prune out "too cloudy" passes
water = specified_passes.water

# Mask of the pixels flagged as cloud in each pass.
cc = masking.make_mask(water, cloud=True)

# Pixels per pass: product of the second and third axis lengths
# (assumes axis 0 is time -- TODO confirm).
npixels_per_slice = water.shape[1] * water.shape[2]

# Cloud-flagged pixel count per pass, expressed as a fraction of the pass.
ncloud_pixels = cc.sum(dim=['x', 'y'])
cloud_pixels_fraction = ncloud_pixels / npixels_per_slice

# Keep only the passes in which less than half of the pixels are cloud.
mostly_clear = cloud_pixels_fraction < 0.5
clear_specified_passes = water.isel(time=mostly_clear)
clear_specified_passes.shape

In [None]:
# Per pixel, count the passes whose value is 128 (counted as wet) and 0
# (counted as dry); any other value is left out of both counts.
wet = (clear_specified_passes == 128).sum(dim='time')
dry = (clear_specified_passes == 0).sum(dim='time')
clear = wet + dry

# Fraction of the counted passes in which each pixel was wet.
# Pixels that were never wet are set to NaN so they are drawn blank (white).
# That covers pixels that were dry 100% of the time (0 / n) as well as pixels
# with no counted pass at all (0 / 0, e.g. mountain shadows). A single mask
# does this; filling NaN with 0 first would only be undone by the mask.
frequency = wet / clear
frequency = frequency.where(frequency > 0)

# Plotting the image: large stand-alone map, drawn without axes.
frequency_map = frequency.plot(figsize=(16, 12))
frequency_map.axes.set_axis_off()

# Side-by-side summary: wet frequency on the left, FDC with the selected band
# on the right. Every call targets its Axes explicitly rather than relying on
# pyplot's current axes.
fig, ax = plt.subplots(ncols=2, figsize=(16, 6))

ax1 = frequency.plot(ax=ax[0])

ax2 = gauge_data.plot(x='Exceedence', y='Value', ax=ax[1])
ax2.axhspan(yaxis_lower_parameter, yaxis_higher_parameter, color='red', alpha=0.2)
ax2.set_title('This was the specified range for which the image was generated (FDC log)')
ax2.set_ylabel('cubic meters per second (log)')
ax2.set_xlabel('Exceedence')
ax2.set_yscale('log')

fig.tight_layout()

print("This image was made by layering this many images: {}".format(clear_specified_passes.time.shape[0]))

--------------------------------------------------------------