## Initial Setup

Download scraper module `dea_bom`:

```
!wget https://raw.githubusercontent.com/GeoscienceAustralia/dea-notebooks/kirill-bom-water/Scripts/dea_bom.py
```

This download only needs to be done once.

In [1]:
%matplotlib inline
import ipywidgets as W
from IPython.display import display
import matplotlib.pyplot as plt
import sys
sys.path.append('../Scripts')
import os
import dea_bom
import pandas as pd
import numpy as np
import xarray as xr
import datacube
from datacube.utils import geometry 
from datacube.utils.geometry import CRS
from datacube.storage import masking
from datacube.helpers import ga_pq_fuser, write_geotiff
import warnings
warnings.filterwarnings('ignore', module='datacube')
%load_ext autoreload
%autoreload 2

## List of All Available Stations

In [2]:
%%time
# Ask the dea_bom scraper module for the list of stations; %%time reports how long the call takes.
stations = dea_bom.get_stations()
# Quick sanity check: number of stations plus the first five records.
len(stations), stations[:5]

CPU times: user 93.1 ms, sys: 20.2 ms, total: 113 ms
Wall time: 4.16 s


(6027,
 [namespace(name='15 MILE @ GRETA STH', pos=(-36.61945775, 146.24407214), url='http://bom.gov.au/waterdata/services/stations/403213'),
  namespace(name='15 MILE @ WANGARATTA', pos=(-36.36666667, 146.2833333), url='http://bom.gov.au/waterdata/services/stations/403239'),
  namespace(name='15 MILE CK GLENROWAN', pos=(-36.47080718, 146.246199), url='http://bom.gov.au/waterdata/services/stations/403251'),
  namespace(name='16 Mile Waterhole', pos=(-18.876921, 139.360487), url='http://bom.gov.au/waterdata/services/stations/913010A'),
  namespace(name='163 Clifton Rd', pos=(-32.97808, 115.90111), url='http://bom.gov.au/waterdata/services/stations/6131318')])

In [3]:
# Output widgets: `dbg` receives the status messages printed by the callbacks,
# `fig_display` receives the figure that on_select draws into.
dbg = W.Output()
fig_display = W.Output()
# Module-level state shared with on_select / on_clear and with later cells.
stt = [] # call stt to see all selected stations in the Kernel
collected_data = [] #call collected_data to see what data you are working with
# Station names in plotting order; passed to ax.legend() by on_select.
legends = []

with fig_display:
    # Non-interactive mode: the figure is shown explicitly with display(fig) in on_select.
    plt.ioff()
    # Single wide axes that every selected station is plotted onto.
    fig, ax = plt.subplots(1, figsize=(14,4))
    
def on_clear():
    """Reset the selector: empty the axes, the shared lists and both output widgets.

    Clears ``stt``, ``collected_data`` and ``legends`` in place (the list objects
    themselves are kept), wipes the axes, replaces the log in ``dbg`` with a single
    'Clear' line and removes the figure from ``fig_display``.
    """
    # Forget everything gathered so far.
    for bucket in (stt, collected_data, legends):
        bucket.clear()
    ax.clear()

    # Swap the old log for one confirmation line, then hide the figure.
    dbg.clear_output(wait=True)
    with dbg:
        print('Clear')
    fig_display.clear_output()
    

def on_select(st):
    """Fetch, record and plot the observations of a selected station.

    Passed to ``dea_bom.mk_station_selector`` as the selection callback.

    Parameters
    ----------
    st : station record
        Object with a ``name`` attribute; it is handed unchanged to
        ``dea_bom.get_station_data``.

    Side effects
    ------------
    * ``st`` is appended to ``stt`` before the download, so ``stt`` also lists
      stations whose download failed or returned no observations.
    * On success the data (NaN rows dropped) is appended to ``collected_data``,
      drawn on the shared ``ax`` and the figure is re-displayed in ``fig_display``.
    * Progress and failure messages are printed into ``dbg``.
    """
    with dbg:
        print(f'Fetching data for: {st.name}')
    stt.append(st)

    try:
        xx = dea_bom.get_station_data(st).dropna()
    except Exception as err:
        # Best effort: a failing station must not break the widget, but the
        # reason is reported instead of being silently swallowed.
        with dbg:
            print('Failed to read data')
            print(f'  {type(err).__name__}: {err}')
        return

    with dbg:
        print(f'Got {xx.shape[0]} observations')

    # Nothing to plot or store for an empty gauge.
    if xx.shape[0] == 0:
        return
    collected_data.append(xx)

    xx.plot(ax=ax)
    legends.append(st.name)
    ax.legend(legends)

    # Replace the previously shown figure with the updated one.
    fig_display.clear_output(wait=True)
    with fig_display:
        display(fig)
        
# Build the station-selector map with the dea_bom helper; on_select is passed as its callback.
m, cluster = dea_bom.mk_station_selector(
    on_select, stations, center=(-24, 138), zoom=3
)

# "Clear" button: the click handler receives the button instance, which is not needed.
btn = W.Button(description="Clear")
btn.on_click(lambda _clicked: on_clear())

# Layout: map next to a 30%-wide column (button + log), with the figure underneath.
side_panel = W.VBox([btn, dbg], layout=W.Layout(width="30%"))
ui = W.VBox([W.HBox([m, side_panel]), fig_display])

display(ui)

VBox(children=(HBox(children=(Map(basemap={'url': 'https://{s}.tile.openstreetmap.org/{z}/{x}/{y}.png', 'max_z…

In [4]:
# Stations recorded by on_select so far (empty until a station has been selected).
stt

[]

## Select a gauge from the map
Make sure the gauge you select has data, as some gauges are empty.

In [None]:
#Please select the lat and lon of the area you want to make a picture of
# lat/lon are WGS84 coordinates; buffer is the half-width of the square area.
# NOTE(review): buffer is later added to EPSG:3577 coordinates, so presumably metres - confirm.
lat, lon = -35.0442, 144.4464
buffer = 10000

# Echo the selected stations next to the chosen coordinates so they can be compared by eye.
selection_report = [
    "Here is the lat and lon of the gauge: {}".format(stt),
    "make sure your selected lat and lon matches the gauge lat and lon:\n",
    "You have selected: lat = {}".format(lat),
    "You have selected: lon = {}".format(lon),
]
print("\n".join(selection_report))

In [None]:
#Rearranging data into a flow duration curve
# Take the first collected series, drop gaps and rank the observations by 'Value'
# (ascending); each rank is then converted to a percentage, so the smallest value
# gets 100 and larger values get progressively smaller percentages.
gauge_data = collected_data[0].dropna().sort_values('Value')
n_obs = len(gauge_data)
gauge_data = gauge_data.assign(rownumber=np.arange(n_obs))
gauge_data = gauge_data.assign(Exceedence=(1 - gauge_data.rownumber / n_obs) * 100)

# Plot the curve and label it through the Axes returned by pandas.
fdc_ax = gauge_data.plot(x='Exceedence', y='Value', figsize=(11, 7))
fdc_ax.set_ylabel('cubic meters per second')
fdc_ax.grid(True)
fdc_ax.set_title('FDC')

In [None]:
#What part of the Flow Duration Curve do you want to look at?
# Lower and upper bounds of the band to highlight, in the units of the 'Value' column.
# The same two names are used by the later cells to pick the satellite passes.
yaxis_lower_parameter = 30
yaxis_higher_parameter = 80

#Let's look at it on a log scale
# Everything is drawn through the Axes returned by pandas, so the result does not
# depend on which axes pyplot currently considers "current".
ax2 = gauge_data.plot(x='Exceedence', y='Value', figsize=(11,7))
ax2.axhspan(yaxis_lower_parameter, yaxis_higher_parameter, color='red', alpha=0.2)
ax2.set_title('This is the range you selected displayed as a log')
ax2.set_ylabel('cubic meters per second (log)')
ax2.set_xlabel('Exceedence')
# The module is imported as `plt`; the former `pyplot.yscale` call raised a NameError.
ax2.set_yscale('log');

In [None]:
# Convert the gauge table to xarray so it can be interpolated onto the satellite timestamps.
gauge_data_xr = gauge_data.to_xarray()

#Dask loading wofs_albers data (loading parameters only, not loading the actual satellite data since 1988)
# Project the chosen point from WGS84 to EPSG:3577 and build a square query of +/- buffer around it.
x, y = geometry.point(lon, lat, CRS('WGS84')).to_crs(CRS('EPSG:3577')).points[0]
query = {'x': (x - buffer, x + buffer),
         'y': (y - buffer, y + buffer),
         'time': ('1988-01-01', '2019-08-22'), # You might want to change the date to todays date
         'crs': 'EPSG:3577'}
dc = datacube.Datacube(app='dc-WOfS')
# dask_chunks={} keeps the load lazy: only the timestamps are needed at this stage.
wofs_albers = dc.load(product='wofs_albers', dask_chunks={}, group_by='solar_day', **query)

# Merging satellite data with gauge data by timestamp
merged_data = gauge_data_xr.interp(Timestamp=wofs_albers.time)

# Here is where it takes into account user input for the FDC
in_selected_range = ((merged_data.Value > yaxis_lower_parameter) &
                     (merged_data.Value < yaxis_higher_parameter))
specified_level = merged_data.where(in_selected_range, drop=True)
date_list = specified_level.time.values

# Report the number of passes itself: the message promises a count, so format
# len(date_list) rather than the repr of the whole time coordinate.
print("WARNING: You are about to load satellite pass data from the Geoscience datacube.\n"
      "If you load more than 300 passes it will take a long time to load.\n"
      "To change the amount of passes, change the yaxis parameters.\n"
      "If you got 0 passes unexpectedly, check that you got the lower and higher yaxis parameters around the right way.\n"
      "You are about to load this many passes (check that it's under 300):\n {}".format(len(date_list)))

In [None]:
# Same square area around the chosen point as before, but without a time range:
# each pass is requested by its own timestamp below.
x, y = geometry.point(lon, lat, CRS('WGS84')).to_crs(CRS('EPSG:3577')).points[0]
query = {'x': (x - buffer, x + buffer),
         'y': (y - buffer, y + buffer),
         'crs': 'EPSG:3577'}

# xr.concat cannot combine an empty list; fail early with an actionable message instead.
if len(date_list) == 0:
    raise ValueError("No satellite passes match the selected flow range; "
                     "adjust yaxis_lower_parameter / yaxis_higher_parameter and re-run.")

#loop selecting data based on FDC parameters
# One load per selected timestamp. The results go straight into the list so the
# lazily loaded `wofs_albers` from the previous cell is no longer overwritten.
xr_list = [dc.load(product='wofs_albers', time=str(date), **query) for date in date_list]
specified_passes = xr.concat(xr_list, dim='time')

#Cloud Mask
# Percentage of cloudy pixels per pass, computed over the whole x/y grid.
# (The previous denominator used `len(specified_passes*y)`, i.e. the number of data
# variables instead of the number of rows, which inflated the percentage.)
cloudy_pixels = masking.make_mask(specified_passes.water, cloud=True)
percent_cloudy = cloudy_pixels.mean(dim=['x', 'y']) * 100
# Keep the passes that are at most 50% cloudy.
ClearTimesteps = [ix for ix, pct in enumerate(percent_cloudy.values) if pct <= 50]
clear_specified_passes = specified_passes.water.isel(time=ClearTimesteps)

# Per pixel: how often it equalled 128 (counted as wet) relative to how often it
# equalled 128 or 0 (counted as a clear observation).
wet = (clear_specified_passes == 128).sum(dim='time')
dry = (clear_specified_passes == 0).sum(dim='time')
clear = wet + dry
frequency = wet / clear
frequency = frequency.fillna(0) #this is to get rid of the NAs that occur due to mountain shadows
frequency = frequency.where(frequency != 0) #This is to tell it to make areas that were dry 100% of the time white

#Plotting the image
frequency.plot(figsize=(16, 12))
plt.axis('off')

# Side-by-side summary. Dedicated names are used so the `fig` / `ax` globals that the
# station-selector callbacks draw into are not replaced by this cell.
summary_fig, (freq_ax, ax2) = plt.subplots(ncols=2, figsize=(16, 6))

frequency.plot(ax=freq_ax)

gauge_data.plot(x='Exceedence', y='Value', ax=ax2)
ax2.axhspan(yaxis_lower_parameter, yaxis_higher_parameter, color='red', alpha=0.2)
ax2.set_title('This was the specified range for which the image was generated (FDC log)')
ax2.set_ylabel('cubic meters per second (log)')
ax2.set_xlabel('Exceedence')
# The module is imported as `plt`; the former `pyplot.yscale` call raised a NameError.
ax2.set_yscale('log')

summary_fig.tight_layout()

# Report the number of passes that survived the cloud filter.
print("This image was made by layering this many images: {}".format(len(ClearTimesteps)))