# Extract annual geomedian waterlines across time
**What does this notebook do?** 

This notebook demonstrates how to extract waterline contours from the geomedian composite layers for each year. 

**Requirements:** 

You need to run the following commands from the command line prior to launching jupyter notebooks from the same terminal so that the required libraries and paths are set:

`module use /g/data/v10/public/modules/modulefiles` 

`module load dea/20180515`  *(currently using an older version of `dea` due to a bug in `xr.concat`; will be reverted to `module load dea` in future)*

If you find an error or bug in this notebook, please either create an 'Issue' in the Github repository, or fix it yourself and create a 'Pull' request to contribute the updated notebook back into the repository (See the repository [README](https://github.com/GeoscienceAustralia/dea-notebooks/blob/master/README.rst) for instructions on creating a Pull request).

**Date:** September 2018

**Author:** Robbi Bishop-Taylor

## Import modules

In [4]:
import os
import sys
import glob
import datacube
import itertools
import warnings
import numpy as np
import xarray as xr
import pandas as pd
import geopandas as gpd
from datacube.utils import geometry
from datacube.utils.geometry import CRS
from datacube.helpers import write_geotiff
from shapely.geometry import Point
from collections import defaultdict
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter

# Local helper modules are imported from the shared scripts directory
sys.path.append('../10_Scripts')
import SpatialTools
import DEAPlotting

# For nicer notebook plotting, hide warnings (comment out for real analysis)
warnings.filterwarnings('ignore') 

# Create datacube instance
dc = datacube.Datacube(app='Tidal geomedian filmstrips')

%load_ext autoreload
%autoreload 2


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Geomedian filmstrip parameters
Set the area, time period and sensors of interest, and the tide limits and epoch length used to produce each geomedian composite. This is the only cell that needs to be edited to run the notebook.

In [None]:
# Study area definition: output file prefix, centre point and the distance
# (in metres) that the region of interest extends from that centre point
study_area = 'hume'  # name used as prefix for output files
lat, lon = -36.100, 147.210  # centre of study area
buffer = 23000  # metre units to extend region of interest on each side of centre point

# Reproject the WGS84 centre point into EPSG:3577 and unpack its coordinates
centre_3577 = geometry.point(lon, lat, CRS('WGS84')).to_crs(CRS('EPSG:3577'))
x, y = centre_3577.points[0]

# Square bounding box reaching `buffer` either side of the centre on both axes
query = dict(x=(x - buffer, x + buffer),
             y=(y - buffer, y + buffer),
             crs='EPSG:3577')

query

## Combine multiple sensors, load data and generate geomedians
For each epoch, combine all sensors into one dataset, load the data for the first time using `dask`'s `.compute()`, then composite all timesteps into a single array using a geometric median computation.

In [1]:
# # List to hold output geomedian composites
# all_data = []

# time_dict = {'ls5': ('1987-01-01', '2018-01-01'),
#             'ls7': ('1987-01-01', '2003-05-30'),
#             'ls8': ('1987-01-01', '2018-01-01')}

# for sensor in ['ls5', 'ls7', 'ls8']:

#     # Return observations matching query without actually loading them using dask
#     sensor_all = dc.load(product = '{}_nbart_geomedian_annual'.format(sensor), 
#                      group_by = 'solar_day', 
#                      time=time_dict[sensor],
# #                      dask_chunks={'time': 1},
#                      **query)
    
#     all_data.append(sensor_all)

# # Import data
# all_data 

# xr.concat()  #.mean(dim=['concat_dims'])  #[['red', 'green', 'blue']].to_array()  #.plot.imshow(robust=True)


# for from_date in sensor_epoch_dict.keys():
    

    
#     # Compute NDWI
#     # sensor_combined["ndwi"] = (sensor_combined.green - sensor_combined.nir) / (sensor_combined.green + sensor_combined.nir)
#     sensor_combined["ndwi"] = (sensor_combined.green - sensor_combined.swir1) / (sensor_combined.green + sensor_combined.swir1)

#     # Compute NDWI composite using all timesteps
#     print('    Computing NDWI median')
#     ndwi_median = sensor_combined[["ndwi"]].median(dim='time', keep_attrs=True)
    
#     # Export to file
#     filename = 'output_data/{0}/{0}_{1}.tif'.format(study_area, from_date)
#     print('    Exporting to {}'.format(filename))
#     write_geotiff(filename=filename, dataset=ndwi_median)
    
#     # Assign to dict
#     ndwi_dict[from_date] = ndwi_median

# Load the Landsat 8 annual geomedian composites for the study area.
# The datacube instance (`dc`) from the import cell and the spatial extent
# (`query`) from the parameters cell are reused here, so the study area is
# defined in one place only. No `dask_chunks` argument is passed, so the
# data is loaded immediately rather than lazily.
sensor_all = dc.load(product='ls8_nbart_geomedian_annual',
                     time=('1987-01-01', '2018-01-01'),
                     **query)

sensor_all

<xarray.Dataset>
Dimensions:  (time: 5, x: 1841, y: 1841)
Coordinates:
  * time     (time) datetime64[ns] 2013-01-01 2014-01-01 2015-01-01 ...
  * y        (y) float64 -4.003e+06 -4.003e+06 -4.003e+06 -4.003e+06 ...
  * x        (x) float64 1.344e+06 1.344e+06 1.344e+06 1.344e+06 1.344e+06 ...
Data variables:
    blue     (time, y, x) int16 452 438 447 455 440 434 422 427 429 433 416 ...
    green    (time, y, x) int16 792 783 804 835 837 823 799 780 769 761 769 ...
    red      (time, y, x) int16 698 682 708 728 681 660 627 658 673 684 626 ...
    nir      (time, y, x) int16 4180 4165 4186 4280 4479 4638 4546 4043 3970 ...
    swir1    (time, y, x) int16 2297 2275 2321 2374 2304 2261 2179 2166 2165 ...
    swir2    (time, y, x) int16 1209 1185 1214 1247 1172 1125 1087 1103 1123 ...
Attributes:
    crs:      EPSG:3577

## Extract waterlines

In [None]:
# Build one NDWI dataset per annual geomedian. Keys are 'YYYY-MM-DD' date
# strings; the contour attributes and shapefile names below are derived from them.
ndwi_dict = {}
for timestep in sensor_all.time.values:

    annual = sensor_all.sel(time=timestep)

    # Cast the int16 bands to float so that adding them cannot wrap around
    green = annual.green.astype('float32')
    swir1 = annual.swir1.astype('float32')

    # Normalised difference of the green and swir1 bands
    # NOTE(review): pixels are not masked for nodata before this calculation;
    # confirm whether the geomedian product needs masking before contouring
    ndwi_band = (green - swir1) / (green + swir1)

    # Attach the band to the yearly dataset and keep only that variable, so the
    # result is still a Dataset carrying the parent's attributes (e.g. `crs`)
    date_string = pd.to_datetime(timestep).strftime('%Y-%m-%d')
    ndwi_dict[date_string] = annual.assign(ndwi=ndwi_band)[['ndwi']]

# Make sure the output folder exists before any shapefiles are written
output_dir = f'output_data/{study_area}'
os.makedirs(output_dir, exist_ok=True)

for date, ndwi in ndwi_dict.items():

    print(date)

    # Prepare attributes as input to contour extract (year taken from the date string)
    attribute_data = {'date': [date[0:4]]}
    attribute_dtypes = {'date': 'int'}

    # Extract the 0-value contour with custom attribute fields and write it
    # to a shapefile for this date
    contour_dict = SpatialTools.contour_extract(z_values=[0],
                                                ds_array=ndwi.ndwi,
                                                ds_crs='epsg:3577',
                                                ds_affine=ndwi.geobox.transform,
                                                output_shp=f'{output_dir}/{study_area}_{date}.shp',
                                                attribute_data=attribute_data,
                                                attribute_dtypes=attribute_dtypes)

# Combine all per-date shapefiles into one GeoDataFrame. The pattern only
# matches files ending in '01-01.shp', so the combined output is not re-read.
shapefiles = sorted(glob.glob(f'{output_dir}/{study_area}_*01-01.shp'))
gdf = pd.concat([gpd.read_file(shp) for shp in shapefiles], sort=False).pipe(gpd.GeoDataFrame)

# Keep only the date and geometry columns, ordered by date, and save as a
# combined shapefile
gdf = gdf.reset_index()[['date', 'geometry']].sort_values('date')
gdf.crs = 'epsg:3577'
gdf.to_file(f'{output_dir}/{study_area}_combined.shp')

gdf.head()




## Create tidal modelled vs observed plot
Create a plot comparing selected Landsat observations to all Landsat observations and the entire tidal history of the study area:

In [None]:
# Plot modelled tide heights as a background line, overlay the observed tide
# heights as points, and highlight the observations that fall inside the
# selected tidal range.
#
# NOTE(review): this cell uses names that are not assigned or imported in the
# cells visible in this notebook: `date_range`, `start`, `end`, `TimePoint`,
# `predict_tide`, `tidepost_lon`, `tidepost_lat`, `sensor_dict`, `sel_min`,
# `sel_max`, `epochs`, `epoch_years` and `relativedelta`. Confirm where they
# are expected to come from before running.

# For each hour between start and end of time series, predict tide and add to list
all_times = date_range(start, end, 1, 'hours')
tp_model = [TimePoint(tidepost_lon, tidepost_lat, dt) for dt in all_times]
tides_model = [tide.tide_m for tide in predict_tide(tp_model)]

# Convert to dataframe of modelled dates and tidal heights
modelled_df = pd.DataFrame({'tide_heights': tides_model}, index=pd.DatetimeIndex(all_times))

# Return dataframe of previously-generated observed dates and tidal heights,
# concatenating the `tide_heights` of every entry in `sensor_dict` along time
observed_df = xr.concat([i.tide_heights for i in sensor_dict.values()], dim='time').to_dataframe()   

# Set up plot: hide the top and right spines and show y tick labels with
# two decimal places
fig = plt.figure(figsize=(10, 4))
plt.margins(0)
fig.axes[0].spines['right'].set_visible(False)
fig.axes[0].spines['top'].set_visible(False)
fig.axes[0].yaxis.set_major_formatter(FormatStrFormatter('%.2f'))
plt.ylabel('Tide height (m)')

# Bold annotation above the axes giving the tidepost location and study area
plt.annotate('Tidepost: {} E, {} S ({})'.format(tidepost_lon, tidepost_lat, study_area), 
             xy=(0, 1.2), xycoords='axes fraction', 
             xytext=(5, -3), textcoords='offset points',
             fontsize=10, verticalalignment='top', fontweight='bold') 

# Plot modelled values as grey background
plt.plot(modelled_df.index, modelled_df.tide_heights,
         color='gainsboro', linewidth=0.6, zorder=1, label='OTPS model')

# Plot all observations as small dark grey points
plt.scatter(observed_df.index, observed_df.tide_heights,
            s=4, color='darkgrey', marker='o', zorder=2, label='All observations')

# Plot selected observations as larger black points by clipping observed_df to
# the min and max tide selection (widened by 0.001 at each end)
selected_df = observed_df[observed_df.tide_heights.between(sel_min-0.001, sel_max+0.001)]
plt.scatter(selected_df.index, selected_df.tide_heights,
            s=10, color='black', marker='o', zorder=2, label='Selected observations')

# Plot horizontal lines defining border of selected tidal range
plt.axhline(y=sel_min, color='red', alpha=0.2) 
plt.axhline(y=sel_max, color='red', alpha=0.2) 

# Add vertical lines and annotation defining each epoch (the last entry of
# `epochs` is skipped)
for epoch in epochs[:-1]:
    
    # Compute from and to date strings
    from_date = epoch.strftime('%Y-%m-%d')
    to_date = (epoch + relativedelta(years=epoch_years)).strftime('%Y-%m-%d')
    
    # Add vertical line and epoch titles, placed at the maximum modelled tide height
    plt.axvline(x=epoch, color='red', alpha=0.2)
    plt.annotate('{} to {}'.format(from_date, to_date), 
                 xy=(epoch, modelled_df.tide_heights.max()), 
                 xytext=(5, 10), textcoords='offset points', fontsize=7) 
    
# Add legend
plt.legend(bbox_to_anchor=(0.975, 1.2), loc=1, borderaxespad=0, ncol=3, 
           handletextpad=0.1, frameon=False, columnspacing=0.3, fontsize=7)

# Export plot
# NOTE: the `savefig` call below is commented out, so only the message is
# printed and no file is written
filename = 'figures/{0}/{0}_tideobs.png'.format(study_area)
print('    Exporting to {}'.format(filename))
# fig.savefig(filename, dpi=300, bbox_inches='tight', pad_inches=0)

In [None]:
# Connect to a datacube using a custom configuration file
# NOTE(review): this is an absolute path inside one user's home directory, so
# the cell only runs for users who can read that file; consider making the
# path configurable
dc = datacube.Datacube(config='/home/561/rt1527/unpublished_products.conf')
dc.list_products()

# Set up centre of area to analyse, and a buffer in metres around this centrepoint
lat, lon, buffer_m, name = -17.6080348351, 139.80119891, 5000, 'mangrove_test'
time_range = ('1986-01-01', '2019-09-01')
resolution = (-25, 25)

# Reproject the centre point to EPSG:3577 and build the query; the x extent is
# padded by a further 3000 m on each side compared with the y extent
x, y = geometry.point(lon, lat, CRS('WGS84')).to_crs(CRS('EPSG:3577')).points[0]
query = {'x': (x - buffer_m - 3000, x + buffer_m + 3000),
         'y': (y - buffer_m, y + buffer_m),
         'time': time_range,
         'crs': 'EPSG:3577',
         'output_crs': 'EPSG:3577',
         'resolution': resolution}

test = dc.load(product="mangrove_extent_cover_albers", **query)

# Select the timesteps to animate, and build the per-frame titles from that
# same subset so that each title is the year of the frame it is drawn on
# (taking the years from the full `test` dataset would give a list that does
# not line up with the three selected frames)
frames = test.isel(time=[1, 5, 10])
frame_titles = [str(year) for year in frames.time.dt.year.values.tolist()]

DEAPlotting.animated_timeseries(ds=frames,
                                output_path=f'animated_timeseries_{name}.gif',
                                bands=['canopy_cover_class'],
                                interval=500,
                                width_pixels=1000,
                                percentile_stretch=[0.0, 1.0],
                                show_date=False,
                                onebandplot_kwargs={'cmap':'jet'},
#                                 onebandplot_cbar = False,
                                title=frame_titles)
