# Extract annual geomedian waterlines across time
**What does this notebook do?** 

This notebook demonstrates how to extract waterline contours from annual geomedian composite layers, producing one waterline for each year.

**Requirements:** 

You need to run the following commands from the command line prior to launching jupyter notebooks from the same terminal so that the required libraries and paths are set:

`module use /g/data/v10/public/modules/modulefiles` 

`module load dea/20180515`  *(currently using an older version of `dea` due to a bug in `xr.concat`; will be reverted to `module load dea` in future)*

If you find an error or bug in this notebook, please either create an issue in the GitHub repository, or fix it yourself and create a pull request to contribute the updated notebook back into the repository (see the repository [README](https://github.com/GeoscienceAustralia/dea-notebooks/blob/master/README.rst) for instructions on creating a pull request).

**Date:** September 2018

**Author:** Robbi Bishop-Taylor

## Import modules

In [1]:
import os
import sys
import datacube
import itertools
import warnings
import numpy as np
import xarray as xr
import pandas as pd
import geopandas as gpd
from datacube.utils import geometry
from datacube.utils.geometry import CRS
from datacube.helpers import write_geotiff
from shapely.geometry import Point
from collections import defaultdict
import matplotlib.pyplot as plt
from matplotlib.ticker import FormatStrFormatter

sys.path.append('../10_Scripts')
import SpatialTools

# Silence all Python warnings so that notebook output stays readable.
# Comment this line out for real analysis so genuine problems are not hidden.
warnings.filterwarnings('ignore') 

# Create the datacube connection used to query and load data; `app` is a
# short label identifying this session to datacube
dc = datacube.Datacube(app='Tidal geomedian filmstrips')

# IPython magics: automatically reload imported modules (such as the local
# `SpatialTools` script) before each cell is executed, so that edits to them
# take effect without restarting the kernel
%load_ext autoreload
%autoreload 2


## Study area parameters
Set the name of the study area (used as a prefix for output files), the latitude and longitude of its centre point, and the buffer distance in metres used to define the region of interest around that point. This is the only cell that needs to be edited to run the notebook.

In [2]:
# Study area configuration: edit these three values to analyse a different site.
# Only one site can be active at a time. An alternative configuration for the
# 'hume' study area is kept here for reference:
#     study_area = 'hume'; lat, lon = -36.100, 147.210; buffer = 22000
study_area = 'warragamba'  # name used as prefix for output files
lat, lon = -34.0124387269, 150.388624121  # centre of study area
buffer = 18000  # metre units to extend region of interest on each side of centre point

# Reproject the centre point from WGS84 lon/lat into EPSG:3577 coordinates
# (metres), then build a square bounding-box query extending `buffer` metres
# either side of it
x, y = geometry.point(lon, lat, CRS('WGS84')).to_crs(CRS('EPSG:3577')).points[0]
query = {'x': (x - buffer, x + buffer),
         'y': (y - buffer, y + buffer),
         'crs': 'EPSG:3577'}

# Create the output data and figure directories for this study area.
# `exist_ok=True` makes this a no-op when the directories already exist, so
# the cell can safely be re-run.
os.makedirs('output_data/{}/'.format(study_area), exist_ok=True)
os.makedirs('figures/{}/'.format(study_area), exist_ok=True)

## Load annual geomedians and combine multiple sensors
Load the annual geomedian product for each Landsat sensor (Landsat 5, 7 and 8) over the study area, then combine all sensors into one dataset along the time dimension. Where more than one sensor provides a layer for the same year, the layers are merged by taking the median, so that each year is represented by a single composite.

In [3]:
# Connect to the datacube for this analysis
dc = datacube.Datacube(app='Annual geomedians')


def load_annual_geomedian(product, time_range):
    """
    Load one sensor's annual geomedian product over the study area.

    Parameters
    ----------
    product : str
        Name of the datacube product to load.
    time_range : tuple of str
        (start, end) dates restricting which annual layers are returned.

    Returns
    -------
    The object returned by `dc.load` for `product`, restricted to
    `time_range` and to the spatial extent defined in the notebook-level
    `query` dictionary.
    """
    return dc.load(product=product, time=time_range, **query)


# Load the annual geomedian layers for each Landsat sensor. No `dask_chunks`
# argument is passed to `dc.load`, so the data is read into memory here
# rather than being lazily loaded with dask.
sensor_ls5 = load_annual_geomedian('ls5_nbart_geomedian_annual',
                                   ('1987-01-01', '2018-01-01'))

# NOTE(review): Landsat 7 uses an earlier end date than the other sensors;
# presumably this is deliberate (e.g. to exclude later Landsat 7 data) - confirm
sensor_ls7 = load_annual_geomedian('ls7_nbart_geomedian_annual',
                                   ('1987-01-01', '2002-01-01'))

sensor_ls8 = load_annual_geomedian('ls8_nbart_geomedian_annual',
                                   ('1987-01-01', '2018-01-01'))

# Stack the layers from all three sensors along the time dimension. Years
# covered by more than one sensor appear more than once in this dataset.
sensor_all = xr.concat([sensor_ls5, sensor_ls7, sensor_ls8], dim='time')

In [4]:
# Reduce the stacked multi-sensor dataset to a single layer per timestamp:
# layers that share the same `time` value (i.e. the same year from different
# sensors) are merged by taking their median, keeping dataset attributes such
# as the CRS. `sensor_all` already holds the three sensors concatenated along
# time, so it is reused rather than building a second identical copy in memory.
sensor_combined = sensor_all.groupby('time').median(dim='time', keep_attrs=True)
sensor_combined

<xarray.Dataset>
Dimensions:  (time: 27, x: 1441, y: 1441)
Coordinates:
  * y        (y) float64 -3.816e+06 -3.816e+06 ... -3.852e+06 -3.852e+06
  * x        (x) float64 1.666e+06 1.666e+06 1.666e+06 ... 1.702e+06 1.702e+06
  * time     (time) datetime64[ns] 1988-01-01 1989-01-01 ... 2017-01-01
Data variables:
    blue     (time, y, x) float64 243.0 268.0 287.0 300.0 ... 484.0 494.0 456.0
    green    (time, y, x) float64 343.0 367.0 391.0 402.0 ... 803.0 823.0 773.0
    red      (time, y, x) float64 337.0 367.0 407.0 427.0 ... 884.0 905.0 849.0
    nir      (time, y, x) float64 1.812e+03 1.758e+03 ... 3.713e+03 3.587e+03
    swir1    (time, y, x) float64 1.159e+03 1.204e+03 ... 3.078e+03 2.904e+03
    swir2    (time, y, x) float64 514.0 600.0 670.0 ... 1.727e+03 1.603e+03
Attributes:
    crs:      EPSG:3577

In [5]:
# Normalised difference water index computed from the green and SWIR1 bands:
# (green - swir1) / (green + swir1). Every term must come from
# `sensor_combined` so that all bands share the same one-layer-per-year time
# axis; `sensor_all` still contains the layers of every sensor stacked along
# time and does not line up with it.
sensor_combined["ndwi"] = ((sensor_combined.green - sensor_combined.swir1) /
                           (sensor_combined.green + sensor_combined.swir1))

## Extract waterlines

In [6]:
import glob

# Pixel edge length in metres, used to convert pixel counts into an area
# NOTE(review): assumes 25 m pixels, as hard-coded in the original area
# calculation - confirm against the resolution of the loaded products
pixel_size_m = 25

years = sensor_combined.time

for year in years:

    # Select the single composite layer for this year
    year_ds = sensor_combined.sel(time=year)
    date = year.dt.year.item()
    print(date)

    # Area in square kilometres covered by pixels with an index value above 0
    # (the same threshold used for the waterline contour below)
    water_pixels = (year_ds.ndwi > 0).sum()
    area = (water_pixels * (pixel_size_m * pixel_size_m) / (1000 * 1000)).item()

    # Attribute fields to attach to the extracted contour. Both are declared
    # as 'int', so `area` is requested as an integer field.
    attribute_data = {'date': [date], 'area': [area]}
    attribute_dtypes = {'date': 'int', 'area': 'int'}

    # Extract the 0-value contour (the waterline) and export it to a
    # per-year shapefile with the custom attribute fields
    contour_dict = SpatialTools.contour_extract(z_values=[0],
                                   ds_array=year_ds.ndwi,
                                   ds_crs='epsg:3577',
                                   ds_affine=year_ds.geobox.transform,
                                   output_shp=f'output_data/{study_area}/{study_area}_{date}.shp',
                                   attribute_data=attribute_data,
                                   attribute_dtypes=attribute_dtypes)

# Combine all per-year shapefiles into one file. The glob pattern also matches
# the combined shapefile written by a previous run of this cell, so that file
# is excluded; otherwise every re-run would duplicate all waterlines.
combined_shp = f'output_data/{study_area}/{study_area}_combined.shp'
shapefiles = sorted(shp for shp in glob.glob(f'output_data/{study_area}/{study_area}_*.shp')
                    if os.path.normpath(shp) != os.path.normpath(combined_shp))
gdf = pd.concat([gpd.read_file(shp) for shp in shapefiles], sort=False).pipe(gpd.GeoDataFrame)

# Keep only the fields of interest, order by year and save as combined shapefile
gdf = gdf.reset_index(drop=True)[['date', 'area', 'geometry']].sort_values('date')
gdf.crs = 'epsg:3577'
gdf.to_file(combined_shp)

gdf.head()




1988
Extracting contour 0

Exporting contour shapefile to output_data/warragamba/warragamba_1988.shp
1989
Extracting contour 0

Exporting contour shapefile to output_data/warragamba/warragamba_1989.shp
1990
Extracting contour 0

Exporting contour shapefile to output_data/warragamba/warragamba_1990.shp
1991
Extracting contour 0

Exporting contour shapefile to output_data/warragamba/warragamba_1991.shp
1992
Extracting contour 0

Exporting contour shapefile to output_data/warragamba/warragamba_1992.shp
1993
Extracting contour 0

Exporting contour shapefile to output_data/warragamba/warragamba_1993.shp
1994
Extracting contour 0

Exporting contour shapefile to output_data/warragamba/warragamba_1994.shp
1995
Extracting contour 0

Exporting contour shapefile to output_data/warragamba/warragamba_1995.shp
1996
Extracting contour 0

Exporting contour shapefile to output_data/warragamba/warragamba_1996.shp
1997
Extracting contour 0

Exporting contour shapefile to output_data/warragamba/warragamba

Unnamed: 0,date,area,geometry
1,1988,66,"(LINESTRING (1674803.717483164 -3851537.5, 167..."
16,1989,66,"(LINESTRING (1674787.229509162 -3851537.5, 167..."
10,1990,69,"(LINESTRING (1667187.5 -3815567.510080902, 166..."
3,1991,66,"(LINESTRING (1675162.5 -3815562.942288791, 167..."
4,1992,68,"(LINESTRING (1670866.622832209 -3815537.5, 167..."


In [7]:
# NOTE: this entire cell is commented out, so running it does nothing. It is
# kept as a sketch of building minimum/maximum MNDWI composites from the
# Landsat 5 annual geomedians.

# import datacube
# dc = datacube.Datacube(app='Annual geomedians')
    
# # Return observations matching query 
# sensor_ls5 = dc.load(product = 'ls5_nbart_geomedian_annual', 
#                      time=('1987-01-01', '2018-01-01'),
#                      x=(1343834.550438912, 1389834.550438912),
#                      y=(-4048783.3089217427, -4002783.3089217427),
#                      crs='EPSG:3577')

# # Compute NDVI and MNDWI
# sensor_ls5['ndvi'] = (sensor_ls5.nir - sensor_ls5.red)/(sensor_ls5.nir + sensor_ls5.red)
# sensor_ls5['mndwi'] = (sensor_ls5.green - sensor_ls5.swir1)/(sensor_ls5.green + sensor_ls5.swir1)

# # Find index of min MNDWI, and use to pull out all other band values for that timestep
# inds = sensor_ls5.mndwi.argmin(dim='time')
# min_composite = sensor_ls5.isel(time=inds)

# # Find index of max MNDWI, and use to pull out all other band values for that timestep
# inds = sensor_ls5.mndwi.argmax(dim='time')
# max_composite = sensor_ls5.isel(time=inds)

# # Plot the min composite (left) and max composite (right) as RGB images
# fig, axes = plt.subplots(1, 2, figsize=(20, 10))
# min_composite[['red', 'green', 'blue']].to_array().plot.imshow(robust=True, ax=axes[0])
# max_composite[['red', 'green', 'blue']].to_array().plot.imshow(robust=True, ax=axes[1])


In [8]:
# NOTE: this entire cell is commented out, so running it does nothing. It is
# scratch code that loads the `mangrove_extent_cover_albers` product for a
# test area and (optionally) animates it.
# NOTE(review): the `config` path below points into a specific user's home
# directory, so this code would need a different config file to run elsewhere.

# import datacube
# from datacube.utils import geometry
# from datacube.utils.geometry import CRS

# dc = datacube.Datacube(config='/home/561/rt1527/unpublished_products.conf')
# dc.list_products()

# # Set up centre of area to analyse, and a buffer in metres around this centrepoint
# lat, lon, buffer_m, name = -17.6080348351, 139.80119891, 5000, 'mangrove_test'
# time_range = ('1987-01-01', '1987-09-01')
# resolution = (-25, 25)

# # Build a query box around the centre point; the x extent is 3000 m wider
# # on each side than the y extent
# x, y = geometry.point(lon, lat, CRS('WGS84')).to_crs(CRS('EPSG:3577')).points[0]
# query = {'x': (x - buffer_m - 3000, x + buffer_m + 3000),
#          'y': (y - buffer_m, y + buffer_m),    
#          'time': time_range,
#          'crs': 'EPSG:3577',
#          'output_crs': 'EPSG:3577',
#          'resolution': resolution} 

# test = dc.load(product="mangrove_extent_cover_albers", **query)
# test

# # import DEAPlotting
# # DEAPlotting.animated_timeseries(ds=test.isel(time=[1, 5, 10]),
# #                                 output_path=f'animated_timeseries_{name}.gif',
# #                                 bands=['canopy_cover_class'],
# #                                 interval=500,
# #                                 width_pixels=1000,
# #                                 percentile_stretch=[0.0, 1.0],
# #                                 show_date=False,
# #                                 onebandplot_kwargs={'cmap':'jet'},
# # #                                 onebandplot_cbar = False,
# #                                 title=test.time.dt.year.values.tolist())
