# MAHTS stats

## Background


## Description



## Getting started


### Load packages

First we import the required Python packages and add the local `../Scripts` directory to the Python path so that the DEA helper modules can be imported.

In [55]:
%matplotlib inline
%load_ext line_profiler
%load_ext autoreload
%autoreload 2

# !pip install --user ruptures

import os
import shutil
import sys
# import glob

import geopandas as gpd
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import xarray as xr
# import ruptures as rpt
# from scipy import stats
# from affine import Affine
# from shapely.wkt import loads
from shapely.geometry import box
from shapely.ops import nearest_points
# from rasterio.features import rasterize
from rasterio.transform import array_bounds
# from skimage.measure import label
# from skimage.morphology import disk
# from skimage.morphology import square
# from skimage.morphology import binary_opening
# from skimage.morphology import binary_dilation

# Local helper modules
sys.path.append('../Scripts')
from dea_spatialtools import subpixel_contours
import deacoastlines_statistics as dcl_stats


The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


## Load in data

In [5]:
# Analysis parameters: the ID of the study area grid cell, the name of the
# processing run, and the water index / threshold used to extract shorelines
study_area = 7832
output_name = 'test2'
water_index = 'mndwi'
index_threshold = 0.00

# Create output folder (no error if it already exists)
output_dir = f'output_data/{output_name}_{study_area}/vectors'
os.makedirs(output_dir, exist_ok=True)

## Load DEA Coastline rasters

In [9]:
# Load the yearly rasters for this run and study area using the project helper.
# Later cells read `.crs`, `.transform`, the `year` coordinate, the water
# index variable and `tide_m` from the returned dataset.
yearly_ds = dcl_stats.load_rasters(output_name, study_area, water_index)

## Load external data

In [41]:
# Footprint of the loaded rasters; every vector dataset below is spatially
# subset to this box as it is read from disk
bbox = gpd.GeoSeries(
    box(*array_bounds(height=yearly_ds.sizes['y'],
                      width=yearly_ds.sizes['x'],
                      transform=yearly_ds.transform)),
    crs=yearly_ds.crs,
)

# Estuary mask, reprojected to the raster CRS
estuary_gdf = gpd.read_file('input_data/estuary_mask.shp', bbox=bbox)
estuary_gdf = estuary_gdf.to_crs(yearly_ds.crs)

# Rocky shore mask, reprojected to the raster CRS
smartline_gdf = gpd.read_file('input_data/Smartline.gdb', bbox=bbox)
smartline_gdf = smartline_gdf.to_crs(yearly_ds.crs)

# Tide points, reprojected to the raster CRS
points_gdf = gpd.read_file('input_data/tide_points_coastal.geojson', bbox=bbox)
points_gdf = points_gdf.to_crs(yearly_ds.crs)

# Study area grid, indexed by grid cell ID and reprojected to the raster CRS
comp_gdf = gpd.read_file('input_data/50km_albers_grid.shp', bbox=bbox)
comp_gdf = comp_gdf.set_index('id').to_crs(str(yearly_ds.crs))

# Grid cell for the study area being processed
study_area_poly = comp_gdf.loc[study_area]


## Extract shoreline contours

### Extract ocean-masked contours

In [32]:
# Mask dataset to focus on coastal zone only
# NOTE(review): `points_gdf` must still be the tide points read from
# 'input_data/tide_points_coastal.geojson' here; a later cell rebinds the same
# name to the baseline points, so re-running this cell out of order would
# pass the wrong data to the helper
masked_ds = dcl_stats.contours_preprocess(yearly_ds, 
                                          water_index, 
                                          index_threshold, 
                                          estuary_gdf, 
                                          points_gdf)

# Extract contours at the water index threshold along the `year` dimension
# NOTE(review): `min_vertices=10` presumably drops very short contours —
# confirm against `subpixel_contours`
contours_gdf = subpixel_contours(da=masked_ds,
                                 z_values=index_threshold,
                                 min_vertices=10,
                                 dim='year')

Operating in single z-value, multiple arrays mode


## Compute statistics
### Measure distances from baseline

In [47]:
# Get array of water index values for baseline time period
baseline_year = '2018'
baseline_array = yearly_ds[water_index].sel(year=int(baseline_year))

# Index the contours by year so individual annual shorelines can be looked up
contours_index_gdf = contours_gdf.set_index('year')

# Set annual shoreline to use as a baseline
baseline_contour = contours_index_gdf.loc[[baseline_year]].geometry

# Work with the baseline geometry itself rather than the one-row GeoSeries:
# calling `int()` on `GeoSeries.length` relies on implicit single-element
# conversion (deprecated in recent pandas) and breaks if more than one row
# matches the baseline year
baseline_geom = baseline_contour.iloc[0]

# Generate a point every 30 CRS units along the baseline and convert the
# result to a geopandas.GeoDataFrame
points_line = [baseline_geom.interpolate(i)
               for i in range(0, int(baseline_geom.length), 30)]
points_gdf = gpd.GeoDataFrame(geometry=points_line, crs=baseline_array.crs)

# Make a copy of the GeoDataFrame to hold tidal data
tide_points_gdf = points_gdf.copy()

In [56]:
# Copy geometry to baseline point
points_gdf['p_baseline'] = points_gdf.geometry

# The baseline coordinates are identical for every comparison year, so wrap
# them as DataArrays once, outside the loop. The shared 'z' dimension makes
# `interp` sample one value per point instead of a full x/y grid.
baseline_x_vals = xr.DataArray(points_gdf.geometry.x, dims='z')
baseline_y_vals = xr.DataArray(points_gdf.geometry.y, dims='z')

# Years to compare against the baseline. The same list drives both the loop
# and the final column selection, so the two can never disagree.
comp_years = contours_index_gdf.index.unique().values.tolist()

# Iterate through all comparison years in contour gdf
for comp_year in comp_years:

    print(comp_year)

    # Set comparison contour
    comp_contour = contours_index_gdf.loc[[comp_year]].geometry.iloc[0]

    # Find nearest point on comparison contour
    points_gdf[f'p_{comp_year}'] = points_gdf.apply(
        lambda x: nearest_points(x.p_baseline, comp_contour)[1], axis=1)

    # Compute distance between baseline and comparison year points
    points_gdf[f'{comp_year}'] = points_gdf.apply(
        lambda x: x.geometry.distance(x[f'p_{comp_year}']), axis=1)

    # Extract comparison array
    comp_array = yearly_ds[water_index].sel(year=int(comp_year))

    # Convert comparison year points to a GeoSeries (once) to allow easy
    # access to x and y coords
    comp_points = gpd.GeoSeries(points_gdf[f'p_{comp_year}'])
    comp_x_vals = xr.DataArray(comp_points.x, dims='z')
    comp_y_vals = xr.DataArray(comp_points.y, dims='z')

    # Sample water index values: the comparison year's array at the baseline
    # points, and the baseline array at the comparison year's points
    points_gdf['index_comp_p1'] = comp_array.interp(x=baseline_x_vals, y=baseline_y_vals)
    points_gdf['index_baseline_p2'] = baseline_array.interp(x=comp_x_vals, y=comp_y_vals)

    # Compute directionality of change (negative = erosion, positive = accretion)
    points_gdf['loss_gain'] = np.where(points_gdf.index_baseline_p2 >
                                       points_gdf.index_comp_p1, 1, -1)
    points_gdf[f'{comp_year}'] = points_gdf[f'{comp_year}'] * points_gdf.loss_gain

    # Add tide data sampled at the baseline points
    tide_array = yearly_ds['tide_m'].sel(year=int(comp_year))
    tide_points_gdf[f'{comp_year}'] = tide_array.interp(x=baseline_x_vals, y=baseline_y_vals)

# Keep required columns (geometry plus one signed distance column per year)
points_gdf = points_gdf[['geometry'] + comp_years]
points_gdf = points_gdf.round(2)

# Zero values to 1988
points_gdf.iloc[:,1:] = points_gdf.iloc[:,1:].subtract(points_gdf['1988'], axis=0)

# Identify dates for regression
x_years = yearly_ds.year.values

1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018


In [60]:
# Years present in the contour index (stored as strings)
contours_index_gdf.index.unique().values

array(['1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995',
       '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003',
       '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011',
       '2012', '2013', '2014', '2015', '2016', '2017', '2018'],
      dtype=object)

In [57]:
# Years taken from the raster dataset (stored as integers)
x_years

array([1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
       1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009,
       2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018])

In [61]:
# Raster years converted to strings, matching the per-year column labels
yearly_ds.year.values.astype(str)

array(['1988', '1989', '1990', '1991', '1992', '1993', '1994', '1995',
       '1996', '1997', '1998', '1999', '2000', '2001', '2002', '2003',
       '2004', '2005', '2006', '2007', '2008', '2009', '2010', '2011',
       '2012', '2013', '2014', '2015', '2016', '2017', '2018'],
      dtype='<U21')

### Calculate regressions

In [None]:
# NOTE(review): `change_regress` is not defined or imported in the visible
# cells — confirm where it comes from before running on a fresh kernel

# Load the climate indices table and keep only the years being analysed
climate_df = pd.read_csv('input_data/climate_indices.csv', index_col='year')
climate_df = climate_df.loc[x_years,:]

# Per-year distance columns are labelled with the year as a string
year_cols = x_years.astype(str)
distances_df = points_gdf[year_cols]

# Compute change rates: regress each point's distances against time
rate_out = distances_df.apply(
    lambda row: change_regress(row=row,
                               x_vals=x_years,
                               x_labels=x_years,
                               std_dev=2),
    axis=1)
points_gdf[['rate_time', 'incpt_time', 'sig_time', 'outl_time']] = rate_out

# Compute tide flag: regress each point's distances against the tide values
# sampled at that point (rows are paired via the tide row's name)
tide_out = tide_points_gdf[year_cols].apply(
    lambda tide_row: change_regress(row=distances_df.iloc[tide_row.name],
                                    x_vals=tide_row,
                                    x_labels=x_years,
                                    std_dev=2),
    axis=1)
points_gdf[['rate_tide', 'incpt_tide', 'sig_tide', 'outl_tide']] = tide_out

# Compute stats for each climate index column
for ci in climate_df:

    print(ci)

    # Regress each point's distances against this index
    # (detrending via `detrend_params=[rate_time, incpt_time]` is disabled)
    ci_out = distances_df.apply(
        lambda row: change_regress(row=row,
                                   x_vals=climate_df[ci].values,
                                   x_labels=x_years,
                                   std_dev=2),
        axis=1)

    # Add data as columns
    points_gdf[[f'rate_{ci}', f'incpt_{ci}', f'sig_{ci}', f'outl_{ci}']] = ci_out

# Breakpoint detection is currently disabled:
# points_gdf['breakpoint'] = points_gdf.apply(
#     lambda x: breakpoints(x=x[x_years.astype(str)], labels=x_years, pen=10),
#     axis=1)

# Set CRS
points_gdf.crs = baseline_array.crs

# Custom sorting: rates first, then significance, then outliers (each in the
# same variable order), followed by the yearly distances and the geometry
stat_names = ['time', 'SOI', 'IOD', 'SAM', 'IPO', 'PDO', 'tide']
ordered_cols = ([f'rate_{name}' for name in stat_names] +
                [f'sig_{name}' for name in stat_names] +
                [f'outl_{name}' for name in stat_names] +
                year_cols.tolist() +
                ['geometry'])
points_towrite = points_gdf.loc[:, ordered_cols]

## Export files

In [None]:
# Clip points to extent of polygon
stats_path = f'{output_dir}/{study_area}_stats_{water_index}_{index_threshold:.2f}_{output_name}'
points_gdf = points_gdf[points_gdf.intersects(study_area_poly['geometry'])]
points_gdf.to_file(f'{stats_path}.geojson', driver='GeoJSON')

# Overwrite contours after clipping to study area
contour_path = f'{output_dir}/{study_area}_contours_{water_index}_{index_threshold:.2f}_{output_name}'
contours_gdf['geometry'] = contours_gdf.intersection(study_area_poly['geometry'])
contours_gdf.to_file(f'{contour_path}.geojson', driver='GeoJSON')

# Export as shapefile
contours_gdf.to_file(f'{contour_path}.shp')
points_towrite.to_file(f'{stats_path}.shp')

shutil.make_archive(base_name=f'output_data/outputs_{study_area}_{output_name}', 
                    format='zip', 
                    root_dir=output_dir)

rocky_shore_buffer = rocky_shores_buffer(smartline_gdf=smartline_gdf, buffer=50)
points_gdf = points_gdf[~points_gdf.intersects(rocky_shore_buffer.geometry.unary_union)]
points_gdf.to_file(f'{stats_path}_nonrocky.geojson', driver='GeoJSON')

## Evaluation

### Time history

In [None]:
# Spot check: re-run the time regression for a single point
# NOTE(review): this selects row 5129 by position (`iloc`), while the plotting
# cell below selects `id_val = 5129` by label (`loc`); once `points_gdf` has
# been clipped these may not refer to the same point — confirm
test = points_gdf[x_years.astype(str)].iloc[[5129]] 

test.apply(lambda x: change_regress(row=x,
                                    x_vals = x_years, 
                                    x_labels = x_years, 
                                    std_dev=2), axis=1)

In [None]:
# Display the yearly distances for the spot-check point
test

In [None]:
# Point (by index label) whose shoreline history is plotted
id_val = 5129

plot_df = pd.DataFrame({
    'mov': points_gdf.loc[id_val, contours_gdf.year.to_list()].values.astype(float),
    'time': x_years,
    'soi': climate_df['SOI'].values,
    # Average only the yearly tide columns across all points: taking the mean
    # of the whole GeoDataFrame would also try to average the geometry
    # column, which raises a TypeError in recent pandas versions
    'tide': tide_points_gdf[x_years.astype(str)].mean(axis=0),
})

# Shoreline movement against time, SOI and tide; each tuple gives the
# x-axis variable and the variable used to colour the points
for x_var, colour_var in [('time', 'soi'), ('soi', 'soi'), ('tide', 'tide')]:
    plot_df.plot.scatter(x=x_var,
                         y='mov',
                         c=colour_var,
                         cmap='RdYlBu',
                         s=50,
                         edgecolors="black")

In [None]:
# Mean tide value per year against time, coloured by the tide value itself
plot_df.plot.scatter(x='time',
                     y='tide',
                     c='tide',
                     cmap='RdYlBu',
                     s=50,
                     edgecolors="black")

### Test breakpoints

In [None]:
# id_val = 6015
# signal = points_gdf.loc[id_val,contours_gdf.index.to_list()].values

# # detection
# algo = rpt.Pelt(model="rbf", min_size=2, jump=1).fit(signal)
# result = algo.predict(pen=8)
# print(contours_gdf.index.to_list()[result[0]])
# rpt.display(signal, [32], result)


### Index correlation

In [None]:
# corr = climate_df.corr()
# corr.style.background_gradient(cmap='RdBu')

In [None]:
import datetime
from otps import TimePoint
from otps import predict_tide

# Use the tidal model to compute tide heights for each observation:
# two example observation times at a single fixed location
# NOTE(review): the first two `TimePoint` arguments look like lon, lat —
# confirm against the `otps` documentation
obs_datetimes = [datetime.datetime(1986, 8, 23, 1, 36, 23),
                 datetime.datetime(1987, 5, 29, 1, 44, 59)]
obs_timepoints = [TimePoint(115.35, -20.86, dt) 
                  for dt in obs_datetimes]
obs_predictedtides = predict_tide(obs_timepoints)  

In [None]:
import geopandas as gpd
# NOTE(review): this rebinds `comp_gdf`, which earlier in the notebook holds
# the 50 km study area grid — re-running earlier cells afterwards (or this
# cell before them) changes what `comp_gdf` refers to
comp_gdf = gpd.read_file('input_data/item_polygons.geojson')

In [None]:
import datacube
# Connect to the datacube using the 'c3-samples' environment
dc = datacube.Datacube(env='c3-samples')

from dea_datahandling import load_ard
from dea_coastaltools import tidal_tag


def tidal_sync(row):
    """
    Regress annual mean high-tide heights against time for one location.

    Loads `nbart_green` Landsat observations for 1988-2018 in a window of
    0.01 either side of `row.lon` and `row.lat`, tags each observation with
    a tide height using `tidal_tag`, averages the above-median tide heights
    for each year, and passes those annual means to `change_regress`.

    Parameters
    ----------
    row : pandas.Series
        Record providing `lon` and `lat` attributes.

    Returns
    -------
    pandas.Series or None
        `row` with the regression outputs appended, or None if any step
        failed (the error is printed rather than raised, so that applying
        this function over many rows is not interrupted by one bad row).
    """

    try:

        lon = row.lon
        lat = row.lat

        # Load available data from all three Landsat satellites
        ds = load_ard(dc=dc,
                      products=['ga_ls5t_ard_3', 'ga_ls7e_ard_3', 'ga_ls8c_ard_3'],
                      x=(lon - 0.01, lon + 0.01),
                      y=(lat - 0.01, lat + 0.01),
                      time=('1988', '2018'),
                      measurements=['nbart_green'],
                      output_crs='EPSG:3577',
                      gqa_iterative_mean_xy=[0, 1],
                      cloud_cover=[0, 80],
                      resolution=(-30, 30),
                      group_by='solar_day',
                      dask_chunks={})

        # Add a `tide_height` value to every observation
        ds = tidal_tag(ds=ds)

        # Keep only observations above the median tide height, then average
        # them for each year
        above_median = ds.tide_height > ds.tide_height.median()
        annual_ht_mean = (ds.tide_height
                          .sel(time=above_median)
                          .resample(time='Y')
                          .mean())

        # Regress the annual means against year
        annual_ht_stats = change_regress(row=annual_ht_mean,
                                         x_vals=annual_ht_mean.time.dt.year,
                                         x_labels=annual_ht_mean.time.dt.year,
                                         std_dev=5,
                                         detrend_params=None,
                                         slope_var='slope',
                                         interc_var='intercept',
                                         pvalue_var='pvalue',
                                         outliers_var='outliers')

        print(annual_ht_stats)

        # `Series.append` was removed in pandas 2.0; `pd.concat` is the
        # supported equivalent
        # NOTE(review): assumes `change_regress` returns a pandas.Series
        return pd.concat([row, annual_ht_stats])

    except Exception as error:
        # Deliberately best-effort, but report why the row failed instead of
        # hiding it, and no longer swallow KeyboardInterrupt/SystemExit
        print(f'Failed: {error}')
        return None


In [None]:
# test = comp_gdf.iloc[200:].apply(lambda x: tidal_sync(x), axis=1)
# test[~test.ID.isna()].to_file('tide_sync_test_3.geojson', driver='GeoJSON')

In [None]:
# Install or upgrade `odc-algo` into the user site-packages from the DEA package index
pip install -U --user --extra-index-url="https://packages.dea.ga.gov.au/" odc-algo

In [None]:
# Connect to the datacube using the 'c3-samples' environment
dc = datacube.Datacube(app='Intertidal_elevation', env='c3-samples')

In [None]:
import datacube
from dea_datahandling import load_ard

# Connect to the datacube, preferring the 'c3-samples' environment and
# falling back to the default environment if that fails. No unconditional
# 'c3-samples' connection is made beforehand: it would raise before the
# fallback could ever be used.
try:
    dc = datacube.Datacube(app='Intertidal_elevation', env='c3-samples')
except Exception:
    dc = datacube.Datacube(app='Intertidal_elevation')

# Centre of the area to load
lat, lon = -20.58, 117.87

# Load available data from all three Landsat satellites in a window of
# 0.01 either side of the centre point
ds = load_ard(dc=dc,
              products=['ga_ls5t_ard_3', 'ga_ls7e_ard_3', 'ga_ls8c_ard_3'],
              x=(lon - 0.01, lon + 0.01),
              y=(lat - 0.01, lat + 0.01),
              time=('1988', '2018'),
              measurements=['nbart_green'],
              output_crs='EPSG:3577',
              resolution=(-30, 30),
              group_by='solar_day',
              dask_chunks={})


In [None]:
import datacube
from dea_datahandling import load_ard
from dea_coastaltools import tidal_tag
from dea_coastaltools import tidal_stats

# Connect to the datacube using the 'c3-samples' environment
dc = datacube.Datacube(env='c3-samples')

# Centre of the area to load
lon = 137.21
lat = -15.99

# Load available data from all three Landsat satellites in a window of
# 0.01 either side of the centre point
ds = load_ard(
    dc=dc,
    products=['ga_ls5t_ard_3', 'ga_ls7e_ard_3', 'ga_ls8c_ard_3'],
    x=(lon - 0.01, lon + 0.01),
    y=(lat - 0.01, lat + 0.01),
    time=('1988', '2018'),
    measurements=['nbart_green'],
    output_crs='EPSG:3577',
    gqa_iterative_mean_xy=[0, 1],
    cloud_cover=[0, 80],
    resolution=(-30, 30),
    group_by='solar_day',
    dask_chunks={},
)

# Summary statistics plus the tide series themselves
# NOTE(review): `observed_tides` / `all_tides` are presumably the tides at
# observation times and at the 30-minute modelled frequency — confirm
# against `tidal_stats`
stats, observed_tides, all_tides = tidal_stats(
    ds=ds,
    return_tides=True,
    modelled_freq='30T',
)

# print(annual_ht_stats)
# return(row.append(annual_ht_stats))

In [None]:
# 40th and 60th percentiles of the full tide series
all_tides.quantile([0.4, 0.6])

In [None]:
# List the metadata attributes available on a single `ga_ls5t_ard_3` dataset
# NOTE(review): this rebinds `ds`, replacing the data loaded with `load_ard`
# in the cell above; later cells that read `ds.tide_height` depend on which
# cell last assigned `ds`
ds, = dc.find_datasets(product='ga_ls5t_ard_3', limit=1)
dir(ds.metadata)

In [None]:
# Tide window centred on 0, extending 15% of the full tide range either side
tide_diff = (all_tides.max() - all_tides.min()) * 0.15
tide_min = 0 - tide_diff
tide_max = 0 + tide_diff

In [None]:
# Number of observations per year whose tide height falls inside the window
observed_tides[observed_tides.tide_height.between(tide_min.item(), tide_max.item())].resample('1Y').count()

In [None]:
# Tide series for roughly one month (4 June to 5 July 2018)
all_tides.loc[slice('2018-06-04', '2018-07-05')].plot(figsize=(8, 8))

In [None]:
# Tide series for two days (1 to 2 December 2018)
all_tides.loc[slice('2018-12-01', '2018-12-02')].plot(figsize=(8, 8))

In [None]:
# Buffer of 15% of the observed tide range, capped at 1.0, applied either
# side of a tide height of 0.5
tide_buffer = min(((observed_tides.tide_height.max() - observed_tides.tide_height.min()) * 0.15), 1.0)
tide_cutoff_min = 0.5-tide_buffer
tide_cutoff_max = 0.5+tide_buffer


# Plot the observed tides with the two cutoffs marked in red
observed_tides.plot()
plt.axhline(tide_cutoff_min, color='red')
plt.axhline(tide_cutoff_max, color='red')

# Number of observations per year strictly between the two cutoffs
yearly_counts = observed_tides[(observed_tides.tide_height > tide_cutoff_min) & 
                               (observed_tides.tide_height < tide_cutoff_max)].resample('Y').count()



In [None]:
# Display the tide buffer computed above
tide_buffer 

In [None]:



# Alternative buffer: 25% of the tide range, clipped to between 0 and 1.0
# NOTE(review): requires `ds` to carry a `tide_height` variable; no visible
# top-level cell assigns one — confirm which cell is expected to have run
((ds.tide_height.max() - ds.tide_height.min()) * 0.25).clip(0, 1.0)

In [None]:
# Average number of in-window observations per year
yearly_counts.mean()

In [None]:
# Fewest in-window observations in any year
yearly_counts.min()

In [None]:
# Most in-window observations in any year
yearly_counts.max()

In [None]:
# Keep observations whose tide height lies between the 60th and 90th
# percentiles (inclusive), then plot them with their annual mean in red
# NOTE(review): this rebinds `test`, and needs `ds` to carry `tide_height`
test = ds.tide_height.sel(time = ((ds.tide_height >= ds.tide_height.quantile(q=0.6)) & 
                                  (ds.tide_height <= ds.tide_height.quantile(q=0.9))))
test.plot(size=8)
test.resample(time='Y').mean().plot(linewidth=5, color='red')

In [None]:
# Annual mean of the selected tide heights
test.resample(time='Y').mean()

In [None]:
# Tide values for the point at row 3057, skipping the first (geometry) column
tide_points_gdf.iloc[3057, 1:].plot()

In [None]:
# Read the three partial tide sync result files
# NOTE(review): `two` previously read 'tide_sync_test_3.geojson', the same
# file as `three`, which duplicated those rows in the merged output. This
# assumes the second part was saved as 'tide_sync_test_2.geojson' — confirm
# the file name.
one = gpd.read_file('tide_sync_test_1.geojson')
two = gpd.read_file('tide_sync_test_2.geojson')
three = gpd.read_file('tide_sync_test_3.geojson')

In [None]:


# Stack the three partial result sets and write them to a single GeoJSON
pd.concat([one, two, three]).to_file('tide_sync_test.geojson', driver='GeoJSON')

In [None]:
# Plot only the rows that have a non-null `ID`
# NOTE(review): expects `test` to hold the `tidal_sync` results, but other
# cells rebind `test` to different objects — confirm which cell ran last
test[~test.ID.isna()].plot()

# .to_file('tide_sync_test.geojson', driver='GeoJSON')

In [None]:
import pickle            
            
# Serialise `test` to disk; the file handle is closed by the `with` block
# NOTE(review): only load this pickle back from a trusted location, as
# unpickling can execute arbitrary code
with open(r"someobject.pickle", "wb") as output_file:
    pickle.dump(test, output_file)

***

## Additional information

**License:** The code in this notebook is licensed under the [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0). 
Digital Earth Australia data is licensed under the [Creative Commons by Attribution 4.0](https://creativecommons.org/licenses/by/4.0/) license.

**Contact:** If you need assistance, please post a question on the [Open Data Cube Slack channel](http://slack.opendatacube.org/) or on the [GIS Stack Exchange](https://gis.stackexchange.com/questions/ask?tags=open-data-cube) using the `open-data-cube` tag (you can view previously asked questions [here](https://gis.stackexchange.com/questions/tagged/open-data-cube)).
If you would like to report an issue with this notebook, you can file one on [Github](https://github.com/GeoscienceAustralia/dea-notebooks).

**Last modified:** October 2019

**Compatible datacube version:** 

In [None]:
# print(datacube.__version__)

## Tags
Browse all available tags on the DEA User Guide's [Tags Index](https://docs.dea.ga.gov.au/genindex.html)