# MAHTS stats

## Background


## Description



## Getting started


### Load packages

First we import the required Python packages, define the `change_regress` helper used to compute rates of change, and configure unsigned S3 access to speed up data loading.

In [41]:
%matplotlib inline
%load_ext line_profiler
%load_ext autoreload
%autoreload 2

# import os
# import sys
# import otps
# import datacube
# import shapely.wkt
import numpy as np
# import pandas as pd
import xarray as xr
# import geopandas as gpd
# import matplotlib.pyplot as plt
# from skimage import measure
# from skimage.morphology import disk
# from skimage.morphology import square
# from skimage.morphology import binary_dilation
# from datacube.helpers import write_geotiff
# from datacube.virtual import catalog_from_file

# sys.path.append('../Scripts')
# from dea_plotting import rgb
# from dea_plotting import map_shapefile
# from dea_plotting import display_map
# from dea_spatialtools import interpolate_2d
# from dea_spatialtools import subpixel_contours
# from dea_spatialtools import largest_region

from scipy import stats
import pandas as pd

def change_regress(row, x_vals, x_labels, std_dev=3):
    """
    Fit a linear regression of a row's values against `x_vals`.

    Parameters
    ----------
    row : pandas.Series
        One row of the points table. The first element is skipped
        (callers in this notebook pass the geometry there); the remaining
        elements are converted to floats and used as the y values.
    x_vals : array-like
        x values for the regression, one per y value.
    x_labels : array-like
        Labels, one per y value, used to report which observations
        were excluded as outliers.
    std_dev : int or float, optional
        Z-score threshold. An observation is excluded when the absolute
        z-score of its x or y value is not below this threshold.
        Defaults to 3.

    Returns
    -------
    pandas.Series
        'slope' (rounded to 2 decimal places), 'pvalue' (rounded to 3
        decimal places) and 'outliers' (the string form of the excluded
        labels with the square brackets removed). 'slope' and 'pvalue'
        are NaN when fewer than two observations remain.
    """

    # Extract x and y values. The builtin `float` is used because the
    # `np.float` alias has been removed from recent NumPy releases
    x = np.asarray(x_vals, dtype=float)
    y = row.values[1:].astype(float)

    # Drop observations where either x or y is NaN
    xy = np.column_stack([x, y])
    is_valid = ~np.isnan(xy).any(axis=1)
    xy = xy[is_valid]
    valid_labels = np.asarray(x_labels)[is_valid]

    # Flag outliers. Z-scores need at least two observations, and a column
    # with zero variance produces NaN z-scores; `~(z >= threshold)` keeps
    # those rows instead of discarding every observation
    if len(xy) >= 2:
        z_scores = np.abs(stats.zscore(xy))
        is_inlier = ~(z_scores >= float(std_dev)).any(axis=1)
    else:
        is_inlier = np.ones(len(xy), dtype=bool)
    xy_inliers = xy[is_inlier]
    outlier_labels = valid_labels[~is_inlier]

    # Compute linear regression (undefined for fewer than two points)
    if len(xy_inliers) >= 2:
        lin_reg = stats.linregress(x=xy_inliers[:, 0],
                                   y=xy_inliers[:, 1])
        slope, pvalue = lin_reg.slope, lin_reg.pvalue
    else:
        slope, pvalue = np.nan, np.nan

    # Return slope, p-value and the labels excluded from the regression
    return pd.Series({'slope': np.round(slope, 2),
                      'pvalue': np.round(pvalue, 3),
                      'outliers': str(outlier_labels).replace('[', '').replace(']', '')})


# Configure datacube's S3 access to use unsigned requests (`aws_unsigned=True`).
# This will speed up loading data
import datacube.utils.rio
datacube.utils.rio.configure_s3_access(aws_unsigned=True)


The line_profiler extension is already loaded. To reload it, use:
  %reload_ext line_profiler
The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [61]:
import fiona
from shapely.geometry import mapping
from shapely.ops import nearest_points
from shapely.geometry import Point, LineString, MultiPoint
import geopandas as gpd

# Analysis parameters used to build the input and output file paths
study_area = 'goldcoast_overlap'
water_index = 'mndwi'
index_threshold = '0.00'

# Annual shoreline to use as a baseline. Defined once so that the baseline
# raster and the baseline contour always refer to the same year
baseline_year = '1990'

# Spacing between points generated along the baseline contour, in the
# units of the baseline raster's CRS
point_spacing = 30

# Get array of water index values for baseline time period
baseline_array = xr.open_rasterio(
    filename=f'output_data/{study_area}/{water_index}_{baseline_year}.tif').squeeze(dim='band')

# Import contours, project to the CRS of the baseline raster and index by year
contours_gdf = (gpd.read_file(f'output_data/{study_area}/contours_{water_index}_{index_threshold}.geojson')
                .to_crs(baseline_array.crs)
                .set_index('year'))

# Select the baseline contour. `baseline_contour` is a one-row GeoSeries;
# `baseline_line` is the geometry itself, so its `.length` is a plain float
# (calling `int()` on a one-element Series is deprecated in pandas)
baseline_contour = contours_gdf.loc[[baseline_year]].geometry
baseline_line = baseline_contour.iloc[0]

# Generate points along line and convert to geopandas.GeoDataFrame
points_line = [baseline_line.interpolate(dist)
               for dist in range(0, int(baseline_line.length), point_spacing)]
points_gdf = gpd.GeoDataFrame(geometry=points_line, crs=baseline_array.crs)

# Copy geometry to baseline point, and keep the x and y coordinates for
# sampling the rasters later
points_gdf['p_baseline'] = points_gdf.geometry
baseline_x_vals = points_gdf.geometry.x
baseline_y_vals = points_gdf.geometry.y


In [62]:
# Wrap the baseline point coordinates as DataArrays that share a 'z'
# dimension, so that `.interp` samples one value per point. These do not
# change between years, so they are built once before the loop and kept
# under new names rather than overwriting `baseline_x_vals`/`baseline_y_vals`
baseline_x_da = xr.DataArray(baseline_x_vals, dims='z')
baseline_y_da = xr.DataArray(baseline_y_vals, dims='z')

# Iterate through all comparison years in contour gdf
for comp_year in contours_gdf.index.unique().values:

    print(comp_year)

    # Set comparison contour
    comp_contour = contours_gdf.loc[[comp_year]].geometry.iloc[0]

    # Find nearest point on comparison contour
    points_gdf[f'p_{comp_year}'] = points_gdf.apply(
        lambda x: nearest_points(x.p_baseline, comp_contour)[1], axis=1)

    # Compute distance between baseline and comparison year points
    points_gdf[f'{comp_year}'] = points_gdf.apply(
        lambda x: x.geometry.distance(x[f'p_{comp_year}']), axis=1)

    # Extract comparison array
    comp_array = xr.open_rasterio(
        filename=f'output_data/{study_area}/{water_index}_{comp_year}.tif').squeeze(dim='band')

    # Convert comparison year points to a geoseries (once) to allow easy
    # access to x and y coords, wrapped with the same 'z' dimension
    comp_points = gpd.GeoSeries(points_gdf[f'p_{comp_year}'])
    comp_x_vals = xr.DataArray(comp_points.x, dims='z')
    comp_y_vals = xr.DataArray(comp_points.y, dims='z')

    # Sample water index values: the comparison array at the baseline
    # points, and the baseline array at the comparison points
    points_gdf['index_comp_p1'] = comp_array.interp(x=baseline_x_da, y=baseline_y_da)
    points_gdf['index_baseline_p2'] = baseline_array.interp(x=comp_x_vals, y=comp_y_vals)

    # Compute directionality of change (negative = erosion, positive = accretion):
    # +1 where the baseline index at the comparison point exceeds the
    # comparison index at the baseline point, otherwise -1
    points_gdf['loss_gain'] = (points_gdf.index_baseline_p2 > points_gdf.index_comp_p1).astype(int).replace(to_replace=0, value=-1)
    points_gdf[f'{comp_year}'] = points_gdf[f'{comp_year}'] * points_gdf.loss_gain

# Keep required columns: the point geometry plus one signed-distance column
# per year in the contour index. This drops the per-year point columns
# (`p_<year>`) and the temporary sampling columns created in the loop.
# NOTE: `points_gdf` is overwritten here and no longer has `p_baseline`,
# so the loop above cannot be re-run without first re-creating `points_gdf`
points_gdf = points_gdf[['geometry'] + contours_gdf.index.unique().values.tolist()]
points_gdf = points_gdf.round(2)

# Identify dates for regression: the first four characters of each
# remaining column label (every column after 'geometry'), as integers
x_years = np.array([int(i[:4]) for i in points_gdf.columns[1:]])

# # Identify SOI values for regression
# soi_df = pd.read_csv('/g/data/r78/rt1527/dea-notebooks/Waterline_extraction/raw_data/SOI_EastAnglia.txt', 
#                      sep='\t', skiprows=1, usecols=['year', 'annual average'], index_col='year')
# soi_df = soi_df.rename({'annual average': 'annual_SOI'}, axis=1)
# x_soi = soi_df.loc[x_years].annual_SOI.values

# # Identify La Nina / El Nino years
# lan_eln = np.array(['ElN', 'LaN', 'LaN', 'na', 'ElN', 'ElN', 'ElN', 'ElN', 'na', 'na', 'ElN', 
#                     'LaN', 'LaN', 'LaN', 'na', 'ElN', 'na', 'na', 'na', 'ElN', 'LaN', 'LaN', 
#                     'ElN', 'LaN', 'LaN', 'na', 'na', 'na', 'ElN', 'na', 'na', 'na'])
# lan_eln_df = pd.DataFrame({'year': range(1987, 2019), 'lan_eln': lan_eln}).set_index('year')
# x_lan_eln = lan_eln_df.loc[x_years].lan_eln.values

# # Get custom x values
# x_neg = np.where(x_soi >= 0, x_soi, np.nan)
# x_pos = np.where(x_soi <= 0, x_soi, np.nan)
# x_lan = np.where(x_lan_eln == 'LaN', x_soi, np.nan)
# x_eln = np.where(x_lan_eln == 'ElN', x_soi, np.nan)

# Compute change rates: regress each point's yearly distances against the
# years. `change_regress` returns slope, p-value and excluded outlier years,
# which are stored as `mov_rate`, `mov_sig` and `mov_outl` respectively
rate_out = points_gdf.apply(lambda x: change_regress(x, x_vals = x_years, x_labels = x_years, std_dev=3), axis=1)
# soi_out = points_gdf.apply(lambda x: change_regress(x, x_vals = x_soi, x_labels = x_years, std_dev=3), axis=1)
# neg_out = points_gdf.apply(lambda x: change_regress(x, x_vals = x_neg, x_labels = x_years, std_dev=3), axis=1)
# pos_out = points_gdf.apply(lambda x: change_regress(x, x_vals = x_pos, x_labels = x_years, std_dev=3), axis=1)
# eln_out = points_gdf.apply(lambda x: change_regress(x, x_vals = x_eln, x_labels = x_years, std_dev=3), axis=1)
# lan_out = points_gdf.apply(lambda x: change_regress(x, x_vals = x_lan, x_labels = x_years, std_dev=3), axis=1)
points_gdf[['mov_rate', 'mov_sig', 'mov_outl']] = rate_out
# points_gdf[['soi_rate', 'soi_sig', 'soi_outl']] = soi_out
# points_gdf[['neg_rate', 'neg_sig', 'neg_outl']] = neg_out
# points_gdf[['pos_rate', 'pos_sig', 'pos_outl']] = pos_out
# points_gdf[['eln_rate', 'eln_sig', 'eln_outl']] = eln_out
# points_gdf[['lan_rate', 'lan_sig', 'lan_outl']] = lan_out

# Set insignificant rates (p-value above 0.05) to nan
points_gdf.loc[points_gdf.mov_sig > 0.05, 'mov_rate'] = np.nan
# points_gdf.loc[points_gdf.soi_sig > 0.05, 'soi_rate'] = np.nan
# points_gdf.loc[points_gdf.neg_sig > 0.05, 'neg_rate'] = np.nan
# points_gdf.loc[points_gdf.pos_sig > 0.05, 'pos_rate'] = np.nan
# points_gdf.loc[points_gdf.eln_sig > 0.05, 'eln_rate'] = np.nan
# points_gdf.loc[points_gdf.lan_sig > 0.05, 'lan_rate'] = np.nan

# Set CRS to that of the baseline raster
points_gdf.crs = baseline_array.crs

# # Sort by descending absolute value and export
# points_gdf.reindex(points_gdf.mov_rate.abs().sort_values().index).to_file(baseline_points_shp)

1987
1988
1989
1990
1991
1992
1993
1994
1995
1996
1997
1998
1999
2000
2001
2002
2003
2004
2005
2006
2007
2008
2009
2010
2011
2012
2013
2014
2015
2016
2017
2018


In [63]:
# Order the points by ascending absolute rate of change (`sort_values`
# default order), then export the reordered table as GeoJSON
stats_path = f'output_data/{study_area}/stats_{water_index}_{index_threshold}.geojson'
rate_order = points_gdf.mov_rate.abs().sort_values().index
points_sorted = points_gdf.reindex(rate_order)
points_sorted.to_file(stats_path, driver='GeoJSON')

In [None]:
# Display the value left in `comp_year` once the loop over comparison years has finished
comp_year

***

## Additional information

**License:** The code in this notebook is licensed under the [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0). 
Digital Earth Australia data is licensed under the [Creative Commons by Attribution 4.0](https://creativecommons.org/licenses/by/4.0/) license.

**Contact:** If you need assistance, please post a question on the [Open Data Cube Slack channel](http://slack.opendatacube.org/) or on the [GIS Stack Exchange](https://gis.stackexchange.com/questions/ask?tags=open-data-cube) using the `open-data-cube` tag (you can view previously asked questions [here](https://gis.stackexchange.com/questions/tagged/open-data-cube)).
If you would like to report an issue with this notebook, you can file one on [GitHub](https://github.com/GeoscienceAustralia/dea-notebooks).

**Last modified:** October 2019

**Compatible datacube version:** 

In [None]:
# Print the version of the datacube package imported in this session
print(datacube.__version__)

## Tags
Browse all available tags on the DEA User Guide's [Tags Index](https://docs.dea.ga.gov.au/genindex.html)