# DEA Coastlines summary plots

In [None]:
cd ../'Tools'

In [None]:
import pandas as pd
import numpy as np
import geopandas as gpd
import matplotlib.pyplot as plt

from sklearn.linear_model import LinearRegression
from dea_tools.coastal import get_coastlines

from scipy.stats import ttest_ind
from scipy.stats import ttest_rel

## Load data

In [None]:
# Set study area from vector file

## Load the region polygons and index them by their label attribute.
## Exactly one path inside `gpd.read_file(...)` should be left uncommented; the other
## entries are alternative region files kept for reference. The `.set_index(...)` call
## must name an attribute column that exists in the chosen file (the alternatives used
## with the other files are left commented out next to it).
## NOTE(review): the active path is an absolute, user-specific location.
regions_gdf = (
    gpd.read_file(
        # "https://data.dea.ga.gov.au/derivative/dea_coastlines/supplementary/australia_mainland_regions.geojson"
        # '/home/jovyan/dev/dea-notebooks/Testing/Buffered_GBR_mngt_areas.shp' ## Management area polygons
        # '/home/jovyan/dev/dea-notebooks/Testing/Manual_all_GBR_Coastlines.shp' ## Greater GBR extent
        # '/home/jovyan/dev/dea-notebooks/Testing/Aus_bbox.shp' ## Australia bounding box (presumably; confirm)
        # '/home/jovyan/dev/dea-notebooks/Testing/mainland_buffered_edited_mngt_regions_incl_allGBR.shp' ## all mainland only extents
        '/home/jovyan/dev/dea-notebooks/Testing/All_GBR_merged.shp' ## all GBR shapefiles
    )
    # .set_index("Descriptio")
    .set_index("layer")
    # .set_index('id')
)
# regions_gdf

In [None]:
print(regions_gdf.index)
# print(regions_gdf_offshore.index)

In [None]:
# Select the region(s) to analyse by index label. The double brackets keep the
# result as a GeoDataFrame (a single row here) rather than a single-row Series.
study_area = regions_gdf.loc[['All_MackCap']]
# study_area_offshore = regions_gdf_offshore.loc[['All_GBR_offshore']]
# study_area = regions_gdf.loc[["1"]]
# study_area = regions_gdf

In [None]:
# Display the first study-area polygon. Positional access is used because the index
# holds region labels (strings); `geometry[0]` only worked through pandas'
# deprecated integer-position fallback for non-integer indexes.
study_area.geometry.iloc[0]

In [None]:
# study_area_offshore.geometry[0]

In [None]:
# Load data from WFS for study area bounding box.
# `bounds.values[0]` takes the bounds (minx, miny, maxx, maxy) of the FIRST row of
# `study_area` only.
# NOTE(review): the bbox is expressed in the CRS of the input vector file; confirm this
# matches the CRS that `get_coastlines` assumes for `bbox`.
bbox = study_area.geometry.bounds.values[0]
# bbox = study_area.geometry.bounds#.values[0]
ratesofchange_gdf = get_coastlines(
    bbox=bbox, layer="rates_of_change"
)

# A bounding-box query can return points outside the polygon itself, so clip the
# returned data to the polygon extent (polygon reprojected to EPSG:3577 for the clip)
ratesofchange_gdf = gpd.clip(ratesofchange_gdf, mask=study_area.to_crs("EPSG:3577"))
ratesofchange_gdf.head()

In [None]:
# # ## Temp - bring in csv version of Australian ratesofchange_gdf and clip to management boundaries for plotting

# from shapely import wkt

# allAus = pd.read_csv("/home/jovyan/dev/dea-notebooks/Testing/All_Aust_16-10-2023.csv")

# allAus['geometry'] = allAus['geometry'].apply(wkt.loads)
# allAus_gdf = gpd.GeoDataFrame(allAus, crs = 'epsg:3577')
# # allAus_gdf.head(2)

# ratesofchange_gdf = gpd.clip(allAus_gdf, mask=study_area.to_crs("EPSG:3577"))

# ratesofchange_gdf.head()

In [None]:
## Temp - bring in csv version of Australian ratesofchange_gdf and clip to management boundaries for plotting
# gpd.read_file("/home/jovyan/dev/dea-notebooks/Testing/All_Aust_16-10-2023.csv")

# Reading only geometries intersecting ``mask``:

# >>> df = geopandas.read_file("nybb.shp", mask=polygon)  # doctest: +SKIP


# gdf_mask = gpd.read_file("/home/jovyan/dev/dea-notebooks/Testing/All_Aust_16-10-2023.csv", crs = 'epsg:4326')
# regions_gdf = (gpd.read_file('/home/jovyan/dev/dea-notebooks/Testing/All_GBR_merged.shp').set_index("layer"))

# allAus_gdf = allAus_gdf.to_crs(crs = 'epsg:4326')
# regions_gdf = regions_gdf.to_crs(crs = 'epsg:4326')

# gpd.read_file(allAus_gdf, mask=regions_gdf.loc[['Mainland_CairnsCook']])
# mask = regions_gdf.loc[['Australia']].geometry#.boundary
# allAus_gdf.to_crs("EPSG:3577").clip(mask)

In [None]:
# mask_gdf = gpd.GeoDataFrame([1], geometry=mask, crs = allAus_gdf.crs)
# mask_gdf


In [None]:
## Epoch based change
### NOTE: there are three places in this code cell where epoch dates need to be adjusted:
### the year range passed to np.arange below, and the output column name and the
### 'dist_<year>' slice used in the apply call at the bottom of the cell

## Create a linear regression object (the same object is passed to every row's fit)
linear_regressor = LinearRegression()

## Set the dates. np.arange excludes its stop value, so add 1 to your end date:
## arange(2019, 2023) gives 2019, 2020, 2021, 2022, reshaped to a single column
x=np.arange(2019, 2023, 1).reshape(-1,1) 
# x=np.arange(2019, 2020, 1).reshape(-1,1) 

## Define the function to take each row of the gdf and calculate the linear regression
## coefficient for the time period of interest
def epoch_rate_time(x,y,linear_regressor):
    """
    Fit a straight line through one row of values and return its slope.

    Parameters
    ----------
    x : array-like
        Predictor values (years), one per entry of `y`; either 1-D or a
        single-column 2-D array.
    y : pandas.Series or array-like
        Values to regress against `x`, in the same order. Missing values are
        allowed and are dropped together with their matching `x` entry.
    linear_regressor : object
        Estimator exposing `fit(X, y)` and a `coef_` attribute after fitting
        (e.g. `sklearn.linear_model.LinearRegression`).

    Returns
    -------
    float
        The first fitted coefficient (slope of `y` per unit of `x`), or
        `np.nan` when no slope can be estimated: `x` and `y` differ in length,
        fewer than two non-missing values remain, or the fit itself fails.
    """
    years = np.asarray(x)
    if years.ndim == 1:
        # Estimators expect a 2-D (n_samples, n_features) predictor
        years = years.reshape(-1, 1)

    values = np.asarray(y)
    valid = ~np.asarray(pd.isnull(y))

    if len(years) != len(values):
        print(f'An exception error occurred at {y}. Check that x and y the same length')
        return np.nan

    # A slope needs at least two observations; report "no estimate" rather
    # than a misleading number (or an exception) when too few remain
    if valid.sum() < 2:
        return np.nan

    try:
        # Drop missing values and their matching years, then fit
        linear_regressor.fit(years[valid], values[valid].astype(float))
    except (TypeError, ValueError) as err:
        # Best effort: report the offending row and carry on with the other rows
        print(f'An exception error occurred at {y}: {err}')
        return np.nan

    # Return only the coefficient into the gdf
    return linear_regressor.coef_[0]

### Rename the output column and adjust the 'dist_<year>' slice so both match the years in `x`.
### Only the distance columns are handed to the row-wise apply, so each row arrives as a
### purely numeric Series rather than a mixed-type row that also carries the geometry
### and every other attribute column.
ratesofchange_gdf['2019-2022_epoch_rate_time'] = (
    ratesofchange_gdf.loc[:, 'dist_2019':'dist_2022']
    .apply(lambda dists: epoch_rate_time(x, dists, linear_regressor), axis=1)
)
ratesofchange_gdf.head()

## Data prep

In [None]:
# Optional: Keep only rates of change points with "good" certainty
# (i.e. no poor quality flags). An explicit copy is taken so the column updates
# below act on this filtered table itself and do not raise SettingWithCopyWarning.
ratesofchange_gdf = ratesofchange_gdf.query("certainty == 'good'").copy()

# Optional: Apply correction factor from Bishop-Taylor et al. 2021.
# NOTE: this cell is not idempotent - re-running it adds the correction again, so
# re-run from the data load if the cell needs to be repeated.
ratesofchange_gdf['rate_time'] += 0.08

# ratesofchange_gdf['2014-2022_epoch_rate_time']+= 0.08
# ratesofchange_gdf['2014-2017_epoch_rate_time']+= 0.08
# ratesofchange_gdf['1988-2022_epoch_rate_time']+= 0.08
# ratesofchange_gdf['2014-2018_epoch_rate_time']+= 0.08

ratesofchange_gdf['2019-2022_epoch_rate_time'] += 0.08
# ratesofchange_gdf['1988-2019_epoch_rate_time']+= 0.08

# Add x and y coords to data
ratesofchange_gdf["y_coord"] = ratesofchange_gdf.geometry.y
ratesofchange_gdf["x_coord"] = ratesofchange_gdf.geometry.x

In [None]:
# Zero out every rate whose 'sig_time' is greater than 0.01, writing the result to a
# new '<column>_sig_filtered' column so the unfiltered rates stay available.
# The epoch rate is handled first, then the all-time rate.
# NOTE(review): both columns are filtered on the same `sig_time` column - confirm that
# is the intended significance measure for the epoch rate as well.
for rate_col in ['2019-2022_epoch_rate_time', 'rate_time']:
    ratesofchange_gdf[f'{rate_col}_sig_filtered'] = ratesofchange_gdf[rate_col].mask(
        ratesofchange_gdf['sig_time'] > 0.01, 0
    )

# Show the all-time rate before and after filtering
ratesofchange_gdf[['sig_time', 'rate_time','rate_time_sig_filtered']]

In [None]:
# Resample to make sure we have evenly spaced rows (important for sensible rolling mean)
bin_size = 30
min_coord = ratesofchange_gdf["y_coord"].min()
max_coord = ratesofchange_gdf["y_coord"].max()

# Build the edges so the last half-open bin [edge, edge + bin_size) extends past
# `max_coord`. With `np.arange(min_coord, max_coord, bin_size)` the points with the
# largest y_coord fell outside every bin and were silently dropped.
n_bins = int(np.floor((max_coord - min_coord) / bin_size)) + 1
bin_edges = min_coord + bin_size * np.arange(n_bins + 1)
groups = pd.cut(
    ratesofchange_gdf["y_coord"],
    bins=bin_edges,
    labels=bin_edges[:-1] + (bin_size / 2),  # label each bin by its midpoint
    right=False,
)

# Average the numeric columns within each bin. `numeric_only=True` skips the geometry
# and text columns (which cannot be averaged), and `observed=False` keeps empty bins
# as NaN rows so the rows stay evenly spaced.
ratesofchange_standardised_gdf = ratesofchange_gdf.groupby(groups, observed=False).mean(
    numeric_only=True
)

# Set index to numeric so we can plot it nicely
ratesofchange_standardised_gdf.index = pd.to_numeric(
    ratesofchange_standardised_gdf.index
)

## Rolling mean

In [None]:
# Apply rolling mean. `window_size` is in the units of `y_coord` (metres for the
# EPSG:3577 data, i.e. a 15 km window) and is converted to a number of bins using
# the same `bin_size` the data was resampled with.
window_size = 15000  # m
window_n = int(window_size / bin_size)
ratesofchange_rolling_gdf = ratesofchange_standardised_gdf.rolling(
    window=window_n, center=True, min_periods=1
).mean()

# Apply an additional level of aesthetic smoothing (can be removed)
ratesofchange_rolling_gdf = ratesofchange_rolling_gdf.rolling(
    window=window_n, center=True, min_periods=1
).mean()

## Plotting

In [None]:
## Set the fontsize for the plot
plt.rcParams.update({'font.size': 12})

In [None]:
## All time plotting

# Set first and last entry to zero so we get clean graph outlines. The assignment goes
# through a single .iloc call on the DataFrame: `df[col].iloc[i] = 0` is chained
# assignment, which can modify a temporary copy and leave the DataFrame unchanged.
ratesofchange_rolling_gdf.iloc[
    [0, -1], ratesofchange_rolling_gdf.columns.get_loc("rate_time_sig_filtered")
] = 0

# Split out positive and negative rates so we can plot them individually
ratesofchange_positive_gdf = ratesofchange_rolling_gdf["rate_time_sig_filtered"].clip(0, np.inf)
ratesofchange_negative_gdf = ratesofchange_rolling_gdf["rate_time_sig_filtered"].clip(-np.inf, 0)

# Plot negative rates in red, positive in blue
# (tall, narrow figure; other heights tried for different regions: 7, 5, 1.75)
fig, ax = plt.subplots(figsize=(2, 11.5))
ax.fill_betweenx(
    ratesofchange_positive_gdf.index, 0, ratesofchange_positive_gdf, color="#6caed1"
)
ax.fill_betweenx(
    ratesofchange_negative_gdf.index, ratesofchange_negative_gdf, 0, color="#eb7668"
)
ax.plot(
    ratesofchange_rolling_gdf["rate_time_sig_filtered"],
    ratesofchange_rolling_gdf.index,
    color="black",
    linewidth=1
)

# Add vertical axis line
ax.axvline(0, color="black", linewidth=1)

# Styling. Ticks are set before the limits because set_xticks expands the view to
# include every tick: the earlier set_xlim(-1, 1) was silently overridden and the
# figure was drawn at +/-3. The limits below state that rendered extent explicitly;
# change both together if a tighter x-range is wanted.
ax.set_xticks(ticks=[-3, 0, 3])
ax.set_xlim(-3, 3)
ax.set_ylim(min_coord, max_coord)
ax.set_xlabel("(m / year between \n 1988 and 2022)")
ax.spines[["top", "left", "right"]].set_visible(False)
ax.tick_params(top=False, left=False, labelleft=False, labeltop=False)

# Export (file name is prefixed with the label of the first study-area row)
fig.savefig(f"/home/jovyan/dev/dea-notebooks/Testing/{study_area.index[0]}_1988_2022_latsummary.svg", bbox_inches="tight", transparent=True)

In [None]:
## Epoch plotting (without outlier removal - unlike original rate_time method)

# Set first and last entry to zero so we get clean graph outlines. The assignment goes
# through a single .iloc call on the DataFrame: `df[col].iloc[i] = 0` is chained
# assignment, which can modify a temporary copy and leave the DataFrame unchanged.
ratesofchange_rolling_gdf.iloc[
    [0, -1],
    ratesofchange_rolling_gdf.columns.get_loc('2019-2022_epoch_rate_time_sig_filtered'),
] = 0

# Split out positive and negative rates so we can plot them individually
ratesofchange_positive_gdf = ratesofchange_rolling_gdf['2019-2022_epoch_rate_time_sig_filtered'].clip(0, np.inf)
ratesofchange_negative_gdf = ratesofchange_rolling_gdf['2019-2022_epoch_rate_time_sig_filtered'].clip(-np.inf, 0)

# Plot negative rates in red, positive in blue
# (tall, narrow figure; other sizes tried for different regions: (1.5, 7), heights 5 and 7)
fig, ax = plt.subplots(figsize=(2, 11.5))
ax.fill_betweenx(
    ratesofchange_positive_gdf.index, 0, ratesofchange_positive_gdf, color="#6caed1"
)
ax.fill_betweenx(
    ratesofchange_negative_gdf.index, ratesofchange_negative_gdf, 0, color="#eb7668"
)
ax.plot(
    ratesofchange_rolling_gdf['2019-2022_epoch_rate_time_sig_filtered'],
    ratesofchange_rolling_gdf.index,
    color="black",
    linewidth=1,
)

# Add vertical axis line
ax.axvline(0, color="black", linewidth=1)

# Styling. Ticks are set before the limits because set_xticks expands the view to
# include every tick: the earlier set_xlim(-1, 1) was silently overridden and the
# figure was drawn at +/-3. The limits below state that rendered extent explicitly;
# change both together if a tighter x-range is wanted.
ax.set_xticks(ticks=[-3, 0, 3])
ax.set_xlim(-3, 3)
ax.set_ylim(min_coord, max_coord)
ax.set_xlabel("(m / year between \n 2019 and 2022)")
ax.spines[["top", "left", "right"]].set_visible(False)
ax.tick_params(top=False, left=False, labelleft=False, labeltop=False)

# Export (file name is prefixed with the label of the first study-area row)
fig.savefig(f"/home/jovyan/dev/dea-notebooks/Testing/{study_area.index[0]}_2019_2022_latsummary.svg", bbox_inches="tight", transparent=True)

In [None]:
# ratesofchange_gdf.to_csv("/home/jovyan/dev/dea-notebooks/Testing/DEACoastlines_ratesofchange_GBR_mngt_area_all.csv")

## Tabular summaries

In [None]:
region = 'GBR_mainland'

In [None]:
study_area.index[0]

In [None]:
## Baseline comparison stats for mainland polygons: paired t-test of the all-time
## rate against the 2019-2022 epoch rate at the same points
v1 = ratesofchange_gdf['rate_time_sig_filtered'].values
v2 = ratesofchange_gdf['2019-2022_epoch_rate_time_sig_filtered'].values

# Leave out pairs containing NaN (points without an epoch rate) instead of letting
# a single missing value turn the whole test result into NaN
rel = ttest_rel(v1, v2, nan_policy="omit")
rel

In [None]:
# Mean and standard deviation of the all-time and epoch rates, rounded for reporting
ratetimemean = ratesofchange_gdf['rate_time_sig_filtered'].mean().round(2)
ratetimestd = round(ratesofchange_gdf['rate_time_sig_filtered'].std(),2)
epochmean = ratesofchange_gdf['2019-2022_epoch_rate_time_sig_filtered'].mean().round(2)
epochstd = round(ratesofchange_gdf['2019-2022_epoch_rate_time_sig_filtered'].std(),2)

# This region's results, keyed by the summary table's column names
stat_row = {
    '1988-2022': f'{ratetimemean} ({ratetimestd})',
    '2019-2022': f'{epochmean} ({epochstd})',
    'pvalue (1988-2022 vs 2019-2022)': rel.pvalue.round(2),
    'tstat (1988-2022 vs 2019-2022)': round(rel.statistic,2),
    'df (1988-2022 vs 2019-2022)': rel.df,
}

# Create the summary table the first time a row is added, so the cell also runs on a
# fresh kernel (previously the table had to exist already from an earlier session)
if 'StatSummary' not in globals():
    StatSummary = pd.DataFrame(columns=list(stat_row))

# Add the row under the region label directly; re-running the cell overwrites the
# region's row instead of appending a duplicate
StatSummary.loc[region] = list(stat_row.values())

In [None]:
region = 'GBR_offshore'

In [None]:
## Baseline comparison stats for offshore polygons: paired t-test of the all-time
## rate against the 2019-2022 epoch rate at the same points
## NOTE(review): `ratesofchange_gdf_offshore` is not created anywhere in the visible
## notebook (the offshore study-area lines are commented out); it must be loaded and
## prepared the same way as `ratesofchange_gdf` before this cell can run.
v1 = ratesofchange_gdf_offshore['rate_time_sig_filtered'].values
v2 = ratesofchange_gdf_offshore['2019-2022_epoch_rate_time_sig_filtered'].values

# Leave out pairs containing NaN (points without an epoch rate) instead of letting
# a single missing value turn the whole test result into NaN
rel = ttest_rel(v1, v2, nan_policy="omit")
rel


In [None]:

# Mean and standard deviation of the all-time and epoch rates, rounded for reporting
ratetimemean = ratesofchange_gdf_offshore['rate_time_sig_filtered'].mean().round(2)
ratetimestd = round(ratesofchange_gdf_offshore['rate_time_sig_filtered'].std(),2)
epochmean = ratesofchange_gdf_offshore['2019-2022_epoch_rate_time_sig_filtered'].mean().round(2)
epochstd = round(ratesofchange_gdf_offshore['2019-2022_epoch_rate_time_sig_filtered'].std(),2)

# This region's results, keyed by the summary table's column names
stat_row = {
    '1988-2022': f'{ratetimemean} ({ratetimestd})',
    '2019-2022': f'{epochmean} ({epochstd})',
    'pvalue (1988-2022 vs 2019-2022)': round(rel.pvalue,2),
    'tstat (1988-2022 vs 2019-2022)': round(rel.statistic,2),
    'df (1988-2022 vs 2019-2022)': rel.df,
}

# Create the summary table the first time a row is added (replaces the manual
# "run once" step), so the cell also runs on a fresh kernel
if 'StatSummary' not in globals():
    StatSummary = pd.DataFrame(columns=list(stat_row))

# Add the row under the region label directly; re-running the cell overwrites the
# region's row instead of appending a duplicate
StatSummary.loc[region] = list(stat_row.values())
StatSummary

In [None]:
StatSummary.to_csv("/home/jovyan/dev/dea-notebooks/Testing/StatSummary.csv")

In [None]:
region = 'GBR'

In [None]:
# Onshore vs offshore, all-time rates: Welch's t-test (unequal variances) on the two
# independent sets of points
v1 = ratesofchange_gdf['rate_time_sig_filtered'].values
v2 = ratesofchange_gdf_offshore['rate_time_sig_filtered'].values

# NaN rates are left out rather than turning the whole test result into NaN
rel = ttest_ind(v1, v2, equal_var=False, nan_policy="omit")
rel

In [None]:
# Onshore vs offshore, 2019-2022 epoch rates: Welch's t-test (unequal variances) on
# the two independent sets of points
v1 = ratesofchange_gdf['2019-2022_epoch_rate_time_sig_filtered'].values
v2 = ratesofchange_gdf_offshore['2019-2022_epoch_rate_time_sig_filtered'].values

# NaN rates (points without an epoch rate) are left out rather than turning the
# whole test result into NaN
rel2 = ttest_ind(v1, v2, equal_var=False, nan_policy="omit")
rel2

In [None]:
## Capture statistical significance in onshore vs offshore regions

# This region's test results, keyed by the summary table's column names
onoffshore_row = {
    'Onshore vs Offshore (1988-2022) tstat': rel.statistic.round(2),
    'Onshore vs Offshore (1988-2022) pvalue': rel.pvalue.round(2),
    'Onshore vs Offshore (2019-2022) tstat': rel2.statistic.round(2),
    'Onshore vs Offshore (2019-2022) pvalue': rel2.pvalue.round(2),
}

# Create the summary table the first time a row is added (replaces the manual
# "run once" step), so the cell also runs on a fresh kernel
if 'OnOffshoreSummary' not in globals():
    OnOffshoreSummary = pd.DataFrame(columns=list(onoffshore_row))

# Add the row under the region label directly; re-running the cell overwrites the
# region's row instead of appending a duplicate
OnOffshoreSummary.loc[region] = list(onoffshore_row.values())
                                                       


In [None]:
OnOffshoreSummary

In [None]:
OnOffshoreSummary.to_csv("/home/jovyan/dev/dea-notebooks/Testing/OnOffshoreSummary.csv")

# After: https://gist.github.com/robbibt/760dcf367be4b98c493e70dd577aca6a

In [None]:
def change_summary(df, sig=0.01, rate=0.30, bias=0.08):
    """
    Summarise the share of points that are stable, dynamic, eroding or prograding.

    Parameters
    ----------
    df : pandas.DataFrame
        Table with a `rate_time` column (rate of change) and a `sig_time`
        column (significance of that rate).
    sig : float, optional
        A point counts as significant when `sig_time <= sig`.
    rate : float, optional
        Minimum absolute rate for a significant point to count as dynamic.
    bias : float, optional
        Unused. Kept so existing calls still work; the bias correction is
        expected to have been applied to `rate_time` before calling.

    Returns
    -------
    pandas.Series
        Fractions (0-1) of all rows: 'dynamic', 'stable', then for each
        threshold in 0.0, 0.5, 1.0, 3.0 and 5.0 an 'eroding_<t>' entry
        (dynamic and rate below -t) followed by a 'prograd_<t>' entry
        (dynamic and rate above t).
    """
    rates = df.rate_time

    # A point is dynamic when its trend is significant AND at least as large as
    # the minimum rate; every other point is treated as stable
    is_dynamic = (df.sig_time <= sig) & (rates.abs() >= rate)
    dynamic_share = is_dynamic.mean()
    summary = {'dynamic': dynamic_share, 'stable': 1.0 - dynamic_share}

    # For each rate of change category, the share of dynamic points retreating
    # faster than the threshold (eroding) or advancing faster than it (prograding)
    for threshold in (0.0, 0.5, 1.0, 3.0, 5.0):
        summary[f'eroding_{threshold}'] = (is_dynamic & (rates < -threshold)).mean()
        summary[f'prograd_{threshold}'] = (is_dynamic & (rates > threshold)).mean()

    return pd.Series(summary)

In [None]:
# Load point data and Coastal Compartment regions data
# coastlines_data = gpd.read_file('../releases/DEACoastlines_v1.0.0/Shapefile/DEACoastlines_ratesofchange_v1.0.0.shp')[['rate_time', 'sig_time', 'geometry']]

# The 1988-2019 epoch rate is not produced by the epoch cell as currently configured
# (it computes 2019-2022), so calculate it here when it is missing instead of failing
# with a KeyError. The 0.08 correction is added as for the other rate columns, because
# `change_summary` expects bias-corrected rates.
if '1988-2019_epoch_rate_time' not in ratesofchange_gdf.columns:
    ratesofchange_gdf['1988-2019_epoch_rate_time'] = (
        ratesofchange_gdf.loc[:, 'dist_1988':'dist_2019']
        .apply(
            lambda dists: epoch_rate_time(
                np.arange(1988, 2020, 1).reshape(-1, 1), dists, linear_regressor
            ),
            axis=1,
        )
        + 0.08
    )

# Keep only what the summary needs, with the epoch rate renamed to the `rate_time`
# column name that `change_summary` reads
coastlines_data = ratesofchange_gdf[['1988-2019_epoch_rate_time', 'sig_time', 'geometry']]
coastlines_data = coastlines_data.rename(columns={'1988-2019_epoch_rate_time' : 'rate_time'})
# acsc_regions = gpd.read_file('/g/data/r78/DEACoastlines/input_data/Coastal_compartments/Shapefiles_v3/Regions.shp').dissolve('Descriptio')

# One (multi)polygon per region label
acsc_regions = regions_gdf.dissolve('layer')


In [None]:
# Join coastal compartments to points: each point is paired with every region polygon
# it falls within (a point inside several overlapping regions appears once per region).
# `predicate` replaces the `op` keyword, which current geopandas releases no longer
# accept. The region index name is cleared so the joined label column is called
# 'index_right' regardless of geopandas version (newer releases otherwise reuse the
# index name). `gpd` comes from the notebook's import cell.
coastlines_joined = gpd.sjoin(
    coastlines_data,
    acsc_regions.to_crs('EPSG:3577').rename_axis(None),
    predicate='within',
)

In [None]:
regions_gdf.index

Create continental summary change table

In [None]:
# Compute summaries of change for all regions (transposed: one column per region label)
summary_df = coastlines_joined.groupby(['index_right']).apply(lambda x: change_summary(x)).T
summary_df['Continental'] = change_summary(df=coastlines_joined, sig=0.01)

# Sort into pretty format. `reindex` is used instead of `.loc` so the cell still runs
# when some regions contain no points (e.g. a single-region study area): those regions
# come out as all-NaN columns rather than raising a KeyError.
# Note that 'Continental' is not in the column list, so it is dropped here.
summary_df = summary_df.reindex(
    index=[
        'dynamic', 'stable', 'eroding_0.0', 'eroding_0.5', 'eroding_1.0',
        'eroding_3.0', 'eroding_5.0', 'prograd_0.0', 'prograd_0.5', 'prograd_1.0',
        'prograd_3.0', 'prograd_5.0'
    ],
    columns=[
        'Australia', 'Mainland_CairnsCook', 'Mainland_FarNorth', 'Mainland_GBR',
        'Mainland_MackCap', 'Mainland_TownsWhit', 'All_GBR',
        'Offshore_CairnsCook', 'Offshore_FarNorth', 'Offshore_GBR',
        'Offshore_MackCap', 'Offshore_TownsWhit', 'All_FarNorth',
        'All_CairnsCook', 'All_TownsWhit', 'All_MackCap'
    ],
)

# Rename index to display labels (the threshold labels repeat on purpose: the first
# set sits under 'Eroding', the second under 'Prograding')
summary_df.index = ['Dynamic', 'Stable', 
                    'Eroding',     
                    '    > 0.5 m / year', '    > 1.0 m / year', 
                    '    > 3.0 m / year', '    > 5.0 m / year', 
                    'Prograding', 
                    '    > 0.5 m / year', '    > 1.0 m / year', 
                    '    > 3.0 m / year', '    > 5.0 m / year']

# Scale and round (fractions -> percentages)
(summary_df * 100).round(2)

In [None]:
# Percentages rounded to two decimals
summary_df_round = (100 * summary_df).round(2)

# Reporting order: Australia first, then All / Mainland / Offshore for each area
Epoch_1988_2019_summary = summary_df_round.loc[:, [
    'Australia',
    'All_GBR', 'Mainland_GBR', 'Offshore_GBR',
    'All_FarNorth', 'Mainland_FarNorth', 'Offshore_FarNorth',
    'All_CairnsCook', 'Mainland_CairnsCook', 'Offshore_CairnsCook',
    'All_TownsWhit', 'Mainland_TownsWhit', 'Offshore_TownsWhit',
    'All_MackCap', 'Mainland_MackCap', 'Offshore_MackCap',
]]


In [None]:
Epoch_1988_2019_summary

In [None]:
regions_gdf.index

In [None]:
# NOTE(review): `All_time_summary` is not assigned anywhere in the visible notebook, so
# this cell raises NameError on a fresh kernel. Presumably it was produced by an earlier
# run of the summary cells using the all-time `rate_time` column - confirm and restore
# the code that builds it.
All_time_summary.to_csv("/home/jovyan/dev/dea-notebooks/Testing/All_time_perc_summary.csv")

In [None]:
Epoch_1988_2019_summary.to_csv("/home/jovyan/dev/dea-notebooks/Testing/Epoch_1988_2019_perc_summary.csv")

In [None]:
# Export the current rates-of-change table (with the filtering and the extra columns
# added in this notebook) to CSV.
# NOTE(review): the file name suggests an all-Australia extract, but `ratesofchange_gdf`
# was clipped to `study_area` when it was loaded - confirm this is the intended content
# before overwriting the file.
ratesofchange_gdf.to_csv("/home/jovyan/dev/dea-notebooks/Testing/All_Aust_16-10-2023.csv")