# DE Africa Coastlines vector statistics <img align="right" src="https://github.com/digitalearthafrica/deafrica-sandbox-notebooks/raw/main/Supplementary_data/DE_Africa_Logo_Stacked_RGB_small.jpg">

This code conducts vector subpixel shoreline extraction for DE Africa Coastlines:

* Apply morphological extraction algorithms to mask annual median composite rasters to a valid coastal region
* Extract waterline vectors using subpixel waterline extraction ([Bishop-Taylor et al. 2019b](https://doi.org/10.3390/rs11242984))
* Compute rates of coastal change at points spaced every 30 m along the shoreline using linear regression

This is an interactive version of the code intended for prototyping; to run this analysis at scale, use the [command line tools](DEAfricaCoastlines_generation_CLI.ipynb).


---

## Getting started
Set working directory to top level of repo to ensure links work correctly:

In [2]:
# Move up one level so that the relative paths used below (configs/, data/)
# resolve from the top level of the repo. Run this cell only once per kernel
# session: re-running it climbs another directory level.
%cd ..

/home/jovyan/Robbi/deafrica-coastlines


### Load packages

First we install the required dependencies, then import the Python packages and the DE Africa Coastlines modules used in this analysis.

In [3]:
# Install the pinned requirements into the environment of the running kernel
# (explicit %pip magic rather than relying on automagic). Restart the kernel
# afterwards if any already-imported package was updated.
%pip install -r requirements.in --quiet

Note: you may need to restart the kernel to use updated packages.


In [4]:
# Render matplotlib figures inline in the notebook output
%matplotlib inline
# Load the line_profiler extension (provides the %lprun profiling magic)
%load_ext line_profiler
# Reload imported modules before each cell runs, so edits to the local
# `coastlines` package are picked up without restarting the kernel
%load_ext autoreload
%autoreload 2

import os
import sys
import numpy as np
import pandas as pd
import geohash as gh
import geopandas as gpd
import matplotlib.pyplot as plt
from shapely.geometry import box
from rasterio.transform import array_bounds
from dea_tools.spatial import subpixel_contours

# Load DEA Coastlines code
import coastlines.raster
import coastlines.vector

# Hide Pandas warnings: setting this option to None disables pandas'
# chained-assignment (SettingWithCopy) warning for the whole session
pd.options.mode.chained_assignment = None



## Load in data

In [9]:
# Grid cell to process, plus the version labels used to locate the input
# rasters and to name the output vector folder/files
study_area = 1636
raster_version = "testing"
vector_version = "wms_update"

# Water index variable to contour, and the index value at which the
# waterline is extracted
water_index = "mndwi"
index_threshold = 0.0

# Range of years to load, and the year whose shoreline acts as the baseline
# for the rates-of-change points.
# NOTE(review): the dataset printed further down ends at 2020 although
# end_year/baseline_year are 2021 — confirm whether that output is stale or
# whether a 2021 raster is expected to exist.
start_year, end_year = 2000, 2021
baseline_year = 2021

# Remaining settings (e.g. the "Input files" paths) come from the YAML config
config = coastlines.raster.load_config(config_path="configs/deafrica_coastlines_config.yaml")

## Load DEA Coastlines rasters

In [10]:
# Read the annual rasters and their gap-fill counterparts for the selected
# study area, raster version, water index and year range
yearly_ds, gapfill_ds = coastlines.vector.load_rasters(
    study_area=study_area,
    raster_version=raster_version,
    water_index=water_index,
    start_year=start_year,
    end_year=end_year,
    path="data/interim/raster",
)
print(yearly_ds)

# Make sure the folder that will receive the vector outputs exists
output_dir = f"data/interim/vector/{vector_version}/{study_area}_{vector_version}"
os.makedirs(output_dir, exist_ok=True)

<xarray.Dataset>
Dimensions:  (year: 21, y: 1305, x: 1588)
Coordinates:
  * year     (year) int64 2000 2001 2002 2003 2004 ... 2016 2017 2018 2019 2020
  * y        (y) float64 9.354e+05 9.354e+05 9.354e+05 ... 8.963e+05 8.963e+05
  * x        (x) float64 3.76e+05 3.76e+05 3.76e+05 ... 4.235e+05 4.236e+05
Data variables:
    mndwi    (year, y, x) float32 -0.3865 -0.3842 -0.3749 ... -0.199 -0.2005
    ndwi     (year, y, x) float32 -0.2682 -0.2606 -0.2661 ... 0.02573 0.052
    tide_m   (year, y, x) float32 0.02468 0.02467 0.02466 ... -0.1192 -0.1192
    count    (year, y, x) int16 13 13 13 13 13 13 13 13 ... 12 12 12 12 13 12 12
    stdev    (year, y, x) float32 0.06428 0.0649 0.06764 ... 0.2598 0.2279
Attributes:
    transform:      | 30.00, 0.00, 375945.00|\n| 0.00,-30.00, 935445.00|\n| 0...
    crs:            +init=epsg:32639
    res:            (30.0, 30.0)
    is_tiled:       1
    nodatavals:     (nan,)
    scales:         (1.0,)
    offsets:        (0.0,)
    AREA_OR_POINT:  Area

## Load vector data

In [11]:
# All vector layers are reprojected to the CRS of the rasters. The string
# form is used everywhere so that every layer is handled identically.
target_crs = str(yearly_ds.crs)

# Get bounding box to load data for: the full extent of the raster grid
raster_bounds = array_bounds(
    height=yearly_ds.sizes["y"],
    width=yearly_ds.sizes["x"],
    transform=yearly_ds.transform,
)
bbox = gpd.GeoSeries(box(*raster_bounds), crs=target_crs)


def read_input_vector(config_key):
    """
    Load one of the vector datasets listed under "Input files" in the config.

    Only features intersecting `bbox` are read, and the result is
    reprojected to the CRS of the rasters.

    Parameters
    ----------
    config_key : str
        Key of the file path within `config["Input files"]`.

    Returns
    -------
    geopandas.GeoDataFrame
        The loaded features in `target_crs`.
    """
    return gpd.read_file(config["Input files"][config_key], bbox=bbox).to_crs(
        target_crs
    )


# Tide points
tide_points_gdf = read_input_vector("points_path")

# Study area polygon: index by grid cell ID (normalised to an integer-like
# string) and keep only the row for the study area being processed
gridcell_gdf = read_input_vector("grid_path").set_index("id")
gridcell_gdf.index = gridcell_gdf.index.astype(int).astype(str)
gridcell_gdf = gridcell_gdf.loc[[str(study_area)]]

# Coastal mask modifications
modifications_gdf = read_input_vector("modifications_path")

# Geomorphology dataset
geomorphology_gdf = read_input_vector("geomorphology_path")

# Region attribute dataset
region_gdf = read_input_vector("region_attributes_path")

## Extract annual shorelines

### Restrict annual shorelines to coastal study area

In [12]:
# Mask dataset to focus on coastal zone only. Two results are returned:
# `masked_ds`, which is the array contoured in the next step, and
# `certainty_masks`, which is passed to `contour_certainty` when the
# annual shorelines are exported.
masked_ds, certainty_masks = coastlines.vector.contours_preprocess(
    yearly_ds,
    gapfill_ds,
    water_index,
    index_threshold,
    tide_points_gdf,
    # NOTE(review): presumably a buffer distance in raster pixels (about
    # 1 km at the 30 m resolution of these rasters) — confirm against
    # the contours_preprocess documentation
    buffer_pixels=33,
    mask_modifications=modifications_gdf,
)

In [13]:
# Extract contours at the water index threshold along the "year" dimension,
# then index the result by year so individual shorelines can be looked up.
# Side effect: the contours are also written to `temp.geojson` in the
# current working directory.
contours_gdf = subpixel_contours(
    da=masked_ds,
    z_values=index_threshold,
    min_vertices=10,
    dim="year",
    output_path="temp.geojson",
).set_index("year")

## Compute statistics
### Create stats points on baseline contour

In [14]:
# Place the statistics modelling points along the shoreline of the baseline
# year, using a spacing of 30 between points
points_gdf = coastlines.vector.points_on_line(
    contours_gdf,
    str(baseline_year),
    distance=30,
)

### Measure annual coastline movements

In [15]:
# Skip this step when no modelling points exist (None or an empty table)
if points_gdf is not None and len(points_gdf) > 0:

    # Calculate annual movements and residual tide heights for every
    # contour compared to the baseline year
    points_gdf = coastlines.vector.annual_movements(
        points_gdf,
        contours_gdf,
        yearly_ds,
        str(baseline_year),
        water_index,
        # NOTE(review): presumably the largest point-to-shoreline distance
        # treated as valid, in CRS units — confirm against annual_movements
        max_valid_dist=5000,
    )

### Calculate regressions

In [16]:
# Only compute statistics when there are modelling points to work on: both
# steps below operate row-by-row on `points_gdf` and would fail if it were
# None or empty
if points_gdf is not None and len(points_gdf) > 0:

    # Apply regression function to each row in dataset
    points_gdf = coastlines.vector.calculate_regressions(points_gdf, contours_gdf)

    # Add count and span of valid obs, Shoreline Change Envelope (SCE),
    # Net Shoreline Movement (NSM) and Max/Min years
    stats_list = ["valid_obs", "valid_span", "sce", "nsm", "max_year", "min_year"]
    points_gdf[stats_list] = points_gdf.apply(
        lambda x: coastlines.vector.all_time_stats(x, initial_year=start_year), axis=1
    )

### Add certainty flags
Add certainty column to flag points with:
- Likely rocky shorelines: Rates of change can be unreliable in areas with steep rocky/bedrock shorelines due to terrain shadow.
- Extreme rate of change values (> 200 m per year) that are more likely to reflect modelling issues than real-world coastal change
- High angular variability: the nearest shorelines for each year do not fall on an approximate line, making rates of change invalid
- Insufficient observations: fewer than 15 valid annual shorelines, which makes the resulting rates of change more likely to be inaccurate

In [17]:
# Every point starts as "good"; the flags are then applied in the order
# listed, so a point matching several conditions keeps the last matching label
points_gdf["certainty"] = "good"
certainty_flags = [
    (
        coastlines.vector.rocky_shoreline_flag(points_gdf, geomorphology_gdf),
        "likely rocky shoreline",
    ),
    (points_gdf.rate_time.abs() > 200, "extreme value (> 200 m)"),
    (points_gdf.angle_std > 30, "high angular variability"),
    (points_gdf.valid_obs < 15, "insufficient observations"),
]
for flag_mask, flag_label in certainty_flags:
    points_gdf.loc[flag_mask, "certainty"] = flag_label

### Add region attributes

In [18]:
# Add region attributes to the points from `region_gdf`; judging by the
# keyword names, the "TERRITORY1" column is presumably carried over and
# exposed as "country" — confirm against the helper's documentation.
# NOTE(review): `region_atttributes` (three t's) is the spelling used at
# both call sites in this notebook; presumably it matches the name defined
# in coastlines.vector, so check there before "correcting" it.
points_gdf = coastlines.vector.region_atttributes(
    points_gdf, region_gdf, attribute_col="TERRITORY1", rename_col="country"
)

### Add geohash unique IDs

In [19]:
# Geohashes are computed from longitude/latitude, so work on a copy of the
# point geometries reprojected to EPSG:4326
points_lonlat = points_gdf.geometry.to_crs("EPSG:4326")

# Encode each point as a 10-character geohash and use it as the table index
uids = points_lonlat.apply(
    lambda point: gh.encode(point.y, point.x, precision=10)
).rename("uid")
points_gdf = points_gdf.set_index(uids)

## Export files

### Export stats files

In [20]:
if points_gdf is not None:

    # Keep only the points that intersect the study area polygon
    study_area_geom = gridcell_gdf.geometry.item()
    inside_study_area = points_gdf.intersects(study_area_geom)
    points_gdf = points_gdf[inside_study_area]

    # File name stem shared by both output formats
    stats_path = (
        f"{output_dir}/ratesofchange_{study_area}_"
        f"{vector_version}_{water_index}_{index_threshold:.2f}"
    )

    # GeoJSON output, reprojected to EPSG:4326
    stats_lonlat = points_gdf.to_crs("EPSG:4326")
    stats_lonlat.to_file(f"{stats_path}.geojson", driver="GeoJSON")

    # ESRI shapefile output (no reprojection), with an explicit schema
    # built from the table's columns
    stats_schema = {
        "properties": coastlines.vector.vector_schema(points_gdf),
        "geometry": "Point",
    }
    points_gdf.to_file(f"{stats_path}.shp", schema=stats_schema)

### Export annual shorelines

In [21]:
# Attach certainty information to each annual shoreline using the masks
# returned by the contour preprocessing step
contours_gdf = coastlines.vector.contour_certainty(contours_gdf, certainty_masks)

# Record the tide datum (kept as a column so extra datums can be added later)
contours_gdf["tide_datum"] = "0 m AMSL"

# Add region attributes
contours_gdf = coastlines.vector.region_atttributes(
    contours_gdf, region_gdf, attribute_col="TERRITORY1", rename_col="country"
)

# Clip the annual shorelines to the study area polygon
study_area_geom = gridcell_gdf.geometry.item()
contours_gdf["geometry"] = contours_gdf.intersection(study_area_geom)

# File name stem shared by both output formats
contour_path = (
    f"{output_dir}/annualshorelines_{study_area}_"
    f"{vector_version}_{water_index}_{index_threshold:.2f}"
)

# GeoJSON output, reprojected to EPSG:4326
contours_lonlat = contours_gdf.to_crs("EPSG:4326")
contours_lonlat.to_file(f"{contour_path}.geojson", driver="GeoJSON")

# ESRI shapefile output of the annual shorelines (no reprojection), with an
# explicit schema allowing both single and multi-part lines
contour_schema = {
    "properties": coastlines.vector.vector_schema(contours_gdf),
    "geometry": ["MultiLineString", "LineString"],
}
contours_gdf.to_file(f"{contour_path}.shp", schema=contour_schema)

***

## Additional information

**License:** The code in this notebook is licensed under the [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0). 
Digital Earth Africa data is licensed under the [Creative Commons by Attribution 4.0](https://creativecommons.org/licenses/by/4.0/) license.

**Contact:** For assistance with any of the Python code or Jupyter Notebooks in this repository, please post a [Github issue](https://github.com/GeoscienceAustralia/DEACoastLines/issues/new).

**Last modified:** September 2022