# DE Africa Coastlines continental hotspots <img align="right" src="https://github.com/digitalearthafrica/deafrica-sandbox-notebooks/raw/main/Supplementary_data/DE_Africa_Logo_Stacked_RGB_small.jpg">

This code combines individual datasets into continental DE Africa Coastlines layers:
* Combines output shorelines and rates of change statistics point vectors into single continental datasets
* Aggregates this data to produce moving window hotspot datasets that summarise coastal change at regional and continental scale.

This is an interactive version of the code intended for prototyping; to run this analysis at scale, use the [command line tools](DEAfricaCoastlines_generation_CLI.ipynb).


---

## Getting started
Set working directory to top level of repo to ensure links work correctly:

In [1]:
cd ..

/home/jovyan/Robbi/deafrica-coastlines


### Load packages

First we install the packages listed in `requirements.in`, then import the required Python packages and `coastlines` functions.

In [2]:
pip install -r requirements.in --quiet

Note: you may need to restart the kernel to use updated packages.


In [4]:
%matplotlib inline
%load_ext line_profiler
%load_ext autoreload
%autoreload 2

import os

import numpy as np
import pandas as pd
import geohash as gh
import geopandas as gpd
from pathlib import Path

from coastlines.utils import STYLES_FILE
from coastlines.continental import wms_fields
from coastlines.vector import points_on_line, change_regress, vector_schema



## Load in data

In [5]:
# Name of the vector outputs to read (sub-folder of "data/interim/vector/")
vector_version = "wms_update"

# Name used for the continental output folder, files and layers
continental_version = "wms_update"

# Year of the annual shoreline passed to `points_on_line` when
# generating hotspot points
baseline_year = 2021

# Hotspot radii in metres; one hotspots layer is produced per radius,
# in the order listed
hotspots_radius = [
    15000,
    5000,
    1000,
]


## Make output directory and identify files to load

In [18]:
# Create the folder for the continental outputs, including any missing
# parent folders
output_dir = Path(f"data/processed/{continental_version}")
output_dir.mkdir(parents=True, exist_ok=True)

# Wildcard patterns matching the input shapefiles in every sub-folder of
# the selected vector version
shoreline_paths = f"data/interim/vector/{vector_version}/*/annualshorelines*.shp"
ratesofchange_paths = f"data/interim/vector/{vector_version}/*/ratesofchange*.shp"

# Continental outputs: a GeoPackage and a zip of shapefiles
OUTPUT_GPKG = output_dir / f"deafricacoastlines_{continental_version}.gpkg"
OUTPUT_SHPS = output_dir / f"deafricacoastlines_{continental_version}.shp.zip"

# Remove any existing shapefile zip before new layers are written to it
if OUTPUT_SHPS.exists():
    OUTPUT_SHPS.unlink()

## Combine data
### Shorelines

In [7]:
# Merge every annual shorelines shapefile matched by `shoreline_paths`
# into a single "shorelines_annual" layer (`-single`, `-nln`) of the
# continental GeoPackage, reprojected to EPSG:6933 (`-t_srs`).
# `-overwrite_ds` replaces any existing GeoPackage at the output path.
exit_status = os.system(
    f"ogrmerge.py -o "
    f"{OUTPUT_GPKG} {shoreline_paths} "
    f"-single -overwrite_ds -t_srs epsg:6933 "
    f"-nln shorelines_annual"
)

# Fail here rather than letting later cells read a missing or
# incomplete layer
if exit_status != 0:
    raise RuntimeError(
        f"ogrmerge.py failed to merge annual shorelines "
        f"(exit status {exit_status})"
    )



0

### Rate of change points

In [8]:
# Merge every rates of change shapefile matched by `ratesofchange_paths`
# into a single "rates_of_change" layer (`-single`, `-nln`), reprojected
# to EPSG:6933 (`-t_srs`). `-update` adds the layer to the existing
# GeoPackage instead of overwriting it.
exit_status = os.system(
    f"ogrmerge.py "
    f"-o {OUTPUT_GPKG} {ratesofchange_paths} "
    f"-single -update -t_srs epsg:6933 "
    f"-nln rates_of_change"
)

# Fail here rather than letting later cells read a missing or
# incomplete layer
if exit_status != 0:
    raise RuntimeError(
        f"ogrmerge.py failed to merge rates of change points "
        f"(exit status {exit_status})"
    )

0

## Continental hotspots
### Prepare data

In [9]:
# Read the merged rates of change points from the GeoPackage and index
# them by their "uid" column
ratesofchange_gdf = gpd.read_file(OUTPUT_GPKG, layer="rates_of_change")
ratesofchange_gdf = ratesofchange_gdf.set_index("uid")

# Read the merged annual shorelines, index them by year and keep only
# features with valid geometries
shorelines_gdf = (
    gpd.read_file(OUTPUT_GPKG, layer="shorelines_annual")
    .set_index("year")
    .loc[lambda gdf: gdf.geometry.is_valid]
)

### Calculate hotspots


In [10]:
# Convert radius to a list if a single value was supplied; lists and
# tuples of radii are used as-is
hotspots_radius = (
    hotspots_radius
    if isinstance(hotspots_radius, (list, tuple))
    else [hotspots_radius]
)

# Select the rates of change points used for aggregation once, as this
# does not depend on the radius: only points with "good" certainty, and
# only the annual distance ("dist_*") and geometry columns
good_points_gdf = ratesofchange_gdf.loc[
    ratesofchange_gdf.certainty == "good",
    ratesofchange_gdf.columns.str.contains("dist_|geometry"),
]

# Output layers are numbered from 1 in the order the radii are listed
for zoom_level, radius in enumerate(hotspots_radius, start=1):

    # Extract hotspot points along the baseline year shoreline, spaced
    # at half the hotspot radius
    print(f"Calculating hotspots at {radius} m")
    hotspots_gdf = points_on_line(
        shorelines_gdf,
        index=baseline_year,
        distance=int(radius / 2),
    )

    # Create polygon windows by buffering points
    buffered_gdf = hotspots_gdf[["geometry"]].copy()
    buffered_gdf["geometry"] = buffered_gdf.buffer(radius)

    # Spatial join rate of change points to each polygon, then group
    # the points by the polygon ("index_right") they fall within
    hotspot_grouped = good_points_gdf.sjoin(
        buffered_gdf, predicate="within"
    ).groupby("index_right")

    # Aggregate/summarise values by taking median of all points
    # within each buffered polygon. `numeric_only=True` excludes the
    # geometry column explicitly instead of relying on pandas dropping
    # it, so only the "dist_*" columns remain
    hotspot_values = hotspot_grouped.median(numeric_only=True).round(2)

    # Extract year from distance columns (remove "dist_")
    x_years = hotspot_values.columns.str.replace("dist_", "").astype(int)

    # Compute coastal change rates by linearly regressing annual
    # movements vs. time
    rate_out = hotspot_values.apply(
        lambda row: change_regress(
            y_vals=row.values.astype(float), x_vals=x_years, x_labels=x_years
        ),
        axis=1,
    )

    # Add rates of change back into dataframe
    hotspot_values[
        ["rate_time", "incpt_time", "sig_time", "se_time", "outl_time"]
    ] = rate_out

    # Join aggregated values back to hotspot points after
    # dropping unused columns (regression intercept)
    hotspots_gdf = hotspots_gdf.join(hotspot_values.drop("incpt_time", axis=1))

    # Add hotspots radius attribute column
    hotspots_gdf["radius_m"] = radius

    # Initialise certainty column with good values
    hotspots_gdf["certainty"] = "good"

    # Identify any points with insufficient observations and flag these
    # as uncertain. We can obtain a sensible threshold by dividing the
    # hotspots radius by the 30 m along-shore rates of change point
    # distance. Hotspots with no joined points get a count of 0.
    hotspots_gdf["n"] = hotspot_grouped.size()
    hotspots_gdf["n"] = hotspots_gdf["n"].fillna(0)
    hotspots_gdf.loc[
        hotspots_gdf.n < (radius / 30), "certainty"
    ] = "insufficient points"

    # Generate a geohash UID for each point and set as index
    uids = (
        hotspots_gdf.geometry.to_crs("EPSG:4326")
        .apply(lambda x: gh.encode(x.y, x.x, precision=11))
        .rename("uid")
    )
    hotspots_gdf = hotspots_gdf.set_index(uids)

    # Export hotspots to file, incrementing name for each layer
    try:

        # Export to geopackage
        layer_name = f"hotspots_zoom_{zoom_level}"
        hotspots_gdf.to_file(
            OUTPUT_GPKG,
            layer=layer_name,
            schema={"properties": vector_schema(hotspots_gdf), "geometry": "Point"},
        )

        # Add additional WMS fields and add to shapefile
        hotspots_gdf = pd.concat([hotspots_gdf, wms_fields(gdf=hotspots_gdf)], axis=1)
        hotspots_gdf.to_file(
            OUTPUT_SHPS,
            layer=f"coastlines_{continental_version}_{layer_name}",
            schema={"properties": vector_schema(hotspots_gdf), "geometry": "Point"},
        )

    except ValueError as e:

        # Report the failure and continue with the next radius
        print(f"Failed to generate hotspots with error: {e}")

Calculating hotspots at 20000 m
Calculating hotspots at 10000 m
Calculating hotspots at 2000 m


### Write zipped shapefiles

In [11]:
# Compute the additional WMS fields and append them as extra columns.
# NOTE: this overwrites `ratesofchange_gdf`, so run this cell only once
# after loading the data.
wms_columns = wms_fields(gdf=ratesofchange_gdf)
ratesofchange_gdf = pd.concat([ratesofchange_gdf, wms_columns], axis=1)

# Write the rates of change points as a layer of the shapefile zip
ratesofchange_schema = {
    "properties": vector_schema(ratesofchange_gdf),
    "geometry": "Point",
}
ratesofchange_gdf.to_file(
    OUTPUT_SHPS,
    layer=f"coastlines_{continental_version}_rates_of_change",
    schema=ratesofchange_schema,
)

In [12]:
# Write the annual shorelines as a layer of the shapefile zip; the
# schema accepts both single and multi-part line geometries
shorelines_schema = {
    "properties": vector_schema(shorelines_gdf),
    "geometry": ["MultiLineString", "LineString"],
}
shorelines_gdf.to_file(
    OUTPUT_SHPS,
    layer=f"coastlines_{continental_version}_shorelines_annual",
    schema=shorelines_schema,
)

### Write styles to GeoPackage

In [14]:
# Read the styles table from STYLES_FILE and write it into the
# GeoPackage as a "layer_styles" layer
gpd.read_file(STYLES_FILE).to_file(OUTPUT_GPKG, layer="layer_styles")

***

## Additional information

**License:** The code in this notebook is licensed under the [Apache License, Version 2.0](https://www.apache.org/licenses/LICENSE-2.0). 
Digital Earth Africa data is licensed under the [Creative Commons by Attribution 4.0](https://creativecommons.org/licenses/by/4.0/) license.

**Contact:** For assistance with any of the Python code or Jupyter Notebooks in this repository, please post a [GitHub issue](https://github.com/digitalearthafrica/deafrica-coastlines/issues/new).

**Last modified:** September 2022