# Deforestation Analysis in Mozambique
## A very simple approach

## Method

1. Select spatial extent and two time periods
2. Load Sentinel-2 data from the STAC API
3. Preprocess the data to remove invalid pixels
   - Use the Scene Classification Layer (SCL) to filter out clouds, cloud shadows, and other invalid pixels (no data, saturated/defective, dark areas)
   - Calculate the temporal median to create a mosaic for each time period
4. Add vegetation indices (e.g., NDVI) to the dataset
5. Classify forests in the two time periods, based on the vegetation indices
6. Calculate the difference between the two classifications
7. Visualize the results

In [None]:
import datetime as dt
from enum import IntEnum

import matplotlib.pyplot as plt
import odc.stac as odc_stac
import pystac
import pystac_client
import xarray as xr
from odc.geo.geobox import GeoBox

In [None]:
# Bounding box as a 4-tuple of floats; this notebook builds it in
# (lonmin, latmin, lonmax, latmax) order and passes it on as a STAC ``bbox``.
type Bounds = tuple[float, float, float, float]


def get_items(bounds: Bounds, timerange: str) -> pystac.ItemCollection:
    """Search the Earth Search STAC API for Sentinel-2 L2A items.

    Args:
        bounds: Bounding box forwarded to the search as ``bbox``.
        timerange: Time interval forwarded to the search as ``datetime``.

    Returns:
        The item collection produced by the search.
    """
    catalog = pystac_client.Client.open("https://earth-search.aws.element84.com/v1")
    search = catalog.search(
        collections=["sentinel-2-l2a"],
        bbox=bounds,
        datetime=timerange,
        limit=100,
    )
    return search.item_collection()


class SCLValues(IntEnum):
    """Integer class codes of the Sentinel-2 Scene Classification Layer (SCL).

    Members are plain integers, so they can be compared directly
    against the values of an SCL band.
    """

    # Codes 0-3: missing, defective, dark or shadowed pixels
    NO_DATA = 0
    SATURATED_DEFECTIVE = 1
    DARK_AREA = 2
    CLOUD_SHADOW = 3

    # Codes 4-6: surface classes
    VEGETATION = 4
    BARE_SOIL = 5
    WATER = 6

    # Codes 7-10: cloud probability classes and cirrus
    CLOUD_LOW_PROB = 7
    CLOUD_MEDIUM_PROB = 8
    CLOUD_HIGH_PROB = 9
    CIRRUS = 10

    # Code 11: snow or ice
    SNOW_ICE = 11


def is_valid_pixel(data: xr.DataArray) -> xr.DataArray:
    """Return a boolean mask marking the usable pixels of an SCL band.

    A pixel is kept when its class is vegetation, bare soil (not vegetated),
    water, or snow/ice; every other SCL class is rejected.
    """
    # Codes strictly between CLOUD_SHADOW (3) and CLOUD_LOW_PROB (7),
    # i.e. VEGETATION, BARE_SOIL and WATER.
    is_surface = (data > SCLValues.CLOUD_SHADOW) & (data < SCLValues.CLOUD_LOW_PROB)
    is_snow = data == SCLValues.SNOW_ICE
    return is_surface | is_snow

In [None]:
# Common parameters
# Grid resolution handed to the GeoBox; with EPSG:4326 the unit is degrees.
# 0.002 is a coarser grid than the 0.0006 (roughly 60 m) alternative.
dx: float = 0.002
epsg = 4326

# Set Spatial extent
latmin: float = -19.6
latmax: float = -18.1
lonmin: float = 32.9
lonmax: float = 34.4
# Order is (west, south, east, north)
bounds: Bounds = (lonmin, latmin, lonmax, latmax)


# Set Temporal extent
# The same months are compared in two different years.
year_before: int = 2017
year_after: int = 2020
month_start: int = 11
month_end: int = 12
day_end: int = 31
# Month and day are zero-padded so that single-digit values (e.g. month 3)
# still yield valid ISO dates such as "2017-03-01".
timerange_after: str = (
    f"{year_after}-{month_start:02d}-01/{year_after}-{month_end:02d}-{day_end:02d}"
)
timerange_before: str = (
    f"{year_before}-{month_start:02d}-01/{year_before}-{month_end:02d}-{day_end:02d}"
)

# Search for Sentinel-2 data
items_before = get_items(bounds, timerange_before)
items_after = get_items(bounds, timerange_after)

In [None]:
# One target grid shared by both periods.
geobox = GeoBox.from_bbox(bounds, crs=f"epsg:{epsg}", resolution=dx)

# Load both periods with identical bands, chunking and target grid.
dc_before, dc_after = (
    odc_stac.load(
        items,
        bands=["scl", "red", "green", "blue", "nir"],
        chunks={"time": 5, "x": 600, "y": 600},
        geobox=geobox,
    )
    for items in (items_before, items_after)
)

In [None]:
# Preprocess the data
def preprocess(data: xr.Dataset) -> xr.Dataset:
    """Mask invalid pixels and reduce the time series to a median mosaic.

    Args:
        data: Dataset with a ``time`` dimension and an ``scl`` variable.

    Returns:
        A new dataset holding the per-pixel median over ``time`` of all
        valid observations (including a ``valid`` variable). The input
        dataset is not modified.
    """
    # ``assign`` returns a new dataset; item assignment would instead add
    # "valid" to the caller's dataset as a side effect.
    masked = data.assign(valid=is_valid_pixel(data["scl"]))
    # Invalid observations become NaN and are skipped by the median.
    return masked.where(masked["valid"]).median(dim="time", skipna=True)


# Build one median mosaic per period.
mosaic_before, mosaic_after = (preprocess(cube) for cube in (dc_before, dc_after))

In [None]:
# Could still be considered preprocessing...
def normalized_difference(
    band1: xr.DataArray,
    band2: xr.DataArray,
) -> xr.DataArray:
    """Calculate the normalized difference ``(band1 - band2) / (band1 + band2)``.

    Args:
        band1: First band (e.g. NIR for NDVI).
        band2: Second band (e.g. red for NDVI).

    Returns:
        The element-wise normalized difference as floating point values.
        Where ``band1 + band2`` is zero the result is NaN or infinite.
    """
    # Multiplying by 1.0 promotes integer inputs to float. Without it,
    # unsigned integer bands wrap around when band2 > band1 (or when the
    # sum exceeds the integer range) and give wrong results.
    band1 = band1 * 1.0
    band2 = band2 * 1.0
    return (band1 - band2) / (band1 + band2)


def add_indices(data: xr.Dataset) -> xr.Dataset:
    """Return a copy of the dataset extended with spectral indices.

    The following indices are added:

    - NDVI: Normalized Difference Vegetation Index, from ``nir`` and ``red``

    Args:
        data: Dataset containing the ``nir`` and ``red`` variables.

    Returns:
        A new dataset with the additional ``ndvi`` variable. The input
        dataset is not modified.
    """
    # ``assign`` returns a new dataset instead of mutating the argument.
    return data.assign(ndvi=normalized_difference(data["nir"], data["red"]))


# Extend both mosaics with the vegetation indices.
mosaic_before, mosaic_after = (
    add_indices(mosaic) for mosaic in (mosaic_before, mosaic_after)
)

In [None]:
def get_mosaic_time_title(
    year: int,
    month_start: int,
    month_end: int,
    day_end: int,
) -> str:
    """Generate a title for the mosaic based on the time range."""
    start_date = dt.datetime(year, month_start, day=1, tzinfo=dt.UTC)
    end_date = dt.datetime(year, month_end, day=day_end, tzinfo=dt.UTC)

    fmt_start: str = start_date.strftime("%d.%b")
    fmt_end: str = end_date.strftime("%d.%b %Y")
    return f"{fmt_start} - {fmt_end}"


# Titles for the two single-period plots; only the year differs.
timestamp_title_before = get_mosaic_time_title(
    year=year_before,
    month_start=month_start,
    month_end=month_end,
    day_end=day_end,
)
timestamp_title_after = get_mosaic_time_title(
    year=year_after,
    month_start=month_start,
    month_end=month_end,
    day_end=day_end,
)

In [None]:
# Classification of "Forest"
# Could be more sophisticated...
def classify_forest(
    data: xr.Dataset,
    ndvi_thres: float = 0.66,
) -> xr.DataArray:
    """Label pixels as forest (1) or not forest (0) by thresholding NDVI.

    A pixel is forest when its ``ndvi`` value is at least ``ndvi_thres``.
    NaN values compare as false and are therefore labelled 0 as well.
    """
    is_forest = data["ndvi"] >= ndvi_thres
    # Multiplying by 1 converts the boolean mask to integers.
    return is_forest * 1


# Each period's forest mask gets its own bit, so the sum of both masks
# encodes the change per pixel:
#   bit 0 (value 1): forest before
#   bit 1 (value 2): forest after
# Sum of both masks:
#   0b00 (0) = no forest in either period
#   0b01 (1) = forest only before -> deforestation
#   0b10 (2) = forest only after  -> reforestation
#   0b11 (3) = forest in both periods (unchanged)
forest_before = classify_forest(mosaic_before) * 0b01
forest_after = classify_forest(mosaic_after) * 0b10

In [None]:
# Plot the forest mask of the "before" period.
fig, ax = plt.subplots()
forest_before.plot.imshow(
    ax=ax,
    cmap="RdYlGn",
    vmin=-1,
    vmax=1,
    robust=True,
)
ax.set_title(f"NDVI >= 0.66  {timestamp_title_before}")
plt.show()

In [None]:
# Plot the forest mask of the "after" period.
fig, ax = plt.subplots()
forest_after.plot.imshow(
    ax=ax,
    robust=True,
    vmin=-1,
    vmax=1,
    cmap="RdYlGn",
)
# The plotted data is the thresholded forest mask, not raw NDVI, so the
# title names the threshold just like the "before" plot does.
ax.set_title(f"NDVI >= 0.66  {timestamp_title_after}")
plt.show()

In [None]:
def get_mosaic_time_title(
    year_start: int,
    year_end: int,
    month_start: int,
    month_end: int,
    day_end: int,
) -> str:
    """Generate a title for the mosaic based on the time range."""
    start_date = dt.datetime(year_start, month_start, day=1, tzinfo=dt.UTC)
    end_date = dt.datetime(year_end, month_end, day=day_end, tzinfo=dt.UTC)

    fmt_start: str = start_date.strftime("%d.%b")
    fmt_end: str = end_date.strftime("%d.%b")
    return f"{year_start} - {year_end} ({fmt_start} - {fmt_end})"


# Title for the change plot, covering both years.
diff_time_title = get_mosaic_time_title(
    year_start=year_before,
    year_end=year_after,
    month_start=month_start,
    month_end=month_end,
    day_end=day_end,
)

In [None]:
# Difference
# Value of the combined mask for pixels that were forest before but not after.
deforestation_value: int = 1

# Summing the two masks yields one change code per pixel.
diff = forest_before + forest_after
# Keep only the deforestation pixels; everything else becomes NaN.
diff = diff.where(diff == deforestation_value)

fig, ax = plt.subplots()
diff.plot.imshow(ax=ax, cmap="Reds", robust=True)
ax.set_title(f"NDVI Difference {diff_time_title}")
plt.show()