# Deforestation Analysis in Mozambique
## A very simple approach

In this notebook, we will focus on the classification of forest cover in Mozambique using Sentinel-2 imagery.
We will use a `Machine Learning` approach to classify the forest cover. Deforestation itself is covered in the next notebook, where we will look at the changes in forest cover over time.

In [None]:
import datetime as dt  # noqa
from collections.abc import Sequence
from enum import IntEnum
from pathlib import Path
from typing import Literal, TypeAlias

import cmcrameri as cmc  # noqa: F401  # noqa
import geopandas as gpd  # noqa
import matplotlib.colors as colors  # noqa
import matplotlib.pyplot as plt
import numpy as np  # noqa
import odc.stac as odc_stac
import pandas as pd  # noqa
import rioxarray  # noqa: F401  # noqa
import xarray as xr
from odc.geo.geobox import GeoBox
from pystac import ItemCollection
from pystac_client import Client
from shapely.geometry import Polygon  # noqa
from sklearn.ensemble import RandomForestClassifier  # noqa
from sklearn.metrics import classification_report, confusion_matrix  # noqa
from sklearn.model_selection import train_test_split  # noqa
from sklearn.naive_bayes import GaussianNB  # noqa

In [None]:
# Define custom types
# does not change code behavior, just for clarity

# Bounding box as four floats; built in this notebook as
# (lonmin, latmin, lonmax, latmax)
Bbox: TypeAlias = tuple[float, float, float, float]
# Chunk size per dimension name: an integer or the literal "auto"
Chunks: TypeAlias = dict[str, int | Literal["auto"]]
# Sequence of band names
Bands: TypeAlias = Sequence[str]


class SCLValues(IntEnum):
    """Enum for Sentinel-2 Scene Classification Layer (SCL) values.

    Each member maps a class name to the integer code found in the ``scl``
    band. Because this is an ``IntEnum``, members can be compared directly
    against integer pixel values.
    """

    # Class codes as listed at:
    # https://custom-scripts.sentinel-hub.com/custom-scripts/sentinel-2/scene-classification/
    NO_DATA = 0
    DEFECTIVE = 1
    DARK_SHADOWS = 2
    CLOUD_SHADOWS = 3
    VEGETATED = 4
    NOT_VEGETATED = 5
    WATER = 6
    CLOUDS_LOW_PROB = 7
    CLOUDS_MEDIUM_PROB = 8
    CLOUDS_HIGH_PROB = 9
    THIN_CIRRUS = 10
    SNOW_OR_ICE = 11

In [None]:
# Set common parameters
# Grid spacing in CRS units (degrees for EPSG:4326); 0.0001 degrees is roughly
# 11 m at the equator.
# NOTE(review): the previous comment said "approx 60m resolution (0.0006
# degrees)", which does not match this value - confirm the intended resolution.
dx: float = 0.0001
epsg: int = 4326

# Set Spatial extent
latmin: float = -19.6
latmax: float = -18.9
lonmin: float = 32.9
lonmax: float = 33.7
# Order is (lonmin, latmin, lonmax, latmax)
bounds: Bbox = (lonmin, latmin, lonmax, latmax)

# Set Temporal extent
# A 30-day window: 2024-05-01 up to 2024-05-31
start_date: dt.datetime = dt.datetime(year=2024, month=5, day=1, tzinfo=dt.UTC)
end_date: dt.datetime = start_date + dt.timedelta(days=30)

# Define the time range for the search
# Formatted as "<start>/<end>" with date-only (YYYY-MM-DD) endpoints
time_fmt: str = "%Y-%m-%d"
start_date_fmt: str = start_date.strftime(time_fmt)
end_date_fmt: str = end_date.strftime(time_fmt)
timerange: str = f"{start_date_fmt}/{end_date_fmt}"

# Define the STAC catalog and collection
stac_catalog: str = "https://earth-search.aws.element84.com/v1"
collection: list[str] = ["sentinel-2-l2a"]

# Search for Sentinel-2 data
# Queries the catalog for items of the collection that match the bounding box
# and time range, and collects the results into an ItemCollection
items: ItemCollection = (
    Client.open(stac_catalog)
    .search(bbox=bounds, collections=collection, datetime=timerange)
    .item_collection()
)

print(len(items), "scenes found")

In [None]:
# Define some options for loading data
# Target grid: the search bounding box sampled every `dx` CRS units
geobox: GeoBox = GeoBox.from_bbox(bounds, crs=f"epsg:{epsg}", resolution=dx)
chunks: Chunks = {"time": 5, "x": 600, "y": 600}
bands: Bands = ["scl", "red", "green", "blue", "nir"]

# Lazily combine items into a datacube (Dask array)
dc: xr.Dataset = odc_stac.load(
    items,
    bands=bands,
    chunks=chunks,
    geobox=geobox,
    # The SCL band holds categorical class codes, so it must not be
    # interpolated: blending two codes produces a third, unrelated class
    # (e.g. the mean of 4 and 8 is 6) and corrupts the validity mask.
    # Use nearest-neighbour for SCL and bilinear for every other band.
    resampling={"scl": "nearest", "*": "bilinear"},
)
dc

In [None]:
# Define a mask for valid pixels (non-cloud)


def is_valid_pixel(data: xr.DataArray) -> xr.DataArray:
    """Flag the SCL pixels that count as usable observations.

    A pixel is flagged True when its class code lies in the closed range
    VEGETATED..WATER or equals SNOW_OR_ICE; every other code gives False.
    """
    in_surface_range = (data >= SCLValues.VEGETATED) & (data <= SCLValues.WATER)
    is_snow_or_ice = data == SCLValues.SNOW_OR_ICE
    return in_surface_range | is_snow_or_ice


# Store the boolean mask derived from the SCL band as a "valid" variable
# on the datacube
dc["valid"] = is_valid_pixel(dc["scl"])

In [None]:
# Build the composite: mask with the "valid" flag, then take the median
# along the time dimension
median: xr.Dataset = (
    dc.where(dc.valid)
    .median(dim="time")
)

In [None]:
# Assemble the output location; the file name carries the searched time range
out_dir = Path("/mnt/c/Users/npikall/datasets/sentinel-2/mozambique").resolve()
filename: str = f"mozambique_{timerange.replace('/', '_')}_median.tiff"
savepath: Path = out_dir / filename

# Make sure the target folder exists before writing
out_dir.mkdir(parents=True, exist_ok=True)

# Write the median composite to disk as a GeoTIFF
median.rio.to_raster(savepath)

In [None]:
# NOTE(review): presumably a deliberate stop marker so that running the
# notebook top to bottom halts here, before the plotting cells - confirm.
raise EOFError("End of file reached")

In [None]:
# Plot the rgb median composite
title_rgb: str = f"RGB - Median Composite\n{timerange}"
# Stack the three bands along a new "band" dimension and keep them as floats.
# Casting to int would turn NaN pixels (no valid observation in the time
# range) into arbitrary integers, which distorts the robust percentile
# stretch; as floats, xarray treats them as missing data instead.
rgb_median: xr.DataArray = median[["red", "green", "blue"]].to_dataarray(dim="band")
# robust=True stretches the colours between the 2nd and 98th percentiles
rgb_median.plot.imshow(robust=True).axes.set_title(title_rgb)
plt.show()

In [None]:
# Plot the false color composite (NIR, Red, Green)
title_fcc: str = f"False Color Composite (NIR, Red, Green)\n{timerange}"
# Stack the three bands along a new "band" dimension and keep them as floats.
# Casting to int would turn NaN pixels (no valid observation in the time
# range) into arbitrary integers, which distorts the robust percentile
# stretch; as floats, xarray treats them as missing data instead.
fcc: xr.DataArray = median[["nir", "red", "green"]].to_dataarray(dim="band")
# robust=True stretches the colours between the 2nd and 98th percentiles
fcc.plot.imshow(robust=True).axes.set_title(title_fcc)
plt.show()

In [None]:
# Normalized Difference Vegetation Index (NDVI)


def normalized_difference(
    band1: xr.DataArray,
    band2: xr.DataArray,
) -> xr.DataArray:
    """Calculate the normalized difference between two bands.

    Computes ``(band1 - band2) / (band1 + band2)`` element-wise. Both inputs
    are promoted to floating point before any arithmetic, so integer inputs
    cannot wrap around in either the difference or the sum.

    Args:
        band1: First band (the minuend), e.g. NIR for NDVI.
        band2: Second band (the subtrahend), e.g. red for NDVI.

    Returns:
        The element-wise normalized difference. Where the two bands sum to
        zero the quotient is not finite.
    """
    # Multiplying by 1.0 promotes integer data to float.
    first = band1 * 1.0
    second = band2 * 1.0
    return (first - second) / (first + second)


# NDVI from the composite's NIR and red bands, drawn on a fixed [-1, 1] scale
ndvi: xr.DataArray = normalized_difference(
    band1=median["nir"],
    band2=median["red"],
)
ndvi.plot.imshow(vmin=-1, vmax=1, cmap="cmc.cork").axes.set_title("NDVI")
plt.show()

In [None]:
# Bare expression: shows the composite dataset as the cell's output
median