# Combine wetlands from the GLOBCOVER wetlands and global mangroves datasets into a single shapefile

In [None]:
import subprocess
import warnings

import geopandas as gpd
import numpy as np
import pandas as pd
import pygeos
import xarray as xr
from rhg_compute_tools import kubernetes as rhgk

from sliiders import settings as sset

# Silence the "initial implementation of Parquet" warning for the whole notebook.
warnings.filterwarnings("ignore", message=".*initial implementation of Parquet.*")


# Intermediate GLOBCOVER products are written next to the source raster and
# share its base name; only the file extension differs.
PATH_GLOBCOVER_NC = sset.PATH_GLOBCOVER_2009.with_suffix(".nc")
PATH_GLOBCOVER_SHP = sset.PATH_GLOBCOVER_2009.with_suffix(".shp")

## 1. Get wetland cover from `GLOBCOVER` as `.nc`

In [None]:
# Start a cluster for the raster processing below and scale it to 8.
# NOTE(review): `get_micro_cluster` is a project helper; presumably it returns a
# dask client/cluster pair and `scale(8)` requests 8 workers -- confirm.
client, cluster = rhgk.get_micro_cluster()

cluster.scale(8)
# Bare expression: displays the cluster widget/repr in the notebook.
cluster

In [None]:
# Open the GLOBCOVER raster in chunks of 32400 x 27900 pixels.
da = xr.open_rasterio(sset.PATH_GLOBCOVER_2009, chunks={"x": 32400, "y": 27900})

In [None]:
# Start loading the chunks (persist keeps the chunked result for reuse below).
da = da.persist()

In [None]:
# Remove the "band" dimension and its leftover coordinate.
da = da.squeeze("band").drop("band")

In [None]:
# Boolean mask: True where the pixel class is 160, 170 or 180.
# NOTE(review): these are presumably the flooded/waterlogged classes of the
# GlobCover legend that this notebook treats as wetlands -- confirm.
da = da.isin([160, 170, 180]).persist()

In [None]:
# Wrap the mask in a Dataset with a single variable called "wetlands".
da = da.to_dataset(name="wetlands")

In [None]:
# Materialize the mask before writing it out.
da = da.compute()

In [None]:
# Write the wetlands mask as NetCDF; this file is the input to the
# polygonize step.
da.to_netcdf(PATH_GLOBCOVER_NC)

In [None]:
# The cluster is not used after this point, so shut it down.
client.close()
cluster.close()

## 2. Vectorize
### Run `gdal_polygonize.py` in shell

In [None]:
# Vectorize the wetlands mask (NetCDF) into polygons (shapefile) with GDAL's
# `gdal_polygonize.py` command-line tool.
#
# The command is passed as an argument list rather than a shell string so that
# paths containing spaces or shell metacharacters are handled correctly.
cmd = ["gdal_polygonize.py", str(PATH_GLOBCOVER_NC), str(PATH_GLOBCOVER_SHP)]

# `check=True` raises `subprocess.CalledProcessError` on a non-zero exit code,
# so a failed polygonize stops the notebook here instead of letting the
# following cells read a missing or stale shapefile.
sp = subprocess.run(cmd, check=True)
rc = sp.returncode

# Shows the executed arguments and the return code.
print(sp)

## 3. Clean up shapefile

In [None]:
gdf = gpd.read_file(PATH_GLOBCOVER_SHP)

In [None]:
gdf = gdf[gdf["DN"] == 1].drop(columns=["DN"])
gdf = gdf.explode().reset_index(drop=True)

In [None]:
gdf.to_file(PATH_GLOBCOVER_SHP)

## 4. Combine with mangroves

In [None]:
# Reload the cleaned GLOBCOVER wetland polygons from the shapefile.
globcover_mask = gpd.read_file(PATH_GLOBCOVER_SHP)

In [None]:
# Load the mangrove polygons that will be merged with the GLOBCOVER wetlands.
mangroves = gpd.read_file(sset.PATH_GLOBAL_MANGROVES)

In [None]:
# Repair invalid GLOBCOVER geometries before running spatial operations on
# them: convert to pygeos, apply `make_valid`, convert back to shapely.
globcover_mask["geometry"] = pygeos.to_shapely(
    pygeos.make_valid(pygeos.from_shapely(globcover_mask["geometry"]))
)

In [None]:
# Left spatial join (default predicate): every GLOBCOVER polygon is kept, with
# one row per matching mangrove polygon. `index_right` holds the label of the
# matching row in `mangroves`, or NaN when there is no match.
combined = gpd.sjoin(globcover_mask, mangroves, how="left")

In [None]:
# Expose the mangrove row labels as an explicit "index" column.
mangroves = mangroves.reset_index(drop=False)

In [None]:
# Flag mangrove polygons that matched at least one GLOBCOVER polygon.
# NOTE(review): this flag is not read again in this notebook -- only the
# "geometry" column is kept before the result is written out.
mangroves["in_combined"] = mangroves["index"].isin(combined["index_right"].unique())

In [None]:
# Split the join result: GLOBCOVER polygons with no mangrove match are set
# aside unchanged; the matched rows are processed further.
globcover_only = combined[combined["index_right"].isnull()].copy()
combined = combined[combined["index_right"].notnull()].copy()

# Renumber rows 0..n-1 so that a default-indexed series assigned to `combined`
# later aligns row by row.
combined = combined.reset_index(drop=True)

In [None]:
# With the NaN rows removed, the mangrove labels can be cast to integers.
combined["index_right"] = combined["index_right"].astype(int)

In [None]:
# Attach the matching mangrove geometry to every joined row.
# `np.take` is positional, so this relies on the `index_right` labels being
# equal to row positions in `mangroves`.
# NOTE(review): presumably true because `read_file` yields a default 0..n-1
# index -- confirm. The new GeoSeries is also created without an explicit CRS.
combined["geometry_right"] = gpd.GeoSeries(
    np.take(mangroves["geometry"].to_numpy(), combined["index_right"].to_numpy())
)

In [None]:
# Make the mangrove geometry the active one so that `dissolve` unions it.
combined = combined.set_geometry("geometry_right")

In [None]:
# Union all mangrove geometries that share the same "FID".
# NOTE(review): "FID" presumably identifies the GLOBCOVER polygon, and the
# remaining columns (including the original "geometry") are presumably reduced
# with dissolve's default aggregation (first value per group) -- confirm.
combined = combined.dissolve("FID").reset_index(drop=False)

In [None]:
# Cut the mangrove-covered area out of each GLOBCOVER polygon, so that area is
# not duplicated when the mangrove polygons are appended afterwards.
combined["geometry"] = combined["geometry"].difference(combined["geometry_right"])

In [None]:
# Switch the active geometry back to the (now trimmed) GLOBCOVER geometry.
combined = combined.set_geometry("geometry")

In [None]:
# Stack the three pieces: GLOBCOVER polygons with mangrove area removed, all
# mangrove polygons, and GLOBCOVER polygons that matched no mangrove.
combined = pd.concat([combined, mangroves, globcover_only], ignore_index=True)

In [None]:
# Only the geometry is needed in the output; drop every attribute column.
combined = combined[["geometry"]].copy()

In [None]:
# Quick visual sanity check on the first 20 geometries.
combined.iloc[:20].plot()

In [None]:
# Split multi-part geometries into single parts and renumber the rows.
combined = combined.explode().reset_index(drop=True)

In [None]:
# Discard geometries with no area (e.g. empty results of the difference step).
combined = combined[combined.geometry.area > 0].copy()

In [None]:
# Write the combined wetlands layer.
combined.to_file(sset.PATH_WETLANDS_INT)