In [1]:
from tile_mate.stitcher import get_all_tile_data
from rio_tools import get_geopandas_features_from_array
import rasterio
import geopandas as gpd
import pandas as pd
from rasterio.crs import CRS
from pathlib import Path
import numpy as np
from tqdm import tqdm
from mpire import WorkerPool
import pandas as pd

In [2]:
# Load the tile index for the 2022 RADD deforestation alert dataset and
# preview the first rows (columns shown below: geometry, tile_id, url).
df_tiles = get_all_tile_data('radd_deforestation_alerts_2022')
df_tiles.head()

Unnamed: 0,geometry,tile_id,url
0,"POLYGON ((0.00000 0.00000, 10.00000 0.00000, 1...",00N_000E,https://data-api.globalforestwatch.org/dataset...
1,"POLYGON ((10.00000 0.00000, 20.00000 0.00000, ...",00N_010E,https://data-api.globalforestwatch.org/dataset...
2,"POLYGON ((20.00000 0.00000, 30.00000 0.00000, ...",00N_020E,https://data-api.globalforestwatch.org/dataset...
3,"POLYGON ((30.00000 0.00000, 40.00000 0.00000, ...",00N_030E,https://data-api.globalforestwatch.org/dataset...
4,"POLYGON ((40.00000 0.00000, 50.00000 0.00000, ...",00N_040E,https://data-api.globalforestwatch.org/dataset...


In [3]:
# (number of tiles, number of columns) in the tile index.
df_tiles.shape

(71, 3)

# Get Disturbance Area (without time of disturbance)

In [4]:
def get_spatial_features_from_radd_data(url: str) -> list:
    """Vectorize the alert footprint of one RADD tile, ignoring alert values.

    Band 1 of the raster at ``url`` is collapsed to a 0/1 ``uint8`` array
    (1 wherever the stored value is positive) and handed to
    ``get_geopandas_features_from_array`` together with the raster's affine
    transform. Cells equal to 0 are passed as the ``mask`` argument
    (presumably so that they are excluded from the output -- confirm against
    the ``rio_tools`` documentation).

    Parameters
    ----------
    url : str
        Location of the raster; anything ``rasterio.open`` accepts.

    Returns
    -------
    list
        The features produced by ``get_geopandas_features_from_array``,
        requested with ``label_name='radd_label'``.
    """
    with rasterio.open(url) as src:
        affine = src.transform
        has_alert = src.read(1) > 0

    # Binarize: the date/confidence encoding is discarded, only presence is kept.
    binary_tile = has_alert.astype(np.uint8)

    return get_geopandas_features_from_array(
        binary_tile,
        affine,
        label_name='radd_label',
        mask=(binary_tile == 0),
    )

def format_radd_spatial_features(radd_features: list) -> gpd.GeoDataFrame:
    """Turn a list of vectorized RADD features into a GeoDataFrame.

    Parameters
    ----------
    radd_features : list
        Features as accepted by ``gpd.GeoDataFrame.from_features`` (for
        example the output of ``get_spatial_features_from_radd_data``).

    Returns
    -------
    gpd.GeoDataFrame
        The features as a table, with the CRS set to EPSG:4326.
    """
    wgs84 = CRS.from_epsg(4326)
    return gpd.GeoDataFrame.from_features(radd_features, crs=wgs84)

def extract_radd_area(**record) -> 'gpd.GeoDataFrame':
    """Build the alert-footprint GeoDataFrame for a single tile record.

    The record is passed as keyword arguments, e.g.
    ``extract_radd_area(**tile_records[0])``; extra keys are ignored.

    Parameters
    ----------
    **record
        Must contain ``url`` (raster location handed to
        ``get_spatial_features_from_radd_data``) and ``tile_id`` (written to
        every row of the result).

    Returns
    -------
    gpd.GeoDataFrame
        The vectorized alert areas of the tile with an added ``tile_id``
        column. (The previous ``-> Path`` annotation was incorrect: nothing
        is written to disk here.)

    Raises
    ------
    KeyError
        If ``url`` or ``tile_id`` is missing from the record.
    """
    features = get_spatial_features_from_radd_data(record['url'])
    df_area = format_radd_spatial_features(features)
    # Tag every polygon with its source tile so tiles can be concatenated later.
    df_area['tile_id'] = record['tile_id']
    return df_area

In [5]:
# One dict per tile row, keyed by column name (e.g. 'tile_id', 'url').
tile_records = df_tiles.to_dict('records')

Note: the first URL points to a coastal tile that is much smaller than the others, so its runtime is not representative of the time required to process a typical tile.

In [8]:
# Vectorize the alert footprint of the first tile only.
df_spatial_area = extract_radd_area(**tile_records[0])

Unfortunately, it is not worth the time to reformat all tiles: at 10 meter resolution, even a single tile is massive.

In [None]:
# def process_driver(records: list[dict]):
#     with WorkerPool(n_jobs=10, use_dill=True) as pool:
#         dfs = pool.map(extract_radd_area, records[:3], progress_bar=True)
#     return dfs

# dfs = process_driver(tile_records)

In [10]:
# Write the first tile's footprint to '<tile_id>_spatial.geojson' in the working directory.
df_spatial_area.to_file(f'{tile_records[0]["tile_id"]}_spatial.geojson', driver='GeoJSON')

# Get Disturbance and Time

In [13]:
# Directory that receives the per-tile GeoJSON files. It is currently the
# working directory, so the mkdir call only matters if this path is changed.
OUT_RADD_DIR = Path('.')
OUT_RADD_DIR.mkdir(exist_ok=True, parents=True)

def get_features_from_radd_data(url: str) -> list:
    """Vectorize one RADD tile while keeping the raw alert value of each region.

    Band 1 of the raster at ``url`` is passed unchanged to
    ``get_geopandas_features_from_array`` together with the raster's affine
    transform. Cells equal to 0 are passed as the ``mask`` argument
    (presumably so that they are excluded from the output -- confirm against
    the ``rio_tools`` documentation).

    Parameters
    ----------
    url : str
        Location of the raster; anything ``rasterio.open`` accepts.

    Returns
    -------
    list
        The features produced by ``get_geopandas_features_from_array``,
        requested with ``label_name='radd_label'``.
    """
    with rasterio.open(url) as src:
        affine = src.transform
        alert_values = src.read(1)

    zero_cells = alert_values == 0
    return get_geopandas_features_from_array(
        alert_values,
        affine,
        label_name='radd_label',
        mask=zero_cells,
    )

def format_radd_features(radd_features: list) -> gpd.GeoDataFrame:
    """Convert raw RADD features into a GeoDataFrame with decoded alert fields.

    Each feature's ``radd_label`` is split on its decimal string form: the
    first character becomes the integer ``alert_conf`` and the remaining
    characters are read as a number of days, which is added to 2014-12-31 to
    give ``alert_dt``. The decoding is done column-wise rather than row by
    row, because a single tile can contain a very large number of features.

    Parameters
    ----------
    radd_features : list
        Features carrying a ``radd_label`` property (for example the output
        of ``get_features_from_radd_data``).

    Returns
    -------
    gpd.GeoDataFrame
        EPSG:4326 table with the geometry plus ``alert_conf`` and
        ``alert_dt``; the raw ``radd_label`` column is removed.

    Raises
    ------
    KeyError
        If the features do not provide a ``radd_label`` column.
    """
    df_radd = gpd.GeoDataFrame.from_features(radd_features, crs=CRS.from_epsg(4326))

    # Work on the textual form of the label so that the split is the same
    # whether the label arrives as an int (e.g. 21847) or a float (21847.0).
    label_text = df_radd['radd_label'].astype(str)
    df_radd['alert_conf'] = label_text.str[0].astype(int)

    # Days elapsed since the 2014-12-31 reference date.
    days_elapsed = label_text.str[1:].astype(float)
    df_radd['alert_dt'] = pd.Timestamp('2014-12-31') + pd.to_timedelta(days_elapsed, unit='D')

    return df_radd.drop(columns=['radd_label'])

def serialize_radd_data_with_timing(record: dict) -> Path:
    """Vectorize one tile with alert confidence/date and save it as GeoJSON.

    Parameters
    ----------
    record : dict
        Tile record providing ``tile_id`` (used for the file name) and
        ``url`` (raster location).

    Returns
    -------
    Path
        ``OUT_RADD_DIR / '<tile_id>.geojson'``, the file that was written.
    """
    tile_id, url = record['tile_id'], record['url']

    df_tile = format_radd_features(get_features_from_radd_data(url))

    out_path = OUT_RADD_DIR / f'{tile_id}.geojson'
    df_tile.to_file(out_path, driver='GeoJSON')
    return out_path

In [14]:
# Serialize the first tile with alert confidence/date and display the output path.
p = serialize_radd_data_with_timing(tile_records[0])
p

PosixPath('00N_000E.geojson')