## Extracting the biomass value from the dataset
1. Download the data from https://catalogue.ceda.ac.uk/uuid/bf535053562141c6bb7ad831f5998d77/ (the data for the years 2015 to 2016 requires a total of 300 GB of disk space)

2. Overlay the reforestation polygons on the yearly raster files and extract polygon-level biomass values

In [None]:
import geopandas as gpd
import rasterio
import os
import pandas as pd
import numpy as np
from helper_functions import extract_raster_values, process_in_chunks

Read reforestation polygons

In [None]:
# Location of the GeoJSON file with the reforestation polygons (a relative
# path, so it is resolved against the current working directory).
polygons_path = "../input/Updated_Reforestation_Data.geojson"
# Load the polygons; every later step works on this GeoDataFrame.
polygons_gdf = gpd.read_file(polygons_path)

Locate downloaded biomass data

In [None]:
# Root directory of the downloaded biomass GeoTIFFs; it contains one
# sub-directory per period (e.g. "2015_2016").
base_raster_dir = (
    "/home/idisc02/Forest_Monitoring/"
    "dap.ceda.ac.uk/neodc/esacci/biomass/data/agb/maps/v5.01/geotiff"
)

Define periods of interest

In [None]:
# Consecutive-year periods "2015_2016" through "2020_2021"; each label is also
# the name of the raster sub-directory for that period.
periods = [f"{start_year}_{start_year + 1}" for start_year in range(2015, 2021)]

In [None]:
# Work on a copy so the polygons loaded above stay untouched; one
# "Biomass_change_<period>" column is merged into this frame per period.
final_gdf = polygons_gdf.copy()

for period in periods:
    raster_dir = os.path.join(base_raster_dir, period)

    # Determine the CRS of the raster files before processing chunks.
    # Only the first GeoTIFF found is inspected; this assumes every raster
    # of the period shares its CRS -- TODO confirm.
    first_raster_file = next(
        (f for f in os.listdir(raster_dir) if f.endswith('.tif')), None
    )
    if first_raster_file is None:
        # Report the gap instead of silently leaving this period's column
        # out of the final output.
        print(f"No .tif files found in {raster_dir}; skipping period {period}")
        continue

    # Only the CRS is needed from this file, so it is closed again before the
    # long-running extraction starts.
    first_raster_path = os.path.join(raster_dir, first_raster_file)
    with rasterio.open(first_raster_path) as src:
        raster_crs = src.crs

    # 10000 is forwarded as the second argument of process_in_chunks
    # (presumably the chunk size -- see helper_functions).
    period_gdf = process_in_chunks(polygons_gdf, 10000, raster_crs, raster_dir, period)

    # NOTE(review): the join key is the geometry column. This relies on
    # period_gdf carrying geometries equal to those in final_gdf, and
    # duplicated geometries would multiply rows -- verify against
    # process_in_chunks.
    final_gdf = final_gdf.merge(
        period_gdf[['geometry', f'Biomass_change_{period}']],
        on='geometry',
        how='left',
    )

Save the updated GeoDataFrame as a GeoJSON file (note: the output path is the same as the input path, so the original file is overwritten)

In [None]:
# NOTE(review): this is the same path string the polygons were read from at
# the top of the notebook, so the write below overwrites the input file
# rather than creating a separate one -- confirm this is intended.
output_path = "../input/Updated_Reforestation_Data.geojson"
# Write the polygons together with the merged per-period columns as GeoJSON.
final_gdf.to_file(output_path, driver='GeoJSON')