# CAR Mexico Biomass Change (2010–2021)

In [1]:
# load packages
import os
import numpy as np
from glob import glob
import requests
from osgeo import gdal, ogr
import geopandas as gpd
import math
import pycurl
import pandas as pd

In [2]:
# Step 1: Read the project area shapefile
def read_project_area(shapefile_path):
    """
    Load the project-area shapefile as a GeoDataFrame.

    When the layer declares a CRS whose EPSG code is not 4326, a notice is
    printed and the data are reprojected to EPSG:4326 (WGS 84). A layer
    without a CRS, or one already in EPSG:4326, is returned as read.
    """
    area_gdf = gpd.read_file(shapefile_path)

    # Nothing to do when the CRS is missing or already WGS 84.
    if not area_gdf.crs or area_gdf.crs.to_epsg() == 4326:
        return area_gdf

    print("Reprojecting project area to WGS 84...")
    return area_gdf.to_crs(epsg=4326)

# Helper function to format tile name
def format_tile_name(lat, lon):
    """
    Build a tile identifier such as ``N20W110`` from integer coordinates.

    The hemisphere letter comes from the sign (zero counts as N / E); the
    absolute latitude is zero-padded to two digits and the absolute
    longitude to three.
    """
    hemisphere_ns = "N" if lat >= 0 else "S"
    hemisphere_ew = "E" if lon >= 0 else "W"

    lat_digits = format(abs(lat), "02d")
    lon_digits = format(abs(lon), "03d")

    return hemisphere_ns + lat_digits + hemisphere_ew + lon_digits

In [3]:
# Helper: compute the total project area in hectares (ha)
def calculate_area(gdf):
    """
    Compute the total area of the geometries in ``gdf`` in hectares.

    The centre of the bounding box selects a WGS 84 / UTM zone (EPSG 326xx
    for the northern hemisphere, 327xx for the southern). The data are
    reprojected to that zone so areas are in square metres, then divided by
    1e4 to get hectares.

    NOTE(review): the zone arithmetic treats ``gdf.total_bounds`` as
    longitude/latitude in degrees, so ``gdf`` is assumed to be in a
    geographic CRS such as EPSG:4326 -- confirm at the call site.

    Returns:
        A pandas Series with the single entry ``area_ha``: the sum of the
        per-feature areas, each rounded to two decimals before summing.
    """
    minx, miny, maxx, maxy = gdf.total_bounds
    print(minx, miny, maxx, maxy)

    central_lon = (minx + maxx) / 2
    # Clamp to the valid 1..60 range: a central longitude of exactly 180
    # would otherwise produce zone 61, which has no EPSG code.
    utm_zone = min(max(int((central_lon + 180) / 6) + 1, 1), 60)
    print(utm_zone)
    is_northern = (miny + maxy) / 2 >= 0  # centre latitude decides the hemisphere

    # EPSG code for the UTM zone
    epsg_code = 32600 + utm_zone if is_northern else 32700 + utm_zone
    print(f"Using EPSG:{epsg_code} for projection.")

    # Reproject to UTM so that geometry.area is expressed in square metres
    gdf_utm = gdf.to_crs(epsg=epsg_code)
    gdf_utm["area_ha"] = (gdf_utm.geometry.area / 1e4).round(2)
    return gdf_utm[["area_ha"]].sum()

In [4]:
# Step 2: Find and download tiles
def progress(download_t, downloaded, upload_t, uploaded):
    """
    Transfer-progress callback: print the download percentage in place.

    Nothing is printed while the expected download size is not positive.
    The upload arguments are accepted but not used.
    """
    if not download_t > 0:
        return

    fraction_done = downloaded / download_t
    percent = fraction_done * 100
    # '\r' with end='' rewrites the same console line on every call.
    print(f"\rDownloading: {percent:.2f}% ({downloaded}/{download_t} bytes)", end='')

def _download_tile(tile_url, output_file):
    """
    Download ``tile_url`` to ``output_file`` using pycurl.

    The data are first written to ``output_file + ".part"`` and renamed only
    after the transfer succeeded, so a failed or interrupted download never
    leaves a file that looks like a complete tile.

    Raises:
        pycurl.error: on connection problems, timeouts, or an HTTP status
            >= 400 (enabled through FAILONERROR).
    """
    partial_file = output_file + ".part"
    curl = pycurl.Curl()
    succeeded = False
    try:
        with open(partial_file, 'wb') as f:
            curl.setopt(pycurl.URL, tile_url)
            curl.setopt(pycurl.WRITEDATA, f)
            curl.setopt(pycurl.FOLLOWLOCATION, True)   # Follow redirects if needed
            curl.setopt(pycurl.FAILONERROR, True)      # Treat HTTP >= 400 as a failure
            curl.setopt(pycurl.CONNECTTIMEOUT, 10)     # Timeout for connection
            curl.setopt(pycurl.TIMEOUT, 300)           # Total timeout
            curl.setopt(pycurl.NOPROGRESS, False)      # Enable progress function
            curl.setopt(pycurl.XFERINFOFUNCTION, progress)  # Set progress function
            curl.perform()
        succeeded = True
    finally:
        # Always release the handle, and drop the partial file on failure.
        curl.close()
        if not succeeded and os.path.exists(partial_file):
            os.remove(partial_file)
    os.replace(partial_file, output_file)


def find_and_download_tiles(project_area, year, output_folder):
    """
    Finds and downloads the 10-degree tiles that cover the project area.

    Tile names are derived from the bounding box of ``project_area``:
    longitudes are floored and latitudes are ceiled to multiples of 10.
    NOTE(review): this presumably matches tiles named after their
    upper-left (north-west) corner -- confirm against the dataset's naming
    convention.

    Args:
        project_area: object exposing ``total_bounds`` as
            (minx, miny, maxx, maxy); treated as degrees.
        year: year inserted into the tile file name and URL.
        output_folder: destination directory, created if missing.

    Returns:
        List of local paths of the tiles that already existed or were
        downloaded successfully. Tiles whose download failed are reported
        on stdout and left out of the list.
    """
    os.makedirs(output_folder, exist_ok=True)

    # Calculate the bounding box of the project area
    minx, miny, maxx, maxy = project_area.total_bounds

    # Determine the tile coordinates covering the bounding box
    min_tile_x = math.floor(minx / 10) * 10
    max_tile_x = math.floor(maxx / 10) * 10
    min_tile_y = math.ceil(miny / 10) * 10
    max_tile_y = math.ceil(maxy / 10) * 10

    # Generate a list of tiles based on the tile naming convention
    downloaded_files = []
    for lat in range(min_tile_y, max_tile_y + 10, 10):
        for lon in range(min_tile_x, max_tile_x + 10, 10):
            tile_name = format_tile_name(lat, lon)
            tile_filename = f"{tile_name}_ESACCI-BIOMASS-L4-AGB-MERGED-100m-{year}-fv5.0.tif"
            output_file = os.path.join(output_folder, tile_filename)

            if os.path.exists(output_file):
                print(f'Image tile downloaded already, check {output_file}')
                downloaded_files.append(output_file)
                continue

            tile_url = f"https://dap.ceda.ac.uk/neodc/esacci/biomass/data/agb/maps/v5.01/geotiff/{year}/{tile_filename}"
            print(f"Downloading {tile_filename} from {tile_url}...")

            try:
                _download_tile(tile_url, output_file)
            except pycurl.error as e:
                print(f"Failed to download {tile_filename}: {e}")
            else:
                downloaded_files.append(output_file)
                print(f"Downloaded {output_file}")

    return downloaded_files

In [None]:
# Download (or reuse) the 2015 tiles covering the project area into Projects/ESA/.
# NOTE(review): `project_area` is not assigned in any visible cell -- presumably
# it is the result of read_project_area(...); confirm before running.
downloaded_files = find_and_download_tiles(project_area=project_area, year=2015, output_folder=r'Projects/ESA/')

In [42]:
# Show the local tile paths currently held in downloaded_files.
print(downloaded_files)

['Projects/ESA/N20W110_ESACCI-BIOMASS-L4-AGB-MERGED-100m-2010-fv5.0.tif']


In [25]:
def mask_and_calculate_gdal(raster_tiles_list, shapefile_path, nodata=65535):
    """
    Masks a raster using a shapefile and summarises the remaining pixels.

    The input rasters are warped in memory (GDAL "MEM" format, no
    intermediate files), cropped to the shapefile cutline, and band 1 is
    read. Pixels equal to ``nodata`` -- and NaN pixels for floating-point
    rasters -- are excluded from every statistic, so count, sum and mean
    always describe the same set of pixels.

    Args:
        raster_tiles_list: raster path(s) passed to gdal.Warp as the source.
        shapefile_path: vector file used as the cutline.
        nodata: value written outside the cutline and treated as missing.

    Returns:
        Tuple ``(count, total_sum, mean_value)``; ``(0, 0, 0)`` when no valid
        pixel falls inside the cutline.

    Raises:
        RuntimeError: if the warp fails or the band cannot be read.
    """
    # Perform masking using GDAL Warp (in-memory)
    mem_raster = gdal.Warp(destNameOrDestDS="",
                           srcDSOrSrcDSTab=raster_tiles_list,
                           cutlineDSName=shapefile_path,
                           cropToCutline=True,
                           dstNodata=nodata,
                           format="MEM")

    if mem_raster is None:
        raise RuntimeError("Raster masking failed.")

    # Read data from the in-memory raster, then release the dataset
    data = mem_raster.GetRasterBand(1).ReadAsArray()
    mem_raster = None
    if data is None:
        raise RuntimeError("Reading the masked raster failed.")

    valid = data[data != nodata]
    if np.issubdtype(valid.dtype, np.floating):
        # NaN never equals nodata, so drop it explicitly.
        valid = valid[~np.isnan(valid)]

    if valid.size == 0:
        print("No valid pixels found in the masked raster.")
        # count is defined on this path too, so the return below cannot fail.
        return 0, 0, 0

    count = int(valid.size)
    total_sum = np.sum(valid)
    mean_value = np.mean(valid)
    return count, total_sum, mean_value

In [7]:
# Shapefile of project CAR1392 whose name carries the "AA" tag; it is bound to
# aa_path, the name the single-project statistics cell passes as the cutline.
aa_path = 'Projects/CAR-Mexico/CAR1392/Calyx_CAR_1392_AA_new.shp'

In [6]:
# "PA" shapefile of project CAR1674. The file name contains a non-ASCII
# character (ú), so the path must be handled as Unicode.
pa_path = 'Projects/CAR-Mexico/CAR1674/PA_conhúas.shp'

In [6]:
# Select the GEDI04_B raster with the _MU suffix as the input raster list.
# NOTE(review): _MU is presumably the mean-AGBD layer -- confirm against the product guide.
downloaded_files = ['Projects/ESA/GEDI04_B_MW019MW223_02_002_02_R01000M_MU.tif']

In [6]:
# Select the GEDI04_B raster with the _SE suffix as the input raster list;
# rebinding downloaded_files replaces whatever was selected before.
# NOTE(review): _SE is presumably the standard-error layer -- confirm against the product guide.
downloaded_files = ['Projects/ESA/GEDI04_B_MW019MW223_02_002_02_R01000M_SE.tif']

In [6]:
# Select the CONAFOR raster as the input raster list; rebinding
# downloaded_files replaces whatever was selected before.
downloaded_files = ['Projects/ESA/CONAFOR.tif']

In [8]:
# Sanity check: show the raster list and cutline shapefile currently selected.
print(downloaded_files, aa_path)

['Projects/ESA/CONAFOR.tif'] Projects/CAR-Mexico/CAR1392/Calyx_CAR_1392_AA_new.shp


In [26]:
# Single-project check: pixel count, sum and mean of the selected raster inside
# the aa_path cutline, treating -9999 as the nodata value.
count, total_sum, mean_value = mask_and_calculate_gdal(raster_tiles_list=downloaded_files,
                                                       shapefile_path=aa_path, nodata=-9999)
print(count, total_sum, mean_value)

24 2018.3335 84.09723


In [8]:
# Project IDs to process; each one is used to build the folder name
# Projects/CAR-Mexico/CAR<pid>/ in the loop that follows.
pids = [1455, 1552, 1411, 1388, 1514, 1544, 1387, 1566, 1574, 1674, 1428, 1429, 1568, 1626, 1658, 1660, 1661]
print(len(pids))

17


In [9]:
# For every project, compute the mean raster value inside its first two
# shapefiles (taken as AA, then PA) and collect the rounded means.
AA, PA = [], []
for pid in pids:
    print(f'Working on CAR{pid}:')
    # glob() returns files in arbitrary order; sort so that the AA/PA pick is
    # deterministic (alphabetical: the "AA" file precedes the "PA" file).
    shp_paths = sorted(glob(f'Projects/CAR-Mexico/CAR{pid}/*.shp'))
    if len(shp_paths) < 2:
        raise FileNotFoundError(
            f'Expected AA and PA shapefiles in Projects/CAR-Mexico/CAR{pid}/, found {shp_paths}')
    aa_path, pa_path = shp_paths[:2]

    # mask_and_calculate_gdal returns (count, total_sum, mean_value);
    # only the mean is needed here.
    _, _, aa_mean_value = mask_and_calculate_gdal(raster_tiles_list=downloaded_files,
                                                  shapefile_path=aa_path, nodata=-9999)
    _, _, pa_mean_value = mask_and_calculate_gdal(raster_tiles_list=downloaded_files,
                                                  shapefile_path=pa_path, nodata=-9999)
    AA.append(round(aa_mean_value, 2))
    PA.append(round(pa_mean_value, 2))

Working on CAR1455:
Working on CAR1552:
Working on CAR1411:
Working on CAR1388:
Working on CAR1514:
Working on CAR1544:
Working on CAR1387:
Working on CAR1566:
Working on CAR1574:
Working on CAR1674:
Working on CAR1428:
Working on CAR1429:
Working on CAR1568:
Working on CAR1626:
Working on CAR1658:
Working on CAR1660:
Working on CAR1661:


In [17]:
# Tabulate the per-project means collected in AA and PA under AGBD column
# names; pids, AA and PA must have the same length.
gedi_df = pd.DataFrame({'PID': pids, 'AA_MEAN_AGBD': AA, 'PA_MEAN_AGBD': PA})
# Bare expression: displays the frame as the notebook cell output.
gedi_df

Unnamed: 0,PID,AA_MEAN_AGBD,PA_MEAN_AGBD
0,1455,195.039993,164.860001
1,1552,186.050003,124.660004
2,1411,304.179993,303.950012
3,1388,152.240005,114.480003
4,1514,74.099998,69.540001
5,1544,135.440002,130.380005
6,1387,225.740005,185.679993
7,1566,44.470001,42.509998
8,1574,123.739998,127.620003
9,1674,56.540001,53.0


In [18]:
# Save the table as CSV; with to_csv defaults the row index is written as an
# unnamed first column.
gedi_df.to_csv('Projects/CAR-Mexico/CAR_GEDI_AGBD.csv')

In [10]:
# Tabulate the per-project means collected in AA and PA under SE column names.
# NOTE(review): the values reflect whichever raster was in downloaded_files
# when the AA/PA loop last ran -- confirm it was the _SE raster.
gedi_df = pd.DataFrame({'PID': pids, 'AA_MEAN_SE': AA, 'PA_MEAN_SE': PA})
# Bare expression: displays the frame as the notebook cell output.
gedi_df

Unnamed: 0,PID,AA_MEAN_SE,PA_MEAN_SE
0,1455,19.389999,17.879999
1,1552,29.02,22.26
2,1411,20.559999,22.0
3,1388,20.120001,17.51
4,1514,9.96,11.48
5,1544,14.96,14.7
6,1387,18.190001,20.18
7,1566,4.54,5.38
8,1574,13.81,13.58
9,1674,7.21,7.03


In [11]:
# Save the table as CSV; with to_csv defaults the row index is written as an
# unnamed first column.
gedi_df.to_csv('Projects/CAR-Mexico/CAR_GEDI_SE.csv')

In [27]:
# Project IDs for the next run; this rebinds pids, replacing any earlier list.
pids = [1574, 1568, 1626, 1660, 1674, 1710, 1658, 1661, 1514, 1262, 1552, 1566, 1429, 1387, 1388, 1455, 1411, 1544, 1531, 1428]
print(len(pids))

20


In [28]:
# For every project, compute the valid-pixel count and mean raster value
# inside its first shapefile (taken as AA) and collect both.
C, AA = [], []
for pid in pids:
    print(f'Working on CAR{pid}:')
    # glob() returns files in arbitrary order; sort so the same shapefile is
    # picked on every platform (alphabetically first, i.e. the "AA" file).
    shp_paths = sorted(glob(f'Projects/CAR-Mexico/CAR{pid}/*.shp'))
    if not shp_paths:
        raise FileNotFoundError(f'No shapefile found in Projects/CAR-Mexico/CAR{pid}/')
    aa_path = shp_paths[0]
    print(downloaded_files, aa_path)

    # The middle element of the result (total sum) is not needed here.
    count, _, aa_mean_value = mask_and_calculate_gdal(raster_tiles_list=downloaded_files,
                                                      shapefile_path=aa_path, nodata=-9999)
    C.append(count)
    AA.append(round(aa_mean_value, 2))
    print(f'{count} pixels used and mean agbd value:{aa_mean_value}')

Working on CAR1574:
['Projects/ESA/CONAFOR.tif'] Projects/CAR-Mexico/CAR1574\AA.shp
58 pixels used and mean agbd value:98.34097290039062
Working on CAR1568:
['Projects/ESA/CONAFOR.tif'] Projects/CAR-Mexico/CAR1568\AA.shp
295 pixels used and mean agbd value:88.9797134399414
Working on CAR1626:
['Projects/ESA/CONAFOR.tif'] Projects/CAR-Mexico/CAR1626\AA.shp
45 pixels used and mean agbd value:82.1667709350586
Working on CAR1660:
['Projects/ESA/CONAFOR.tif'] Projects/CAR-Mexico/CAR1660\AA.shp
151 pixels used and mean agbd value:72.58574676513672
Working on CAR1674:
['Projects/ESA/CONAFOR.tif'] Projects/CAR-Mexico/CAR1674\AA_conhúas.shp
385 pixels used and mean agbd value:108.64352416992188
Working on CAR1710:
['Projects/ESA/CONAFOR.tif'] Projects/CAR-Mexico/CAR1710\AA.shp
178 pixels used and mean agbd value:81.71842956542969
Working on CAR1658:
['Projects/ESA/CONAFOR.tif'] Projects/CAR-Mexico/CAR1658\AA.shp
80 pixels used and mean agbd value:68.63790130615234
Working on CAR1661:
['Projects

In [33]:
# Tabulate, per project, the number of pixels used (C) and the rounded mean (AA).
gedi_df = pd.DataFrame({'PID': pids, 'Pixels': C, 'AA_MEAN_AGBD': AA})
# Bare expression: displays the frame as the notebook cell output.
gedi_df

Unnamed: 0,PID,Pixels,AA_MEAN_AGBD
0,1574,58,98.339996
1,1568,295,88.980003
2,1626,45,82.169998
3,1660,151,72.589996
4,1674,385,108.639999
5,1710,178,81.720001
6,1658,80,68.639999
7,1661,47,62.459999
8,1514,104,124.120003
9,1262,18,87.75


In [34]:
# Save the table as CSV; with to_csv defaults the row index is written as an
# unnamed first column.
gedi_df.to_csv('Projects/CAR-Mexico/CAR_CONAFOR_AGBD.csv')