In [None]:
import os
import rasterio
import geopandas as gpd
import pandas as pd
import time
import numpy as np


In [None]:

# Input vector datasets: GEDI points (GeoPackage) and ICESat-2 features (shapefile).
gedi_path = '../data/GEDI_data/clipped_gedi.gpkg'
icesat_path = '../data/GEDI_data/clipped_icesat.shp'

# Read the GEDI points and report how long the read took.
starttime = time.time()
gedi_df = gpd.read_file(gedi_path, driver='GPKG', index=True)
endtime = time.time()
elapsed = endtime - starttime
print(f"Reading in the GEDI shapefile took {elapsed} seconds")

# Read the ICESat-2 shapefile; its 'GeoTIFF' column is used by the next cell
# to find the raster tiles to process.
icesat_df = gpd.read_file(icesat_path, driver='ESRI Shapefile', index=True)


In [None]:
# For every ICESat GeoTIFF tile: sample the raster at each GEDI point inside the
# tile bounds, accumulate per-bin statistics of the (GEDI - ICESat) AGBD
# difference, and write the matched points to one CSV per tile.

# Get unique values in GeoTIFF column
unique_geotiffs = icesat_df['GeoTIFF'].unique()

# Specify the file location of the TIFF files
tif_folder = 'reprojected_icesat_to_gedi_crs'

# Folder receiving one comparison CSV per tile; created up front so to_csv cannot
# fail on a missing directory.
output_folder = 'comparisons_bins'
os.makedirs(output_folder, exist_ok=True)

# Raster value that marks a pixel to be skipped.
nodata_value = -9999

# Fixed column order, so a tile without any match still produces a CSV with a
# header row (a header-less file makes pd.read_csv raise EmptyDataError).
csv_columns = ['Gedi Shot Number', 'ICESat Tile', 'ICESat AGBD', 'GEDI AGBD',
               'Difference', 'Latitude', 'Longitude']

gedi_bins = [0, 25, 50, 75, 100, 125, 150, 175, 200, 225, 250, 275, 300]
# np.digitize returns indices 0..len(gedi_bins), hence one extra slot per array.
# NOTE: `min` and `max` shadow the Python builtins; the names are kept because
# other cells read and re-create these arrays under the same names.
n_slots = len(gedi_bins) + 1
min = np.full(n_slots, np.inf)
# Start at -inf (not 0) so a bin whose differences are all negative reports its
# true maximum instead of 0.
max = np.full(n_slots, -np.inf)
num_points = np.zeros(n_slots)
mean_helper = np.zeros(n_slots)   # running sum of differences
std_helper = np.zeros(n_slots)    # running sum of squared differences

# Iterate over unique GeoTIFF values
for geotiff in unique_geotiffs:
    # Matched points for this tile, one dict per row of the output CSV
    records = []

    # Construct the file path: drop the last four characters of the tile name
    # (presumably the '.tif' extension -- confirm) and add the reprojected suffix.
    tif_file = os.path.join(tif_folder, geotiff[:-4] + '_reprojected.tif')

    # Load the TIFF file using rasterio
    with rasterio.open(tif_file) as icesat_tif:
        # Read the band once per tile instead of once per GEDI point.
        band = icesat_tif.read(1)
        n_rows, n_cols = band.shape

        # Only load the GEDI points that fall inside this tile's bounding box.
        filtered_df = gpd.read_file('../data/GEDI_data/clipped_gedi.gpkg', driver='GPKG', index = True, bbox=icesat_tif.bounds)

        for _, point in filtered_df.iterrows():
            gedi_shot_number = point['shot_numbe']
            lat = point['lat_lowest']
            lon = point['lon_lowest']
            agbd_gedi = point['agbd']

            row, col = icesat_tif.index(lon, lat)
            # Skip points that map outside the raster: a negative index would
            # silently wrap to the opposite edge, a too-large one would raise.
            if not (0 <= row < n_rows and 0 <= col < n_cols):
                continue
            agbd_icesat = band[row, col]

            if agbd_icesat == nodata_value:
                continue

            index = np.digitize(agbd_gedi, gedi_bins)
            difference = agbd_gedi - agbd_icesat
            num_points[index] += 1
            if difference < min[index]:
                min[index] = difference
            if difference > max[index]:
                max[index] = difference
            mean_helper[index] += difference
            std_helper[index] += difference**2

            records.append({
                'Gedi Shot Number': gedi_shot_number,
                'ICESat Tile': geotiff,
                'ICESat AGBD': agbd_icesat,
                'GEDI AGBD': agbd_gedi,
                'Difference': difference,
                'Latitude': lat,
                'Longitude': lon
            })

    # Save this tile's matched points to a csv file
    df = pd.DataFrame(records, columns=csv_columns)
    df.to_csv(os.path.join(output_folder, f'gedi_icesat_agbd_comparison_{geotiff}.csv'), index=False)
    print("Dataframe saved to csv file")

In [None]:
# Rebuild the per-bin statistics of the (GEDI - ICESat) AGBD difference from the
# per-tile comparison CSVs, keeping only rows where both values lie in [0, 500].
gedi_bins = [0, 25, 50, 75, 100, 125, 150, 175, 200, 225, 250, 275, 300]
# np.digitize returns indices 0..len(gedi_bins), hence one extra slot per array.
# NOTE: `min` and `max` shadow the Python builtins; the names are kept because
# later cells read these arrays under the same names.
n_slots = len(gedi_bins) + 1
min = np.full(n_slots, np.inf)
# Start at -inf (not 0) so a bin whose differences are all negative reports its
# true maximum instead of 0.
max = np.full(n_slots, -np.inf)
num_points = np.zeros(n_slots)
mean_helper = np.zeros(n_slots)   # running sum of differences
std_helper = np.zeros(n_slots)    # running sum of squared differences

folder = 'comparisons_bins'
i = 0  # total number of CSV rows visited (valid or not)
# Sorted so the processing (and printing) order is deterministic.
for file in sorted(os.listdir(folder)):
    # Ignore anything that is not a comparison CSV (checkpoint folders, OS files, ...).
    if not file.endswith('.csv'):
        continue
    try:
        filtered_df = pd.read_csv(os.path.join(folder, file))
    except pd.errors.EmptyDataError:
        # A tile without any matched point may have been saved without a header.
        print(f"Skipping empty file {file}")
        continue
    i += len(filtered_df)

    agbd_gedi = filtered_df['GEDI AGBD'].to_numpy(dtype=float)
    agbd_icesat = filtered_df['ICESat AGBD'].to_numpy(dtype=float)

    # NaN fails every comparison, so this mask also drops missing values.
    with np.errstate(invalid='ignore'):
        valid = (
            (agbd_icesat >= 0) & (agbd_icesat <= 500)
            & (agbd_gedi >= 0) & (agbd_gedi <= 500)
        )

    index = np.digitize(agbd_gedi[valid], gedi_bins)
    difference = agbd_gedi[valid] - agbd_icesat[valid]

    # ufunc.at applies the update once per element, in order, so repeated bin
    # indices accumulate exactly as an explicit Python loop would.
    np.add.at(num_points, index, 1)
    np.minimum.at(min, index, difference)
    np.maximum.at(max, index, difference)
    np.add.at(mean_helper, index, difference)
    np.add.at(std_helper, index, difference**2)
    # print("=====================================")
    print(file)

print("done")

In [None]:
# Rebuild, from the per-tile comparison CSVs, (a) the per-bin statistics of the
# (GEDI - ICESat) AGBD difference and (b) two histograms per GEDI bin: the ICESat
# values binned on the same edges, and the GEDI values in 2.5-wide sub-bins.
# Only rows where both values lie in [0, 500] are used.
bins = [0, 25, 50, 75, 100, 125, 150, 175, 200, 225, 250, 275, 300]
bin_edges = np.asarray(bins)
# np.digitize returns indices 0..len(bins), hence one extra slot per array.
n_slots = len(bins) + 1
# icesat_histograms[g, k]: count of points in GEDI bin g and ICESat bin k.
icesat_histograms = np.zeros((n_slots, n_slots))
# gedi_histograms[g, s]: count of points in GEDI bin g and sub-bin s (10 sub-bins).
gedi_histograms = np.zeros((n_slots, 10))
# NOTE: `min` and `max` shadow the Python builtins; the names are kept because
# later cells read these arrays under the same names.
min = np.full(n_slots, np.inf)
# Start at -inf (not 0) so a bin whose differences are all negative reports its
# true maximum instead of 0.
max = np.full(n_slots, -np.inf)
num_points = np.zeros(n_slots)
mean_helper = np.zeros(n_slots)   # running sum of differences
std_helper = np.zeros(n_slots)    # running sum of squared differences
folder = 'comparisons_bins'
i = 0  # total number of CSV rows visited (valid or not)
# Sorted so the processing (and printing) order is deterministic.
for file in sorted(os.listdir(folder)):
    # Ignore anything that is not a comparison CSV (checkpoint folders, OS files, ...).
    if not file.endswith('.csv'):
        continue
    try:
        filtered_df = pd.read_csv(os.path.join(folder, file))
    except pd.errors.EmptyDataError:
        # A tile without any matched point may have been saved without a header.
        print(f"Skipping empty file {file}")
        continue
    i += len(filtered_df)

    agbd_gedi = filtered_df['GEDI AGBD'].to_numpy(dtype=float)
    agbd_icesat = filtered_df['ICESat AGBD'].to_numpy(dtype=float)

    # NaN fails every comparison, so this mask also drops missing values.
    with np.errstate(invalid='ignore'):
        valid = (
            (agbd_icesat >= 0) & (agbd_icesat <= 500)
            & (agbd_gedi >= 0) & (agbd_gedi <= 500)
        )
    gedi_valid = agbd_gedi[valid]
    icesat_valid = agbd_icesat[valid]

    # index is always >= 1 here because every kept GEDI value is >= bins[0].
    index = np.digitize(gedi_valid, bins)

    # Position inside the GEDI bin in 2.5-wide steps (truncated toward zero);
    # anything past the last sub-bin is counted in the last sub-bin.
    sub_bin = ((gedi_valid - bin_edges[index - 1]) / 2.5).astype(int)
    sub_bin = np.minimum(sub_bin, gedi_histograms.shape[1] - 1)
    np.add.at(gedi_histograms, (index, sub_bin), 1)

    icesat_index = np.digitize(icesat_valid, bins)
    np.add.at(icesat_histograms, (index, icesat_index), 1)

    difference = gedi_valid - icesat_valid
    # ufunc.at applies the update once per element, in order, so repeated bin
    # indices accumulate exactly as an explicit Python loop would.
    np.add.at(num_points, index, 1)
    np.minimum.at(min, index, difference)
    np.maximum.at(max, index, difference)
    np.add.at(mean_helper, index, difference)
    np.add.at(std_helper, index, difference**2)
    # print("=====================================")
    print(file)

print("done")

In [None]:
# Turn the running sums into the per-bin mean and standard deviation of the
# difference, then drop slot 0 (values below the first bin edge) from all four
# statistics arrays.
# Bins without any point divide 0 by 0 and end up as NaN; the warnings for that
# expected case are silenced.
with np.errstate(divide='ignore', invalid='ignore'):
    mean = mean_helper / num_points
    variance = std_helper / num_points - mean**2
    # Rounding can push a tiny variance slightly below zero; clamp it so the
    # square root does not turn a valid bin into NaN.
    std = np.sqrt(np.maximum(variance, 0))

# Keep the last n_bins entries rather than slicing with [1:]: num_points is never
# trimmed, so re-running this cell cannot shrink `min`/`max` a second time.
n_bins = len(num_points) - 1
mean = mean[-n_bins:]
std = std[-n_bins:]
min = min[-n_bins:]
max = max[-n_bins:]

### Barplots
Display error-bar plots of the GEDI − ICESat biomass difference (mean, standard deviation, and min–max range), binned by GEDI biomass.

In [None]:
import matplotlib.pyplot as plt

# Plot, for each GEDI AGBD bin, the mean GEDI - ICESat difference with two
# stacked error bars: a thick one for +/- one standard deviation and a thin grey
# one for the full min-max range.

# Mean +/- one standard deviation. The q1/q3 names are kept as they were, but
# these are not true quartiles.
q1 = mean - std
q3 = mean + std

gedi_bins = [0, 25, 50, 75, 100, 125, 150, 175, 200, 225, 250, 275, 300]

fig, ax = plt.subplots(figsize=(8, 5))
# Thick black bars: mean +/- std.
ax.errorbar(gedi_bins, mean, std, fmt='ok', lw=3)
# Thin grey bars: distance from the mean down to the minimum and up to the maximum.
ax.errorbar(gedi_bins, mean, [mean - min, max - mean],
            fmt='.k', ecolor='gray', lw=1)

# One label per bin: "lower - upper", with the last bin open-ended.
xticks = [f"{lower} - {upper}" for lower, upper in zip(gedi_bins[:-1], gedi_bins[1:])]
xticks.append(f"{gedi_bins[-1]} - inf")
ax.set_xticks(gedi_bins)
ax.set_xticklabels(xticks, rotation=45)
ax.set_xlabel('GEDI AGBD')
ax.set_ylabel('GEDI - ICESat AGBD')
ax.grid(axis='y', linestyle='--', linewidth=0.4)
plt.show()



### Histograms

Here the user can visualize the distribution of the ICESat AGBD values, alongside the finer-grained distribution of the GEDI AGBD values, within each GEDI AGBD bin.

In [None]:
import numpy as np
import matplotlib.pyplot as plt

# Draw one row of plots per GEDI bin: on the left the ICESat AGBD counts for the
# points in that bin, on the right the GEDI AGBD counts in finer sub-bins.
# Tick labels show the upper edge of each bar's interval.

bin_width = 25        # width of a GEDI / ICESat bin
sub_bin_width = 2.5   # width of a GEDI sub-bin

# Take the grid and tick sizes from the arrays instead of hard-coding 14 / 10,
# so the plot stays correct if the binning changes.
n_rows, n_icesat_bins = icesat_histograms.shape
n_sub_bins = gedi_histograms.shape[1]
icesat_positions = np.arange(n_icesat_bins)
gedi_positions = np.arange(n_sub_bins)

# squeeze=False keeps `axs` two-dimensional even when there is a single row.
fig, axs = plt.subplots(n_rows, 2, figsize=(20, 60), squeeze=False)

for i in range(n_rows):
    icesat_ax, gedi_ax = axs[i, 0], axs[i, 1]

    icesat_ax.bar(x=icesat_positions, height=icesat_histograms[i, :], width=0.5)
    icesat_ax.set_title(f'ICESat Histogram for GEDI bin {i*bin_width}')
    icesat_ax.set_xlabel('ICESat AGBD')
    icesat_ax.set_ylabel('Frequency')
    icesat_ax.set_xticks(icesat_positions)
    icesat_ax.set_xticklabels(icesat_positions * bin_width)

    gedi_ax.bar(x=gedi_positions, height=gedi_histograms[i, :], width=0.5)
    gedi_ax.set_title(f'GEDI Histogram for GEDI bin {i*bin_width}')
    gedi_ax.set_xlabel('GEDI AGBD')
    gedi_ax.set_ylabel('Frequency')
    gedi_ax.set_xticks(gedi_positions)
    # Row i covers [(i-1)*bin_width, i*bin_width); label each sub-bin by its upper edge.
    gedi_ax.set_xticklabels((i - 1) * bin_width + (gedi_positions + 1) * sub_bin_width)

plt.subplots_adjust(hspace=0.5)
# Display the histograms
plt.show()

