# ATL08 data processing

Extract the desired data from the ICESat-2 ATL08 data product and organize them in a dataframe for each ground track.

Define the input/output paths and the target coordinate system used by the cells below.

In [1]:
# Root of the local ICESat-2_SVDA checkout. Every path below is derived from
# it, so the notebook can be pointed at another location by editing this line.
SVDA_repo_path = '/home/bodo/Dropbox/soft/github/ICESat-2_SVDA'

# Directory holding the example ATL08 files and the ROI shapefile.
ATL08_data_path = SVDA_repo_path + '/ATL08_example_data'

# Input: glob pattern selecting the ATL08 product files (HDF5 formatted)
ATL08_input_path = ATL08_data_path + '/ATL08_*.h5'

# Output: directory receiving the extracted dataframes as HDF files
ATL08_output_path = ATL08_data_path + '/hdf'

# Region of interest to be clipped from each ATL08 file
ROI_fname = ATL08_data_path + '/ROI_westernNamibia.shp'

# Projected CRS used for the Easting/Northing coordinates
EPSG_Code = 'epsg:32733'

In [2]:
import os, h5py, glob, sys
import pandas as pd
import numpy as np
import geopandas as gp
from pyproj import Transformer

sys.path.append('/home/bodo/Dropbox/soft/github/ICESat-2_SVDA/python')

from SVDA_helper_functions import *

In [3]:
def _read_beam_dataset(h5_file, dataset_paths, beam, name):
    """Read one dataset of one beam from an open HDF5 file.

    Parameters
    ----------
    h5_file : h5py.File
        Open ATL08 file.
    dataset_paths : list of str
        Paths of all datasets contained in `h5_file`.
    beam : str
        Name of the beam group (a top-level key starting with 'gt').
    name : str
        Dataset name without its group path.

    Returns
    -------
    numpy.ndarray or None
        Contents of the first dataset whose path ends with '/<name>' and
        contains `beam`, or None when the file has no such dataset.
    """
    suffix = '/' + name
    for path in dataset_paths:
        if path.endswith(suffix) and beam in path:
            return h5_file[path][()]
    return None


# (output column, ATL08 dataset name, column index inside the dataset or None),
# listed in the column order of the resulting dataframe.
# NOTE(review): indices 0 and 8 of canopy_h_metrics are labelled RH 25 / RH 95;
# this assumes a layout in which those columns hold the 25th and 95th
# percentiles - confirm against the product version of the input files.
ATL08_FIELDS = [
    ('Latitude', 'latitude', None),
    ('Longitude', 'longitude', None),
    ('Terrain Height (m)', 'h_te_median', None),
    ('Terrain Elevation best fit', 'h_te_best_fit', None),
    ('h_canopy_abs', 'h_canopy_abs', None),
    ('Terrain_Slope', 'terrain_slope', None),
    ('h_max_canopy', 'h_max_canopy', None),
    ('Canopy height (m)', 'h_canopy', None),
    ('RH 95', 'canopy_h_metrics', 8),
    ('RH 25', 'canopy_h_metrics', 0),
    ('Canopy Flag', 'canopy_flag', None),
    ('landsat_flag', 'landsat_flag', None),
    ('canopy_rh_conf', 'canopy_rh_conf', None),
    ('night_flag', 'night_flag', None),
]

ATL08_files = list(glob.glob(ATL08_input_path))

# The following do not depend on the individual ATL08 file, so they are
# prepared once instead of once per loop iteration.
transformer = Transformer.from_crs('epsg:4326', EPSG_Code, always_xy=True)
ROI = gp.GeoDataFrame.from_file(ROI_fname, crs='EPSG:4326')
# Bounding box of the first ROI feature, used for clipping below
minLon, minLat, maxLon, maxLat = ROI.envelope[0].bounds
os.makedirs(ATL08_output_path, exist_ok=True)

for f in ATL08_files:
    columns = {column: [] for column, _, _ in ATL08_FIELDS}

    # The context manager closes the file even if reading raises.
    with h5py.File(f, 'r') as ATL08:
        # Retrieving the beam names
        gtr = [g for g in ATL08.keys() if g.startswith('gt')]

        # Retrieve list of all datasets
        ATL08_objs = []
        ATL08.visit(ATL08_objs.append)
        ATL08_SDS = [o for o in ATL08_objs if isinstance(ATL08[o], h5py.Dataset)]

        for b in gtr:
            # Read every required dataset first, so that a beam lacking one of
            # them is skipped as a whole and all columns keep equal lengths.
            raw = {}
            missing = None
            for _, dataset, _ in ATL08_FIELDS:
                if dataset not in raw:
                    raw[dataset] = _read_beam_dataset(ATL08, ATL08_SDS, b, dataset)
                if raw[dataset] is None:
                    missing = dataset
                    break
            if missing is not None:
                print('Skipping beam %s of %s: dataset %s not found'
                      % (b, os.path.basename(f), missing))
                continue

            for column, dataset, index in ATL08_FIELDS:
                values = raw[dataset]
                columns[column].extend(values if index is None else values[:, index])

    # Convert lists to Pandas dataframe
    ATL08_df = pd.DataFrame(columns)

    # Transformation to the projected coordinate system given by EPSG_Code
    x, y = np.array(ATL08_df['Longitude']), np.array(ATL08_df['Latitude'])
    xx, yy = transformer.transform(x, y)

    # Save the projected coordinates into the dataframe
    ATL08_df['Easting'] = xx
    ATL08_df['Northing'] = yy

    ATL08_df, rotation_data = get_atl_alongtrack(ATL08_df)

    # Keep only rows strictly inside the ROI bounding box, then drop rows
    # that contain missing values.
    inside_roi = ((ATL08_df['Latitude'] > minLat) & (ATL08_df['Latitude'] < maxLat) &
                  (ATL08_df['Longitude'] > minLon) & (ATL08_df['Longitude'] < maxLon))
    ATL08_df = ATL08_df[inside_roi].dropna()

    # The first 14 characters of the file name identify the output file and key.
    # NOTE(review): input files sharing these 14 characters are written to the
    # same file and key - verify that this cannot happen for the inputs used.
    granule_id = os.path.basename(f)[0:14]

    # save the dataframe into csv file (uncomment to also save to csv)
    # ATL08_df.to_csv(os.path.join(ATL08_output_path, r'{}.csv'.format(granule_id)), header=True)
    # save dataframe to compressed HDF file
    ATL08_df.to_hdf(os.path.join(ATL08_output_path, r'{}.hdf'.format(granule_id)),
                    key='ATL08_df_%s' % granule_id, complevel=7)
    

  return f(*args, **kwds)


# Use plotly to visualize ATL08 data

There are no vegetation photons in these ATL08 data.

In [10]:
import plotly.graph_objects as go

# Load the first (in sorted order) dataframe found in the output directory.
ATL08_files = sorted(glob.glob(os.path.join(ATL08_output_path, 'ATL08_*.hdf')))
if not ATL08_files:
    # Fail with an explicit message instead of an IndexError on ATL08_files[0]
    raise FileNotFoundError('No ATL08_*.hdf files found in %s' % ATL08_output_path)
df_ATL08 = pd.read_hdf(ATL08_files[0], mode='r')

# Keep only rows with a canopy height below 1e4
# (presumably this removes fill values of segments without canopy - confirm)
df_ATL08_canopy_df = df_ATL08[df_ATL08['Canopy height (m)'] < 1e4]

fig = go.Figure()

# Terrain: every row of the dataframe, drawn as small black markers
ATL08_terrain_data = go.Scatter3d(name='All ATL08',
    x=df_ATL08['Easting'], y=df_ATL08['Northing'], z=df_ATL08['Terrain Elevation best fit'],
    mode='markers',
    marker=dict(
        size=1,
        color='black',                # single fixed color for all terrain points
        opacity=0.8
    )
)

# Canopy: only the filtered rows, colored by canopy height. The trace gets its
# own name so that the legend distinguishes it from the terrain trace.
# NOTE(review): z is 'Canopy height (m)' while the terrain trace uses an
# elevation column - confirm that both are meant to share one z axis.
ATL08_canopy_data = go.Scatter3d(name='ATL08 canopy',
    x=df_ATL08_canopy_df['Easting'], y=df_ATL08_canopy_df['Northing'], z=df_ATL08_canopy_df['Canopy height (m)'],
    mode='markers',
    marker=dict(
        size=1,
        color=df_ATL08_canopy_df['Canopy height (m)'],                # one color value per point
        colorscale='Viridis',   # choose a colorscale
        opacity=0.8
    )
)

fig.add_trace(ATL08_terrain_data)
fig.add_trace(ATL08_canopy_data)

# tight layout
fig.update_layout(margin=dict(l=0, r=0, b=0, t=0), title='Land, Ground, and Preliminary Canopy')
fig.show()
