In [20]:
import os,glob
import h5py
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point, Polygon
# Directory containing the ATL06 HDF5 files used in this notebook.
# NOTE: this is an absolute path; adjust it when running in another environment.
data_dir = '/home/jovyan/ATL06/Byrd_Glacier_rel001/'
# Use the interactive (ipympl) matplotlib backend so figures render as widgets.
%matplotlib widget

In [2]:

import numpy as np
import h5py


def ATL06_to_dict(filename, dataset_dict):
    """
    Read selected datasets from an ATL06 file.

    Input arguments:
        filename: ATL06 (HDF5) file to read
        dataset_dict: a dictionary describing the fields to be read;
                keys give the group names to be read (relative to each
                beam group, e.g. 'land_ice_segments'),
                entries are lists of datasets within the groups
    Output argument:
        D6: list of dictionaries, one per beam found in the file
            (pairs 1-3, beams 'l' and 'r', in that order).  Each
            dictionary maps a dataset name from dataset_dict to a numpy
            array, and additionally contains:
                'pair':     array filled with the pair number (1, 2 or 3)
                'beam':     array filled with the beam index
                            (0 for 'l', 1 for 'r')
                'filename': the filename argument
            Datasets that are missing from the file are skipped.  Beams
            for which none of the requested datasets could be read are
            omitted, so the list may be empty.
    """

    D6 = []
    pairs = [1, 2, 3]
    beams = ['l', 'r']
    # open the HDF5 file read-only: this function never writes, and an
    # explicit mode avoids depending on the h5py version's default
    with h5py.File(filename, 'r') as h5f:
        # loop over beam pairs
        for pair in pairs:
            # loop over beams
            for beam_ind, beam in enumerate(beams):
                # check if a beam exists, if not, skip it
                if '/gt%d%s/land_ice_segments' % (pair, beam) not in h5f:
                    continue
                # loop over the groups in the dataset dictionary
                temp = {}
                for group, datasets in dataset_dict.items():
                    for dataset in datasets:
                        DS = '/gt%d%s/%s/%s' % (pair, beam, group, dataset)
                        # a dataset may not exist in a file: try to look it
                        # up, and if that fails move on to the next one
                        try:
                            h5_dataset = h5f[DS]
                        except KeyError:
                            continue
                        values = np.array(h5_dataset)
                        # some parameters have a _FillValue attribute.  If it
                        # exists, use it to identify bad values, and set them
                        # to NaN (which requires a floating-point array)
                        if '_FillValue' in h5_dataset.attrs:
                            bad = values == h5_dataset.attrs['_FillValue']
                            values = values.astype(np.float64)
                            values[bad] = np.nan
                        temp[dataset] = values
                if len(temp) > 0:
                    # it's sometimes convenient to have the beam and the pair
                    # as part of the output data structure.  They are shaped
                    # like 'h_li' when it was read, otherwise like the first
                    # dataset that was read, so 'h_li' need not be requested.
                    reference = temp['h_li'] if 'h_li' in temp else next(iter(temp.values()))
                    if np.issubdtype(reference.dtype, np.number):
                        tag_dtype = reference.dtype
                    else:
                        tag_dtype = np.float64
                    temp['pair'] = np.full(reference.shape, pair, dtype=tag_dtype)
                    temp['beam'] = np.full(reference.shape, beam_ind, dtype=tag_dtype)
                    temp['filename'] = filename
                    D6.append(temp)
    return D6

In [3]:
# Fields to read from each beam group: keys are HDF5 group paths relative to
# the beam group, values are the dataset names to read from that group.
dataset_dict = {
    'land_ice_segments': [
        'h_li',
        'delta_time',
        'longitude',
        'latitude',
    ],
    'land_ice_segments/ground_track': [
        'x_atc',
    ],
}

In [7]:
# List the files in data_dir (shell command; $data_dir is interpolated from the Python variable).
!ls $data_dir

processed_ATL06_20181014055428_02380111_001_01.h5
processed_ATL06_20181014165430_02450111_001_01.h5
processed_ATL06_20181015052849_02530111_001_01.h5
processed_ATL06_20181016160311_02750111_001_01.h5
processed_ATL06_20181017061148_02840111_001_01.h5
processed_ATL06_20181018054607_02990111_001_01.h5
processed_ATL06_20181019052028_03140111_001_01.h5
processed_ATL06_20181019162030_03210111_001_01.h5
processed_ATL06_20181020155451_03360111_001_01.h5
processed_ATL06_20181021060328_03450111_001_01.h5
processed_ATL06_20181022053749_03600111_001_01.h5
processed_ATL06_20181023051210_03750111_001_01.h5
processed_ATL06_20181023161212_03820111_001_01.h5
processed_ATL06_20181025055510_04060111_001_01.h5
processed_ATL06_20181026052931_04210111_001_01.h5
processed_ATL06_20181027050352_04360111_001_01.h5
processed_ATL06_20181027160354_04430111_001_01.h5
processed_ATL06_20181028153814_04580111_001_01.h5
processed_ATL06_20181029054650_04670111_001_01.h5
processed_ATL06_20181029151235_04730111_001_01.h5


In [60]:
def point_covert(row):
    """
    Build a shapely Point from one row of a table.

    row must support lookup by the keys 'longitude' and 'latitude'
    (e.g. a pandas Series passed in by DataFrame.apply(axis=1)).
    The point is created as (longitude, latitude), i.e. x, y order.
    """
    return Point(row['longitude'], row['latitude'])

In [77]:
def ATL06_2_gdf(ATL06_fn,dataset_dict):
    """
    Convert an ATL06 HDF5 file to a geopandas GeoDataFrame.

    Uses Ben's ATL06_to_dict function to read the file.

    Input arguments:
        ATL06_fn: ATL06 file to read
        dataset_dict: dictionary of group name -> list of dataset names, as
                accepted by ATL06_to_dict.  'latitude' and 'longitude' are
                always read from 'land_ice_segments' because they are needed
                to build the geometry; the caller's dictionary is not
                modified.
    Output argument:
        GeoDataFrame (EPSG:4326) with one row per segment and one column per
        dataset that was read, plus 'pair', 'beam', 'filename', 'p_b'
        (the string '<pair>_<beam>' identifying the track) and a point
        'geometry' built from longitude/latitude.  The per-track row index is
        kept, so index values repeat across tracks.  If no track could be
        read from the file, an empty GeoDataFrame is returned.
    """
    # work on a copy so that the required fields are not appended to the
    # caller's lists (which would otherwise grow as a hidden side effect)
    fields = {group: list(datasets) for group, datasets in dataset_dict.items()}
    segment_fields = fields.setdefault('land_ice_segments', [])
    for required in ('latitude', 'longitude'):
        if required not in segment_fields:
            segment_fields.append(required)
    #use Ben's Scripts to convert to a list of per-track dicts (up to 6 tracks)
    tracks = ATL06_to_dict(ATL06_fn, fields)
    frames = []
    for track in tracks:
        #convert 1 track to a dataframe
        df = pd.DataFrame(track)
        df['p_b'] = str(track['pair'][0])+'_'+str(track['beam'][0])
        frames.append(df)
    if not frames:
        # nothing could be read from this file: return an empty result
        # instead of failing on an undefined variable
        return gpd.GeoDataFrame(geometry=[], crs='EPSG:4326')
    # concatenate once; DataFrame.append was removed in pandas 2.0 and
    # appending inside the loop copies the accumulated data every iteration
    df_final = pd.concat(frames)
    # build all points in one vectorized call rather than row by row
    geometry = gpd.points_from_xy(df_final['longitude'], df_final['latitude'])
    gdf_final = gpd.GeoDataFrame(df_final, geometry=geometry, crs='EPSG:4326')
    return gdf_final
            
            
        

In [71]:
# ATL06_file is not defined anywhere else in the notebook, so build the list of
# ATL06 files from data_dir here; sorted so that ATL06_file[0] is reproducible.
# NOTE(review): reconstructed from context -- confirm this matches the list
# that was originally used.
ATL06_file = sorted(glob.glob(os.path.join(data_dir, '*.h5')))
gdf = ATL06_2_gdf(ATL06_file[0],dataset_dict)

In [76]:
# Preview the first rows of the GeoDataFrame built from the first ATL06 file.
gdf.head()

Unnamed: 0,h_li,delta_time,longitude,latitude,x_atc,pair,beam,filename,p_b,geometry
0,1921.290527,26581670.0,151.438521,-80.000145,29004790.0,1.0,0.0,/home/jovyan/ATL06/Byrd_Glacier_rel001/process...,1.0_0.0,POINT (151.4385212658966 -80.00014491802949)
1,1920.874512,26581670.0,151.438268,-80.000318,29004810.0,1.0,0.0,/home/jovyan/ATL06/Byrd_Glacier_rel001/process...,1.0_0.0,POINT (151.4382675438711 -80.00031782037955)
2,1920.635742,26581670.0,151.438019,-80.000487,29004830.0,1.0,0.0,/home/jovyan/ATL06/Byrd_Glacier_rel001/process...,1.0_0.0,POINT (151.4380186679483 -80.00048725343832)
3,1920.423218,26581670.0,151.43779,-80.000643,29004850.0,1.0,0.0,/home/jovyan/ATL06/Byrd_Glacier_rel001/process...,1.0_0.0,POINT (151.4377904719828 -80.00064290628099)
4,1920.280273,26581670.0,151.437543,-80.000812,29004870.0,1.0,0.0,/home/jovyan/ATL06/Byrd_Glacier_rel001/process...,1.0_0.0,POINT (151.4375428271333 -80.00081213888039)


In [75]:
# Plot the ATL06 points on top of a background layer, one colour per track.
# NOTE(review): `world` is not defined in the cells shown in this notebook; it
# must already exist in the kernel (presumably a GeoDataFrame of country
# outlines -- confirm and add the cell that loads it).
fig,ax = plt.subplots(figsize=(10,10))
world.plot(ax=ax,facecolor = 'lightgray', edgecolor = 'gray')
# Keys are the 'p_b' strings produced by ATL06_2_gdf: '<pair>_<beam>', where
# pair is 1.0-3.0 and beam is 0.0 ('l') or 1.0 ('r').
colors = {'1.0_0.0':'violet','1.0_1.0':'blue','2.0_0.0':'green','2.0_1.0':'yellow','3.0_0.0':'orange','3.0_1.0':'red'}
gdf.plot(ax=ax,c=gdf['p_b'].apply(lambda x:colors[x]))

FigureCanvasNbAgg()

<matplotlib.axes._subplots.AxesSubplot at 0x7fbcfaaeb8d0>

In [78]:
# Save the GeoDataFrame as a GeoPackage (not a shapefile): the output path is
# the input file's path with its extension replaced by '.gpkg'.
gdf.to_file(os.path.splitext(ATL06_file[0])[0]+'.gpkg',driver='GPKG')