# ICESat-2 Surface Estimation 

In [14]:
import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import h5py  
import os
import fiona
import geopandas
import time
from sklearn.neighbors import KernelDensity
import scipy
from astropy.time import Time

In [15]:
print(geopandas.__version__)

0.8.1


getATL03 function loads in raw ICESat-2 ATL03 data in .h5 format and outputs are geodataframe where each row represents a photon.

In [16]:
def getATL03(f,beam):
    # height of each received photon, relative to the WGS-84 ellipsoid (with some, not all corrections applied, see background info above)
    heights=f[beam]['heights']['h_ph'][:]
    # latitude (decimal degrees) of each received photon
    lats=f[beam]['heights']['lat_ph'][:]
    # longitude (decimal degrees) of each received photon
    lons=f[beam]['heights']['lon_ph'][:]
    # seconds from ATLAS Standard Data Product Epoch. use the epoch parameter to convert to gps time
    dt=f[beam]['heights']['delta_time'][:]
    # confidence level associated with each photon event
    # -2: TEP
    # -1: Events not associated with a specific surface type
    #  0: noise
    #  1: buffer but algorithm classifies as background
    #  2: low
    #  3: medium
    #  4: high
    # Surface types for signal classification confidence
    # 0=Land; 1=Ocean; 2=SeaIce; 3=LandIce; 4=InlandWater    
    conf=f[beam]['heights']['signal_conf_ph'][:,0] #choose column 2 for confidence of sea ice photons
    # number of ATL03 20m segments
    n_seg, = f[beam]['geolocation']['segment_id'].shape
    #GEOID
    geoid = f[beam]['geophys_corr']['geoid'][:]
    # first photon in the segment (convert to 0-based indexing)
    Segment_Index_begin = f[beam]['geolocation']['ph_index_beg'][:] - 1
    # number of photon events in the segment
    Segment_PE_count = f[beam]['geolocation']['segment_ph_cnt'][:]
    # along-track distance for each ATL03 segment
    Segment_Distance = f[beam]['geolocation']['segment_dist_x'][:]
    # along-track distance (x) for photon events
    x_atc = np.copy(f[beam]['heights']['dist_ph_along'][:])
    # cross-track distance (y) for photon events
    y_atc = np.copy(f[beam]['heights']['dist_ph_across'][:])

    for j in range(n_seg):
        # index for 20m segment j
        idx = Segment_Index_begin[j]
        # number of photons in 20m segment
        cnt = Segment_PE_count[j]
        # add segment distance to along-track coordinates
        x_atc[idx:idx+cnt] += Segment_Distance[j]
        #geoid
        #geoid[idx:idx+cnt] += geoid[j]


    df03=pd.DataFrame({'lats':lats,'lons':lons,'x':x_atc,'y':y_atc,'heights':heights,'dt':dt,'conf':conf})
    return df03

In [17]:
def load_ATL03(folderpath, filename, beam):
    for file in os.listdir(folderpath):
        if file.startswith(filename):
            fname = file
    f = h5py.File(folderpath+fname,'r')
    geoid = f[beam]['geophys_corr']['geoid'][:]
    #10m res
    geoid = list(np.repeat(geoid, 2))
    #100m res
    #geoid = geoid[::5]
    #geoid = geoid.tolist()
    df = getATL03(f,beam)
    df = df[df['conf'] > 2]
    df['AT_dist']=df.x-df.x.values[0]
    return df, geoid

Surf function estimates the surface height by grouping photons height values along-track, using kernel density to estimate surface height, and substracting the geoid. 

In [18]:
def Surf(dfATL03, window_width, geoid):
    startTime = time.time()
    
    # Along Track Distance Values
    AT_dist_values = dfATL03['AT_dist'].values
    AT_dist_totmax = AT_dist_values.max()
    AT_dist_totmin = AT_dist_values.min()
    
    #dfATL03 = dfATL03[dfATL03['conf'] > 2]
    #Heights
    heights_values = dfATL03['heights'].values
    conf_values = dfATL03['conf'].values

    #Coordinates
    lats = dfATL03['lats'].values
    lons = dfATL03['lons'].values
    
    #Moving Window
    windows = np.arange(AT_dist_totmin, AT_dist_totmax, 10).tolist()

    print('windows',len(windows))
    #print('df',dfATL03.shape)
    #print(dfATL03)
    print('geoid',len(geoid))
    print(geoid,'geoid vals')

    #Empty array for AT_dist_Values
    a = np.empty(len(windows))
    b = np.empty(len(windows))  
    c = np.empty(len(windows))  
    d = np.empty(len(windows))  
    
    i = 0
    #Iterate through rows in dataframe
    for window_center in windows:    

        #---------------
        #Step 1: Window Boundries
        #---------------

        #Get minimum window boundries
        min_dist = window_center - window_width
        min_dist_array = np.where(AT_dist_values > min_dist)
        min_dist_row = min_dist_array[0][0]

        #Get maximum window boundries
        max_dist = window_center + window_width
        if max_dist < AT_dist_values[-1]:
            max_dist_array = np.where(AT_dist_values > max_dist)
            max_dist_row = max_dist_array[0][0]
        else:
            max_dist = AT_dist_values[-1] 

        #Get window center lat & long for plotting later        
        idx = (np.abs(AT_dist_values - window_center)).argmin()
        lat = lats[idx]
        lon = lons[idx]

        #Select photons AT_dist & heights within boundries
        window_heights = heights_values[min_dist_row:max_dist_row]

        #print(len(window_heights), 'win len')
        #Only Search for surface if there is enough Photons in window
        if len(window_heights) > 5: #Normally 5
            
        
            binwidth = 0.2
            #Kernel Density Function
            kde = KernelDensity(kernel='gaussian', bandwidth=binwidth).fit(window_heights[:, None])
            kde_scores = kde.score_samples(window_heights[:, None])
            maxdensityI = np.max(kde_scores)
            max_index = np.where(kde_scores == maxdensityI)
            maxdensityH =  window_heights[max_index]
            Surface_Estimation = maxdensityH[0]
            a[i] = window_center #AT_DIST of window Center
            b[i] = Surface_Estimation
            c[i] = lat
            d[i] = lon
            
            i+=1

        else:
            a[i] = window_center
            b[i] = 0
            c[i] = lat
            d[i] = lon     
            i+=1

        #------------------
        #Step 7: Outputs
        #------------------

        wincen = int(window_center)
    
        if wincen % 5000 < 1:  
            m = int(i / len(windows) * 100)
            print(m, "% Done")
      
    #pd.set_option('display.max_colwidth', None)
    df = pd.DataFrame(a, columns = ['AT_dist']) 
    df["Surf_mean"] = b
    df["lats"] = c
    df["lons"] = d

    #Add geoid to correct surf height.
    if len(geoid) < len(df.index):
      while len(geoid) < len(df.index):
        geoid.append(geoid[-1])
      df["geoid"] = geoid

    elif len(geoid) > len(df.index):
       df["geoid"] = geoid[:len(df.index)]
      
    else:
      df["geoid"] = geoid

    #Estimate surface height
    df['Est_Surf'] = (df['Surf_mean'] - df['geoid']).round(2)
    # Script Runtime
    runTime =  int(time.time() - startTime)
    runTimeMin = runTime/60
    runTimeSec = runTime%60
    print("\nScript Runtime: %i minutes and %i seconds" % (runTimeMin,runTimeSec))
    
    return df


df_to_shp converts every estimated surface height to a point in a shapefile. These points are used further on for snow depth estimations

In [19]:
def df_to_shp(df,beam,out_folder):
    gdf = geopandas.GeoDataFrame(
    df, geometry=geopandas.points_from_xy(df.lons, df.lats))
    print('crs',gdf.crs)
    gdf = gdf.set_crs('EPSG:4326')
    print('crs',gdf.crs)
    gdf = gdf.to_crs(25833)
    print('crs',gdf.crs)
    #gdf.crs = {"init":"epsg:25833"}
    print('crs',gdf.crs)
    gdf.to_file(out_folder + beam + '1.shp')

In [20]:
Load_params = {
    'folderpath': 'C:/Users/Rasmu/Documents/Thesis/ATL03/',
#    'filename': 'processed_ATL03_20200510234741_06940705_004_01',
    #'filename': 'processed_ATL03_20200306025211_10750605_005_01.h5',
    #'filename': 'processed_ATL03_20200306025211_10750605_005_01_2.h5' #SD AOI 2
    #'filename': 'processed_ATL03_20201107024759_06710903_005_01.h5'
    'filename':'processed_ATL03_20200314023536_11970605_004_01.h5' #SD AOI 1
    }
out_folder = 'C:/Users/Rasmu/Documents/Thesis/Surface_Estimation/'
#Snow = 'processed_ATL03_20190512044917_06710303_004_01'
#NO_Snow = 'processed_ATL03_20201107024759_06710903_004_01'

In [21]:
def run(params, out_folder):
    #beamlist = ['gt1l','gt2l','gt3l','gt1r','gt2r','gt3r']
    beamlist = ['gt2l']
#    i=0
    for ISbeam in beamlist:
        params['beam'] = ISbeam
        df, geoid = load_ATL03(**params)
        df_surf = Surf(df, 10, geoid)
        df_surf = df_surf[df_surf['Surf_mean'] != 0]
        shp = df_to_shp(df_surf,params['beam'], out_folder)
        print(ISbeam)
    return df, df_surf
        #Visual(df, df_surf)

In [22]:
df, df_surf = run(Load_params,out_folder)

windows 12446
geoid 12432
[46.14752, 46.14752, 46.14632, 46.14632, 46.145164, 46.145164, 46.14401, 46.14401, 46.142853, 46.142853, 46.141655, 46.141655, 46.1405, 46.1405, 46.139343, 46.139343, 46.138187, 46.138187, 46.137028, 46.137028, 46.135834, 46.135834, 46.134674, 46.134674, 46.13352, 46.13352, 46.132362, 46.132362, 46.131165, 46.131165, 46.13001, 46.13001, 46.128857, 46.128857, 46.127697, 46.127697, 46.12654, 46.12654, 46.125385, 46.125385, 46.12423, 46.12423, 46.12303, 46.12303, 46.121918, 46.121918, 46.120716, 46.120716, 46.11956, 46.11956, 46.118404, 46.118404, 46.11725, 46.11725, 46.116093, 46.116093, 46.114937, 46.114937, 46.11374, 46.11374, 46.112587, 46.112587, 46.11143, 46.11143, 46.110275, 46.110275, 46.109123, 46.109123, 46.107925, 46.107925, 46.106815, 46.106815, 46.10566, 46.10566, 46.104504, 46.104504, 46.103348, 46.103348, 46.102196, 46.102196, 46.10104, 46.10104, 46.099888, 46.099888, 46.09869, 46.09869, 46.097538, 46.097538, 46.096386, 46.096386, 46.095234, 46.095

4 % Done
8 % Done
12 % Done
16 % Done
20 % Done
24 % Done
28 % Done
32 % Done
36 % Done
40 % Done
44 % Done
48 % Done
52 % Done
56 % Done
60 % Done
64 % Done
68 % Done
72 % Done
76 % Done
80 % Done
84 % Done
88 % Done
92 % Done
96 % Done

Script Runtime: 0 minutes and 35 seconds
crs None
crs EPSG:4326
crs epsg:25833
crs epsg:25833
gt2l


In [10]:
def Visual(df, df_surf):

  plt.figure(figsize=(16, 8))
  y1 = 'heights'
  y2 = 'Est_Surf'

  minVal1, maxVal1 = df[y1].min(skipna=True), df[y1].max()
  minVal2, maxVal2 = df_surf[y2].min(skipna=True), df_surf[y2].max()
  minVal3 = minVal2 - 10

  plt.scatter(df.AT_dist.values,df.heights.values,c='g',s=1)
  plt.scatter(df_surf.AT_dist.values,df_surf.Surf_mean.values,c='r',s=5)

  #plt.ylim(minVal1, maxVal1);
  #plt.xlim(df.AT_dist.values[0],df.AT_dist.values[-1]);
  plt.ylim(1350, 1450);
  plt.xlim(68000,70000);

In [11]:
#Visual(df, df_surf)

In [None]:
#df_surf

In [13]:
df_surf

Unnamed: 0,AT_dist,Surf_mean,lats,lons,geoid,Est_Surf,geometry
0,0.0,875.031555,60.614515,6.817700,46.147518,828.88,POINT (6.81770 60.61452)
1,10.0,874.976501,60.614433,6.817681,46.147518,828.83,POINT (6.81768 60.61443)
2,20.0,872.139404,60.614338,6.817660,46.146320,825.99,POINT (6.81766 60.61434)
3,30.0,871.004333,60.614249,6.817641,46.146320,824.86,POINT (6.81764 60.61425)
4,40.0,870.958313,60.614160,6.817620,46.145164,824.81,POINT (6.81762 60.61416)
...,...,...,...,...,...,...,...
12439,124390.0,273.907593,59.504029,6.588030,45.030010,228.88,POINT (6.58803 59.50403)
12440,124400.0,282.996704,59.503940,6.588015,45.030010,237.97,POINT (6.58801 59.50394)
12443,124430.0,375.758698,59.503662,6.587970,45.030010,330.73,POINT (6.58797 59.50366)
12444,124440.0,375.758698,59.503579,6.587957,45.030010,330.73,POINT (6.58796 59.50358)
