This notebook derives snow depths from the ICESat-2 ATL03 dataset.

Workflow:

- Estimate surface height from ICESat-2 ATL03 data

Required input data: 

- ATL03 Trackline
- Digital Elevation Model
- Masks (Optional) 



In [115]:
## Package Import ##

import warnings
warnings.filterwarnings('ignore')
import numpy as np
import pandas as pd
import datetime as dt
import matplotlib.pyplot as plt
import h5py  
import os
import fiona
import geopandas
import time
from sklearn.neighbors import KernelDensity
import scipy
from astropy.time import Time
print(geopandas.__version__) # Version 0.8.1

import rasterio
import geopandas as gpd
import pyproj
import rasterio
import glob
from numpy import *

0.8.1


In [116]:
import rasterio
import glob
from numpy import *


The getATL03 function loads raw ICESat-2 ATL03 data in .h5 format and outputs a pandas DataFrame in which each row represents a photon.

In [117]:
def getATL03(f,beam):
    """Read the photon-level variables of one ATL03 beam into a DataFrame.

    Parameters
    ----------
    f : mapping
        Open ATL03 granule (for example an ``h5py.File``), indexed as
        ``f[beam][group][dataset]``.
    beam : str
        Name of the beam group to read, e.g. ``'gt1l'``.

    Returns
    -------
    pandas.DataFrame
        One row per photon with the columns ``lats``, ``lons``, ``x``
        (along-track distance including the segment offset), ``y``
        (across-track distance), ``heights``, ``dt`` and ``conf``.
    """
    photon_grp = f[beam]['heights']
    segment_grp = f[beam]['geolocation']

    # height of each received photon, relative to the WGS-84 ellipsoid
    heights = photon_grp['h_ph'][:]
    # latitude / longitude (decimal degrees) of each received photon
    lats = photon_grp['lat_ph'][:]
    lons = photon_grp['lon_ph'][:]
    # seconds from ATLAS Standard Data Product Epoch
    delta_time = photon_grp['delta_time'][:]
    # Signal confidence of each photon event:
    # -2: TEP
    # -1: Events not associated with a specific surface type
    #  0: noise
    #  1: buffer but algorithm classifies as background
    #  2: low
    #  3: medium
    #  4: high
    # One column per surface type:
    # 0=Land; 1=Ocean; 2=SeaIce; 3=LandIce; 4=InlandWater
    # Column 0 (land) is used here.
    conf = photon_grp['signal_conf_ph'][:, 0]

    # first photon of each segment (converted to 0-based indexing)
    segment_first = segment_grp['ph_index_beg'][:] - 1
    # number of photon events in each segment
    segment_count = segment_grp['segment_ph_cnt'][:]
    # along-track distance of each segment
    segment_offset = segment_grp['segment_dist_x'][:]

    # Photon along-track distances are relative to their segment. Promote them
    # to float64 before adding the (large) segment offsets; accumulating in a
    # 32-bit array would round the absolute distance to roughly metre precision.
    x_atc = np.array(photon_grp['dist_ph_along'][:], dtype=np.float64)
    # cross-track distance (y) for photon events
    y_atc = np.copy(photon_grp['dist_ph_across'][:])

    for first, count, offset in zip(segment_first, segment_count, segment_offset):
        # Segments without photons have nothing to shift.
        if count > 0:
            x_atc[first:first + count] += offset

    # The geoid is not needed per photon here; load_ATL03 reads it separately.
    df03=pd.DataFrame({'lats':lats,'lons':lons,'x':x_atc,'y':y_atc,'heights':heights,'dt':delta_time,'conf':conf})
    return df03

In [118]:
def load_ATL03(path, date, beam):
    """Load one beam of the ATL03 granule stored for ``date``.

    The granule is searched for in ``path + date + '/ATL03/'``. Only photons
    whose ``conf`` value is greater than 2 are kept, and an ``AT_dist`` column
    (along-track distance relative to the first kept photon) is added.

    Parameters
    ----------
    path : str
        Base directory; it is concatenated directly with ``date``, so it must
        end with a path separator.
    date : str
        Name of the date sub-directory.
    beam : str
        Beam group name, e.g. ``'gt1l'``.

    Returns
    -------
    (pandas.DataFrame, list)
        The filtered photon table and the geoid values of the beam, each value
        repeated twice.

    Raises
    ------
    FileNotFoundError
        If the ATL03 directory contains no ``.h5`` file.
    ValueError
        If no photon of the beam has a confidence above 2.
    """
    ATL03_path = path + date + '/ATL03/'
    granules = sorted(name for name in os.listdir(ATL03_path) if name.endswith('.h5'))
    if not granules:
        raise FileNotFoundError('No .h5 file found in ' + ATL03_path)
    # When several granules are present the last one (in sorted order) is used.
    fname = granules[-1]

    # Read everything needed while the file is open, then close it.
    with h5py.File(ATL03_path + fname, 'r') as f:
        geoid = f[beam]['geophys_corr']['geoid'][:]
        df = getATL03(f, beam)

    # 10m res: every geoid value is repeated twice
    # (presumably one value per 20 m segment -- TODO confirm).
    geoid = list(np.repeat(geoid, 2))

    # Work on a copy so that adding a column does not write into a slice.
    df = df[df['conf'] > 2].copy()
    if df.empty:
        raise ValueError('No photons with confidence > 2 for beam ' + str(beam))
    df['AT_dist'] = df['x'] - df['x'].values[0]
    return df, geoid

The Surf function estimates the surface height by grouping photon height values along-track, using kernel density estimation to find the surface height in each window, and subtracting the geoid.

In [119]:
def Surf(dfATL03, window_width, geoid, step=10, min_photons=5, bandwidth=0.2):
    """Estimate a surface height for moving windows along the track.

    Window centres are placed every ``step`` along-track units between the
    smallest and largest ``AT_dist``. For each window the photons between the
    first row whose ``AT_dist`` exceeds ``centre - window_width`` and the first
    row whose ``AT_dist`` exceeds ``centre + window_width`` are selected. When
    more than ``min_photons`` photons are selected, the height of the photon
    with the highest Gaussian kernel-density score is taken as the surface;
    otherwise the surface is set to 0 as a "no estimate" marker.

    Parameters
    ----------
    dfATL03 : pandas.DataFrame
        Photon table with the columns ``AT_dist``, ``heights``, ``lats`` and
        ``lons``. The row-based slicing assumes rows are ordered by ``AT_dist``.
    window_width : float
        Half-width of each window.
    geoid : sequence of float
        Geoid values assigned to the windows by position. It is truncated, or
        padded with its last value, to the number of windows. The sequence
        itself is not modified.
    step : float, optional
        Spacing between window centres (default 10).
    min_photons : int, optional
        A window needs more than this many photons to be evaluated (default 5).
    bandwidth : float, optional
        Bandwidth of the Gaussian kernel (default 0.2).

    Returns
    -------
    pandas.DataFrame
        One row per window with ``AT_dist``, ``Surf_mean``, ``lats``, ``lons``
        (coordinates of the photon nearest to the centre), ``geoid`` and
        ``Est_Surf`` (``Surf_mean - geoid`` rounded to 2 decimals).

    Raises
    ------
    ValueError
        If ``dfATL03`` has no rows, or if ``geoid`` is empty while windows exist.
    """
    startTime = time.time()

    AT_dist_values = dfATL03['AT_dist'].values
    if AT_dist_values.size == 0:
        raise ValueError('Surf: dfATL03 contains no photons')
    heights_values = dfATL03['heights'].values
    lats = dfATL03['lats'].values
    lons = dfATL03['lons'].values
    n_photons = len(AT_dist_values)

    # Centres of the moving windows
    windows = np.arange(AT_dist_values.min(), AT_dist_values.max(), step).tolist()
    n_windows = len(windows)

    centers = np.empty(n_windows)
    surface = np.empty(n_windows)
    center_lat = np.empty(n_windows)
    center_lon = np.empty(n_windows)

    for i, window_center in enumerate(windows):
        # Lower boundary: first photon beyond (centre - width)
        min_dist = window_center - window_width
        above_min = np.flatnonzero(AT_dist_values > min_dist)
        min_dist_row = above_min[0] if above_min.size else n_photons

        # Upper boundary: first photon beyond (centre + width). A window that
        # reaches past the last photon runs to the end of the track; previously
        # the row index was left unset (or stale) in that case.
        max_dist = window_center + window_width
        if max_dist < AT_dist_values[-1]:
            max_dist_row = np.flatnonzero(AT_dist_values > max_dist)[0]
        else:
            max_dist_row = n_photons

        # Coordinates of the photon closest to the window centre
        nearest = np.abs(AT_dist_values - window_center).argmin()

        window_heights = heights_values[min_dist_row:max_dist_row]

        # Only search for a surface if there are enough photons in the window
        if len(window_heights) > min_photons:
            kde = KernelDensity(kernel='gaussian', bandwidth=bandwidth).fit(window_heights[:, None])
            kde_scores = kde.score_samples(window_heights[:, None])
            # Height of the (first) photon with the highest density score
            surface[i] = window_heights[np.argmax(kde_scores)]
        else:
            # 0 marks "no estimate"
            surface[i] = 0

        centers[i] = window_center
        center_lat[i] = lats[nearest]
        center_lon[i] = lons[nearest]

        # Progress report whenever the truncated centre is a multiple of 25000
        if int(window_center) % 25000 < 1:
            m = int((i + 1) / n_windows * 100)
            print(m, "% Done")

    df = pd.DataFrame(centers, columns = ['AT_dist'])
    df["Surf_mean"] = surface
    df["lats"] = center_lat
    df["lons"] = center_lon

    # Fit the geoid to the number of windows on a private copy, so the
    # caller's sequence is left untouched.
    geoid_values = list(geoid)[:n_windows]
    missing = n_windows - len(geoid_values)
    if missing > 0:
        if not geoid_values:
            raise ValueError('Surf: geoid is empty')
        geoid_values.extend([geoid_values[-1]] * missing)
    df["geoid"] = geoid_values

    # Estimate surface height
    df['Est_Surf'] = (df['Surf_mean'] - df['geoid']).round(2)

    # Script Runtime
    runTime = int(time.time() - startTime)
    print("\nScript Runtime: %i minutes and %i seconds" % (runTime // 60, runTime % 60))

    return df


df_to_shp converts every estimated surface height to a point in a shapefile. These points are used later for the snow depth estimation.

In [120]:
def snow_depth(df, beam, DEM):
    """Turn surface estimates into points and subtract the DEM elevation.

    Parameters
    ----------
    df : pandas.DataFrame
        Surface estimates with the columns ``lons``, ``lats`` and ``Est_Surf``.
    beam : str
        Beam name; accepted for interface compatibility, not used.
    DEM : str
        Path of the elevation raster to sample. It is sampled with EPSG:25833
        coordinates, so it is assumed to be in that CRS -- TODO confirm.

    Returns
    -------
    geopandas.GeoDataFrame
        The input rows as points in EPSG:25833 with the added columns ``DEM``
        (first band of the raster at the point) and ``SD``
        (``Est_Surf - DEM`` rounded to 2 decimals).
    """
    pts = geopandas.GeoDataFrame(
        df, geometry=geopandas.points_from_xy(df.lons, df.lats))

    # Coordinates are given as lon/lat (EPSG:4326); reproject to EPSG:25833.
    pts = pts.set_crs('EPSG:4326')
    pts = pts.to_crs(25833)

    coords = list(zip(pts.geometry.x, pts.geometry.y))
    # The context manager closes the raster once sampling is done.
    with rasterio.open(DEM) as src:
        pts['DEM'] = [sample[0] for sample in src.sample(coords)]

    # Norwegian DEM: the geoid-corrected surface (Est_Surf) is used.
    # For the Arctic DEM the original notes use Surf_mean instead.
    pts['SD'] = (pts.Est_Surf - pts.DEM).round(2)

    print('Training Points: ',pts.shape[0])

    return pts
        

In [121]:
def Mask(df, path, date):
    """Sample the slope and binary mask rasters and drop masked-out points.

    Parameters
    ----------
    df : geopandas.GeoDataFrame
        Points to filter; their geometry must be in the CRS of the rasters.
        The frame passed in is not modified.
    path : str
        Base directory ending in a separator. The slope raster is looked up at
        ``path[:-7] + '/Masks/Slope.tif'``, i.e. with the last 7 characters of
        ``path`` removed.
    date : str
        Date sub-directory that holds ``Masks/Binary_Mask.tif``.

    Returns
    -------
    geopandas.GeoDataFrame
        Points with ``SLOPE < 8`` and ``Binary_Mask > 0``, including the two
        sampled columns.
    """
    # Copy so the sampled columns are not written into the caller's frame.
    pts = df.copy()
    coords = list(zip(pts.geometry.x, pts.geometry.y))

    masks = {
        'SLOPE': path[:-7] + '/Masks/Slope.tif',
        'Binary_Mask': path + date + '/Masks/Binary_Mask.tif',
    }
    for column, raster_path in masks.items():
        # Close every raster as soon as it has been sampled.
        with rasterio.open(raster_path) as src:
            pts[column] = [sample[0] for sample in src.sample(coords)]

    print('Training Points: ',pts.shape[0])

    # Mask out points on slope over 8 degrees
    pts = pts[pts['SLOPE'] < 8]
    print('Slope: ',pts.shape[0])

    pts = pts[pts['Binary_Mask'] > 0]
    print('Binary Mask: ',pts.shape[0])

    # The snow-depth outlier filter is currently disabled; the count is still
    # reported so the log keeps its layout.
    print('Outliers: ',pts.shape[0])

    print('Training data points left after masking: ', pts.shape[0])

    return pts
        

In [122]:
def Mask_OLDVERSION(df, SLOPE, WATER, FC, WSC, SC):
    """Older masking routine that samples five rasters and filters the points.

    Parameters
    ----------
    df : geopandas.GeoDataFrame
        Points with an ``SD`` column; geometry must be in the CRS of the
        rasters. The frame passed in is not modified.
    SLOPE, WATER, FC, WSC, SC : str
        Paths of the rasters to sample. Each one is stored in a column of the
        same name; ``SC`` is sampled but not used for filtering.

    Returns
    -------
    geopandas.GeoDataFrame
        Points with ``SLOPE < 8``, ``WATER < 1``, ``FC > 0``, ``WSC > 0`` and
        ``-1 < SD < 10``.
    """
    # Copy so the sampled columns are not written into the caller's frame.
    pts = df.copy()
    coords = list(zip(pts.geometry.x, pts.geometry.y))

    masks = {'SLOPE': SLOPE, 'WATER': WATER, 'FC': FC, 'WSC': WSC, 'SC': SC}
    for column, raster_path in masks.items():
        # Close every raster as soon as it has been sampled.
        with rasterio.open(raster_path) as src:
            pts[column] = [sample[0] for sample in src.sample(coords)]

    print('Training Points: ',pts.shape[0])

    # Mask out points on slope over 8 degrees
    pts = pts[pts['SLOPE'] < 8]
    print('Slope: ',pts.shape[0])

    # Mask out points on water
    pts = pts[pts['WATER'] < 1]
    print('water: ',pts.shape[0])

    # Mask out points located in the forest
    pts = pts[pts['FC'] > 0]
    print('fc: ',pts.shape[0])

    # Mask out points located on wet snow (VH < -21)
    pts = pts[pts['WSC'] > 0]
    print('wsc: ',pts.shape[0])

    # Mask out unrealistic snow depths
    pts = pts[(pts['SD'] < 10) & (pts['SD'] > -1)]
    print('Outliers: ',pts.shape[0])

    print('Training data points left after masking: ', pts.shape[0])

    return pts
        
        
    
        
    

In [123]:
def Rescale(df, window_width, step=1000, min_points=5):
    """Aggregate point snow depths to a coarser along-track spacing.

    Window centres are placed every ``step`` along-track units between the
    smallest and largest ``AT_dist``. For each window the median ``SD`` of the
    rows between the first row beyond ``centre - window_width`` and the first
    row beyond ``centre + window_width`` is computed when more than
    ``min_points`` rows are selected; otherwise the window gets -999.

    Parameters
    ----------
    df : geopandas.GeoDataFrame
        Points with the columns ``AT_dist`` and ``SD``. The row-based slicing
        assumes rows are ordered by ``AT_dist``. The geometry is assumed to be
        in EPSG:25833 already, since the output is labelled with that CRS
        without reprojection. The frame passed in is not modified.
    window_width : float
        Half-width of each window.
    step : float, optional
        Spacing between window centres (default 1000).
    min_points : int, optional
        A window needs more than this many points to be evaluated (default 5).

    Returns
    -------
    geopandas.GeoDataFrame
        One point per window with ``0 < SD < 10`` and the columns ``AT_dist``,
        ``SD``, ``lats``, ``lons`` (coordinates of the input point nearest to
        the centre) and ``geometry``. Empty when ``df`` is empty.
    """
    # Copy so the coordinate columns are not written into the caller's frame.
    pts = df.copy()

    # Coordinates taken from the geometry
    pts['lons'] = pts.geometry.x
    pts['lats'] = pts.geometry.y

    AT_dist_values = pts['AT_dist'].values
    lats = pts['lats'].values
    lons = pts['lons'].values
    SD_values = pts['SD'].values
    n_points = len(AT_dist_values)

    # Centres of the moving windows; none when there are no points.
    if n_points:
        windows = np.arange(AT_dist_values.min(), AT_dist_values.max(), step).tolist()
    else:
        windows = []
    n_windows = len(windows)

    centers = np.empty(n_windows)
    depth = np.empty(n_windows)
    center_lat = np.empty(n_windows)
    center_lon = np.empty(n_windows)

    for i, window_center in enumerate(windows):
        # Lower boundary: first point beyond (centre - width)
        min_dist = window_center - window_width
        above_min = np.flatnonzero(AT_dist_values > min_dist)
        min_dist_row = above_min[0] if above_min.size else n_points

        # Upper boundary: first point beyond (centre + width). A window that
        # reaches past the last point runs to the end of the track; previously
        # the row index was left unset (or stale) in that case.
        max_dist = window_center + window_width
        if max_dist < AT_dist_values[-1]:
            max_dist_row = np.flatnonzero(AT_dist_values > max_dist)[0]
        else:
            max_dist_row = n_points

        # Coordinates of the point closest to the window centre
        nearest = np.abs(AT_dist_values - window_center).argmin()

        window_SD = SD_values[min_dist_row:max_dist_row]
        # -999 marks windows with too few points
        depth[i] = np.median(window_SD) if len(window_SD) > min_points else -999

        centers[i] = window_center
        center_lat[i] = lats[nearest]
        center_lon[i] = lons[nearest]

    out = pd.DataFrame(centers, columns = ['AT_dist'])
    out["SD"] = depth
    out["lats"] = center_lat
    out["lons"] = center_lon

    gdf = geopandas.GeoDataFrame(
        out, geometry=geopandas.points_from_xy(out.lons, out.lats))
    gdf = gdf.set_crs('EPSG:25833')
    # Keep plausible depths only; this also removes the -999 markers.
    gdf = gdf[(gdf['SD'] > 0) & (gdf['SD'] < 10)]
    return gdf

In [124]:
def df_to_shp(df,beam, path, date):
    """Write the points of one beam to ``<path><date>/SnowDepth/<beam>.shp``.

    Nothing is written when ``df`` has no rows. The output directory is
    created when it does not exist yet.

    Parameters
    ----------
    df : geopandas.GeoDataFrame
        Points to write; only ``shape`` and ``to_file`` are used.
    beam : str
        Beam name, used as the file name.
    path : str
        Base directory; concatenated directly with ``date``, so it must end
        with a path separator.
    date : str
        Date sub-directory.

    Returns
    -------
    None
    """
    if df.shape[0] == 0:
        return

    out_dir = path + date + '/SnowDepth/'
    # Writing fails when the target directory is missing, so create it first.
    os.makedirs(out_dir, exist_ok=True)
    df.to_file(out_dir + beam + '.shp')

In [125]:
def Sample(path, date):
    """Merge the beam shapefiles of one date and sample the predictor rasters.

    Every file ending in ``shp`` inside ``path + date + '/SnowDepth/'`` is
    read and the points are combined. The rasters ``VH``, ``Diff``, ``Ratio``
    and ``Subtract`` from ``<path><date>/S1/`` and the DEM at
    ``path[:-7] + '/DEM/ArcticDEM_10m.tif'`` are sampled at the points, the
    columns ``lats``, ``lons`` and ``AT_dist`` are dropped, and the result is
    written to ``<path><date>/Sampled/SD.shp``.

    Parameters
    ----------
    path : str
        Base directory; concatenated directly with ``date``, so it must end
        with a path separator.
    date : str
        Date sub-directory.

    Returns
    -------
    geopandas.GeoDataFrame
        The combined points with one column per sampled raster.

    Raises
    ------
    FileNotFoundError
        If the SnowDepth directory contains no shapefile.
    """
    SD_path = path + date + '/SnowDepth/'
    frames = [gpd.read_file(os.path.join(SD_path, name))
              for name in os.listdir(SD_path) if name.endswith('shp')]
    if not frames:
        raise FileNotFoundError('No shapefile found in ' + SD_path)
    # Concatenate once instead of growing the frame file by file.
    pts = frames[0] if len(frames) == 1 else pd.concat(frames)

    coords = list(zip(pts.geometry.x, pts.geometry.y))

    # Notes carried over from the original author (not verifiable from here):
    # Diff: VH of the fall baseline median subtracted from the target date VH.
    # Ratio: VH divided by VV (or the other way around).
    # Subtract: VH minus VV (or the other way around).
    rasters = {
        'VH': path + date + '/S1/VH.tif',
        'Diff': path + date + '/S1/Diff.tif',
        'Ratio': path + date + '/S1/Ratio.tif',
        'Subtract': path + date + '/S1/Subtract.tif',
        'DEM': path[:-7] + '/DEM/ArcticDEM_10m.tif',
    }
    for column, raster_path in rasters.items():
        # Close every raster as soon as it has been sampled.
        with rasterio.open(raster_path) as src:
            pts[column] = [sample[0] for sample in src.sample(coords)]

    pts = pts.drop(columns=['lats', 'lons', 'AT_dist'])

    out_dir = path + date + '/Sampled/'
    # Writing fails when the target directory is missing, so create it first.
    os.makedirs(out_dir, exist_ok=True)
    pts.to_file(out_dir + 'SD.shp')
    return pts

In [130]:
def run(params, datelist=None, beamlist=None):
    """Run the ICESat-2 snow-depth pipeline for several dates and beams.

    For every date and beam the ATL03 photons are loaded, surface heights are
    estimated, snow depths are computed against the DEM, the points are masked,
    rescaled and written to a shapefile. After all beams of a date have been
    processed, the shapefiles of that date are merged and sampled once.

    Parameters
    ----------
    params : dict
        Needs the keys ``'path'`` and ``'DEM'``. The keys ``'date'`` and
        ``'beam'`` are overwritten in place with the date and beam currently
        being processed.
    datelist : list of str, optional
        Dates to process. Defaults to
        ``['20210211', '20210304', '20210308']``.
    beamlist : list of str, optional
        Beams to process. Defaults to all six beams.

    Returns
    -------
    (pandas.DataFrame, pandas.DataFrame)
        Photon table and filtered surface table of the last beam processed.
    """
    # Full list of dates used previously:
    # ['20200204','20200206','20200306','20200314','20200406','20200410','20210203','20210205','20210207','20210209','20210211','20210304','20210308']
    if datelist is None:
        datelist = ['20210211','20210304','20210308']
    if beamlist is None:
        beamlist = ['gt1l','gt2l','gt3l','gt1r','gt2r','gt3r']

    for dates in datelist:
        params['date'] = dates
        print('started processing of date: ', dates)

        for ISbeam in beamlist:
            params['beam'] = ISbeam
            df, geoid = load_ATL03(params['path'],params['date'], params['beam'])
            print('ATL03 Dataset loaded')

            df_surf = Surf(df, 10, geoid)
            # Windows without a surface estimate are marked with 0 by Surf.
            df_surf = df_surf[df_surf['Surf_mean'] != 0]
            print('Surface Heights Estimated')

            df_SD = snow_depth(df_surf, params['beam'], params['DEM'])
            print('Snow Depths Estimated')

            df_Mask = Mask(df_SD, params['path'], params['date'])
            print('Masked out training points')

            df_RE = Rescale(df_Mask, 3000)
            print('Rescaled training points')

            df_to_shp(df_RE,params['beam'], params['path'], params['date'])
            print('completed ICESat-2 Processing for beam: ',ISbeam)

        # Sample reads every shapefile of the date and overwrites its output,
        # so one call after the last beam yields the same final file as
        # calling it after every beam.
        Sample(params['path'], params['date'])

    return df, df_surf

In [131]:
# Pipeline configuration.
#   path: base directory holding one sub-directory per date; it is concatenated
#         directly with the date, so it must end with '/'.
#   date: date sub-directory (run() overwrites this entry while looping).
#   DEM:  elevation raster sampled by snow_depth().
#         Alternative: 'C:/Users/Rasmu/Documents/Thesis/DEM/ArcticDEM_10m.tif'
Load_params = dict(
    path='C:/Users/Rasmu/Documents/Thesis/Hardangervidda/Dates/',
    date='20200306',
    DEM='C:/Users/Rasmu/Documents/Thesis/DEM/mergedDEM10m_Hardanger.tif',
)

In [132]:
#def Multitemporal_run(path):
    

In [133]:
# Run the whole pipeline; run() returns the photon table and the surface table
# of the last beam it processed.
df, df_surf = run(Load_params)

started processing of date:  20210211
ATL03 Dataset loaded
0 % Done
25 % Done
51 % Done
76 % Done

Script Runtime: 0 minutes and 3 seconds
Surface Heights Estimated
Training Points:  3080
Snow Depths Estimated
Training Points:  3080
Slope:  1787
Binary Mask:  567
Outliers:  567
Training data points left after masking:  567
Masked out training points
Rescaled training points
completed ICESat-2 Processing for beam:  gt1l
ATL03 Dataset loaded
0 % Done
25 % Done
50 % Done
76 % Done

Script Runtime: 0 minutes and 6 seconds
Surface Heights Estimated
Training Points:  4801
Snow Depths Estimated
Training Points:  4801
Slope:  3187
Binary Mask:  1590
Outliers:  1590
Training data points left after masking:  1590
Masked out training points
Rescaled training points
completed ICESat-2 Processing for beam:  gt2l
ATL03 Dataset loaded
0 % Done
25 % Done
50 % Done
76 % Done

Script Runtime: 0 minutes and 5 seconds
Surface Heights Estimated
Training Points:  3975
Snow Depths Estimated
Training Points: 