Find the residuals between ICESat-2 elevation measurements and a reference elevation (taken from IceBridge ATM flyover of Zachariae Isstrom)

Taryn Black, ICESat-2 Hackweek, June 17-21 2019

In [1]:
import numpy as np
import pandas as pd
from shapely.geometry import Point, Polygon
import matplotlib.pyplot as plt

# Set data location info

In [2]:
home_dir = "/home/jovyan/xtrak/"
cross_file = "data_prod/InterX_ATM2014_AllSmooth.csv"
ATM_year = '2014'

# Load ICESat-2 crossover points

ICESat-2 track points that were identified as crossing our OIB ATM reference track in Intersections.ipynb. This file contains both the ICESat-2 elevation data and the OIB ATM data at the crossover points.

In [3]:
cross_df = pd.read_csv(home_dir + cross_file, parse_dates=[4])

cross_df.head()
#cross_df.info()

Unnamed: 0,dist_along,ATM_elev,idx_ATM,z_ATL06,t_ATL06,idx_ATL06,gt_ATL06
0,122581.170689,45.04805,3130.0,42.769034,2018-10-18 15:53:52,617767.0,gt1l
1,122647.058967,44.7688,3132.0,42.156155,2018-10-18 15:53:52,621074.0,gt1r
2,125810.129706,36.67925,3229.0,34.448452,2018-10-18 15:53:52,624012.0,gt2l
3,125875.304455,36.28405,3231.0,32.827094,2018-10-18 15:53:52,626687.0,gt2r
4,80573.944294,295.0647,1901.0,,2018-10-21 05:21:45,205271.0,gt2r


# Drop NaNs

In [4]:
cross_df.dropna(axis=0, inplace=True)
cross_df.head()

Unnamed: 0,dist_along,ATM_elev,idx_ATM,z_ATL06,t_ATL06,idx_ATL06,gt_ATL06
0,122581.170689,45.04805,3130.0,42.769034,2018-10-18 15:53:52,617767.0,gt1l
1,122647.058967,44.7688,3132.0,42.156155,2018-10-18 15:53:52,621074.0,gt1r
2,125810.129706,36.67925,3229.0,34.448452,2018-10-18 15:53:52,624012.0,gt2l
3,125875.304455,36.28405,3231.0,32.827094,2018-10-18 15:53:52,626687.0,gt2r
5,77393.293766,269.7804,1803.0,255.108995,2018-10-21 05:21:45,205803.0,gt3r


# Add new time column to dataframe

For formatting reasons, we want time `t_ATL06` to be duplicated in a new column, with datetime64 format.

In [5]:
cross_df['t_YM'] = pd.to_datetime(cross_df['t_ATL06'], format="%Y/%m")
cross_df.head()

Unnamed: 0,dist_along,ATM_elev,idx_ATM,z_ATL06,t_ATL06,idx_ATL06,gt_ATL06,t_YM
0,122581.170689,45.04805,3130.0,42.769034,2018-10-18 15:53:52,617767.0,gt1l,2018-10-18 15:53:52
1,122647.058967,44.7688,3132.0,42.156155,2018-10-18 15:53:52,621074.0,gt1r,2018-10-18 15:53:52
2,125810.129706,36.67925,3229.0,34.448452,2018-10-18 15:53:52,624012.0,gt2l,2018-10-18 15:53:52
3,125875.304455,36.28405,3231.0,32.827094,2018-10-18 15:53:52,626687.0,gt2r,2018-10-18 15:53:52
5,77393.293766,269.7804,1803.0,255.108995,2018-10-21 05:21:45,205803.0,gt3r,2018-10-21 05:21:45


# Visual check of data

Plot reference elevation profile and crossover data points.

In [6]:
%matplotlib widget

plt.figure(figsize=(12,8))
ax1 = plt.scatter(cross_df['dist_along']/1000, cross_df['ATM_elev'], c='black')
ax2 = plt.scatter(cross_df['dist_along']/1000, cross_df['z_ATL06'], c=cross_df['t_ATL06'], s=12)
plt.xlabel('Distance along track (km)')
plt.ylabel('Elevation (m)')
plt.title('Elevation profiles from ATM and ICESat-2')
plt.colorbar(label='Time')
plt.legend(['%s ATM reference track' % ATM_year]);

FigureCanvasNbAgg()

# Calculate residual between ICESat-2 track crossovers and reference elevation profile

For each point in the ICESat-2 crossover dataframe, subtract the reference elevation at the same distance along-track.

$residual = z_{ICESat2} - z_{reference}$

Thus, positive values indicate an increase in elevation compared to the reference track, and negative values indicate a decrease in elevation.

In [7]:
cross_df['residuals'] = cross_df['z_ATL06'] - cross_df['ATM_elev']

cross_df.head()

Unnamed: 0,dist_along,ATM_elev,idx_ATM,z_ATL06,t_ATL06,idx_ATL06,gt_ATL06,t_YM,residuals
0,122581.170689,45.04805,3130.0,42.769034,2018-10-18 15:53:52,617767.0,gt1l,2018-10-18 15:53:52,-2.279016
1,122647.058967,44.7688,3132.0,42.156155,2018-10-18 15:53:52,621074.0,gt1r,2018-10-18 15:53:52,-2.612645
2,125810.129706,36.67925,3229.0,34.448452,2018-10-18 15:53:52,624012.0,gt2l,2018-10-18 15:53:52,-2.230798
3,125875.304455,36.28405,3231.0,32.827094,2018-10-18 15:53:52,626687.0,gt2r,2018-10-18 15:53:52,-3.456956
5,77393.293766,269.7804,1803.0,255.108995,2018-10-21 05:21:45,205803.0,gt3r,2018-10-21 05:21:45,-14.671405


Let's plot the residuals!

In [8]:
plt.figure(figsize=(12,4))
plt.scatter(cross_df['dist_along']/1000, cross_df['residuals'], c=cross_df['t_ATL06'])
plt.axhline(0, color='k', lw=0.5)
plt.xlabel('Distance along track (km)')
plt.ylabel('Elevation (m)')
plt.title('Elevation residuals \n (+)=raised, (-)=lowered')
plt.colorbar(label='Time');

FigureCanvasNbAgg()

# Export dataframe to CSV

Exported CSV file includes distance along track, ATM data (elevation, index), ATL06 data (elevation, index, time, groundtrack), and the residual (difference between ATL06 and ATM).

In [9]:
cross_df.to_csv(home_dir + "data_prod/residuals.csv")

# Residual statistics

Let's further explore what's going on in our elevation data.

## Variation in elevation changes, along-track

Bin the residuals by distance along track, and look at the standard deviation of residual values in each bin, using a box plot. Why do this? We hypothesize that we will see more elevation variation near the terminus than farther up-flow.

In [10]:
bin_min = np.around(np.floor(cross_df['dist_along'].min()/1000), -1)
bin_max = np.around(np.ceil(cross_df['dist_along'].max()/1000), -1)
bins = np.arange(bin_min,bin_max,10)
print(bins)

cross_df['dist_binned'] = pd.cut(cross_df['dist_along']/1000, bins)
cross_df.head()

[ 60.  70.  80.  90. 100. 110. 120.]


Unnamed: 0,dist_along,ATM_elev,idx_ATM,z_ATL06,t_ATL06,idx_ATL06,gt_ATL06,t_YM,residuals,dist_binned
0,122581.170689,45.04805,3130.0,42.769034,2018-10-18 15:53:52,617767.0,gt1l,2018-10-18 15:53:52,-2.279016,
1,122647.058967,44.7688,3132.0,42.156155,2018-10-18 15:53:52,621074.0,gt1r,2018-10-18 15:53:52,-2.612645,
2,125810.129706,36.67925,3229.0,34.448452,2018-10-18 15:53:52,624012.0,gt2l,2018-10-18 15:53:52,-2.230798,
3,125875.304455,36.28405,3231.0,32.827094,2018-10-18 15:53:52,626687.0,gt2r,2018-10-18 15:53:52,-3.456956,
5,77393.293766,269.7804,1803.0,255.108995,2018-10-21 05:21:45,205803.0,gt3r,2018-10-21 05:21:45,-14.671405,"(70.0, 80.0]"


In [11]:
cross_df.describe()

Unnamed: 0,dist_along,ATM_elev,idx_ATM,z_ATL06,idx_ATL06,residuals
count,183.0,183.0,183.0,183.0,183.0,183.0
mean,95976.285144,144.180045,2358.147541,134.242884,367851.021858,-9.937161
std,19367.491839,143.064628,569.891001,137.59821,206810.256049,17.105003
min,59166.875555,27.02515,1268.0,27.364034,1576.0,-67.94822
25%,82084.882919,27.517675,1948.0,30.591542,196334.0,-23.823591
50%,95717.578915,94.06105,2352.0,70.762263,376953.0,-5.569597
75%,112572.472862,263.5943,2849.0,240.270607,545953.0,1.104688
max,127486.938093,549.26845,3280.0,540.213215,716129.0,40.611603


In [12]:
crossbox = cross_df.boxplot(by='dist_binned', column='residuals')

FigureCanvasNbAgg()