In [21]:
import os
import h5py
from glob import glob
import numpy as np
import matplotlib.pyplot as plt
import scipy.signal
# Initial data directory, given as a relative path.
data_dir = 'ATL06/ATL06'

import os
import sys
# Add the parent of the current working directory to the import path (only
# once), so that modules stored one level up can be imported.
module_path = os.path.abspath('..')
if sys.path.count(module_path) == 0:
    sys.path.append(module_path)


# make sure we're dealing with the most recent version of any code we're using
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


<font size="5">Analysis of clouds over outlet glaciers</font> <br>
Plot the ATL06 tracks after the files have been transferred using AWS.

In [22]:
import numpy as np
import h5py


def _fill_to_nan(values, fill_value):
    """Return `values` with every element equal to `fill_value` replaced by NaN.

    Integer (or other non-floating) arrays cannot hold NaN, so they are
    converted to float, but only when at least one fill value is present.
    """
    is_fill = values == fill_value
    if not np.any(is_fill):
        return values
    if not np.issubdtype(values.dtype, np.inexact):
        values = values.astype(float)
    values[is_fill] = np.nan
    return values


def ATL06_to_dict(filename, dataset_dict):
    """
        Read selected datasets from an ATL06 file

        Input arguments:
            filename: ATL06 file to read
            dataset_dict: A dictionary describing the fields to be read
                    keys give the group names to be read (relative to
                    each beam group, e.g. 'land_ice_segments'),
                    entries are lists of datasets within the groups
        Output argument:
            D6: list of dictionaries, one per beam found in the file
                (beam pairs 1-3, left then right).  Each dictionary maps
                every dataset name that could be read to a numpy array in
                which fill values have been replaced by NaN, and also
                contains:
                    'pair': array filled with the pair number (1, 2 or 3)
                    'beam': array filled with the beam index (0='l', 1='r')
                    'filename': the filename that was read
                Datasets that are missing from the file are skipped, and
                beams for which nothing could be read are left out.
    """

    D6 = []
    pairs = [1, 2, 3]
    beams = ['l', 'r']
    # open the HDF5 file explicitly read-only
    with h5py.File(filename, 'r') as h5f:
        # loop over beam pairs
        for pair in pairs:
            # loop over beams
            for beam_ind, beam in enumerate(beams):
                beam_root = '/gt%d%s' % (pair, beam)
                # check if a beam exists, if not, skip it
                if beam_root + '/land_ice_segments' not in h5f:
                    continue
                # loop over the groups in the dataset dictionary
                temp = {}
                for group, datasets in dataset_dict.items():
                    for dataset in datasets:
                        DS = '%s/%s/%s' % (beam_root, group, dataset)
                        # a dataset may not exist in a file: skip it and move on to the next
                        if DS not in h5f:
                            continue
                        values = np.array(h5f[DS])
                        # some parameters have a _FillValue attribute.  If it exists,
                        # use it to identify bad values, and set them to NaN
                        if '_FillValue' in h5f[DS].attrs:
                            values = _fill_to_nan(values, h5f[DS].attrs['_FillValue'])
                        temp[dataset] = values
                if len(temp) > 0:
                    # it's sometimes convenient to have the beam and the pair as part of the
                    # output data structure.  They are shaped like h_li when it was read,
                    # otherwise like the first dataset that was read, so that callers who
                    # did not request h_li do not hit a KeyError.
                    ref = temp['h_li'] if 'h_li' in temp else next(iter(temp.values()))
                    temp['pair'] = np.zeros_like(ref) + pair
                    temp['beam'] = np.zeros_like(ref) + beam_ind
                    temp['filename'] = filename
                    D6.append(temp)
    return D6

In [3]:
# Datasets to read for every beam: group name -> list of dataset names in that group.
dataset_dict = {
    'land_ice_segments': ['h_li', 'delta_time', 'longitude', 'latitude'],
    'land_ice_segments/ground_track': ['x_atc'],
}

<font size ="5"> Interpolation and Filtering</font> <br>
The analysis of individual tracks shows that each ICESat-2 track contains several NaNs (missing values). We fix the NaNs and smooth the elevation data in several steps. First, we linearly interpolate over the NaNs in the dataframe using pandas. Then we apply a rolling median filter: within a window of fixed size, points that lie more than 1$\sigma$ (the rolling standard deviation) from the rolling median are treated as outliers, removed, and filled in again by interpolation, which smooths the time series. This is essential for comparing the filtered ICESat-2 tracks with the ATM tracks.

In [11]:
# NOTE: From now on, we'll be working in matplotlib's widget mode, which lets us zoom in on our plots.  
# This means that the figures won't be rendered in the notebook until you run them.
# That means no more spoiler plots (for now...)
%matplotlib widget


data_dir='/home/jovyan/ATL06_data/ATL06/'
D6=[]
pairs=[1, 2, 3]
beams=['l','r']

files=glob(data_dir+'/*.h5')
for file in files:
    this_name=os.path.basename(file)
    D6 += ATL06_to_dict(file, dataset_dict)
print("read %d beam/pair combinations" % (len(D6)))
print(dataset_dict.keys())
# now plot the results:
plt.figure();
for Di in D6:
    plt.scatter(Di['longitude'], Di['latitude'], c=Di['h_li'], vmin=0, vmax=2000, linewidth=0)
plt.xlabel('longitude')
plt.ylabel('latitude')
plt.colorbar()

read 54 beam/pair combinations
dict_keys(['land_ice_segments', 'land_ice_segments/ground_track'])


FigureCanvasNbAgg()

<matplotlib.colorbar.Colorbar at 0x7f89be011ef0>

In [20]:
import pandas as pd
dataset_dict={'land_ice_segments':['latitude','longitude','h_li'], 'land_ice_segments/ground_track':['x_atc']}
print(files[0:5])
%matplotlib widget
for file in files[0:5]:
    this_D6=ATL06_to_dict(file, dataset_dict)
    plt.figure(figsize=(5,4))
    initial_plot = plt.plot(this_D6[1]['x_atc'], this_D6[1]['h_li'],'.')
    plt.title(this_D6[1]['filename'])
    
    a = this_D6[1]['x_atc'];
    b = this_D6[1]['h_li'];
    d = {'col1':a,'col2':b}
    df = pd.DataFrame(data=d)
    NaN_count = df.count()
    print(NaN_count)
    #print(df)
    #df_median = df.rolling(21).median()
    #x = df.plot(x='col1',y='col2')
    #df_median.plot(x='col1',y='col2',ax = x,figsize=(5,4))
    
    # Interpolate the missing NaN in the track
    df1 = df.interpolate(method ='linear', limit_direction ='forward') 
    NaN_count1 = df1.count()
    print(NaN_count1)
    #df1.plot(x='col1',y='col2',figsize=(5,4))
    
    #Filtering
    window = 121
    df1['median']= df1['col2'].rolling(window).median()
    df1['std'] = df1['col2'].rolling(window).std()

    #filter setup
    df1[(df1.col2 > df1['median']+1*df1['std']) | (df1.col2 < df1['median']-1*df1['std'])] = np.NaN
    
    df2 = df1.interpolate(method ='linear', limit_direction ='forward') 
    NaN_count3 = df2.count()
    print(NaN_count3)
    #df1.plot(x='col1',y='col2',figsize=(5,4))
    
    x1 = df.plot(x='col1',y='col2',figsize=(5,4))
    df2.plot(x='col1',y='col2',figsize=(5,4),ax=x1)
    NaN_count2 = df2.count()
    print(NaN_count2)
    

['/home/jovyan/ATL06_data/ATL06/processed_ATL06_20181206134817_10530103_001_01.h5', '/home/jovyan/ATL06_data/ATL06/processed_ATL06_20181206025850_10460105_001_01.h5', '/home/jovyan/ATL06_data/ATL06/processed_ATL06_20181128140455_09310103_001_01.h5', '/home/jovyan/ATL06_data/ATL06/processed_ATL06_20181222022533_12900105_001_01.h5', '/home/jovyan/ATL06_data/ATL06/processed_ATL06_20181214133137_11750103_001_01.h5']


FigureCanvasNbAgg()

col1    7121
col2    7107
dtype: int64
col1    7121
col2    7121
dtype: int64
col1      7121
col2      7121
median    6996
std       6996
dtype: int64


FigureCanvasNbAgg()

col1      7121
col2      7121
median    6996
std       6996
dtype: int64


FigureCanvasNbAgg()

col1    5971
col2    5954
dtype: int64
col1    5971
col2    5971
dtype: int64
col1      5971
col2      5971
median    5851
std       5851
dtype: int64


FigureCanvasNbAgg()

col1      5971
col2      5971
median    5851
std       5851
dtype: int64


FigureCanvasNbAgg()

col1    6832
col2    6809
dtype: int64
col1    6832
col2    6832
dtype: int64
col1      6832
col2      6832
median    6712
std       6712
dtype: int64


FigureCanvasNbAgg()

col1      6832
col2      6832
median    6712
std       6712
dtype: int64


FigureCanvasNbAgg()

col1    6460
col2    6441
dtype: int64
col1    6460
col2    6459
dtype: int64
col1      6460
col2      6459
median    6283
std       6283
dtype: int64


FigureCanvasNbAgg()

col1      6460
col2      6459
median    6283
std       6283
dtype: int64


FigureCanvasNbAgg()

col1    4124
col2    4124
dtype: int64
col1    4124
col2    4124
dtype: int64
col1      4124
col2      4124
median    3987
std       3987
dtype: int64


FigureCanvasNbAgg()

col1      4124
col2      4124
median    3987
std       3987
dtype: int64


<font size="5">Code to read the data from Evan's folder</font>


In [None]:
# NOTE: From now on, we'll be working in matplotlib's widget mode, which lets us zoom in on our plots.  
# This means that the figures won't be rendered in the notebook until you run them.
# That means no more spoiler plots (for now...)
%matplotlib widget


data_dir='/home/jovyan/xtrak/data/'
D6=[]
pairs=[1, 2, 3]
beams=['l','r']

files=glob(data_dir+'/*.h5')
for file in files:
    this_name=os.path.basename(file)
    D6 += ATL06_to_dict(file, dataset_dict)
print("read %d beam/pair combinations" % (len(D6)))

# now plot the results:
plt.figure();
for Di in D6:
    plt.scatter(Di['longitude'], Di['latitude'], c=Di['h_li'], vmin=0, vmax=2000, linewidth=0)
plt.xlabel('longitude')
plt.ylabel('latitude')
plt.colorbar()

Now, let's look at one beam from each of a few granules (files).

In [None]:
dataset_dict={'land_ice_segments':['latitude','longitude','h_li'], 'land_ice_segments/ground_track':['x_atc']}

%matplotlib widget
for file in files[9:15]:
    this_D6=ATL06_to_dict(file, dataset_dict)
    plt.figure()
    plt.plot(this_D6[1]['x_atc'], this_D6[1]['h_li'],'.')
    plt.title(this_D6[1]['filename'])