### SEGTRAX ATL10 read in

In [84]:
# Import the functions from Amy's NSIDC DAAC notebook

import requests
import getpass
import socket
import json
import zipfile
import io
import math
import os
import shutil
import pprint
import time
import geopandas as gpd
import matplotlib.pyplot as plt
import fiona
import h5py
import re
# To read KML files with geopandas, we will need to enable KML support in fiona (disabled by default)
fiona.drvsupport.supported_drivers['LIBKML'] = 'rw'
from shapely.geometry import Polygon, mapping
from shapely.geometry.polygon import orient
from statistics import mean
from requests.auth import HTTPBasicAuth

### Accessing data from the NSIDC DAAC with the code from Amy's tutorial

Need to generate a token to access the Earthdata login credentials and apply to future queries.

In [85]:
# Create a token
# Earthdata Login credentials
# Enter your Earthdata Login user name
uid = 'davidbabb'
# Enter your email address associated with your Earthdata Login account
email = 'david.babb@umanitoba.ca'
pswd = getpass.getpass('Earthdata Login password:')

Earthdata Login password: ·········


In [86]:
# Request token from Common Metadata Repository using Earthdata credentials
token_api_url = 'https://cmr.earthdata.nasa.gov/legacy-services/rest/tokens'
hostname = socket.gethostname()
ip = socket.gethostbyname(hostname)

data = {
    'token': {
        'username': uid,
        'password': pswd,
        'client_id': 'NSIDC_client_id',
        'user_ip_address': ip
    }
}
headers={'Accept': 'application/json'}
response = requests.post(token_api_url, json=data, headers=headers)
token = json.loads(response.content)['token']['id']
print(token)
#4351E4A7-9907-AEA1-82B3-FE2A36C88659

0A5D25AF-2421-2A2A-B9B8-2955A319E87E


### Select the dataset of interest and queiry the available files

In [87]:
# Input data set ID (e.g. ATL07) of interest here, also known as "short name".
# ATL07 - Sea ice height - https://nsidc.org/data/atl07
# ATL10 - Sea ice freeboard - https://nsidc.org/data/atl10

short_name = 'ATL10'

# Determine the number and size of granules available within a time range and location

Collect the metadata

In [88]:
# Get json response from CMR collection metadata and print results. This provides high-level metadata on a data set or "collection", provide in json format.

params = {
    'short_name': short_name
}

cmr_collections_url = 'https://cmr.earthdata.nasa.gov/search/collections.json'
response = requests.get(cmr_collections_url, params=params)
results = json.loads(response.content)
pprint.pprint(results)

{'feed': {'entry': [{'archive_center': 'NASA NSIDC DAAC',
                     'associations': {'services': ['S1568899363-NSIDC_ECS',
                                                   'S1613689509-NSIDC_ECS',
                                                   'S1613669681-NSIDC_ECS']},
                     'boxes': ['-90 -180 90 180'],
                     'browse_flag': False,
                     'coordinate_system': 'CARTESIAN',
                     'data_center': 'NSIDC_ECS',
                     'dataset_id': 'ATLAS/ICESat-2 L3A Sea Ice Freeboard V001',
                     'has_formats': True,
                     'has_spatial_subsetting': True,
                     'has_temporal_subsetting': True,
                     'has_transforms': False,
                     'has_variables': True,
                     'id': 'C1511851914-NSIDC_ECS',
                     'links': [{'href': 'https://n5eil01u.ecs.nsidc.org/ATLAS/ATL10.001/',
                                'hreflang': 'en-US',

There may be cases with various versions of the data

In [89]:
# Find all instances of 'version_id' in metadata and print most recent version number

versions = [i['version_id'] for i in results['feed']['entry']]
latest_version = max(versions)
print(latest_version)

001


#### Now set the temporal range to search

In [90]:
# Input temporal range 

# Input start date in yyyy-MM-dd format
start_date = '2018-11-01'
# Input start time in HH:mm:ss format
start_time = '00:00:00'
# Input end date in yyyy-MM-dd format
end_date = '2018-11-07'
# Input end time in HH:mm:ss format
end_time = '23:59:59'

temporal = start_date + 'T' + start_time + 'Z' + ',' + end_date + 'T' + end_time + 'Z'
print(temporal)

2018-11-01T00:00:00Z,2018-11-07T23:59:59Z


#### Now set the area of interest

##### There are three different options

    1) Bounding box 
    2) Polygon coordinate pairs
    3) Spatial file (kml)


In [91]:
# # Input bounding box:
# # Input lower left longitude in decimal degrees
# LL_lon = '-95'
# # Input lower left latitude in decimal degrees
# LL_lat = '55'
# # Input upper right longitude in decimal degrees
# UR_lon = '-70'
# # Input upper right latitude in decimal degrees
# UR_lat = '65'

# bounding_box = LL_lon + ',' + LL_lat + ',' + UR_lon + ',' + UR_lat
# # aoi value used for CMR params below
# aoi = '1'
# print(bounding_box)

Polygon

In [92]:
# Create a list of polygon coordinate pairs

#create list of x (longitude) values in decimal degrees
x = [130.0,180.0,180.0,130.0,130.0]
#create list of y (latitude) values in decimal degrees
y = [65.0,65.0,88.0,88.0,65.0]
xylist = list(zip(x, y))
# Polygon points need to be provided in counter-clockwise order. The last point should match the first point to close the polygon. 
# Input polygon coordinates as comma separated values in longitude latitude order, i.e. lon1, lat1, lon2, lat2, lon3, lat3, and so on.
polygon = ','.join(map(str, list(sum(xylist, ()))))
print(polygon)

# For the polygon method aoi=3
aoi = 3


# Get polygon bounds to be used as bounding box input
# Create shapely Polygon object from x y list
p = Polygon(tuple(xylist))
# Extract the point values that define the perimeter of the polygon
bounds = p.bounds
bbox = ','.join(map(str, list(bounds)))

130.0,65.0,180.0,65.0,180.0,88.0,130.0,88.0,130.0,65.0


In [93]:
print(bbox)

130.0,65.0,180.0,88.0


In [94]:
# %matplotlib inline

# # Load "Natural Earth” countries dataset, bundled with GeoPandas
# world = gpd.read_file(gpd.datasets.get_path('naturalearth_lowres'))

# # Overlay glacier outline
# f, ax = plt.subplots(1, figsize=(12, 6))
# world.plot(ax=ax, facecolor='lightgray', edgecolor='gray')
# polygon.plot(ax=ax, cmap='Set2')
# ax.set_ylim([0, 90])
# ax.set_xlim([-180,0]);

## Now that we have our time and area defined we will create a dictionary to be applied to our search query below based on the spatial and temporal inputs defined above

In [95]:
#Create CMR parameters used for granule search. Modify params depending on bounding_box or polygon input.

params = {
'short_name': short_name,
'version': latest_version,
'temporal': temporal,
'page_size': 100,
'page_num': 1,
'polygon': polygon,
}

print('CMR search parameters: ', params)

CMR search parameters:  {'short_name': 'ATL10', 'version': '001', 'temporal': '2018-11-01T00:00:00Z,2018-11-07T23:59:59Z', 'page_size': 100, 'page_num': 1, 'polygon': '130.0,65.0,180.0,65.0,180.0,88.0,130.0,88.0,130.0,65.0'}


#### Input the parameter dictionary to the CMR granula search to query all granules that meet the criteria based on the granule metadata. Print the number of granules returned

In [96]:
granule_search_url = 'https://cmr.earthdata.nasa.gov/search/granules'

granules = []
while True:
    response = requests.get(granule_search_url, params=params, headers=headers)
    results = json.loads(response.content)

    if len(results['feed']['entry']) == 0:
        # Out of results, so break out of loop
        break

    # Collect results and increment page_num
    granules.extend(results['feed']['entry'])
    params['page_num'] += 1

    
# Get number of granules over my area and time of interest
len(granules)

93

#### Now query the average size of those granules (MB)

In [97]:
granule_sizes = [float(granule['granule_size']) for granule in granules]

# Average size of granules in MB
mean(granule_sizes)

81.48081187791828

#### As well as the total volume

In [98]:
# Total volume in MB
sum(granule_sizes)

7577.715504646401

## Select the subsetting and reformatting sevrices enabled for your data set of interest

The NSIDC DAAC supports customization services on many of our NASA Earthdata mission collections. Reformatting and subsetting are available on all Level-2 and -3 ICESat-2 data sets. Let's discover the specific service options supported for this data set and select which of these services we want to request. 

We will start by querying the service capability to gather and select customization options.

In [99]:
# Query service capability URL 

from xml.etree import ElementTree as ET

capability_url = f'https://n5eil02u.ecs.nsidc.org/egi/capabilities/{short_name}.{latest_version}.xml'

print(capability_url)

https://n5eil02u.ecs.nsidc.org/egi/capabilities/ATL10.001.xml


All of NSIDC's service endpoints are behind NASA Earthdata Login. We need to create a session to store cookies and pass Earthdata Login credentials to capabilities url.

In [100]:
# Create session to store cookie and pass credentials to capabilities url

session = requests.session()
s = session.get(capability_url)
response = session.get(s.url,auth=(uid,pswd))

root = ET.fromstring(response.content)

From the service capability XML, we can collect lists with each service option to gather service information.

In [101]:
# collect lists with each service option

subagent = [subset_agent.attrib for subset_agent in root.iter('SubsetAgent')]

# variable subsetting
variables = [SubsetVariable.attrib for SubsetVariable in root.iter('SubsetVariable')]  
variables_raw = [variables[i]['value'] for i in range(len(variables))]
variables_join = [''.join(('/',v)) if v.startswith('/') == False else v for v in variables_raw] 
variable_vals = [v.replace(':', '/') for v in variables_join]

# reformatting
formats = [Format.attrib for Format in root.iter('Format')]
format_vals = [formats[i]['value'] for i in range(len(formats))]
format_vals.remove('')

# reprojection only applicable on ICESat-2 L3B products, yet to be available. 

# reformatting options that support reprojection
normalproj = [Projections.attrib for Projections in root.iter('Projections')]
normalproj_vals = []
normalproj_vals.append(normalproj[0]['normalProj'])
format_proj = normalproj_vals[0].split(',')
format_proj.remove('')
format_proj.append('No reformatting')

#reprojection options
projections = [Projection.attrib for Projection in root.iter('Projection')]
proj_vals = []
for i in range(len(projections)):
    if (projections[i]['value']) != 'NO_CHANGE' :
        proj_vals.append(projections[i]['value'])
        
# reformatting options that do not support reprojection
no_proj = [i for i in format_vals if i not in format_proj]

#### Let's confirm that subset services exist for our data set by reviewing the `subagent` list. If the list contains service information, we know that services are available. If not, we need to set the `agent` API parameter to `NO` to indicate that our request will bypass the subsetter. This will quickly send back the data "natively" without any customization applied.

In [102]:
print(subagent)
if len(subagent) < 1 :
    agent = 'NO'

[{'id': 'ICESAT2', 'spatialSubsetting': 'true', 'spatialSubsettingShapefile': 'true', 'temporalSubsetting': 'true', 'type': 'both', 'maxGransSyncRequest': '100', 'maxGransAsyncRequest': '2000'}]


In [103]:
# Temporal subsetting KVP

timevar = start_date + 'T' + start_time + ',' + end_date + 'T' + end_time
print(timevar)

2018-11-01T00:00:00,2018-11-07T23:59:59


## Next, let's determine if variable subsetting is available by finding the length of the variable_vals lst we gathered from the capabilities URL

In [104]:
len(variable_vals)

609

 We can view the entire list of variables if desired:

In [105]:
# pprint.pprint(variable_vals)

But it's a long list so it may be best to enter a list of variables to subset our data. (Use the ATL10-data-dictionary-v001.pdf for reference)

For this project we were only using 1 beam and for the first week of November. At that time ICESat-2 was flying in the backward orientation so "gtl1" is the strong beam.

* Note that the satellite flipped on December 28, 2018 and switched gt1r to the strong beam. the satellite flips every 6 months. See the ICESat-2 documentation at NSIDC.

In [106]:
# Define the variables wanted
coverage = '/ancillary_data/atlas_sdp_gps_epoch,\
/gt1l/freeboard_beam_segment/beam_fb_height,\
/gt1l/freeboard_beam_segment/beam_fb_length,\
/gt1l/freeboard_beam_segment/beam_freeboard/beam_fb_confidence,\
/gt1l/freeboard_beam_segment/beam_freeboard/beam_fb_height,\
/gt1l/freeboard_beam_segment/beam_freeboard/beam_fb_quality_flag,\
/gt1l/freeboard_beam_segment/beam_freeboard/beam_fb_sigma,\
/gt1l/freeboard_beam_segment/beam_freeboard/delta_time,\
/gt1l/freeboard_beam_segment/beam_freeboard/height_segment_id,\
/gt1l/freeboard_beam_segment/beam_freeboard/longitude,\
/gt1l/freeboard_beam_segment/beam_freeboard/latitude,\
/gt1l/freeboard_beam_segment/beam_freeboard/seg_dist_x,\
/orbit_info/cycle_number,\
/orbit_info/rgt,\
/orbit_info/orbit_number'


# Reference for the variables downloaded:
-atlas_sdp_gps_epoch: The number of GPS Seconds between the GPS epoch (1980-01-06T00:00:00 UTC) and the ATLAS Standard Data Product (SDP) epoch (2018-01-01T00:00:00 UTS). Add this value to the delta time parameters to compute full gps_seconds (relative to the GPS epoch) for each data points. <br>

/beam/freeboard_beam_segment <br>
-beam_fb_height: Mean of the Freeboard heightsegments
in freeboard beamsegment <br>
-beam_fb_length: Length of freeboard beamsegment <br>

/beam/freeboard_beam_segment/beam_freeboard <br>
-beam_fb_confidence: Confidence level in the freeboard estimate <br>
-beam_fb_height: Estimate of the freeboard height based on the beam h reference <br>
-beam_fb_quailty_flag: Flag describing the quality of the results of the alongtrack fit. (1= height value is invalid; 1=ngrid_w < wlength/2; 2=ngrid_w >= wlength/2; 3=ngrid_dt < dtlength/2; 4=ngrid_dt >= dtlength/2; 5=ngrid_dt >= (dtlength2): where 1 is best and 5 is poor). <br>
flag_values: -1, 1, 2, 3, 4, 5 <br>
flag_meanings : invalid bes t high med low poor<br>
-beam_fb_sigma: estimate of the sigma ( standard deviation) for each beam freeboard height <br>
-delta_time: Number of GPS seconds since the ATLAS SDP epoch. The
ATLAS Standard Data Products (SDP) epoch offset is defined within /ancillary_data/atlas_sdp_gps_epoch as the number of GPS seconds between the GPS epoch (19800106T00:00:00.000000Z UTC) and the ATLAS SDP epoch. By adding the offset contained within atlas_sdp_gps_epoch to delta time parameters, the time in gps_seconds relative to the GPS epoch can be computed. <br>
-height_segment_id: Identifier of each height segment <br>
-longitude: Longitude, WGS84, East=+,Lon of segment center <br>
-latitude: Latitude, WGS84, North=+, Lat of segment center <br>
-seg_dist_x: Alongtrack distance from the equator crossing to the
segment center. <br>
-cycle_number: A count of the number of exact repeats of this reference orbit. <br>

orbit info <br>
-rgt: The reference ground track (RGT) is the track on the earth
at which a specified unit vector within the observatory is
pointed. Under nominal operating conditions, there will be
no data collected along the RGT, as the RGT is spanned by
GT3 and GT4. During slews or offpointing,it is possible
that ground tracks may intersect the RGT. The ICESat2
mission has 1387 RGTs.<br>
-orbit_number: Unique identifying number for each planned ICESat2
orbit.
        

# Request the data from the NSIDC data access API

#### As described above, the API is structured as a URL with a base plus individual key-value-pairs (KVPs) separated by ‘&’. The base URL of the NSIDC API is:
`https://n5eil02u.ecs.nsidc.org/egi/request`

In [107]:
#Set NSIDC data access base URL
base_url = 'https://n5eil02u.ecs.nsidc.org/egi/request'

#### Let's go over the configuration parameters:

* `request_mode`
* `page_size`
* `page_num`

`request_mode` is "synchronous" by default, meaning that the request relies on a direct, continous connection between you and the API endpoint. Outputs are directly downloaded, or "streamed" to your working directory. For this tutorial, we will set the request mode to asynchronous ('async'), which will allow concurrent requests to be queued and processed without the need for a continuous connection.

**Use the streaming `request_mode` with caution: While it can be beneficial to stream outputs directly to your local directory, note that timeout errors can result depending on the size of the request, and your request will not be queued in the system if NSIDC is experiencing high request volume. For best performance, I recommend setting `page_size=1` to download individual outputs, which will eliminate extra time needed to zip outputs and will ensure faster processing times per request. An example streaming request loop is available at the bottom of the tutorial below. **

Recall that we queried the total number and volume of granules prior to applying customization services. `page_size` and `page_num` can be used to adjust the number of granules per request up to a limit of 2000 granules for asynchronous, and 100 granules for synchronous (streaming). For now, let's select 10 granules to be processed in each zipped request. For ATL06, the granule size can exceed 100 MB so we want to choose a granule count that provides us with a reasonable zipped download size. 

In [108]:
# Set number of granules requested per order, which we will initially set to 10.
page_size = 10

#Determine number of pages basd on page_size and total granules. Loop requests by this value
page_num = math.ceil(len(granules)/page_size)

#Set request mode. 
request_mode = 'async'

# Determine how many individual orders we will request based on the number of granules requested

print(page_num)

10


#### If we were to create an API request based on our request parameters and submit into a web browser for example, here's what we end up with:

In [109]:
#Print API base URL + request parameters
API_request = f'{base_url}?short_name={short_name}&version={latest_version}&temporal={temporal}&time={timevar}&polygon={polygon}&Coverage={coverage}&request_mode={request_mode}&page_size={page_size}&page_num={page_num}&token={token}&email={email}'
print(API_request)

https://n5eil02u.ecs.nsidc.org/egi/request?short_name=ATL10&version=001&temporal=2018-11-01T00:00:00Z,2018-11-07T23:59:59Z&time=2018-11-01T00:00:00,2018-11-07T23:59:59&polygon=130.0,65.0,180.0,65.0,180.0,88.0,130.0,88.0,130.0,65.0&Coverage=/ancillary_data/atlas_sdp_gps_epoch,/gt1l/freeboard_beam_segment/beam_fb_height,/gt1l/freeboard_beam_segment/beam_fb_length,/gt1l/freeboard_beam_segment/beam_freeboard/beam_fb_confidence,/gt1l/freeboard_beam_segment/beam_freeboard/beam_fb_height,/gt1l/freeboard_beam_segment/beam_freeboard/beam_fb_quality_flag,/gt1l/freeboard_beam_segment/beam_freeboard/beam_fb_sigma,/gt1l/freeboard_beam_segment/beam_freeboard/delta_time,/gt1l/freeboard_beam_segment/beam_freeboard/height_segment_id,/gt1l/freeboard_beam_segment/beam_freeboard/longitude,/gt1l/freeboard_beam_segment/beam_freeboard/latitude,/gt1l/freeboard_beam_segment/beam_freeboard/seg_dist_x,/orbit_info/cycle_number,/orbit_info/rgt,/orbit_info/orbit_number&request_mode=async&page_size=10&page_num=10&to

#### Define the parameters to use when searching for data on the NSIDC site. Each of these parameters has been defined above. 

In [110]:
subset_params = {
    'short_name': short_name, 
    'version': latest_version, 
    'temporal': temporal, 
    'time': timevar, 
    'polygon': polygon, 
    'Coverage': coverage, 
    'request_mode': request_mode, 
    'page_size': page_size,  
    'token': token, 
    'email': email,
    'bbox': bbox
    }
print(subset_params)

{'short_name': 'ATL10', 'version': '001', 'temporal': '2018-11-01T00:00:00Z,2018-11-07T23:59:59Z', 'time': '2018-11-01T00:00:00,2018-11-07T23:59:59', 'polygon': '130.0,65.0,180.0,65.0,180.0,88.0,130.0,88.0,130.0,65.0', 'Coverage': '/ancillary_data/atlas_sdp_gps_epoch,/gt1l/freeboard_beam_segment/beam_fb_height,/gt1l/freeboard_beam_segment/beam_fb_length,/gt1l/freeboard_beam_segment/beam_freeboard/beam_fb_confidence,/gt1l/freeboard_beam_segment/beam_freeboard/beam_fb_height,/gt1l/freeboard_beam_segment/beam_freeboard/beam_fb_quality_flag,/gt1l/freeboard_beam_segment/beam_freeboard/beam_fb_sigma,/gt1l/freeboard_beam_segment/beam_freeboard/delta_time,/gt1l/freeboard_beam_segment/beam_freeboard/height_segment_id,/gt1l/freeboard_beam_segment/beam_freeboard/longitude,/gt1l/freeboard_beam_segment/beam_freeboard/latitude,/gt1l/freeboard_beam_segment/beam_freeboard/seg_dist_x,/orbit_info/cycle_number,/orbit_info/rgt,/orbit_info/orbit_number', 'request_mode': 'async', 'page_size': 10, 'token': '

# Request Data

#### Finally, we'll download the data directly to this notebook directory in a new Outputs folder. The progress of each order will be reported.

We'll start by creating an output folder if the folder does not already exist.

In [111]:
path = str(os.getcwd() + '/Outputs')
if not os.path.exists(path):
    os.mkdir(path)

Run our request loop with subsetting services applied using the polygon


In [112]:
# Request data service for each page number, and unzip outputs

for i in range(page_num):
    page_val = i + 1
    print('Order: ', page_val)
    subset_params.update( {'page_num': page_val} )
    
# # Post polygon to API endpoint for polygon subsetting to subset based on original, non-simplified KML file

#     shape_post = {'shapefile': open(kml_filepath, 'rb')}
#     request = session.post(base_url, params=subset_params, files=shape_post) 
    
# FOR ALL OTHER REQUESTS THAT DO NOT UTILIZED AN UPLOADED POLYGON FILE, USE A GET REQUEST INSTEAD OF POST:
    request = session.get(base_url, params=subset_params)
    
    print('Request HTTP response: ', request.status_code)

# Raise bad request: Loop will stop for bad response code.
    request.raise_for_status()
    print('Order request URL: ', request.url)
    esir_root = ET.fromstring(request.content)
    print('Order request response XML content: ', request.content)

# Look up order ID
    orderlist = []   
    for order in esir_root.findall("./order/"):
        orderlist.append(order.text)
    orderID = orderlist[0]
    print('order ID: ', orderID)

# Create status URL
    statusURL = base_url + '/' + orderID
    print('status URL: ', statusURL)

# Find order status
    request_response = session.get(statusURL)    
    print('HTTP response from order response URL: ', request_response.status_code)
    
# Raise bad request: Loop will stop for bad response code.
    request_response.raise_for_status()
    request_root = ET.fromstring(request_response.content)
    statuslist = []
    for status in request_root.findall("./requestStatus/"):
        statuslist.append(status.text)
    status = statuslist[0]
    print('Data request ', page_val, ' is submitting...')
    print('Initial request status is ', status)

# Continue to loop while request is still processing
    while status == 'pending' or status == 'processing': 
        print('Status is not complete. Trying again.')
        time.sleep(10)
        loop_response = session.get(statusURL)

# Raise bad request: Loop will stop for bad response code.
        loop_response.raise_for_status()
        loop_root = ET.fromstring(loop_response.content)

# Find status
        statuslist = []
        for status in loop_root.findall("./requestStatus/"):
            statuslist.append(status.text)
        status = statuslist[0]
        print('Retry request status is: ', status)
        if status == 'pending' or status == 'processing':
            continue

# Order can either complete, complete_with_errors, or fail:
# Provide complete_with_errors error message:
    if status == 'complete_with_errors' or status == 'failed':
        messagelist = []
        for message in loop_root.findall("./processInfo/"):
            messagelist.append(message.text)
        print('error messages:')
        pprint.pprint(messagelist)

# Download zipped order if status is complete or complete_with_errors
    if status == 'complete' or status == 'complete_with_errors':
        downloadURL = 'https://n5eil02u.ecs.nsidc.org/esir/' + orderID + '.zip'
        print('Zip download URL: ', downloadURL)
        print('Beginning download of zipped output...')
        zip_response = session.get(downloadURL)
        # Raise bad request: Loop will stop for bad response code.
        zip_response.raise_for_status()
        with zipfile.ZipFile(io.BytesIO(zip_response.content)) as z:
            z.extractall(path)
        print('Data request', page_val, 'is complete.')
    else: print('Request failed.')


Order:  1
Request HTTP response:  201
Order request URL:  https://n5eil02u.ecs.nsidc.org/egi/request?short_name=ATL10&version=001&temporal=2018-11-01T00%3A00%3A00Z%2C2018-11-07T23%3A59%3A59Z&time=2018-11-01T00%3A00%3A00%2C2018-11-07T23%3A59%3A59&polygon=130.0%2C65.0%2C180.0%2C65.0%2C180.0%2C88.0%2C130.0%2C88.0%2C130.0%2C65.0&Coverage=%2Fancillary_data%2Fatlas_sdp_gps_epoch%2C%2Fgt1l%2Ffreeboard_beam_segment%2Fbeam_fb_height%2C%2Fgt1l%2Ffreeboard_beam_segment%2Fbeam_fb_length%2C%2Fgt1l%2Ffreeboard_beam_segment%2Fbeam_freeboard%2Fbeam_fb_confidence%2C%2Fgt1l%2Ffreeboard_beam_segment%2Fbeam_freeboard%2Fbeam_fb_height%2C%2Fgt1l%2Ffreeboard_beam_segment%2Fbeam_freeboard%2Fbeam_fb_quality_flag%2C%2Fgt1l%2Ffreeboard_beam_segment%2Fbeam_freeboard%2Fbeam_fb_sigma%2C%2Fgt1l%2Ffreeboard_beam_segment%2Fbeam_freeboard%2Fdelta_time%2C%2Fgt1l%2Ffreeboard_beam_segment%2Fbeam_freeboard%2Fheight_segment_id%2C%2Fgt1l%2Ffreeboard_beam_segment%2Fbeam_freeboard%2Flongitude%2C%2Fgt1l%2Ffreeboard_beam_segment

# Clean up the folders

Extracts the individual files out of the folders and dumps them into the outputs folder

In [114]:
#Clean up Outputs folder by removing individual granule folders 

for root, dirs, files in os.walk(path, topdown=False):
    for file in files:
        try:
            shutil.move(os.path.join(root, file), path)
        except OSError:
            pass
        
for root, dirs, files in os.walk(path):
    for name in dirs:
        os.rmdir(os.path.join(root, name))

List the folders and give the length

In [115]:
sorted(os.listdir(path))

data = sorted(os.listdir(path))
len(data)

54

# Now to share the data via s3

In the terminal type the following to zip the data together: <br>
tar -czvf Outputs.tar.gz Outputs

And upload the data to the s3 drive. <br>
aws s3 cp Outputs.tar.gz s3://pangeo-data-upload-oregon/icesa
t2/segtrax/

Then the team members can access the data via the following commands: <br>
aws s3 cp s3://pangeo-data-upload-oregon/icesat2/segtrax/Outputs.tar.gz . <br>
tar -xzvf Outputs.tar.gz