# Download required data directly from NSIDC

## Set up local / personal environment

In [None]:
# Import dependencies

from earthdata import Auth, DataCollections, DataGranules, Store

In [None]:
# Set the local working directory; downloaded data files are stored under this path.

data_loc = "/home/jovyan/data/"

In [None]:
# Authenticate Earthdata credentials.
# Try the non-interactive "netrc" strategy first; if that does not leave us
# authenticated, fall back to the "interactive" strategy.

auth = Auth().login(strategy="netrc")
# Use truthiness rather than `is False` so that any falsy state (e.g. None)
# also triggers the interactive fallback.
if not auth.authenticated:
    auth = Auth().login(strategy="interactive")

# Import MEaSUREs BedMachine Data from NSIDC (Step 3)

In [None]:
# Search the NSIDC_ECS provider's collections by keyword. "bed topography" is used
# here to find collections that contain bed topography (e.g., BedMachine).
Query = (
    DataCollections()
    .keyword("bed topography")
    .provider("NSIDC_ECS")
)

print(f"Collections found: {Query.hits()}")

In [None]:
# Retrieve only the ShortName and Abstract of every matching collection, then
# print them so the most suitable dataset can be picked by eye.
collections = (
    Query
    .fields(["ShortName", "Abstract"])
    .get()
)
print(collections)

In [None]:
# Concept ID of the MEaSUREs BedMachine collection
bedmachine = "C1664160168-NSIDC_ECS"

# TODO: Define a bounding box from an IMBIE / MEaSUREs boundary
# Query = DataGranules().concept_id(bedmachine).bounding_box(-134.7,58.9,-133.9,59.2)

# For testing, query the collection without a bounding box
Query = (
    DataGranules()
    .concept_id(bedmachine)
)

print(f"Granules found: {Query.hits()}")

In [None]:
# Get the relevant granules for the collection and print them for review
granules = Query.get()
print(granules)

In [None]:
# Download the data!

# Uncomment the line below to measure how long the download takes.
#%%time

# Fetch the granules from NSIDC (on-prem) and save them locally under "data_loc".
access = Store(auth)
files = access.get(granules, local_path=data_loc)

# Import MEaSUREs Boundaries from NSIDC (Step 1)

In [None]:
# Search the NSIDC_ECS provider's collections by keyword. "boundaries" is used
# here to find collections that contain boundary datasets (e.g., MEaSUREs boundaries).
Query = (
    DataCollections()
    .keyword("boundaries")
    .provider("NSIDC_ECS")
)

print(f"Collections found: {Query.hits()}")

In [None]:
# Retrieve only the ShortName and Abstract of every matching collection, then
# print them so the most suitable dataset can be picked by eye.
collections = (
    Query
    .fields(["ShortName", "Abstract"])
    .get()
)
print(collections)

In [None]:
# Concept ID of the MEaSUREs boundaries collection
boundaries = "C1454773262-NSIDC_ECS"

# TODO: Define a bounding box from an IMBIE / MEaSUREs boundary
# Query = DataGranules().concept_id(boundaries).bounding_box(-134.7,58.9,-133.9,59.2)

# For testing, query the collection without a bounding box
Query = (
    DataGranules()
    .concept_id(boundaries)
)

print(f"Granules found: {Query.hits()}")

In [None]:
# Get the relevant granules for the collection and print them for review
granules = Query.get()
print(granules)

# NOTE: The following datasets are contained here:
# - "Mask_Antarctica_v02" is the 1st element
# - "Basins_IMBIE_Antarctica_v02" is the 2nd element
# - "IceBoundaries_Antarctica_v02" is the 3rd element
# - "GroundingLine_Antarctica_v02" is the 4th element
# - "Basins_Antarctica_v02" is the 5th element
# - "IceShelf_Antarctica_v02" is the 6th element
# - "Coastline_Antarctica_v02" is the 7th element

In [None]:
## NOTE: This cell does not currently behave as expected; it returns only the first file in each granule.
## A work-around is provided in the following cell, which uses wget to download each file listed by data_links().

# TODO: How do I download only one granule from the list?
# TODO: Confirm how to download multiple files within a given granule. The code below only returns the first element/file from each granule; it does not return all of the files/elements associated with the shapefile.

# Access the data from on-prem at NSIDC. Store this at "data_loc" locally.
# access = Store(auth)
# files = access.get(granules, local_path = data_loc+"boundaries/")

In [None]:
# Create a list of links to individual data files from each granule
data_links = [granule.data_links() for granule in granules]

# Loop over the list of data links for each granule - for each file, wget the file.
# TODO: How to feed "data_loc" (or other) directly into the wget command rather than hardcoding this directory here?
for i in data_links:
    tmp = i
    for j in tmp:
        #print(j)
        ! wget -nc {j} -P /home/jovyan/data/boundaries

# Subset the Antarctica Ice Boundaries for a given region (Step 2)