The notebook `0-Download-USGS-DSWE.ipynb` can take a significant amount of time to execute, since we are searching over a large area for overlapping ALOS and DSWE data that meet our search criteria. To sidestep this issue, we provide a list of ALOS granule names along with the corresponding DSWE tile names that have been found to meet our requirements. This notebook demonstrates how to query and download these datasets from the relevant DAACs.

In [None]:
# To query the ASF DAAC
import asf_search as asf

# To query USGS
from pystac_client import Client
import pystac
import requests
from bs4 import BeautifulSoup
import netrc

# Misc imports
import zipfile
from pathlib import Path 
import pandas as pd
from typing import Union, Iterable
import os

In [None]:
# Directory the ALOS SAR scene archives are downloaded to and extracted in.
# The path is relative to the current working directory.
sar_download_path = Path('../data/scenes')

In [None]:
# Read the pre-compiled table of ALOS scene names and corresponding DSWE tiles.
# The download loop at the end of this notebook reads the `ALOS_scene` and
# `DSWE_tiles` columns of each row.
df = pd.read_csv('../data/alos_dswe_database.csv')

In [None]:
# Preview the first rows of the table as a quick check of the CSV contents
df.head()

In [None]:
# Setting up helper methods to download DSWE data
def download_asset(item:str, download_path:str='.'):
    '''
    Given the URL to a USGS DSWE asset, download it and return the download path.

    Logs in to USGS ERS with the credentials stored in the user's netrc file
    for 'ers.cr.usgs.gov', then fetches the asset with the authenticated
    session, retrying up to 10 times until the server answers with HTTP 200.

    Parameters
    ----------
    item : str
        URL of the asset. The text after the final '/' is used as the local
        file name.
    download_path : str or Path
        Directory to save the file in. Created (including parents) if missing.

    Returns
    -------
    Path
        Location of the downloaded file.

    Raises
    ------
    RuntimeError
        If the netrc file has no entry for 'ers.cr.usgs.gov', or if no attempt
        returned HTTP 200.
    '''
    download_path = Path(download_path)
    download_path.mkdir(parents=True, exist_ok=True)

    creds = netrc.netrc()
    auth = creds.authenticators('ers.cr.usgs.gov')
    if auth is None:
        raise RuntimeError("No credentials for 'ers.cr.usgs.gov' found in the netrc file")
    user, _, password = auth

    url = 'https://ers.cr.usgs.gov/login'
    target = download_path / item.split('/')[-1]
    max_attempts = 10

    with requests.Session() as s:

        # The login form carries a CSRF token that must be posted back
        # together with the credentials.
        r = s.get(url)
        soup = BeautifulSoup(r.content, 'html.parser')
        sval = soup.find('input', attrs={'name':'csrf'})['value']

        data = {"username": user,
            "password": password,
            "csrf": sval}

        s.post(url, data = data)

        # sometimes we need to try a few times before the file will download
        status = None
        for _ in range(max_attempts):
            respb = s.get(item,
                        allow_redirects=True,
                        headers = {'content-type': 'image/tiff'})
            status = respb.status_code

            # Only persist a successful response, so an error page is never
            # saved under the asset's file name.
            if status == 200:
                target.write_bytes(respb.content)
                return target

    raise RuntimeError(
        f"Failed to download {item} after {max_attempts} attempts "
        f"(last HTTP status: {status})"
    )

# Download the DSWE INWAM asset for a single granule id
def download_dswe_asset(filename:str):
    '''
    Given a USGS DSWE granule id, download it and return the download path.

    The granule id is looked up in the USGS LandsatLook STAC catalog and the
    href of the matching item's 'inwam' asset is passed to `download_asset`,
    which saves it using that function's default download directory.

    Parameters
    ----------
    filename : str
        STAC item id of the DSWE granule.

    Returns
    -------
    The path returned by `download_asset` for the downloaded file.

    Raises
    ------
    ValueError
        If the search does not return exactly one 'inwam' URL for the id.
    '''
    usgs_stac_url = 'https://landsatlook.usgs.gov/stac-server'
    catalog = Client.open(usgs_stac_url)

    search = catalog.search(ids=[filename])
    urls = [r.assets['inwam'].href for r in search.item_collection()]

    # An explicit check (rather than `assert`) still runs under `python -O`
    # and reports the zero-result case accurately.
    if len(urls) != 1:
        raise ValueError(
            f"Expected exactly one URL for granule id {filename!r}, "
            f"but the search returned {len(urls)}"
        )

    # download file
    return download_asset(urls[0])

In [None]:
def return_granule_ids(filename):
    '''
    Swap the last underscore-separated field of a name for 'DSWE'.

    Everything before the final '_' is kept and '_DSWE' is appended. A name
    without any underscore yields just '_DSWE'.
    '''
    stem, _sep, _last_field = filename.rpartition('_')
    return stem + '_DSWE'

In [None]:
# Make sure the download directory exists before anything is written into it
sar_download_path.mkdir(parents=True, exist_ok=True)

# For every row: download and unpack the ALOS RTC product, then download the
# matching DSWE tiles into a 'usgs_dswe' subfolder of the unpacked scene.
for _, row in df.iterrows():

    # Build the ASF granule name from the underscore-separated scene name
    bits = row.ALOS_scene.split('_')
    granule_name = f"ALPSRP{bits[1]}{bits[3][1:]}"
    results = asf.granule_search(granule_name)

    # Pick the first product whose fileID marks it as the RTC_HI_RES product.
    # Failing loudly avoids downloading an unrelated product (or reusing the
    # previous row's selection) when no such product is returned.
    features = results.geojson()['features']
    rtc_index = next(
        (idx for idx, feature in enumerate(features)
         if 'RTC_HI_RES' in feature['properties']['fileID']),
        None,
    )
    if rtc_index is None:
        raise RuntimeError(
            f"No RTC_HI_RES product found for granule {granule_name} "
            f"(scene {row.ALOS_scene})"
        )

    filename = features[rtc_index]['properties']['fileName']
    results[rtc_index].download(sar_download_path)

    _downloaded_file = sar_download_path/filename
    if not _downloaded_file.exists():
        raise FileNotFoundError(f'Error, file does not exist: {_downloaded_file}')

    with zipfile.ZipFile(_downloaded_file) as f:
        f.extractall(sar_download_path)

    # delete zip file after extracting contents
    _downloaded_file.unlink()

    # create subfolder for dswe data and move downloaded DSWE data to it
    # NOTE(review): assumes the archive unpacks into a folder named after the
    # zip file without its suffix - confirm against an actual download.
    alos_path = _downloaded_file.with_suffix('')
    _usgs_folder_path = alos_path/'usgs_dswe'
    _usgs_folder_path.mkdir(exist_ok=True)

    # DSWE tile names are whitespace-separated within a single CSV cell
    usgs_ids = [return_granule_ids(tile) for tile in row.DSWE_tiles.split()]
    usgs_filenames = [download_dswe_asset(granule_id) for granule_id in usgs_ids]

    for dswe_file in usgs_filenames:
        os.rename(dswe_file, _usgs_folder_path/dswe_file.name)
