This notebook downloads coincident ALOS SAR images and USGS DSWE data for this project. The ALOS images are limited to high-resolution, RTC corrected, Fine Beam Dual pol (FBD) data that overlap with the provided AOIs. The USGS DSWE data is limited to the INterpreted layer With All Masking applied (INWAM) product, overlapping and obtained within +/- 7 days of a given SAR acquisition, containing at least 70% not-cloud data, and at least 5% water surfaces.

Pseudocode for this notebook: 

**Iterate over AOIs**
> Search ASF vertex for ALOS PALSAR data, filtering for RTC corrected FBD data <br>
>>**Iterate over returned ASF results**<br>
>>>For a given ALOS PALSAR scene, find overlapping DSWE results within +/- 7 days of the SAR acquisition and having less than 30% cloud cover<br>

>>>If no DSWE data meet these criteria, move to the next ASF result for the same AOI<br>

>>>If overlapping DSWE data is available, verify that the raster extent defined by the SAR scene contains at least 50% non-cloud data, and at least 5% water<br> 


In [1]:
# ASF and stac API libraries
import asf_search as asf
from pystac_client import Client
import pystac

# gis libraries
import geopandas as gpd
from shapely.geometry import shape, box
import rasterio
from rasterio.merge import merge
from rasterio.warp import transform_bounds
from rasterio.crs import CRS

# math imports
import numpy as np

# misc libraries
import datetime
from pathlib import Path
from collections import defaultdict
from dateutil.tz import tzutc
import netrc
import requests
from bs4 import BeautifulSoup
from typing import Union, Iterable
import os
import zipfile

In [2]:
# Directory that receives the downloaded ALOS scenes; create it on first run.
# The parent directory is expected to exist already.
sar_output_path = Path('../data/scenes')
if not sar_output_path.is_dir():
    sar_output_path.mkdir()

In [3]:
# Load file containing training AOIs
aoi_file = Path('../data/training_scenes.geojson')
# Explicit check instead of `assert`, which is skipped when Python runs with -O
if not aoi_file.exists():
    raise FileNotFoundError(f"Missing AOI file: {aoi_file}")
training_aois = gpd.read_file(aoi_file)

In [12]:
# Preview the footprint (as WKT) of the first training AOI only
for idx, aoi in training_aois.head(1).iterrows():
    print(aoi.geometry.wkt)

POLYGON ((-120.29750955041628 38.93464225417948, -119.76223334928117 40.14031434021133, -119.3465102351037 39.93081607794866, -119.87025589076036 38.7294744802862, -120.29750955041628 38.93464225417948))


In [None]:
def return_nearest_dswe_search(result):
    '''
    For an ALOS acquisition (the geojson dict of an ASF search result), return the list of
    USGS DSWE INWAM asset URLs that intersect the acquisition footprint and meet the search
    criteria (less than 30% cloud cover, within +/- 7 days of the acquisition date).

    Search results are grouped by their STAC item datetime, because a single ALOS acquisition
    may correspond to multiple DSWE files. Only the group closest in time to the SAR
    acquisition date (before or after) is returned.

    Parameters
    ----------
    result : dict with a 'geometry' entry (GeoJSON geometry) and a
             'properties' -> 'startTime' entry (string beginning with 'YYYY-MM-DD')

    Returns
    -------
    list of INWAM asset hrefs for the nearest datetime, or an empty list if no DSWE
    result meets the search criteria
    '''
    geometry = shape(result['geometry'])
    startTime = result['properties']['startTime']

    # Only the calendar date of the acquisition is used; the reference is midnight UTC of that day
    year, month, day = (int(x) for x in startTime.split('T')[0].split('-'))
    ref_date = datetime.datetime(year=year, month=month, day=day, tzinfo=tzutc())

    start_day = ref_date - datetime.timedelta(days=7)
    end_day = ref_date + datetime.timedelta(days=7)

    search_date_str = f"{start_day.strftime('%Y-%m-%d')}/{end_day.strftime('%Y-%m-%d')}"
    print(f"Acquisition date: {ref_date}, search range: {search_date_str}")
    usgs_stac_url = 'https://landsatlook.usgs.gov/stac-server'
    catalog = Client.open(usgs_stac_url)

    opts = {
        'intersects': geometry,
        'collections': 'landsat-c2l3-dswe',
        'datetime': search_date_str,
        'max_items': 100,
        'query': {
            'eo:cloud_cover': {'lt': 30},
        },
    }

    search = catalog.search(**opts)
    items = search.get_all_items()

    # Group the INWAM hrefs by item datetime, skipping duplicate hrefs
    collated = defaultdict(list)
    for found in items:
        href = found.assets['inwam'].href
        if href not in collated[found.datetime]:
            collated[found.datetime].append(href)

    if not collated:
        return []

    # Compare the absolute timedelta itself rather than its `.days` attribute:
    # `.days` floors negative durations (1 hour before -> -1 day), which would rank
    # acquisitions before the reference date as further away than they really are.
    nearest = min(collated, key=lambda acquired: abs(acquired - ref_date))
    return collated[nearest]

In [None]:
# function to download one or more USGS assets
def download_asset(item:Union[str, Iterable[str]], download_path:str='.'):
    '''
    Download one or more assets through a session logged in at https://ers.cr.usgs.gov/login.

    The username and password are read from the netrc entry for 'ers.cr.usgs.gov'. The login
    form's 'csrf' token is scraped from the login page and posted back with the credentials.

    Parameters
    ----------
    item : a single asset URL, or an iterable of asset URLs
    download_path : directory the files are written to (created if it does not exist)

    Returns
    -------
    list of pathlib.Path, one per URL in input order; each file is named after the last
    '/'-separated segment of its URL

    Raises
    ------
    RuntimeError : no netrc entry exists for 'ers.cr.usgs.gov'
    requests.HTTPError : the login page, the login post or a download returned an error status
    '''
    # A bare string is a single URL; any other iterable (list, tuple, generator) holds several
    urls = [item] if isinstance(item, str) else list(item)

    download_path = Path(download_path)
    download_path.mkdir(parents=True, exist_ok=True)

    creds = netrc.netrc()
    auth = creds.authenticators('ers.cr.usgs.gov')
    if auth is None:
        raise RuntimeError("No netrc entry found for ers.cr.usgs.gov")
    user, _, password = auth

    url = 'https://ers.cr.usgs.gov/login'
    downloaded_filepaths = []

    # Every request, including the downloads, happens while the session is still open
    with requests.Session() as s:

        r = s.get(url)
        r.raise_for_status()
        soup = BeautifulSoup(r.content, 'html.parser')
        sval = soup.find('input', attrs={'name':'csrf'})['value']

        data = {"username": user,
            "password": password,
            "csrf": sval}

        s.post(url, data=data).raise_for_status()

        for _item in urls:
            filename = _item.split('/')[-1]
            target = download_path / filename

            respb = s.get(_item,
                        allow_redirects=True,
                        headers = {'content-type': 'image/tiff'})
            # Fail loudly instead of saving an HTTP error body under a raster filename
            respb.raise_for_status()

            with open(target, 'wb') as dst:
                dst.write(respb.content)

            downloaded_filepaths.append(target)

    return downloaded_filepaths

# Function that returns the valid-pixel and water-pixel fractions of a raster mosaic
def return_pixel_stats(filepaths, bounds, cloud_val=9):
    '''
    Merge the given rasters over `bounds` and summarise their pixel values.

    `bounds` is given in EPSG:4326 and is reprojected to the CRS of the first raster
    before merging. The nodata value is also taken from the first raster.

    Returns a tuple (valid_fraction, water_fraction), both relative to the total number
    of pixels in the merged array:
      * valid_fraction : share of pixels that are neither nodata nor `cloud_val`
      * water_fraction : share of pixels whose value is 1 or 2
    '''
    with rasterio.open(filepaths[0]) as first_raster:
        target_crs = first_raster.crs
        nodata = first_raster.profile['nodata']

    native_bounds = transform_bounds(CRS.from_epsg(4326), target_crs, *bounds)
    mosaic, _ = merge(filepaths, bounds=native_bounds, nodata=nodata)

    total_pixels = mosaic.size
    invalid_pixels = np.sum(mosaic == nodata) + np.sum(mosaic == cloud_val)
    water_pixels = np.sum(mosaic == 1) + np.sum(mosaic == 2)

    valid_fraction = 1 - invalid_pixels/total_pixels
    water_fraction = water_pixels/total_pixels

    return valid_fraction, water_fraction

In [None]:
# Track ALOS scenes that have already been downloaded
already_downloaded = [x.name for x in sar_output_path.iterdir() if x.is_dir()]

# search parameters for RTC corrected data
alos_opts = {'platform':asf.PLATFORM.ALOS, 'processingLevel':asf.PRODUCT_TYPE.RTC_HIGH_RES,'polarization':asf.POLARIZATION.HH_HV}
downloaded_alos_paths = []

# Acceptance thresholds for the DSWE mosaic clipped to the SAR footprint
MIN_VALID_FRACTION = 0.5
MIN_WATER_FRACTION = 0.05

for _, aoi in training_aois.iterrows():
    print(f"Finding overlapping DSWE data for {aoi['name']}")
    results = asf.geo_search(intersectsWith=aoi.geometry.wkt, **alos_opts)

    # Reset per AOI so that an empty or exhausted search can never fall through
    # to the scene and DSWE files selected for a previous AOI
    selected_scene, filepaths = None, []

    for r in results:
        scene = r.geojson()

        # fileName ends in a 4-character extension; the extracted folder carries the bare name
        if scene['properties']['fileName'][:-4] in already_downloaded:
            continue

        dswe_results = return_nearest_dswe_search(scene)
        if not dswe_results:
            continue

        # Download only the DSWE rasters here; the ALOS scene is fetched once a pair is accepted
        candidate_paths = download_asset(dswe_results)
        valid_data_percentage, water_data_percentage = return_pixel_stats(
            candidate_paths, shape(scene['geometry']).bounds)

        if (valid_data_percentage >= MIN_VALID_FRACTION) and (water_data_percentage >= MIN_WATER_FRACTION):
            selected_scene, filepaths = r, candidate_paths
            break

        # Rejected candidate: remove its DSWE rasters before trying the next ALOS scene
        for rejected in candidate_paths:
            rejected.unlink()

    if selected_scene is None:
        print(f"Valid ALOS/DSWE pair unavailable for {aoi.geometry.wkt}")
        continue

    # Download ALOS acquisition zip file and extract
    selected_scene.download(sar_output_path)
    _downloaded_file = sar_output_path/selected_scene.geojson()['properties']['fileName']
    if not _downloaded_file.exists():
        raise FileNotFoundError(f"Error, file does not exist: {_downloaded_file}")
    with zipfile.ZipFile(_downloaded_file) as f:
        f.extractall(sar_output_path)

    # delete zip file
    _downloaded_file.unlink()

    # create subfolder for dswe data and move downloaded DSWE data to it
    # NOTE(review): assumes the archive extracts to a folder named after the zip file - confirm
    alos_path = (_downloaded_file).with_suffix('')
    _usgs_folder_path = (alos_path/'usgs_dswe')
    _usgs_folder_path.mkdir(exist_ok=True)
    for dswe_file in filepaths:
        os.rename(str(dswe_file), _usgs_folder_path/dswe_file.name)

    downloaded_alos_paths.append(alos_path.name)
    # Remember the scene so that a later AOI overlapping it does not download it again
    already_downloaded.append(alos_path.name)

In [None]:
# Show the names of the ALOS scene folders created by this run
print(repr(downloaded_alos_paths))