# Get HLS and DSWE

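Given a validation chip (or its Planet image), look up the collocated HLS scene, save an RGB composite of it, and plot the HLS, Planet, and validation extents together.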

In [None]:
import geopandas as gpd
import os
import rasterio
import rasterio.mask
from pathlib import Path
from rasterio.plot import show
from rasterio.crs import CRS
import matplotlib.pyplot as plt
import pandas as pd
from pystac_client import Client  
from osgeo import gdal
from tqdm import tqdm
import numpy as np
from shapely.geometry import shape

# Parameters

Specify *exactly* one of `site_name` or `planet_id`. The former is the name given to the chip by the validation team. Because we do not select multiple Planet scenes per chip, and do not select Planet images that cover multiple chips (they are sufficiently spaced apart), this should be a one-to-one mapping.

In [None]:
# Identifier of the Planet image; leave empty when selecting by site name.
PLANET_ID = ''
# Site name of the chip; leave empty when selecting by Planet image id.
SITE_NAME = '3_28'

# Exactly one of the two identifiers may be non-empty; the other one is looked
# up from the image database further down. Raise instead of assert so the
# check still runs when Python is started with -O.
if bool(PLANET_ID) == bool(SITE_NAME):
    raise ValueError('Specify exactly one of PLANET_ID or SITE_NAME.')

In [None]:
# Relative directory named 'planet_images'; presumably where Planet imagery is
# downloaded — it is not referenced again in the visible part of the notebook.
download_dir = Path('planet_images')

# Environment

In [None]:
# Ask the AWS/GDAL stack to send unsigned S3 requests; presumably the s3://
# GeoJSON files read below are publicly readable — confirm.
os.environ["AWS_NO_SIGN_REQUEST"] = "YES"

# Get Image Database

In [None]:
# Load the image table from S3 and keep only rows without any missing value.
df_images = gpd.read_file(
    's3://opera-calval-database-dswx/image.geojson'
).dropna()
df_images.head()

In [None]:
# Two lookup tables over the same (image_name, site_name) pairs:
# one indexed by site name, the other indexed by image name.
temp = df_images[['image_name', 'site_name']]
df_site2image, df_image2site = (
    temp.set_index(key) for key in ('site_name', 'image_name')
)
df_site2image.head()

In [None]:
# Load the site table from S3 and keep only rows without any missing value.
df_site = gpd.read_file(
    's3://opera-calval-database-dswx/site.geojson'
).dropna()
df_site.head()

In [None]:
# Join the image attributes onto every site row. The image geometry is left
# out so that the merged table carries the site geometry only.
cols_to_merge = [name for name in df_images if name != 'geometry']
df_temp = df_images[cols_to_merge]
df_chips = pd.merge(df_site, df_temp, how='left', on='site_name')
df_chips.head()

In [None]:
# Fill in whichever identifier was left empty from the one that was supplied.
# Selecting with a list label (`[[label], column]`) always returns a Series,
# so `.tolist()` works and `len(values)` is the number of matching rows even
# when a label occurs more than once in the index.
if not PLANET_ID:
    values = df_site2image.loc[[SITE_NAME], 'image_name'].tolist()
    PLANET_ID = values[0]
    print(f'There was {len(values)} planet images for this chip')
else:
    values = df_image2site.loc[[PLANET_ID], 'site_name'].tolist()
    SITE_NAME = values[0]
    print(f'There were {len(values)} chips for this planet_image')

(SITE_NAME, PLANET_ID)

In [None]:
# The 'collocated_dswx' value of the first row matching the selected image
# is used as the HLS item id.
HLS_ID = df_images.loc[
    df_images['image_name'] == PLANET_ID, 'collocated_dswx'
].values[0]
HLS_ID

# Get HLS

You will need your Earthdata Login credentials stored in your `.netrc` file.

In [None]:
# Search the LPCLOUD STAC catalog for the item whose id is HLS_ID, looking in
# both HLS collections.
STAC_URL = 'https://cmr.earthdata.nasa.gov/stac'
hls_collections = ['HLSL30.v2.0', 'HLSS30.v2.0']
api = Client.open(f'{STAC_URL}/LPCLOUD/')

search_params = dict(collections=hls_collections, ids=HLS_ID)
search_hls = api.search(**search_params)
search_hls

In [None]:
# Number of items the search reports as matching.
search_hls.matched()

In [None]:
# Materialize the search results and take the first item's metadata as a dict.
hls_collection = list(search_hls.get_all_items())
if not hls_collection:
    # Same exception type as indexing an empty list, but with enough context
    # to see which id / collections produced no result.
    raise IndexError(
        f'No HLS item with id {HLS_ID!r} found in collections {hls_collections}'
    )
metadata = hls_collection[0].to_dict()
metadata

# HLS Imagery

First, make sure you have logged into [Earthdata Login](https://urs.earthdata.nasa.gov/); the GDAL settings below keep the session cookies in `~/cookies.txt`. I copied the code below from section 4 of this [LP DAAC tutorial](https://lpdaac.usgs.gov/resources/e-learning/getting-started-cloud-native-hls-data-python/). It seems to work.

In [None]:
# GDAL configs used to successfully access LP DAAC Cloud Assets via vsicurl 
# NOTE(review): GDAL_HTTP_UNSAFESSL=YES makes curl skip SSL host/certificate
# verification; keep only if the remote endpoints really require it.
gdal.SetConfigOption("GDAL_HTTP_UNSAFESSL", "YES")
# The option value is handed to curl as a plain file name, and "~" is not
# expanded there, so resolve the home directory before passing the path.
cookie_file = os.path.expanduser('~/cookies.txt')
gdal.SetConfigOption('GDAL_HTTP_COOKIEFILE', cookie_file)
gdal.SetConfigOption('GDAL_HTTP_COOKIEJAR', cookie_file)
# Set exactly once. NOTE(review): 'FALSE' leaves directory listing on open
# enabled; confirm this is the intended value for remote (vsicurl) reads.
gdal.SetConfigOption('GDAL_DISABLE_READDIR_ON_OPEN', 'FALSE')
gdal.SetConfigOption('CPL_VSIL_CURL_ALLOWED_EXTENSIONS', 'TIF')

In [None]:
# In HLS products B04, B03 and B02 are the red, green and blue bands. Request
# them in that order so the images stacked later form a true R, G, B
# composite rather than B, G, R.
urls = [metadata['assets'][band]['href'] for band in ['B04', 'B03', 'B02']]
urls

In [None]:
def read_one(url):
    """Open the raster at *url* and return its first band with its profile.

    Args:
        url: Anything `rasterio.open` accepts as a dataset path.

    Returns:
        A `(array, profile)` tuple: the data of band 1 and the dataset's
        rasterio profile.
    """
    with rasterio.open(url) as src:
        band = src.read(1)
        profile = src.profile
    return band, profile

# Read every band (with a progress bar), then split the (array, profile)
# pairs into two parallel tuples. Preview the first band.
images, profiles = zip(*map(read_one, tqdm(urls)))
plt.imshow(images[0])

# Save

In [None]:
# Output folder for this Planet image; create missing parents and do not
# fail when the folder already exists (e.g. on a rerun).
data_dir = Path(f'data/{PLANET_ID}/')
data_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Stack the single-band 2-D arrays along a new trailing axis, giving a
# (rows, cols, bands) array, and display it.
rgb = np.stack(images, axis=-1)
plt.imshow(rgb)

In [None]:
# Start from the first band's profile and declare three bands instead of one.
p = profiles[0].copy()
p.update(count=3)

# Write the composite band-first: (rows, cols, bands) -> (bands, rows, cols).
with rasterio.open(data_dir / f'rgb_{HLS_ID}.tif', 'w', **p) as ds:
    ds.write(np.transpose(rgb, (2, 0, 1)))

# Visualize Data Boundaries

In [None]:
# Build a geometry object from the GeoJSON-style 'geometry' entry of the
# HLS item's metadata.
hls_geometry_mapping = metadata['geometry']
hls_boundary = shape(hls_geometry_mapping)
hls_boundary

In [None]:
# Geometry of the first image row whose name matches the selected Planet id.
is_selected_image = df_images.image_name == PLANET_ID
planet_image = df_images[is_selected_image].geometry.values[0]
planet_image

In [None]:
# Geometry of the first chip row associated with the selected Planet id.
is_selected_chip = df_chips.image_name == PLANET_ID
validation_area = df_chips[is_selected_chip].geometry.values[0]
validation_area

In [None]:
# One row per footprint, labelled by its source, in the CRS of the chip table.
extents = {
    'hls': hls_boundary,
    'planet': planet_image,
    'validation': validation_area,
}
df_all = gpd.GeoDataFrame(
    {'extent_label': list(extents.keys())},
    geometry=list(extents.values()),
    crs=df_chips.crs,
)

In [None]:
# Draw the three footprints on one square figure, colored by label and
# half-transparent so overlapping extents stay visible.
fig, ax = plt.subplots(figsize=(10, 10))
df_all.plot(ax=ax, column='extent_label', alpha=0.5, legend=True)