#### Download Planet data, crop it, and download the associated hand-drawn classifications. Also download coincident OPERA DSWx data.

This notebook expects a co-located '.env' file containing a Planet API key in the format:
> PLANET_API_KEY='[key]'

In [1]:
# gis imports
import geopandas as gpd
import rasterio
import rasterio.mask
from rasterio.plot import show
from rasterio.warp import transform_bounds

# planet api imports
from planet import api
from planet.api import downloader
from planet.api.downloader import create

# misc imports
import os
from pathlib import Path
from dotenv import dotenv_values
from tools import addImageCalc
from pathlib import Path

# data science imports
import matplotlib.pyplot as plt
import pandas as pd

# aws imports
import boto3
from botocore.handlers import disable_signing

# pySTAC imports
from pystac_client import Client

os.environ["AWS_NO_SIGN_REQUEST"] = "YES"

  from planet import api


In [2]:
# Chips processed by this notebook: every distinct site name listed in the
# validation table. These are expected to be the chips for which
# hand classifications were made.

df = pd.read_csv('../data/validation_table.csv')
chip_ids = df['site_name'].unique()

print(chip_ids)

['4_21' '4_11' '1_31' '3_28' '1_37' '1_34' '3_30' '4_8' '1_43' '2_28'
 '4_9' '1_9' '1_38' '1_41' '4_5' '4_22' '4_14' '2_26' '4_42' '4_37' '4_6'
 '3_1' '3_15' '1_32' '2_1' '3_43' '2_15' '2_7' '1_47' '4_26' '3_40' '2_13'
 '1_19' '2_9' '3_8' '4_1' '3_10' '1_18' '2_4' '4_28' '1_15' '3_3' '3_4'
 '3_5' '4_7' '2_38' '1_1' '3_12' '3_32' '1_5' '2_29' '2_8']


In [3]:
# Planet API key, read from the '.env' file
PLANET_API_KEY = dotenv_values()['PLANET_API_KEY']

# boto3 S3 handles; the `disable_signing` handler is registered on each so
# that S3 requests are sent unsigned (anonymous access)
s3_client = boto3.client('s3')
s3_client.meta.events.register('choose-signer.s3.*', disable_signing)

s3 = boto3.resource('s3')
s3.meta.client.meta.events.register('choose-signer.s3.*', disable_signing)

In [None]:
def download_planet_imagery(chip_id):
    """
    Download the PlanetScope imagery associated with a chip (site).

    The chip is mapped to its Planet image id through the `image_name`
    column of `image.geojson` in the `opera-calval-database-dswx` bucket.
    Assets are written to `../data/<planet id>/`. If that directory already
    exists the download is skipped.

    Parameters
    ----------
    chip_id : str
        Site name to look up in the `site_name` column of the image table.

    Returns
    -------
    str
        The Planet image id for the chip, whether or not a download was
        needed.

    Raises
    ------
    KeyError
        If no row of the image table matches `chip_id`.
    """
    df_images = gpd.read_file('s3://opera-calval-database-dswx/image.geojson')
    # Rows with any missing value are discarded before the lookup
    df_images.dropna(inplace=True)

    image_names = df_images.loc[df_images.site_name == chip_id, 'image_name']
    if image_names.empty:
        raise KeyError(f'No Planet image found for chip_id {chip_id!r}')
    # A chip may have several image rows; the first one is used
    PLANET_ID = image_names.iloc[0]

    data_dir = Path(f'../data/{PLANET_ID}/')
    if data_dir.exists():
        # NOTE(review): a directory left behind by an interrupted download is
        # also treated as "already downloaded" -- confirm this is acceptable
        return PLANET_ID

    data_dir.mkdir(exist_ok=True, parents=True)

    client = api.ClientV1(api_key=PLANET_API_KEY)
    planet_data_downloader = downloader.create(client)

    item_type = 'PSScene'
    asset_types = ['ortho_analytic_8b_sr',
                   'ortho_analytic_8b_xml']

    item = client.get_item(item_type, PLANET_ID).get()
    if 'ortho_analytic_8b_sr' not in item['assets']:
        # download 4b_sr if 8b_sr is not available
        asset_types = ['ortho_analytic_4b_sr', 'ortho_analytic_4b_xml']

    # The item is repeated once per asset type, as the downloader calls below
    # are handed parallel iterables of items and asset types
    items_to_download = [item] * len(asset_types)

    # Assets must be activated before they can be downloaded
    planet_data_downloader.activate(iter(items_to_download), asset_types)
    planet_data_downloader.download(iter(items_to_download), asset_types, str(data_dir))

    return PLANET_ID
        

def crop_planet_imagery(PLANET_ID):
    """
    Crop a downloaded Planet image to its chip footprint plus a buffer.

    The source image is the first `*.tif` in `../data/<PLANET_ID>/` whose
    file name starts with `PLANET_ID`. The chip geometry comes from
    `site.geojson` joined to `image.geojson` on `site_name`. The result is
    written, LZW-compressed, to
    `../data/planet_images_cropped/<PLANET_ID>/cropped_<PLANET_ID>.tif`.
    Nothing is recomputed if a `cropped_<PLANET_ID>*.tif` file already exists
    there.

    Parameters
    ----------
    PLANET_ID : str
        Planet image id (the `image_name` column of the image table).

    Returns
    -------
    pathlib.Path
        Directory holding the cropped image.

    Raises
    ------
    FileNotFoundError
        If no source GeoTIFF for `PLANET_ID` is present in the data directory.
    ValueError
        If no chip in the site table is associated with `PLANET_ID`.
    """
    data_dir = Path(f'../data/{PLANET_ID}/')
    cropped_dir = Path(f'../data/planet_images_cropped/{PLANET_ID}/')

    if list(cropped_dir.glob(f"cropped_{PLANET_ID}*.tif")):
        # Already cropped on a previous run
        return cropped_dir

    planet_images = [path for path in data_dir.glob('*.tif')
                     if path.name.startswith(PLANET_ID)]
    if not planet_images:
        raise FileNotFoundError(
            f'No Planet GeoTIFF starting with {PLANET_ID!r} found in {data_dir}'
        )
    planet_image_path = planet_images[0]

    df_images = gpd.read_file('s3://opera-calval-database-dswx/image.geojson')
    df_images.dropna(inplace=True)
    df_site = gpd.read_file('s3://opera-calval-database-dswx/site.geojson')
    df_site.dropna(inplace=True)

    # Attach the image attributes to the site geometries; the image table's
    # own geometry column is left out so the site geometry is the one kept
    image_attrs = df_images[[col for col in df_images.columns if col != 'geometry']]
    df_chips = pd.merge(df_site, image_attrs, on='site_name', how='left')

    df_chip = df_chips[df_chips.image_name == PLANET_ID]
    if df_chip.empty:
        raise ValueError(f'No chip geometry found for Planet image {PLANET_ID!r}')

    cropped_dir.mkdir(exist_ok=True, parents=True)

    with rasterio.open(planet_image_path) as src:
        # 500 meter buffer with mitred corners (join_style=2)
        # assumes the image CRS is projected in metres -- TODO confirm
        df_chip_utm = df_chip.to_crs(src.crs).buffer(500, join_style=2)
        out_image, out_transform = rasterio.mask.mask(src, df_chip_utm.geometry, crop=True)
        out_meta = src.meta

    # out_image is indexed (band, row, column)
    out_meta.update({"driver": "GTiff",
                     "height": out_image.shape[1],
                     "width": out_image.shape[2],
                     "transform": out_transform,
                     "compress": "lzw"})

    with rasterio.open(cropped_dir / f'cropped_{PLANET_ID}.tif', "w", **out_meta) as dest:
        dest.write(out_image)

    return cropped_dir

def download_validation_data(PLANET_ID, cropped_dir):
    """
    Download the image-calc files recorded for a Planet image.

    The record is selected from `image_calc.geojson` in the
    `opera-calval-database-dswx` bucket: the first row with the highest
    `version`, or, when no row matches on version, the row(s) with the most
    recent `upload_date`. The selected record is saved as
    `metadata_<PLANET_ID>_v<version>.geojson` and every key listed in its
    comma-separated `s3_keys` field is downloaded next to it, into
    `../data/planet_images_cropped/<PLANET_ID>/`.

    Nothing is done if `cropped_dir` already contains a file matching
    `classification_<PLANET_ID>*.tif`.

    Parameters
    ----------
    PLANET_ID : str
        Planet image id (the `image_name` column of the image-calc table).
    cropped_dir : pathlib.Path
        Directory checked for an existing classification file.

    Raises
    ------
    ValueError
        If the image-calc table has no entry for `PLANET_ID`.
    """
    if list(cropped_dir.glob(f"classification_{PLANET_ID}*.tif")):
        return

    image_calcs = gpd.read_file('s3://opera-calval-database-dswx/image_calc.geojson')

    search = image_calcs[image_calcs.image_name == PLANET_ID]
    if search.empty:
        raise ValueError(f'No image_calc entry found for Planet image {PLANET_ID!r}')

    latest = search[search.version == search['version'].max()]
    if len(latest) > 0:
        imagecalc_row = latest.iloc[[0]]
    else:
        # No row compares equal to the maximum version (e.g. the version is
        # missing), so fall back to the most recently uploaded entry
        imagecalc_row = search[search.upload_date.values == search.upload_date.values.max()]

    download_dir = Path(f'../data/planet_images_cropped/{PLANET_ID}').absolute()
    download_dir.mkdir(exist_ok=True, parents=True)

    version = imagecalc_row.version.iloc[0]
    imagecalc_row.to_file(download_dir / f'metadata_{PLANET_ID}_v{version}.geojson', driver='GeoJSON')

    bucket = imagecalc_row.bucket.iloc[0]
    for key in imagecalc_row.s3_keys.iloc[0].split(','):
        # Files are stored flat, under the last component of their S3 key
        filename = key.split('/')[-1]
        s3_client.download_file(bucket, key, str(download_dir / filename))

In [None]:
# Helper functions to process DSWx data
def get_fmask_url(hls_id: str) -> str:
    """
    Look up the URL of the Fmask asset of an HLS granule.

    The granule is searched by id in the `HLSL30.v2.0` and `HLSS30.v2.0`
    collections of the LPCLOUD catalog of the CMR STAC API.

    Parameters
    ----------
    hls_id : str
        Id of the HLS granule to search for.

    Returns
    -------
    str
        The `href` of the granule's `Fmask` asset, or an empty string if the
        granule has no such asset.

    Raises
    ------
    ValueError
        If the search does not match exactly one granule.
    """
    STAC_URL = 'https://cmr.earthdata.nasa.gov/stac'
    hls_collections = ['HLSL30.v2.0', 'HLSS30.v2.0']

    # Named `catalog` so the imported `planet.api` module is not shadowed
    catalog = Client.open(f'{STAC_URL}/LPCLOUD/')
    search_hls = catalog.search(collections=hls_collections, ids=hls_id)

    # Explicit check rather than `assert`, which is stripped under `python -O`
    n_matched = search_hls.matched()
    if n_matched != 1:
        raise ValueError(
            f'Expected exactly one HLS granule for id {hls_id!r}, found {n_matched}'
        )

    metadata = list(search_hls.get_all_items())[0].to_dict()
    return metadata['assets'].get('Fmask', {'href': ''})['href']

def download_dswx_data(planet_id):
    """
    Download the DSWx files and the HLS Fmask that go with a Planet image.

    The row for `planet_id` is read from `../data/validation_table.csv`. Its
    `dswx_urls` field is split on whitespace and the first and third URLs are
    fetched, together with the Fmask URL resolved from the row's `hls_id`.
    Files are saved with `wget` into `../data/<planet_id>/`; files already
    present are not downloaded again (`-nc`). Failed downloads are not
    reported.

    Parameters
    ----------
    planet_id : str
        Planet image id (the `planet_id` column of the validation table).

    Raises
    ------
    ValueError
        If the validation table has no row for `planet_id`.
    FileNotFoundError
        If the `wget` executable is not available.
    """
    # Local import so this function does not depend on the file's import cell
    import subprocess

    save_path = Path('../data') / planet_id
    df = pd.read_csv('../data/validation_table.csv')
    row = df[df['planet_id'] == planet_id]
    if row.empty:
        raise ValueError(f'No validation table entry for Planet image {planet_id!r}')

    dswx_urls = row.dswx_urls.values[0].split()
    # presumably the first URL is the water layer and the third the
    # confidence layer -- verify against the validation table
    dswx_file = dswx_urls[0]
    conf_file = dswx_urls[2]
    fmask_file = get_fmask_url(row.hls_id.values[0])

    for url in (dswx_file, fmask_file, conf_file):
        if not url:
            # e.g. the granule has no Fmask asset; nothing to fetch
            continue
        # Arguments are passed as a list (no shell), so characters such as
        # '&' or '?' in a URL reach wget unchanged
        subprocess.run(['wget', url, '-q', '-nc', '-P', str(save_path)], check=False)

In [None]:
def main(chip_id):
    """
    Run the full download and crop workflow for one chip.

    Steps, in order: download the Planet imagery for the chip, crop it,
    download the validation data for the Planet image, and download the
    matching DSWx data.

    Parameters
    ----------
    chip_id : str
        Site name of the chip to process.
    """
    # download planet data
    planet_id = download_planet_imagery(chip_id)

    # crop planet data
    cropped_dir = crop_planet_imagery(planet_id)
    if cropped_dir is None:
        # Fall back to the directory the crop step writes to when it does
        # not hand one back
        cropped_dir = Path(f'../data/planet_images_cropped/{planet_id}/')

    # download validation data; it is looked up by Planet image id, not by
    # chip id
    download_validation_data(planet_id, cropped_dir)

    # download overlapping DSWx tile
    download_dswx_data(planet_id)

In [None]:
# Run the workflow for every chip, in order; the collected return values are
# not used
_ = [main(chip_id) for chip_id in chip_ids]