#### Download Planet data, crop it, and download the associated hand-drawn classifications

This notebook expects a co-located `.env` file containing a Planet API key in the format:
> PLANET_API_KEY='[key]'

In [7]:
# gis imports
import geopandas as gpd
import rasterio
import rasterio.mask
from rasterio.plot import show
from rasterio.warp import transform_bounds

# planet api imports
from planet import api
from planet.api import downloader
from planet.api.downloader import create

# misc imports
import os
from pathlib import Path
from dotenv import dotenv_values
from tools import addImageCalc
from pathlib import Path

# data science imports
import matplotlib.pyplot as plt
import pandas as pd

# aws imports
import boto3
from botocore.handlers import disable_signing

# Ask GDAL-backed readers (e.g. the gpd.read_file('s3://...') calls in this
# notebook) to issue unsigned requests, so no AWS credentials are required.
os.environ.update(AWS_NO_SIGN_REQUEST="YES")

In [3]:
# Chips (sites) processed by this notebook.
# Every ID listed here should already have a hand classification available.
chip_ids = [
    '3_5',
    '3_28',
]

In [4]:
# Planet data downloader client.
# The API key is read from the '.env' file; fail early with an actionable
# message instead of a bare KeyError (or a late authentication failure when
# the value is present but empty).
PLANET_API_KEY = dotenv_values().get('PLANET_API_KEY')
if not PLANET_API_KEY:
    raise KeyError(
        "PLANET_API_KEY is missing or empty; add a line "
        "PLANET_API_KEY='[key]' to the '.env' file next to this notebook"
    )
client = api.ClientV1(api_key=PLANET_API_KEY)
planet_data_downloader = downloader.create(client)

# AWS boto3 handles (resource + low-level client).
# Request signing is disabled on both so the bucket can be read without
# AWS credentials.
s3 = boto3.resource('s3')
s3_client = boto3.client('s3')
s3_client.meta.events.register('choose-signer.s3.*', disable_signing)
s3.meta.client.meta.events.register('choose-signer.s3.*', disable_signing)

In [13]:
def _lookup_planet_id(df_images, chip_id):
    """Return the ``image_name`` recorded for ``chip_id`` in the image table.

    If several rows share the same ``site_name`` the first one is used.

    Raises:
        KeyError: if no row of ``df_images`` has ``site_name == chip_id``.
    """
    matches = df_images.loc[df_images.site_name == chip_id, 'image_name']
    if matches.empty:
        raise KeyError(f'no image found for chip_id {chip_id!r} in image.geojson')
    return matches.iloc[0]


def _download_planet_assets(planet_id, data_dir):
    """Activate and download the Planet assets of ``planet_id`` into ``data_dir``."""
    ITEM_TYPE = 'PSScene'
    ASSET_TYPES = ['ortho_analytic_8b_sr',
                   'ortho_analytic_8b_xml']

    item = client.get_item(ITEM_TYPE, planet_id).get()
    # NOTE(review): the item is repeated once per asset type, exactly as in the
    # original code. The downloader already receives the full asset-type list,
    # so a single item may be sufficient -- confirm against the Planet client.
    items_to_download = [item] * len(ASSET_TYPES)

    # Assets must be activated before they can be downloaded.
    planet_data_downloader.activate(iter(items_to_download), ASSET_TYPES)
    planet_data_downloader.download(iter(items_to_download), ASSET_TYPES, str(data_dir))


def _crop_planet_image(planet_id, df_images, data_dir, cropped_dir, buffer_dist=500):
    """Crop the downloaded GeoTIFF of ``planet_id`` to its buffered site geometry.

    The site geometry is reprojected to the raster CRS and buffered by
    ``buffer_dist`` CRS units (the original code describes this as a 500 meter
    buffer) using mitred joins. The result is written, LZW-compressed, to
    ``cropped_dir / 'cropped_{planet_id}.tif'``.

    Raises:
        FileNotFoundError: if ``data_dir`` holds no ``*.tif`` whose name starts
            with ``planet_id``.
        ValueError: if the site table has no geometry associated with
            ``planet_id``.
    """
    df_site = gpd.read_file('s3://opera-calval-database-dswx/site.geojson').dropna()

    # Attach the image attributes to the site geometries; the image geometry
    # column is left out so the merged frame keeps the site geometry.
    image_cols = [col for col in df_images.columns if col != 'geometry']
    df_chips = pd.merge(df_site, df_images[image_cols], on='site_name', how='left')
    df_chip = df_chips[df_chips.image_name == planet_id]
    if df_chip.empty:
        raise ValueError(f'no site geometry found for image {planet_id!r}')

    # Sorted so the choice is deterministic if several files match.
    candidates = sorted(
        path for path in data_dir.glob('*.tif') if path.name.startswith(planet_id)
    )
    if not candidates:
        raise FileNotFoundError(
            f'no GeoTIFF starting with {planet_id!r} found in {data_dir}'
        )
    planet_image_path = candidates[0]

    with rasterio.open(planet_image_path) as src:
        # join_style=2 -> mitred (square) corners on the buffered geometry.
        df_chip_buffered = df_chip.to_crs(src.crs).buffer(buffer_dist, join_style=2)
        out_image, out_transform = rasterio.mask.mask(
            src, df_chip_buffered.geometry, crop=True
        )
        out_meta = src.meta

    # out_image is indexed (band, row, col).
    out_meta.update({"driver": "GTiff",
                     "height": out_image.shape[1],
                     "width": out_image.shape[2],
                     "transform": out_transform,
                     "compress": "lzw"})

    with rasterio.open(cropped_dir / f'cropped_{planet_id}.tif', "w", **out_meta) as dest:
        dest.write(out_image)


def _download_hand_classification(planet_id, download_dir):
    """Download the hand-classification files for ``planet_id``.

    Selects the ``image_calc.geojson`` row(s) for the image with the highest
    ``version`` (first such row); if no row matches on version, falls back to
    the row(s) with the latest ``upload_date``. The selected metadata is saved
    as ``metadata_{planet_id}_v{version}.geojson`` and every key listed in the
    comma-separated ``s3_keys`` field is downloaded into ``download_dir``.

    Raises:
        ValueError: if ``image_calc.geojson`` has no entry for ``planet_id``.
    """
    bucket_name = 'opera-calval-database-dswx'
    image_calc_body = s3.Object(bucket_name, 'image_calc.geojson').get()['Body']
    image_calcs = gpd.read_file(image_calc_body)

    search = image_calcs[image_calcs.image_name == planet_id]
    if search.empty:
        raise ValueError(f'no hand classification found for image {planet_id!r}')

    imagecalc_row = search[search.version == search['version'].max()]
    if imagecalc_row.empty:
        # No row compares equal to the maximum version (e.g. versions missing):
        # fall back to the most recent upload.
        latest_upload = search.upload_date.values.max()
        imagecalc_row = search[search.upload_date.values == latest_upload]
    else:
        imagecalc_row = imagecalc_row.iloc[[0]]

    version = imagecalc_row.version.iloc[0]
    imagecalc_row.to_file(
        download_dir / f'metadata_{planet_id}_v{version}.geojson', driver='GeoJSON'
    )

    bucket = imagecalc_row.bucket.iloc[0]
    for key in imagecalc_row.s3_keys.iloc[0].split(','):
        filename = key.split('/')[-1]
        s3_client.download_file(bucket, key, str(download_dir / filename))


def download_and_crop_imagery(chip_id):
    """Download, crop and pair with its hand classification the image of a chip.

    Steps:
      1. Look up the Planet image recorded for ``chip_id`` in ``image.geojson``
         and download its assets to ``../data/{planet_id}/``.
      2. Crop the downloaded GeoTIFF to the buffered site geometry and write it
         to ``../data/planet_images_cropped/{planet_id}/``.
      3. Download the hand-classification files and their metadata into that
         same cropped-image directory.

    Args:
        chip_id: ``site_name`` of the chip, e.g. ``'3_5'``.

    Returns:
        None. All results are written to disk.

    Raises:
        KeyError: if ``chip_id`` has no entry in the image table.
        FileNotFoundError: if no GeoTIFF for the image is present after download.
        ValueError: if no site geometry or no hand classification exists for
            the image.
    """
    # Rows with any missing value are discarded, as in the original workflow.
    df_images = gpd.read_file('s3://opera-calval-database-dswx/image.geojson').dropna()
    planet_id = _lookup_planet_id(df_images, chip_id)

    data_dir = Path(f'../data/{planet_id}/')
    data_dir.mkdir(exist_ok=True, parents=True)
    _download_planet_assets(planet_id, data_dir)

    cropped_dir = Path(f'../data/planet_images_cropped/{planet_id}/')
    cropped_dir.mkdir(exist_ok=True, parents=True)
    _crop_planet_image(planet_id, df_images, data_dir, cropped_dir)

    _download_hand_classification(planet_id, cropped_dir.absolute())

In [14]:
# Process every chip in turn. A plain loop is used because the function is
# called only for its side effects (it returns None), and it avoids rebinding
# `_`, which IPython uses for the last cell output.
for chip_id in chip_ids:
    download_and_crop_imagery(chip_id)