# Download Images from Planet
Specify the start and end dates to search between, along with the item type. The notebook loops over the locations in a CSV file and, for each one, downloads a composite built from `n_composites` clear images taken during that period. The clear-percent/cloud tolerance can also be set in the `dl.search_api()` call.

In [None]:
import os
import multiprocessing as mp
import pandas as pd
import download as dl
import numpy as np

from tqdm.notebook import tqdm
import rasterio
from rasterio.plot import show

In [None]:
# Planet item type that is searched for and ordered.
# NOTE(review): show_rgb further down unpacks exactly four bands, so it will
# not work on 3-band downloads as written.
ITEM_TYPE = 'PSScene4Band' # Either 'PSScene3Band' or 'PSScene4Band'
# Search window, given as timestamp strings. END_DATE is additionally used to
# label the output directory (its first 7 characters, i.e. YYYY-MM) and the
# image file names (the part before the 'T').
START_DATE = "2019-06-01T00:00:00.000Z"
END_DATE = "2019-07-01T00:00:00.000Z"

# Make sure the environment variable PL_API_KEY is set.
# conda example: https://docs.conda.io/projects/conda/en/latest/user-guide/tasks/manage-environments.html#setting-environment-variables

## Download locations with multiprocess

In [None]:
def retrieve_planet_image(coordinates, out_file, start_date, end_date, item_type, n_composites=4):
    """Search for, order, download and sanity-check one image.

    Args:
        coordinates: Area of interest, forwarded to ``dl.search_api`` and
            ``dl.create_order``.
        out_file: Path the downloaded image is written to.
        start_date: Start of the search window, forwarded to ``dl.search_api``.
        end_date: End of the search window, forwarded to ``dl.search_api``.
        item_type: Item type, forwarded to the search and the order.
        n_composites: How many of the leading search results go into the order.

    Raises:
        Exception: If the search returns fewer than ``n_composites`` results,
            or if the downloaded raster has a side longer than 400 pixels or
            shorter than 320 pixels. The file is left on disk in the latter
            cases.
    """
    search_hits = dl.search_api(coordinates, start_date, end_date, item_type)
    n_found = len(search_hits)
    if n_found < n_composites:
        raise Exception(f"Less than n_composites={n_composites} results were returned for {out_file}")

    # Only the first n_composites hits are ordered.
    selected_ids = search_hits[:n_composites]
    order_uuid = dl.create_order(coordinates, selected_ids, item_type)
    download_url = dl.check_order(order_uuid)
    dl.download_image(download_url, out_file)

    # Re-open the result just to read its dimensions.
    with rasterio.open(out_file) as dataset:
        n_rows, n_cols = dataset.shape

    # Too large is checked first, matching the order of the error messages.
    if max(n_rows, n_cols) > 400:
        raise Exception(f"Anomalous image shape with uuid: {order_uuid}, {out_file} is likely corrupted")
    if min(n_rows, n_cols) < 320:
        raise Exception(f"Image incomplete, increase n_composites beyond {n_composites} for {out_file}")

In [None]:
def wrapper(kwargs):
    """Run ``retrieve_planet_image`` for one job without ever raising.

    Args:
        kwargs: Dict of keyword arguments unpacked into
            ``retrieve_planet_image``.

    Returns:
        None. Any ``Exception`` raised by the download is printed rather than
        propagated, so one failing job does not abort the whole batch.
    """
    try:
        retrieve_planet_image(**kwargs)
    except Exception as failure:
        # Best-effort: report the problem and carry on with the next job.
        print(failure)
    return None

### Create jobs from csv

In [None]:
# Table of locations to download; the loop below reads its `lat`, `lon` and
# `state` columns.
il_df = pd.read_csv('/datadrive/ewg_imagery_07-15-20_ind.csv')

base_directory_name = './planet_images'

# One keyword-argument dict per image that still has to be downloaded.
jobs = []
# loop over location
for i, location in il_df.iterrows():
    lat, lon = location.lat, location.lon
    coords = dl.gen_box_coords(lat, lon, height=.009, width=.01184)  # should be around 1km x 1km

    state = location.state  # else can specify what to add for directory location label
    # e.g. './planet_images_<state>-<YYYY-MM>/' (END_DATE[0:7] is the year-month prefix)
    directory = f"{base_directory_name}_{state.lower()}-{END_DATE[0:7]}/"

    # exist_ok avoids the check-then-create race and the redundant lookup.
    os.makedirs(directory, exist_ok=True)

    config = {
        'start_date': START_DATE,
        'end_date': END_DATE,
        'item_type': ITEM_TYPE,
        # File name carries the zero-padded row index and the END_DATE date part.
        'out_file': f"{directory}planet_loc_{str(i).zfill(3)}-date_{END_DATE.split('T')[0]}.tif",
        'coordinates': coords,
    }

    # Skip locations whose image already exists so reruns only fetch what is missing.
    if not os.path.exists(config['out_file']):
        jobs.append(config)

# Notebook cell output: number of downloads queued.
len(jobs)

### Run jobs
It will take a few minutes for the progress bar to update. For reference, downloading 229 locations with 3 composited images per location takes about an hour and a half.

In [None]:
# Desired number of worker processes: leave one core free, but never drop
# below 1 (cpu_count() - 1 is 0 on a single-core machine, and Pool rejects
# a process count below 1).
n_proc = max(1, mp.cpu_count() - 1)

# The context manager terminates the workers if the loop is interrupted or
# raises, so no stray processes are left behind.
with mp.Pool(processes=n_proc) as pool:
    # Results are discarded (wrapper returns None); iterating only drives the
    # progress bar as jobs complete in arbitrary order.
    for result in tqdm(pool.imap_unordered(func=wrapper, iterable=jobs), total=len(jobs)):
        pass

    # Normal completion: shut down gracefully, as before.
    pool.close()
    pool.join()

## Show Images

In [None]:
# display image helper
def show_rgb(img_file):
    """Display the raster at ``img_file`` as an RGB image.

    The file must contain exactly four bands, which are taken to be ordered
    blue, green, red, plus a fourth band that is ignored (presumably
    near-infrared -- confirm against the product spec). The RGB stack is
    scaled by its maximum value before being shown.

    Args:
        img_file: Path of the raster file to open with rasterio.
    """
    with rasterio.open(img_file) as dataset:
        blue, green, red, _fourth = dataset.read()

    # Reorder to band-first (R, G, B) as expected for display.
    composite = np.stack([red, green, blue], axis=0)
    peak = composite.max()
    show(composite / peak)

In [None]:
import glob

# `directory` still holds the value from the last iteration of the
# job-creation loop, so only that one output folder is listed here.
fps = glob.glob(directory + '*')

# Stop here if nothing was downloaded into that folder.
assert fps

In [None]:
# Render every file found by the glob above, one plot per file.
for fp in fps:
    show_rgb(fp)