### This notebook downloads PlanetScope images that meet custom requirements (Kehan Yang, kyang33@uw.edu)

#### Load packages and set up directories
This script is built on version 1 of the `planet` package and has not yet been adapted to the recently launched version 2.

In [2]:
import os
import glob
from get_planet import *
from os.path import exists

Setup OK: API key valid


### Authorize Planet account.
You can copy your Planet API key from your Planet account settings.

In [3]:
# If you're following along with this notebook, you can enter your API key on the following line and uncomment it:
# os.environ['PLANET_API_KEY']='XX'
# Read the API key from the `PLANET_API_KEY` environment variable (None when the variable is not set).
PLANET_API_KEY = os.getenv('PLANET_API_KEY')

#### Get your API Key and run validity check
# NOTE(review): this cell only reads the key; it does not validate it or prompt for a missing one.
# The "Setup OK: API key valid" message shown after the import cell presumably comes from
# importing `get_planet` -- confirm where the validity check actually runs.



### Set up directories

In [11]:
# Directory holding the area-of-interest geometries; every file must be GeoJSON.
dir_geom = "../../../research/pc2_meadows/data/Sierra_meadows/geojson_test/"

# Root under which all working directories of this notebook are created.
dir_root = './'

# Directory where the images will be downloaded.
dir_download = dir_root + 'temp/download/'

# Directory for the order files (download links) and the searched image ids.
dir_order_url = dir_root + 'temp/links/'

# Create the working directories up front so the later CSV writes and downloads
# do not fail on a fresh checkout; exist_ok makes re-running this cell harmless.
os.makedirs(dir_download, exist_ok=True)
os.makedirs(dir_order_url, exist_ok=True)

# Switches for the stages below: searching for images, placing orders, and downloading.
flag_search = True
flag_order = True
flag_download = False


mkdir: dir_download: File exists


### Start to search and/or download data
If `flag_order` is set to False, no order will be placed and your quota will not be consumed. The search results, including the estimated area of each image, are still saved in a CSV file, allowing you to estimate the total area that would be ordered.

In [38]:
# Search period: one pass per calendar year, 2018 through 2023.
ID_period = [str(i) for i in range(2018, 2024)]

# Search filters shared by every request.
overlap = 99      # minimum overlap with the geometry, in percent (0-100)
cloud_pct = 0.05  # maximum cloud cover as a fraction (0-1), i.e. no more than 5%

if flag_search:
    # The list of geometries does not depend on the year, so it is built once.
    fn = glob.glob(dir_geom + "*json")
    df = pd.DataFrame(data={
        "file": fn,
        "index": [os.path.basename(f) for f in fn],
        "ID": [os.path.basename(f).split('.')[0] for f in fn],
    })
    df = df.sort_values("index", ascending=True)
    print(df.head())

    # Search results of every geometry/year searched in this run. They are kept
    # across years so that idlist.csv holds all of them, not only the last year.
    search_frames = []

    for yr in ID_period:
        # File that stores the order URLs (Planet download links) of this year.
        file_orders = dir_order_url + yr + '.txt'
        start_time = yr + '-01-01T00:00:00'
        # NOTE(review): the window ends on 2 December, not 31 December -- confirm this is intended.
        end_time = yr + '-12-02T12:00:00'

        # Resume from an existing order file if there is one; otherwise start an empty table.
        if exists(file_orders):
            order_urls = pd.read_csv(file_orders)
        else:
            order_urls = pd.DataFrame(columns=["index", "ID_geom", "order_url"])

        for idx, irow in enumerate(df.itertuples()):
            ID_geom = irow.ID + '_' + yr
            print(ID_geom)

            # Geometries already recorded in the order file are not searched or ordered again.
            if ID_geom in order_urls["ID_geom"].to_list():
                continue

            print('Searching available images ------- ')
            # ft_iterate / read_geom are presumably provided by the `get_planet` star import.
            idlist = ft_iterate(item_type='PSScene',  # Planet replaced the item type 'PSScene4Band' with 'PSScene'
                                asset_type='ortho_analytic_4b',
                                geom=read_geom(irow.file),
                                start=start_time,
                                end=end_time,
                                cloud_cover=cloud_pct,  # fraction 0-1, so 0.5 would allow up to 50% cloud cover
                                ovp=overlap)            # minimum overlap in percent, 0-100
            print(idlist.shape)

            # Nothing matched the filters: there is nothing to record or to order.
            if idlist.empty:
                print('No images found for', ID_geom)
                continue

            idlist['ID_geom'] = ID_geom
            # sort_values returns a new frame; the result has to be assigned to take effect.
            idlist = idlist.sort_values("date")
            search_frames.append(idlist)

            if flag_order:
                payload_info = order_payload(Name_download=ID_geom,
                                             ID_imgs=idlist.id.values.tolist(),
                                             File_geom=irow.file)
                print("Pay order:", ID_geom)

                order_url = order_now(payload_info)

                # Append as a new row. Using the geometry position as the row label
                # could overwrite a row loaded from an existing order file.
                row = len(order_urls)
                order_urls.loc[row, "index"] = idx
                order_urls.loc[row, "ID_geom"] = ID_geom
                order_urls.loc[row, "order_url"] = order_url
                order_urls.loc[row, "NUM"] = idlist.shape[0]
                order_urls.loc[row, "Total area"] = sum(idlist['estimated area'])

                # Save after every order so an interrupted run can be resumed.
                order_urls.to_csv(file_orders, index=None)

        # Write the cumulative search results after each year. Skip the write when
        # nothing was searched so an earlier idlist.csv is not replaced by an empty file.
        if search_frames:
            pd.concat(search_frames).to_csv(dir_order_url + 'idlist.csv')



### Start to download data
After receiving email notifications, you can use the following code to download the data.

In [37]:
# Turn downloading on for this cell and set the folder that receives the data.
flag_download = True
dir_download = '/Volumes/My Book/data/Farmponds/GIS/Planet_Easternshore/'

# Collect the order files written by the search step, in name order.
fn = sorted(glob.glob(dir_order_url + '*.txt'))

if flag_download:
    for file_orders in fn:
        order_urls_read = pd.read_csv(file_orders)

        for order in order_urls_read.itertuples():
            print(order.order_url)
            # Each order is stored in its own folder named after the geometry/year id.
            target = dir_download + order.ID_geom

            # (A poll_for_success(order.order_url) check was disabled here in the original.)
            if not os.path.exists(target):
                print("start downloading data to", target)
                download_results(order.order_url, folder=target)
                continue

            # An existing folder is treated as an already downloaded order.
            print("Data have been downloaded", target)





### Check downloaded data
Check the data to determine if it has been downloaded completely. If not, download the missing data.

In [None]:
# Check whether all data have been downloaded.
# Each search CSV in dir_search is compared with the GeoTIFFs found in the folder
# of the same name (the CSV file name without its '.csv' extension).
dir_search = '/Users/kehanyang/Documents/resarch/pc2_meadows/data/planet/orders/Meadows/'


def scene_key(name):
    """Return the first three underscore-separated fields of ``name``, joined by '_'.

    Names with fewer than three fields are returned with the fields they have
    instead of raising an IndexError.
    """
    return "_".join(name.split("_")[:3])


def find_incomplete_orders(dir_search):
    """Find the orders in ``dir_search`` whose images are not all on disk.

    Parameters
    ----------
    dir_search : str
        Directory containing one ``<order id>.csv`` search file per order, with
        an ``id`` column of image ids, and one ``<order id>/`` folder holding the
        downloaded ``*.tif`` files at any depth. Assumes each GeoTIFF file name
        starts with the image id it belongs to.

    Returns
    -------
    dict
        Maps each incomplete order id to the rows of its search CSV whose image
        has no matching GeoTIFF. Orders with nothing missing are left out.
    """
    incomplete = {}
    # Iterate over every CSV (the original loop stopped one short and skipped the last file).
    for csv_path in sorted(glob.glob(os.path.join(dir_search, '*.csv'))):
        order_id = os.path.basename(csv_path).split('.csv')[0]
        data = pd.read_csv(csv_path)
        data['id_three'] = [scene_key(scene) for scene in data['id']]

        # With recursive=True, '**' matches the image folder itself and all its subfolders.
        dir_image = os.path.join(dir_search, order_id)
        fn_img = glob.glob(os.path.join(dir_image, '**', '*.tif'), recursive=True)
        id_downloaded = sorted({scene_key(os.path.basename(f)) for f in fn_img})

        not_downloaded = data[~data['id_three'].isin(id_downloaded)]
        if len(not_downloaded) > 0:
            incomplete[order_id] = not_downloaded
    return incomplete


missing_by_order = find_incomplete_orders(dir_search)
for order_id, not_downloaded in missing_by_order.items():
    print(order_id)
    print(not_downloaded)

# Ids of the orders that still have images to download.
id_miss = list(missing_by_order)
print(len(id_miss))
id_miss