### This notebook is used to download PlanetScope images with custom requirements (Kehan Yang, kyang33@uw.edu)

#### Load packages and set up directories
Planet updated its API in March 2023, so some of the functions used here may not work with newer versions of the client. This notebook is based on version 1.5.0 of the `planet` Python package.

In [20]:
import os
import glob
from get_planet import *
from os.path import exists
from planet.api.auth import find_api_key

### Authorize Planet account.
You can copy and paste your Planet API key from your Planet account settings.

In [21]:
# Purpose of this cell: resolve the Planet API key and check it against the Orders API.
#
# To supply the key from inside the notebook, uncomment the next line and replace 'XX'
# with your own key. The line is kept commented out so that the 'XX' placeholder does
# not overwrite a real key that is already exported as PL_API_KEY.
# Do not save or share the notebook with a real key filled in.
# os.environ['PL_API_KEY']='XX'

# Read the API key from the `PL_API_KEY` environment variable (None if it is not set)
PLANET_API_KEY = os.getenv('PL_API_KEY')

#### Get your API key and run a validity check
# The key is looked up with find_api_key(); a hint is printed and execution stops if the
# lookup fails. The key is then tested with one authenticated request.

## Get your API Key
try:
    PLANET_API_KEY = find_api_key() #remove find_api_key and place your api key like 'api-key'
except Exception as e:
    print("Failed to get Planet Key: Try planet init or install Planet Command line tool")
    # SystemExit is what sys.exit() raises; raising it directly avoids depending on
    # `sys`, which this notebook never imports explicitly.
    raise SystemExit from e

headers = {'Content-Type': 'application/json'}

# check if API key is valid
# NOTE(review): `requests` is not imported explicitly in this notebook; presumably it is
# re-exported by `from get_planet import *` -- confirm.
# The timeout keeps the cell from hanging forever when the service is unreachable.
response = requests.get('https://api.planet.com/compute/ops/orders/v2',
                        auth=(PLANET_API_KEY, ""),
                        timeout=30)
if response.status_code==200:
    print('Setup OK: API key valid')
else:
    print(f'Failed with response code {response.status_code}: reinitialize using planet init')


Setup OK: API key valid


### Set up directories

In [32]:
# ---- Input geometries (the files have to be GeoJSON) ----
# Other locations that have been used with this notebook:
#   "/Users/kehanyang/Documents/resarch/pc2_meadows/data/snotel/meadow_selected_extent_geojson_download2/"
#   '../../Alaska_project/data/GIS/'
dir_geom = '../../FFLake/Farmponds/data/GIS/single/'

# ---- Folder where the downloaded images are stored ----
# (alternative name once used for it: dir_meadow_images)
dir_download = "/Users/kehanyang/Documents/resarch/pc2_meadows/data/planet/orders/Meadows/"

# ---- Folder for the download links and the image-id list ----
# Other location that has been used: '../../Alaska_project/data/planet/'
dir_order_url = '../../FFLake/Farmponds/data/GIS/'

# ---- Switches: set to True to enable the search, order and download steps ----
flag_search, flag_order, flag_download = False, False, False

# Tag appended to each geometry ID and used as the name of the order-link file
ID_period = '2016'

# Text file holding the Planet download links, one row per order
file_orders = f"{dir_order_url}{ID_period}.txt"

In [16]:
# Display the image-id table.
# NOTE(review): in the code visible in this notebook, `idlist` is only assigned inside the
# search loop of the "Start to search and/or download data" cell further down, so on a
# fresh kernel this cell raises NameError unless that cell has already run with
# flag_search = True.
idlist

Unnamed: 0,id,date,instrument,estimated area
0,20200223_154148_0f52,20200223,PS2,0.117
1,20200223_160139_60_1059,20200223,PS2.SD,0.117
2,20200222_154222_1011,20200222,PS2,0.117
3,20200217_153931_103e,20200217,PS2,0.117
4,20200215_154434_0f34,20200215,PS2,0.117
5,20200208_135815_1_0f33,20200208,PS2,0.117
6,20200209_140234_0f2e,20200209,PS2,0.117
7,20200126_154426_0f34,20200126,PS2,0.117
8,20200121_155403_41_106a,20200121,PS2.SD,0.117
9,20200121_154236_1027,20200121,PS2,0.117


### Start to search and/or download data
If `flag_order` is set to False, no order will be placed and your quota will not be consumed. The search results, including the estimated area of every image, are saved to a CSV file so that you can estimate the total area before ordering.

In [33]:
# Search Planet for every GeoJSON geometry in dir_geom and, when flag_order is True,
# place one order per geometry. Nothing runs unless flag_search is True.
#   * df_search    -- all search results, also written to dir_order_url + 'idlist.csv'
#   * order_urls   -- one row per placed order, written to file_orders after each order
if flag_search:
    # Search window and filters, passed unchanged to ft_iterate for every geometry.
    # To restrict the search to the year in ID_period, use
    #   ID_period + '-01-01T00:00:00'  and  ID_period + '-12-02T12:00:00'  instead.
    # NOTE(review): ID_period only labels the orders; the window below is independent
    # of it -- confirm that this is intended.
    start_time = '2016-01-01T00:00:00'
    end_time = '2022-12-02T12:00:00'
    overlap = 99 # at least with 99% overlap
    cloud_pct = 0.05 # no more than 5% cloud cover

    # One row per GeoJSON file: full path, file name, and file name without extension.
    # os.path.basename is used instead of splitting on "/" so that paths returned with
    # a different separator are handled as well.
    fn = glob.glob(dir_geom + "*geojson")
    df = pd.DataFrame(data = {
        "file": fn,
        "index": [os.path.basename(f) for f in fn],
        "ID": [os.path.basename(f).split('.')[0] for f in fn]
        })
    df = df.sort_values("index", ascending = True)

    print(df.head())

    # Load the orders recorded by a previous run if the file exists; otherwise start
    # with an empty table. The columns are given as a list: a set has no defined order
    # and is not accepted as `columns` by recent pandas versions.
    if exists(file_orders):
        order_urls = pd.read_csv(file_orders)
    else:
        order_urls = pd.DataFrame(columns = ["index", "ID_geom", "order_url"])

    # Search results are collected here and concatenated once after the loop.
    search_results = []

    for irow in df.itertuples():

        print(irow)
        ID_geom = irow.ID + '_' + ID_period
        print(ID_geom)

        # Geometries that already have a recorded order are skipped entirely.
        if ID_geom in order_urls.ID_geom.to_list():
            continue

        print('Searching available images ------- ')
        idlist = ft_iterate(item_type='PSScene', # planet has changed the product item type from 'PSScene4Band' with PSScene
                asset_type= 'ortho_analytic_4b',
                geom = read_geom(irow.file),
                start = start_time,
                end = end_time,
                cloud_cover = cloud_pct, #cloud cover range 0-1 representing 0-100% so 0.5 means max allowed 50% cloud cover
                ovp = overlap) # minimum % overlap 0-100

        idlist['ID_geom'] = ID_geom
        print(idlist.shape)
        # sort_values returns a new frame, so the result has to be assigned.
        idlist = idlist.sort_values("date")
        search_results.append(idlist)

        if flag_order:
            if idlist.empty:
                # An order without any image id cannot be fulfilled, so none is placed.
                print("No images found, order skipped:", ID_geom)
            else:
                payload_info = order_payload(Name_download = ID_geom, ID_imgs = idlist.id.values.tolist(), File_geom = irow.file)
                print("Pay order:", ID_geom)

                order_url = order_now(payload_info)

                # Always write to a new row at the end of the table. Using the position
                # of the geometry in `df` as the row label could overwrite an order
                # loaded from file_orders when the set of GeoJSON files has changed.
                next_row = len(order_urls)
                order_urls.loc[next_row, "index"] = next_row
                order_urls.loc[next_row, "ID_geom"] = ID_geom
                order_urls.loc[next_row, "order_url"] = order_url

                # Save after every order so that a later failure does not lose the links.
                order_urls.to_csv(file_orders, index = False)

    # All search results of this run (an empty frame when nothing new was searched).
    df_search = pd.concat(search_results) if search_results else pd.DataFrame()
    df_search.to_csv(dir_order_url+'idlist.csv')



In [31]:
# Sum of the 'estimated area' column over all search results.
# `df_search` only exists in memory after the search cell has run with flag_search = True.
# On a fresh kernel with flag_search = False, reload the table that the search cell saved
# to dir_order_url + 'idlist.csv' (it reflects the most recent search that was run).
if 'df_search' not in globals():
    df_search = pd.read_csv(dir_order_url + 'idlist.csv')
df_search['estimated area'].sum()

6887.057

### Start to download data
After receiving email notifications, you can use the following code to download the data.

In [None]:

# Download the results of every order listed in file_orders.
# An order is skipped when its target folder (dir_download + ID_geom) already exists.
# Nothing runs unless flag_download is True.
if flag_download:
    order_urls_read = pd.read_csv(file_orders)

    for order_row in order_urls_read.itertuples():
        print(order_row.order_url)
        target_dir = dir_download + order_row.ID_geom
        # if poll_for_success(order_row.order_url):
        if not os.path.exists(target_dir):
            print("start downloading data to", target_dir)
            download_results(order_row.order_url, folder = target_dir)
        else:
            print("Data have been downloaded", target_dir)





### Check downloaded data
Check the data to determine if it has been downloaded completely. If not, download the missing data.

In [None]:

def scene_prefix(name):
    """Return the first three underscore-separated fields of `name`, joined by '_'.

    Used to match an image id from a search table against a downloaded file name.
    A name with fewer than three fields is returned unchanged instead of raising.
    """
    return '_'.join(name.split('_')[:3])


# Check whether all data have been downloaded.
# Every CSV in dir_search is read as a search table; images are looked up in the folder
# of the same name (CSV file name without '.csv').
# NOTE(review): assumes each CSV has an 'id' column holding image ids -- confirm.
dir_search = '/Users/kehanyang/Documents/resarch/pc2_meadows/data/planet/orders/Meadows/'
fn = glob.glob(dir_search + '*.csv')
id_miss = []  # names of the search tables with at least one missing image

# Iterate over every CSV (the earlier range(0, len(fn)-1) skipped the last file).
for csv_path in fn:
    data = pd.read_csv(csv_path)
    geom_id = os.path.basename(csv_path).split('.csv')[0]
    data['id_three'] = [scene_prefix(scene_id) for scene_id in data['id']]

    # All .tif files below the folder that belongs to this search table.
    dir_image = dir_search + geom_id
    fn_img = glob.glob(dir_image + '/**/**/*.tif', recursive = True)
    id_downloaded = [scene_prefix(os.path.basename(f)) for f in fn_img]

    # Rows of the search table without any matching file on disk.
    not_downloaded = data[~data['id_three'].isin(id_downloaded)]

    if len(not_downloaded) > 0:
        print(geom_id)
        print(not_downloaded)
        id_miss.append(geom_id)

# Number of search tables with missing images, followed by their names.
print(len(id_miss))
id_miss