# Speed Up Basemap Ordering and Download

In [2]:
import os
import json
import requests
import urllib.request
import numpy as np
import pandas as pd
import geopandas as gpd
import shapely as shp
from pprint import pprint
import math
import time
import ast

In [3]:
# Get Planet API Key
%load_ext dotenv
%dotenv

api_key = os.getenv('PL_BM_API_KEY')

## Define Functions

In [4]:
def makemydir(dir_path):
    """Create ``dir_path`` and any missing parent directories.

    Calling this for a directory that already exists is a no-op.

    Args:
        dir_path: Path of the directory to create.

    Raises:
        OSError: If the directory cannot be created for any reason other than
            already existing (e.g. permission denied, or a regular file is in
            the way). Previously such errors were silently ignored, which only
            postponed the failure to the first write into the directory.
    """
    os.makedirs(dir_path, exist_ok=True)

In [5]:
def recu_down(url, filename, max_retries=5):
    """Download ``url`` to ``filename``, retrying when the transfer is truncated.

    A download that ends with ``ContentTooShortError`` is attempted again, up
    to ``max_retries`` additional times. Any other exception propagates
    immediately.

    Args:
        url: Address of the file to fetch.
        filename: Local path the file is written to.
        max_retries: Number of retries allowed after the first attempt.
            Values below zero are treated as zero.

    Raises:
        urllib.error.ContentTooShortError: If every attempt was truncated.
    """
    # Imported explicitly; the name was previously only reachable because
    # urllib.request happens to import urllib.error itself.
    import urllib.error
    import urllib.request

    total_attempts = max(1, int(max_retries) + 1)

    for attempt in range(1, total_attempts + 1):
        try:
            urllib.request.urlretrieve(url, filename)
            return
        except urllib.error.ContentTooShortError:
            if attempt == total_attempts:
                # Remove the truncated file so a later "does the file exist?"
                # check cannot mistake it for a finished download.
                if os.path.exists(filename):
                    os.remove(filename)
                raise
            print('Download failed. Trying again...')

## Import Data

In [6]:
# Load the filtered Planet grid cells from GeoJSON. Later cells read the
# `planet_basemap_year` and `id` columns of this table.
grids_filtered = gpd.read_file('../data/planet_grids_for_arts_closest_year.geojson')

In [7]:
# Distinct basemap years present in the grid table, in ascending order
basemap_years = grids_filtered.planet_basemap_year.sort_values().unique()
print(basemap_years)

# One sub-table per basemap year, following the same ascending order
grids_filtered_annual = [
    grids_filtered[grids_filtered.planet_basemap_year == basemap_year]
    for basemap_year in basemap_years
]
print([len(year_grids.index) for year_grids in grids_filtered_annual])

# Split each year's table into near-equal batches of at most 5000 rows
grids_filtered_annual_5000 = [
    np.array_split(year_grids, math.ceil(len(year_grids) / 5000))
    for year_grids in grids_filtered_annual
]
pprint([
    [len(batch.index) for batch in year_batches]
    for year_batches in grids_filtered_annual_5000
])

[2016. 2017. 2018. 2019. 2020. 2021. 2022. 2023.]
[20720, 5079, 4175, 4331, 3949, 3962, 337, 252]
[[4144, 4144, 4144, 4144, 4144],
 [2540, 2539],
 [4175],
 [4331],
 [3949],
 [3962],
 [337],
 [252]]


## Download

In [12]:
# CSV logs kept next to the downloads: the order log is read here, and the
# download log gets one row appended per file fetched by the download loop.
order_info_path = '../data/download/planet_basemap_orders.csv'
order_download_path = '../data/download/planet_basemap_downloads.csv'

# The 'order_info' column stores each order as the text of a Python dict;
# ast.literal_eval turns it back into a dict while the CSV is parsed.
prior_orders = pd.read_csv(
    order_info_path,
    converters={'order_info': ast.literal_eval},
)['order_info'].tolist()

# Show the first order as a sanity check
prior_orders[0]

{'_links': {'_self': 'https://api.planet.com/compute/ops/orders/v2/2a2ddc73-f6e5-4a46-9e59-803d0ffc9011'},
 'created_on': '2024-09-04T22:30:49.725Z',
 'error_hints': [],
 'id': '2a2ddc73-f6e5-4a46-9e59-803d0ffc9011',
 'last_message': 'Preparing order',
 'last_modified': '2024-09-04T22:30:49.725Z',
 'name': 'global_quarterly_2016q3_mosaic_10-3133_1000-3037',
 'products': [{'mosaic_name': 'global_quarterly_2016q3_mosaic',
   'quad_ids': ['10-3133',
    '1000-2818',
    '1000-2819',
    '1000-2826',
    '1000-3037']}],
 'source_type': 'basemaps',
 'state': 'queued'}

In [13]:
# Download the result files of previously submitted basemap orders.
# The [5][0:1] selection processes only the first batch of the sixth year group;
# change the index/slice to work through other batches.
for annual_grids in grids_filtered_annual_5000[5][0:1]:

    # The mosaic name is derived from the basemap year of the batch's first row
    basemap_name = 'global_quarterly_' + str(int(annual_grids.planet_basemap_year.iloc[0])) + 'q3_mosaic'

    # Split the batch into chunks of approximately 5 quads. Order names are
    # built from the first and last id of each chunk and matched against
    # prior_orders by name, so this split has to reproduce the chunking that
    # was in effect when the orders were named.
    n = 5
    grids_filtered_list = np.array_split(annual_grids, math.ceil(len(annual_grids)/n))

    for chunk in grids_filtered_list:

        # Get chunk info
        item_ids = list(chunk.id)
        order_name = basemap_name + '_' + item_ids[0] + '_' + item_ids[-1]

        # make directory to download data to
        dir_path = '../data/download/' + basemap_name + '/' + order_name
        makemydir(dir_path)

        # Look the order up by name; fail with a descriptive error instead of
        # a bare IndexError when it was never recorded.
        matching_links = [
            order['_links']['_self']
            for order in prior_orders
            if order['name'] == order_name
        ]
        if not matching_links:
            raise LookupError(
                'No prior order named ' + order_name + ' found in ' + order_info_path
            )
        order_link = matching_links[0]

        # Fetch the current order status, retrying once after a short pause.
        # `except Exception` keeps the best-effort retry but, unlike a bare
        # `except`, still lets KeyboardInterrupt stop the cell.
        try:
            response = requests.get(order_link, auth=(api_key, ''))
            response.raise_for_status()
            order_status = response.json()
        except Exception:
            time.sleep(5)
            response = requests.get(order_link, auth=(api_key, ''))
            response.raise_for_status()
            order_status = response.json()

        # An order that has not finished carries no 'results' links yet;
        # skip it so the remaining orders can still be downloaded.
        results = order_status.get('_links', {}).get('results')
        if not results:
            print(order_name, 'has no results yet; order state is', order_status.get('state'))
            continue

        order_id = order_status['id']

        for result in results:
            # Flatten the remote name into one local file name: drop the
            # leading order id and replace remaining '/' with '_'.
            filename = dir_path + '/' + result['name'].replace(order_id + '/', '').replace('/', '_')
            url = result['location']

            print("-------------------------------------")

            if not os.path.exists(filename):
                print("Downloading ", order_name)
                # download the file, retrying once after a short pause
                start_time = time.time()
                try:
                    recu_down(url, filename)# the actual download
                except Exception:
                    time.sleep(5)
                    recu_down(url, filename)# the actual download

                elapsed_time = time.time() - start_time
                print("downloading time =", np.round(elapsed_time, 2), "seconds")

                # save info about images that have been downloaded
                download_df = pd.DataFrame({
                    'order_name': [order_name],
                    'filename': [filename],
                    'url': [url]
                })

                # Append to the log; write the header only when creating the file
                download_df.to_csv(
                    order_download_path,
                    index = False,
                    mode = 'a',
                    header = not os.path.exists(order_download_path)
                )
            else:
                print(order_name, ' already downloaded.')

            print("-------------------------------------")
            print("\n")

-------------------------------------
global_quarterly_2020q3_mosaic_1298-1577_1329-1657  already downloaded.
-------------------------------------


-------------------------------------
global_quarterly_2020q3_mosaic_1298-1577_1329-1657  already downloaded.
-------------------------------------


-------------------------------------
global_quarterly_2020q3_mosaic_1298-1577_1329-1657  already downloaded.
-------------------------------------


-------------------------------------
global_quarterly_2020q3_mosaic_1298-1577_1329-1657  already downloaded.
-------------------------------------


-------------------------------------
global_quarterly_2020q3_mosaic_1298-1577_1329-1657  already downloaded.
-------------------------------------


-------------------------------------
global_quarterly_2020q3_mosaic_1298-1577_1329-1657  already downloaded.
-------------------------------------


-------------------------------------
global_quarterly_2020q3_mosaic_1298-1577_1329-1657  already do