# Search, filter and download satellite imagery from planet.com

This notebook is a mishmash of several notebooks, most of them provided by planet.com on GitHub.

In [1]:
import numpy as np
import pandas as pd
import planet as pl
import os
import json
import requests
import time
from requests.auth import HTTPBasicAuth

# TODO / improvements:
# - filter images according to overlap with area of interest (AOI)
# - read geojson files instead of specifying coordinates in code below for AOI
# - deal with time interval in which to retrieve data programmatically, e.g.
#   up to 3 months before last road label

## Planet.com API setup

In [2]:
# PRELUDE:
# The Planet API key is read from the environment variable PL_API_KEY and must
# never be written into this notebook. Set it in a terminal before starting
# Jupyter:
#     export PL_API_KEY="<your-planet-api-key>"
# SECURITY NOTE(review): an earlier revision of this cell embedded a literal
# key; that key has to be considered leaked and should be revoked/rotated.

# read API Key stored as an env variable
PLANET_API_KEY = os.getenv('PL_API_KEY')
# an unset as well as an empty variable counts as "no key"
if not PLANET_API_KEY:
    raise Exception("key does not exist")

# Setup Planet Data API base URL:
URL = "https://api.planet.com/data/v1"

# - Setup the session
session = requests.Session()

# - Authenticate (HTTP basic auth: key as user name, empty password)
session.auth = (PLANET_API_KEY, "")

# - Make a GET request to the Planet Data API to verify that it is reachable
#   with the given credentials
res = session.get(URL)

if res.status_code != 200:
    # do not leave the session open if we bail out here
    session.close()
    raise Exception("Houston, we have no planet")

# - Setup the quick search endpoint url (used for temporary, i.e. not saved,
#   searches such as the one in this notebook)
quick_url = "{}/quick-search".format(URL)

## User-defined parameters for imagery to be retrieved

In [3]:
# set all 'global' parameters needed to let this notebook do its job without user interaction

# - name of area of interest
aoi = "3093"
# - directory to dump imagery into
data_dir = "/media/hh/hd_internal/_data_DS/DSR/satelliteImages/Borneo/" + aoi + "/"

# - maximal fraction of cloud cover
cloud_cover_max = 0.01

# - minimal sun elevation: make sure it's not set too high and chimes with the season (date range)
sun_elevation_min = 50

# - type of product (exactly one is used per run)
# PSScene3Band - PlanetScope 3-band Basic and Ortho Scenes
# PSScene4Band - PlanetScope 4-band Basic and Ortho Scenes **
# PSOrthoTile - PlanetScope 4-band Ortho Tiles as 25 km x 25 km UTM tiles
# SkySatScene - SkySat Basic and Ortho Scenes
# SkySatCollect -  is created by composing SkySat Ortho Scenes
item_type = "PSScene3Band"

# - asset types (see https://api.planet.com/data/v1/asset-types/)
# catalogue of asset types, kept for reference only; the assets that are
# actually requested are listed in asset_type below
asset_type_all = (
    "analytic",  # Radiometrically-calibrated analytic imagery stored as 16-bit scaled radiance, suitable for analytic applications.
    "analytic_sr",  # Atmospherically-corrected analytic imagery stored as 16-bit scaled (surface) reflectance, suitable for analytic applications.
    "analytic_xml",  # Radiometrically-calibrated analytic image metadata
    "ortho_analytic_dn",  # Orthorectified 16-bit 4-Band DN Image
    "ortho_analytic_udm",  # Orthorectified 16-bit 4-Band DN Image Unuseable Data Mask
    "ortho_visual",
    "visual",
    "visual_xml",
)

# asset types to be activated and downloaded (must be a tuple/iterable of
# names, hence the trailing comma for a single entry)
asset_type = (
    "visual",
)

# if true, lots of information will be displayed
be_verbose = False
# if true, assets will neither be activated nor downloaded
dry_run = False

In [4]:
# define the area of interest (AOI) and the associated date ranges

# corner points of the AOI polygon; first and last point coincide, i.e. the
# ring is closed (presumably [longitude, latitude] pairs as usual in GeoJSON)
_aoi_ring = [
    [115.7397189289545, -1.680836871258456],
    [116.2810919527378, -1.680331498795499],
    [116.2805050935049, -1.229806405260338],
    [115.7397803420542, -1.230358513985345],
    [115.7397189289545, -1.680836871258456],
]

dict_aoi = dict(
    # name corresponds to the numeric code of the Laurance lab (e.g. 3093)
    name="3093",
    descript="Borneo, labels by Laurance group",
    # the outermost list has exactly one element so that the DataFrame below
    # gets a single row; that element is the list of rings of the polygon
    coordinates=[[_aoi_ring]],
    # earliest / latest date of labeled road
    date_label_min="2005-11-08T00:00:00.000Z",
    date_label_max="2017-12-31T00:00:00.000Z",
    # earliest / latest date of imagery to be retrieved
    date_retrieve_min="2017-06-01T00:00:00.000Z",
    date_retrieve_max="2017-12-31T00:00:00.000Z",
)

# convert to a dataframe indexed by AOI name so we can take a look
df_aoi = pd.DataFrame(dict_aoi)
df_aoi = df_aoi.set_index("name")
df_aoi

Unnamed: 0_level_0,descript,coordinates,date_label_min,date_label_max,date_retrieve_min,date_retrieve_max
name,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
3093,"Borneo, labels by Laurance group","[[[115.7397189289545, -1.680836871258456], [11...",2005-11-08T00:00:00.000Z,2017-12-31T00:00:00.000Z,2017-06-01T00:00:00.000Z,2017-12-31T00:00:00.000Z


In [5]:
# build the GeoJSON polygon geometry of the selected AOI from the coordinates
# stored in the AOI table
geojson_geometry = dict(
    type="Polygon",
    coordinates=df_aoi.loc[aoi, "coordinates"],
)

## Other setup

In [6]:
# Functions

# Helper function to print formatted JSON using the json module
def p(data):
    """Pretty-print `data` as JSON with an indentation of two spaces."""
    formatted = json.dumps(data, indent=2)
    print(formatted)

# Function to download asset files
def download_ass(url, data_dir="~/Downloads/", filename=None):
    """Download an asset file and save it to disk.

    Args:
        url: the location url of the asset.
        data_dir: directory to save the file in (default: ~/Downloads/).
            A leading "~" is expanded to the user's home directory; a
            trailing path separator is optional. The directory must exist.
        filename: the filename to save it as. If not given, it is taken from
            the response's content-disposition header or, failing that,
            derived from the location url.

    Returns:
        The filename (without directory) under which the asset was saved.

    Raises:
        requests.HTTPError: if the server answers with an error status, so
            that an error page is never stored as if it were imagery.
    """
    # Send a GET request to the provided location url, using API Key for
    # authentication; the context manager releases the streamed connection
    # even if writing the file fails
    with requests.get(url, stream=True, auth=(PLANET_API_KEY, "")) as res:
        res.raise_for_status()
        # If no filename argument is given
        if not filename:
            if "content-disposition" in res.headers:
                # Construct a filename from the API response
                filename = res.headers["content-disposition"].split("filename=")[-1].strip("'\"")
            else:
                # Construct a filename from the location url
                url_parts = url.split("=")
                if len(url_parts) > 1:
                    filename = url_parts[1][:10]
                else:
                    # url without any "=": fall back to its last path segment
                    filename = url.rstrip("/").split("/")[-1]
        # Save the file
        target_path = os.path.join(os.path.expanduser(data_dir), filename)
        with open(target_path, "wb") as f:
            for chunk in res.iter_content(chunk_size=64 * 1024):
                if chunk:  # filter out keep-alive new chunks
                    f.write(chunk)

    return filename

## Filters

In [7]:
# individual search filters; all of them have to match (see combined_filter)

# - geometry: items have to overlap with our AOI
geometry_filter = dict(
    type="GeometryFilter",
    field_name="geometry",
    config=geojson_geometry,
)

# - date range: acquisition date within the retrieval interval of the AOI
date_range_filter = dict(
    type="DateRangeFilter",
    field_name="acquired",
    config=dict(
        gte=df_aoi.loc[aoi, "date_retrieve_min"],  # greater than or equal to
        lte=df_aoi.loc[aoi, "date_retrieve_max"],  # less than or equal to
    ),
)

# - cloud coverage: at most cloud_cover_max
cloud_cover_filter = dict(
    type="RangeFilter",
    field_name="cloud_cover",
    config=dict(lte=cloud_cover_max),
)

# - sun elevation: at least sun_elevation_min
sun_elevation_filter = dict(
    type="RangeFilter",
    field_name="sun_elevation",
    config=dict(gte=sun_elevation_min),
)

# combine filters with a logical AND
combined_filter = dict(
    type="AndFilter",
    config=[
        geometry_filter,
        date_range_filter,
        cloud_cover_filter,
        sun_elevation_filter,
    ],
)


## Searching: Items and Assets

In [8]:
# API request object
search_request = {
  "interval": "day",
  "item_types": [item_type],
  "filter": combined_filter
}

# Send the POST request to the API quick search endpoint
search_result = session.post(quick_url, json=search_request)
# Fail right here with the HTTP error (e.g. malformed filter, rate limit)
# instead of with a misleading KeyError / JSON decoding error further down
search_result.raise_for_status()

# srj is a nested dict with the following keys at the top level:
# "_links"
# "features" - a list of the retrieved items
# "type"
# NOTE(review): only this single response is evaluated; if the API splits
# large result sets into pages, further pages are not fetched - confirm
# against the API documentation when expecting many items.
srj = search_result.json()
if be_verbose:
    # pretty-print
    p(srj)

print(f"found {len(srj['features'])} items")

found 46 items


## Activation

In [9]:
# loop over features and their assets to activate

# number of activation responses per HTTP status code; the three expected
# codes are pre-set so that they always show up in the summary, any other
# code (e.g. rate limiting) is added on the fly instead of raising a KeyError
status_count = {202: 0,  # The request has been accepted and the activation will begin shortly.
                204: 0,  # The asset is already active and no further action is needed.
                401: 0   # The user does not have permissions to download this file.
                }
# one entry per feature (same order as srj["features"]): the set of assets
# which are available, requested and permitted for download
assets_get_list = []
for feature in srj["features"]:
    assets_url = feature["_links"]["assets"]
    res = session.get(assets_url)
    # Assign a variable to the item's assets url response
    assets = res.json()
    if be_verbose:
        print("available assets: " + str(assets.keys()))
    # set of assets which are available and desired
    assets_available = set(assets.keys()).intersection(set(asset_type))
    if be_verbose:
        print("available and requested assets: " + str(assets_available))
    # list of assets for which we have permission to download; tolerate a
    # missing or empty permission list and "download" at any position in it
    assets_permitted = [key for key in assets
                        if "download" in assets[key].get("_permissions", [])]
    # intersection of both
    assets_get = assets_available.intersection(set(assets_permitted))
    if be_verbose:
        print("available, requested and permitted assets: " + str(assets_get))
    # put in list
    assets_get_list.append(assets_get)
    if not dry_run:
        # nothing happens here if assets_get is empty
        for ass in assets_get:
            activation_url = assets[ass]["_links"]["activate"]
            # Send a request to the activation url to activate the item
            res_activation = session.get(activation_url)
            # update status count
            code = res_activation.status_code
            status_count[code] = status_count.get(code, 0) + 1

p(status_count)

{
  "202": 0,
  "204": 46,
  "401": 0
}


## Download

In [10]:
# now check status and download once available
# note: as the downloading function is serial in nature, so is this loop

# pause between two status polls of the same item (seconds)
poll_interval_s = 1
# maximal time to wait for the assets of one item to become active (seconds);
# prevents an endless loop if an activation fails or never completes
poll_timeout_s = 1800

if not dry_run:
    if (status_count[202] + status_count[204]) > 0:
        # assets_get_list holds one set of asset names per feature, in the
        # same order as srj["features"]
        for feature, assets_wanted in zip(srj["features"], assets_get_list):
            assets_url = feature["_links"]["assets"]
            # assets of this item which still have to be downloaded; an item
            # without any downloadable asset is skipped right away
            pending = set(assets_wanted)
            deadline = time.monotonic() + poll_timeout_s
            while pending:
                res = session.get(assets_url)
                # Assign a variable to the item's assets url response
                assets = res.json()
                # iterate over a sorted copy as pending is modified in the loop
                for ass in sorted(pending):
                    if assets[ass]["status"] == 'active':
                        location_url = assets[ass]["location"]
                        print(feature["id"] + ": downloading " + ass + "...")
                        download_ass(location_url, data_dir=data_dir)
                        # each asset is downloaded exactly once
                        pending.discard(ass)
                if not pending:
                    break
                if time.monotonic() >= deadline:
                    print(feature["id"] + ": giving up on " + ", ".join(sorted(pending)))
                    break
                # wait a bit before next try
                time.sleep(poll_interval_s)
        print("done")
    else:
        print("no single item could be activated or was already activated")

20171216_015808_103e: downloading visual...
20171127_032332_1043: downloading visual...
20171117_032237_1048: downloading visual...
20171115_015852_103e: downloading visual...
20171115_015854_103e: downloading visual...
20171115_015853_103e: downloading visual...
20171115_032324_100d: downloading visual...
20171024_015353_103e: downloading visual...
20171024_015354_103e: downloading visual...
20171009_015551_0f3f: downloading visual...
20171009_015552_0f3f: downloading visual...
20170907_015647_1022: downloading visual...
20170927_015722_1039: downloading visual...
20170901_015249_0f4e: downloading visual...
20170920_015441_103b: downloading visual...
20170920_015439_103b: downloading visual...
20170920_015440_103b: downloading visual...
20170920_015541_0f17: downloading visual...
20170920_015547_0f17: downloading visual...
20170920_015546_0f17: downloading visual...
20170920_015740_0f28: downloading visual...
20170920_015548_0f17: downloading visual...
20170920_015736_0f28: downloadin

In [11]:
# when done, close the HTTP session that was created in the API setup cell
# (session = requests.Session()); no further API requests are made after this
session.close()