Order/preprocess imagery, run boat-detector and spaceknow_ship_segmentation and collect runtime statistics.

In [1]:
# Set credentials

import os
os.environ['GBDX_USERNAME'] = ''
os.environ['GBDX_PASSWORD'] = ''
os.environ['GBDX_CLIENT_ID'] = '' 
os.environ['GBDX_CLIENT_SECRET'] = ''

import gbdxtools
gbdx = gbdxtools.Interface()

In [2]:
catids = ['1040050005DC6000',             # Sevastopol, WV03, 2017-08-09
          '1030010070611600',             # Halifax, WV02, 2017-08-25
          '104001002E7BBF00',             # Genoa, WV03, 2017-06-20  
          '105001000A1FFF00',             # Osaka, GE01, 2017-06-03
          '103001006100D200']             # Dubai, WV02, 2016-12-13

from os.path import join
          
wfids = {}          
          
for catid in catids:          
          
    # order
    order = gbdx.Task('Auto_Ordering', cat_id=catid)
    order.impersonation_allowed = True

    # acomped multispectral in utm
    aop1 = gbdx.Task('AOP_Strip_Processor',
                     data=order.outputs.s3_location.value,
                     bands='MS',
                     enable_dra=False,
                     enable_pansharpen=False,
                     ortho_epsg='UTM')     

    # acomped pansharpened with baselayer matching in utm
    # baselayer matching is optional but results in better colors
    aop2 = gbdx.Task('AOP_Strip_Processor',
                     data=order.outputs.s3_location.value,
                     enable_dra=False,                      # disable automatic dra
                     ortho_epsg='UTM')                         
    blm = gbdx.Task('baselayermatch',
                    data=aop2.outputs.data.value,
                    cloud_id=catid)                        # take clouds into account when dra'ing

    # acomped pansharpened per spaceknow settings
    aop3 = gbdx.Task('AOP_Strip_Processor',
                     data=order.outputs.s3_location.value,
                     ortho_pixel_size='0.5',
                     ortho_interpolation_type='Bilinear')      
                    
    # dsat boat detection
    bd = gbdx.Task('boat-detector',
                   ms_image=aop1.outputs.data.value,
                   ps_image=blm.outputs.data.value)

    # spaceknow boat detection
    # crop in 4 parts - the spaceknow task works on small chunks
    crop = gbdx.Task('CropGeotiff', data=aop3.outputs.data.value, num_partitions='4')                
          
    # each of the 4 parts goes to a separate spaceknow task
    sss = [0]*4    
    sss[0] = gbdx.Task('spaceknow_ship_segmentation', data=crop.outputs.data_0.value)
    sss[1] = gbdx.Task('spaceknow_ship_segmentation', data=crop.outputs.data_1.value)
    sss[2] = gbdx.Task('spaceknow_ship_segmentation', data=crop.outputs.data_2.value)
    sss[3] = gbdx.Task('spaceknow_ship_segmentation', data=crop.outputs.data_3.value)
      
    wf = gbdx.Workflow([order, aop1, aop2, aop3, blm, bd, crop] + sss)
    wf.savedata(aop1.outputs.data, join('platform-stories/boats', catid, 'ms'))
    wf.savedata(blm.outputs.data, join('platform-stories/boats', catid, 'ps'))
    wf.savedata(aop3.outputs.data, join('platform-stories/boats', catid, 'ps-sss'))
    wf.savedata(bd.outputs.detections, join('platform-stories/boats/detections', catid))
    for i in range(4):
        wf.savedata(sss[i].outputs.result, join('platform-stories/boats/detections-sss', catid, str(i)))
      
    wfids[catid] = wf.execute()

In [27]:
for catid in wfids:
    wf = gbdx.Workflow([])
    wf.id = wfids[catid]
    print wf.id, wf.status

 4675222631758838872 {u'state': u'complete', u'event': u'succeeded'}
4675222606140016285 {u'state': u'complete', u'event': u'succeeded'}
4675222645986135861 {u'state': u'complete', u'event': u'succeeded'}
4675222659649856198 {u'state': u'complete', u'event': u'succeeded'}
4675222593325639999 {u'state': u'complete', u'event': u'succeeded'}
4675222618665250491 {u'state': u'complete', u'event': u'succeeded'}


Calculate waiting time and execution time per km2.

In [28]:
import datetime
import numpy as np
import pyproj
from shapely.wkt import loads
from shapely.ops import transform
from functools import partial

# get datetime object from string timestamp
def get_time(timestamp):
    return datetime.datetime.strptime(timestamp[:-13], "%Y-%m-%dT%H:%M:%S")

# get area in km2 from wkt string (we need this for the execution time per km2 calculation)
def area_km2(wkt):
    shape = loads(wkt)
    proj = partial(pyproj.transform, pyproj.Proj(init='epsg:4326'),
                   pyproj.Proj('+proj=aea', lat1=shape.bounds[1], lat2=shape.bounds[3]))
    return transform(proj, shape).area/float(10**6)

submission_times, start_times, end_times, failures, wait_times, exec_times = {}, {}, {}, {} , {}, {}

for task in ['boat-detector', 'spaceknow_ship_segmentation']:
    submission_times[task] = {}
    start_times[task] = {}
    end_times[task] = {} 
    failures[task] = {}
    wait_times[task] = []
    exec_times[task] = []
    
for catid in wfids:
    
    # compute the area
    wkt =  gbdx.catalog.get_strip_footprint_wkt(catid)
    area = area_km2(wkt)
    
    wf = gbdx.Workflow([])
    wf.id = wfids[catid]

    for event in wf.events:

        if 'boat-detector' in event['task'] or 'spaceknow_ship_segmentation' in event['task']:

            if event['task'][0] == 'b':
                task = 'boat-detector'
            else:
                task = 'spaceknow_ship_segmentation'
            
            task_id = event['task_id']
            
            failures[task][task_id] = False
            
            if event['event'] == 'submitted':
                submission_times[task][task_id] = get_time(event['timestamp'])
            elif event['event'] == 'started':
                start_times[task][task_id] = get_time(event['timestamp'])
            elif event['event'] == 'succeeded':
                end_times[task][task_id] = get_time(event['timestamp'])
            elif event['event'] == 'failed':
                failures[task][task_id] = True
    
for task in ['boat-detector', 'spaceknow_ship_segmentation']:
    
    if task == 'spaceknow_ship_segmentation':
        area_task = area/4.0
    else:
        area_task = area
    
    for task_id in submission_times[task]:
    
        wait_times[task].append((start_times[task][task_id] - submission_times[task][task_id]).total_seconds()/60)
        if not failures[task][task_id]:
            exec_times[task].append((end_times[task][task_id] - start_times[task][task_id]).total_seconds()/(60*area_task))
    
    print '{} mean waiting time: {} min'.format(task, np.mean(wait_times[task]))
    print '{} mean execution time: {} min/km2'.format(task, np.mean(exec_times[task]))

boat-detector mean waiting time: 154.683333333 min
boat-detector mean execution time: 0.0155878934591 min/km2
spaceknow_ship_segmentation mean waiting time: 161.392361111 min
spaceknow_ship_segmentation mean execution time: 0.781218699136 min/km2
