There are large regions of the planet which, although inhabited, remain unmapped to this day. In the past, DigitalGlobe has launched crowdsourcing campaigns to detect remote population centers in Ethiopia, Sudan and Swaziland in support of NGO vaccination and aid distribution initiatives. Beyond DigitalGlobe, there are other initiatives under way to fill in the gaps in the global map, aiding first responders in their effort to provide relief to vulnerable, yet inaccessible, people.

The area of interest consists of 9 WorldView-2 and 2 GeoEye-1 image strips collected between January 2015 and May 2016 over northeastern Nigeria, close to, as well as on, the border with Niger and Cameroon. We picked 4 WorldView-2 strips, divided them into square chips of side 115m (250 pixels at sensor resolution) and asked our crowd to label them as ‘Buildings’ or ‘No Buildings’. The output of the crowdsourcing campaign is the file train.geojson, which contains the labeled chip geometries.

We used the labeled data to train the same neural net that we used [here](http://gbdxstories.digitalglobe.com/swimming-pools/) and deployed the trained model on the rest of the strips. 

The full story is [here](http://gbdxstories.digitalglobe.com/building-detection/).

In [None]:
# Specify your credentials and create a gbdx interface

import os

# Fill in your own GBDX credentials here; every value is blank in this template.
os.environ.update({
    'GBDX_USERNAME': '',
    'GBDX_PASSWORD': '',
    'GBDX_CLIENT_ID': '',
    'GBDX_CLIENT_SECRET': '',
})

import gbdxtools

# Created only after the environment variables above are set
# (presumably gbdxtools reads them when the interface is built -- confirm
# against the gbdxtools documentation).
gbdx = gbdxtools.Interface()

In [None]:
from os.path import join
import uuid

# specify location of input files
input_location = 's3://gbd-customer-data/32cbab7a-4307-40c8-bb31-e2de32f940c2/platform-stories/building-detection'

# train task
train_task = gbdx.Task('train-cnn-classifier')

# (port name, value) pairs, applied to the task inputs in the order listed.
# Every value is passed as a string.
train_inputs = (
    ('images', join(input_location, 'images')),
    ('geojson', join(input_location, 'train-geojson')),
    ('classes', 'No Buildings, Buildings'),  # classes in train.geojson

    # hyperparameters
    ('nb_epoch', '75'),                # validation loss plateaus at around 60 - 70 epochs
    ('train_size', '5000'),            # enough to get a reliable model, too much higher will make training too slow
    ('learning_rate', '0.001'),
    ('max_side_dim', '245'),           # chip side at sensor resolution
    ('resize_dim', '(3, 150, 150)'),   # down sample chips due to memory constraints
    ('two_rounds', 'False'),           # second round results in low recall
    ('test_size', '1500'),
    ('bit_depth', '8'),
    ('batch_size', '32'),              # low enough to fit into memory
)

for port_name, port_value in train_inputs:
    setattr(train_task.inputs, port_name, port_value)

# deploy task
deploy_ids = [
    '103001003D8CC700',  # WV-2
    '1030010041B6F800',  # WV-2
    '1030010051A75500',  # WV-2
    '1030010054A8BD00',  # WV-2
    '1030010055AF2D00',  # WV-2
    '10504100120ADF00',  # GE-1
    '1050410012CDC100',  # GE-1
]


def _make_deploy_task(catalog_id):
    """Return a 'deploy-cnn-classifier' task configured for one catalog id.

    The task takes the model produced by ``train_task`` and reads its images
    and target geojson from the per-catalog-id folders under
    ``input_location``.
    """
    task = gbdx.Task('deploy-cnn-classifier')
    inputs = task.inputs
    inputs.model = train_task.outputs.trained_model.value     # Trained model from train_task
    inputs.images = join(input_location, 'deploy-images', catalog_id)
    inputs.geojson = join(input_location, 'target-geojsons', catalog_id)
    inputs.classes = 'No Buildings, Buildings'
    inputs.bit_depth = '8'
    inputs.min_side_dim = '0'
    inputs.max_side_dim = '245'
    return task


# One deploy task per catalog id, keyed by that id.
deploy_tasks = {}
for catid in deploy_ids:
    deploy_tasks[catid] = _make_deploy_task(catid)

# define workflow: the train task followed by every deploy task
# list(...) is needed because dict.values() returns a view on Python 3, and a
# list cannot be concatenated with a view; on Python 2 it is a harmless copy.
workflow = gbdx.Workflow([train_task] + list(deploy_tasks.values()))

# set output location to platform-stories/trial-runs/random_str within your bucket/prefix
# a fresh uuid4 is generated on each run of this cell, so every run gets its own folder
random_str = str(uuid.uuid4())
output_location = join('platform-stories/trial-runs', random_str)

# save workflow outputs
workflow.savedata(train_task.outputs.trained_model, join(output_location, 'trained_model'))

# save output from each deploy_task under its own catalog-id subfolder
# items() works on both Python 2 and Python 3; iteritems() was removed in Python 3
for catid, task in deploy_tasks.items():
    workflow.savedata(task.outputs.classified_geojson, join(output_location, catid, 'classified_geojson'))

In [None]:
# execute workflow: launches the train task together with every deploy task
# that was passed to gbdx.Workflow
workflow.execute()

In [None]:
# Inspect the workflow's status attribute; as the last expression in the cell,
# its value is displayed by the notebook
workflow.status

In [None]:
# Check under this location for the output
# print(...) with a single argument gives the same output on Python 2 and
# Python 3; the bare `print x` statement is a SyntaxError on Python 3
print(output_location)

In [None]:
# Create slippy map
from ipyleaflet import Map, TileLayer

# The access token of the current gbdx session is appended to both tile urls
token = gbdx.gbdx_connection.access_token

# These are the IDAHO TMS urls for the pansharpened image and for the building heat map
# We've saved them in IDAHO format in a custom bucket and are serving them through the TMS service
# NOTE(review): the token travels in the query string of a plain http:// url --
# confirm whether the tile service can be reached over https instead
url_image = 'http://idaho.geobigdata.io/v1/tile/platform-stories/969d2051-e456-4fa7-8d53-b5769287e069/{z}/{x}/{y}?bands=0,1,2&token=' + token
url_heatmap = 'http://idaho.geobigdata.io/v1/tile/platform-stories/f685a0a4-2425-4e3c-809e-c24c93813b31/{z}/{x}/{y}?bands=0,1,2&token=' + token

m = Map(center=[13.48,12.69], zoom=12)

# Layers are added bottom-up: the image first, then the half-transparent heat map
layer_specs = (
    (url_image, {}),
    (url_heatmap, {'opacity': 0.5}),
)
for layer_url, layer_options in layer_specs:
    m.add_layer(TileLayer(url=layer_url, **layer_options))

# launch map
m