# [SpaceNet on AWS](https://aws.amazon.com/public-datasets/spacenet/)
[SpaceNet](https://spacenetchallenge.github.io/) is a corpus of commercial satellite imagery and labeled training data being made available at no cost to the public to foster innovation in the development of computer vision algorithms to automatically extract information from remote sensing data.

The current SpaceNet corpus includes thousands of square kilometers of high-resolution imagery collected from DigitalGlobe’s commercial satellites, including 8-band multispectral data. This dataset is being made public to advance the development of algorithms that automatically extract geometric features such as roads, building footprints, and points of interest from satellite imagery. The currently available Areas of Interest (AOIs) are Rio de Janeiro, Paris, Las Vegas, Shanghai, and Khartoum. Please see the data catalog below for more information.

In [1]:
from geopyspark.geopycontext import GeoPyContext
# Instantiate a GeoPyContext with "SpaceNet" as the application name.
# NOTE(review): presumably this wraps the notebook's Spark context — confirm
# against the geopyspark version in use.
geopysc = GeoPyContext(appName="SpaceNet")

In [2]:
from shapely.geometry import Polygon
from functools import partial
import pyproj
from shapely.ops import transform
import os

# The data is read from a local copy because the S3 requester-pays bucket and
# rasterio don't play well together (vsis3/... does not exist).
def get_catalog_uri():
    """Return the local root directory of the SpaceNet Rio catalog.

    The result is the current working directory followed by
    ``/spacenet-dataset/AOI_1_Rio/`` (trailing slash included), so callers
    can append a prefix and a file name by plain string concatenation.
    """
    path_parts = (os.getcwd(), "/", "spacenet-dataset/", "AOI_1_Rio/")
    return "".join(path_parts)

def bounds_to_shape(bounds):
    """Build a rectangular Polygon from a bounding box.

    ``bounds`` must expose ``left``, ``top``, ``right`` and ``bottom``
    attributes. The ring is listed as top-left, top-right, bottom-right,
    bottom-left.
    """
    left, right = bounds.left, bounds.right
    top, bottom = bounds.top, bounds.bottom
    corners = [(left, top), (right, top), (right, bottom), (left, bottom)]
    return Polygon(corners)

def reprojected(shape, src_crs='epsg:32633', dst_crs='epsg:4326'):
    """Reproject a shapely geometry from one coordinate system to another.

    Args:
        shape: shapely geometry to transform.
        src_crs: EPSG code string of the CRS ``shape`` is currently in.
            Defaults to 'epsg:32633', the value previously hard-coded here.
        dst_crs: EPSG code string of the target CRS. Defaults to
            'epsg:4326' (longitude/latitude).

    Returns:
        A new geometry with every coordinate transformed into ``dst_crs``.
    """
    # NOTE(review): EPSG:32633 is UTM zone 33N, which does not cover
    # Rio de Janeiro — confirm the source CRS before relying on the default.
    project = partial(
        pyproj.transform,
        pyproj.Proj(init=src_crs),
        pyproj.Proj(init=dst_crs))
    return transform(project, shape)

In [3]:
import rasterio
from shapely.ops import cascaded_union
from geonotebook.wrappers.vector import GeoJsonData
from shapely.geometry import mapping
from geopyspark.geotrellis.catalog import _construct_catalog, _mapped_cached

def read_bounding_boxes_from_raster_data(prefix):
    """Collect the bounding box of every GeoTIFF under a catalog prefix.

    Args:
        prefix: directory path relative to the catalog root returned by
            get_catalog_uri(). It must end with '/' because file paths are
            built by plain string concatenation.

    Returns:
        A list with one bounds_to_shape() polygon per '.tif' file found
        directly in that directory.
    """
    raster_dir = get_catalog_uri() + prefix

    def get_bounds(key):
        # The context manager closes the dataset even if reading its bounds
        # raises, replacing the manual try/finally + close().
        with rasterio.open(raster_dir + key) as src:
            # NOTE: bounds are returned exactly as stored in the raster;
            # reprojection to lon/lat is currently not applied here.
            return bounds_to_shape(src.bounds)

    return [get_bounds(key)
            for key in os.listdir(raster_dir)
            if key.endswith('.tif')]

def union_polygons_via_shapely(polygons):
    """Merge an iterable of shapely polygons into a single geometry.

    Args:
        polygons: iterable of shapely geometries to dissolve together.

    Returns:
        The union of all input geometries as one shapely geometry.
    """
    # cascaded_union is deprecated in Shapely in favour of unary_union, which
    # produces the same result for polygon inputs. Imported locally so this
    # function does not rely on a new top-level import.
    from shapely.ops import unary_union

    return unary_union(polygons)

def center_map(polygon_union):
    """Center the notebook map ``M`` on the centroid of ``polygon_union``.

    The centroid's x and y are passed to ``M.set_center`` together with a
    fixed third argument of 11.
    """
    center = polygon_union.centroid
    M.set_center(center.x, center.y, 11)

def save_polygon_union_as_geojson(polygon_union):
    """Wrap a shapely geometry in a GeoJsonData object.

    Despite the name, nothing is written to disk: the geometry is converted
    to a GeoJSON-like mapping and handed to GeoJsonData, which is returned.
    """
    geojson_mapping = mapping(polygon_union)
    return GeoJsonData(geojson_mapping)

def add_vectordata_layer(vd, prefix, color):
    """Add vector data to the notebook map ``M`` as a named, colored layer.

    Args:
        vd: vector data object to display (e.g. a GeoJsonData instance).
        prefix: used as the layer name.
        color: single color value; passed to the map as a one-element list.
    """
    # The value returned by add_layer was previously bound to an unused
    # local; it is not needed, so the call is made for its side effect only.
    M.add_layer(vd, name=prefix, colors=[color])
    
def get_footprint_of_imagery(prefix, color):
    """Draw the combined footprint of all rasters under ``prefix`` on the map.

    Steps: read each raster's bounding box, union the boxes into one
    geometry, center the map on it, then add it to the map as a layer named
    after ``prefix`` and drawn in ``color``.
    """
    boxes = read_bounding_boxes_from_raster_data(prefix)
    footprint = union_polygons_via_shapely(boxes)

    center_map(footprint)

    layer_data = save_polygon_union_as_geojson(footprint)
    add_vectordata_layer(layer_data, prefix, color)

In [4]:
# Layer colors written as 0xRRGGBB integers.
RED = 0xFF0000
GREEN = 0x00FF00
# Compressed 3-band 200m x 200m tiles with associated building footprint labels
processed_prefix = "processedData/processedBuildingLabels/3band/"
# 3-band (RGB) raster mosaic of the Rio de Janeiro area (2784 sq km) collected by WorldView-2
src_prefix = "srcData/rasterData/3-Band/"

In [5]:
# Draw the footprint of the processed tiles on the map in red.
get_footprint_of_imagery(processed_prefix, RED)

In [6]:
# Draw the footprint of the source raster mosaic on the map in green.
get_footprint_of_imagery(src_prefix, GREEN)