In [1]:
import os
os.environ["GBDX_THREADS"] = "64"

In [2]:
import os.path
import re
from itertools import groupby, chain, cycle, product as cartesian_prod
from collections import Counter

from shapely.geometry import shape, box, Polygon, MultiPolygon
import numpy as np
from tqdm import tqdm_notebook as tqdm
from rtree.index import Index as SpatialIndex

from gbdxtools.vectors import Vectors
from gbdxtools import TmsImage

MAPS_API_KEY = "pk.eyJ1IjoiZGlnaXRhbGdsb2JlIiwiYSI6ImNqMXkyZXZsODAwYWszMmsyM3lvZHBzMWsifQ.EYqlvq6QWczWsvrEDDTf7g"

boundary = box(-97.803125,30.230669,-97.667427,30.306355) # Austin, TX
boundary = box(-74.047422,40.686561,-73.942108,40.730804) # Manhattan
tms = TmsImage(access_token=MAPS_API_KEY, zoom=18)
tms_region = tms[boundary]

In [3]:
from gbdxtools import Interface
gbdx = Interface()

osm_data = gbdx.vectors.query(shape(tms_region).wkt, query="item_type:* AND ingest_source:OSM", index="vector-osm-*")
osm_index = SpatialIndex()
for idx, rec in tqdm(enumerate(osm_data), total=len(osm_data)):
    osm_index.add(idx, shape(rec["geometry"]).bounds, rec)




In [4]:
item_types = set(chain(*[rec["properties"]["item_type"] for rec in osm_data]))

In [5]:
with open("nominatim.txt") as f:
    categories = [[s.strip() for s in re.split("\|+", l) if s.strip() != ""] for l in f.readlines()]
clean_categories = [cat[:3] for cat in categories if cat[-1] == "N" and cat[-2] == "-"]
keyfn = lambda rec: rec[1]
cats = {key: {(v[1], v[2]):v[0] for v in val} for key, val in groupby(sorted(clean_categories, key=keyfn), key=keyfn)}
# selected relevant 
groups = ["building", "natural", "man_made", "leisure", "historic", "aeroway", "landuse", "waterway", "highway"]
basis = {v:k for k,v in enumerate(list(sorted(reduce(lambda s,e: s + cats[e].values(), groups, []))))}
lut = {}
for grp in groups:
    lut.update(cats[grp])
    

In [6]:
def extract_nominatim(rec):
    return [lut[key] for key in rec["properties"]["attributes"].items() if key in lut]

def lookup_basis_vector(region):
    g = shape(region)
    observed_types =  Counter(chain(*[extract_nominatim(rec.object) 
                                      for rec in osm_index.intersection(g.bounds, objects=True)
                                      if shape(rec.object["geometry"]).intersects(g)]))
    return np.asarray([observed_types.get(key, 0) for key in basis]).astype(np.uint32)

In [7]:
from resnet import ResnetBuilder

Using TensorFlow backend.


In [8]:
model = ResnetBuilder.build_resnet_34((3, 301, 301), len(basis))
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

In [9]:
len(osm_data)

59266

In [10]:
# TODO: lookup from model
row_chunk = 301
col_chunk = 301

# NOTE: Dropping far boundary, should fix
row_lims = xrange(row_chunk, tms_region.shape[1], row_chunk)
col_lims = xrange(col_chunk, tms_region.shape[2], col_chunk)

training_data = []
for maxy, maxx in tqdm(cartesian_prod(row_lims, col_lims),
                       total=len(row_lims)*len(col_lims)):
    region = tms_region[:, (maxy-row_chunk):maxy, (maxx-col_chunk):maxx]
    training_data.append((region, lookup_basis_vector(region)))




In [11]:
def training_gen(data, batch_size=32):
    while True:
        input_batch = []
        output_batch = []
        source = cycle(data)
        for i in xrange(batch_size):
            img, cat = next(source)
            input_batch.append(np.rollaxis(img.read(), 0, 3))
            output_batch.append(cat)
        input_batch = np.stack(input_batch)
        output_batch = np.stack(output_batch)
        yield (input_batch, output_batch)

In [12]:
h = model.fit_generator(training_gen(training_data), int(len(training_data)/32), epochs=4)

Epoch 1/4
Epoch 2/4
Epoch 3/4
Epoch 4/4


In [15]:
test_data = next(training_gen(training_data))

In [13]:
result = model.evaluate_generator(training_gen(training_data), 10)

In [41]:
result

[32.633122253417966, 0.9375]

In [16]:
prediction = model.predict_on_batch(test_data[0])

In [17]:
import matplotlib.pyplot as plt
import ipywidgets as widgets

@widgets.interact(idx=widgets.IntSlider(min=0, max=31))
def plot_compare(idx):
    plt.figure(figsize=(12, 6))
    plt.subplot(1, 2, 1)
    plt.plot(prediction[idx])
    plt.subplot(1, 2, 2)
    plt.plot(test_data[1][idx])
    plt.show()

In [42]:
prediction[0]

array([  1.67201677e-06,   1.31542220e-06,   1.83119573e-06,
         3.19966739e-06,   1.46369973e-06,   1.90543733e-06,
         1.06112850e-06,   1.38313862e-06,   2.08316169e-06,
         2.08774190e-06,   4.66649226e-07,   1.04540777e-06,
         2.42854867e-06,   8.10807762e-07,   4.56580371e-01,
         4.07496125e-01,   4.50640619e-06,   2.26137786e-06,
         2.20196807e-06,   7.56107738e-07,   1.25965755e-06,
         1.03619561e-06,   1.83473026e-06,   1.22044082e-06,
         3.15752663e-06,   8.03252192e-07,   1.30945932e-06,
         8.44791145e-07,   3.16882074e-06,   3.53477458e-06,
         5.56500027e-07,   2.43677323e-06,   6.47734396e-07,
         2.17118554e-06,   1.38420205e-06,   3.19855712e-06,
         9.74881686e-07,   1.27624651e-06,   2.07351513e-06,
         1.08599727e-06,   1.06273069e-06,   1.46200193e-06,
         5.88018338e-07,   8.55151313e-07,   3.27587668e-06,
         1.20928030e-06,   1.68013526e-06,   1.34460947e-06,
         1.14904424e-06,

In [49]:
osm_data[7]

{u'geometry': {u'coordinates': [[[-97.721041, 30.276972],
    [-97.721018, 30.27686],
    [-97.721108, 30.276845],
    [-97.721147, 30.277026],
    [-97.721056, 30.27704],
    [-97.721041, 30.276972]]],
  u'type': u'Polygon'},
 u'properties': {u'access': {u'groups': [u'_ALL_'], u'users': [u'_ALL_']},
  u'attributes': {u'_osm_changeset': u'35519698',
   u'_osm_copyright': u'\xa9 OpenStreetMap contributors',
   u'_osm_license': u'http://www.opendatacommons.org/licenses/odbl',
   u'_osm_user_id': u'3341321',
   u'_osm_user_name': u'jseppi_atxbuildings',
   u'_osm_version': u'1',
   u'building': u'yes',
   u'height': u'4.6'},
  u'format': u'OSM',
  u'id': u'OSM-way-382131008',
  u'ingest_attributes': {u'osm_database': u'osm', u'osm_host': u'173.50.1.53'},
  u'ingest_date': u'2016-02-15T07:03:24.850Z',
  u'ingest_source': u'OSM',
  u'item_date': u'2015-11-22T23:13:53.000Z',
  u'item_type': [u'Building'],
  u'name': None,
  u'original_crs': u'EPSG:4326',
  u'source': None,
  u'text': u'yes |

In [50]:
basis

['Airport',
 'Allotment',
 'Apartment Block',
 'Archaeological Site',
 'Basin',
 'Battlefield',
 'Bay',
 'Beach',
 'Beach Resort',
 'Boat Ramp',
 'Boatyard',
 'Boundary Stone',
 'Bridleway',
 'Brownfield Land',
 'Building',
 'Building',
 'Building Block',
 'Building Entrance',
 'Bunker',
 'Bus Stop',
 'Byway',
 'Canal',
 'Cape',
 'Castle',
 'Cave Entrance',
 'Cemetery',
 'Chapel',
 'Church',
 'City Hall',
 'Cliff',
 'Coastline',
 'Commercial Area',
 'Commercial Building',
 'Common Land',
 'Conservation',
 'Construction',
 'Cycle Path',
 'Dam',
 'Derelict Canal',
 'Distance Marker',
 'Ditch',
 'Dock',
 'Dormitory',
 'Drain',
 'Emergency Access Point',
 'Faculty Building',
 'Farm',
 'Farm Building',
 'Farmland',
 'Farmyard',
 'Fell',
 'Fishing Area',
 'Flats',
 'Footpath',
 'Ford',
 'Forest',
 'Garage',
 'Garden',
 'Gate',
 'Glacier',
 'Golf Course',
 'Grass',
 'Greenfield Land',
 'Guided Bus Lane',
 'Hackerspace',
 'Hall',
 'Heath',
 'Highway under Construction',
 'Hospital Building',
 