In [9]:
import glob
from osmread import parse_file, Way,Node,Relation
import shapely.geometry as shpgeo
import overpy
from shapely.ops import linemerge
import time
import geopandas as gp

In [2]:
# Shared Overpass API client; get_relation() and way2line() send their queries through it.
api = overpy.Overpass()
# Scratch inspection of the Polygon.centroid attribute; nothing is assigned here.
shpgeo.Polygon.centroid

<property at 0x4429d68>

In [3]:
def lonlat_in_way(way):
    """Return the ``(lon, lat)`` pair of every node in ``way.nodes``, in order."""
    return [(nd.lon, nd.lat) for nd in way.nodes]
def is_closed_way(way):
    """Tell whether a way forms a closed loop.

    :param way: object exposing a ``nodes`` sequence.
    :return: True when the first and last entries of ``way.nodes`` compare
        equal; False otherwise, including when the way has no nodes at all.
    """
    nodes = way.nodes
    # A way without nodes cannot be closed; guard so the indexing below does
    # not raise IndexError on empty input.
    if not nodes:
        return False
    return nodes[0] == nodes[-1]

def get_relation(relation, need_api=True, retry_delay=10, max_retries=None):
    """Fetch a relation together with its members through the Overpass client.

    :param relation: object whose ``id`` identifies the relation to download.
    :param need_api: when False nothing is downloaded and ``relation`` is
        returned unchanged.
    :param retry_delay: seconds to sleep after a failed query before retrying.
    :param max_retries: number of retries allowed after the first failed
        attempt; ``None`` (the default) keeps retrying indefinitely.
    :return: the first relation of the query result, or ``relation`` itself
        when ``need_api`` is False.
    :raises LookupError: if the query succeeds but contains no relation.
    :raises Exception: the last query error once ``max_retries`` is exhausted.
    """
    if not need_api:
        return relation

    failures = 0
    while True:
        try:
            result = api.query("""
                relation(%s);
                (._;>;);
                out;
                """ % relation.id)
            break
        except Exception as e:
            failures += 1
            if max_retries is not None and failures > max_retries:
                raise
            print(e)
            time.sleep(retry_delay)

    # Pick the element outside the retry loop: an empty answer is not a
    # transient failure, and retrying it would never terminate.
    found = result.relations
    if not found:
        raise LookupError('Overpass returned no relation with id %s' % relation.id)
    return found[0]

def way2line(way, need_api=True, retry_delay=10, max_retries=None):
    """Build a shapely LineString from the nodes of a way.

    :param way: object with an ``id``; when ``need_api`` is False it must
        already expose ``nodes`` carrying ``lon``/``lat``.
    :param need_api: when True the way is first re-downloaded (with its nodes)
        through the Overpass client, using ``way.id``.
    :param retry_delay: seconds to sleep after a failed query before retrying.
    :param max_retries: number of retries allowed after the first failed
        attempt; ``None`` (the default) keeps retrying indefinitely.
    :return: ``shpgeo.LineString`` built from the way's (lon, lat) pairs.
    :raises LookupError: if the query succeeds but contains no way.
    :raises Exception: the last query error once ``max_retries`` is exhausted.
    """
    if need_api:
        failures = 0
        while True:
            try:
                result = api.query("""
                way(%s);
                (._;>;);
                out;
                """% way.id)
                break
            except Exception as e:
                failures += 1
                if max_retries is not None and failures > max_retries:
                    raise
                print(e)
                time.sleep(retry_delay)

        # Pick the element outside the retry loop: an empty answer is not a
        # transient failure, and retrying it would never terminate.
        found = result.ways
        if not found:
            raise LookupError('Overpass returned no way with id %s' % way.id)
        way = found[0]

    coors = lonlat_in_way(way)
    return shpgeo.LineString(coors)

def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points
    on the earth (specified in decimal degrees).

    :param lon1: longitude of the first point, in decimal degrees.
    :param lat1: latitude of the first point, in decimal degrees.
    :param lon2: longitude of the second point, in decimal degrees.
    :param lat2: latitude of the second point, in decimal degrees.
    :return: distance in metres, computed on a sphere of radius 6367 km.
    """
    from math import radians, cos, sin, asin, sqrt
    # convert decimal degrees to radians
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula
    dlon = lon2 - lon1
    dlat = lat2 - lat1
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    # Rounding can push `a` marginally outside [0, 1] (e.g. for antipodal
    # points), which would make sqrt()/asin() raise "math domain error".
    a = min(1.0, max(0.0, a))
    c = 2 * asin(sqrt(a))
    km = 6367 * c
    m = km * 1000
    return m
       
def handle_poly(name_pattern,poly):
    """Format one polygon as a tab-separated record line.

    The line holds, in order: ``name_pattern``, the centroid as ``"x,y"``
    with six decimals, the largest centroid-to-exterior distance as returned
    by ``get_max_dis_from_center_to_ext``, and the list of exterior
    coordinates; it ends with a newline.

    :param name_pattern: label written in the first column.
    :param poly: polygon exposing ``centroid.coords`` and ``exterior.coords``.
    :return: the formatted line as a string.
    """
    ext_coords = list(poly.exterior.coords)
    # Hand the centroid over as a plain coordinate tuple: the helper unpacks
    # it with `lon, lat = centr`, exactly as get_cntr_radius() feeds it.
    cntr = poly.centroid.coords[0]
    radius = get_max_dis_from_center_to_ext(cntr, ext_coords)
    center_str = '%.6f,%.6f' % (cntr[0], cntr[1])
    return '%s\t%s\t%s\t%s\n' % (name_pattern, center_str, radius, ext_coords)
    

In [4]:
def remove_equal_shpobj(objs):
    """Find duplicates among shapes, using an R-tree to limit comparisons.

    Every object must expose ``bounds`` and ``equals(other)``. Objects are
    visited in index order; the first member of each group of equal objects
    is kept and the others are reported as its duplicates.

    :param objs: indexable sequence of shape objects.
    :return: ``(keep, equal_pair)`` where ``keep`` is the ascending list of
        indices to retain and ``equal_pair`` lists ``(kept_idx, dropped_idx)``
        tuples.
    :raises Exception: whatever the index raises while inserting a bounding
        box; the offending item is printed before the error is re-raised.
    """
    import rtree

    tree_idx = rtree.index.Index()
    objs_bounds = [o.bounds for o in objs]
    for i, bounds in enumerate(objs_bounds):
        try:
            tree_idx.insert(i, bounds)
        except Exception:
            print('%s %s %s' % (i, bounds, objs[i]))
            # Bare raise keeps the original traceback.
            raise

    keep = []
    equal_pair = []
    exclude_idx = set()
    for i, obj in enumerate(objs):
        if i in exclude_idx:
            continue
        keep.append(i)
        # Only candidates whose bounding boxes intersect can be equal.
        for j in tree_idx.intersection(objs_bounds[i]):
            if i != j and obj.equals(objs[j]):
                equal_pair.append((i, int(j)))
                exclude_idx.add(int(j))

    return keep, equal_pair
def rltn2dictShp(relation, sub_rltn=False):
    """Turn an overpy relation into shapely geometries grouped by type.

    Members are resolved and sorted into nodes and ways; member relations are
    walked recursively so that their nodes and ways are included as well.

    :param relation: relation whose ``members`` can each be ``resolve()``d.
    :param sub_rltn: internal flag used by the recursion. When True the raw
        ``(nodes, ways)`` lists are returned instead of geometries.
    :return: ``(nodes, ways)`` when ``sub_rltn`` is True; otherwise a dict
        with keys ``'Point'``, ``'LineString'`` and ``'Polygon'``. Equal
        points and equal lines are de-duplicated, the lines are merged, and
        every merged line that is a ring becomes a Polygon.
    """
    nodes, ways, sub_nodes, sub_ways = [], [], [], []
    for m in relation.members:
        obj = m.resolve()
        if isinstance(obj, overpy.Node):
            nodes.append(obj)
        elif isinstance(obj, overpy.Way):
            ways.append(obj)
        elif isinstance(obj, overpy.Relation):
            # Recurse with this function's own (relation, sub_rltn) signature.
            r_nodes, r_ways = rltn2dictShp(obj, True)
            sub_nodes.extend(r_nodes)
            sub_ways.extend(r_ways)
    # Append nested members before the early return so that relations nested
    # more than one level deep are not silently dropped.
    nodes.extend(sub_nodes)
    ways.extend(sub_ways)
    if sub_rltn:
        return nodes, ways

    # NOTE(review): node2pt is not defined in the visible part of this
    # notebook - confirm it is provided elsewhere.
    points = [node2pt(node) for node in nodes]
    keep_pts_idx, _ = remove_equal_shpobj(points)
    points = [points[i] for i in keep_pts_idx]

    lines = [way2line(way,False) for way in ways]
    keep_lines_idx, _ = remove_equal_shpobj(lines)
    lines = [lines[i] for i in keep_lines_idx]

    dict_shp= {'Point': points, 'LineString': [], 'Polygon': []}
    if lines:
        merged = linemerge(lines)
        # geom_type / .geoms are available on both old and current shapely,
        # unlike the `type` alias and direct iteration of multi-geometries.
        if merged.geom_type == 'LineString':
            merged = [merged]
        else:
            merged = list(merged.geoms)
        for ln in merged:
            if ln.is_ring:
                dict_shp['Polygon'].append(shpgeo.Polygon(ln))
            else:
                dict_shp['LineString'].append(ln)
    return dict_shp
def merge_within_by_list_shp(list_shp):
    """Keep only the shapes at the top level of the "within" hierarchy.

    The shapes are spatially joined against themselves with the ``within``
    predicate. The resulting (node, parent) index pairs are handed to the
    project helper ``find_tree``, and the rows it reports with
    ``parent == -1`` are selected.

    :param list_shp: list of geometries used as the ``geometry`` column.
    :return: ``.values`` of the selected rows of the one-column frame.
    """
    import geopandas as gp
    import pandas as pd
    from other_utils import find_tree

    shapes_gdf = gp.GeoDataFrame(list_shp,columns=['geometry'])
    within_pairs = gp.tools.sjoin(shapes_gdf, shapes_gdf, op='within')

    child_idx = within_pairs.index.values
    parent_idx = within_pairs.index_right.values
    raw_tree = pd.DataFrame(zip(child_idx, parent_idx), columns=['node','parent'])

    # presumably find_tree marks roots with parent == -1; verify in other_utils
    tree = find_tree(raw_tree)
    root_idx = tree[tree.parent==-1].node.values
    return shapes_gdf[shapes_gdf.index.isin(root_idx)].values

def rltn2mergedListShp(relation):
    """Convert a relation to shapes and keep the top-level ones.

    All geometry lists returned by ``rltn2dictShp`` are concatenated (in the
    dict's value order) and passed through ``merge_within_by_list_shp``.
    """
    shapes_by_kind = rltn2dictShp(relation)
    all_shapes = [shp for group in shapes_by_kind.values() for shp in group]
    return merge_within_by_list_shp(all_shapes)


def rltn2mergedFlattenListShp(relation):
    """Return the result of ``rltn2mergedListShp`` flattened by one level.

    Each item yielded by ``rltn2mergedListShp(relation)`` is itself iterated,
    and its elements are collected into a single flat list.
    """
    return [shp for row in rltn2mergedListShp(relation) for shp in row]

def rltn2convexhull(relation):
    """Return the convex hull of a relation's flattened, merged shapes.

    NOTE(review): the shapes are wrapped in a MultiPolygon, while
    rltn2dictShp() can also produce Point and LineString members - confirm
    that only polygons reach this point.
    """
    shapes = rltn2mergedFlattenListShp(relation)
    combined = shpgeo.MultiPolygon(shapes)
    return combined.convex_hull

In [5]:
import sys, os
# Put the parent directory and ../utils at the front of the module search path
# so project-local modules can be imported (presumably where `other_utils`,
# used by merge_within_by_list_shp, lives - confirm).
sys.path.insert(0, os.path.abspath('..'))
sys.path.insert(0, os.path.abspath('../utils'))


In [6]:

def get_max_dis_from_center_to_ext(centr,ext_coords):
    """Return the largest haversine distance from a centre to a set of points.

    :param centr: ``(lon, lat)`` pair of the centre.
    :param ext_coords: iterable of ``(lon, lat)`` pairs.
    :return: the maximum of ``haversine(centre, point)`` over all points.
    """
    c_lon, c_lat = centr
    distances = (haversine(c_lon, c_lat, p_lon, p_lat) for p_lon, p_lat in ext_coords)
    return max(distances)

def get_cntr_radius(poly):
    """Return a polygon's centroid, its enclosing radius and exterior coords.

    :param poly: polygon exposing ``centroid.coords`` and ``exterior.coords``.
    :return: ``(centre, radius, exterior_coords)`` where ``centre`` is the
        first centroid coordinate and ``radius`` comes from
        ``get_max_dis_from_center_to_ext(centre, exterior_coords)``.
    """
    exterior = poly.exterior.coords
    center = poly.centroid.coords[0]
    return center, get_max_dis_from_center_to_ext(center, exterior), exterior

def osm2polys(osm_data):
    """Convert one parsed OSM element into a list of shapes.

    :param osm_data: an osmread ``Way`` or ``Relation``; anything else is
        accepted but yields no shapes.
    :return: for a Way, a one-element list holding the Polygon built from the
        line returned by ``way2line``; for a Relation, the list produced by
        ``rltn2mergedFlattenListShp`` on the relation fetched with
        ``get_relation``; otherwise an empty list.
    """
    if isinstance(osm_data, Way):
        return [shpgeo.Polygon(way2line(osm_data))]
    if isinstance(osm_data, Relation):
        return rltn2mergedFlattenListShp(get_relation(osm_data))
    # Unsupported element types used to fall through and return None, which
    # breaks callers that iterate over the result.
    return []

def osms2flattenPolys(data_file_dir, geojson_file):
    """Build one record per shape for every OSM file found in a directory.

    Each file must parse to exactly one OSM element. The place name is the
    file name without its extension, with spaces replaced by underscores.

    :param data_file_dir: directory whose entries are parsed with
        ``parse_file``.
    :param geojson_file: not used by this function; kept so existing calls
        keep working.
    :return: list of ``[label, centre, radius, shape]`` rows where ``label``
        is ``'<place>##<n>'`` and ``radius`` is the value from
        ``get_cntr_radius`` plus 1000.
    :raises AssertionError: if a file does not contain exactly one element.
    """
    import os

    place_polys = []
    # Sorted so the row order does not depend on directory listing order.
    file_paths = sorted(glob.glob(os.path.join(data_file_dir, '*')))
    for i, file_path in enumerate(file_paths):
        # os.path handles the platform's separator; splitting on a literal
        # backslash only worked for Windows-style paths.
        base_name = os.path.basename(file_path)
        place = os.path.splitext(base_name)[0].replace(' ', '_')

        osm_elements = list(parse_file(file_path))
        assert len(osm_elements) == 1, '{}th {}: len!=1'.format(i, file_path)
        print('%s %s %s' % (i, place, len(osm_elements)))

        for cnt, poly in enumerate(osm2polys(osm_elements[0])):
            cntr, radius, _ = get_cntr_radius(poly)
            # 1000 is added to the radius; the notebook stores this value in
            # a column called 'radius+1km'.
            place_polys.append(['{}##{}'.format(place, cnt), cntr, radius + 1000, poly])

    print(len(place_polys))
    return place_polys


In [7]:

# Directory of OSM XML exports; it is the `data_file_dir` argument in the
# (commented-out) osms2flattenPolys call of the following cell.
OSM_XML_DIR = u'../data/OSM_national_park@US/'
# GeoJSON file the place polygons are read from and written back to in later cells.
PLACE_POLYS_NP = '../data/place_polys_np.geojson'

In [11]:
# The two commented-out lines build the frame from the raw OSM files
# (presumably a one-off step, disabled once the GeoJSON file existed - confirm).
# place_polys = osms2flattenPolys(OSM_XML_DIR,PLACE_POLYS_NP)
# gpdf = gp.GeoDataFrame(place_polys,columns=['place##cnt', 'cntr','radius+1km','geometry'])
# Load the place polygons from the GeoJSON file instead.
gpdf = gp.read_file(PLACE_POLYS_NP)

In [12]:
# Show the rows whose 'radius+1km' value exceeds 32000 (the column is built
# from haversine() distances, which are in metres, plus 1000).
gpdf[gpdf['radius+1km']>32000]

Unnamed: 0,cntr,geometry,id,place##cnt,radius+1km
52,"(-75.24275410620281, 38.06367569552205)","POLYGON ((-75.38908549999999 37.8967133, -75.3...",52,Assateague_Island_NS_(ASIS)##0,32993.988803
54,"(-102.49910933011147, 43.70423794640537)","POLYGON ((-102.793833 43.6877596, -102.7938206...",54,Badlands_NP_(BADL)##0,50848.913056
59,"(-164.40373530978803, 65.95458549197815)","POLYGON ((-167.5049829 65.829255, -167.511852 ...",59,Bering_Land_Bridge_NPRES_(BELA)##0,144080.7786
60,"(-103.22965673900961, 29.29181709350466)","POLYGON ((-103.7811917 29.2427238, -103.7812 2...",60,Big_Bend_NP_(BIBE)##0,55047.881177
61,"(-81.08132634216997, 25.97024338948424)","POLYGON ((-81.3832 25.842689, -81.370538800000...",61,Big_Cypress_NPRES_(BICY)##0,46326.310996
63,"(-84.66519057791041, 36.5327669596277)","POLYGON ((-84.78574980000001 36.6015051, -84.7...",63,Big_South_Fork_NRRA_(BISO)##0,32542.588715
64,"(-108.20577735631353, 45.03332513676874)","POLYGON ((-108.296018 45.132289, -108.290915 4...",64,Bighorn_Canyon_NRA##0,39916.802808
69,"(-81.66096563928171, 36.128231674785546)","POLYGON ((-83.2862768 35.5178981, -83.2851898 ...",69,Blue_Ridge_PKWY##0,168168.707947
70,"(-79.76253427187021, 37.35793617868595)","POLYGON ((-79.8720154 37.249429, -79.8733582 3...",70,Blue_Ridge_PKWY##1,42196.275031
71,"(-79.16918257630627, 37.78645835963287)","POLYGON ((-79.5000254 37.5322428, -79.4996397 ...",71,Blue_Ridge_PKWY##2,41560.53887


In [27]:
ddir = '../data/social_media_raw/flickr/np/'
# Place names already crawled: take each file's base name and drop its last
# two '_'-separated fields. The base name is extracted first so that
# underscores in the directory path and the platform's path separator cannot
# affect the result.
crawled_places = set(os.path.basename(f).rsplit('_', 2)[0] for f in glob.glob(ddir + '*.*'))
len(crawled_places)

52

In [172]:

# Store the centre column as strings before serialising the frame.
gpdf.cntr = gpdf.cntr.apply(str)
# Overwrites the PLACE_POLYS_NP file with the current contents of gpdf.
with open(PLACE_POLYS_NP,'w') as f:
    f.write(gpdf.to_json())

In [160]:
# Show the rows whose label contains 'Blue_Ridge_PKWY'.
gpdf[gpdf['place##cnt'].str.contains('Blue_Ridge_PKWY')]

Unnamed: 0,place##cnt,cntr,radius+1km,geometry,id
69,Blue_Ridge_PKWY##0,"(-81.6609656393, 36.1282316748)",168168.707947,POLYGON ((-83.38083338844375 35.55018165724734...,1
70,Blue_Ridge_PKWY##1,"(-79.7625342719, 37.3579361787)",42196.275031,"POLYGON ((-80.2247187355749 37.13233249453511,...",1
71,Blue_Ridge_PKWY##2,"(-79.1691825763, 37.7864583596)",41560.53887,POLYGON ((-79.60002433837357 37.53270358645526...,1


In [158]:
# geopandas is already imported as `gp` in the first cell; repeated here.
import geopandas as gp
# NOTE(review): this replaces gpdf's geometry in place, so re-running the cell
# buffers the already-buffered shapes again, and any later write of gpdf saves
# the buffered geometry. The 0.1 distance is in the geometry's own coordinate
# units (presumably degrees of lon/lat - confirm).
gpdf.geometry = gpdf.buffer(0.1)
# Give every row the same id value.
gpdf['id'] = 1
# Emit the frame as a JavaScript assignment: var np = <GeoJSON>;
with open('nationalpark_polygons.js','w') as f:
    f.write('var np = %s;'%gpdf.to_json())

In [5]:
# Write one tab-separated line per polygon: name, centroid "x,y", bounding box.
# NOTE(review): `polys` and `national_parks` are not defined in the visible
# part of this notebook - confirm where they come from before re-running.
with open('nationalpark_boundbox_osm.tsv','w') as f:
    for i, poly in enumerate(polys):
        # Names are matched to polygons by position.
        name_pattern = national_parks[i]
        bbox = poly.bounds
        # Bounding-box values joined with commas.
        bbox_str = ','.join([str(x)for x in bbox])  
        center_str = '%.6f,%.6f' %(poly.centroid.coords[0][0], poly.centroid.coords[0][1])
        f.write('%s\t%s\t%s\n' %(name_pattern, center_str, bbox_str))
        

unicode

In [178]:
# Check which Python type the first `cntr` value has.
type(place_gpdf.cntr.iloc[0])

tuple

In [21]:
# Rebuild (label, centre, radius) triples from the saved polygon file.
place_gpdf = gp.read_file(PLACE_POLYS_NP)
# Radius as a string such as '33km': the value divided by 1000, truncated, plus one.
radius = place_gpdf['radius+1km'].apply(lambda x: '{}km'.format(int(x/1000)+1)).values
# NOTE(review): eval() executes whatever text is stored in the file;
# ast.literal_eval would parse plain tuple strings like "(x, y)" without
# running code. The second lambda swaps the pair order (presumably
# (lon, lat) -> (lat, lon) - confirm).
cntr = place_gpdf.cntr.apply(eval).apply(lambda x: (x[1],x[0]))
place = place_gpdf['place##cnt'].values
# Indexed as PLACES[0] in a later cell, which requires zip() to return a list.
PLACES = zip(place, cntr, radius)

In [28]:
# Bare name: not displayed, because it is not the last expression of the cell.
PLACES
# Is the label of the first PLACES entry among the crawled place names?
PLACES[0][0] in crawled_places

True

In [31]:
import datetime
# Scratch: a list holding ten references to the same datetime (1 Jan 2099).
[datetime.datetime(2099,1,1)]*10

[datetime.datetime(2099, 1, 1, 0, 0),
 datetime.datetime(2099, 1, 1, 0, 0),
 datetime.datetime(2099, 1, 1, 0, 0),
 datetime.datetime(2099, 1, 1, 0, 0),
 datetime.datetime(2099, 1, 1, 0, 0),
 datetime.datetime(2099, 1, 1, 0, 0),
 datetime.datetime(2099, 1, 1, 0, 0),
 datetime.datetime(2099, 1, 1, 0, 0),
 datetime.datetime(2099, 1, 1, 0, 0),
 datetime.datetime(2099, 1, 1, 0, 0)]