In [89]:
from uszipcode import SearchEngine
import networkx as nx
import pickle
import folium as f

# Zip-code lookup engine. `simple_zipcode=False` selects the non-simple
# database, and its files are kept under ./zip_code_cache.
search = SearchEngine(
    simple_zipcode=False,
    db_file_dir="./zip_code_cache",
)

In [90]:
# Load the pickled Cambridge graph with the standard library.
# `nx.read_gpickle` was removed in NetworkX 3.0; `pickle.load` reads the same
# file on both old and new NetworkX versions.
# NOTE(review): unpickling can execute arbitrary code -- only load a file you trust.
with open("01_cambridge.gpickle", "rb") as handle:
    g = pickle.load(handle)

# View of (node_key, attribute_dict) pairs for every node in the graph.
nodes_data_subset = g.nodes(data=True)

## Populate dictionary of zip codes and node_ids.

### `zip_code_dict` maps each node id to its zip code; `nodes_by_zip` is the inverse, mapping each zip code to the list of its node ids

In [91]:
# Build `zip_code_dict` (node id -> zip code). Results are cached on disk so
# the slow coordinate lookups only run for nodes that are not cached yet.
ZIP_CACHE_PATH = 'zip_code_dict.pickle'
CHECKPOINT_EVERY = 100  # flush the cache to disk after this many new lookups


def _save_zip_cache(cache, path=ZIP_CACHE_PATH):
    """Write the node id -> zip code cache to `path` as a pickle."""
    with open(path, 'wb') as handle:
        pickle.dump(cache, handle, protocol=pickle.HIGHEST_PROTOCOL)


# NOTE(review): pickle.load can execute arbitrary code -- only use a cache file
# that this notebook produced.
try:
    with open(ZIP_CACHE_PATH, 'rb') as handle:
        zip_code_dict = pickle.load(handle)
except FileNotFoundError:
    # First run: no cache yet, so every node is looked up below.
    zip_code_dict = {}

count = 0          # number of nodes visited
new_entries = 0    # lookups added to the cache during this run
announced = False  # whether the "slow path" notice has been printed
for count, node_data in enumerate(nodes_data_subset, start=1):
    attrs = node_data[1]
    node_id = attrs["id"]
    if node_id in zip_code_dict:
        continue

    if not announced:
        # Print the notice once instead of once per missing node.
        print("missing zip codes being added! This might take a while...")
        announced = True

    matches = search.by_coordinates(attrs['lat'], attrs['lon'], returns=1)
    if not matches:
        # The search returned nothing; skip the node rather than crash on [0].
        print("no zip code found for node " + str(node_id))
        continue

    zip_code_dict[node_id] = matches[0].zipcode
    new_entries += 1
    # Checkpoint on new lookups only, so an interrupted run keeps its progress
    # without rewriting an unchanged cache.
    if new_entries % CHECKPOINT_EVERY == 0:
        _save_zip_cache(zip_code_dict)

_save_zip_cache(zip_code_dict)


# Generate the reverse dictionary: nodes by zip instead of zips by node.
# NOTE(review): this covers every id in the cache, which may include nodes
# from other graphs if the cache file is shared -- confirm.
nodes_by_zip = {}
for node_id, zip_code in zip_code_dict.items():
    nodes_by_zip.setdefault(zip_code, []).append(node_id)


## Use `area` library to find Polygon's geographic area

### First, a simple proof of concept with Wyoming

In [92]:
from area import area
import math

# Proof of concept: describe Wyoming by its four corners ([lon, lat] pairs)
# and compare the computed area with the hard-coded figure of 253600 km^2.
wyoming_ring = [
    [-111.046768, 40.997963],
    [-111.055196, 45.001320],
    [-104.057691, 44.997377],
    [-104.053251, 41.001410],
    [-111.046768, 40.997963],  # first corner repeated to close the ring
]
wyoming = {'type': 'Polygon', 'coordinates': [wyoming_ring]}

# The raw result is treated as square metres; dividing by 1e6 gives km^2.
area_km2 = area(wyoming) / 1e+6
whole_km2 = math.floor(area_km2)

print('Estimated area of Wyoming: ' + str(whole_km2) + 'km^2')
print("Error:                     " + str(253600 - whole_km2) + "km^2")

Estimated area of Wyoming: 253529km^2
Error:                     71km^2


### Now run it on the entire Cambridge dataset. (Note: outlying nodes stretch the convex hull, so they skew this area.)

In [93]:
from scipy.spatial import ConvexHull

# Convex hull of every node in the graph, computed on [lat, lon] pairs.
lat_lons = [[attrs["lat"], attrs["lon"]] for _node, attrs in g.nodes(data=True)]
hull = ConvexHull(lat_lons)

# Swap each hull vertex to [lon, lat] before building the polygon.
hull_edges = [[lon, lat] for lat, lon in hull.points[hull.vertices]]

camb = {'type': 'Polygon', 'coordinates': [hull_edges]}
camb_km2 = area(camb) / 1000000

print("Area of convex map: " + str(camb_km2) + "km^2")


Area of convex map: 21.747622196450717km^2


In [109]:
from scipy.spatial import ConvexHull

ZIP_CODE = '02139'
SQ_MI_TO_SQ_KM = 2.58999  # square miles -> square kilometres

# A set makes the per-node membership test O(1) instead of a list scan.
zip_node_ids = set(nodes_by_zip[ZIP_CODE])

# Convex hull, computed on [lat, lon] pairs, of the nodes inside ZIP_CODE.
lat_lons = [[x[1]["lat"], x[1]["lon"]] for x in g.nodes().data() if x[0] in zip_node_ids]
hull = ConvexHull(lat_lons)
# Swap each hull vertex to [lon, lat] before building the polygon.
hull_edges = [[hull.points[ix][1], hull.points[ix][0]] for ix in hull.vertices]
polygon = {'type': 'Polygon', 'coordinates': [hull_edges]}

print("Area of convex map:        " + (str (area(polygon) / 1000000)) + "km^2")
print("Actual land area of " + ZIP_CODE + ": " + str(search.by_zipcode(ZIP_CODE).land_area_in_sqmi * SQ_MI_TO_SQ_KM) + "km^2")


m = f.Map(location = [42.3611108,-71.1079923], zoom_start=16)
nodes_data_subset = g.nodes().data()

# Hull vertices as (lon, lat) tuples so each node can be checked in O(1).
hull_vertex_set = {(float(lon), float(lat)) for lon, lat in hull_edges}

# Show graph with hull edges
for node_data in nodes_data_subset:
    attrs = node_data[1]
    lon, lat = attrs.get('lon'), attrs.get('lat')
    if lon is None or lat is None:
        # A node without coordinates cannot be drawn; skip it. This replaces a
        # leftover pdb.set_trace() that would hang a Run All.
        continue
    if (float(lon), float(lat)) in hull_vertex_set:
        # Marker colour is set through its icon; `color`/`radius` keyword
        # arguments on Marker do not style it.
        m.add_child(f.Marker(location=[lat, lon], icon=f.Icon(color="red")))

    m.add_child(f.CircleMarker(location=[lat,lon], color="orange", radius=.5))

m

Area of convex map:        3.1954485686585246km^2
Actual land area of 02139: 4.0403844km^2


## Area-producing cruft

In [83]:
import pyproj    
import shapely
import shapely.ops as ops
from shapely.geometry.polygon import Polygon
from functools import partial


# Cross-check of the Wyoming area using shapely + pyproj: reproject the
# polygon with an Albers equal-area ('aea') projection and read the planar
# area of the result. Vertices are given as [lon, lat].
wyoming_corners = [
    [-111.046768, 40.997963],
    [-111.055196, 45.001320],
    [-104.057691, 44.997377],
    [-104.053251, 41.001410],
    [-111.046768, 40.997963],
]
geom = Polygon(wyoming_corners)

# The projection's two standard parallels are the polygon's min and max y
# bounds, i.e. its southern and northern latitudes.
south, north = geom.bounds[1], geom.bounds[3]
to_equal_area = partial(
    pyproj.transform,
    pyproj.Proj(init='EPSG:4326'),
    pyproj.Proj(proj='aea', lat_1=south, lat_2=north),
)
geom_area = ops.transform(to_equal_area, geom)

# Print the area in km^2 (projected area divided by 1e6, assuming the
# projection's units are metres), rounded down.
print(math.floor(geom_area.area / 1000000))

253115


  return _prepare_from_string(" ".join(pjargs))
  projstring = _prepare_from_string(" ".join((projstring, projkwargs)))
