In [2]:
import geopandas as gp
import pandas as pd
import numpy as np
from itertools import chain
dc_segs = gp.read_file('dcdata\Street_Segments.geojson')
pts = gp.read_file('dcdata\Parking_Violations_in_January_2016.geojson')

In [6]:
def haversine(lon1, lat1, lon2, lat2):
    """
    Calculate the great circle distance between two points 
    on the earth (specified in decimal degrees)
    """
    from math import radians, cos, sin, asin, sqrt
    # convert decimal degrees to radians 
    lon1, lat1, lon2, lat2 = map(radians, [lon1, lat1, lon2, lat2])
    # haversine formula 
    dlon = lon2 - lon1 
    dlat = lat2 - lat1 
    a = sin(dlat/2)**2 + cos(lat1) * cos(lat2) * sin(dlon/2)**2
    c = 2 * asin(sqrt(a)) 
    km = 6367 * c
    m = km *1000
    return m   
def ptfromln(pt, ln):
    n_pt = ln.interpolate(ln.project(pt))
    lon1, lat1 = n_pt.coords[0]
    lon2, lat2 = pt.coords[0]
    return haversine(lon1, lat1, lon2, lat2)
def pts2seg(pts, gp_segs, buffer_dis=50, near_dis_thres=5):
    pts_crs,gp_segs_crs = pts.to_crs(epsg=3559), dc_segs.to_crs(epsg=3559)
    pts_crs_bfr = pts_crs.copy()

    pts_crs_bfr.geometry = pts_crs_bfr.buffer(near_dis_thres*1.1)
    close_jn = gp.tools.sjoin(pts_crs_bfr, gp_segs_crs)[['OBJECTID_left','STREETSEGID_right']]
    handledid = set(pd.unique(close_jn.OBJECTID_left))
    mask = (~pts_crs_bfr.OBJECTID.isin(handledid))

    far_jns = []
    while pts_crs_bfr[mask].shape[0]!=0:
        pts_crs_bfr.loc[mask, 'geometry'] = pts_crs_bfr[mask].buffer(buffer_dis)
        jn = gp.tools.sjoin(pts_crs_bfr[mask], gp_segs_crs)[['OBJECTID_left','STREETSEGID_right']]
        far_jns.append(jn)
        handledid |= set(pd.unique(jn.OBJECTID_left))
        mask = (~pts_crs_bfr.OBJECTID.isin(handledid))
        
    far_jns = pd.concat(far_jns)
    mr = pd.merge(dc_segs[['geometry','STREETSEGID']],far_jns , left_on='STREETSEGID', right_on='STREETSEGID_right')
    mr = pd.merge(pts[['OBJECTID','geometry','STREETSEGID']],mr, left_on='OBJECTID', right_on='OBJECTID_left')
    mr['dis']=mr.apply(lambda x: ptfromln(x.geometry_x, x.geometry_y),axis=1)
    result = close_jn.groupby('OBJECTID_left')['STREETSEGID_right'].apply(list).append(mr.groupby('OBJECTID').apply(lambda x: [x.ix[x.dis.idxmin()].STREETSEGID_y]))
    return pd.DataFrame(result, columns=['segid'])


In [54]:
pts_files = [
    'dcdata/Moving_Violations_in_January_2016.geojson',
    'dcdata/Moving_Violations_in_February_2016.geojson',
    'dcdata/Moving_Violations_in_March_2016.geojson',
    'dcdata/Moving_Violations_in_April_2016.geojson',
    'dcdata/Moving_Violations_in_May_2016.geojson',
    'dcdata/Parking_Violations_in_January_2016.geojson',
    'dcdata/Parking_Violations_in_February_2016.geojson',
    'dcdata/Parking_Violations_in_March_2016.geojson',
    'dcdata/Parking_Violations_in_April_2016.geojson',
    'dcdata/Parking_Violations_in_May_2016.geojson',
]
pts_col_name = [
    'mvjan',
    'mvfeb',
    'mvmar',
    'mvapr',
    'mvmay',
    'prjan',
    'prfeb',
    'prmar',
    'prapr',
    'prmay',
]

In [85]:
dc_segs_statistic = dc_segs[['OBJECTID','STREETSEGID','SHAPE_Length','geometry']]
for i, pts_file in enumerate(pts_files):
    print i, pts_file
    pts = gp.read_file(pts_file)
    pts_col = pts_col_name[i]
    pts_seg = pts2seg(pts, dc_segs)
    seg_counts = pd.DataFrame(pd.Series(list(chain.from_iterable(pts_seg.values.ravel()))).value_counts(),columns=[pts_col])
    dc_segs_statistic = dc_segs_statistic.merge(seg_counts, left_on = 'STREETSEGID', right_index=True, how='left')

0 dcdata/Moving_Violations_in_January_2016.geojson
(4877, 19)
(794, 19)
(1, 19)
(0, 19)
1 dcdata/Moving_Violations_in_February_2016.geojson
(4459, 19)
(842, 19)
(1, 19)
(0, 19)
2 dcdata/Moving_Violations_in_March_2016.geojson
(4562, 19)
(893, 19)
(0, 19)
3 dcdata/Moving_Violations_in_April_2016.geojson
(4434, 19)
(1080, 19)
(0, 19)
4 dcdata/Moving_Violations_in_May_2016.geojson
(3127, 19)
(612, 19)
(0, 19)
5 dcdata/Parking_Violations_in_January_2016.geojson
(374, 18)
(84, 18)
(36, 18)
(2, 18)
(0, 18)
6 dcdata/Parking_Violations_in_February_2016.geojson
(436, 18)
(95, 18)
(50, 18)
(0, 18)
7 dcdata/Parking_Violations_in_March_2016.geojson
(577, 18)
(66, 18)
(25, 18)
(2, 18)
(0, 18)
8 dcdata/Parking_Violations_in_April_2016.geojson
(1071, 18)
(178, 18)
(52, 18)
(0, 18)
9 dcdata/Parking_Violations_in_May_2016.geojson
(425, 18)
(67, 18)
(31, 18)
(0, 18)


In [134]:
mvarray = ['mvjan','mvfeb','mvmar','mvapr','mvmay']
prarray = ['prjan','prfeb','prmar','prapr','prmay']
# mvarray = ['mvjan']
# prarray = ['prjan']
dc_segs_statistic['mv_vio'] = dc_segs_statistic[mvarray].sum(axis=1)
dc_segs_statistic['pr_vio'] = dc_segs_statistic[prarray].sum(axis=1)
dc_segs_statistic_drop = dc_segs_statistic.drop(pts_col_name,axis=1)
dc_segs_statistic_drop['mv_vio/len'] = dc_segs_statistic_drop.mv_vio/dc_segs_statistic_drop.SHAPE_Length
dc_segs_statistic_drop['pr_vio/len'] = dc_segs_statistic_drop.pr_vio/dc_segs_statistic_drop.SHAPE_Length

print dc_segs_statistic_drop.columns
def normalize(col_name, df):
    col_name_norm = col_name + '_norm'
    df[col_name_norm] = df[col_name]+1
    df[col_name_norm] = np.log2(df[col_name_norm])
    df[col_name_norm] = df[col_name_norm]/df[col_name_norm].max()
normalize('mv_vio',dc_segs_statistic_drop)
normalize('pr_vio',dc_segs_statistic_drop)
normalize('mv_vio/len',dc_segs_statistic_drop)
normalize('pr_vio/len',dc_segs_statistic_drop)


Index([    u'OBJECTID',  u'STREETSEGID', u'SHAPE_Length',     u'geometry',
             u'mv_vio',       u'pr_vio',        u'color',   u'mv_vio/len',
         u'pr_vio/len'],
      dtype='object')


In [132]:
dc_segs_statistic_drop.sort('mv_vio/len')

Unnamed: 0,OBJECTID,STREETSEGID,SHAPE_Length,geometry,mv_vio,pr_vio,color,mv_vio/len,pr_vio/len,mv_vio_norm,pr_vio_norm,mv_vio/len_norm,pr_vio/len_norm
0,1,876,192.925131,LINESTRING (-77.01239193659748 38.950836102255...,0,0,#ffffff,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
8759,8760,13125,90.780990,LINESTRING (-76.929638125978 38.90721291084983...,0,0,#ffffff,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
8760,8761,4564,77.674020,LINESTRING (-76.92859149019044 38.907212995971...,0,0,#ffffff,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
8761,8762,4239,89.251057,LINESTRING (-77.00057784941127 38.881279000930...,0,18,#81-98d-969,0.000000,0.201678,0.000000,0.476926,0.000000,0.096917
8762,8763,2998,80.652054,LINESTRING (-77.00058233764283 38.880475002042...,0,2,#f1-2d-29,0.000000,0.024798,0.000000,0.177948,0.000000,0.012922
8763,8764,3846,50.951109,LINESTRING (-77.00057875896626 38.879748475258...,0,10,#b9-4dd-4c9,0.000000,0.196267,0.000000,0.388399,0.000000,0.094536
8764,8765,2733,97.520530,LINESTRING (-77.00058232793353 38.879289497629...,0,3,#ea-c3-bd,0.000000,0.030763,0.000000,0.224545,0.000000,0.015984
8765,8766,2124,97.330124,LINESTRING (-77.00058312746832 38.878410999062...,0,12,#ab-609-5f1,0.000000,0.123292,0.000000,0.415458,0.000000,0.061332
8766,8767,12328,116.173288,LINESTRING (-77.00058358126051 38.877534211958...,0,71,#-f2-289b-280d,0.000000,0.611156,0.000000,0.692714,0.000000,0.251606
8767,8768,12681,177.089979,LINESTRING (-77.0005830216614 38.8764876918826...,0,0,#ffffff,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000


In [135]:
def gradient_color(percent):
    min_color = np.array([255,255,255])
    max_color = np.array([248,105,107])
    return '#%02x%02x%02x' % tuple([int(k) for k in min_color+(max_color-min_color)*percent])

def write_var(col_name, var_name, df,f):
    df['color']=df[col_name].apply(gradient_color)
    f.write('var %s = %s;\n' % (var_name, df.to_json()))

with open('Volumes of MV and Park.js','w') as f:
    write_var('mv_vio_norm', 'mv', dc_segs_statistic_drop,f)
    write_var('pr_vio_norm', 'pr', dc_segs_statistic_drop,f)
    write_var('mv_vio/len_norm', 'mvnorm', dc_segs_statistic_drop,f)
    write_var('pr_vio/len_norm', 'prnorm', dc_segs_statistic_drop,f)
    