# Monthly moving violations
Data downloaded from [opendata.dc.gov](http://opendata.dc.gov/datasets?q=moving%20violations) on 04/11/2017:

- monthly files for 2014, 2015, 2016 and January 2017

# Monthly parking violations
Data downloaded from [opendata.dc.gov](http://opendata.dc.gov/datasets?q=parking%20violations) on 04/11/2017:

- monthly files for 2014, 2015, 2016 and January 2017

In [1]:
%pylab inline
import geopandas as gpd
from geopandas.tools import sjoin
import pandas as pd
from shapely.geometry import Point,LineString
from IPython.display import display
# from pygeocoder import Geocoder
import numpy as np
# import googlemaps,csv,math
import os,calendar
from collections import Counter

Populating the interactive namespace from numpy and matplotlib


In [2]:
# Street-segment layer; the crime cell buffers these geometries for its spatial join.
str_gpd = gpd.read_file('data/segments_dc.geojson')

# Two-column lookup: the row label of each segment in str_gpd (index_seg)
# next to its STREETSEGID (SEG_ID). Later cells merge their per-segment
# statistics against this table.
slice_str = pd.DataFrame(
    {'index_seg': str_gpd.index, 'SEG_ID': str_gpd['STREETSEGID'].values},
    columns=['index_seg', 'SEG_ID'],
)

# CRIME DATA (2014-15-16) ANALYSIS

In [2]:

# Crime incident layers, one geojson file per year.
crime_gpd_2014 = gpd.read_file('../../data/Crime Incidents/Crime_Incidents_in_2014.geojson')
crime_gpd_2015 = gpd.read_file('../../data/Crime Incidents/Crime_Incidents_in_2015.geojson')
crime_gpd_2016 = gpd.read_file('../../data/Crime Incidents/Crime_Incidents_in_2016.geojson')

# Buffer distance in the layers' coordinate units
# (presumably degrees if the files are lon/lat -- TODO confirm).
buf = 0.0001


def _buffered_subset(frame, columns):
    """Return a copy of `frame` limited to `columns`, with every geometry grown by `buf`."""
    subset = frame[columns].copy()
    subset.geometry = subset.buffer(buf)
    return subset


_crime_columns = ['OBJECTID', 'OFFENSE', 'START_DATE', 'geometry']

str_bfr_gpd = _buffered_subset(str_gpd, ['OBJECTID', 'STREETSEGID', 'geometry', 'SHAPE_Length'])
crime_bfr_gpd_2014 = _buffered_subset(crime_gpd_2014, _crime_columns)
crime_bfr_gpd_2015 = _buffered_subset(crime_gpd_2015, _crime_columns)
crime_bfr_gpd_2016 = _buffered_subset(crime_gpd_2016, _crime_columns)

# Left spatial join: every buffered incident is kept and paired with each
# buffered segment it intersects, so one incident may yield several rows.
joined1 = sjoin(crime_bfr_gpd_2014, str_bfr_gpd, how='left', op='intersects')
joined2 = sjoin(crime_bfr_gpd_2015, str_bfr_gpd, how='left', op='intersects')
joined3 = sjoin(crime_bfr_gpd_2016, str_bfr_gpd, how='left', op='intersects')
joined = pd.concat([joined1, joined2, joined3])

# Calendar month and year of each incident, taken from its START_DATE.
_incident_start = pd.to_datetime(joined['START_DATE'])
joined['MONTH'] = _incident_start.dt.month
joined['YEAR'] = _incident_start.dt.year

# Long-format counts: one row per (segment, month, year, offense type).
gb = joined.groupby(['STREETSEGID','MONTH','YEAR','OFFENSE']).size().reset_index()
gb.columns = ['STREETSEGID','MONTH','YEAR','TYPE','COUNT']

important = ['SEG_ID', 'MONTH', 'YEAR']

# Wide format: one row per (segment, month, year) and one column per offense
# type. The row key must include MONTH and YEAR; keying rows on the segment
# alone would write every month of a segment into the same row, overwriting
# MONTH/YEAR and mixing counts from different months. Offense types that did
# not occur for a given key stay NaN.
crime_statistic = (
    gb.rename(columns={'STREETSEGID': 'SEG_ID'})
      .set_index(important + ['TYPE'])['COUNT']
      .unstack('TYPE')
      .reset_index()
)
crime_statistic.columns.name = None  # drop the leftover 'TYPE' label on the column axis

# Leave gb indexed by segment id, as the row-wise version of this cell did.
gb = gb.set_index('STREETSEGID')

final_crime = crime_statistic.copy()

# Key columns first, offense-type columns after.
temp = [c for c in final_crime.columns if c not in important]
reordered = important + temp
final_crime = final_crime[reordered]
# final_crime['TOTAL'] = final_crime[temp].apply(sum,axis=1)
# final_crime.fillna(0,inplace=True)
# final_crime.head().T


In [3]:

# merge() joins on the columns both frames share (SEG_ID); the matched
# index_seg value then becomes the row index of the feature table.
ftr_crime = final_crime.merge(slice_str).set_index('index_seg')

# Export only rows whose YEAR is one of the three study years, without SEG_ID.
_in_study_years = ftr_crime['YEAR'].isin([2014, 2015, 2016])
_crime_features = ftr_crime[_in_study_years].drop('SEG_ID', axis=1)
_crime_features.to_csv('../../data/Crime Incidents/feature_crime_incidences_dc.csv')

# MOVING VIOLATIONS (2014-15-16) FEATURE SET

In [4]:
# calendar.month_name[0] is an empty string, so month[1]..month[12] are the
# English month names used in the file names.
month = list(calendar.month_name)
year = [2014,2015, 2016]
important = ['SEG_ID', 'MONTH', 'YEAR']

# One wide frame per monthly file: a row per street segment, a column per
# violation description. The frames are collected and concatenated once after
# the loop instead of re-copying the accumulated result every month.
monthly_frames = []
for yr in year:
    print('year = %d' % yr)
    for i in range(1,13):
        filename = '../../data/Moving Violations/Moving_Violations_in_'+month[i]+"_"+str(yr)
        mov_gpd = gpd.read_file(filename+'.geojson')

        # Violations per (segment, description), spread into columns;
        # descriptions that did not occur on a segment stay NaN.
        mov_statistic = (
            mov_gpd.groupby(['STREETSEGID','VIOLATIONDESC']).size()
                   .unstack('VIOLATIONDESC')
                   .reset_index()
                   .rename(columns={'STREETSEGID': 'SEG_ID'})
        )
        mov_statistic.columns.name = None  # drop the leftover axis label from unstack
        mov_statistic.insert(1, 'MONTH', i)
        mov_statistic.insert(2, 'YEAR', yr)
        monthly_frames.append(mov_statistic)

# Months can have different sets of violation columns; concat takes their
# union and fills the gaps with NaN.
final_mov = pd.concat(monthly_frames, axis=0, ignore_index=True)

# Key columns first, violation-type columns after.
temp = [c for c in final_mov.columns if c not in important]
reordered = important + temp
final_mov = final_mov[reordered]
# final_mov['TOTAL_MOV'] = final_mov[temp].apply(sum,axis=1)
# final_mov.fillna(0,inplace=True)
# final_mov.to_csv('../../data/Moving Violations/Moving_Violations_Details.csv',index=False)


year = 2014
year = 2015
year = 2016


In [5]:
# merge() joins on the columns both frames share (SEG_ID); the matched
# index_seg value then becomes the row index of the feature table.
ftr_mov = final_mov.merge(slice_str).set_index('index_seg')

# SEG_ID is left out of the exported features.
_mov_features = ftr_mov.drop('SEG_ID', axis=1)
_mov_features.to_csv('../../data/Moving Violations/feature_mov_violations_dc.csv')

# PARKING VIOLATIONS (2014-15-16) FEATURE SET

In [4]:
# calendar.month_name[0] is an empty string, so month[1]..month[12] are the
# English month names used in the file names.
month = list(calendar.month_name)
year = [2014,2015,2016]
important = ['SEG_ID', 'MONTH', 'YEAR']

# One wide frame per monthly file: a row per street segment, a column per
# violation description. The frames are collected and concatenated once after
# the loop instead of re-copying the accumulated result every month.
monthly_frames = []
for yr in year:
    print('year = %d' % yr)
    for i in range(1,13):
        filename = '../../data/Parking Violations/Parking_Violations_in_'+month[i]+"_"+str(yr)
        # The variable name mov_gpd is kept from the original cell although
        # it holds parking data here.
        mov_gpd = gpd.read_file(filename+'.geojson')

        # Violations per (segment, description), spread into columns;
        # descriptions that did not occur on a segment stay NaN.
        park_statistic = (
            mov_gpd.groupby(['STREETSEGID','VIOLATION_DESCRIPTION']).size()
                   .unstack('VIOLATION_DESCRIPTION')
                   .reset_index()
                   .rename(columns={'STREETSEGID': 'SEG_ID'})
        )
        park_statistic.columns.name = None  # drop the leftover axis label from unstack
        park_statistic.insert(1, 'MONTH', i)
        park_statistic.insert(2, 'YEAR', yr)
        monthly_frames.append(park_statistic)

# Months can have different sets of violation columns; concat takes their
# union and fills the gaps with NaN.
final_park = pd.concat(monthly_frames, axis=0, ignore_index=True)

# Key columns first, violation-type columns after.
temp = [c for c in final_park.columns if c not in important]
reordered = important + temp
final_park = final_park[reordered]

# final['TOTAL'] = final[temp].apply(sum,axis=1)
# final.fillna(0,inplace=True)
# final.to_csv('dcdata/Parking Violations/Parking_Violations_Details.csv',index=False)

2014
2015
2016


In [5]:
# merge() joins on the columns both frames share (SEG_ID); the matched
# index_seg value then becomes the row index.
# NOTE: final_park is rebound to the merged frame, so re-running this cell
# merges the already-merged frame again.
final_park = final_park.merge(slice_str).set_index('index_seg')

# SEG_ID is left out of the exported features.
_park_features = final_park.drop('SEG_ID', axis=1)
_park_features.to_csv('../../data/Parking Violations/feature_parking_violations_dc.csv')

# Deprecated? (unverified cells — confirm before reuse)

In [49]:

# NOTE(review): this cell merges on a 'STREETSEGID' column and looks for
# '*_COUNT' columns, while the parking cell above builds park_statistic with
# 'SEG_ID' and plain violation names -- it appears to target an older layout
# of park_statistic; confirm before running.

# Missing counts become zero (done in place on the existing frame).
park_statistic.fillna(0, inplace=True)

# Row total over every column from the third one onward.
park_statistic['TYPE_COUNT'] = park_statistic.iloc[:, 2:].sum(axis=1)

# Attach each segment's SHAPE_Length and expose it as LENGTH.
_segment_lengths = str_gpd[['STREETSEGID','SHAPE_Length']]
park_statistic = (
    park_statistic
    .merge(_segment_lengths, left_on='STREETSEGID', right_on='STREETSEGID', right_index=False)
    .rename(columns={'SHAPE_Length': 'LENGTH'})
)

# temp = list(park_statistic.columns)
# temp.insert(2,'LENGTH')
# del temp[-1]
# park_statistic = park_statistic[temp]

# For every column whose name contains '_COUNT', add a '<name>/LEN' column
# holding the count divided by the segment length. The column list is fixed
# before the loop, so the new ratio columns are not themselves processed.
_count_columns = [name for name in park_statistic.columns if '_COUNT' in name]
for _name in _count_columns:
    park_statistic[_name + '/LEN'] = park_statistic[_name] / park_statistic['LENGTH']

park_statistic.head().T

Unnamed: 0,0,1,2,3,4
10 HOUR RULE VIOLATION (PASSENGER)_COUNT,0,0,0,0,0
ALTERED TAGS_COUNT,0,0,0,0,0
ALTERING A DRIVING PERMIT_COUNT,0,0,0,0,0
ALTERING A MANIFEST_COUNT,0,0,0,0,0
BACK UP WITHOUT CAUTION_COUNT,0,0,0,0,0
BACKING TO TURN_COUNT,0,0,0,0,0
BAD FOOT BRAKES_COUNT,0,0,0,0,0
BAD HAND BRAKES_COUNT,0,0,0,0,0
BIKE IMPROPER EQUIPMENT_COUNT,0,0,0,0,0
BIKE/PMD FAIL TO YIELD RIGHT OF WAY TO PED OR VEH_COUNT,0,0,0,0,0


In [570]:
# Collect the vertex list of every line in the street-segment layer.
# A multi-part geometry contributes one vertex list per component line, and
# its row position is printed so those segments can be identified.
coord = []
for i,line in enumerate(str_gpd.geometry):
    if line.geom_type == 'MultiLineString':
        print(i)
        # Go through .geoms: iterating a multi-part geometry directly is not
        # supported by Shapely 2.x, while .geoms works on old and new releases.
        for l in line.geoms:
            coord.append(list(l.coords))
    else:
        coord.append(list(line.coords))
print(len(coord))

12349
13523


In [53]:
# Flatten the per-line vertex lists into one list of vertices.
# NOTE: coord is rebound in place, so running this cell a second time would
# flatten one level further.
_flat_coord = []
for _part in coord:
    _flat_coord.extend(_part)
coord = _flat_coord
len(coord)

198894