In [8]:
%matplotlib inline
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import geopandas as gpd

import json
import sys
import geojson

In [12]:
# Load the records from VehicleIdling.json into a DataFrame.
# JSON text is UTF-8 by specification, so decode it explicitly rather than
# relying on the platform's locale-dependent default encoding.
with open('VehicleIdling.json', 'r', encoding='utf-8') as f:
    data = json.load(f)
df = pd.DataFrame(data)

# Quick sanity check: how many rows were loaded and which columns exist.
print('Number of rows: ', len(df))
str(df.columns.tolist())

Number of rows:  20938


"['Address Type', 'Agency', 'Agency Name', 'Borough', 'Bridge Highway Direction', 'Bridge Highway Name', 'Bridge Highway Segment', 'City', 'Closed Date', 'Community Board', 'Complaint Type', 'Created Date', 'Cross Street 1', 'Cross Street 2', 'Descriptor', 'Due Date', 'Facility Type', 'Incident Address', 'Incident Zip', 'Intersection Street 1', 'Intersection Street 2', 'Landmark', 'Latitude', 'Location', 'Location Type', 'Longitude', 'Park Borough', 'Park Facility Name', 'Resolution Action Updated Date', 'Resolution Description', 'Road Ramp', 'Status', 'Street Name', 'Taxi Company Borough', 'Taxi Pick Up Location', 'Unique Key', 'Vehicle Type', 'X Coordinate (State Plane)', 'Y Coordinate (State Plane)']"

In [13]:
# Preview the first rows of the loaded frame to eyeball columns and value formats.
df.head()

Unnamed: 0,Address Type,Agency,Agency Name,Borough,Bridge Highway Direction,Bridge Highway Name,Bridge Highway Segment,City,Closed Date,Community Board,...,Resolution Description,Road Ramp,Status,Street Name,Taxi Company Borough,Taxi Pick Up Location,Unique Key,Vehicle Type,X Coordinate (State Plane),Y Coordinate (State Plane)
0,ADDRESS,DEP,Department of Environmental Protection,MANHATTAN,,,,NEW YORK,01/07/2010 03:45:00 PM,05 MANHATTAN,...,The Department of Environmental Protection did...,,Closed,WEST 47 STREET,,,15656648,,989909,215099
1,ADDRESS,DEP,Department of Environmental Protection,STATEN ISLAND,,,,STATEN ISLAND,01/11/2010 05:45:00 PM,02 STATEN ISLAND,...,The Department of Environmental Protection did...,,Closed,ROCKVILLE AVENUE,,,15674887,,938560,156291
2,ADDRESS,DEP,Department of Environmental Protection,QUEENS,,,,Rockaway Park,01/09/2010 05:15:00 PM,14 QUEENS,...,The Department of Environmental Protection did...,,Closed,BEACH 100 STREET,,,15675126,,1033664,152620
3,INTERSECTION,DEP,Department of Environmental Protection,MANHATTAN,,,,NEW YORK,01/30/2010 07:00:00 PM,02 MANHATTAN,...,The Department of Environmental Protection did...,,Closed,,,,15831650,,986121,205580
4,INTERSECTION,DEP,Department of Environmental Protection,BROOKLYN,,,,BROOKLYN,01/29/2010 10:15:00 AM,08 BROOKLYN,...,The Department of Environmental Protection did...,,Closed,,,,15831651,,1004308,185447


In [14]:
# Cast both coordinate columns to float so they hold numeric values.
for coord_col in ('Latitude', 'Longitude'):
    df[coord_col] = df[coord_col].astype(float)

In [17]:
# Restrict the frame to the location-related columns: cross streets, zip,
# city, state-plane coordinates and latitude/longitude.
useful_cols = [
    'Cross Street 1',
    'Cross Street 2',
    'Incident Zip',
    'City',
    'X Coordinate (State Plane)',
    'Y Coordinate (State Plane)',
    'Latitude',
    'Longitude',
]
df_sub = df.loc[:, useful_cols]
df_sub.head()

Unnamed: 0,Cross Street 1,Cross Street 2,Incident Zip,City,X Coordinate (State Plane),Y Coordinate (State Plane),Latitude,Longitude
0,5 AVENUE,AVENUE OF THE AMERICAS,10036,NEW YORK,989909,215099,40.757072,-73.979574
1,PLYMOUTH RD,BOWLING GRN PL,10314,STATEN ISLAND,938560,156291,40.595541,-74.164521
2,NYCTA SUBWAY,BEACH CHANNEL DR,11694,Rockaway Park,1033664,152620,40.585445,-73.822096
3,E 8 ST,MERCER ST,10003,NEW YORK,986121,205580,40.730946,-73.993249
4,DEAN ST,ROCHESTER AVE,11233,BROOKLYN,1004308,185447,40.675663,-73.927688


In [19]:
# Discard rows lacking either coordinate. dropna returns a new frame by
# default, so df_sub itself is left untouched.
df_geo = df_sub.dropna(subset=['Latitude', 'Longitude'])
print(df_geo.shape[0])

20938


In [20]:
# Count rows per distinct City value. The labels are not case-normalised:
# the output lists e.g. 'Forest Hills' and 'FOREST HILLS' as separate
# entries, so totals for one place can be split across spellings.
df_geo['City'].value_counts()

NEW YORK               7571
BROOKLYN               7003
BRONX                  1357
STATEN ISLAND           881
College Point           565
Astoria                 336
Cambria Heights         261
Forest Hills            240
Flushing                220
Jamaica                 214
Ridgewood               185
Long Island City        178
Woodside                119
Sunnyside                80
Jackson Heights          80
Fresh Meadows            73
Ozone Park               71
Maspeth                  68
Corona                   67
East Elmhurst            66
FOREST HILLS             65
South Ozone Park         62
Bayside                  60
Rego Park                59
Middle Village           58
Elmhurst                 55
Queens Village           54
Far Rockaway             52
Whitestone               52
Saint Albans             49
                       ... 
RICHMOND HILL             8
CAMBRIA HEIGHTS           8
JACKSON HEIGHTS           8
Floral Park               8
CORONA              

In [21]:
def df_to_geojson(df, props, lat='Latitude', lng='Longitude'):
    """Convert a DataFrame of point records into a GeoJSON-style dict.

    Each row becomes one ``Feature`` with a ``Point`` geometry whose
    coordinates are ``[longitude, latitude]`` and whose ``properties`` hold
    the values of the requested columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Source rows; must contain the ``lat``, ``lng`` and ``props`` columns.
    props : iterable of str
        Column names to copy into each feature's ``properties``.
    lat, lng : str
        Names of the latitude and longitude columns.

    Returns
    -------
    dict
        ``{'type': 'FeatureCollection', 'features': [...]}``; an empty
        frame yields an empty ``features`` list.

    Raises
    ------
    KeyError
        If any requested column is missing from ``df``.
    """
    def _nan_to_none(value):
        # NaN is the only float that differs from itself. json.dumps would
        # emit it as a bare ``NaN`` token, which is not valid JSON, so it is
        # mapped to None (serialised as ``null``).
        if isinstance(value, float) and value != value:
            return None
        return value

    # Materialise once so a generator passed as ``props`` is not exhausted
    # after the first row.
    props = list(props)

    # Read column by column rather than with iterrows(): iterrows builds one
    # Series per row, which upcasts mixed numeric columns (ints become
    # floats) and is slow. ``tolist()`` yields native Python scalars with
    # each column's own dtype preserved.
    longitudes = df[lng].tolist()
    latitudes = df[lat].tolist()
    prop_values = {prop: df[prop].tolist() for prop in props}

    features = []
    for i, (x, y) in enumerate(zip(longitudes, latitudes)):
        features.append({
            'type': 'Feature',
            'properties': {prop: _nan_to_none(prop_values[prop][i])
                           for prop in props},
            # GeoJSON positions are ordered longitude first, then latitude.
            'geometry': {'type': 'Point', 'coordinates': [x, y]},
        })

    return {'type': 'FeatureCollection', 'features': features}

In [25]:
# Carry only the street and city labels into each feature's properties,
# then serialise the resulting dict as indented JSON text.
useful_cols = ['Cross Street 1', 'Cross Street 2', 'City']
geojson_dict = df_to_geojson(df=df_geo, props=useful_cols)
geojson_str = json.dumps(obj=geojson_dict, indent=2)

In [33]:
# Emit the GeoJSON text as a JavaScript file that assigns it to a variable.
output_filename = 'VehicleIdling.js'
with open(output_filename, mode='w') as output_file:
    # end='' keeps the file content exactly the formatted string, with no
    # trailing newline appended by print.
    print('var vehicleIdling = {};'.format(geojson_str), end='', file=output_file)
    