In [1]:
# Imports
import numpy as np
import pandas as pd
import folium as fm # For the map, will only work in Jupyter since it needs a browser
from folium.plugins import MarkerCluster # In order to add map markers
from pyproj import Proj, transform
import matplotlib.pyplot as plt
import time

# numpy: never truncate printed arrays, allow 500-character lines, and
# suppress scientific notation for small values
np.set_printoptions(suppress=True, linewidth=500, threshold=np.inf)

# pandas: raise the row/column/width display limits and render floats
# with three decimal places
for option_name, option_value in (
    ('display.max_rows', 3000),
    ('display.max_columns', 3000),
    ('display.width', 1000),
    ('display.float_format', lambda value: '%.3f' % value),
):
    pd.set_option(option_name, option_value)

# data cleaning

In [2]:
# Load the violations CSV (ticket cost kept as a string, issue timestamp parsed
# as a datetime), discard rows with no zip code or ticket cost, and store the
# remaining zip codes as 64-bit integers.
df = (
    pd.read_csv(
        'violations_final.csv',
        index_col='Unnamed: 0',
        parse_dates=['Issue Datetime'],
        dtype={'Ticket Cost': str},
    )
    .dropna(subset=['Zip Code', 'Ticket Cost'])
    .astype({'Zip Code': np.int64})
)


In [3]:
# number of violations (rows) remaining after cleaning
df.shape[0]

5582

In [4]:
# preview the first five rows to sanity-check column formatting
df.head(n=5)

Unnamed: 0,Issue Datetime,Summons Number,Violation Code,Violation Location,Violation Precinct,House Number,Street Name,Intersecting Street,City & State,Ticket Cost,Ticket Type,Zip Code,Lat,Long
0,2019-01-14 12:00:00,8678224277,14,83,83,W,St Nicholas Ave,07ft N/of Jefferson,"Brooklyn, NY",115.0,General No Standing: Standing or parking where...,11237,40.708,-73.923
1,2019-01-11 11:29:00,8678223868,21,90,90,N,Wythe Ave,08ft W/of Rutledge S,"Brooklyn, NY",45.0,Street Cleaning: No parking where parking is n...,11249,40.7,-73.96
2,2019-01-13 15:05:00,8699568241,50,79,79,S,Macon St,0ft E/of Arlington P,"Brooklyn, NY",115.0,"Stopping, standing or parking in a crosswalk. ...",11216,40.681,-73.951
3,2019-01-11 08:37:00,8701817176,50,90,90,N,S 1st St,0ft E/of Driggs Ave,"Brooklyn, NY",115.0,"Stopping, standing or parking in a crosswalk. ...",11211,40.713,-73.96
4,2019-01-14 12:37:00,8695487570,50,90,90,S,Debevoise St,0ft E/of Graham Ave,"Brooklyn, NY",115.0,"Stopping, standing or parking in a crosswalk. ...",11206,40.702,-73.942


In [5]:
# map each distinct ticket cost to one of folium's marker colors
v_types = pd.unique(df['Ticket Cost'])
colors = ['red', 'blue', 'green', 'purple', 'orange', 'beige', 'white', 'pink']

# zip(v_types, colors) would silently drop every cost beyond the 8th color,
# and the later cdict[...] icon lookup would then raise KeyError. Cycling the
# palette gives every cost a color; with 8 or fewer distinct costs the mapping
# is exactly the same as a plain zip.
cdict = {v: colors[i % len(colors)] for i, v in enumerate(v_types)}

In [6]:
# start from a fresh map instance on every run, so markers added by an
# earlier run of this cell are not carried over
m = fm.Map([40.7255, -73.9465], zoom_start=14)

# popup text per violation: issue date, then the violation code on its own line
# (.dt.strftime is the vectorized formatter; a missing timestamp gets a
# placeholder instead of breaking the string join below)
vtimes = df['Issue Datetime'].dt.strftime('%m/%d/%y').fillna('Unknown date').tolist()
vcodes = df['Violation Code'].astype(str).tolist()
# folium renders popup strings as HTML, where '\n' collapses to a single
# space, so the line break has to be an explicit <br> tag
popups = ['<br>Violation Code: '.join([t, c]) for t, c in zip(vtimes, vcodes)]
# one icon per violation, colored by ticket cost; fall back to gray for any
# cost that has no entry in cdict rather than raising KeyError
icons = [fm.Icon(color=cdict.get(tcost, 'gray')) for tcost in df['Ticket Cost']]

# create MarkerCluster object with location and info of each violation
marker_cluster = MarkerCluster(locations=list(zip(df['Lat'], df['Long'])),
                               popups=popups,
                               icons=icons).add_to(m)


In [7]:
# The map holds too many markers for folium/Jupyter to render inline, so write
# it to a standalone HTML file that can be opened in a browser later.
m.save('all_days_clustered.html')

In [8]:
# code graveyard: an earlier row-by-row marker loop, kept for reference only (commented out, never executed)


# for ix, row in df.iterrows():
# #     if row['Issue Datetime'].date() == dt:
#     # create an icon object
#     point = fm.Icon(color=cdict[row['Ticket Cost']])
#     # create popup text for each icon
#     try:
#         vtime = row['Issue Datetime'].strftime('%m/%d/%y')
#         vcode = str(row['Violation Code'])
#         popup = '\n'.join([vcode, vtime])

#     except:
#         popup = 'No data!'

#     # create a marker at the proper location
#     mrkr = fm.Marker(location=(row['Lat'], row['Long']), icon=point, popup=popup)
#     # add it to the map
#     mrkr.add_to(m)
#     display(m)