# Map violations data

#### Set up environment

In [120]:
# Install folium and paretochart
!pip install folium
!pip install --upgrade paretochart

# Import necessary packages 
import folium
from folium import plugins
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import numpy as np
from paretochart import pareto

%matplotlib inline

[31mdistributed 1.21.8 requires msgpack, which is not installed.[0m
Requirement already up-to-date: paretochart in /Users/davidplewis/anaconda3/lib/python3.6/site-packages (1.0)
Requirement not upgraded as not directly required: matplotlib in /Users/davidplewis/anaconda3/lib/python3.6/site-packages (from paretochart) (2.2.2)
Requirement not upgraded as not directly required: numpy>=1.7.1 in /Users/davidplewis/anaconda3/lib/python3.6/site-packages (from matplotlib->paretochart) (1.14.3)
Requirement not upgraded as not directly required: cycler>=0.10 in /Users/davidplewis/anaconda3/lib/python3.6/site-packages (from matplotlib->paretochart) (0.10.0)
Requirement not upgraded as not directly required: pyparsing!=2.0.4,!=2.1.2,!=2.1.6,>=2.0.1 in /Users/davidplewis/anaconda3/lib/python3.6/site-packages (from matplotlib->paretochart) (2.2.0)
Requirement not upgraded as not directly required: python-dateutil>=2.1 in /Users/davidplewis/anaconda3/lib/python3.6/site-packages (from matplotlib->pa

ImportError: cannot import name 'pareto'

#### Import and clean data

In [75]:
# Import business license data
import os.path
root_path = os.path.dirname(os.getcwd())
violation_values  = pd.read_csv(os.path.join(root_path,"DATA/violation_values.csv"))
violation_counts  = pd.read_csv(os.path.join(root_path,"DATA/violation_counts.csv"))


In [86]:
violation_values.head()

Unnamed: 0,v_1,v_2,v_3,v_4,v_5,v_6,v_7,v_8,v_9,v_10,...,v_37,v_38,v_39,v_40,v_41,v_42,v_43,v_44,v_70,inspection_id
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2177032
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2177027
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2177028
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2177029
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2177009


In [115]:
violation_columns = (list(violation_values.columns))
violation_columns.remove("inspection_id")
violation_sums = violation_values[violation_columns].sum()
violation_sums = pd.DataFrame({'violation':violation_sums.index, 'count':violation_sums.values})
violation_sums


Unnamed: 0,violation,count
0,v_1,294.0
1,v_2,2605.0
2,v_3,5188.0
3,v_4,257.0
4,v_5,8.0
5,v_6,952.0
6,v_7,68.0
7,v_8,1344.0
8,v_9,414.0
9,v_10,279.0


#### Map licenses

In [68]:
# Use folium to visualize map
m = folium.Map([41.8600, -87.6298], zoom_start=10)
m

In [69]:
# mark each license as a point

# for index, row in business_licenses.iterrows():
#     folium.CircleMarker([row['latitude'], row['longitude']],
#                         radius=15,
#                         popup=row['legal_name'],
#                         fill_color="grey", # divvy color
#                        ).add_to(m)

business_licenses.apply(lambda row:folium.CircleMarker(location=[row["latitude"], row["longitude"]], 
                                              radius=.5, fill_color="grey")
                                             .add_to(m), axis=1)


507842    <folium.features.CircleMarker object at 0x1a42...
508168    <folium.features.CircleMarker object at 0x1a42...
509776    <folium.features.CircleMarker object at 0x1a42...
509815    <folium.features.CircleMarker object at 0x1a42...
509871    <folium.features.CircleMarker object at 0x1a42...
510907    <folium.features.CircleMarker object at 0x1a42...
510970    <folium.features.CircleMarker object at 0x1a42...
511222    <folium.features.CircleMarker object at 0x1a42...
511696    <folium.features.CircleMarker object at 0x1a42...
512417    <folium.features.CircleMarker object at 0x1a42...
513913    <folium.features.CircleMarker object at 0x1a42...
514811    <folium.features.CircleMarker object at 0x1a42...
515322    <folium.features.CircleMarker object at 0x1a42...
515377    <folium.features.CircleMarker object at 0x1a42...
515519    <folium.features.CircleMarker object at 0x1a42...
515847    <folium.features.CircleMarker object at 0x1a42...
516332    <folium.features.CircleMarker 

In [70]:
m

In [71]:
business_licenses[['latitude', 'longitude']].values

array([[ 41.90332173, -87.67598197],
       [ 41.97359627, -87.65508289],
       [ 41.94005906, -87.6639765 ],
       ...,
       [ 41.91745063, -87.68918063],
       [ 41.75825583, -87.68292391],
       [ 41.77913417, -87.69963358]])

In [72]:
# convert to (n, 2) nd-array format for heatmap
licenseArr = business_licenses[['latitude', 'longitude']].values
# licenseArr = list(map(tuple, licenseArr))

# plot heatmap
m.add_child(plugins.HeatMap(licenseArr.tolist(), radius=17))
# plugins.HeatMap(licenseArr).add_to(m)
m


In [None]:
business_licenses["legal_name"]