In [1]:
# Import libraries
import pandas as pd
import numpy as np
import folium
import os
import json
import matplotlib.pyplot as plt
import folium
from folium import plugins
import warnings
warnings.filterwarnings("ignore")

#### Import Plotting File and Clean Data

In [2]:
# Load the prediction results from a CSV file (relative path) into a DataFrame.
data_plot = pd.read_csv('Pred_Result.csv')

In [3]:
# Keep only the zipcode and the two score columns for plotting.
# .copy() makes score_plot an independent frame, so the column assignments
# here (and any later ones) never write into a slice of data_plot and never
# trigger pandas' SettingWithCopyWarning.
score_plot = data_plot[['Zipcode', 'Predicted_Score', 'Actual_Score']].copy()
# Convert zipcode to string and keep only its first five characters.
score_plot['Zipcode'] = score_plot['Zipcode'].astype(str).str[:5]

In [4]:
# Number of zipcodes at each predicted crime level
score_plot.groupby('Predicted_Score')[['Zipcode']].count()

Unnamed: 0_level_0,Zipcode
Predicted_Score,Unnamed: 1_level_1
1.0,72
2.0,123
3.0,67


In [5]:
# Number of zipcodes at each actual crime level
score_plot.groupby('Actual_Score')[['Zipcode']].count()

Unnamed: 0_level_0,Zipcode
Actual_Score,Unnamed: 1_level_1
1.0,78
2.0,113
3.0,71


In [6]:
# Preview the first rows of the plotting frame.
score_plot.head()

Unnamed: 0,Zipcode,Predicted_Score,Actual_Score
0,90305,1.0,1.0
1,90063,3.0,3.0
2,91792,2.0,2.0
3,91377,1.0,1.0
4,90028,2.0,2.0


#### Remove Zipcode Areas without Predictions 

In [7]:
# Zipcodes (as strings) that have a prediction, in frame order
zipcode = score_plot['Zipcode'].tolist()
len(zipcode)

262

In [8]:
# Read LA JSON data with zipcode information into the datastore variable.
# The former `if '<filename>':` guard tested a non-empty string literal, which
# is always true, so it never checked for the file; a missing file raises
# FileNotFoundError from open() exactly as before.
with open('zip-code-tabulation-areas-2012.geojson', 'r') as f:
    datastore = json.load(f)
# Number of features (zipcode areas) in the raw GeoJSON
len(datastore['features'])

804

In [9]:
# Find index of plotting zip areas' corresponding geo info.
# Build a name -> [feature positions] lookup in one pass over the features,
# then resolve each zipcode against it. This avoids rescanning every feature
# for every zipcode while producing the same `idx`: positions are emitted in
# zipcode order, ascending within a zipcode, and a zipcode that occurs more
# than once contributes its positions each time.
positions_by_name = {}
for j, feature in enumerate(datastore['features']):
    positions_by_name.setdefault(feature['properties']['name'], []).append(j)

idx = []
for i in zipcode:
    # Zipcodes with no matching feature contribute nothing.
    idx.extend(positions_by_name.get(str(i), []))

In [10]:
# Positions of features whose zipcode has no prediction: every feature
# position minus the matched ones.
# Note: idx may list the same position more than once, so len(idx) counts
# matches rather than distinct features.
all_positions = set(range(len(datastore['features'])))
notidx = list(all_positions - set(idx))
len(idx), len(notidx)

(262, 552)

In [11]:
# Delete zipcode area beyond model dataset (no prediction).
# Features are filtered by name rather than deleted by the positions in
# `notidx`: those positions are only valid against the unfiltered list, so
# re-running a positional delete would remove the wrong features or raise
# IndexError. This keeps exactly the features whose name equals one of the
# zipcodes in `zipcode` (the same match rule used to build `idx`) and is a
# no-op when the cell is run again.
keep_names = {str(z) for z in zipcode}
# Slice assignment mutates the existing list in place, as `del` did.
datastore['features'][:] = [
    feature for feature in datastore['features']
    if feature['properties']['name'] in keep_names
]

In [12]:
# Number of features currently held in datastore.
len(datastore['features'])

252

In [13]:
# Writing cleaned JSON data to LA_crime_zipcode.json.
# The former `if '<filename>':` guard tested a non-empty string literal, which
# is always true, so it has been removed; the write is unconditional as before.
with open('LA_crime_zipcode.json', 'w') as f:
    json.dump(datastore, f)

In [14]:
# Display the in-memory GeoJSON dict.
# NOTE(review): this echoes every polygon coordinate into the cell output;
# consider previewing a single feature or a count instead.
datastore

{'features': [{'geometry': {'coordinates': [[[[-118.265151, 33.970249],
       [-118.265166, 33.974735],
       [-118.262969, 33.974746],
       [-118.262981, 33.981836],
       [-118.265174, 33.981828],
       [-118.265185, 33.989227],
       [-118.256436, 33.989317],
       [-118.256436, 33.989498],
       [-118.241159, 33.989422],
       [-118.241126, 33.988174],
       [-118.240505, 33.988158],
       [-118.240502, 33.98867],
       [-118.23899, 33.988664],
       [-118.239021, 33.989403],
       [-118.237918, 33.989393],
       [-118.235685, 33.979486],
       [-118.235352, 33.979534],
       [-118.235105, 33.978705],
       [-118.234324, 33.974732],
       [-118.234685, 33.974731],
       [-118.234432, 33.972967],
       [-118.233915, 33.970674],
       [-118.233561, 33.970731],
       [-118.232835, 33.967469],
       [-118.232995, 33.967467],
       [-118.232405, 33.965314],
       [-118.231371, 33.963268],
       [-118.230013, 33.961768],
       [-118.231885, 33.961565],
      

#### Visualize Predicted and Actual Crime Level in Map

To make the map visualizations more readable by ensuring markers fall inside polygons, coordinates for the central points are manually calculated.

In [15]:
### Per-zipcode marker position: mean of the vertices of each feature's
### first coordinate ring (geometry['coordinates'][0][0])
features = datastore['features']
zipname = [feature['properties']['name'] for feature in features]

center = []
for feature in features:
    ring = feature['geometry']['coordinates'][0][0]
    xs = [vertex[0] for vertex in ring]  # first component of each vertex
    ys = [vertex[1] for vertex in ring]  # second component of each vertex
    # Stored in the same [x, y] order as the source coordinates
    center.append([np.mean(xs), np.mean(ys)])

In [16]:
### Build a GeoJSON FeatureCollection of one Point per zipcode, which is
### later passed to the search plugin
tmp = [
    {'geometry': {'coordinates': xy,
                  'type': 'Point'},
     'properties': {'name': name},
     'type': 'Feature'}
    for name, xy in zip(zipname, center)
]
points = {"type": "FeatureCollection", "features": tmp}

#### Note on Map Interpretation:
The choropleth map feature provided by folium is limited when it comes to plotting categorical levels. Therefore, the method for continuous variables is used, and the top-right legend with a scale of (0.5, 1.5, 2.5, 3.5) actually indicates the categorical levels (1, 2, 3). In other words, 1 falls into (0.5, 1.5) with the lightest color and 3 falls into (2.5, 3.5) with the darkest color. Also, 1 is safe, 2 is moderate, and 3 is dangerous.

Also, several examples for search input: 90014: LA downtown theater, 90212: Beverly Hills, 92697: UCI.

In [18]:
### Predicted crime level per zipcode (model results) as a choropleth map
geo_zip = os.path.join('', 'LA_crime_zipcode.json')

# Base map at the fixed starting location and zoom
m0 = folium.Map(location=[34.049427, -118.083042], zoom_start=5)

# Choropleth layer: joins score_plot.Zipcode to each feature's
# properties.name and colours it by Predicted_Score
m0.choropleth(
    geo_data=geo_zip,
    name='choropleth',
    data=score_plot,
    columns=['Zipcode', 'Predicted_Score'],
    key_on='feature.properties.name',
    fill_color='OrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Predicted Crime Level',
    threshold_scale=[0.5, 1.5, 2.5, 3.5],
)
folium.LayerControl().add_to(m0)

# Search bar plugin over the per-zipcode points
plugins.Search(points, search_zoom=20).add_to(m0)

# Write the map to a standalone HTML file, then render it inline
m0.save('Pred_SearchBar_Choropleth_Map_LA.html')
m0

In [19]:
### Actual crime level per zipcode (Police Dept data) as a choropleth map
geo_zip = os.path.join('', 'LA_crime_zipcode.json')

# Base map at the fixed starting location and zoom
m1 = folium.Map(location=[34.049427, -118.083042], zoom_start=5)

# Choropleth layer: joins score_plot.Zipcode to each feature's
# properties.name and colours it by Actual_Score
m1.choropleth(
    geo_data=geo_zip,
    name='choropleth',
    data=score_plot,
    columns=['Zipcode', 'Actual_Score'],
    key_on='feature.properties.name',
    fill_color='OrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Crime Level',
    threshold_scale=[0.5, 1.5, 2.5, 3.5],
)
folium.LayerControl().add_to(m1)

# Search bar plugin over the per-zipcode points
plugins.Search(points, search_zoom=20).add_to(m1)

# Write the map to a standalone HTML file, then render it inline
m1.save('Actual_SearchBar_Choropleth_Map_LA.html')
m1

#### Examine Prediction Error by Map Visualization 

Similarly, the map scale corresponds to (-1, 0, 1, 2). 0 indicates the prediction is the same as the actual value. -1 indicates the predicted crime level is lower than the actual level. 1 and 2 indicate the predicted crime level is higher than the actual level. Using the trained model, the prediction accuracy is near 90% for the complete LA dataset.

In [23]:
# Signed prediction error per zipcode: predicted level minus actual level
score_plot['Diff'] = score_plot['Predicted_Score'].sub(score_plot['Actual_Score'])
# Row counts at each error value
score_plot.groupby('Diff').count()

Unnamed: 0_level_0,Zipcode,Predicted_Score,Actual_Score
Diff,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
-1.0,14,14,14
0.0,235,235,235
1.0,10,10,10
2.0,3,3,3


In [24]:
# Prediction accuracy: share of rows whose predicted level equals the actual one
is_exact = score_plot['Diff'] == 0.0
sum(is_exact) / len(is_exact)

0.8969465648854962

In [21]:
# Rows where the predicted level differs from the actual level
mismatch = score_plot['Predicted_Score'].ne(score_plot['Actual_Score'])
df_error = score_plot.loc[mismatch]

In [22]:
# Base map for the prediction-error view
m2 = folium.Map(location=[34.049427, -118.083042], zoom_start=5)

# Choropleth layer: joins score_plot.Zipcode to each feature's
# properties.name and colours it by Diff (predicted minus actual)
m2.choropleth(
    geo_data=geo_zip,
    name='choropleth',
    data=score_plot,
    columns=['Zipcode', 'Diff'],
    key_on='feature.properties.name',
    fill_color='OrRd',
    fill_opacity=0.7,
    line_opacity=0.2,
    legend_name='Crime Level Prediction Error',
    threshold_scale=[-1.5, -0.5, 0.5, 1.5, 2.5],
)
folium.LayerControl().add_to(m2)

# Search bar plugin over the per-zipcode points
plugins.Search(points, search_zoom=20).add_to(m2)

# Write the map to a standalone HTML file, then render it inline
m2.save('Error_SearchBar_Choropleth_Map_LA.html')
m2