# Import Libraries

In [64]:
import pandas as pd
import numpy as np

# Load Data

In [65]:
df = pd.read_csv('crime_cleaned.csv')
df.head()

Unnamed: 0.1,Unnamed: 0,PctLess9thGrade,PctNotHSGrad,PctBSorMore,PctUnemployed,PctEmploy,PctHousOccup,PctHousOwnOcc,PctVacantBoarded,PctVacMore6Mos,racepctblack,racePctWhite,racePctAsian,racePctHisp,nonViolPerPop,ViolentCrimesPerPop,State
0,0,5.81,9.9,48.18,2.7,64.55,98.37,91.01,3.12,37.5,1.37,91.78,6.5,1.88,1394.59,41.02,New Jersey
1,1,5.59,20.23,16.51,5.55,60.08,86.55,82.54,0.72,60.24,0.63,97.81,0.83,2.58,2543.13,131.47,New Jersey
2,2,4.57,12.02,41.42,3.75,67.53,97.46,81.45,2.53,46.46,11.1,83.79,4.57,2.83,2160.1,115.14,New Jersey
3,3,5.04,18.12,20.42,4.1,69.92,89.49,68.94,0.86,38.3,7.36,88.58,2.67,3.69,3668.03,251.24,New Jersey
4,4,4.91,9.49,48.6,2.71,69.32,98.06,75.52,1.19,38.1,0.54,94.18,4.9,2.7,1032.55,49.56,New Jersey


In [66]:
df = df.drop(columns = ['Unnamed: 0'])

In [67]:
df.columns

Index(['PctLess9thGrade', 'PctNotHSGrad', 'PctBSorMore', 'PctUnemployed',
       'PctEmploy', 'PctHousOccup', 'PctHousOwnOcc', 'PctVacantBoarded',
       'PctVacMore6Mos', 'racepctblack', 'racePctWhite', 'racePctAsian',
       'racePctHisp', 'nonViolPerPop', 'ViolentCrimesPerPop', 'State'],
      dtype='object')

In [68]:
df.dtypes

PctLess9thGrade        float64
PctNotHSGrad           float64
PctBSorMore            float64
PctUnemployed          float64
PctEmploy              float64
PctHousOccup           float64
PctHousOwnOcc          float64
PctVacantBoarded       float64
PctVacMore6Mos         float64
racepctblack           float64
racePctWhite           float64
racePctAsian           float64
racePctHisp            float64
nonViolPerPop          float64
ViolentCrimesPerPop    float64
State                   object
dtype: object

In [69]:
df.shape

(1902, 16)

In [70]:
# group values by state and average the values of Violent Crime and NonViolent Crime by state
crime_state = df.groupby('State').agg({'ViolentCrimesPerPop':'mean','nonViolPerPop':'mean'})[['ViolentCrimesPerPop','nonViolPerPop']].reset_index()
crime_state.head()

Unnamed: 0,State,ViolentCrimesPerPop,nonViolPerPop
0,Alabama,1197.585625,6025.02375
1,Alaska,576.576667,4961.673333
2,Arizona,551.0945,6532.186
3,Arkansas,718.5504,6509.5756
4,California,810.469388,5136.655683


In [71]:
crime_state['State'].unique()

array(['Alabama', 'Alaska', 'Arizona', 'Arkansas', 'California',
       'Colorado', 'Connecticut', 'Delaware', 'District of Columbia',
       'Florida', 'Georgia', 'Idaho', 'Indiana', 'Iowa', 'Kentucky',
       'Louisiana', 'Maine', 'Maryland', 'Massachusetts', 'Minnesota',
       'Mississippi', 'Missouri', 'Nevada', 'New Hampshire', 'New Jersey',
       'New Mexico', 'New York', 'North Carolina', 'North Dakota', 'Ohio',
       'Oklahoma', 'Oregon', 'Pennsylvania', 'Rhode Island',
       'South Carolina', 'South Dakota', 'Tennessee', 'Texas', 'Utah',
       'Virginia', 'Washington', 'West Virginia', 'Wisconsin', 'Wyoming'],
      dtype=object)

In [72]:
crime_state['State'].value_counts()

Minnesota               1
West Virginia           1
Kentucky                1
Louisiana               1
Wyoming                 1
Mississippi             1
New Jersey              1
Pennsylvania            1
Ohio                    1
Oklahoma                1
New Hampshire           1
Maine                   1
Arkansas                1
Virginia                1
Washington              1
Rhode Island            1
Texas                   1
New Mexico              1
New York                1
Utah                    1
Alaska                  1
Florida                 1
North Carolina          1
South Dakota            1
Missouri                1
Arizona                 1
Georgia                 1
Indiana                 1
Oregon                  1
Idaho                   1
North Dakota            1
Massachusetts           1
Alabama                 1
California              1
Connecticut             1
Tennessee               1
Nevada                  1
Iowa                    1
Maryland    

# Extract Latitudes and Longitudes of Data

In [73]:
from geopy.geocoders import Nominatim

geolocator = Nominatim(user_agent='app')

In [74]:
lat = []
lon = []

for location in crime_state['State']:
    location = geolocator.geocode(location)
    if location is None:
        lat.append(np.nan)
        lon.append(np.nan)
    else:
        lat.append(location.latitude)
        lon.append(location.longitude)

In [75]:
# check if the for loop worked
print(lat, lon)

[33.2588817, 64.4459613, 34.395342, 35.2048883, 36.7014631, 38.7251776, 41.6500201, 38.6920451, 38.89379365, 27.7567667, 32.3293809, 43.6447642, 40.3270127, 41.9216734, 37.5726028, 30.8703881, 45.709097, 39.5162234, 42.3788774, 45.9896587, 32.9715645, 38.7604815, 39.5158825, 43.4849133, 40.0757384, 34.5708167, 40.7127281, 35.6729639, 47.6201461, 40.2253569, 34.9550817, 43.9792797, 40.9699889, 41.7962409, 33.6874388, 44.6471761, 35.7730076, 31.8160381, 39.4225192, 37.1232245, 38.893661249999994, 38.4758406, 44.4308975, 43.1700264] [-86.8295337, -149.680909, -111.7632755, -92.4479108, -118.7559974, -105.6077167, -72.7342163, -75.4013315, -76.98799757261312, -81.4639835, -83.1137366, -114.0154071, -86.1746933, -93.3122705, -85.1551411, -92.007126, -68.8590201, -76.9382069, -72.032366, -94.6113288, -89.7348497, -92.5617875, -116.8537227, -71.6553992, -74.4041622, -105.993007, -74.0060152, -79.0392919, -100.540737, -82.6881395, -97.2684063, -120.737257, -77.7278831, -71.5992372, -80.4363743

In [76]:
# Create new columns for Longitude and Latitude in locations dataframe
crime_state['lat'] = lat
crime_state['lon'] = lon

crime_state.head()

Unnamed: 0,State,ViolentCrimesPerPop,nonViolPerPop,lat,lon
0,Alabama,1197.585625,6025.02375,33.258882,-86.829534
1,Alaska,576.576667,4961.673333,64.445961,-149.680909
2,Arizona,551.0945,6532.186,34.395342,-111.763276
3,Arkansas,718.5504,6509.5756,35.204888,-92.447911
4,California,810.469388,5136.655683,36.701463,-118.755997


In [77]:
# Create a new csv
crime_state.to_csv('Crime_unique_locations.csv', index=False)

# Geospatial Visualization

### Heatmap of Violent Crimes

In [86]:
import folium

def generatebasemap(default_location=[37,-95], default_zoom_start=4):
    basemap = folium.Map(location=default_location, zoom_start=default_zoom_start, tiles='Stamen Terrain')
    return basemap

In [87]:
# Call function
basemap = generatebasemap()
basemap

In [88]:
# Plot heatmap for ViolentCrimesPerPop
from folium.plugins import HeatMap

HeatMap(crime_state[['lat','lon','ViolentCrimesPerPop']], zoom=20).add_to(basemap)
basemap

In [85]:
# Plot heatmap for nonViolPerPop
from folium.plugins import HeatMap

HeatMap(crime_state[['lat','lon','nonViolPerPop']], zoom=20).add_to(basemap)
basemap