In [1]:
import folium
import geopy
import pandas as pd
import numpy as np
from folium.plugins import FastMarkerCluster
from pandas.io.json import json_normalize
import json
import shapefile
from shapely.geometry import shape, Point

In [2]:
# Read both input tables from the working directory:
#   atl_df   <- 'NPU_DATA.csv'
#   crime_df <- 'COBRA-2019.csv'
atl_df, crime_df = (
    pd.read_csv(csv_path)
    for csv_path in ('NPU_DATA.csv', 'COBRA-2019.csv')
)

In [3]:
# Preview the first rows of the table loaded from 'NPU_DATA.csv'.
atl_df.head()

Unnamed: 0,NPU,Neighborhoods,"Count, Total population, 2015","Median, Median age (years), 2015","Median, Median value of owner-occupied unit (dollars), 2015","Median, Median gross rent (dollars), 2015","Median, Median household Income, 2015","Percent, Public transportation (excluding taxicab) to work, 2015","Percent, Walked to work, 2015","Percent, Other means to work, 2015"
0,NPU V,"Adair Park, Pittsburgh",5328,34.7,72308.0,894.0,20732.0,37.0,2.3,0.6
1,NPU R,"Adams Park, Laurens Valley, Southwest",7148,34.6,115395.0,867.0,33988.0,17.0,1.2,1.0
2,NPU H,"Adamsville, Oakcliff",2312,29.9,64550.0,747.0,19798.0,22.7,3.6,0.1
3,NPU X,Airport,0,,,,,,,
4,NPU G,"Almond Park, Carey Park",2795,24.6,52729.0,639.0,19492.0,22.2,1.6,0.0


In [4]:
# Show the dtype pandas inferred for each column of atl_df.
atl_df.dtypes

NPU                                                                  object
Neighborhoods                                                        object
Count, Total population, 2015                                         int64
Median, Median age (years), 2015                                    float64
Median, Median value of owner-occupied unit (dollars), 2015         float64
Median, Median gross rent (dollars), 2015                           float64
Median, Median household Income, 2015                               float64
Percent, Public transportation (excluding taxicab) to work, 2015    float64
Percent, Walked to work, 2015                                       float64
Percent, Other means to work, 2015                                  float64
dtype: object

In [5]:
# Preview the first rows of the table loaded from 'COBRA-2019.csv'.
crime_df.head()

Unnamed: 0,Report Number,Report Date,Occur Date,Occur Time,Possible Date,Possible Time,Beat,Apartment Office Prefix,Apartment Number,Location,Shift Occurrence,Location Type,UCR Literal,UCR #,IBR Code,Neighborhood,NPU,Latitude,Longitude
0,190010138,2019-01-01,2019-01-01,20,2019-01-01,25,511.0,,,50 UPPER ALABAMA ST SW,Morning Watch,13.0,LARCENY-NON VEHICLE,620,2302,Downtown,M,33.75194,-84.38964
1,190010299,2019-01-01,2019-01-01,120,2019-01-01,130,511.0,,,20 BROAD ST,Morning Watch,,LARCENY-NON VEHICLE,620,2302,Downtown,M,33.75312,-84.39208
2,190011858,2019-01-01,2019-01-01,1740,2019-01-01,1750,411.0,,A15,3000 CONTINENTAL COLONY PKWY SW,Evening Watch,26.0,LARCENY-NON VEHICLE,620,2302,Greenbriar,R,33.68077,-84.4937
3,190010845,2019-01-01,2019-01-01,415,2019-01-01,420,607.0,,,1362 BOULEVARD SE,Morning Watch,23.0,LARCENY-NON VEHICLE,630,2303,Benteen Park,W,33.71744,-84.36818
4,190011541,2019-01-01,2019-01-01,1400,2019-01-01,1430,210.0,,,3393 PEACHTREE RD NE @LENOX MALL,Evening Watch,8.0,LARCENY-NON VEHICLE,630,2303,Lenox,B,33.84676,-84.36212


In [6]:
# Show the dtype pandas inferred for each column of crime_df.
# NOTE(review): in the recorded output 'Occur Time' is object while
# 'Possible Time' is int64, and the date columns are object rather than
# datetime - presumably some entries are non-numeric / unparsed; confirm
# before doing any time-based analysis.
crime_df.dtypes

Report Number                int64
Report Date                 object
Occur Date                  object
Occur Time                  object
Possible Date               object
Possible Time                int64
Beat                       float64
Apartment Office Prefix     object
Apartment Number            object
Location                    object
Shift Occurrence            object
Location Type               object
UCR Literal                 object
UCR #                        int64
IBR Code                    object
Neighborhood                object
NPU                         object
Latitude                   float64
Longitude                  float64
dtype: object

I am reusing a small helper class I wrote for another project. Its purpose is to determine, using a GeoJSON file, which region a given point lies in.

In [7]:
class Locator:
    """Find which GeoJSON feature (region) contains a given point.

    The GeoJSON file is read once and every feature geometry is converted
    to a shapely geometry at construction time, so repeated lookups (for
    example one per DataFrame row) do not rebuild the polygons on each call.

    Parameters
    ----------
    geojson : str or path-like
        Path to a GeoJSON file whose top-level object has a 'features' list.
    name_property : str, optional
        Key inside each feature's 'properties' whose value is returned by
        `feature_name`. Defaults to 'PAGENAME_1'.
    """

    def __init__(self, geojson, name_property='PAGENAME_1'):
        # GeoJSON is UTF-8 by specification; do not depend on the locale default.
        with open(geojson, encoding='utf-8') as f:
            self.js = json.load(f)
        self.name_property = name_property
        # Loop-invariant work hoisted out of feature_name: build each geometry
        # once and keep it next to the properties of the feature it came from.
        self._regions = [
            (shape(feature['geometry']), feature['properties'])
            for feature in self.js['features']
        ]

    def feature_name(self, pt):
        """Return the name of the first feature whose geometry contains `pt`.

        Parameters
        ----------
        pt : sequence of float
            Coordinates passed straight to `shapely.geometry.Point`, i.e. in
            the same (x, y) order as the coordinates stored in the GeoJSON.

        Returns
        -------
        The value of `name_property` for the first containing feature, or
        None when no feature contains the point.

        Raises
        ------
        KeyError
            If the containing feature has no `name_property` entry.
        """
        point = Point(pt)
        for poly, properties in self._regions:
            if poly.contains(point):
                return properties[self.name_property]
        # Make the "point is outside every region" result explicit.
        return None

In [8]:
locator = Locator('NPU.json')
# Build (longitude, latitude) tuples - the order seen in the resulting
# 'Coordinate' column - by zipping the two named columns. This replaces a
# row-wise apply that indexed each row by integer position (x[1], x[0]);
# positional access on a label-indexed Series is deprecated in pandas.
crime_df['Coordinate'] = list(zip(crime_df['Longitude'], crime_df['Latitude']))
# Overwrite the NPU column with the name of the GeoJSON region containing each
# point (None when no region contains it), so the values match the keys used
# in 'NPU.json'.
crime_df['NPU'] = crime_df['Coordinate'].apply(locator.feature_name)
crime_df.head()

Unnamed: 0,Report Number,Report Date,Occur Date,Occur Time,Possible Date,Possible Time,Beat,Apartment Office Prefix,Apartment Number,Location,Shift Occurrence,Location Type,UCR Literal,UCR #,IBR Code,Neighborhood,NPU,Latitude,Longitude,Coordinate
0,190010138,2019-01-01,2019-01-01,20,2019-01-01,25,511.0,,,50 UPPER ALABAMA ST SW,Morning Watch,13.0,LARCENY-NON VEHICLE,620,2302,Downtown,NPU-M,33.75194,-84.38964,"(-84.38964, 33.75194)"
1,190010299,2019-01-01,2019-01-01,120,2019-01-01,130,511.0,,,20 BROAD ST,Morning Watch,,LARCENY-NON VEHICLE,620,2302,Downtown,NPU-M,33.75312,-84.39208,"(-84.39208, 33.75312)"
2,190011858,2019-01-01,2019-01-01,1740,2019-01-01,1750,411.0,,A15,3000 CONTINENTAL COLONY PKWY SW,Evening Watch,26.0,LARCENY-NON VEHICLE,620,2302,Greenbriar,NPU-R,33.68077,-84.4937,"(-84.4937, 33.68077)"
3,190010845,2019-01-01,2019-01-01,415,2019-01-01,420,607.0,,,1362 BOULEVARD SE,Morning Watch,23.0,LARCENY-NON VEHICLE,630,2303,Benteen Park,NPU-W,33.71744,-84.36818,"(-84.36817999999998, 33.71744)"
4,190011541,2019-01-01,2019-01-01,1400,2019-01-01,1430,210.0,,,3393 PEACHTREE RD NE @LENOX MALL,Evening Watch,8.0,LARCENY-NON VEHICLE,630,2303,Lenox,NPU-B,33.84676,-84.36212,"(-84.36212, 33.84676)"


In [10]:
# Offence categories (values of the 'UCR Literal' column) that are counted.
# NOTE(review): the two LARCENY categories are included here although the list
# is named "violent" - confirm that this is intended.
violent_crimes = [
    'AGG ASSAULT',
    'HOMICIDE',
    'LARCENY-FROM VEHICLE',
    'ROBBERY-PEDESTRIAN',
    'LARCENY-NON VEHICLE',
]
# Number of matching incidents per NPU, as a Series indexed by the NPU value.
num_violent_crimes = (
    crime_df
    .loc[lambda incidents: incidents['UCR Literal'].isin(violent_crimes)]
    .groupby('NPU')
    .size()
)

In [11]:
# groupby(...).size() returns a Series, so assigning `.columns` only attaches a
# stray attribute and labels nothing. Name the values and the index explicitly
# instead; the result is still a Series keyed by NPU.
num_violent_crimes = num_violent_crimes.rename('Count').rename_axis('NPU')

In [12]:
# Base map centred on the coordinates used for Atlanta in this notebook.
atl_crime_choro = folium.Map(location=[33.7176502, -84.3601671], zoom_start=12)
# Map.choropleth() is deprecated and removed in newer folium releases; build
# the layer with the folium.Choropleth class and attach it to the map instead.
# `data` is a Series, so its index is matched against `key_on` in the GeoJSON.
folium.Choropleth(
    geo_data='NPU.json',
    name='Crime data',
    data=num_violent_crimes,
    key_on='feature.properties.PAGENAME_1',
    fill_color='YlOrRd',
    legend_name='Violent Crime Count',
).add_to(atl_crime_choro)
atl_crime_choro