In [1]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point

#### This notebook handles two tasks for the police calls data
1. Add zipcodes to the police calls (using the older 2018 file from the 2019 jumpstart because it has fewer missing values)
2. Remove some unused columns and rename others

In [23]:
# Load the raw 2018 Metro Nashville police calls-for-service export
# and preview the first two rows.
police_calls = pd.read_csv(
    filepath_or_buffer='../data/Metro_Nashville_Calls_For_Police_Service_2018.csv'
)
police_calls.head(n=2)

Unnamed: 0,Event Number,Call Received,Complaint Number,Shift,Tencode,Tencode Description,Tencode Suffix,Tencode Suffix Description,Disposition Code,Disposition Description,Block,Street Name,Unit Dispatched,Sector,Zone,RPA,Latitude,Longitude,Mapped Location
0,PD201800174180,02/16/2018 05:47:00 PM,20180155292,B,44,Disorderly Person,P,PROGRESS,6,ASSISTED OTHER UNIT,,GALLATIN PKE S,711B,,,,,,
1,PD201800174484,02/16/2018 07:09:00 PM,20180155542,B,93,Traffic Violation,,,3,CITATION - TRAFFIC,600.0,ALBANY DR,521B,H,23.0,9509.0,36.21,-86.6,"(36.21, -86.6)"


In [24]:
# Count the missing values in each column of the raw police calls.
police_calls.isna().sum()

Event Number                       0
Call Received                      0
Complaint Number                   0
Shift                          30686
Tencode                            0
Tencode Description                0
Tencode Suffix                319977
Tencode Suffix Description    355368
Disposition Code                   0
Disposition Description          138
Block                          22615
Street Name                        2
Unit Dispatched                39795
Sector                         30203
Zone                           23556
RPA                            23556
Latitude                       25390
Longitude                      25390
Mapped Location                25390
dtype: int64

In [26]:
# Preview the first five rows of the raw police calls.
police_calls.head(n=5)

Unnamed: 0,Event Number,Call Received,Complaint Number,Shift,Tencode,Tencode Description,Tencode Suffix,Tencode Suffix Description,Disposition Code,Disposition Description,Block,Street Name,Unit Dispatched,Sector,Zone,RPA,Latitude,Longitude,Mapped Location
0,PD201800174180,02/16/2018 05:47:00 PM,20180155292,B,44,Disorderly Person,P,PROGRESS,6,ASSISTED OTHER UNIT,,GALLATIN PKE S,711B,,,,,,
1,PD201800174484,02/16/2018 07:09:00 PM,20180155542,B,93,Traffic Violation,,,3,CITATION - TRAFFIC,600.0,ALBANY DR,521B,H,23.0,9509.0,36.21,-86.6,"(36.21, -86.6)"
2,PD201800173483,02/16/2018 01:38:00 PM,20180154722,A,45,Vehicle Accident - Property Damage,P,PROGRESS,1,M.P.D. REPORT COMPLED,0.0,MCCANN ST & 2ND AV S,3T74,S,11.0,8153.0,36.151,-86.768,"(36.151, -86.768)"
3,PD201800175018,02/16/2018 10:08:00 PM,0,B,87,Safety Hazard,P,PROGRESS,11,DISREGARD / SIGNAL 9,0.0,CLARKSVILLE PKE & KINGS LN,,N,23.0,3141.0,36.219,-86.837,"(36.219, -86.837)"
4,PD201800174650,02/16/2018 08:02:00 PM,20180155684,B,93,Traffic Violation,,,9,SUBJECT WARNED,5800.0,OLD HICKORY BLVD,5F34,H,21.0,9527.0,36.169,-86.601,"(36.169, -86.601)"


In [27]:
# Keep only the calls that have a latitude; a call without coordinates
# cannot be placed inside a zip code polygon later on.
police_calls = police_calls.dropna(subset=['Latitude'])
police_calls.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 721933 entries, 1 to 747322
Data columns (total 19 columns):
 #   Column                      Non-Null Count   Dtype  
---  ------                      --------------   -----  
 0   Event Number                721933 non-null  object 
 1   Call Received               721933 non-null  object 
 2   Complaint Number            721933 non-null  int64  
 3   Shift                       691763 non-null  object 
 4   Tencode                     721933 non-null  int64  
 5   Tencode Description         721933 non-null  object 
 6   Tencode Suffix              403239 non-null  object 
 7   Tencode Suffix Description  373220 non-null  object 
 8   Disposition Code            721933 non-null  int64  
 9   Disposition Description     721800 non-null  object 
 10  Block                       721933 non-null  float64
 11  Street Name                 721933 non-null  object 
 12  Unit Dispatched             682173 non-null  object 
 13  Sector        

In [28]:
# List the column names as an array (handy for copy/paste into the next cell).
police_calls.columns.to_numpy()

array(['Event Number', 'Call Received', 'Complaint Number', 'Shift',
       'Tencode', 'Tencode Description', 'Tencode Suffix',
       'Tencode Suffix Description', 'Disposition Code',
       'Disposition Description', 'Block', 'Street Name',
       'Unit Dispatched', 'Sector', 'Zone', 'RPA', 'Latitude',
       'Longitude', 'Mapped Location'], dtype=object)

In [30]:
# Keep only the columns used downstream. This leaves out
# 'Tencode Suffix', 'Tencode Suffix Description' and 'Mapped Location'.
police_calls = police_calls.loc[:, [
    'Event Number',
    'Call Received',
    'Complaint Number',
    'Shift',
    'Tencode',
    'Tencode Description',
    'Disposition Code',
    'Disposition Description',
    'Block',
    'Street Name',
    'Unit Dispatched',
    'Sector',
    'Zone',
    'RPA',
    'Latitude',
    'Longitude',
]]
police_calls.head(n=5)

Unnamed: 0,Event Number,Call Received,Complaint Number,Shift,Tencode,Tencode Description,Disposition Code,Disposition Description,Block,Street Name,Unit Dispatched,Sector,Zone,RPA,Latitude,Longitude
1,PD201800174484,02/16/2018 07:09:00 PM,20180155542,B,93,Traffic Violation,3,CITATION - TRAFFIC,600.0,ALBANY DR,521B,H,23,9509.0,36.21,-86.6
2,PD201800173483,02/16/2018 01:38:00 PM,20180154722,A,45,Vehicle Accident - Property Damage,1,M.P.D. REPORT COMPLED,0.0,MCCANN ST & 2ND AV S,3T74,S,11,8153.0,36.151,-86.768
3,PD201800175018,02/16/2018 10:08:00 PM,0,B,87,Safety Hazard,11,DISREGARD / SIGNAL 9,0.0,CLARKSVILLE PKE & KINGS LN,,N,23,3141.0,36.219,-86.837
4,PD201800174650,02/16/2018 08:02:00 PM,20180155684,B,93,Traffic Violation,9,SUBJECT WARNED,5800.0,OLD HICKORY BLVD,5F34,H,21,9527.0,36.169,-86.601
5,PD201800175242,02/16/2018 11:32:00 PM,20180156185,,43,Want Officer for Investigation / Assistance,6,ASSISTED OTHER UNIT,2000.0,CHURCH ST,815C,MT,13,5509.0,36.154,-86.802


In [32]:
# Load the zip code polygons that the police calls will be joined against
# and preview the first two rows.
zipcodes = gpd.read_file(
    '../data/Zip Codes.geojson'
)
zipcodes.head(n=2)

Unnamed: 0,zip,objectid,po_name,shape_stlength,shape_starea,geometry
0,37115,1,MADISON,178783.0248888682,596553400.5788574,"MULTIPOLYGON (((-86.68725 36.31821, -86.68722 ..."
1,37216,3,NASHVILLE,75820.99782140006,188884682.28344727,"MULTIPOLYGON (((-86.73451 36.23774, -86.73425 ..."


In [35]:
# Build a point geometry for every call from its coordinates.
# Points are (x, y), i.e. (longitude, latitude) -- note the order.
# points_from_xy creates all points in one vectorized call instead of
# running a Python lambda once per row, which is slow on a frame this size.
police_calls['geometry'] = gpd.points_from_xy(
    police_calls['Longitude'],
    police_calls['Latitude'],
)
police_calls.head()

Unnamed: 0,Event Number,Call Received,Complaint Number,Shift,Tencode,Tencode Description,Disposition Code,Disposition Description,Block,Street Name,Unit Dispatched,Sector,Zone,RPA,Latitude,Longitude,geometry
1,PD201800174484,02/16/2018 07:09:00 PM,20180155542,B,93,Traffic Violation,3,CITATION - TRAFFIC,600.0,ALBANY DR,521B,H,23,9509.0,36.21,-86.6,POINT (-86.59999999999999 36.21)
2,PD201800173483,02/16/2018 01:38:00 PM,20180154722,A,45,Vehicle Accident - Property Damage,1,M.P.D. REPORT COMPLED,0.0,MCCANN ST & 2ND AV S,3T74,S,11,8153.0,36.151,-86.768,POINT (-86.76799999999999 36.151)
3,PD201800175018,02/16/2018 10:08:00 PM,0,B,87,Safety Hazard,11,DISREGARD / SIGNAL 9,0.0,CLARKSVILLE PKE & KINGS LN,,N,23,3141.0,36.219,-86.837,POINT (-86.837 36.219)
4,PD201800174650,02/16/2018 08:02:00 PM,20180155684,B,93,Traffic Violation,9,SUBJECT WARNED,5800.0,OLD HICKORY BLVD,5F34,H,21,9527.0,36.169,-86.601,POINT (-86.601 36.169)
5,PD201800175242,02/16/2018 11:32:00 PM,20180156185,,43,Want Officer for Investigation / Assistance,6,ASSISTED OTHER UNIT,2000.0,CHURCH ST,815C,MT,13,5509.0,36.154,-86.802,POINT (-86.80200000000001 36.154)


In [37]:
# Promote the police calls to a GeoDataFrame, using the 'geometry' column
# built above and tagging it with the same CRS as the zip code polygons.
police_geo = gpd.GeoDataFrame(
    police_calls,
    geometry='geometry',
    crs=zipcodes.crs,
)
type(police_geo)

geopandas.geodataframe.GeoDataFrame

In [38]:
# Attach zip code attributes to each call by finding the zip code polygon
# the call's point falls within. The inner join (the default, spelled out
# here) keeps only calls that land inside some polygon.
# `predicate=` replaces the `op=` keyword, which was deprecated in
# geopandas 0.10 and removed in later releases.
police_calls_2018 = gpd.sjoin(
    police_geo,
    zipcodes,
    how='inner',
    predicate='within',
)
police_calls_2018.head()

Unnamed: 0,Event Number,Call Received,Complaint Number,Shift,Tencode,Tencode Description,Disposition Code,Disposition Description,Block,Street Name,...,RPA,Latitude,Longitude,geometry,index_right,zip,objectid,po_name,shape_stlength,shape_starea
1,PD201800174484,02/16/2018 07:09:00 PM,20180155542,B,93,Traffic Violation,3,CITATION - TRAFFIC,600.0,ALBANY DR,...,9509.0,36.21,-86.6,POINT (-86.60000 36.21000),46,37076,53,HERMITAGE,291751.428326004,983805973.8341064
4,PD201800174650,02/16/2018 08:02:00 PM,20180155684,B,93,Traffic Violation,9,SUBJECT WARNED,5800.0,OLD HICKORY BLVD,...,9527.0,36.169,-86.601,POINT (-86.60100 36.16900),46,37076,53,HERMITAGE,291751.428326004,983805973.8341064
7,PD201800175038,02/16/2018 10:16:00 PM,20180156001,B,3,Administrative/Special Assignment,6,ASSISTED OTHER UNIT,3700.0,JAMES KAY LN,...,9511.0,36.178,-86.616,POINT (-86.61600 36.17800),46,37076,53,HERMITAGE,291751.428326004,983805973.8341064
89,PD201800174217,02/16/2018 05:58:00 PM,20180155321,B,87,Safety Hazard,3,CITATION - TRAFFIC,0.0,MM 221 4 I40 E,...,9504.0,36.17,-86.591,POINT (-86.59100 36.17000),46,37076,53,HERMITAGE,291751.428326004,983805973.8341064
150,PD201800173907,02/16/2018 04:19:00 PM,20180155047,B,93,Traffic Violation,9,SUBJECT WARNED,1300.0,TULIP GROVE RD,...,9557.0,36.175,-86.596,POINT (-86.59600 36.17500),46,37076,53,HERMITAGE,291751.428326004,983805973.8341064


In [45]:
# Give the joined zip code columns friendlier names.
police_calls_2018 = police_calls_2018.rename(
    {'zip': 'zipcode', 'po_name': 'PO'},
    axis='columns',
)

In [43]:
# Remove 'RPA' plus the bookkeeping columns brought in by the spatial join.
police_calls_2018 = police_calls_2018.drop(
    ['RPA', 'index_right', 'objectid', 'shape_stlength', 'shape_starea'],
    axis=1,
)

In [46]:
# Check the cleaned-up result before writing it out.
police_calls_2018.head(n=5)

Unnamed: 0,Event Number,Call Received,Complaint Number,Shift,Tencode,Tencode Description,Disposition Code,Disposition Description,Block,Street Name,Unit Dispatched,Sector,Zone,Latitude,Longitude,geometry,zipcode,PO
1,PD201800174484,02/16/2018 07:09:00 PM,20180155542,B,93,Traffic Violation,3,CITATION - TRAFFIC,600.0,ALBANY DR,521B,H,23,36.21,-86.6,POINT (-86.60000 36.21000),37076,HERMITAGE
4,PD201800174650,02/16/2018 08:02:00 PM,20180155684,B,93,Traffic Violation,9,SUBJECT WARNED,5800.0,OLD HICKORY BLVD,5F34,H,21,36.169,-86.601,POINT (-86.60100 36.16900),37076,HERMITAGE
7,PD201800175038,02/16/2018 10:16:00 PM,20180156001,B,3,Administrative/Special Assignment,6,ASSISTED OTHER UNIT,3700.0,JAMES KAY LN,521B,H,21,36.178,-86.616,POINT (-86.61600 36.17800),37076,HERMITAGE
89,PD201800174217,02/16/2018 05:58:00 PM,20180155321,B,87,Safety Hazard,3,CITATION - TRAFFIC,0.0,MM 221 4 I40 E,5122,H,23,36.17,-86.591,POINT (-86.59100 36.17000),37076,HERMITAGE
150,PD201800173907,02/16/2018 04:19:00 PM,20180155047,B,93,Traffic Violation,9,SUBJECT WARNED,1300.0,TULIP GROVE RD,521B,H,23,36.175,-86.596,POINT (-86.59600 36.17500),37076,HERMITAGE


In [47]:
# Save the zip-coded police calls, without the DataFrame index.
police_calls_2018.to_csv(
    path_or_buf='../data/police_calls_2018.csv',
    index=False,
)

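#### The same preparation for the hubNashville data
- limit to requests opened in 2018
- keep the zipcode already present in the data (rows without a ZIP or coordinates are dropped)
- a little bit of cleanup (drop unused columns, rename `ZIP` to `Zipcode`)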

In [2]:
# Load the raw hubNashville 311 service-request export.
# low_memory=False makes pandas infer each column's dtype from the whole
# file in one pass instead of chunk by chunk. The saved output of this cell
# contains a pandas warning from the default chunked parse (presumably a
# mixed-type DtypeWarning -- TODO confirm which column triggers it).
hub = pd.read_csv(
    '../data/hubNashville__311__Service_Requests.csv',
    low_memory=False,
)
hub.head()

  has_raised = await self.run_ast_nodes(code_ast.body, cell_name,


Unnamed: 0,Request #,Status,Request Type,Subrequest Type,Additional Subrequest Type,Date / Time Opened,Date / Time Closed,Request Origin,System of Origin ID,Contact Type,...,State Issue,Parent Request,Closed When Created,Address,City,Council District,ZIP,Latitude,Longitude,Mapped Location
0,198180,Closed,Resolved by hubNashville on First Call,Resolved by hubNashville on First Call,Resolved by hubNashville on First Call,06/11/2019 06:15:39 PM,06/11/2019 06:15:39 PM,Phone,,,...,False,,True,,,,,,,
1,197727,Closed,Resolved by hubNashville on First Call,Resolved by hubNashville on First Call,Resolved by hubNashville on First Call,06/10/2019 09:24:14 PM,06/10/2019 09:24:14 PM,Phone,,,...,False,,True,,,,,,,
2,197987,Closed,Resolved by hubNashville on First Call,Resolved by hubNashville on First Call,Resolved by hubNashville on First Call,06/11/2019 03:06:56 PM,06/11/2019 03:06:56 PM,Phone,,,...,False,,True,,,,,,,
3,198062,Closed,Resolved by hubNashville on First Call,Resolved by hubNashville on First Call,Resolved by hubNashville on First Call,06/11/2019 04:03:54 PM,06/11/2019 04:03:54 PM,Phone,,,...,False,,True,,,,,,,
4,197917,Closed,Resolved by hubNashville on First Call,Resolved by hubNashville on First Call,Resolved by hubNashville on First Call,06/11/2019 01:56:36 PM,06/11/2019 01:56:36 PM,Phone,,,...,False,,True,,,,,,,


In [3]:
# Parse the opened timestamp into datetime64 so rows can be filtered by year.
# The raw values look like '06/11/2019 06:15:39 PM'. Passing the format
# explicitly avoids per-value format inference and raises on any value that
# does not match, rather than guessing.
hub['Date / Time Opened'] = pd.to_datetime(
    hub['Date / Time Opened'],
    format='%m/%d/%Y %I:%M:%S %p',
)

In [4]:
# Which years do the requests span?
pd.unique(hub['Date / Time Opened'].dt.year)

array([2019, 2020, 2018, 2017])

In [5]:
# Restrict to the requests that were opened during 2018.
hub2018 = hub[hub['Date / Time Opened'].dt.year.eq(2018)]

In [6]:
# (rows, columns) of the 2018 subset before any cleanup.
hub2018.shape

(86173, 21)

In [7]:
# Count the missing values in each column of the 2018 requests.
hub2018.isna().sum()

Request #                         0
Status                            0
Request Type                    130
Subrequest Type                 130
Additional Subrequest Type      162
Date / Time Opened                0
Date / Time Closed              361
Request Origin                    0
System of Origin ID           75995
Contact Type                  74415
Preferred Language            86173
State Issue                       0
Parent Request                85457
Closed When Created               0
Address                        3938
City                           4811
Council District               4833
ZIP                            4855
Latitude                       4601
Longitude                      4601
Mapped Location                4601
dtype: int64

In [8]:
# List the column names as an array (handy for copy/paste into the next cell).
hub2018.columns.to_numpy()

array(['Request #', 'Status', 'Request Type', 'Subrequest Type',
       'Additional Subrequest Type', 'Date / Time Opened',
       'Date / Time Closed', 'Request Origin', 'System of Origin ID',
       'Contact Type', 'Preferred Language', 'State Issue',
       'Parent Request', 'Closed When Created', 'Address', 'City',
       'Council District', 'ZIP', 'Latitude', 'Longitude',
       'Mapped Location'], dtype=object)

In [9]:
# Keep only the request details, the timestamps, and the location columns.
hub2018 = hub2018.loc[:, [
    'Request #',
    'Status',
    'Request Type',
    'Subrequest Type',
    'Additional Subrequest Type',
    'Date / Time Opened',
    'Date / Time Closed',
    'Request Origin',
    'ZIP',
    'Latitude',
    'Longitude',
]]

In [10]:
# Discard requests that have no ZIP code.
hub2018 = hub2018.dropna(subset=['ZIP'])

In [11]:
# Discard requests that have no latitude.
hub2018 = hub2018.dropna(subset=['Latitude'])

In [12]:
# Re-check missing values after dropping rows without ZIP or latitude.
hub2018.isna().sum()

Request #                       0
Status                          0
Request Type                    0
Subrequest Type                 0
Additional Subrequest Type     18
Date / Time Opened              0
Date / Time Closed            300
Request Origin                  0
ZIP                             0
Latitude                        0
Longitude                       0
dtype: int64

In [13]:
# (rows, columns) remaining after the column selection and null filtering.
hub2018.shape

(81166, 11)

In [14]:
# Rename the ZIP column to 'Zipcode' and preview the first two rows.
hub2018 = hub2018.rename({'ZIP': 'Zipcode'}, axis='columns')
hub2018.head(n=2)

Unnamed: 0,Request #,Status,Request Type,Subrequest Type,Additional Subrequest Type,Date / Time Opened,Date / Time Closed,Request Origin,Zipcode,Latitude,Longitude
59,45480,Closed,"Streets, Roads & Sidewalks",Blocking the Right of Way,Blocking the Right of Way,2018-02-12 14:00:20,02/12/2018 03:48:14 PM,Phone,37115,36.296917,-86.699162
85,45482,Closed,"Streets, Roads & Sidewalks",Blocking the Right of Way,Blocking the Right of Way,2018-02-12 14:00:21,02/12/2018 03:49:13 PM,Phone,37076,36.215403,-86.588513


In [15]:
# How many requests are in each status?
hub2018.loc[:, 'Status'].value_counts()

Closed         80863
New              150
In Progress      103
Assigned          24
On Hold           12
Transferred       11
Denied             3
Name: Status, dtype: int64

In [16]:
# Save the cleaned 2018 hubNashville requests, without the DataFrame index.
hub2018.to_csv(
    path_or_buf='../data/hubNashville_2018.csv',
    index=False,
)