# Nashville Polive Service Calls Analysis

## Dependencies

In [1]:
from matplotlib import pyplot as plt
import numpy as np
import pandas as pd
import requests

### Import

* N.B. - The dataset is large (more than 6.5M records), so it is not available in this Github repo.

&nbsp; &nbsp; &nbsp; &nbsp; &nbsp; If you'd like the dataset, you may find it [here](https://data.nashville.gov/Police/Metro-Nashville-Police-Department-Calls-for-Servic/kwnd-qrrm), on the nashville.gov website.

In [20]:
main_df = pd.read_csv('data/Metro_Nashville_Police_Department_Calls_for_Service.csv', parse_dates=['Call Received'])

  main_df = pd.read_csv('data/Metro_Nashville_Police_Department_Calls_for_Service.csv', parse_dates=['Call Received'])


### Preprocessing

* Let's get a sample of the data to see what we're working with.

In [21]:
samp_df = main_df.sample(frac=.01, random_state=22)

In [8]:
cols = ['Event Number', 'Call Received', 'Complaint Number', 'Tencode',
       'Tencode Description', 'Tencode Suffix', 'Tencode Suffix Description',
       'Disposition Code', 'Disposition Description', 'Block', 'Street Name',
       'Unit Dispatched', 'Shift', 'Sector', 'Zone', 'RPA', 'Latitude',
       'Longitude', 'Mapped Location']

In [22]:
samp_df.dtypes

Event Number                          object
Call Received                 datetime64[ns]
Complaint Number                     float64
Tencode                                int64
Tencode Description                   object
Tencode Suffix                        object
Tencode Suffix Description            object
Disposition Code                      object
Disposition Description               object
Block                                float64
Street Name                           object
Unit Dispatched                       object
Shift                                 object
Sector                                object
Zone                                  object
RPA                                  float64
Latitude                             float64
Longitude                            float64
Mapped Location                       object
dtype: object

#### 'Event Number'

* It looks like all the event numbers begin with 'PD'; if this is the case, then I can strip those two characters and cast as an int, saving space

In [None]:
pd_check = [event.startswith('PD') for event in samp_df['Event Number'].values]

In [None]:
print(sum(pd_check))

In [6]:
def event_number_clean(num):
    return int(num[2:])

In [7]:
samp_df['Event Number'] = samp_df['Event Number'].apply(event_number_clean)

In [None]:
samp_df.iloc[[0]]

#### 'Call Received'

* This is a datetime column, so I'll parse as I read in the csv

In [23]:
samp_df['Call Received'].head(20)

5797724   2016-12-01 16:51:05
6541170   2021-06-10 21:08:23
827216    2016-04-10 20:40:50
503265    2019-06-03 10:42:15
4525988   2020-06-05 00:59:52
5769934   2018-10-06 21:03:00
2849480   2020-09-04 20:03:21
1010424   2020-01-23 07:48:02
4775143   2018-06-03 17:17:59
5435890   2015-11-26 16:23:20
3102685   2019-04-26 10:28:01
6226479   2020-12-28 15:26:32
6396955   2021-01-28 18:17:39
4008377   2017-04-25 14:15:19
1958280   2019-12-30 09:32:33
5330872   2016-01-14 13:41:43
5190845   2015-12-18 05:18:07
1015439   2015-01-07 17:15:41
5494682   2018-04-20 17:05:18
5482143   2016-02-11 19:56:38
Name: Call Received, dtype: datetime64[ns]

#### 'Complaint Number'

* I am not interested in the specific number, just whether or not an incident was generated, so I'll update this to a simple Boolean flag

In [25]:
samp_df['Complaint Number'].isna().value_counts()

True     60129
False     5572
Name: Complaint Number, dtype: int64

In [32]:
def complaint_number_clean(num):
    if num == null:
        return 0
    else:
        return 1

In [33]:
samp_df['generated_incident_yn'] = samp_df['Complaint Number'].apply(complaint_number_clean)

NameError: name 'Null' is not defined

In [31]:
# check the function
samp_df['generated_incident'].value_counts()

1    65701
Name: generated_incident, dtype: int64

## EDA

In [None]:
main_df.columns

In [None]:
main_df.dtypes

In [None]:
for col in cols:
    print(f'Column name: {col}')
    print(main_df[col].head(10))
    print('\n*******\n')