# Does the Time of Day Affect the Frequency of Stops Made?

In [2]:
import pandas as pd
import numpy as np
import datetime

In [146]:
# Read the stops CSV from the working directory and report its dimensions.
df = pd.read_csv('ct_hartford.csv')
n_rows, n_cols = df.shape
print(f'The data set has {n_rows} rows and {n_cols} columns')

The data set has 18439 rows and 26 columns


## Deciding which columns to keep

In [147]:
# List every column label so we can choose which ones the analysis needs.
available_columns = df.columns
print(available_columns)

Index(['raw_row_number', 'date', 'time', 'location', 'lat', 'lng', 'district',
       'subject_age', 'subject_race', 'subject_sex', 'officer_id_hash',
       'department_name', 'type', 'arrest_made', 'citation_issued',
       'search_vehicle', 'search_basis', 'reason_for_stop',
       'raw_subject_race_code', 'raw_subject_ethnicity_code',
       'raw_search_authorization_code', 'raw_intervention_disposition_code'],
      dtype='object')


In [148]:
# Columns retained for the analysis; the list order is also the display
# order used when the frame is re-selected with this list.
columns_to_keep = [
    'date',
    'time',
    'reason_for_stop',
    'arrest_made',
]

## Dropping the unneeded columns using keyword arguments

In [149]:
# Drop every column that is not listed in columns_to_keep.
# Index.difference returns the labels present in df.columns but absent from
# columns_to_keep. The result is rebound to `df` instead of using
# inplace=True, so the cell yields a new frame rather than mutating the
# loaded one in place.
unused_columns = df.columns.difference(columns_to_keep)
df = df.drop(columns=unused_columns)
print(df)

             date      time arrest_made         reason_for_stop
0      2013-10-13  15:21:00       False               Stop Sign
1      2013-10-24  01:12:00       False        Defective Lights
2      2013-10-26  10:06:00       False  Traffic Control Signal
3      2013-10-26  18:06:00       False  Traffic Control Signal
4      2013-10-26  19:56:00       False               Stop Sign
...           ...       ...         ...                     ...
18434  2016-09-29  16:10:00       False              Cell Phone
18435  2016-09-29  17:13:00       False               Stop Sign
18436  2016-09-29  18:30:00       False  Administrative Offense
18437  2016-09-29  20:09:00        True        Moving Violation
18438  2016-09-29  22:29:00       False           Speed Related

[18439 rows x 4 columns]


## Changing the order of the columns

In [150]:
# Re-select the columns in the order given by columns_to_keep
# (all rows, listed columns only).
df = df.loc[:, columns_to_keep]
print(df)

             date      time         reason_for_stop arrest_made
0      2013-10-13  15:21:00               Stop Sign       False
1      2013-10-24  01:12:00        Defective Lights       False
2      2013-10-26  10:06:00  Traffic Control Signal       False
3      2013-10-26  18:06:00  Traffic Control Signal       False
4      2013-10-26  19:56:00               Stop Sign       False
...           ...       ...                     ...         ...
18434  2016-09-29  16:10:00              Cell Phone       False
18435  2016-09-29  17:13:00               Stop Sign       False
18436  2016-09-29  18:30:00  Administrative Offense       False
18437  2016-09-29  20:09:00        Moving Violation        True
18438  2016-09-29  22:29:00           Speed Related       False

[18439 rows x 4 columns]


In [153]:
# Count the missing entries in each column: build a boolean mask of nulls,
# then sum it column-wise (True counts as 1).
null_mask = df.isna()
empty_value_count = null_mask.sum()
print(empty_value_count)

date               0
time               0
reason_for_stop    0
arrest_made        0
dtype: int64


## Dropping rows with missing values

In [154]:
# Remove every row (axis=0) that has a missing value in any column.
# These are the dropna defaults, spelled out for the reader.
df = df.dropna(axis=0, how='any')
print(df)

             date      time         reason_for_stop arrest_made
0      2013-10-13  15:21:00               Stop Sign       False
1      2013-10-24  01:12:00        Defective Lights       False
2      2013-10-26  10:06:00  Traffic Control Signal       False
3      2013-10-26  18:06:00  Traffic Control Signal       False
4      2013-10-26  19:56:00               Stop Sign       False
...           ...       ...                     ...         ...
18434  2016-09-29  16:10:00              Cell Phone       False
18435  2016-09-29  17:13:00               Stop Sign       False
18436  2016-09-29  18:30:00  Administrative Offense       False
18437  2016-09-29  20:09:00        Moving Violation        True
18438  2016-09-29  22:29:00           Speed Related       False

[18430 rows x 4 columns]


## Percentage of arrests made

In [155]:
# Share of stops that ended in an arrest, as a percentage of the rows
# currently in df. The denominator comes from len(df) rather than a
# hard-coded row count, so the figure stays correct if the input data or
# the cleaning steps above change.
num_true_rows = (df['arrest_made'] == True).sum()
total_rows = len(df)
print(f'{round(num_true_rows / total_rows * 100, 2)}%')

3.87%


## Parsing the time column so rows can be sorted by time of day

In [156]:
# Parse the 'HH:MM:SS' strings and keep only the time-of-day component, so
# each value becomes a datetime.time object.
# assign() returns a new frame with the 'time' column replaced (column order
# is preserved) instead of writing into df in place; df was produced by an
# earlier column selection and dropna, and in-place column assignment on
# such a frame can trigger pandas' SettingWithCopyWarning.
parsed_time = pd.to_datetime(df['time'], format='%H:%M:%S').dt.time
df = df.assign(time=parsed_time)
print(df)

             date      time         reason_for_stop arrest_made
0      2013-10-13  15:21:00               Stop Sign       False
1      2013-10-24  01:12:00        Defective Lights       False
2      2013-10-26  10:06:00  Traffic Control Signal       False
3      2013-10-26  18:06:00  Traffic Control Signal       False
4      2013-10-26  19:56:00               Stop Sign       False
...           ...       ...                     ...         ...
18434  2016-09-29  16:10:00              Cell Phone       False
18435  2016-09-29  17:13:00               Stop Sign       False
18436  2016-09-29  18:30:00  Administrative Offense       False
18437  2016-09-29  20:09:00        Moving Violation        True
18438  2016-09-29  22:29:00           Speed Related       False

[18430 rows x 4 columns]


In [13]:
# Show the dtype pandas holds for each remaining column.
column_dtypes = df.dtypes
print(column_dtypes)

date               object
time               object
reason_for_stop    object
arrest_made        object
dtype: object
