# Road Safety in San Diego County

In [1]:
!pip install geojson



In [2]:
#imports 

# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

import string

# Display plots directly in the notebook instead of in a new window
%matplotlib inline

In [3]:
# Configure libraries
# The seaborn library makes plots look nicer
sns.set()
sns.set_context('talk')

# Round decimals when displaying DataFrames
pd.set_option('precision', 2)

## Data frame management

We should figure out how we want to structure the data frames. 
Options include: 
- Merge into one df
- Find matching identifiers and try to merge data by these
- Work in completely independent dfs

In [10]:
df_police_stops = pd.read_csv("/Users/sarahamiraslani/Desktop/cogs108_final_project/datasets/police_stops/ripa_stops_datasd_v1.csv")
df_collisions = pd.read_csv("/Users/sarahamiraslani/Desktop/cogs108_final_project/datasets/traffic_collisions/pd_collisions_datasd_v1.csv")
df_police_calls = pd.read_csv("/Users/sarahamiraslani/Desktop/cogs108_final_project/datasets/police_calls/pd_calls_for_service_2015_datasd_v1.csv")
df_police_calls = pd.read_csv("/Users/sarahamiraslani/Desktop/cogs108_final_project/datasets/police_calls/pd_calls_for_service_2016_datasd_v1.csv")
df_police_calls = pd.read_csv("/Users/sarahamiraslani/Desktop/cogs108_final_project/datasets/police_calls/pd_calls_for_service_2017_datasd_v1.csv")
df_police_calls = pd.read_csv("/Users/sarahamiraslani/Desktop/cogs108_final_project/datasets/police_calls/pd_calls_for_service_2018_datasd.csv")
df_police_calls = pd.read_csv("/Users/sarahamiraslani/Desktop/cogs108_final_project/datasets/police_calls/pd_calls_for_service_2019_datasd.csv")

# DATA CLEANING

## df_collisions

In [11]:
# Splitting datetime for the datasets

# Splitting datetime for df_collisions
# Creating seperate columns for Date and Time
date_time = df_collisions['date_time'].str.split(' ', n=1, expand=True)
df_collisions = df_collisions.drop(['date_time'], axis=1)
df_collisions['date'] = date_time[0]
df_collisions['time'] = date_time[1]

In [12]:
# droping useless columns
drop_cols_collision = ['address_pd_primary','address_pd_intersecting', 'address_name_intersecting', 'address_sfx_intersecting','report_id']
df_collisions = df_collisions.drop(drop_cols_collision, axis = 1)


In [13]:
# Making everything lowercase

df_collisions['address_road_primary'] = df_collisions['address_road_primary'].str.lower()
df_collisions['address_sfx_primary'] = df_collisions['address_sfx_primary'].str.lower()
df_collisions['charge_desc'] = df_collisions['charge_desc'].str.lower()
df_collisions['hit_run_lvl'] = df_collisions['hit_run_lvl'].str.lower()

In [14]:
#stripping gets rid of extraneous spaces to allow for easier processing of data
df_collisions['address_road_primary'] = df_collisions.address_road_primary.str.strip()
df_collisions['address_sfx_primary'] = df_collisions.address_sfx_primary.str.strip()
df_collisions['violation_section']= df_collisions.violation_section.str.strip()
df_collisions['violation_type'] = df_collisions.violation_type.str.strip()
df_collisions['charge_desc'] = df_collisions.charge_desc.str.strip()
df_collisions['hit_run_lvl'] = df_collisions.hit_run_lvl.str.strip()

In [15]:
# convert the 'Date' column to datetime format 
df_collisions['date']= pd.to_datetime(df_collisions['date']) 
df_collisions['time']= pd.to_datetime(df_collisions['time']) 

In [16]:
df_collisions.dtypes

police_beat                        int64
address_number_primary             int64
address_road_primary              object
address_sfx_primary               object
violation_section                 object
violation_type                    object
charge_desc                       object
injured                            int64
killed                             int64
hit_run_lvl                       object
date                      datetime64[ns]
time                      datetime64[ns]
dtype: object

## df_police calls

In [17]:
# Change entries in col 'day_of_week' to represent the day of the week in words
df_police_calls['day_of_week'] = df_police_calls['day_of_week'].replace(1,'Monday')
df_police_calls['day_of_week'] = df_police_calls['day_of_week'].replace(2,'Tuesday')
df_police_calls['day_of_week'] = df_police_calls['day_of_week'].replace(3,'Wednesday')
df_police_calls['day_of_week'] = df_police_calls['day_of_week'].replace(4,'Thursday')
df_police_calls['day_of_week'] = df_police_calls['day_of_week'].replace(5,'Friday')
df_police_calls['day_of_week'] = df_police_calls['day_of_week'].replace(6,'Saturday')
df_police_calls['day_of_week'] = df_police_calls['day_of_week'].replace(7,'Sunday')

In [18]:
# Splitting datetime for df_police_calls
date_time = df_police_calls['date_time'].str.split(' ', n=1, expand=True)
df_police_calls = df_police_calls.drop(['date_time'], axis=1)
df_police_calls['date'] = date_time[0]
df_police_calls['time'] = date_time[1]

In [19]:
# dropping useless columns
drop_cols_calls = ['address_dir_primary', 'address_dir_intersecting','incident_num']

df_police_calls = df_police_calls.drop(drop_cols_calls,axis = 1)

In [20]:
# making everything lowercase
df_police_calls['address_road_primary']=df_police_calls['address_road_primary'].str.lower()
df_police_calls['address_road_intersecting']=df_police_calls['address_road_intersecting'].str.lower()
df_police_calls['address_sfx_primary'] = df_police_calls['address_sfx_primary'].str.lower()


In [21]:
# strip any extra spaces in string data
df_police_calls['address_road_primary']= df_police_calls.address_road_primary.str.strip()
df_police_calls['address_sfx_primary'] = df_police_calls.address_sfx_primary.str.strip()
df_police_calls['address_road_intersecting']= df_police_calls.address_road_intersecting.str.strip()
df_police_calls['call_type'] = df_police_calls.call_type.str.strip()
df_police_calls['disposition'] = df_police_calls.disposition.str.strip()


In [22]:
# convert the 'Date' column to datetime format 

df_police_calls['date']= pd.to_datetime(df_police_calls['date']) 
df_police_calls['time']= pd.to_datetime(df_police_calls['time']) 

In [23]:
df_police_calls.dtypes

day_of_week                          object
address_number_primary                int64
address_road_primary                 object
address_sfx_primary                  object
address_road_intersecting            object
address_sfx_intersecting            float64
call_type                            object
disposition                          object
beat                                  int64
priority                              int64
date                         datetime64[ns]
time                         datetime64[ns]
dtype: object

## df_police_stops

In [24]:
drop_cols_stops = ['isstudent', 'gend_nc','agency','stop_id']
df_police_stops = df_police_stops.drop(drop_cols_stops, axis = 1)

In [25]:
# making everything lowercase

df_police_stops['address_city'] = df_police_stops['address_city'].str.lower()


We can probably merge **address_road_primary** and **address_sfx_primary**

In [26]:
df_police_stops.dtypes

ori                           object
exp_years                      int64
date_stop                     object
time_stop                     object
stopduration                   int64
stop_in_response_to_cfs        int64
officer_assignment_key         int64
assignment                    object
intersection                  object
address_block                float64
land_mark                     object
address_street                object
highway_exit                  object
isschool                       int64
school_name                   object
address_city                  object
beat                           int64
beat_name                     object
pid                            int64
perceived_limited_english      int64
perceived_age                  int64
perceived_gender              object
gender_nonconforming           int64
gend                           int64
perceived_lgbt                object
dtype: object

In [27]:
df_collisions.head()

Unnamed: 0,police_beat,address_number_primary,address_road_primary,address_sfx_primary,violation_section,violation_type,charge_desc,injured,killed,hit_run_lvl,date,time
0,935,5500,valerio,trail,MISC-HAZ,VC,miscellaneous hazardous violations of the vehi...,0,0,misdemeanor,2017-01-01,2019-11-05 00:01:00
1,322,6400,crawford,street,MISC-HAZ,VC,miscellaneous hazardous violations of the vehi...,0,0,misdemeanor,2017-01-01,2019-11-05 00:01:00
2,124,8300,cam del oro,,MISC-HAZ,VC,miscellaneous hazardous violations of the vehi...,0,0,misdemeanor,2017-01-01,2019-11-05 00:01:00
3,325,8100,royal gorge,drive,22107,VC,turning movements and required signals,0,0,misdemeanor,2017-01-01,2019-11-05 00:01:00
4,521,1000,11th,avenue,22107,VC,turning movements and required signals,0,0,misdemeanor,2017-01-01,2019-11-05 01:00:00


visualization tools: https://medium.com/@stallonejacob/d3-in-juypter-notebook-685d6dca75c8

## Geospatial tools
- Geojson: https://pypi.org/project/geojson/
    - https://www.datacamp.com/community/tutorials/geospatial-data-python
- GeoPandas: http://geopandas.org/