This notebook aims to tease out the causes of, and trends in, traffic crashes in Chicago, answering questions such as: <br>
    • When and where are crashes most likely? <br>
    • What types of crashes are most likely? <br>
    • Do you see any actionable insights for the CTA? <br>
    
The data for this effort is sourced from the Chicago Data Portal's [Traffic Crashes](https://data.cityofchicago.org/Transportation/Traffic-Crashes-Crashes/85ca-t3if/data) dataset. We'll use the API endpoint to retrieve the data. Lastly, we will track code changes and host this code on GitHub. This provides source control as well as easy access for both private and public users.

In [39]:
#API Endpoint to the data
#https://data.cityofchicago.org/resource/85ca-t3if.json
import io
import json
import os

import pandas as pd
import requests


# Socrata endpoint for the "Traffic Crashes - Crashes" dataset.
CRASHES_URL = "https://data.cityofchicago.org/resource/85ca-t3if.json"

# This uses API v2.1+ where $limit is unlimited, so the limit is set to the total
# number of rows shown on the website when this notebook was written. Rows added
# to the dataset after that are not fetched until this number is raised.
# Paging and limits are described here ---> https://dev.socrata.com/docs/paging.html
# For a quick demo, lower ROW_LIMIT to 1000 (the API's default limit).
ROW_LIMIT = 785112

# (connect, read) timeouts in seconds. Without a timeout, requests waits
# indefinitely if the server stops responding part-way through the download.
REQUEST_TIMEOUT = (10, 300)

response_API = requests.get(f"{CRASHES_URL}?$limit={ROW_LIMIT}", timeout=REQUEST_TIMEOUT)
# Fail here on an HTTP error instead of handing an error payload to the JSON
# parsing cells further down.
response_API.raise_for_status()
data = response_API.text



In [35]:
# Parse the raw JSON text into a DataFrame. The text is wrapped in StringIO
# because passing a literal JSON string straight to read_json is deprecated in
# recent pandas releases; a file-like object works on old and new versions alike.
crashDF = pd.read_json(io.StringIO(data))
crashDF.head()

Unnamed: 0,crash_record_id,crash_date,posted_speed_limit,traffic_control_device,device_condition,weather_condition,lighting_condition,first_crash_type,trafficway_type,alignment,...,statements_taken_i,photos_taken_i,hit_and_run_i,crash_date_est_i,dooring_i,private_property_i,work_zone_i,work_zone_type,workers_present_i,lane_cnt
0,724341ca371050eedda726f0024170e43515dc8d81fabc...,2023-11-30T22:47:00.000,35,TRAFFIC SIGNAL,UNKNOWN,CLEAR,"DARKNESS, LIGHTED ROAD",REAR END,OTHER,STRAIGHT AND LEVEL,...,,,,,,,,,,
1,794f8a802e46f575dfeb359e6738e18a55ccf8a3fe0e0c...,2023-11-30T22:41:00.000,30,NO CONTROLS,NO CONTROLS,CLEAR,"DARKNESS, LIGHTED ROAD",PARKED MOTOR VEHICLE,NOT DIVIDED,STRAIGHT AND LEVEL,...,,,,,,,,,,
2,c505dc5fb28c4f43bceb38b4816e848c9f229760e197f9...,2023-11-30T21:56:00.000,30,NO CONTROLS,NO CONTROLS,CLEAR,"DARKNESS, LIGHTED ROAD",PARKED MOTOR VEHICLE,ALLEY,STRAIGHT AND LEVEL,...,,,,,,,,,,
3,8dc590a25ce4e823d09b12f5ee03c74c12316d371f191b...,2023-11-30T21:03:00.000,30,NO CONTROLS,NO CONTROLS,RAIN,"DARKNESS, LIGHTED ROAD",PARKED MOTOR VEHICLE,NOT DIVIDED,STRAIGHT AND LEVEL,...,Y,,,,,,,,,
4,19472fa501c5b4d4f099009a2ab0c9de2f24b32fe0a9ed...,2023-11-30T20:30:00.000,30,TRAFFIC SIGNAL,FUNCTIONING PROPERLY,UNKNOWN,"DARKNESS, LIGHTED ROAD",TURNING,FOUR WAY,STRAIGHT AND LEVEL,...,,Y,,,,,,,,


In [None]:
# Summarize the frame: number of rows, column names, non-null counts and dtypes.
crashDF.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 785112 entries, 0 to 785111
Data columns (total 49 columns):
 #   Column                         Non-Null Count   Dtype  
---  ------                         --------------   -----  
 0   crash_record_id                785112 non-null  object 
 1   crash_date                     785112 non-null  object 
 2   posted_speed_limit             785112 non-null  int64  
 3   traffic_control_device         785112 non-null  object 
 4   device_condition               785112 non-null  object 
 5   weather_condition              785112 non-null  object 
 6   lighting_condition             785112 non-null  object 
 7   first_crash_type               785112 non-null  object 
 8   trafficway_type                785112 non-null  object 
 9   alignment                      785112 non-null  object 
 10  roadway_surface_cond           785112 non-null  object 
 11  road_defect                    785112 non-null  object 
 12  report_type                   

In [40]:
# Show the last five rows. In the recorded output these are the oldest crashes
# (2013-2014) while head() shows the newest, which suggests the API returns rows
# newest-first -- NOTE(review): no explicit $order is requested, so confirm
# before relying on that ordering.
crashDF.tail()

Unnamed: 0,crash_record_id,crash_date,posted_speed_limit,traffic_control_device,device_condition,weather_condition,lighting_condition,first_crash_type,trafficway_type,alignment,...,statements_taken_i,photos_taken_i,hit_and_run_i,crash_date_est_i,dooring_i,private_property_i,work_zone_i,work_zone_type,workers_present_i,lane_cnt
785107,1d0232afecbdfd01968555aa956a688fd6f55a2bd1984f...,2014-02-24T19:45:00.000,30,TRAFFIC SIGNAL,FUNCTIONING PROPERLY,RAIN,DAYLIGHT,TURNING,NOT DIVIDED,STRAIGHT AND LEVEL,...,,,,,,,,,,
785108,957783a4787318f005a7dbc920e4c84cb9ac8aa7329a62...,2014-01-21T07:40:00.000,30,YIELD,NO CONTROLS,CLEAR,DAYLIGHT,ANGLE,DIVIDED - W/MEDIAN (NOT RAISED),STRAIGHT AND LEVEL,...,,,,,,,,,,
785109,f62e27317feb174811cf4fefeb9fa1064fea6c0619a873...,2014-01-18T18:14:00.000,30,NO CONTROLS,NO CONTROLS,CLEAR,DUSK,PARKED MOTOR VEHICLE,DIVIDED - W/MEDIAN BARRIER,STRAIGHT AND LEVEL,...,,,,,,,,,,
785110,19fb5af681f833c2af85734245f737fa6fbe62ac1ea379...,2013-06-01T20:29:00.000,30,NO CONTROLS,NO CONTROLS,CLEAR,"DARKNESS, LIGHTED ROAD",PEDESTRIAN,NOT DIVIDED,STRAIGHT AND LEVEL,...,,,,,,,,,,2.0
785111,a802658be15312809c771559e4f81088cfb226830792a5...,2013-03-03T16:48:00.000,30,TRAFFIC SIGNAL,FUNCTIONING PROPERLY,CLEAR,DAYLIGHT,SIDESWIPE OPPOSITE DIRECTION,NOT DIVIDED,STRAIGHT AND LEVEL,...,,,Y,,,,,,,2.0


In [42]:
from bokeh.models import ColumnDataSource, GMapOptions
from bokeh.plotting import gmap, show

# Demo cell: draw a Google Maps base layer centered on downtown Chicago with a
# few placeholder points, to check that the Bokeh/GMaps setup works.
map_options = GMapOptions(lat=41.885300, lng=-87.642320, map_type="roadmap", zoom=6)

# For GMaps to function, Google requires you obtain and enable an API key:
#
#     https://developers.google.com/maps/documentation/javascript/get-api-key
#
# The key is read from the GOOGLE_MAPS_API_KEY environment variable so that it
# never has to be pasted into (and committed with) the notebook. If the variable
# is unset this falls back to the empty string the cell used before.
p = gmap(os.environ.get("GOOGLE_MAPS_API_KEY", ""), map_options, title="Chicago")

# NOTE(review): these placeholder points sit around lat 30.2-30.3 / lon -97.7,
# far from the map center above (lat 41.89 / lon -87.64), so they will not be
# visible in the initial view. They look like leftover sample data -- replace
# with crash coordinates or remove this cell once the real map below is in use.
source = ColumnDataSource(
    data=dict(lat=[ 30.29,  30.20,  30.29],
              lon=[-97.70, -97.74, -97.78]),
)

p.circle(x="lon", y="lat", size=1, fill_color="blue", fill_alpha=0.8, source=source)

show(p)

In [None]:
# Peek at the columns that carry crash coordinates. The original cell held the
# bare names `longitude` and `location`, which are not defined as variables and
# raise NameError when the notebook is run top to bottom.
# filter(items=...) keeps only the listed columns that actually exist, so this
# cell still runs if one of them is absent from the download.
crashDF.filter(items=["latitude", "longitude", "location"]).head()

In [5]:
# Inspect the raw API payload before pandas converts any types. Only the first
# record is displayed: echoing the whole parsed list would write every downloaded
# row into the cell output and bloat the saved notebook.
json.loads(data)[:1]

[{'crash_record_id': '060ecd0e6925f19aed51fea3adef98de6c7f0435266ad1346bd6d6c31df6d8b50e629410ed7c856314b6503476016f265a701034683c1ef4f4b8c058eee83843',
  'crash_date': '2023-11-29T22:35:00.000',
  'posted_speed_limit': '30',
  'traffic_control_device': 'NO CONTROLS',
  'device_condition': 'NO CONTROLS',
  'weather_condition': 'CLEAR',
  'lighting_condition': 'DUSK',
  'first_crash_type': 'ANIMAL',
  'trafficway_type': 'NOT DIVIDED',
  'alignment': 'STRAIGHT AND LEVEL',
  'roadway_surface_cond': 'DRY',
  'road_defect': 'NO DEFECTS',
  'report_type': 'NOT ON SCENE (DESK REPORT)',
  'crash_type': 'NO INJURY / DRIVE AWAY',
  'damage': '$501 - $1,500',
  'date_police_notified': '2023-11-29T23:30:00.000',
  'prim_contributory_cause': 'ANIMAL',
  'sec_contributory_cause': 'ANIMAL',
  'street_no': '6212',
  'street_direction': 'N',
  'street_name': 'CENTRAL AVE',
  'beat_of_occurrence': '1621',
  'num_units': '1',
  'most_severe_injury': 'NO INDICATION OF INJURY',
  'injuries_total': '0',
  '

In [43]:
# Small working sample: the first ten rows of the crash table, used to try out
# the map plotting below before scaling up.
play = crashDF.iloc[:10]

In [48]:
# Coordinate columns of the sample, keyed by the names the map glyphs use.
cords = {"lon": play["longitude"], "lat": play["latitude"]}

In [52]:


# Plot the sampled crashes (`play`) on a Google Maps base layer.
# NOTE(review): zoom=20 is a very tight view around the fixed center below, so
# most points will be off-screen until the reader zooms out -- consider a
# city-level zoom.
map_options = GMapOptions(lat=41.885300, lng=-87.642320, map_type="roadmap", zoom=20)

# For GMaps to function, Google requires you obtain and enable an API key:
#
#     https://developers.google.com/maps/documentation/javascript/get-api-key
#
# The key must come from the environment, never from the notebook source: this
# notebook is shared publicly, and a key written here is exposed to everyone
# who can read it.
google_api_key = os.environ.get("GOOGLE_MAPS_API_KEY")
if not google_api_key:
    raise RuntimeError(
        "Set the GOOGLE_MAPS_API_KEY environment variable to your Google Maps API key "
        "before running this cell."
    )

p = gmap(google_api_key, map_options, title="Chicago")

# Column names here ("lon"/"lat") are the ones referenced by the glyph call below.
source = ColumnDataSource(
     dict(lon = play["longitude"], lat = play["latitude"])
)

p.circle(x="lon", y="lat", size=15, fill_color="blue", fill_alpha=0.8, source=source)

show(p)