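# Pulling Collision Data

Download Toronto's "Motor Vehicle Collisions involving Killed or Seriously Injured Persons" (KSI) dataset from the City of Toronto Open Data CKAN API and convert it to a GeoDataFrame.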

In [1]:
# Load IPython's autoreload extension; mode 2 picks up edits to imported
# modules live, without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pandas as pd
import requests
from io import StringIO

In [3]:
# Query the CKAN `package_show` action, which returns the metadata
# (including the list of downloadable resources) for a single dataset.
base_url = "https://ckan0.cf.opendata.inter.prod-toronto.ca"
package_url = f"{base_url}/api/3/action/package_show"
params = {
    "id": ("motor-vehicle-collisions-involving-killed-or-"
           "seriously-injured-persons")
}
# A timeout keeps the cell from hanging indefinitely on a stalled connection,
# and raise_for_status() surfaces HTTP errors instead of letting an error
# page be parsed as if it were valid metadata.
package_response = requests.get(package_url, params=params, timeout=30)
package_response.raise_for_status()
package = package_response.json()
# CKAN also reports API-level failures in the JSON body via "success".
if not package.get("success"):
    raise RuntimeError(f"CKAN package_show failed: {package.get('error')}")

In [4]:
# Inspect the full API response: a dict with 'help', 'success' and 'result'.
package

{'help': 'https://ckan0.cf.opendata.inter.prod-toronto.ca/api/3/action/help_show?name=package_show',
 'success': True,
 'result': {'author': 'Analytics.Innovation@torontopolice.on.ca',
  'author_email': 'Analytics.Innovation@torontopolice.on.ca',
  'creator_user_id': '329e1506-b545-4fc7-a4ea-e614f220eea7',
  'dataset_category': 'Map',
  'date_published': '2020-08-18 14:14:31.515915',
  'excerpt': 'This dataset includes all traffic collision events where a person was either Killed or Seriously Injured (KSI) from 2006.',
  'formats': ['CSV', 'SHP', 'GEOJSON', 'GPKG'],
  'id': '0b6d3a00-7de1-440b-b47c-7252fd13929f',
  'information_url': 'https://data.torontopolice.on.ca/datasets/ksi',
  'is_retired': False,
  'isopen': False,
  'last_refreshed': '2024-10-15 17:18:20.899444',
  'license_id': 'notspecified',
  'license_title': 'License not specified',
  'limitations': 'This dataset includes all traffic collisions events where a person was either Killed or Seriously Injured (KSI) since 2006.

In [5]:
# The dataset metadata itself lives under the 'result' key.
package['result']

{'author': 'Analytics.Innovation@torontopolice.on.ca',
 'author_email': 'Analytics.Innovation@torontopolice.on.ca',
 'creator_user_id': '329e1506-b545-4fc7-a4ea-e614f220eea7',
 'dataset_category': 'Map',
 'date_published': '2020-08-18 14:14:31.515915',
 'excerpt': 'This dataset includes all traffic collision events where a person was either Killed or Seriously Injured (KSI) from 2006.',
 'formats': ['CSV', 'SHP', 'GEOJSON', 'GPKG'],
 'id': '0b6d3a00-7de1-440b-b47c-7252fd13929f',
 'information_url': 'https://data.torontopolice.on.ca/datasets/ksi',
 'is_retired': False,
 'isopen': False,
 'last_refreshed': '2024-10-15 17:18:20.899444',
 'license_id': 'notspecified',
 'license_title': 'License not specified',
 'limitations': 'This dataset includes all traffic collisions events where a person was either Killed or Seriously Injured (KSI) since 2006.  The location of crime occurrences have been deliberately offset to the nearest road intersection node to protect the privacy of parties involv

In [6]:
# Each entry in 'resources' describes one downloadable resource
# (its id, format, whether it is datastore-active, ...).
package['result']['resources']

[{'cache_last_updated': None,
  'cache_url': None,
  'created': '2023-01-27T21:54:17.491298',
  'datastore_active': True,
  'datastore_cache': {'CSV': {'4326': 'fdb2834f-3a92-41dd-b098-fbd84acb9cfe',
    '2952': '7ab6c611-e56d-44ad-b457-288169d59f89'},
   'SHP': {'4326': 'c5ad6fba-46b2-40e8-a8d6-b6e4df7328c8',
    '2952': 'fddc5066-fe94-4c1f-a39e-8bcecd947312'},
   'GPKG': {'4326': '1de0f8b3-c29b-473b-bd4a-d6af4c9d3376',
    '2952': 'eb56a12f-fe43-4856-bbfb-eb6e2ec29530'},
   'GEOJSON': {'4326': '355d4464-eb3c-4780-af79-43dd533ae906'}},
  'datastore_cache_last_update': '2025-03-27T15:46:20.183848',
  'extract_job': 'Airflow - files_to_datastore.py - motor-vehicle-collisions-involving-killed-or-seriously-injured-persons',
  'format': 'GeoJSON',
  'hash': '',
  'id': 'c9b88f1f-863e-42f1-ada0-2c09b1e2eaa4',
  'is_preview': True,
  'last_modified': None,
  'metadata_modified': '2025-03-27T15:46:20.364163',
  'mimetype': None,
  'mimetype_inner': None,
  'name': 'Motor Vehicle Collisions wi

In [7]:
# Choose the first resource that is loaded into the CKAN datastore rather
# than blindly taking index 0: the /datastore/dump/<id> endpoint serves
# datastore-backed resources, and the order of the list is not guaranteed.
resource = next(
    (
        candidate
        for candidate in package['result']['resources']
        if candidate.get('datastore_active')
    ),
    None,
)
if resource is None:
    raise LookupError("No datastore-active resource found in the package")

# The dump endpoint returns the table as CSV text, regardless of the
# resource's declared 'format'.
url = f"{base_url}/datastore/dump/{resource['id']}"
# Generous timeout: the dump is the whole dataset in a single response.
dump_response = requests.get(url, timeout=120)
dump_response.raise_for_status()
resource_dump_data = dump_response.text

In [8]:
# Preview only the start of the payload (header plus part of the first row);
# echoing the entire CSV string would bloat the saved notebook.
resource_dump_data[:500]

'_id,ACCNUM,DATE,TIME,STREET1,STREET2,OFFSET,ROAD_CLASS,DISTRICT,ACCLOC,TRAFFCTL,VISIBILITY,LIGHT,RDSFCOND,ACCLASS,IMPACTYPE,INVTYPE,INVAGE,INJURY,FATAL_NO,INITDIR,VEHTYPE,MANOEUVER,DRIVACT,DRIVCOND,PEDTYPE,PEDACT,PEDCOND,CYCLISTYPE,CYCACT,CYCCOND,PEDESTRIAN,CYCLIST,AUTOMOBILE,MOTORCYCLE,TRUCK,TRSN_CITY_VEH,EMERG_VEH,PASSENGER,SPEEDING,AG_DRIV,REDLIGHT,ALCOHOL,DISABILITY,HOOD_158,NEIGHBOURHOOD_158,HOOD_140,NEIGHBOURHOOD_140,DIVISION,geometry\r\n1,893184,2006-01-01,236,WOODBINE AVE,O CONNOR DR,None,Major Arterial,Toronto and East York,Intersection Related,No Control,Clear,Dark,Wet,Non-Fatal Injury,Approaching,Passenger,50 to 54,Major,None,None,None,None,None,None,None,None,None,None,None,None,None,None,Yes,None,None,None,None,Yes,Yes,Yes,None,Yes,None,60,Woodbine-Lumsden,60,Woodbine-Lumsden (60),D55,"{""type"": ""Point"", ""coordinates"": [-79.318797000266, 43.6995950001932]}"\n2,893184,2006-01-01,236,WOODBINE AVE,O CONNOR DR,None,Major Arterial,Toronto and East York,Intersection Relate

In [9]:
# Parse the downloaded CSV text into a DataFrame by wrapping it in an
# in-memory text buffer.
collision_data = pd.read_csv(
    filepath_or_buffer=StringIO(resource_dump_data),
)

In [10]:
# Preview the first five parsed rows.
collision_data.head()

Unnamed: 0,_id,ACCNUM,DATE,TIME,STREET1,STREET2,OFFSET,ROAD_CLASS,DISTRICT,ACCLOC,TRAFFCTL,VISIBILITY,LIGHT,RDSFCOND,ACCLASS,IMPACTYPE,INVTYPE,INVAGE,INJURY,FATAL_NO,INITDIR,VEHTYPE,MANOEUVER,DRIVACT,DRIVCOND,PEDTYPE,PEDACT,PEDCOND,CYCLISTYPE,CYCACT,CYCCOND,PEDESTRIAN,CYCLIST,AUTOMOBILE,MOTORCYCLE,TRUCK,TRSN_CITY_VEH,EMERG_VEH,PASSENGER,SPEEDING,AG_DRIV,REDLIGHT,ALCOHOL,DISABILITY,HOOD_158,NEIGHBOURHOOD_158,HOOD_140,NEIGHBOURHOOD_140,DIVISION,geometry
0,1,893184.0,2006-01-01,236,WOODBINE AVE,O CONNOR DR,,Major Arterial,Toronto and East York,Intersection Related,No Control,Clear,Dark,Wet,Non-Fatal Injury,Approaching,Passenger,50 to 54,Major,,,,,,,,,,,,,,,Yes,,,,,Yes,Yes,Yes,,Yes,,60,Woodbine-Lumsden,60,Woodbine-Lumsden (60),D55,"{""type"": ""Point"", ""coordinates"": [-79.31879700..."
1,2,893184.0,2006-01-01,236,WOODBINE AVE,O CONNOR DR,,Major Arterial,Toronto and East York,Intersection Related,No Control,Clear,Dark,Wet,Non-Fatal Injury,Approaching,Passenger,15 to 19,Minor,,,,,,,,,,,,,,,Yes,,,,,Yes,Yes,Yes,,Yes,,60,Woodbine-Lumsden,60,Woodbine-Lumsden (60),D55,"{""type"": ""Point"", ""coordinates"": [-79.31879700..."
2,3,893184.0,2006-01-01,236,WOODBINE AVE,O CONNOR DR,,Major Arterial,Toronto and East York,Intersection Related,No Control,Clear,Dark,Wet,Non-Fatal Injury,Approaching,Driver,55 to 59,Minor,,North,"Automobile, Station Wagon",Going Ahead,Driving Properly,Normal,,,,,,,,,Yes,,,,,Yes,Yes,Yes,,Yes,,60,Woodbine-Lumsden,60,Woodbine-Lumsden (60),D55,"{""type"": ""Point"", ""coordinates"": [-79.31879700..."
3,4,893184.0,2006-01-01,236,WOODBINE AVE,O CONNOR DR,,Major Arterial,Toronto and East York,Intersection Related,No Control,Clear,Dark,Wet,Non-Fatal Injury,Approaching,Passenger,20 to 24,Minor,,,,,,,,,,,,,,,Yes,,,,,Yes,Yes,Yes,,Yes,,60,Woodbine-Lumsden,60,Woodbine-Lumsden (60),D55,"{""type"": ""Point"", ""coordinates"": [-79.31879700..."
4,5,893184.0,2006-01-01,236,WOODBINE AVE,O CONNOR DR,,Major Arterial,Toronto and East York,Intersection Related,No Control,Clear,Dark,Wet,Non-Fatal Injury,Approaching,Passenger,15 to 19,Minor,,,,,,,,,,,,,,,Yes,,,,,Yes,Yes,Yes,,Yes,,60,Woodbine-Lumsden,60,Woodbine-Lumsden (60),D55,"{""type"": ""Point"", ""coordinates"": [-79.31879700..."


### Convert to GeoDataFrame

In [11]:
import geopandas as gpd
from shapely.geometry import shape
import json

In [12]:
# Raw geometry column: GeoJSON strings stored with object dtype.
collision_data['geometry']

0        {"type": "Point", "coordinates": [-79.31879700...
1        {"type": "Point", "coordinates": [-79.31879700...
2        {"type": "Point", "coordinates": [-79.31879700...
3        {"type": "Point", "coordinates": [-79.31879700...
4        {"type": "Point", "coordinates": [-79.31879700...
                               ...                        
18952    {"type": "Point", "coordinates": [-79.25145999...
18953    {"type": "Point", "coordinates": [-79.43190299...
18954    {"type": "Point", "coordinates": [-79.43190299...
18955    {"type": "Point", "coordinates": [-79.29027899...
18956    {"type": "Point", "coordinates": [-79.29027899...
Name: geometry, Length: 18957, dtype: object

In [13]:
def _geometry_to_wkt(value):
    """Return the WKT text for one cell of the ``geometry`` column.

    Accepts a GeoJSON string (the freshly downloaded state), a string that
    is already WKT, or an object exposing a ``.wkt`` attribute. Handling all
    three makes this cell safe to re-run: the original lambda raised when
    ``json.loads`` was handed a value that had already been converted.
    Any other value (e.g. a missing entry) is returned unchanged.
    """
    if isinstance(value, str):
        text = value.strip()
        if text.startswith("{"):
            # GeoJSON object -> shapely geometry -> WKT text.
            return shape(json.loads(text)).wkt
        # Not a JSON object, so treat it as already-converted WKT.
        return value
    # Geometry objects expose .wkt; everything else passes through as-is.
    return getattr(value, "wkt", value)


collision_data['geometry'] = collision_data['geometry'].apply(_geometry_to_wkt)

In [14]:
# Geometry column after conversion: WKT strings, still object dtype.
collision_data['geometry']

0         POINT (-79.318797000266 43.6995950001932)
1         POINT (-79.318797000266 43.6995950001932)
2         POINT (-79.318797000266 43.6995950001932)
3         POINT (-79.318797000266 43.6995950001932)
4         POINT (-79.318797000266 43.6995950001932)
                            ...                    
18952    POINT (-79.2514599999887 43.7721509996963)
18953    POINT (-79.4319029996452 43.6508470000693)
18954    POINT (-79.4319029996452 43.6508470000693)
18955    POINT (-79.2902789998551 43.7007889999252)
18956    POINT (-79.2902789998551 43.7007889999252)
Name: geometry, Length: 18957, dtype: object

In [15]:
# Parse the WKT strings into geometry objects, replacing the text column
# with a geometry-typed one.
collision_data['geometry'] = gpd.GeoSeries.from_wkt(
    data=collision_data['geometry'],
)

In [16]:
# Geometry column after parsing: now reported with dtype `geometry`.
collision_data['geometry']

0          POINT (-79.3188 43.6996)
1          POINT (-79.3188 43.6996)
2          POINT (-79.3188 43.6996)
3          POINT (-79.3188 43.6996)
4          POINT (-79.3188 43.6996)
                    ...            
18952    POINT (-79.25146 43.77215)
18953     POINT (-79.4319 43.65085)
18954     POINT (-79.4319 43.65085)
18955    POINT (-79.29028 43.70079)
18956    POINT (-79.29028 43.70079)
Name: geometry, Length: 18957, dtype: geometry

In [17]:
# Promote the table to a GeoDataFrame, naming the active geometry column
# and declaring its coordinates as EPSG:4326.
collision_geodata = gpd.GeoDataFrame(
    data=collision_data,
    geometry='geometry',
    crs='EPSG:4326',
)

In [18]:
# Preview the first five rows of the GeoDataFrame.
collision_geodata.head()

Unnamed: 0,_id,ACCNUM,DATE,TIME,STREET1,STREET2,OFFSET,ROAD_CLASS,DISTRICT,ACCLOC,TRAFFCTL,VISIBILITY,LIGHT,RDSFCOND,ACCLASS,IMPACTYPE,INVTYPE,INVAGE,INJURY,FATAL_NO,INITDIR,VEHTYPE,MANOEUVER,DRIVACT,DRIVCOND,PEDTYPE,PEDACT,PEDCOND,CYCLISTYPE,CYCACT,CYCCOND,PEDESTRIAN,CYCLIST,AUTOMOBILE,MOTORCYCLE,TRUCK,TRSN_CITY_VEH,EMERG_VEH,PASSENGER,SPEEDING,AG_DRIV,REDLIGHT,ALCOHOL,DISABILITY,HOOD_158,NEIGHBOURHOOD_158,HOOD_140,NEIGHBOURHOOD_140,DIVISION,geometry
0,1,893184.0,2006-01-01,236,WOODBINE AVE,O CONNOR DR,,Major Arterial,Toronto and East York,Intersection Related,No Control,Clear,Dark,Wet,Non-Fatal Injury,Approaching,Passenger,50 to 54,Major,,,,,,,,,,,,,,,Yes,,,,,Yes,Yes,Yes,,Yes,,60,Woodbine-Lumsden,60,Woodbine-Lumsden (60),D55,POINT (-79.3188 43.6996)
1,2,893184.0,2006-01-01,236,WOODBINE AVE,O CONNOR DR,,Major Arterial,Toronto and East York,Intersection Related,No Control,Clear,Dark,Wet,Non-Fatal Injury,Approaching,Passenger,15 to 19,Minor,,,,,,,,,,,,,,,Yes,,,,,Yes,Yes,Yes,,Yes,,60,Woodbine-Lumsden,60,Woodbine-Lumsden (60),D55,POINT (-79.3188 43.6996)
2,3,893184.0,2006-01-01,236,WOODBINE AVE,O CONNOR DR,,Major Arterial,Toronto and East York,Intersection Related,No Control,Clear,Dark,Wet,Non-Fatal Injury,Approaching,Driver,55 to 59,Minor,,North,"Automobile, Station Wagon",Going Ahead,Driving Properly,Normal,,,,,,,,,Yes,,,,,Yes,Yes,Yes,,Yes,,60,Woodbine-Lumsden,60,Woodbine-Lumsden (60),D55,POINT (-79.3188 43.6996)
3,4,893184.0,2006-01-01,236,WOODBINE AVE,O CONNOR DR,,Major Arterial,Toronto and East York,Intersection Related,No Control,Clear,Dark,Wet,Non-Fatal Injury,Approaching,Passenger,20 to 24,Minor,,,,,,,,,,,,,,,Yes,,,,,Yes,Yes,Yes,,Yes,,60,Woodbine-Lumsden,60,Woodbine-Lumsden (60),D55,POINT (-79.3188 43.6996)
4,5,893184.0,2006-01-01,236,WOODBINE AVE,O CONNOR DR,,Major Arterial,Toronto and East York,Intersection Related,No Control,Clear,Dark,Wet,Non-Fatal Injury,Approaching,Passenger,15 to 19,Minor,,,,,,,,,,,,,,,Yes,,,,,Yes,Yes,Yes,,Yes,,60,Woodbine-Lumsden,60,Woodbine-Lumsden (60),D55,POINT (-79.3188 43.6996)
