# The City of Calgary: Traffic Incidents and the Impacts on Residents

## Data Cleaning and Wrangling

In [38]:
# importing packages
import pandas as pd
import numpy as np
import geopandas as gpd

In [39]:
# reading the raw traffic incidents csv into a dataframe
traffic_incidents_df = pd.read_csv(filepath_or_buffer = "Traffic_Incidents_20240127.csv")
# previewing the first five rows to see the available columns
display(traffic_incidents_df.head(n = 5))

Unnamed: 0,INCIDENT INFO,DESCRIPTION,START_DT,MODIFIED_DT,QUADRANT,Longitude,Latitude,Count,id,Point
0,Westbound 16 Avenue at Deerfoot Trail NE,Stalled vehicle. Partially blocking the right...,2022/06/21 07:31:40 AM,2022/06/21 07:33:16 AM,NE,-114.026687,51.067485,1,2022-06-21T07:31:4051.067485129276236-114.0266...,POINT (-114.02668672232672 51.067485129276236)
1,11 Avenue and 4 Street SW,Traffic incident. Blocking multiple lanes,2022/06/21 04:02:11 AM,2022/06/21 04:12:38 AM,SW,-114.071481,51.042624,1,2022-06-21T04:02:1151.04262449261462-114.07148...,POINT (-114.07148057660925 51.04262449261462)
2,68 Street and Memorial Drive E,Traffic incident.,2022/06/20 11:53:08 PM,2022/06/20 11:55:42 PM,NE,-113.935553,51.052474,1,2022-06-20T23:53:0851.0524735056658-113.935553...,POINT (-113.935553325751 51.0524735056658)
3,Eastbound 16 Avenue and 36 Street NE,Traffic incident. Blocking the left shoulder,2022/06/20 04:43:21 PM,2022/06/20 05:17:05 PM,NE,-113.989219,51.067086,1,2022-06-20T16:43:2151.06708565896752-113.98921...,POINT (-113.98921905311566 51.06708565896752)
4,Barlow Trail and 61 Avenue SE,Traffic incident.,2022/06/20 04:42:12 PM,2022/06/20 05:28:21 PM,SE,-113.985727,50.998727,1,2022-06-20T16:42:1250.99872748477766-113.98572...,POINT (-113.98572655353505 50.99872748477766)


In [40]:
# listing the distinct values of the Count column; a single value of 1 means
# every row is exactly one incident and the column carries no information
traffic_incidents_df.loc[:, "Count"].unique()

array([1])

In [41]:
# dropping columns not needed (Count is always 1; id and Point are not used further in this notebook)
# reassigning instead of using inplace = True so the cell reads as "new frame from old frame"
traffic_incidents_df = traffic_incidents_df.drop(columns = ["id", "Count", "Point"])

In [42]:
# counting the missing values per column (summing the null mask down the rows)
display(traffic_incidents_df.isna().sum(axis = 0))

INCIDENT INFO        0
DESCRIPTION          2
START_DT             0
MODIFIED_DT      14057
QUADRANT         14059
Longitude            0
Latitude             0
dtype: int64

In [43]:
# dropping the columns with a significant number of missing values
# (MODIFIED_DT and QUADRANT have 14057 and 14059 missing entries respectively)
# reassigning instead of using inplace = True so the frame is not mutated behind earlier cell outputs
traffic_incidents_df = traffic_incidents_df.drop(columns = ["MODIFIED_DT", "QUADRANT"])

In [44]:
# checking the datatype of each remaining column in the dataframe
# (START_DT is still read as a generic object/string column at this point)
display(traffic_incidents_df.dtypes)

INCIDENT INFO     object
DESCRIPTION       object
START_DT          object
Longitude        float64
Latitude         float64
dtype: object

In [45]:
# converting the START_DT column into a datetime object
# the raw values look like "2022/06/21 07:31:40 AM" (12-hour clock with AM/PM), so the
# format is stated explicitly instead of relying on pandas to infer it
traffic_incidents_df["START_DT"] = pd.to_datetime(traffic_incidents_df["START_DT"], format = "%Y/%m/%d %I:%M:%S %p")

In [46]:
# keeping only the incidents that started strictly before jan 1, 2024 12:00:00 am
traffic_incidents_df = traffic_incidents_df.loc[
    traffic_incidents_df["START_DT"] < "2024-01-01 00:00:00"
]

In [47]:
# changing column names to reader-friendly labels
# reassigning the result rather than renaming in place: the frame at this point comes
# from a boolean filter, and a new frame keeps the cell free of in-place mutation
traffic_incidents_df = traffic_incidents_df.rename(
    columns = {
        "INCIDENT INFO": "Incident info",
        "DESCRIPTION": "Description",
        "START_DT": "Date",
    }
)

In [48]:
# previewing the first five rows of the cleaned dataframe
display(traffic_incidents_df.head(n = 5))

Unnamed: 0,Incident info,Description,Date,Longitude,Latitude
0,Westbound 16 Avenue at Deerfoot Trail NE,Stalled vehicle. Partially blocking the right...,2022-06-21 07:31:40,-114.026687,51.067485
1,11 Avenue and 4 Street SW,Traffic incident. Blocking multiple lanes,2022-06-21 04:02:11,-114.071481,51.042624
2,68 Street and Memorial Drive E,Traffic incident.,2022-06-20 23:53:08,-113.935553,51.052474
3,Eastbound 16 Avenue and 36 Street NE,Traffic incident. Blocking the left shoulder,2022-06-20 16:43:21,-113.989219,51.067086
4,Barlow Trail and 61 Avenue SE,Traffic incident.,2022-06-20 16:42:12,-113.985727,50.998727


In [49]:
# writing the cleaned dataset to a csv file, leaving out the row index
traffic_incidents_df.to_csv(path_or_buf = "Traffic_Incidents_Cleaned.csv", index = False)

In [50]:
# creating a geojson file from the cleaned dataset
# loading the cleaned csv with pandas: a csv carries no geometry column or crs, so the
# geometry and the coordinate reference system are attached explicitly below
# (note: rows with a missing Description are read as NaN here)
traffic_incidents_points_df = pd.read_csv("Traffic_Incidents_Cleaned.csv")
# building point geometries from the coordinates (x = longitude, y = latitude)
# and tagging them with EPSG:4326 so the crs is actually set on the geometry
incident_points = gpd.points_from_xy(
    traffic_incidents_points_df["Longitude"],
    traffic_incidents_points_df["Latitude"],
    crs = "EPSG:4326",
)
# creating the geodataframe without the original longitude and latitude columns
traffic_incidents_gdf = gpd.GeoDataFrame(
    traffic_incidents_points_df.drop(columns = ["Longitude", "Latitude"]),
    geometry = incident_points,
)
# exporting the geodataframe as a geojson file
traffic_incidents_gdf.to_file("Traffic_Incidents_Cleaned.geojson", driver = 'GeoJSON')

### References

City of Calgary. (2024). Traffic Incidents: Open Calgary. https://data.calgary.ca/Transportation-Transit/Traffic-Incidents/35ra-9556/about_data