In [115]:
# set up libraries
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt
from math import radians, sin, cos, sqrt, atan2

%matplotlib inline

In [116]:
# read the crash records and the speed-camera locations from their CSV files
road_crash, speed_cameras = map(
    pd.read_csv,
    ("data/ACT_Road_Crash_Data.csv", "data/Traffic_speed_camera_locations.csv"),
)

In [117]:
# keep only rows that have a SUBURB_LOCATION value and whose lighting, road
# and weather conditions are all something other than the literal 'Unknown'

road_crash = road_crash[
    road_crash["SUBURB_LOCATION"].notna()
    & (
        road_crash[["LIGHTING_CONDITION", "ROAD_CONDITION", "WEATHER_CONDITION"]]
        .ne("Unknown")
        .all(axis=1)
    )
]

In [118]:
# remove unnecessary columns in a single call

road_crash = road_crash.drop(
    columns=["LONGITUDE", "LATITUDE", "MIDBLOCK", "CRASH_DIRECTION"]
)

In [119]:
# Parse CRASH_DATE / CRASH_TIME (unparseable values become NaT), derive the
# month, day-of-week and hour features from them, then discard the two
# parsed source columns.
road_crash = (
    road_crash
    .assign(
        CRASH_DATE=lambda frame: pd.to_datetime(
            frame["CRASH_DATE"], format="%d/%m/%Y", errors="coerce"
        ),
        CRASH_TIME=lambda frame: pd.to_datetime(
            frame["CRASH_TIME"], format="%H:%M", errors="coerce"
        ),
    )
    .assign(
        CRASH_DATE_month=lambda frame: frame["CRASH_DATE"].dt.month,
        CRASH_DATE_dayofweek=lambda frame: frame["CRASH_DATE"].dt.dayofweek,
        CRASH_TIME_hour=lambda frame: frame["CRASH_TIME"].dt.hour,
    )
    .drop(columns=["CRASH_DATE", "CRASH_TIME"])
)

In [120]:
# display the frame after row filtering, column drops and date/time feature extraction
road_crash

Unnamed: 0,CRASH_ID,SUBURB_LOCATION,INTERSECTION,CRASH_TYPE,CRASH_SEVERITY,LIGHTING_CONDITION,ROAD_CONDITION,WEATHER_CONDITION,Location,CRASH_DATE_month,CRASH_DATE_dayofweek,CRASH_TIME_hour
0,1025691,KAMBAH,NO,Struck object (Off Road),Injury,Daylight,Good dry surface,Fine,"(-35.392006654135606, 149.05505612950844)",1,4,9
1,1039779,PHILLIP,YES,Struck pedestrian (On Road),Injury,Daylight,Good dry surface,Fine,"(-35.34892137433122, 149.0844465057644)",4,3,17
2,1048742,HALL,NO,Struck object (Off Road),Injury,Daylight,Good dry surface,Fine,"(-35.172862707340954, 149.0614313347401)",10,6,11
3,1041937,BELCONNEN,YES,Rear end collision,Property Damage Only,Daylight,Good dry surface,Fine,"(-35.201315499751615, 149.09407196860244)",8,1,17
4,1048339,HARRISON,YES,Rear end collision,Injury,Daylight,Good dry surface,Fine,"(-35.19730828458103, 149.15639997910796)",11,3,14
...,...,...,...,...,...,...,...,...,...,...,...,...
71791,1052228,CRACE,NO,Other - Vehicle to Vehicle,Property Damage Only,Daylight,Good dry surface,Fine,"(-35.20863373403501, 149.06267307593114)",12,3,15
71792,1225463,RURAL - BELCONNEN,NO,Struck animal (not ridden On Road),Property Damage Only,Daylight,Good dry surface,Fine,"(-35.20863373403501, 149.06267307593114)",3,0,7
71793,1243579,RED HILL,NO,Other - Vehicle to Vehicle,Property Damage Only,Daylight,Good dry surface,Fine,"(-35.34137565927604, 149.1313181181877)",4,0,10
71794,1247671,GUNGAHLIN,YES,Rear end collision,Property Damage Only,Dark - good street lighting,Good dry surface,Fine,"(-35.182638434932166, 149.12926943569062)",5,2,17


In [121]:
# encoding categorical variables with one-hot encoding.
# dtype is pinned so the indicator columns are numeric 0/1 on every pandas
# version (pandas >= 2.0 would otherwise produce True/False columns).
road_crash = pd.get_dummies(
    road_crash,
    columns=[
        "SUBURB_LOCATION",
        "CRASH_TYPE",
        "LIGHTING_CONDITION",
        "ROAD_CONDITION",
        "WEATHER_CONDITION",
    ],
    dtype="uint8",
)

# replace YES / NO values in INTERSECTION with 1 / 0.
# The result is assigned back to the column explicitly: calling
# replace(..., inplace=True) on `road_crash.INTERSECTION` is a chained in-place
# operation, which warns on pandas 2.x and does not update the frame when
# Copy-on-Write is active. Values other than YES / NO are left unchanged;
# infer_objects() gives an integer column when every value was replaced.
road_crash["INTERSECTION"] = (
    road_crash["INTERSECTION"].replace({"YES": 1, "NO": 0}).infer_objects()
)

In [122]:
# display the frame after one-hot encoding and the INTERSECTION 1 / 0 conversion
road_crash

Unnamed: 0,CRASH_ID,INTERSECTION,CRASH_SEVERITY,Location,CRASH_DATE_month,CRASH_DATE_dayofweek,CRASH_TIME_hour,SUBURB_LOCATION_ACTON,SUBURB_LOCATION_AINSLIE,SUBURB_LOCATION_AMAROO,...,ROAD_CONDITION_Snow or ice,ROAD_CONDITION_Wet surface,WEATHER_CONDITION_Cloudy or Overcast,WEATHER_CONDITION_Fine,WEATHER_CONDITION_Fog,WEATHER_CONDITION_Heavy rain,WEATHER_CONDITION_Light rain,WEATHER_CONDITION_Other,WEATHER_CONDITION_Smoke or dust,WEATHER_CONDITION_Snow or sleet
0,1025691,0,Injury,"(-35.392006654135606, 149.05505612950844)",1,4,9,0,0,0,...,0,0,0,1,0,0,0,0,0,0
1,1039779,1,Injury,"(-35.34892137433122, 149.0844465057644)",4,3,17,0,0,0,...,0,0,0,1,0,0,0,0,0,0
2,1048742,0,Injury,"(-35.172862707340954, 149.0614313347401)",10,6,11,0,0,0,...,0,0,0,1,0,0,0,0,0,0
3,1041937,1,Property Damage Only,"(-35.201315499751615, 149.09407196860244)",8,1,17,0,0,0,...,0,0,0,1,0,0,0,0,0,0
4,1048339,1,Injury,"(-35.19730828458103, 149.15639997910796)",11,3,14,0,0,0,...,0,0,0,1,0,0,0,0,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
71791,1052228,0,Property Damage Only,"(-35.20863373403501, 149.06267307593114)",12,3,15,0,0,0,...,0,0,0,1,0,0,0,0,0,0
71792,1225463,0,Property Damage Only,"(-35.20863373403501, 149.06267307593114)",3,0,7,0,0,0,...,0,0,0,1,0,0,0,0,0,0
71793,1243579,0,Property Damage Only,"(-35.34137565927604, 149.1313181181877)",4,0,10,0,0,0,...,0,0,0,1,0,0,0,0,0,0
71794,1247671,1,Property Damage Only,"(-35.182638434932166, 149.12926943569062)",5,2,17,0,0,0,...,0,0,0,1,0,0,0,0,0,0


In [123]:
# Convert a string to 2 floats for latitude and longitude pair
def convert_to_float(points):
    """Parse a coordinate string such as ``"(-35.39, 149.05)"`` into floats.

    Parameters
    ----------
    points : str or any other object
        Comma-separated numbers, optionally wrapped in parentheses. Whitespace
        around each number is ignored, so ``"(a, b)"`` and ``"(a,b)"`` both
        parse.

    Returns
    -------
    list of float or None
        The parsed numbers in the order they appear. ``None`` is returned when
        ``points`` is not a string (for example a float NaN or ``None``) or
        when it contains nothing but whitespace and parentheses.

    Raises
    ------
    ValueError
        If a comma-separated piece is not a valid number.
    """
    # Anything that is not text (NaN placeholders, None, ...) has no
    # coordinates to parse.
    if not isinstance(points, str):
        return None

    cleaned = points.replace('(', '').replace(')', '')
    if not cleaned.strip():
        return None

    # float() tolerates surrounding whitespace, so splitting on the bare comma
    # handles both "a, b" and "a,b".
    return [float(piece) for piece in cleaned.split(',')]

# Find the closest camera and its distance from a "crash" point
def find_closest(crash, cameras, earth_radius_km=6371):
    """Return the camera nearest to ``crash`` and the great-circle distance to it.

    Parameters
    ----------
    crash : sequence of two numbers
        ``(latitude, longitude)`` of the crash in decimal degrees.
    cameras : iterable
        Camera locations. Each item is either a coordinate string understood
        by ``convert_to_float`` (e.g. ``"(-35.39, 149.05)"``) or an
        already-parsed ``(latitude, longitude)`` tuple/list in decimal degrees.
        Items for which ``convert_to_float`` returns ``None`` are skipped.
    earth_radius_km : float, optional
        Sphere radius used to scale the haversine angle; the default of 6371
        gives the distance in kilometres.

    Returns
    -------
    tuple
        ``(closest_location, closest_distance)``. ``closest_location`` is the
        winning item from ``cameras`` exactly as it was supplied. If no camera
        yields a usable distance the result is ``(None, float('inf'))``.

    Raises
    ------
    TypeError
        If ``crash`` is not iterable (for example ``None``).
    ValueError
        If ``crash`` or a camera does not contain exactly two values.
    """
    # Convert latitude and longitude of crash location to radians
    crash_lat, crash_lon = map(radians, crash)
    cos_crash_lat = cos(crash_lat)  # constant for every camera, computed once

    # Initialize variables for closest location and distance
    closest_location = None
    closest_distance = float('inf')

    # Iterate through each location
    for camera in cameras:
        # Parse each camera once; already-parsed pairs are used as they are.
        if isinstance(camera, (tuple, list)):
            coords = camera
        else:
            coords = convert_to_float(camera)
        if coords is None:
            continue
        camera_lat, camera_lon = map(radians, coords)

        # Haversine formula to calculate distance between two points
        dlat = camera_lat - crash_lat
        dlon = camera_lon - crash_lon
        a = sin(dlat/2)**2 + cos_crash_lat * cos(camera_lat) * sin(dlon/2)**2
        # Rounding can push `a` marginally above 1 for near-antipodal points,
        # which would make sqrt(1-a) raise. NaN fails this comparison and is
        # left alone, so NaN coordinates still produce a NaN distance and are
        # never selected below.
        if a > 1.0:
            a = 1.0
        c = 2 * atan2(sqrt(a), sqrt(1-a))
        distance = earth_radius_km * c

        # Update closest location and distance if current location is closer
        if distance < closest_distance:
            closest_location = camera
            closest_distance = distance

    return closest_location, closest_distance

# Testing with data sets, can change the crash location for different crashes
cameras = speed_cameras['Location']
# .iloc selects by position. Rows were filtered earlier without resetting the
# index, so a label lookup such as ['Location'][n] raises KeyError whenever
# row n was one of the removed rows.
crash = road_crash['Location'].iloc[0]
crash = convert_to_float(crash)
print(crash)

closest_location, closest_distance = find_closest(crash, cameras)
print(f"The closest location to crash is {closest_location}, and the distance is {closest_distance:.2f} km")

[-35.392006654135606, 149.05505612950844]
The closest location to crash is (-35.38997, 149.05492), and the distance is 0.23 km


In [None]:
# Add column representing the closest distance to speed camera