# Accident Propensity Index Calculation v03
Including efficiency features. Assumes that splines are not longer than around 1km since for each start point, only accidents within a radius of 1km are checked. This now takes 0.02 seconds. When using 10km radius, it takes .17 seconds. With 100km radius it thakes .36 seconds.

In [1]:
import pandas as pd
import math
import time
import folium

## Split accident data into buckets

In [2]:
raw_data = pd.read_csv('accident_locations.csv')
raw_data.head()

Unnamed: 0,ID,Start_Lat,Start_Lng
0,A-1,40.10891,-83.09286
1,A-2,39.86542,-84.0628
2,A-3,39.10266,-84.52468
3,A-4,41.06213,-81.53784
4,A-5,39.172393,-84.492792


In [3]:
def get_lat_id(lat):
    return int(lat*100)

def get_lng_id(lng):
    return int(lng*100)

raw_data["lat_id"] = raw_data["Start_Lat"].apply(get_lat_id)
raw_data["lng_id"] = raw_data["Start_Lng"].apply(get_lng_id)

raw_data.head(10)

Unnamed: 0,ID,Start_Lat,Start_Lng,lat_id,lng_id
0,A-1,40.10891,-83.09286,4010,-8309
1,A-2,39.86542,-84.0628,3986,-8406
2,A-3,39.10266,-84.52468,3910,-8452
3,A-4,41.06213,-81.53784,4106,-8153
4,A-5,39.172393,-84.492792,3917,-8449
5,A-6,39.06324,-84.03243,3906,-8403
6,A-7,39.77565,-84.18603,3977,-8418
7,A-8,41.37531,-81.82017,4137,-8182
8,A-9,40.702247,-84.075887,4070,-8407
9,A-10,40.10931,-82.96849,4010,-8296


In [4]:
groups = raw_data.groupby(['lat_id', 'lng_id'])

# Iterate over the groups and create individual dataframes
for name, group in groups:
    # Create the dataframe name
    df_name = f"accidents_{name[0]}_{name[1]}"
    
    # Create the dataframe
    vars()[df_name] = group.copy()

# Get a copy of all global variables
global_vars = globals().copy()

## Identify accidents close to the segmetn

In [5]:
# Define a function to calculate the distance between two points
def distance(point1, point2):
    lat1, lon1 = point1
    lat2, lon2 = point2
    km_per_lat = 110.574 # km per degree latitude
    km_per_lon = 111.320 # km per degree longitude at the equator
    dx = (lon2 - lon1) * km_per_lon * math.cos((lat1 + lat2) / 2)
    dy = (lat2 - lat1) * km_per_lat
    return math.sqrt(dx**2 + dy**2)

# Define a function to calculate the distance between a point and a line segment
def distance_to_segment(point, segment_start, segment_end):
    px, py = point
    x1, y1 = segment_start
    x2, y2 = segment_end
    dx, dy = x2 - x1, y2 - y1
    segment_length_squared = dx*dx + dy*dy
    if segment_length_squared == 0:
        return distance(point, segment_start)
    t = max(0, min(1, ((px - x1) * dx + (py - y1) * dy) / segment_length_squared))
    x = x1 + t * dx
    y = y1 + t * dy
    return distance(point, (x, y))

# Define a function to find accidents on a given route within a maximum distance
def find_accidents_on_route(start_point, end_point, max_distance):
    # Create a mask for accidents that are within the maximum distance from the route
    mask = data.apply(lambda row: distance_to_segment((row['Start_Lat'], row['Start_Lng']), start_point, end_point) <= max_distance, axis=1)

    # Return the accidents that match the mask
    return data.loc[mask]

In [6]:
# Route start and end point
start_point = (33.77299, -84.39020)
end_point = (33.790347, -84.391530)

# Extract the integer values of the start point lat and lng
start_lat = int(start_point[0]*100)
start_lng = int(start_point[1]*100)

# Get the dataframes that match the criteria
dfs_to_use = []
for lat_offset in [-1, 0, 1]:
    for lng_offset in [-1, 0, 1]:
        lat_id = start_lat + lat_offset
        lng_id = start_lng + lng_offset
        df_name = f"accidents_{lat_id}_{lng_id}"
        if df_name in global_vars and isinstance(global_vars[df_name], pd.DataFrame):
            dfs_to_use.append(global_vars[df_name])

# Concatenate the dataframes
combined_df = pd.concat(dfs_to_use)

# Reset the index of the combined dataframe
data = combined_df.reset_index(drop=True)

# Maximal distance of accidents from route in kilometers
max_distance = 0.05

In [7]:
# Run after entering the accident, route, and distance data
start_time = time.time()
accidents = find_accidents_on_route(start_point, end_point, max_distance)
print("Accidents on Route")
print(accidents)
print("\nAccidents off Route")
not_on_route = ~data['ID'].isin(accidents['ID'])
print(data[not_on_route])
end_time = time.time()
print("Time taken: {:.2f} seconds".format(end_time-start_time))

Accidents on Route
             ID  Start_Lat  Start_Lng  lat_id  lng_id
356     A-32296   33.77758  -84.39076    3377   -8439
357     A-32462   33.77758  -84.39076    3377   -8439
358     A-33764   33.77758  -84.39076    3377   -8439
359     A-34199   33.77758  -84.39076    3377   -8439
361     A-37983   33.77758  -84.39076    3377   -8439
...         ...        ...        ...     ...     ...
1209  A-2814038   33.78827  -84.39093    3378   -8439
1210  A-2832652   33.78821  -84.39149    3378   -8439
1211  A-2836706   33.78821  -84.39149    3378   -8439
1212  A-2837221   33.78153  -84.39121    3378   -8439
1213  A-2838350   33.78821  -84.39149    3378   -8439

[723 rows x 5 columns]

Accidents off Route
             ID  Start_Lat  Start_Lng  lat_id  lng_id
0     A-2229496  33.763460 -84.402930    3376   -8440
1       A-45581  33.768891 -84.390460    3376   -8439
2       A-46657  33.765810 -84.390370    3376   -8439
3       A-49172  33.768891 -84.390460    3376   -8439
4      A-218224  3

In [8]:
# create a map object centered at the mean latitude and longitude of the accidents
map_accidents = folium.Map(location=[accidents.Start_Lat.mean(), accidents.Start_Lng.mean()], zoom_start=10)

# add markers for each accident to the map
for index, row in accidents.iterrows():
    folium.Marker(location=[row['Start_Lat'], row['Start_Lng']],
                  popup=f"Accident ID: {row['ID']}",
                 ).add_to(map_accidents)

# add start point marker to the map
folium.Marker(location=[start_point[0], start_point[1]],
              icon=folium.Icon(color='green', icon='glyphicon-home'),
              popup='Start Point',
             ).add_to(map_accidents)

# add end point marker to the map
folium.Marker(location=[end_point[0], end_point[1]],
              icon=folium.Icon(color='red', icon='glyphicon-flag'),
              popup='End Point',
             ).add_to(map_accidents)

# display the map
map_accidents