# Accident Propensity Index Calculation v15

Works with route points flagged 0 or 1: only the points flagged 1 are used for the calculations, but both kinds are written to the output. Hex color codes are also output.

In [1]:
# Import relevant libraries
import pandas as pd
import math
import numpy as np
import json
import os.path

In [2]:
# Define a function to calculate the distance between two points
def distance(point1, point2):
    """Return the approximate distance in kilometres between two points.

    Uses the equirectangular approximation: the latitude difference is
    scaled by a fixed km-per-degree factor, and the longitude difference is
    additionally shrunk by the cosine of the mean latitude.

    Args:
        point1: ``(lat, lon)`` pair in decimal degrees.
        point2: ``(lat, lon)`` pair in decimal degrees.

    Returns:
        The distance between the two points in kilometres.
    """
    lat1, lon1 = point1
    lat2, lon2 = point2
    km_per_lat = 110.574
    km_per_lon = 111.320
    # math.cos expects radians, while the coordinates are in degrees.
    mean_lat_rad = math.radians((lat1 + lat2) / 2)
    dx = (lon2 - lon1) * km_per_lon * math.cos(mean_lat_rad)
    dy = (lat2 - lat1) * km_per_lat
    return math.hypot(dx, dy)

# Define a function to calculate the distance between a point and a line segment
def distance_to_segment(point, segment_start, segment_end):
    """Return the distance from ``point`` to the closest point of a segment.

    The closest point is located by projecting ``point`` onto the segment
    using the raw coordinate values, with the projection clamped to the
    segment's end points. The final distance is then measured with
    ``distance``.

    Args:
        point: Coordinate pair of the location to measure from.
        segment_start: Coordinate pair of the first end of the segment.
        segment_end: Coordinate pair of the second end of the segment.

    Returns:
        Whatever ``distance`` returns for ``point`` and the closest point on
        the segment.
    """
    p_a, p_b = point
    start_a, start_b = segment_start
    end_a, end_b = segment_end

    delta_a = end_a - start_a
    delta_b = end_b - start_b
    length_sq = delta_a * delta_a + delta_b * delta_b

    # A zero-length segment is a single point; measure to it directly.
    if length_sq == 0:
        return distance(point, segment_start)

    # Position of the projection along the segment: 0 = start, 1 = end.
    projection = ((p_a - start_a) * delta_a + (p_b - start_b) * delta_b) / length_sq
    capped = min(1, projection)
    t = max(0, capped)

    closest = (start_a + t * delta_a, start_b + t * delta_b)
    return distance(point, closest)

# Define a function to find accidents on a given route within a maximum distance
def find_accidents_on_route(start_point, end_point, all_relevant_accidents, max_distance=0.05):
    """Return the accidents lying within ``max_distance`` of a route segment.

    Args:
        start_point: Coordinate pair of the segment's first end.
        end_point: Coordinate pair of the segment's second end.
        all_relevant_accidents: DataFrame of candidate accidents with
            ``Start_Lat`` and ``Start_Lng`` columns.
        max_distance: Maximal distance of accidents from the route in
            kilometres. Defaults to 0.05.

    Returns:
        The rows of ``all_relevant_accidents`` whose distance to the segment
        is at most ``max_distance``. An empty input frame yields an empty
        result with the same columns.
    """
    # Build the mask from the two coordinate columns directly. This avoids
    # creating a Series per row and always yields a one-dimensional boolean
    # mask, including when the frame has no rows.
    near_route = [
        distance_to_segment((lat, lng), start_point, end_point) <= max_distance
        for lat, lng in zip(
            all_relevant_accidents['Start_Lat'],
            all_relevant_accidents['Start_Lng'],
        )
    ]
    mask = np.asarray(near_route, dtype=bool)

    # Return the accidents that match the mask
    return all_relevant_accidents.loc[mask]

In [8]:
def find_accidents(route_data, num_segments=5, output_path="backend_output.json"):
    """Compute a colour-coded Accident Propensity Index (API) for a route.

    The route is read from a JSON file, split into ``num_segments``
    consecutive parts, and for every part the accidents close to the route
    are collected from the per-area CSV files under ``data/``. Each part's
    API is its summed accident severity divided by a fixed constant. The
    API values are min-max normalised across the parts and converted into a
    green-to-red hex colour. The result is written as JSON.

    Args:
        route_data: Path to a JSON file holding a list of
            ``[{"lat": ..., "lng": ...}, assign]`` items. Only points whose
            ``assign`` equals 1 are used to look up accidents; all points
            are written to the output.
        num_segments: Number of consecutive parts the route is split into.
            Defaults to 5.
        output_path: File the resulting JSON is written to. Defaults to
            ``"backend_output.json"``.

    Raises:
        ValueError: If ``num_segments`` is smaller than 1.
    """
    if num_segments < 1:
        raise ValueError("num_segments must be at least 1")

    # Divisor applied to every segment's severity sum.
    # NOTE(review): presumably a dataset-wide total - confirm its origin.
    severity_divisor = 6035011

    # Load the route data from the JSON file
    with open(route_data) as f:
        json_data = json.load(f)

    # Explicit columns keep the frame usable even when the route is empty.
    route_df = pd.DataFrame(
        [
            {"lat": item[0]["lat"], "lng": item[0]["lng"], "assign": item[1]}
            for item in json_data
        ],
        columns=["lat", "lng", "assign"],
    )

    # Split the route into consecutive, near-equal parts. The first
    # ``remainder`` parts get one extra row (the same sizes
    # np.array_split produces), but plain slicing keeps each part a
    # DataFrame without relying on DataFrame.swapaxes.
    base_size, remainder = divmod(len(route_df), num_segments)
    segments = []
    lower = 0
    for k in range(num_segments):
        upper = lower + base_size + (1 if k < remainder else 0)
        segments.append(route_df.iloc[lower:upper])
        lower = upper

    # Accident files already loaded during this call (None = file missing),
    # so that the same CSV is not read again for every pair of points.
    csv_cache = {}

    api_values = []
    for segment in segments:
        # Only the points assigned to the route are used for the lookup
        assigned = segment.loc[segment["assign"] == 1, ["lat", "lng"]]

        matches = []
        # Loop through the pairs of subsequent coordinates
        for j in range(len(assigned) - 1):
            start_point = assigned.iloc[j]
            end_point = assigned.iloc[j + 1]

            # The accident file is chosen from the truncated text form of
            # the midpoint between the two coordinates.
            mid_lat = (start_point["lat"] + end_point["lat"]) / 2
            mid_lng = (start_point["lng"] + end_point["lng"]) / 2
            dataset_id = f"{str(mid_lat)[:4]}_{str(mid_lng)[:5]}"

            filename = f'data/ga_accidents_{dataset_id}.csv'
            if filename not in csv_cache:
                csv_cache[filename] = (
                    pd.read_csv(filename) if os.path.isfile(filename) else None
                )
            candidates = csv_cache[filename]
            if candidates is None:
                # No accident data for this area; nothing to add.
                continue

            matches.append(
                find_accidents_on_route(start_point, end_point, candidates)
            )

        # Concatenate once per segment and drop accidents that were matched
        # by more than one pair of points.
        if matches:
            accidents = pd.concat(matches, ignore_index=True).drop_duplicates()
        else:
            accidents = pd.DataFrame()

        # A segment without any matched accidents has an API of zero.
        if "Severity" in accidents.columns:
            severity_sum = float(accidents["Severity"].sum())
        else:
            severity_sum = 0.0
        api_values.append(round(severity_sum / severity_divisor, 8))

    # Min-max normalise the API values. When every segment has the same
    # API there is no spread to scale by, so all segments get 0 (green).
    lowest = min(api_values)
    spread = max(api_values) - lowest
    normalised = [
        (value - lowest) / spread if spread > 0 else 0.0
        for value in api_values
    ]

    def get_hex_color(value):
        # Convert a normalized value to a hex color code representing a
        # gradient from green to red.
        r = int(255 * value)
        g = int(255 * (1 - value))
        b = 0
        return f'{r:02x}{g:02x}{b:02x}'

    # Collect the colour and the full list of coordinates for every segment
    json_dict = {}
    for i, (value, segment) in enumerate(zip(normalised, segments), start=1):
        json_dict[f"segment_{i}"] = {
            "api": get_hex_color(value),
            "route": segment[["lat", "lng"]].to_dict(orient="records"),
        }

    # Write the JSON data to a file
    with open(output_path, "w") as f:
        json.dump(json_dict, f)

In [12]:
# Run the calculation for the sample route file; the result is written to backend_output.json
find_accidents('selection_coded_path_large_subset.json')