# Accident Propensity Index Calculation v09

## Import libraries

In [9]:
# Import relevant libraries
import pandas as pd
import math
import numpy as np
import json

## Functions to calculate distances and find accidents on route - run once at start

In [10]:
# Define a function to calculate the distance between two points
def distance(point1, point2):
    lat1, lon1 = point1
    lat2, lon2 = point2
    km_per_lat = 110.574 # km per degree latitude
    km_per_lon = 111.320 # km per degree longitude at the equator
    dx = (lon2 - lon1) * km_per_lon * math.cos((lat1 + lat2) / 2)
    dy = (lat2 - lat1) * km_per_lat
    return math.sqrt(dx**2 + dy**2)

# Define a function to calculate the distance between a point and a line segment
def distance_to_segment(point, segment_start, segment_end):
    px, py = point
    x1, y1 = segment_start
    x2, y2 = segment_end
    dx, dy = x2 - x1, y2 - y1
    segment_length_squared = dx*dx + dy*dy
    if segment_length_squared == 0:
        return distance(point, segment_start)
    t = max(0, min(1, ((px - x1) * dx + (py - y1) * dy) / segment_length_squared))
    x = x1 + t * dx
    y = y1 + t * dy
    return distance(point, (x, y))

# Define a function to find accidents on a given route within a maximum distance
def find_accidents_on_route(start_point, end_point, all_relevant_accidents):
    # Maximal distance of accidents from route in kilometers
    max_distance = 0.05
    # Create a mask for accidents that are within the maximum distance from the route
    mask = all_relevant_accidents.apply(lambda row: distance_to_segment((row['Start_Lat'], row['Start_Lng']), start_point, end_point) <= max_distance, axis=1)

    # Return the accidents that match the mask
    accidents = all_relevant_accidents.loc[mask]
    return accidents

## Find accidents on route - run every time

In [13]:
def find_accidents():
    # load route data from json file
    with open('route_data.json') as f:
        json_data = json.load(f)

    # create DataFrame from loaded data
    route_data = pd.DataFrame(json_data)

    # load relevant accident data csv files and concatenate
    unique_values = route_data['route_lat'].astype(str).str[:2].unique().tolist()
    dfs = []
    for val in unique_values:
        file_path = f'new_data/accident_data_{val}.csv'
        df = pd.read_csv(file_path)
        dfs.append(df)
    all_relevant_accidents = pd.concat(dfs, ignore_index=True)

    # split the route DataFrame into 5 equally sized parts
    df_list = np.array_split(route_data, 5)

    # loop through the route DataFrames
    for i, df in enumerate(df_list):
        # Safe each split dataframe as a new route_df_{i} dataframe
        globals()[f"route_df_{i+1}"] = df
        # Create a new DataFrame to store the results
        accidents_df = pd.DataFrame()
        # Loop through the pairs of subsequent coordinates
        for j in range(len(df) - 1):
            start_point = j
            end_point = j + 1
            # Call the function and append the results to the accidents DataFrame
            accidents_df = accidents_df.append(find_accidents_on_route(df.iloc[start_point], df.iloc[end_point], all_relevant_accidents), ignore_index=True)
        # Drop duplicate rows from the accidents DataFrame
        accidents_df.drop_duplicates(inplace=True)
        
        # Assign a name to the accidents DataFrame based on the index of the original DataFrame
        df_index = i+1
        globals()[f'accidents_df_{df_index}'] = accidents_df

    # create a list of dataframes for the API calculation
    accidents_dfs = [accidents_df_1, accidents_df_2, accidents_df_3, accidents_df_4, accidents_df_5]
    api_dict = {}

    # loop through the list of dataframes and join each one with the all_accidents dataframe
    for i, df in enumerate(accidents_dfs, start=1):
        api = df["Severity"].sum() / 6035011
        api_dict[f'api_{i}'] = round(api, 8)


    # List of the segment dataframes, each containing the coordinates that are assigned to the respective segment
    route_dfs = [route_df_1, route_df_2, route_df_3, route_df_4, route_df_5]

    # Create a dictionary to store the JSON data
    json_dict = {}

    # Loop over the segments and add the API and route data to the JSON dictionary
    for i in range(5):
        segment_name = f"segment_{i+1}"
        json_dict[segment_name] = {}
        json_dict[segment_name]["api"] = api_dict[f"api_{i+1}"]
        json_dict[segment_name]["route"] = route_dfs[i].to_dict(orient="records")

    # Write the JSON data to a file
    with open("backend_output.json", "w") as f:
        json.dump(json_dict, f)

In [14]:
find_accidents()

  accidents_df = accidents_df.append(find_accidents_on_route(df.iloc[start_point], df.iloc[end_point], all_relevant_accidents), ignore_index=True)
  accidents_df = accidents_df.append(find_accidents_on_route(df.iloc[start_point], df.iloc[end_point], all_relevant_accidents), ignore_index=True)
  accidents_df = accidents_df.append(find_accidents_on_route(df.iloc[start_point], df.iloc[end_point], all_relevant_accidents), ignore_index=True)
  accidents_df = accidents_df.append(find_accidents_on_route(df.iloc[start_point], df.iloc[end_point], all_relevant_accidents), ignore_index=True)
  accidents_df = accidents_df.append(find_accidents_on_route(df.iloc[start_point], df.iloc[end_point], all_relevant_accidents), ignore_index=True)
