### Description:

- script for processing un-automated datasets to compare with the tool (such as car crashes and traffic)

In [8]:
import osmnx as ox
import pandas as pd
from shapely.geometry import Point, Polygon
import geopandas as gpd
import os

def filter_crashes_within_area(data_file, place_name, output_file):
    """
    Reads crash data and saves entries located within the bounding area defined by osmnx's graph_from_place.

    Parameters:
        data_file (str): Path to the CSV file containing crash data.
        place_name (str): The place name to create the bounding area (e.g., "Fredericksburg, VA, USA").
        output_file (str): Path to save the filtered data.

    Returns:
        None: Saves the filtered data to a new CSV file.
    """

      # 1. Load the graph and convert it into a GeoDataFrame (bounding area)
    print("Loading graph for the specified place...")
    graph = ox.graph_from_place(place_name, network_type="drive")
    gdf_boundary = ox.geocode_to_gdf(place_name)
    print(gdf_boundary)
    
    # 2. Read the crash dataset
    print("Reading crash dataset...")
    df = pd.read_csv(data_file)

    # Check if the required columns (LAT and LON) exist
    if 'LAT' not in df.columns or 'LON' not in df.columns:
        print("Error: LAT and LON columns are missing in the dataset.")
        return

    # 3. Convert crash data to GeoDataFrame using LAT and LON
    print("Converting crash data to GeoDataFrame...")
    df['geometry'] = df.apply(lambda row: Point(row['LON'], row['LAT']), axis=1)
    gdf_crashes = gpd.GeoDataFrame(df, geometry='geometry', crs="EPSG:4326")

    # 4. Filter crashes that fall within the bounding area
    print("Filtering crashes within the area...")
    gdf_crashes_within = gdf_crashes[gdf_crashes.within(gdf_boundary.unary_union)]

    # 5. Save the filtered crashes to a new CSV file
    print(f"Saving filtered crashes to {output_file}...")
    gdf_crashes_within.drop(columns=['geometry'], inplace=True)  # Remove geometry column before saving
    gdf_crashes_within.to_csv(output_file, index=False)

    print("Process complete. Filtered data saved successfully.")



def main():
    
    place_name = "fredericksburg, VA, USA"
    simple_place_name = place_name.split(",")[0]

    os.makedirs(f"data/{simple_place_name}", exist_ok=True)

    data_file = f"data/{simple_place_name}/features (1).csv"
    output_file = f"data/{simple_place_name}/car_crashes_{simple_place_name}.csv"
    filter_crashes_within_area(data_file, place_name, output_file)



main()

Loading graph for the specified place...
                                            geometry  bbox_west  bbox_south  \
0  POLYGON ((-77.53259 38.30853, -77.53184 38.308...  -77.53259   38.270151   

   bbox_east  bbox_north   place_id  osm_type   osm_id        lat       lon  \
0 -77.446793   38.326638  320417545  relation  1633328  38.303184 -77.46054   

      class            type  place_rank  importance addresstype  \
0  boundary  administrative          12    0.555167        city   

             name                             display_name  
0  Fredericksburg  Fredericksburg, Virginia, United States  
Reading crash dataset...
Converting crash data to GeoDataFrame...
Filtering crashes within the area...


  gdf_crashes_within = gdf_crashes[gdf_crashes.within(gdf_boundary.unary_union)]


Saving filtered crashes to data/fredericksburg/car_crashes_fredericksburg.csv...
Process complete. Filtered data saved successfully.


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  gdf_crashes_within.drop(columns=['geometry'], inplace=True)  # Remove geometry column before saving


### Viewing Crashes

In [10]:
from sklearn.neighbors import BallTree
import numpy as np
import pandas as pd
def find_nearest_pano(crash_df, ball_tree, pano_df, threshold=25):
    """
    Finds the nearest panoramic point to each crash point within a given threshold.
    """
    crash_coords = np.radians(crash_df[['LAT', 'LON']].values)  # Convert to radians for latitude and longitude
    distances, indices = ball_tree.query(crash_coords, k=1)  # Query nearest neighbor

    # Convert distances from radians to meters (Earth's radius ~ 6371000 meters)
    distances_meters = distances[:, 0] * 6371000

    # Assign pano_id if within threshold
    crash_df['matched_pano_id'] = [
        pano_df.iloc[indices[i][0]]['pano_id'] if distances_meters[i] <= threshold else None
        for i in range(len(distances_meters))
    ]

    return crash_df

def get_car_crashes_with_panaramics():
    simple_name = "fredericksburg"
    base_directory = f"data/{simple_name}"

    # Load datasets
    panoramic_dataset = pd.read_csv(f"{base_directory}/{simple_name}_panoramic_data.csv")
    crash_dataset = pd.read_csv(f"{base_directory}/car_crashes_{simple_name}.csv")

    # Check the column names of the panoramic dataset
    print(panoramic_dataset.columns)  # Debug: Check actual column names

    # Assuming the columns are different, adjust based on the output.
    # If the actual names are 'lat' and 'long', modify the following line:
    pano_coords = np.radians(panoramic_dataset[['lat', 'long']].values)  # Adjust based on your column names

    # Build the BallTree
    ball_tree = BallTree(pano_coords, metric='haversine')
    # Match crash points to panoramic points
    result_df = find_nearest_pano(crash_dataset, ball_tree, panoramic_dataset, threshold=25)

    # Display the result for verification
    print(result_df.head())

    # Remove rows where matched_pano_id is None
    result_df = result_df.dropna(subset=['matched_pano_id'])

    # Save the result to a new CSV file
    result_df.to_csv(f"{base_directory}/car_crashes_with_panoramics_{simple_name}.csv", index=False)

# Call the function to execute
get_car_crashes_with_panaramics()


Index(['pano_id', 'segment_id', 'lat', 'long', 'heading', 'tilt', 'year',
       'month', 'segment_headings', 'segment_links', 'segment_heading_links',
       'segment_line_strings'],
      dtype='object')
   OBJECTID  Document Nbr Local Case CD  Crash Year  Crash Date  \
0        46     160915088           NaN        2016   3/30/2016   
1       278     163615034    2016007086        2016  12/23/2016   
2       305     161295070    2016002406        2016    5/6/2016   
3       372     162755056  DIV216103686        2016   9/29/2016   
4       404     160805191  DIV216027356        2016   3/19/2016   

   Crash Military Time             Crash Severity  K_People  A_People  \
0                  236  PDO. Property Damage Only         0         0   
1                 1724          B. Visible Injury         0         0   
2                 1132          B. Visible Injury         0         0   
3                  640  PDO. Property Damage Only         0         0   
4                 1433  PD

In [None]:
from geopy.distance import geodesic
import math
import osmnx as ox
import networkx as nx
import numpy as np
from shapely.geometry import Point
import pandas as pd
from shapely.geometry import Point, LineString


def convert_military_integer_to_time(military_int):
    # Ensure the military integer is a string with leading zeros if necessary
    military_time = str(military_int).zfill(4)

    # Extract hours and minutes
    hours = int(military_time[:len(military_time)-2]) if len(military_time) > 2 else 0
    minutes = int(military_time[len(military_time)-2:])

    # Return formatted time
    return hours,minutes

def calculate_sun_glare_for_crashes(location):
    simple_name = location.split(",")[0]
    base_directory = f"data/{simple_name}"
    graph = ox.graph_from_place(location, network_type="drive")

    # Load the crash data
    crash_data = pd.read_csv(f"{base_directory}/car_crashes_with_panoramics_{simple_name}.csv")
    panoramic_data = pd.read_csv(f"{base_directory}/{simple_name}_panoramic_data.csv")

    # iterate to find the sun glare for each crash
    for index, row in crash_data.iterrows():
        print(f"==={index}===")
        print(f"    matched_pano_id: {row['matched_pano_id']}")
        # Get the crash coordinates
        lat, long = row['y'], row['x']
        car_crash_date = row['Crash Date']
        month, day, year = car_crash_date.split("/")
        hour, minutes = convert_military_integer_to_time(row['Crash Military Time'])
        print(f"    {month}/{day}/{year} {hour}:{minutes}")
        
        

        matched_pano_id = row['matched_pano_id']
        panoramic_row = panoramic_data[matched_pano_id == panoramic_data['pano_id']].iloc[0]
        print(f"    panoramic_row: {panoramic_row}")




    # loop through all the crashes and see if there was sun 
    # save the results to a new CSV file
    # and save an html map with the crashes and 



def main():
    location = "fredericksburg, VA, USA"
    calculate_sun_glare_for_crashes(location)


main()


===0===
    matched_pano_id: CAoSLEFGMVFpcE03NlREd0RjcXhKdzdVZG1DSzV6R3VpUmEzMjdSMlpsQVhLVElx
    3/30/2016 2:36
    panoramic_row: pano_id                  CAoSLEFGMVFpcE03NlREd0RjcXhKdzdVZG1DSzV6R3VpUm...
segment_id                            38.30036085252513_-77.50899853804593
lat                                                              38.300361
long                                                            -77.508999
heading                                                           4.445854
tilt                                                              -3.14332
year                                                                  2024
month                                                                    9
segment_headings                                       [44.14601413975663]
segment_links                     ['38.30087822046671_-77.50835606431639']
segment_heading_links    {44.14601413975663: ['38.30087822046671_-77.50...
segment_line_strings     {'38.3008782204667