In [1]:
import pandas as pd
import json
import numpy as np

# Import Data

In [None]:
# Load the crash-records CSV (the 2021 Q2 export, per its file name) into a
# DataFrame and preview the first three rows
crashes_csv_path = '../leon-county-crshes-2021-q2/2021-q2_202412251115.csv'
crashes = pd.read_csv(crashes_csv_path)
crashes.head(3)

In [None]:
# Order the rows by report number and renumber the index from zero
crashes_sorted = crashes.sort_values('report_number', ignore_index=True)
crashes_sorted.head(3)

In [4]:
# Maps `non_motorist_description_code` values to crash-type labels. Any code
# not listed here (including a missing one) falls back to "MOTOR VEHICLE".
_CRASH_TYPE_MAP = {
    None: "MOTOR VEHICLE",
    1: "PEDESTRIAN",
    3: "BICYCLIST",
}

# `injury_severity` value that marks a report as fatal.
_FATAL_INJURY_SEVERITY = 5


def _nan_to_none(value):
    """Return None for a float NaN; return any other value unchanged."""
    # NaN is the only float that does not compare equal to itself.
    if isinstance(value, float) and value != value:
        return None
    return value


def create_geojson_features(grouped_data):
    """Build one GeoJSON Point feature per crash report.

    Args:
        grouped_data: Mapping of report number -> list of row dicts for that
            report. Keys may be anything `int()` accepts (ints or numeric
            strings). The first row of each list must contain 'latitude',
            'longitude', 'crash_year', 'crash_date_time',
            'total_number_of_vehicles' and 'total_number_of_persons'; every
            row may contain 'injury_severity' and
            'non_motorist_description_code'.

    Returns:
        A list of GeoJSON feature dicts. Reports with no rows, or whose first
        row has a missing (None or NaN) latitude/longitude, are skipped.
        Per-person details are not included in the output.

    Raises:
        KeyError: If a required key is absent from a report's first row.
        ValueError: If a report number cannot be converted to int.
    """
    features = []

    for report_number, rows in grouped_data.items():
        # An empty group has no row to read coordinates from.
        if not rows:
            continue

        # Report-level fields are read from the first row of the group.
        first_row = rows[0]
        lat = _nan_to_none(first_row['latitude'])
        long = _nan_to_none(first_row['longitude'])

        # Skip if latitude or longitude is missing. pandas represents missing
        # numbers as NaN rather than None, so both are treated as missing.
        if lat is None or long is None:
            continue

        # NaN is replaced by None so the feature serializes to valid JSON
        # (`null`) instead of the non-standard `NaN` token.
        shared_properties = {
            'crash_year': _nan_to_none(first_row['crash_year']),
            'crash_date_time': _nan_to_none(first_row['crash_date_time']),
        }

        is_fatal = any(
            row.get('injury_severity') == _FATAL_INJURY_SEVERITY for row in rows
        )

        # Sorted so the output is identical from run to run; plain set
        # iteration order for strings varies between interpreter sessions.
        crash_types = sorted({
            _CRASH_TYPE_MAP.get(
                _nan_to_none(row.get('non_motorist_description_code')),
                "MOTOR VEHICLE",
            )
            for row in rows
        })

        # Determine the single headline crash_type: pedestrian takes priority
        # over bicyclist, which takes priority over motor vehicle.
        if "PEDESTRIAN" in crash_types:
            crash_type = "Pedestrian"
        elif "BICYCLIST" in crash_types:
            crash_type = "Bicyclist"
        else:
            crash_type = "Motor Vehicle"

        # Create a GeoJSON feature (coordinates are [longitude, latitude])
        feature = {
            "type": "Feature",
            "id": int(report_number),
            "geometry": {
                "type": "Point",
                "coordinates": [long, lat]
            },
            "properties": {
                **shared_properties,
                "report_number": int(report_number),
                "is_fatal": is_fatal,
                "crash_types": crash_types,
                "crash_type": crash_type,
                "vehicles_involved": _nan_to_none(first_row['total_number_of_vehicles']),
                "people_involved": _nan_to_none(first_row['total_number_of_persons']),
            }
        }
        features.append(feature)
    return features

# Group rows by report_number and convert to GeoJSON format


In [None]:
# Keep only the rows that have both a latitude and a longitude
filtered_data = crashes_sorted.dropna(subset=['latitude', 'longitude'])

# Collect the rows of each report as a list of record dicts, keyed by report
# number. Iterating over the groups (instead of groupby().apply()) avoids the
# pandas deprecation warning about apply operating on the grouping column;
# each record still carries its 'report_number' field.
filtered_grouped_data = {
    report_number: report_rows.to_dict(orient='records')
    for report_number, report_rows in filtered_data.groupby('report_number')
}

# Build one feature per report and wrap them in a GeoJSON FeatureCollection.
# The grouped dict is passed as-is: create_geojson_features converts each key
# with int(), so no JSON round-trip is needed to normalise the keys.
geojson_features = create_geojson_features(filtered_grouped_data)
geojson_data = {
    "type": "FeatureCollection",
    "features": geojson_features
}


Export to GeoJSON

In [6]:
# Export the feature collection to a GeoJSON file

# Destination path for the GeoJSON output
geojson_file_path = '../leon-county-crshes-2021-q2/leon-county-2021-to-2021-q2.geojson'

# Serialize the collection with 4-space indentation
with open(geojson_file_path, mode="w") as out_file:
    json.dump(geojson_data, fp=out_file, indent=4)