In [1]:
import pandas as pd
import json
import numpy as np

# Import Data

In [2]:
# Read the leon people 2019 Q2 CSV into a DataFrame and preview the first rows
raw_people_csv = '../quarterly-tranches/leon-people-2019-q2.csv'
df = pd.read_csv(raw_people_csv)
df.head(n=3)

Unnamed: 0,report_number,crash_year,role,person_number,home_zip,age,sex,injury_severity,vehicle_number,driver_distraction_code,...,driver_action_3,driver_action_4,non_motorist_description_code,location_code,action_prior_to_crash_code,action_code1,action_code2,crash_date_time,latitude,longitude
0,87505408,2019,D,1,32305,36.0,2.0,1.0,1.0,1.0,...,,,,,,,,2019-04-24T14:55:00Z,30.456964,-84.366811
1,87505410,2019,D,1,32303,67.0,1.0,1.0,1.0,88.0,...,,,,,,,,2019-04-25T16:10:00Z,,
2,89140109,2019,D,2,32312,40.0,2.0,1.0,2.0,1.0,...,,,,,,,,2019-04-11T13:05:00Z,30.445048,-84.261891



What do we want our first map to look like?

    What data do we want to display?
        Accidents
        Injury severity
        Pedestrians & Cyclists
        Location

Which columns do we want to keep?

* report_number
* crash_year
* role
* person_number
* injury_severity
* vehicle_number
* crash_date_time
* latitude
* longitude
* non_motorist_description_code

In [3]:
# Keep only the columns listed above; everything else is dropped from the working frame
columns_to_keep = [
    "report_number",
    "crash_year",
    "role",
    "person_number",
    "injury_severity",
    "vehicle_number",
    "non_motorist_description_code",
    "crash_date_time",
    "latitude",
    "longitude",
]
crashes = df[columns_to_keep]
crashes.head(n=3)

Unnamed: 0,report_number,crash_year,role,person_number,injury_severity,vehicle_number,non_motorist_description_code,crash_date_time,latitude,longitude
0,87505408,2019,D,1,1.0,1.0,,2019-04-24T14:55:00Z,30.456964,-84.366811
1,87505410,2019,D,1,1.0,1.0,,2019-04-25T16:10:00Z,,
2,89140109,2019,D,2,1.0,2.0,,2019-04-11T13:05:00Z,30.445048,-84.261891


In [4]:
# Order the rows by report number, then renumber the index from zero
crashes_by_report = crashes.sort_values(by='report_number')
crashes_sorted = crashes_by_report.reset_index(drop=True)
crashes_sorted.head(n=3)

Unnamed: 0,report_number,crash_year,role,person_number,injury_severity,vehicle_number,non_motorist_description_code,crash_date_time,latitude,longitude
0,82014362,2019,P,3,1.0,1.0,,2019-06-07T11:10:00Z,30.48687,-84.159208
1,82014362,2019,P,2,1.0,1.0,,2019-06-07T11:10:00Z,30.48687,-84.159208
2,82014362,2019,D,1,1.0,1.0,,2019-06-07T11:10:00Z,30.48687,-84.159208


Export to CSV

In [5]:
# Write the trimmed, sorted table to the processed folder (index column omitted)
processed_csv_path = '../quarterly-tranches/processed/leon-people-2019-q2.csv'
crashes_sorted.to_csv(processed_csv_path, index=False)

In [12]:
def _nan_to_none(value):
    """Return ``None`` when ``value`` is a float NaN; otherwise return it unchanged."""
    if isinstance(value, float) and np.isnan(value):
        return None
    return value


def create_geojson_features(grouped_data):
    """Build one GeoJSON ``Feature`` (a point) per crash report.

    Parameters
    ----------
    grouped_data : dict
        Maps a report number to the list of row dicts belonging to that
        report. Every row must contain ``latitude``, ``longitude``,
        ``crash_year``, ``crash_date_time``, ``role`` and ``person_number``;
        ``injury_severity``, ``vehicle_number`` and
        ``non_motorist_description_code`` are optional. Location and the
        shared properties are taken from the first row of each report.

    Returns
    -------
    list of dict
        One feature per report that has at least one row. Its ``properties``
        hold the shared fields, a per-person ``details`` list, ``is_fatal``,
        ``crash_types`` (sorted, unique), ``crash_type``,
        ``vehicles_involved`` and ``people_involved``.

    Notes
    -----
    Coordinates are copied through unchanged, so rows with a missing
    latitude/longitude must be removed by the caller beforehand.
    """
    # Codes that identify a non-motorist. Every other value, including a
    # missing one, falls back to "MOTOR VEHICLE" through the .get() default.
    crash_type_map = {
        1: "PEDESTRIAN",
        3: "BICYCLIST",
    }

    features = []
    for report_number, rows in grouped_data.items():
        if not rows:
            # Nothing to locate or describe for a report without rows.
            continue

        first_row = rows[0]
        lat = first_row['latitude']
        long = first_row['longitude']

        shared_properties = {
            'crash_year': _nan_to_none(first_row['crash_year']),
            'crash_date_time': _nan_to_none(first_row['crash_date_time']),
        }

        # A report is flagged fatal when any person has injury severity 5.
        is_fatal = any(row.get('injury_severity') == 5 for row in rows)

        # sorted() keeps the list order stable between runs; iterating a set
        # of strings directly depends on Python's per-process hash seed.
        crash_types = sorted({
            crash_type_map.get(row.get('non_motorist_description_code'), "MOTOR VEHICLE")
            for row in rows
        })

        # Single headline type: pedestrian wins over bicyclist, which wins
        # over motor vehicle.
        if "PEDESTRIAN" in crash_types:
            crash_type = "Pedestrian"
        elif "BICYCLIST" in crash_types:
            crash_type = "Bicyclist"
        else:
            crash_type = "Motor Vehicle"

        # The highest vehicle / person number seen is used as the count.
        # Missing values (None or NaN) are ignored so that max() cannot
        # return NaN or depend on row order.
        vehicle_numbers = (_nan_to_none(row.get('vehicle_number')) for row in rows)
        vehicles_involved = max(
            (int(number) for number in vehicle_numbers if number is not None),
            default=0,
        )
        person_numbers = (_nan_to_none(row.get('person_number')) for row in rows)
        people_involved = max(
            (number for number in person_numbers if number is not None),
            default=0,
        )

        # Per-person details; NaN becomes None so it serialises as JSON null.
        details = [
            {
                'role': _nan_to_none(row['role']),
                'person_number': _nan_to_none(row['person_number']),
                'injury_severity': _nan_to_none(row.get('injury_severity')),
                'vehicle_number': _nan_to_none(row.get('vehicle_number')),
                'non_motorist_description_code': _nan_to_none(row.get('non_motorist_description_code')),
            }
            for row in rows
        ]

        features.append({
            "type": "Feature",
            "id": report_number,
            "geometry": {
                "type": "Point",
                # GeoJSON positions are ordered longitude first, then latitude.
                "coordinates": [long, lat],
            },
            "properties": {
                **shared_properties,
                "details": details,
                "is_fatal": is_fatal,
                "crash_types": crash_types,
                "crash_type": crash_type,
                "vehicles_involved": vehicles_involved,
                "people_involved": people_involved,
            },
        })
    return features

# Extract unique rows by report_number and convert to GeoJSON format


In [13]:
# Drop rows without a usable location: a GeoJSON point needs both coordinates
filtered_data = crashes_sorted.dropna(subset=['latitude', 'longitude'])

# Regroup the located rows as {report_number: [row dict, ...]}.
# The groupby is iterated directly instead of going through groupby.apply,
# whose handling of the grouping column is deprecated and raises a warning.
filtered_grouped_data = {
    # Unwrap numpy scalars so every key is a plain Python value json.dumps accepts
    (report_number.item() if hasattr(report_number, 'item') else report_number):
        report_rows.to_dict(orient='records')
    for report_number, report_rows in filtered_data.groupby('report_number')
}

# Serialise the grouped data to JSON text
grouped_json = json.dumps(filtered_grouped_data, indent=4)

# Parsing the JSON text back yields string keys, so each feature "id" built
# below is the report number as a string
geojson_features = create_geojson_features(json.loads(grouped_json))
geojson_data = {
    "type": "FeatureCollection",
    "features": geojson_features
}


  .apply(lambda x: x.to_dict(orient='records'))


Export to GeoJSON

In [14]:
# Export the FeatureCollection as a GeoJSON file

# Destination of the GeoJSON output
geojson_file_path = '../quarterly-tranches/processed/leon-people-2019-q2.geojson'

# Serialise with 4-space indentation and write the resulting text to disk
with open(geojson_file_path, "w") as geojson_file:
    geojson_file.write(json.dumps(geojson_data, indent=4))