In [109]:
import streamlit as st
import pandas as pd
import folium
from streamlit_folium import folium_static
import datetime
import plotly.express as px

In [85]:
# Load the data
df = pd.read_csv('MVCollisionDataset.csv')
print(df.head)


<bound method NDFrame.head of                DATE   TIME    BOROUGH  ZIP CODE   LATITUDE  LONGITUDE  \
0        09-11-2021  02:39        NaN       NaN        NaN        NaN   
1        03/26/2022  11:45        NaN       NaN        NaN        NaN   
2        06/29/2022  06:55        NaN       NaN        NaN        NaN   
3        09-11-2021  09:35   BROOKLYN   11208.0  40.667202 -73.866500   
4        12/14/2021  08:13   BROOKLYN   11233.0  40.683304 -73.917274   
...             ...    ...        ...       ...        ...        ...   
1048570  09-07-2016  20:10        NaN       NaN  40.715813 -73.940025   
1048571  09-02-2016  13:00        NaN       NaN  40.681446 -73.946440   
1048572  09-04-2016  05:00        NaN       NaN  40.810333 -73.953640   
1048573  09-10-2016  18:39  MANHATTAN   10011.0  40.746490 -74.001335   
1048574  08/22/2016  15:30     QUEENS   11414.0  40.665504 -73.856120   

                        LOCATION                    ON STREET NAME  \
0                      

In [86]:
# Set page title and layout
st.set_page_config(page_title="NYC Motor Vehicle Collisions", page_icon=":car:", layout="wide")

In [87]:
print(df.columns)

Index(['DATE', 'TIME', 'BOROUGH', 'ZIP CODE', 'LATITUDE', 'LONGITUDE',
       'LOCATION', 'ON STREET NAME', 'CROSS STREET NAME', 'OFF STREET NAME',
       'NUMBER OF PERSONS INJURED', 'NUMBER OF PERSONS KILLED',
       'NUMBER OF PEDESTRIANS INJURED', 'NUMBER OF PEDESTRIANS KILLED',
       'NUMBER OF CYCLIST INJURED', 'NUMBER OF CYCLIST KILLED',
       'NUMBER OF MOTORIST INJURED', 'NUMBER OF MOTORIST KILLED',
       'CONTRIBUTING FACTOR VEHICLE 1', 'CONTRIBUTING FACTOR VEHICLE 2',
       'CONTRIBUTING FACTOR VEHICLE 3', 'CONTRIBUTING FACTOR VEHICLE 4',
       'CONTRIBUTING FACTOR VEHICLE 5', 'COLLISION_ID', 'VEHICLE TYPE CODE 1',
       'VEHICLE TYPE CODE 2', 'VEHICLE TYPE CODE 3', 'VEHICLE TYPE CODE 4',
       'VEHICLE TYPE CODE 5'],
      dtype='object')


In [102]:
# Set up the sidebar
st.sidebar.title("Filters")
time_range = st.sidebar.slider(
    "Time range",
    min_value=pd.to_datetime(df['TIME'], format='%H:%M').min().time(),
    max_value=pd.to_datetime(df['TIME'], format='%H:%M').max().time(),
)
time_range_str = (str(time_range.hour).zfill(2) + ':' + str(time_range.minute).zfill(2),
                  str(time_range.hour).zfill(2) + ':' + str(time_range.minute + 1).zfill(2))
filtered_data = df[
    (pd.to_datetime(df['TIME'], format='%H:%M').dt.strftime('%H:%M').between(*time_range_str))
]


In [105]:
# Show the map
st.header("Motor Vehicle Collisions in NYC")
st.subheader("Map")
st.markdown("Filtered by date range and borough, if selected")
st.map(filtered_data[['LATITUDE', 'LONGITUDE']].dropna(how="any"))

DeltaGenerator()

In [118]:
def plot_collisions_by_borough(data):
    st.write("Number of Collisions by Borough")
    collisions_by_borough = data["BOROUGH"].value_counts().reset_index()
    collisions_by_borough.columns = ["Borough", "Number of Collisions"]
    collisions_by_borough = collisions_by_borough.sort_values(by="Number of Collisions", ascending=False)
    fig = px.bar(collisions_by_borough, x="Borough", y="Number of Collisions")
    st.plotly_chart(fig)

In [131]:
def plot_collision_map(data):
    st.write("Location of Collisions")
    tooltip_cols = ["DATE", "TIME", "BOROUGH", "ZIP CODE", "LATITUDE", "LONGITUDE", "NUMBER OF PERSONS INJURED", "NUMBER OF PERSONS KILLED", "CONTRIBUTING FACTOR VEHICLE 1"]
    fig = px.scatter_mapbox(
        data,
        lat="LATITUDE",
        lon="LONGITUDE",
        hover_name="DATE",
        hover_data=tooltip_cols,
        color_discrete_sequence=["red"],
        zoom=9,
        height=600,
    )
    fig.update_layout(mapbox_style="open-street-map")
    st.plotly_chart(fig)

In [132]:

def main():
    st.set_page_config(page_title="Motor Vehicle Collisions in NYC", page_icon=":car:", layout="wide")
    st.title("Motor Vehicle Collisions in NYC")

    plot_collisions_by_borough(df)


    plot_collision_map(df)


if __name__ == "__main__":
    main()

In [107]:
# Show the data table
st.subheader("Data Table")
st.markdown("Filtered by date range and borough, if selected")
st.dataframe(filtered_data[['DATE','TIME', 'BOROUGH', 'ZIP CODE', 'LATITUDE', 'LONGITUDE']])

DeltaGenerator()