-
Notifications
You must be signed in to change notification settings - Fork 0
/
streamlit_app.py
87 lines (63 loc) · 3.32 KB
/
streamlit_app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import glob
import os
import pandas as pd
import streamlit as st
# Page-level settings: browser tab title and icon, centered layout,
# and a sidebar that starts collapsed.
st.set_page_config(
    page_title="Motor Vehicle Collisions in NYC",
    page_icon='🚓',
    layout='centered',
    initial_sidebar_state='collapsed',
)
@st.cache_resource
def get_local_feather_files():
    """Return a table of the ``*.feather`` files in the current working directory.

    Returns:
        A DataFrame with one row per matching file and two columns:
        'File Name' (the path as returned by ``glob.glob``) and
        'File Size in KBytes' (size in bytes divided by 1024, converted
        to an integer). The frame is empty, but still has both columns,
        when no file matches.
    """
    columns = ['File Name', 'File Size in KBytes']
    files_with_size = [
        (file_path, os.stat(file_path).st_size)
        for file_path in glob.glob('*.feather')
    ]
    # Name the columns in the constructor. Assigning `df.columns` afterwards
    # raises a length-mismatch ValueError when no file matched, because a
    # frame built from an empty list has zero columns.
    df = pd.DataFrame(files_with_size, columns=columns)
    df['File Size in KBytes'] = (df['File Size in KBytes'] / 1024).astype(int)
    return df
@st.cache_resource
def load_data():
    """Load the crash records from 'crashes.feather' and keep only usable rows.

    The 'index' column is removed, rows lacking a latitude or longitude are
    discarded, and only rows strictly inside the box
    40.0 < latitude < 42.0 and -76.0 < longitude < -70.0 are returned.
    """
    crashes = pd.read_feather('crashes.feather').drop(columns=['index'])

    # Rows without coordinates cannot be placed on the map.
    crashes = crashes.dropna(subset=['latitude', 'longitude'])

    # Keep only rows inside the bounding box of NYC.
    lat = crashes['latitude']
    lon = crashes['longitude']
    inside_box = (lat > 40.0) & (lat < 42.0) & (lon > -76.0) & (lon < -70.0)
    return crashes[inside_box]
@st.cache_data(show_spinner=False)
def query_data_by_persons_injured(injured_people):
    """Return coordinates of collisions with at least `injured_people` injured.

    Reads the module-level `data` frame, selects the rows whose
    'number_of_persons_injured' is >= `injured_people`, and returns their
    'latitude' and 'longitude' columns with incomplete rows removed.
    """
    condition = f'number_of_persons_injured >= {injured_people}'
    matching_rows = data.query(condition)
    coordinates = matching_rows[["latitude", "longitude"]]
    return coordinates.dropna(how="any")
@st.cache_data(show_spinner=False)
def get_query_persons_string(selection: str):
    """Build a 'number_of_<first>_<second>' name from a label.

    The label is lower-cased and split on whitespace; its first two words
    are inserted into the result, e.g. 'Persons Injured' becomes
    'number_of_persons_injured'. A label with fewer than two words raises
    IndexError.
    """
    words = selection.lower().split()
    group, outcome = words[0], words[1]
    return f'number_of_{group}_{outcome}'
@st.cache_data(show_spinner=False)
def filter_data_by_type_of_people(type_of_people, amount=8):
    """Return the street rows with the highest values in `type_of_people`.

    Reads the module-level `data` frame. Only rows where the
    `type_of_people` column is greater than zero are considered. The result
    holds 'on_street_name', 'off_street_name' and that column, sorted by it
    in descending order, with missing values shown as empty strings and
    sliced to the first `amount` rows.
    """
    selected_columns = ['on_street_name', 'off_street_name', type_of_people]

    affected = data[data[type_of_people] > 0]
    ranked = affected[selected_columns].sort_values(by=[type_of_people], ascending=False)

    # thresh=2 keeps a row only if at least two of the three selected
    # columns are non-missing.
    with_streets = ranked.dropna(thresh=2)
    return with_streets.fillna('')[:amount]
@st.cache_resource
def get_all_contributing_factors():
    """Count how often each value occurs across the contributing-factor columns.

    Reads the module-level `data` frame. Every column whose name starts with
    'contributing_factor' is tallied with ``value_counts``; the per-column
    tallies are summed per value and sorted in descending order.

    Returns:
        A one-column DataFrame ('Frequency of mention', integer counts)
        whose index is named 'Contributing Factors'.
    """
    factor_columns = [name for name in data.columns if name.startswith('contributing_factor')]

    # One column of counts per factor column; a value missing from a column
    # yields NaN there, which counts as zero in the total.
    per_column_counts = data[factor_columns].apply(pd.Series.value_counts)
    totals = per_column_counts.fillna(0).sum(axis=1)
    totals = totals.sort_values(ascending=False).astype(int)

    summary = totals.to_frame(name='Frequency of mention')
    summary.index.name = 'Contributing Factors'
    return summary
# ---- Page body: rendered top to bottom on every script run ----
st.title("Motor Vehicle Collisions in NYC")
st.markdown("This application is a Streamlit Demo App to test Git-LFS on Streamlit Cloud.")

# Show which feather files are present before trying to load one.
st.subheader("All local feather files found")
st.table(get_local_feather_files())

with st.spinner("Loading data..."):
    # `data` is a module-level name that the query/filter helpers read.
    data = load_data()

# ---- Map of collisions filtered by number of injured persons ----
st.subheader("Where are the most people injured in NYC")
max_injured_people = int(data['number_of_persons_injured'].max())
injured_people = st.slider("Number of persons injured in vehicle collisions", 0, max_injured_people)
data_by_persons_injured = query_data_by_persons_injured(injured_people)
st.map(data=data_by_persons_injured)

# ---- Streets ranked by the selected injured/killed column ----
# The heading used to say "Top 5" while the table showed up to 8 rows (the
# default `amount` of filter_data_by_type_of_people). Both now come from one
# value; 8 keeps the table exactly as it was displayed before.
top_streets_shown = 8
st.subheader(f"Top {top_streets_shown} dangerous streets by affected type")
select = st.selectbox(
    'Affected type of people injured or killed',
    [
        'Persons Injured', 'Persons Killed',
        'Pedestrians Injured', 'Pedestrians Killed',
        'Cyclist Injured', 'Cyclist Killed',
        'Motorist Injured', 'Motorist Killed',
    ],
)
st.table(filter_data_by_type_of_people(get_query_persons_string(select), amount=top_streets_shown))

# ---- Contributing factors ----
st.subheader("Contributing Factors for accidents")
st.table(get_all_contributing_factors())

# ---- Optional raw-data preview ----
st.subheader("Show Raw Data")
if st.checkbox("Show Raw Data", False):
    st.subheader('Raw Data')
    st.write(data.head(100).fillna(''))  # limit to first 100 rows