In [18]:
import streamlit as st
import pandas as pd
import altair as alt
import folium
from folium.plugins import MarkerCluster


In [19]:

# Page configuration and headline
st.set_page_config(
    page_title='Permit Data Exploration',
    page_icon='📊',
)
st.title('📊 Permit Data Exploration')

# Collapsible panel describing the app; written to through the container
# object returned by st.expander.
about_panel = st.expander('About this app')
about_panel.markdown('**What can this app do?**')
about_panel.info('Interact with the permit dataset/Parquet files and explore each column.')
about_panel.markdown('**How to use the app?**')
about_panel.warning('This is a prototype, functionality will be added as we develop further.')

st.subheader('Explore Permit Data by Construction Type and State')

# Read the permit records from the Parquet file
df50 = pd.read_parquet('data/csv_reveal-gc-2020-50.parquet')

# Input widgets: each multiselect offers the distinct values of one column
# and starts with the first three of them selected.

## Construction Type selection
const_type_list = df50['CONST_TYPE'].unique()
const_type_selection = st.multiselect(
    'Select Construction Types',
    options=const_type_list,
    default=const_type_list[:3],
)

## State selection
state_list = df50['SITE_STATE'].unique()
state_selection = st.multiselect(
    'Select States',
    options=state_list,
    default=state_list[:3],
)


In [30]:

# Keep only the rows whose construction type AND state are both selected
matches_const_type = df50['CONST_TYPE'].isin(const_type_selection)
matches_state = df50['SITE_STATE'].isin(state_selection)
df_selection = df50[matches_const_type & matches_state]

# Show the filtered records
st.dataframe(df_selection)

# Count PERMITID entries per state (rows) and construction type (columns),
# filling absent combinations with 0, then order states descending.
reshaped_df = df_selection.pivot_table(
    index='SITE_STATE',
    columns='CONST_TYPE',
    values='PERMITID',
    aggfunc='count',
    fill_value=0,
).sort_values(by='SITE_STATE', ascending=False)

# Show the aggregated table
st.subheader('Aggregated Data by State and Construction Type')
st.dataframe(reshaped_df)

# Convert the wide pivot into long form: one row per (state, type) with its count
wide_counts = reshaped_df.reset_index()
df_chart = wide_counts.melt(
    id_vars='SITE_STATE',
    var_name='CONST_TYPE',
    value_name='COUNT',
)

# Bar chart of permit counts per state, coloured by construction type
state_axis = alt.X('SITE_STATE:N', title='State')
count_axis = alt.Y('COUNT:Q', title='Permit Count')
chart = (
    alt.Chart(df_chart)
    .mark_bar()
    .encode(x=state_axis, y=count_axis, color='CONST_TYPE:N')
    .properties(height=320)
)
st.altair_chart(chart, use_container_width=True)


DeltaGenerator()

In [31]:
# Debug aid: write the current filtered selection to stdout so its rows and
# columns can be inspected.
print(df_selection)

       PERMITID       PRJ_STATUS                      CONST_TYPE  \
0      17855172  Approved Permit         Res Rmdl, Addn, Int Fin   
1      17858630  Approved Permit             Single Family Homes   
2      17855104  Approved Permit         Res Rmdl, Addn, Int Fin   
3      17855372  Approved Permit  Alternative Residential Energy   
4      17824051  Approved Permit         Res Rmdl, Addn, Int Fin   
...         ...              ...                             ...   
46204  17836441  Approved Permit  Alternative Residential Energy   
46211  17826013  Approved Permit  Alternative Residential Energy   
46214  17849863  Approved Permit  Alternative Residential Energy   
46221  17864971  Approved Permit  Alternative Residential Energy   
46231  17826992  Approved Permit  Alternative Residential Energy   

                    SITE_AREA SITE_STATE            SITE_CNTY  \
0                 Connecticut         CT     Fairfield County   
1               Orange County         CA        Orang

In [23]:

# Map visualization
st.subheader('Permit Locations Map')

# Clean data for map visualization.
# Coordinates are parsed with pd.to_numeric rather than a digits-only string
# test: that test rejected every value with a leading '-', so all negative
# (western-hemisphere) longitudes were dropped and the map came up empty.
# Unparseable or missing values become NaN.
site_lat = pd.to_numeric(df_selection['SITE_LAT'], errors='coerce')
site_long = pd.to_numeric(df_selection['SITE_LONG'], errors='coerce')

# Keep rows whose coordinates parsed and lie inside the valid latitude and
# longitude ranges; NaN fails `between`, so missing values are excluded too.
has_valid_coords = site_lat.between(-90, 90) & site_long.between(-180, 180)

# .assign() builds a new frame, so the float columns are not written onto a
# filtered slice of df50 (which raises SettingWithCopyWarning).
df_selection = df_selection.loc[has_valid_coords].assign(
    SITE_LAT1=site_lat[has_valid_coords].astype(float),
    SITE_LONG1=site_long[has_valid_coords].astype(float),
)

# Create a map centered on the average latitude and longitude
if not df_selection.empty:
    map_center = [df_selection['SITE_LAT1'].mean(), df_selection['SITE_LONG1'].mean()]
    m = folium.Map(location=map_center, zoom_start=5)

    # Add marker cluster to the map
    marker_cluster = MarkerCluster().add_to(m)

    # Add one marker per permit, using the converted float coordinates for
    # BOTH axes (the raw SITE_LAT column was used for latitude before).
    for lat, lon, address in zip(
        df_selection['SITE_LAT1'],
        df_selection['SITE_LONG1'],
        df_selection['SITE_ADDRS'],
    ):
        folium.Marker(location=[lat, lon], popup=address).add_to(marker_cluster)

    # Save the map as an HTML file
    map_path = 'map.html'
    m.save(map_path)

    # Display the map in Streamlit. The file is read back explicitly as UTF-8
    # so non-ASCII popup text does not depend on the platform default encoding.
    # NOTE(review): folium is understood to write this file as UTF-8 — confirm.
    with open(map_path, 'r', encoding='utf-8') as f:
        html_map = f.read()
    st.components.v1.html(html_map, width=700, height=500)
else:
    st.write("No valid coordinates available for mapping.")

# Optional: Add more charts and statistics if necessary
# Example: Construction Type Distribution
# NOTE(review): df_selection was narrowed above to rows with valid
# coordinates, so this chart counts only mappable permits — confirm intended.
st.subheader('Construction Type Distribution')
const_type_dist = df_selection['CONST_TYPE'].value_counts().reset_index()
const_type_dist.columns = ['CONST_TYPE', 'COUNT']

chart2 = alt.Chart(const_type_dist).mark_bar().encode(
    x=alt.X('CONST_TYPE:N', title='Construction Type'),
    y=alt.Y('COUNT:Q', title='Count'),
    color='CONST_TYPE:N'
).properties(height=320)
st.altair_chart(chart2, use_container_width=True)


DeltaGenerator()

In [27]:

# Debug aid: write the current state of df_selection to stdout for inspection.
print(df_selection)

       PERMITID       PRJ_STATUS                      CONST_TYPE  \
0      17855172  Approved Permit         Res Rmdl, Addn, Int Fin   
1      17858630  Approved Permit             Single Family Homes   
2      17855104  Approved Permit         Res Rmdl, Addn, Int Fin   
3      17855372  Approved Permit  Alternative Residential Energy   
4      17824051  Approved Permit         Res Rmdl, Addn, Int Fin   
...         ...              ...                             ...   
46204  17836441  Approved Permit  Alternative Residential Energy   
46211  17826013  Approved Permit  Alternative Residential Energy   
46214  17849863  Approved Permit  Alternative Residential Energy   
46221  17864971  Approved Permit  Alternative Residential Energy   
46231  17826992  Approved Permit  Alternative Residential Energy   

                    SITE_AREA SITE_STATE            SITE_CNTY  \
0                 Connecticut         CT     Fairfield County   
1               Orange County         CA        Orang

In [32]:
# Keep only rows whose SITE_LAT looks like a plain decimal number.
# The test drops the first '.', strips any leading '-', and requires the rest
# to be digits. Stripping '-' matters: without it every negative latitude is
# rejected, and it keeps this test in line with the SITE_LONG filter.
df_selection = df_selection[df_selection['SITE_LAT'].apply(lambda x: str(x).replace('.', '', 1).lstrip('-').isdigit())]
print(df_selection)

       PERMITID       PRJ_STATUS                      CONST_TYPE  \
0      17855172  Approved Permit         Res Rmdl, Addn, Int Fin   
1      17858630  Approved Permit             Single Family Homes   
2      17855104  Approved Permit         Res Rmdl, Addn, Int Fin   
3      17855372  Approved Permit  Alternative Residential Energy   
4      17824051  Approved Permit         Res Rmdl, Addn, Int Fin   
...         ...              ...                             ...   
46204  17836441  Approved Permit  Alternative Residential Energy   
46211  17826013  Approved Permit  Alternative Residential Energy   
46214  17849863  Approved Permit  Alternative Residential Energy   
46221  17864971  Approved Permit  Alternative Residential Energy   
46231  17826992  Approved Permit  Alternative Residential Energy   

                    SITE_AREA SITE_STATE            SITE_CNTY  \
0                 Connecticut         CT     Fairfield County   
1               Orange County         CA        Orang

In [56]:
# Scratch check: try the sign-aware "is this numeric?" test on a single
# SITE_LONG value before applying it to the whole column.
# NOTE(review): `[0]` presumably looks the value up by index label 0, so it
# needs a row labelled 0 to still be present after filtering — confirm.
s = df_selection['SITE_LONG'][0]
# The call to `.astype(str)` implies `s` is a NumPy scalar rather than a plain
# Python str — TODO confirm the column dtype.
s.astype(str).replace('.', '', 1).lstrip('-').isdigit()

True

In [58]:
def _is_signed_decimal(value):
    """Return True if str(value), minus its first '.' and any leading '-', is all digits."""
    text = str(value).replace('.', '', 1)
    return text.lstrip('-').isdigit()


# Keep only the rows whose SITE_LONG passes the sign-aware numeric test
longitude_ok = df_selection['SITE_LONG'].apply(_is_signed_decimal)
df_selection = df_selection[longitude_ok]
print(df_selection)

       PERMITID       PRJ_STATUS                      CONST_TYPE  \
0      17855172  Approved Permit         Res Rmdl, Addn, Int Fin   
1      17858630  Approved Permit             Single Family Homes   
2      17855104  Approved Permit         Res Rmdl, Addn, Int Fin   
3      17855372  Approved Permit  Alternative Residential Energy   
4      17824051  Approved Permit         Res Rmdl, Addn, Int Fin   
...         ...              ...                             ...   
46204  17836441  Approved Permit  Alternative Residential Energy   
46211  17826013  Approved Permit  Alternative Residential Energy   
46214  17849863  Approved Permit  Alternative Residential Energy   
46221  17864971  Approved Permit  Alternative Residential Energy   
46231  17826992  Approved Permit  Alternative Residential Energy   

                    SITE_AREA SITE_STATE            SITE_CNTY  \
0                 Connecticut         CT     Fairfield County   
1               Orange County         CA        Orang

In [59]:
# Add SITE_LAT1: the SITE_LAT column converted to float.
# df_selection is a filtered subset of another frame, so assigning a column
# onto it with [] raises SettingWithCopyWarning; .assign() returns a new frame
# holding the same data plus the new column.
df_selection = df_selection.assign(SITE_LAT1=df_selection['SITE_LAT'].astype(float))
print(df_selection)

       PERMITID       PRJ_STATUS                      CONST_TYPE  \
0      17855172  Approved Permit         Res Rmdl, Addn, Int Fin   
1      17858630  Approved Permit             Single Family Homes   
2      17855104  Approved Permit         Res Rmdl, Addn, Int Fin   
3      17855372  Approved Permit  Alternative Residential Energy   
4      17824051  Approved Permit         Res Rmdl, Addn, Int Fin   
...         ...              ...                             ...   
46204  17836441  Approved Permit  Alternative Residential Energy   
46211  17826013  Approved Permit  Alternative Residential Energy   
46214  17849863  Approved Permit  Alternative Residential Energy   
46221  17864971  Approved Permit  Alternative Residential Energy   
46231  17826992  Approved Permit  Alternative Residential Energy   

                    SITE_AREA SITE_STATE            SITE_CNTY  \
0                 Connecticut         CT     Fairfield County   
1               Orange County         CA        Orang

In [60]:
# Add SITE_LONG1: the SITE_LONG column converted to float.
# df_selection is a filtered subset of another frame, so assigning a column
# onto it with [] raises SettingWithCopyWarning; .assign() returns a new frame
# holding the same data plus the new column.
df_selection = df_selection.assign(SITE_LONG1=df_selection['SITE_LONG'].astype(float))
print(df_selection)

       PERMITID       PRJ_STATUS                      CONST_TYPE  \
0      17855172  Approved Permit         Res Rmdl, Addn, Int Fin   
1      17858630  Approved Permit             Single Family Homes   
2      17855104  Approved Permit         Res Rmdl, Addn, Int Fin   
3      17855372  Approved Permit  Alternative Residential Energy   
4      17824051  Approved Permit         Res Rmdl, Addn, Int Fin   
...         ...              ...                             ...   
46204  17836441  Approved Permit  Alternative Residential Energy   
46211  17826013  Approved Permit  Alternative Residential Energy   
46214  17849863  Approved Permit  Alternative Residential Energy   
46221  17864971  Approved Permit  Alternative Residential Energy   
46231  17826992  Approved Permit  Alternative Residential Energy   

                    SITE_AREA SITE_STATE            SITE_CNTY  \
0                 Connecticut         CT     Fairfield County   
1               Orange County         CA        Orang