In [9]:
pip install streamlit plotly pandas



In [11]:
import streamlit as st
import pandas as pd
import plotly.express as px
import plotly.graph_objects as go
import os

In [4]:
st.set_page_config(
    page_title="Urban Traffic & Weather Analytics",
    page_icon="ðŸš¦",
    layout="wide"
)

st.title("ðŸš¦ Urban Traffic Analytics Under Weather Conditions")
st.markdown("### Data Lake Final Project Dashboard")
st.markdown("---")

2025-12-14 16:29:33.997 
  command:

    streamlit run /usr/local/lib/python3.12/dist-packages/colab_kernel_launcher.py [ARGUMENTS]


DeltaGenerator()

In [5]:
@st.cache_data
def load_data():
    data = {}

    if os.path.exists("merged_data.parquet"):
        data['main'] = pd.read_parquet("merged_data.parquet")

    if os.path.exists("simulation_results.csv"):
        data['sim'] = pd.read_csv("simulation_results.csv")

    if os.path.exists("factor_loadings.csv"):
        data['factors'] = pd.read_csv("factor_loadings.csv", index_col=0)

    return data

data = load_data()

2025-12-14 16:33:25.912 No runtime found, using MemoryCacheStorageManager
2025-12-14 16:33:25.915 No runtime found, using MemoryCacheStorageManager


In [6]:
tab1, tab2, tab3 = st.tabs(["Dataset Overview", "Monte Carlo Simulation", "Factor Analysis"])

with tab1:
    st.header("Cleaned Dataset Statistics")

    if 'main' in data:
        df = data['main']

        col1, col2, col3, col4 = st.columns(4)
        col1.metric("Total Records", len(df))
        col2.metric("Avg Temperature", f"{df['temperature_c'].mean():.1f} Â°C")
        col3.metric("Avg Traffic Speed", f"{df['avg_speed_kmh'].mean():.1f} km/h")
        col4.metric("Total Accidents", df['accident_count'].sum())

        st.subheader("Traffic Speed vs. Weather Conditions")
        x_axis = st.selectbox("Select X-Axis", ['rain_mm', 'visibility_m', 'wind_speed_kmh'], index=0)

        fig = px.scatter(df, x=x_axis, y='avg_speed_kmh', color='congestion_level',
                         title=f"Impact of {x_axis} on Traffic Speed",
                         opacity=0.6)
        st.plotly_chart(fig, use_container_width=True)

        with st.expander("View Raw Data Sample"):
            st.dataframe(df.head(100))
    else:
        st.error("File 'merged_data.parquet' not found. Please run Phase 4.")

2025-12-14 16:33:35.482 Session state does not function when running a script without `streamlit run`
2025-12-14 16:33:35.983 Please replace `use_container_width` with `width`.

`use_container_width` will be removed after 2025-12-31.

For `use_container_width=True`, use `width='stretch'`. For `use_container_width=False`, use `width='content'`.


In [7]:
with tab2:
    st.header("Predictive Risk Analysis (Monte Carlo)")
    st.markdown("Simulating traffic congestion probability under different weather scenarios.")

    if 'sim' in data:
        sim_df = data['sim']

        scenario = st.selectbox("Select Weather Scenario", sim_df['scenario'].unique())

        filtered_sim = sim_df[sim_df['scenario'] == scenario]

        high_risk_prob = filtered_sim['congestion_prob'].mean() * 100
        st.metric(label=f"Average Congestion Probability ({scenario})", value=f"{high_risk_prob:.2f}%")

        fig_hist = px.histogram(filtered_sim, x="congestion_prob", nbins=50,
                                title=f"Distribution of Congestion Probability - {scenario}",
                                color_discrete_sequence=['#FF4B4B'])
        st.plotly_chart(fig_hist, use_container_width=True)

    else:
        st.error("File 'simulation_results.csv' not found. Please run Phase 5.")

2025-12-14 16:33:51.094 Please replace `use_container_width` with `width`.

`use_container_width` will be removed after 2025-12-31.

For `use_container_width=True`, use `width='stretch'`. For `use_container_width=False`, use `width='content'`.


In [8]:
with tab3:
    st.header("Factor Analysis Insights")
    st.markdown("Identifying latent variables that drive traffic patterns.")

    if 'factors' in data:
        loadings = data['factors']

        col_text, col_viz = st.columns([1, 2])

        with col_text:
            st.markdown("### Interpretation")
            st.info("""
            **Factor 1 (Weather Severity):** heavily loaded with Rain, Wind, and Visibility.
            **Factor 2 (Traffic Stress):** heavily loaded with Vehicle Count and Speed.
            **Factor 3 (Accident Risk):** heavily loaded with Accident Counts.
            """)
            st.dataframe(loadings)

        with col_viz:
            fig_heat = px.imshow(loadings, text_auto=True, aspect="auto",
                                 title="Factor Loadings Heatmap",
                                 color_continuous_scale='RdBu_r')
            st.plotly_chart(fig_heat, use_container_width=True)

    else:
        st.warning("File 'factor_loadings.csv' not found. Please run Phase 6.")
        st.image("factor_heatmap.png", caption="Static Factor Heatmap (from Phase 6)")

2025-12-14 16:33:57.227 Please replace `use_container_width` with `width`.

`use_container_width` will be removed after 2025-12-31.

For `use_container_width=True`, use `width='stretch'`. For `use_container_width=False`, use `width='content'`.
