<a href="https://colab.research.google.com/github/Medynal/Pollution/blob/main/eda.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import streamlit as st
import pandas as pd
import plotly.express as px

@st.cache_data
def load_data():
    """Load the cleaned pollution dataset from disk.

    Reads ``cleaned_pollution_dataset.csv`` (a relative path, so it is
    resolved against the current working directory) with pandas' default
    CSV settings. The function is decorated with ``st.cache_data``.

    Returns:
        The DataFrame produced by ``pd.read_csv``.
    """
    dataset = pd.read_csv("cleaned_pollution_dataset.csv")
    return dataset

# Sentinel entry of the city selector meaning "do not filter by city".
_ALL_CITIES = "All Cities"

# Pollutant columns offered in the selector and used by the aggregate charts.
_POLLUTANTS = [
    'PM2.5', 'PM10', 'NO', 'NO2', 'NOx',
    'NH3', 'CO', 'SO2', 'O3', 'Benzene', 'Toluene', 'Xylene',
]


def _trend_figure(df, pollutant, city):
    """Return a line chart of ``pollutant`` against the ``Date`` column.

    For the "All Cities" sentinel the pollutant is averaged per ``Date``
    across every row; otherwise the rows of ``city`` are plotted as-is.
    """
    if city == _ALL_CITIES:
        trend_df = df.groupby("Date")[pollutant].mean().reset_index()
        title = f"Average {pollutant} Trend (All Cities)"
    else:
        trend_df = df[df["City"] == city]
        title = f"{pollutant} Trend in {city}"

    return px.line(trend_df, x="Date", y=pollutant, title=title)


def _average_figure(df, pollutants, city):
    """Return a bar chart of the mean level of every pollutant.

    For the "All Cities" sentinel the result is one stacked bar per city;
    otherwise it is one bar per pollutant for the selected ``city``.
    """
    if city == _ALL_CITIES:
        avg_df = df.groupby("City")[pollutants].mean().reset_index()
        return px.bar(
            avg_df,
            x="City",
            y=pollutants,
            barmode="stack",
            title="Average Pollutant Levels by City")

    # DataFrame.mean() yields an unnamed Series indexed by pollutant name;
    # reset_index() turns it into the columns "index" and 0, and the latter
    # is renamed to "Average" for plotting.
    avg_df = (
        df[df["City"] == city][pollutants]
        .mean()
        .reset_index()
        .rename(columns={0: "Average"}))
    return px.bar(
        avg_df,
        x="index",
        y="Average",
        labels={"index": "Pollutant"},
        title=f"Average Pollutant Levels in {city}")


def _top5_figure(df, pollutant):
    """Return a bar chart of the five cities with the highest mean ``pollutant``."""
    top5_df = (
        df.groupby("City")[pollutant]
        .mean()
        .reset_index()
        .sort_values(by=pollutant, ascending=False)
        .head(5))

    return px.bar(
        top5_df,
        x="City",
        y=pollutant,
        title=f"Top 5 Cities by Average {pollutant}")


def _correlation_figure(df, pollutants):
    """Return a heatmap of the pairwise correlation between ``pollutants``."""
    return px.imshow(
        df[pollutants].corr(),
        text_auto=".2f",
        aspect="auto",
        title="Correlation Between Pollutants")


def show_eda():
    """Render the "Exploratory Data Analysis" page.

    Loads the dataset through ``load_data()``, shows a pollutant selector
    and a city selector, and then draws four Plotly charts in a 2x2 grid:

    * top-left: trend of the selected pollutant over time,
    * top-right: average level of every pollutant,
    * bottom-left: top five cities for the selected pollutant,
    * bottom-right: correlation heatmap of all pollutants.

    The city selector only affects the two top charts; the two bottom
    charts are always computed over the whole dataset.
    """
    st.header("Exploratory Data Analysis")

    df = load_data()

    # Global Filters
    col_filter1, col_filter2 = st.columns(2)

    selected_pollutant = col_filter1.selectbox(
        "Select Pollutant",
        _POLLUTANTS)

    # Drop missing city names before sorting: NaN is a float, and sorted()
    # raises TypeError when it has to order a float against a str.
    city_options = [_ALL_CITIES] + sorted(df["City"].dropna().unique())
    selected_city = col_filter2.selectbox(
        "Select City",
        city_options)

    # Cardinal Layout (2x2)
    col_nw, col_ne = st.columns(2)
    col_sw, col_se = st.columns(2)

    # NORTH-WEST: Trend
    with col_nw:
        st.subheader("Pollutant Trend Over Time")
        st.plotly_chart(
            _trend_figure(df, selected_pollutant, selected_city),
            use_container_width=True)

    # NORTH-EAST: Average by City
    with col_ne:
        st.subheader("Average Pollutant Levels")
        st.plotly_chart(
            _average_figure(df, _POLLUTANTS, selected_city),
            use_container_width=True)

    # SOUTH-WEST: Top 5 Cities
    with col_sw:
        st.subheader("Top 5 Cities by Pollutant")
        st.plotly_chart(
            _top5_figure(df, selected_pollutant),
            use_container_width=True)

    # SOUTH-EAST: Correlation
    with col_se:
        st.subheader("Pollutant Correlation Heatmap")
        st.plotly_chart(
            _correlation_figure(df, _POLLUTANTS),
            use_container_width=True)
