<a href="https://colab.research.google.com/github/Medynal/Pollution/blob/main/App.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import streamlit as st
import pandas as pd
import numpy as np
import joblib
import plotly.express as px
from PIL import Image

In [None]:
# Page setup: browser-tab title and wide layout.
st.set_page_config(layout="wide", page_title="Air Quality Analytics Platform")

# Banner shown at the top of every page, forced to 800x250 pixels.
image = Image.open("images.jpeg").resize((800, 250))
st.image(image)

# Sidebar: the section selector first, then a short description of the app.
with st.sidebar:
    section = st.radio(
        "Navigate",
        ["Data Overview", "Exploratory Data Analysis (EDA)", "Modelling & Prediction"],
    )

    st.markdown("### About This App")
    st.markdown(
        """
        **Air Quality Analytics and Prediction System**
        This application provides:
        - Air quality insights in major cities in India
        - Interactive exploratory analysis
        - AQI and AQI-Bucket prediction""")

# Load Dataset

@st.cache_data
def load_data() -> pd.DataFrame:
    """Read the cleaned pollution dataset from disk.

    Returns:
        The DataFrame produced by ``pd.read_csv`` for
        ``cleaned_pollution_dataset.csv`` (a relative path).
    """
    dataset = pd.read_csv("cleaned_pollution_dataset.csv")
    return dataset


df = load_data()

# Load Models
@st.cache_resource
def load_models():
    """Load the three persisted artefacts with joblib.

    Returns:
        A 3-tuple ``(aqi_model, bucket_model, bucket_encoder)`` read, in that
        order, from ``aqi_regressor.pkl``, ``aqib_classifier.pkl`` and
        ``bucket_encoder.pkl`` (relative paths).
    """
    artefact_files = (
        "aqi_regressor.pkl",
        "aqib_classifier.pkl",
        "bucket_encoder.pkl",
    )
    return tuple(joblib.load(path) for path in artefact_files)


aqi_model, bucket_model, bucket_encoder = load_models()

ModuleNotFoundError: No module named 'streamlit'

In [None]:
# SECTION 1 — DATA OVERVIEW

if section == "Data Overview":

    st.header("Dataset Overview")
    st.subheader("Basic Information")

    # Static description of the dataset, rendered as Markdown.
    overview_text = """
                    The dataset contains air quality observations from multiple cities in India.
                    These observations were collected over several years **(2015–2020)** and focus on
                    key atmospheric pollutants commonly used to assess environmental and public health risks.

                    ### Key Pollutants Covered
                    - **Particulate Matter:** PM2.5, PM10
                    - **Nitrogen Compounds:** NO, NO₂, NOx
                    - **Gaseous Pollutants:** SO₂, CO, O₃
                    - **Volatile Organic Compounds (VOCs):** Benzene, Toluene, Xylene
                    """
    st.markdown(overview_text)

    # Row and column counts of the full (unfiltered) dataset.
    record_count, feature_count = df.shape
    st.write(f"Number of Records: {record_count}")
    st.write(f"Number of Features: {feature_count}")

    # City picker: an "All Cities" entry followed by the sorted city names.
    city_choices = ["All Cities", *sorted(df["City"].unique())]
    selected_city = st.radio("Select City", city_choices, horizontal=True)

    st.subheader("Sample Records")

    if selected_city != "All Cities":
        # Restrict the preview to the rows of the chosen city.
        filtered_df = df[df["City"] == selected_city]
        st.write(
            f"Showing dataset for **{selected_city}** "
            f"({filtered_df.shape[0]} records)")
    else:
        filtered_df = df
        st.write("Showing dataset for **all cities**")

    # Only the first 100 rows of the selection are displayed.
    preview = filtered_df.head(100)
    st.dataframe(preview, use_container_width=True)

In [None]:
# SECTION 2 — EXPLORATORY DATA ANALYSIS
elif section == "Exploratory Data Analysis (EDA)":

    st.header("Exploratory Data Analysis")

    pollutants = [
        'PM2.5','PM10','NO','NO2','NOx',
        'NH3','CO','SO2','O3','Benzene','Toluene','Xylene']


    # Global Filters

    col_filter1, col_filter2 = st.columns(2)

    selected_pollutant = col_filter1.selectbox(
        "Select Pollutant",
        pollutants )

    city_options = ["All Cities"] + sorted(df["City"].unique())
    selected_city = col_filter2.selectbox(
        "Select City",
        city_options)

    # Cardinal Layout (2x2 Grid)

    col_nw, col_ne = st.columns(2)
    col_sw, col_se = st.columns(2)

    # NORTH-WEST: Line Chart (Trend)

    with col_nw:
        st.subheader("Pollutant Trend Over Time")

        if selected_city == "All Cities":
            trend_df = (
                df.groupby("Date")[selected_pollutant]
                .mean()
                .reset_index())
            title = f"Average {selected_pollutant} Trend (All Cities)"
        else:
            trend_df = df[df["City"] == selected_city]
            title = f"{selected_pollutant} Trend in {selected_city}"

        fig_line = px.line(
            trend_df,
            x="Date",
            y=selected_pollutant,
            title=title)
        st.plotly_chart(fig_line, use_container_width=True)

    # NORTH-EAST: Average Pollutant by City

    with col_ne:
        st.subheader("Average Pollutant Levels")

        if selected_city == "All Cities":
            avg_city_df = (
                df.groupby("City")[pollutants]
                .mean()
                .reset_index())

            fig_avg = px.bar(
                avg_city_df,
                x="City",
                y=pollutants,
                title="Average Pollutant Levels by City",
                barmode="stack")
        else:
            avg_single_city = (
                df[df["City"] == selected_city][pollutants]
                .mean()
                .reset_index()
                .rename(columns={0: "Average"}))

            fig_avg = px.bar(
                avg_single_city,
                x="index",
                y="Average",
                title=f"Average Pollutant Levels in {selected_city}",
                labels={"index": "Pollutant"})

        st.plotly_chart(fig_avg, use_container_width=True)

    # SOUTH-WEST: Top 5 Cities by Pollutant
    with col_sw:
        st.subheader("Top 5 Cities by Pollutant")

        top5_df = (
            df.groupby("City")[selected_pollutant]
            .mean()
            .reset_index()
            .sort_values(by=selected_pollutant, ascending=False)
            .head(5))

        fig_top5 = px.bar(
            top5_df,
            x="City",
            y=selected_pollutant,
            title=f"Top 5 Cities by Average {selected_pollutant}")

        st.plotly_chart(fig_top5, use_container_width=True)


    # SOUTH-EAST: Correlation Heatmap

    with col_se:
        st.subheader("Pollutant Correlation Heatmap")

        corr_df = df[pollutants].corr()

        fig_corr = px.imshow(
            corr_df,
            text_auto=".2f",
            aspect="auto",
            title="Correlation Between Pollutants")

        st.plotly_chart(fig_corr, use_container_width=True)


In [None]:
# SECTION 3 — MODELLING & PREDICTION

elif section == "Modelling & Prediction":

    st.header("AQI Modelling and Prediction")

    col1, col2 = st.columns(2)

    city = col1.selectbox("City", sorted(df["City"].unique()))
    date = col1.date_input("Date")

    pollutants = [
        'PM2.5','PM10','NO','NO2','NOx',
        'NH3','CO','SO2','O3','Benzene','Toluene','Xylene']

    pollutant_values = {}
    for p in pollutants:
        pollutant_values[p] = col2.number_input(f"{p}", min_value=0.0)

    if st.button("Predict AQI"):

        year = date.year
        month = date.month
        day = date.day

        input_df = pd.DataFrame([{
            "City": city,
            "year": year,
            "month": month,
            "day": day,
            **pollutant_values}])

        predicted_aqi = aqi_model.predict(input_df)[0]
        input_df["AQI"] = predicted_aqi

        predicted_bucket = bucket_model.predict(input_df)[0]
        bucket_label = bucket_encoder.inverse_transform([predicted_bucket])[0]

        st.success(f"Predicted AQI: {predicted_aqi:.2f}")
        st.info(f"AQI Category: {bucket_label}")