# 📌 Dynamic Air Quality Dashboard — Setup & Imports

This dashboard automatically:
- Detects all pollutant metric columns (`*_mean`, `*_max`) dynamically  
- Creates a dropdown widget without any hard-coded metric names  
- Unpivots the dataset dynamically (no materialized views needed)  
- Supports date/city filters and mean/max toggle  
- Generates a final visualization-ready table for dashboard tiles  


In [0]:
from pyspark.sql import functions as F

# Source table (change to gold.* if needed)
SRC_TABLE = "env_catalog.env_data.silver_air_daily"

# Key columns
KEY_TIMESTAMP_COL = "data_timestamp"
KEY_CITY_COL = "city"
# KEY_DIM_COL = "dim_key"

# 📌 Create Widgets (Dynamic Metric Selector + Filters)
The metric dropdown is populated by auto-detecting all *_mean and *_max columns.

In [0]:
tbl = spark.table(SRC_TABLE)
all_cols = tbl.columns

mean_cols = [c for c in all_cols if c.lower().endswith("_mean")]
max_cols  = [c for c in all_cols if c.lower().endswith("_max")]
bases = sorted({c.rsplit("_",1)[0] for c in (mean_cols + max_cols)})

if not bases:
    raise Exception(f"No *_mean or *_max columns found in {SRC_TABLE}")

# Recreate widgets cleanly
for w in ("metric_name","date_from","date_to","city_list","agg_type"):
    try: dbutils.widgets.remove(w)
    except: pass

dbutils.widgets.dropdown("metric_name", bases[0], bases, "Metric (dynamic)")
dbutils.widgets.text("date_from", "", "Start Date (YYYY-MM-DD)")
dbutils.widgets.text("date_to", "", "End Date (YYYY-MM-DD)")
dbutils.widgets.dropdown("agg_type", "mean", ["mean","max"], "Aggregation")

# collect distinct cities safely (avoid .rdd)
cities_rows = spark.table("env_catalog.env_data.city_master") \
    .select("city_name").distinct().orderBy("city_name").collect()
cities_lst = [r["city_name"] for r in cities_rows]

# create combobox (user can type or pick from list). Use a comma-separated string for multi selections.
default = cities_lst[0] if cities_lst else ""
dbutils.widgets.multiselect("city_list", default, cities_lst, "City (choose or type comma-separated)")

# Later in your code read and split:
city_raw = dbutils.widgets.get("city_list").strip()
cities_selected = [c.strip() for c in city_raw.split(",") if c.strip()] if city_raw else None

# 📌 Read Widgets + Validate Date Range

In [0]:
metric_name = dbutils.widgets.get("metric_name").strip()
date_from = dbutils.widgets.get("date_from").strip() or None
date_to   = dbutils.widgets.get("date_to").strip() or None
city_raw  = dbutils.widgets.get("city_list").strip()
cities    = [c.strip() for c in city_raw.split(",") if c.strip()] if city_raw else None
agg_type  = dbutils.widgets.get("agg_type").strip()

# Validation
assert (not date_from) or (not date_to) or (date_to > date_from), \
       "Validation Error: date_to must be greater than date_from"

# 📌 Dynamic Unpivot / Explode for All Metrics (No Static Views Required)

In [0]:
keep_cols = [KEY_TIMESTAMP_COL, KEY_CITY_COL]
# if KEY_DIM_COL in all_cols:
#     keep_cols.append(KEY_DIM_COL)

src_df = tbl.select(*keep_cols, *[c for c in all_cols if c not in keep_cols])
metric_cols = src_df.columns

# Build arrays for pollutant, mean, max
pollutant_arr = F.array(*[F.lit(b) for b in bases]).alias("pollutant_arr")
mean_arr = F.array(*[
    F.col(f"{b}_mean") if f"{b}_mean" in metric_cols else F.lit(None) 
    for b in bases
]).alias("mean_arr")
max_arr = F.array(*[
    F.col(f"{b}_max") if f"{b}_max" in metric_cols else F.lit(None)
    for b in bases
]).alias("max_arr")

arr_df = src_df.select(*keep_cols, pollutant_arr, mean_arr, max_arr)

exploded = (
    arr_df
    .select(*keep_cols,
            F.posexplode("mean_arr").alias("pos","mean_value"),
            F.col("pollutant_arr"), F.col("max_arr"))
    .withColumn("pollutant", F.element_at("pollutant_arr", F.col("pos") + 1))
    .withColumn("max_value", F.element_at("max_arr", F.col("pos") + 1))
    .drop("pos","pollutant_arr","max_arr")
)

# 📌 Apply Filters (metric, city, date)

In [0]:
q = exploded
q = q.filter(F.col("pollutant") == metric_name)

if date_from:
    q = q.filter(F.to_date(KEY_TIMESTAMP_COL) >= F.lit(date_from))
if date_to:
    q = q.filter(F.to_date(KEY_TIMESTAMP_COL) <= F.lit(date_to))
if cities:
    q = q.filter(F.col(KEY_CITY_COL).isin(cities))

# 📌 Aggregate for Visualization (Daily)

In [0]:
viz = (
    q.withColumn("day", F.to_date(KEY_TIMESTAMP_COL))
     .groupBy("day", KEY_CITY_COL, "pollutant")
     .agg(
         F.round(F.avg("mean_value"),2).alias("agg_mean"),
         F.round(F.max("max_value"),2).alias("agg_max")
     )
     .orderBy("day")
)

metric_col = "agg_mean" if agg_type == "mean" else "agg_max"
viz_display = viz.select(
    "day", KEY_CITY_COL, "pollutant", 
    F.col(metric_col).alias("metric_value")
)

# 📌 Final Output (Add to Dashboard as Chart)
Use Databricks chart UI:
- **X-axis:** `day`  
- **Y-axis:** `metric_value`  
- **Series/Group:** `city` or `pollutant`  
Then:  
**⋮ → Add to Dashboard**

In [0]:
display(viz_display)

displayHTML(f"<br><b>Available Metrics:</b> {', '.join(bases)}")
displayHTML(f"<br><b>Available Cities:</b> {', '.join(cities_lst)}")

day,city,pollutant,metric_value
2025-10-15,Ahmedabad,ozone,93.79
2025-10-15,Amsterdam,ozone,27.67
2025-10-15,Bengaluru,ozone,39.38
2025-10-16,Ahmedabad,ozone,85.96
2025-10-16,Amsterdam,ozone,30.54
2025-10-16,Bengaluru,ozone,49.63
2025-10-17,Amsterdam,ozone,26.42
2025-10-17,Ahmedabad,ozone,83.29
2025-10-17,Bengaluru,ozone,31.13
2025-10-18,Amsterdam,ozone,37.0


Databricks visualization. Run in Databricks to view.

- Only SQL segments in Notebook support AIBI Dashboard 
- For python, only notebook dashboards are available

In [0]:
%sql 
WITH ilv_city AS (
  SELECT trim(col) AS city
  FROM explode(split('${city_list}', ',')) AS city
)
-- SELECT * FROM ilv_city
SELECT d.city, data_timestamp
, ${metric_name}_${agg_type} AS pollutant
-- , pm10_max, pm2_5_max 
FROM env_catalog.env_data.silver_air_daily d
JOIN ilv_city c
  ON d.city = c.city
WHERE data_timestamp BETWEEN '${date_from}' AND '${date_to}'

city,data_timestamp,pollutant
Ahmedabad,2025-10-15T00:00:00.000Z,93.79166666666669
Amsterdam,2025-10-18T00:00:00.000Z,37.0
Ahmedabad,2025-10-30T00:00:00.000Z,72.25
Amsterdam,2025-10-26T00:00:00.000Z,56.04166666666666
Amsterdam,2025-10-24T00:00:00.000Z,55.625
Amsterdam,2025-10-22T00:00:00.000Z,23.58333333333333
Amsterdam,2025-10-15T00:00:00.000Z,27.666666666666668
Ahmedabad,2025-10-22T00:00:00.000Z,97.54166666666669
Ahmedabad,2025-10-26T00:00:00.000Z,73.95833333333333
Amsterdam,2025-10-23T00:00:00.000Z,40.333333333333336


Databricks visualization. Run in Databricks to view.

In [0]:
%sql 
WITH ilv_city AS (
  SELECT trim(col) AS city
  FROM explode(split(:city_list, ',')) AS city
)
-- SELECT * FROM ilv_city
SELECT d.city, data_timestamp
-- , concat(:metric_name, '_', :agg_type) AS pollutant
-- TO DO Dynamic mapper for column name
, pm10_max AS pollutant
FROM env_catalog.env_data.silver_air_daily d
JOIN ilv_city c
  ON d.city = c.city
WHERE data_timestamp BETWEEN :date_from AND :date_to

city,data_timestamp,pollutant
Ahmedabad,2025-10-15T00:00:00.000Z,60.1
Amsterdam,2025-10-18T00:00:00.000Z,15.9
Ahmedabad,2025-10-30T00:00:00.000Z,51.3
Amsterdam,2025-10-26T00:00:00.000Z,16.6
Amsterdam,2025-10-24T00:00:00.000Z,12.2
Amsterdam,2025-10-22T00:00:00.000Z,23.0
Amsterdam,2025-10-15T00:00:00.000Z,17.9
Ahmedabad,2025-10-22T00:00:00.000Z,55.5
Ahmedabad,2025-10-26T00:00:00.000Z,43.9
Amsterdam,2025-10-23T00:00:00.000Z,17.7


In [0]:
query = f"""
SELECT city, data_timestamp, pm10_max, pm2_5_max FROM env_catalog.env_data.silver_air_daily
WHERE city IN ({cities_sql})
AND data_timestamp BETWEEN '{date_from}' AND '{date_to}' 
"""

df = spark.sql(query)
display(df)

city,data_timestamp,pm10_max,pm2_5_max
Mumbai,2025-10-20T00:00:00.000Z,97.6,93.8
Mumbai,2025-10-25T00:00:00.000Z,45.6,45.5
Mumbai,2025-10-31T00:00:00.000Z,20.5,17.3
Mumbai,2025-10-21T00:00:00.000Z,79.9,78.5
Mumbai,2025-10-30T00:00:00.000Z,33.9,27.4
Mumbai,2025-10-17T00:00:00.000Z,63.6,63.3
Mumbai,2025-10-15T00:00:00.000Z,72.2,69.6
Mumbai,2025-10-19T00:00:00.000Z,120.6,118.0
Mumbai,2025-10-26T00:00:00.000Z,22.2,21.8
Mumbai,2025-10-23T00:00:00.000Z,54.7,54.6


Databricks visualization. Run in Databricks to view.