# Databricks notebook source
# MAGIC %md
# MAGIC # 🧪 Databricks DQX: Data Quality Profiling
# MAGIC 
# MAGIC This notebook:
# MAGIC - Loads the `departuredelays.csv` dataset
# MAGIC - Runs DQX profiling on the dataset
# MAGIC - Displays DQX profile properties
# MAGIC - Generates a DQX quality dashboard

# COMMAND ----------

# Read the departure-delays CSV from the Databricks sample datasets.
# The first row supplies the column names and Spark infers the column types.
df = (
    spark.read
    .option("header", "true")
    .option("inferSchema", "true")
    .csv("dbfs:/databricks-datasets/flights/departuredelays.csv")
)

# Render the loaded DataFrame in the notebook output.
df.display()

# COMMAND ----------

# Import DQX
from dqx import DQXProfiler

# COMMAND ----------

# Run DQX profiling on the dataset.
# Wrap the loaded DataFrame in a profiler, then compute the profile object
# that the remaining cells of this notebook display.
# NOTE(review): the Databricks Labs DQX package documents its profiler as
# `databricks.labs.dqx.profiler.profiler.DQProfiler` — confirm that the `dqx`
# module imported in this notebook really provides `DQXProfiler` and that it
# exposes a `compute()` method.
profiler = DQXProfiler(df)
profile = profiler.compute()

# COMMAND ----------

# Overall data quality metrics: fetch the profile's overview, then render it.
overview_metrics = profile.overview()
display(overview_metrics)

# COMMAND ----------

# Per-column metrics: fetch them from the profile, then render the result.
column_metrics = profile.columns()
display(column_metrics)

# COMMAND ----------

# Schema-specific metrics: fetch them from the profile, then render the result.
schema_metrics = profile.schema()
display(schema_metrics)

# COMMAND ----------

# Value distribution insights: fetch them from the profile, then render them.
distribution_insights = profile.distribution()
display(distribution_insights)

# COMMAND ----------

# MAGIC %md
# MAGIC ## 📊 DQX Dashboard
# MAGIC 
# MAGIC Render the full dashboard UI for this DQX profile by passing `profile.dashboard()` to the `display` function:

# COMMAND ----------

# Full dashboard: build it from the profile, then render it in the notebook.
dashboard_view = profile.dashboard()
display(dashboard_view)

# COMMAND ----------

# MAGIC %md
# MAGIC ## ✅ Summary
# MAGIC - Data loaded from: `dbfs:/databricks-datasets/flights/departuredelays.csv`
# MAGIC - Columns profiled: `date`, `delay`, `distance`, `origin`, `destination`
# MAGIC - Used DQX to compute data quality metrics
# MAGIC - Displayed comprehensive dashboard for further exploration
