# Unified Analytics Demo
This notebook demonstrates an end-to-end workflow in a single place: loading JSON data with PySpark, filtering it, saving the result as a Delta Lake table, querying that table with SQL, and logging a summary metric with MLflow.

In [None]:
from pyspark.sql import SparkSession

# Obtain the Spark session for this demo (getOrCreate reuses an existing one).
spark = (
    SparkSession.builder
    .appName("Unified Analytics Demo")
    .getOrCreate()
)

# Read the raw user records from the JSON file on DBFS and preview them.
df = spark.read.json("dbfs:/mnt/raw/users.json")
df.show()

In [None]:
# Keep rows whose age is greater than 18, then project only name and email.
df_clean = (
    df
    .where("age > 18")
    .select("name", "email")
)
df_clean.show()

In [None]:
# Save the cleaned users as the Delta table `users_clean`, overwriting existing data.
df_clean.write.saveAsTable("users_clean", format="delta", mode="overwrite")

In [None]:
%sql
-- Top 10 Gmail addresses in users_clean, ranked by the number of rows sharing each address.
SELECT
  email,
  COUNT(*) AS user_count
FROM
  users_clean
WHERE
  email LIKE '%@gmail.com'
GROUP BY
  email
ORDER BY
  user_count DESC
LIMIT 10

In [None]:
import mlflow

# Count the Gmail addresses before opening the run, so a failing Spark job
# does not leave an empty MLflow run behind.
gmail_user_count = df_clean.filter("email LIKE '%@gmail.com'").count()

# Use the run as a context manager so it is always ended, even if logging
# raises. The bare start_run()/end_run() pair left the run active on any
# error in between, which breaks re-running this cell.
with mlflow.start_run():
    mlflow.log_metric("gmail_users", gmail_user_count)

✅ This completes the unified analytics workflow in one Databricks notebook.

[![Databricks](https://databricks.com/wp-content/uploads/2020/08/databricks-logo.png)](https://databricks.com)
