In [0]:
%pip install databricks_sdk --upgrade

In [0]:
# Restart the Python process. This runs right after the %pip upgrade in the
# previous cell, presumably so the upgraded SDK is the version that gets
# imported by the cells below.
dbutils.library.restartPython()

In [0]:
# Notebook parameters: the catalog and schema that receive every table written below.
dbutils.widgets.text("catalog_name", "users")
dbutils.widgets.text("schema_name", "")

# Strip surrounding whitespace so a padded or blank-looking value cannot pass
# validation and later produce an invalid three-part table name.
catalog_name = dbutils.widgets.get("catalog_name").strip()
schema_name = dbutils.widgets.get("schema_name").strip()
assert catalog_name and schema_name, "catalog_name and schema_name must be provided"


In [0]:
# Snapshot every Genie space visible to the workspace client into
# {catalog_name}.{schema_name}.adb_genie_spaces (the table is overwritten on each run).
from databricks.sdk import WorkspaceClient
from datetime import datetime
import pandas as pd
from pyspark.sql.types import StructType, StructField, StringType

# Shared client: the later cells in this notebook reuse `w`.
w = WorkspaceClient()

spaces = []
page_token = None

while True:
    response = w.genie.list_spaces(page_token=page_token)
    # `spaces` may be None when a page carries no results; treat that as empty
    # instead of failing with "NoneType is not iterable".
    for s in response.spaces or []:
        spaces.append((
            getattr(s, "space_id", None),
            getattr(s, "title", None),  # stored in the "name" column
            getattr(s, "description", None),
            getattr(s, "warehouse_id", None),
        ))
    # A missing or empty token marks the last page.
    page_token = response.next_page_token
    if not page_token:
        break

# Explicit schema: type inference fails when a column is None for every row
# (for example when no space has a description).
genie_spaces_schema = StructType([
    StructField("space_id", StringType(), True),
    StructField("name", StringType(), True),
    StructField("description", StringType(), True),
    StructField("warehouse_id", StringType(), True),
])

if spaces:
    spark_df = spark.createDataFrame(spaces, genie_spaces_schema)
    spark_df.write.format("delta").mode("overwrite").option("mergeSchema", "true").saveAsTable(
        f"{catalog_name}.{schema_name}.adb_genie_spaces"
    )
    # len(spaces) equals the row count and avoids launching another Spark job.
    print(f"Loaded {len(spaces)} Genie spaces into table adb_genie_spaces")
else:
    # NOTE(review): an existing table is left untouched in this branch, so rows
    # from an earlier run remain — confirm that is the intended behavior.
    print("No Genie spaces found.")

In [0]:
# Snapshot all Lakeview dashboards into {catalog_name}.{schema_name}.adb_dashboards.
# Reuses the WorkspaceClient `w` created in the Genie spaces cell.
from databricks.sdk import WorkspaceClient
from datetime import datetime
from pyspark.sql.types import StructType, StructField, StringType, TimestampType


def _parse_iso_timestamp(value):
    """Convert an ISO-8601 string to a datetime.

    Non-string values (including None and datetime objects) are returned
    unchanged. Strings that cannot be parsed yield None.
    """
    if not isinstance(value, str):
        return value
    text = value.strip()
    # Python versions before 3.11 reject a trailing "Z" in fromisoformat, which
    # would silently turn every UTC timestamp into None; spell the offset out.
    if text.endswith(("Z", "z")):
        text = text[:-1] + "+00:00"
    try:
        return datetime.fromisoformat(text)
    except ValueError:
        return None


rows = []
for d in w.lakeview.list(page_size=100):
    d_dict = d.as_dict()
    rows.append((
        d_dict.get("dashboard_id"),
        d_dict.get("display_name"),
        _parse_iso_timestamp(d_dict.get("create_time")),
        d_dict.get("lifecycle_state"),
        _parse_iso_timestamp(d_dict.get("update_time")),
        d_dict.get("warehouse_id"),
    ))

# Tuple positions in `rows` must match the field order declared here.
schema = StructType([
    StructField('dashboard_id', StringType(), True),
    StructField('display_name', StringType(), True),
    StructField('create_time', TimestampType(), True),
    StructField('lifecycle_state', StringType(), True),
    StructField('update_time', TimestampType(), True),
    StructField('warehouse_id', StringType(), True)
])

df = spark.createDataFrame(rows, schema)
df.write.mode("overwrite").option("mergeSchema", "true").saveAsTable(f"{catalog_name}.{schema_name}.adb_dashboards")

In [0]:
# Collect the schedules of every dashboard listed in adb_dashboards and store
# them in {catalog_name}.{schema_name}.adb_dashboard_schedules.
# Reuses the WorkspaceClient `w` created in the Genie spaces cell.
from databricks.sdk import WorkspaceClient
from datetime import datetime
from pyspark.sql.types import StructType, StructField, StringType

schedules_table = f"{catalog_name}.{schema_name}.adb_dashboard_schedules"

dashboard_ids = [
    row.dashboard_id
    for row in spark.table(f"{catalog_name}.{schema_name}.adb_dashboards").select("dashboard_id").collect()
]

schedules = []
for dashboard_id in dashboard_ids:
    try:
        for sched in w.lakeview.list_schedules(dashboard_id=dashboard_id):
            sched_dict = sched.as_dict()
            schedules.append((
                dashboard_id,
                sched_dict.get("schedule_id"),
                sched_dict.get("create_time"),
                sched_dict.get("display_name"),
                sched_dict.get("pause_status"),
            ))
    except Exception as e:
        # Best effort: one failing dashboard must not abort the whole run, but
        # report it so missing schedules can be explained afterwards.
        print(f"Skipping schedules for dashboard {dashboard_id}: {e}")
        continue

# Explicit schema so the DataFrame can be built even when `schedules` is empty
# or a column is None for every row. Values are kept as returned by as_dict();
# NOTE(review): all fields are assumed to be strings there — confirm against the SDK.
schedules_schema = StructType([
    StructField("dashboard_id", StringType(), True),
    StructField("schedule_id", StringType(), True),
    StructField("create_time", StringType(), True),
    StructField("display_name", StringType(), True),
    StructField("pause_status", StringType(), True),
])
spark_df_sched = spark.createDataFrame(schedules, schedules_schema)

# Always write the table, even with zero rows: the subscriptions cell reads it
# unconditionally and must not fail or pick up rows from a previous run.
# Overwriting with overwriteSchema replaces data and schema in a single step,
# instead of dropping the table first and recreating it.
(
    spark_df_sched.write.format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(schedules_table)
)
print(f"Loaded {len(schedules)} dashboard schedules into table adb_dashboard_schedules")

In [0]:
# Databricks notebook Python
from databricks.sdk import WorkspaceClient
import pandas as pd

# Fully qualified name of the dashboard inventory table written earlier in this notebook.
source_table = f"{catalog_name}.{schema_name}.adb_dashboards"
# Destination table for the subscription rows collected in this cell.
# NOTE(review): the prefix is spelled "abd_" here while the other tables in this
# notebook use "adb_"; the inline note says the name was requested — confirm
# with consumers before renaming.
target_table = f"{catalog_name}.{schema_name}.abd_dashboard_subscriptions"  # as requested

#w = WorkspaceClient()

# ---------- helpers ----------
def get_field(obj_or_dict, *names):
    """Look up `names` in order and return the first value that is not None.

    Dicts are read by key, anything else by attribute. Returns None when no
    candidate name yields a non-None value (or when no names are given).
    """
    is_mapping = isinstance(obj_or_dict, dict)
    for candidate in names:
        if is_mapping:
            value = obj_or_dict.get(candidate)
        else:
            value = getattr(obj_or_dict, candidate, None)
        if value is not None:
            return value
    return None

def to_dict_safe(x):
    """Return `x.as_dict()` as a dict, falling back to an empty dict.

    The fallback applies when `x` is None, when it has no `as_dict` attribute,
    or when `as_dict()` returns a falsy value.
    """
    if x is None:
        return {}
    if not hasattr(x, "as_dict"):
        return {}
    return x.as_dict() or {}

# ---------- collect subscriptions for every schedule ----------
from pyspark.sql import functions as F
from pyspark.sql.types import StructType, StructField, StringType, LongType

# collect() brings every schedule row to the driver; the loop below needs them
# locally to issue one API call per (dashboard, schedule) pair.
sched_df = spark.table(f"{catalog_name}.{schema_name}.adb_dashboard_schedules").collect()
rows = []
count = 0

for r in sched_df:
    dashboard_id = r["dashboard_id"]
    schedule_id = r["schedule_id"]
    try:
        # enumerate subscriptions for this schedule
        for sub in w.lakeview.list_subscriptions(dashboard_id=dashboard_id, schedule_id=schedule_id):
            subdict = to_dict_safe(sub)
            subscriber = get_field(subdict, "subscriber") or get_field(sub, "subscriber") or {}

            user_subscriber = get_field(subscriber, "user_subscriber")
            destination_subscriber = get_field(subscriber, "destination_subscriber")

            subscription_id = get_field(subdict, "subscription_id") or get_field(sub, "subscription_id")

            # Read the timestamp from the subscription itself. Without this
            # assignment the name would resolve to whatever an earlier cell
            # left in a global `create_time`, or raise NameError.
            create_time = get_field(subdict, "create_time") or get_field(sub, "create_time")

            # get_field on an empty dict returns None, so a missing user
            # subscriber simply yields no user_id.
            raw_user_id = get_field(user_subscriber or {}, "user_id", "id")
            user_id = int(raw_user_id) if raw_user_id is not None else None

            # capture destination info if present (email/slack/webhook/etc.)
            destination_id = get_field(destination_subscriber or {}, "destination_id", "destination", "id")

            rows.append((
                dashboard_id,
                schedule_id,
                subscription_id,
                create_time,
                user_id,
                destination_id,
            ))
            count += 1

    except Exception as e:
        # Best effort: keep going, but report the failure instead of hiding it.
        print(f"Skipping subscriptions for dashboard {dashboard_id}, schedule {schedule_id}: {e}")
        continue

print(f"Collected {count} subscription rows across dashboards.")

# ---------- write to Delta table ----------
# An explicit schema lets the table be created even when no rows were collected
# (an empty pandas frame gives Spark nothing to infer types from). It also
# keeps user_id integral; pandas would turn the column into floats as soon as
# one row has no user_id.
subscriptions_schema = StructType([
    StructField("dashboard_id", StringType(), True),
    StructField("schedule_id", StringType(), True),
    StructField("subscription_id", StringType(), True),
    StructField("create_time", StringType(), True),
    StructField("user_id", LongType(), True),
    StructField("destination_id", StringType(), True),
])

# create_time is loaded as text and converted to a proper timestamp column.
spark_df = spark.createDataFrame(rows, subscriptions_schema).withColumn(
    "create_time", F.to_timestamp("create_time")
)

# Overwrite data and schema in one step rather than dropping the table first.
(
    spark_df.write.format("delta")
    .mode("overwrite")
    .option("overwriteSchema", "true")
    .saveAsTable(target_table)
)