## Pull in UDFs

In [39]:
%run nb_udfs

StatementMeta(, b47ebd35-b384-44b7-8e34-d1421a0cfde6, 61, Finished, Available, Finished)

Requires the Fabric REST API if you're using Dataflow Gen2 with CI/CD enabled.

## Run Dataflows

In [40]:
# Location of the dimension table this notebook maintains; these three values
# are handed to udf_GetFilePath further down.
# The workspace name is stored URL-encoded: spaces (here -> %20) and any '&'
# symbol have to be escaped.
workspace = "Admin%20Center"
lakehouse = "lh_monitoring"
dataflow_table = "dimDataflows"


StatementMeta(, b47ebd35-b384-44b7-8e34-d1421a0cfde6, 62, Finished, Available, Finished)

In [41]:
# Fetch every workspace from the Fabric admin API and load the result into Spark.
# NOTE(review): fab_client, pd and spark are not defined in this cell (presumably
# supplied by `%run nb_udfs` / the Fabric runtime). This assumes fab_client.get
# returns a requests-style response (.status_code, .text, .json()) -- confirm.
from urllib.parse import quote  # stdlib; only used to URL-encode continuation tokens

workspaces_path = "/v1/admin/workspaces"
workspace_records = []
next_path = workspaces_path
while next_path:
    response = fab_client.get(next_path)
    if response.status_code != 200:
        # Fail loudly: an error payload has no 'workspaces' key, which would
        # otherwise surface as an unhelpful KeyError (or an empty result).
        raise RuntimeError(
            f"Listing workspaces failed: HTTP {response.status_code}: {response.text[:500]}"
        )
    payload = response.json()
    workspace_records.extend(payload.get("workspaces") or [])

    # The list API is paged: a continuationToken in the body means more pages exist.
    continuation_token = payload.get("continuationToken")
    next_path = (
        f"{workspaces_path}?continuationToken={quote(continuation_token, safe='')}"
        if continuation_token
        else None
    )

if not workspace_records:
    # Spark cannot infer a schema from an empty pandas frame; stop with a clear message.
    raise RuntimeError("The Fabric admin API returned no workspaces")

# Flatten the JSON records with pandas first, then hand the frame to Spark.
pdf_workspaces = pd.json_normalize(workspace_records)
df_workspaces = spark.createDataFrame(pdf_workspaces)

# Workspaces we want access to for dataset refresh, history, and workspace users:
# only rows with type "Workspace" and state "Active". The id column is renamed
# to "workspaceid" for the cells that follow.
df_np_workspaces = (
    df_workspaces
    .filter(df_workspaces["type"] == "Workspace")
    .filter(df_workspaces["state"] == "Active")
    .withColumnRenamed("id", "workspaceid")
)

StatementMeta(, b47ebd35-b384-44b7-8e34-d1421a0cfde6, 63, Finished, Available, Finished)

In [42]:

# Explicit Spark schema for the dataflow inventory, declared as (column, type)
# pairs; every column is nullable.
_dataflow_columns = [
    ("workspaceId",  StringType()),
    ("dataflowId",   StringType()),
    ("itemType",     StringType()),
    ("displayName",  StringType()),
    ("description",  StringType()),
    ("isParametric", BooleanType()),
]
schema = StructType(
    [StructField(col_name, col_type, True) for col_name, col_type in _dataflow_columns]
)

# Collect one record per dataflow across every workspace in df_np_workspaces.
# NOTE(review): assumes fab_client.get returns a requests-style response
# (.status_code, .text, .json()) -- confirm against nb_udfs.
from urllib.parse import quote  # stdlib; only used to URL-encode continuation tokens

rows = []  # one dict per dataflow; workspaces without dataflows add nothing

for ws_row in (
    df_np_workspaces
      .select("workspaceid")
      .na.drop(subset=["workspaceid"])
      .toLocalIterator()
):
    ws_id = ws_row["workspaceid"]
    base_path = f"/v1/workspaces/{ws_id}/dataflows"
    path = base_path

    # Follow continuation tokens so large workspaces are not silently truncated.
    while path:
        resp = fab_client.get(path)

        if resp.status_code != 200:
            # An error body (e.g. 403/404) has no "value" key and would otherwise
            # look exactly like "this workspace has no dataflows". Report it and
            # move on to the next workspace.
            print(f"{ws_id} -> HTTP {resp.status_code}: {resp.text[:200]}")
            break

        try:
            payload = resp.json()
        except Exception as e:
            print(f"{ws_id} -> bad JSON: {e}")
            break

        items = payload.get("value") or []
        for it in items:
            rows.append({
                "workspaceId":  ws_id,
                "dataflowId":   it.get("id"),
                "itemType":     it.get("type"),
                "displayName":  it.get("displayName"),
                "description":  it.get("description"),
                # "properties" may be absent or null; fall back to an empty dict.
                "isParametric": (it.get("properties") or {}).get("isParametric"),
            })

        token = payload.get("continuationToken")
        path = f"{base_path}?continuationToken={quote(token, safe='')}" if token else None

# Materialise the collected records as a Spark DataFrame. Because the schema is
# explicit, the same call also covers an empty `rows` list.
df_dataflows = spark.createDataFrame(rows, schema=schema)

# Sanity check: report the row count and preview the frame.
dataflow_count = df_dataflows.count()
print(f"Non-empty dataflows found: {dataflow_count}")
df_dataflows.show()

StatementMeta(, b47ebd35-b384-44b7-8e34-d1421a0cfde6, 64, Finished, Available, Finished)

Non-empty dataflows found: 1
+--------------------+--------------------+--------+---------------+-----------+------------+
|         workspaceId|          dataflowId|itemType|    displayName|description|isParametric|
+--------------------+--------------------+--------+---------------+-----------+------------+
|978cbefd-434d-424...|8f9765b3-75c4-42b...|Dataflow|DF_F-195 Budget|           |       false|
+--------------------+--------------------+--------+---------------+-----------+------------+



In [43]:

if df_dataflows.count() == 0:
    # Nothing to upsert: report it and stop the notebook here.
    # NOTE(review): `sys` is not imported in any visible cell (presumably it comes
    # from nb_udfs), and sys.exit raises SystemExit inside the kernel -- confirm
    # this is the intended way to end the run.
    print("No dataflows")
    sys.exit("No dataflows")
else:
    # Creates a slowly changing dimension so we can keep an eye on any deleted dataflows.
    naturalKeyColumnList = ['dataflowId']
    primaryKeyColumnName = "tableId"
    dataflowPath = udf_GetFilePath(workspace, lakehouse, dataflow_table)
    # NOTE(review): the literal 2 and the trailing True are positional arguments
    # whose meaning is defined in nb_udfs (2 is presumably the SCD type) -- confirm.
    returnVal = udf_UpsertDimension(
        df_dataflows,
        2,
        dataflowPath,
        naturalKeyColumnList,
        primaryKeyColumnName,
        True,
    )
    print(returnVal)



StatementMeta(, b47ebd35-b384-44b7-8e34-d1421a0cfde6, 65, Finished, Available, Finished)

📦 Initial load complete
{'startTime': '2025-10-08 02:30:42.064081', 'stopTime': '2025-10-08 02:30:56.845211', 'details': '0 records updated, 1 records inserted from 1 staging rows to abfss://e54b972a-76a7-4a96-90ab-77441da0157e@onelake.dfs.fabric.microsoft.com/9b744bc6-b68b-4136-9983-4a665a8d5c9c/Tables/dimDataflows'}
