In [1]:
import notebookutils 

In [None]:
#########################################################################################
# Read secrets from Azure Key Vault
#########################################################################################
## URL of the Azure Key Vault that holds the secrets read below
key_vault = "https://company-keyvault.vault.azure.net/"

## Secrets are fetched in this order:
##   tenant        - the tenant id (stored in the vault so it is easy to reuse)
##   client        - the application id of the service principal account
##   client_secret - the client secret of the service principal account
tenant, client, client_secret = (
    notebookutils.credentials.getSecret(key_vault, secret_name)
    for secret_name in ("tenantid", "powerbi-applicationid", "powerbi-clientsecret")
)

######################################################################################### 
# Authentication - Replace string variables with your relevant values 
#########################################################################################  

import json, requests, pandas as pd 
import datetime  

try: 
    from azure.identity import ClientSecretCredential 
except Exception:
     !pip install azure.identity 
     from azure.identity import ClientSecretCredential 

# Generates the access token for the Service Principal 
api = 'https://analysis.windows.net/powerbi/api/.default' 
auth = ClientSecretCredential(authority = 'https://login.microsoftonline.com/', 
               tenant_id = tenant, 
               client_id = client, 
               client_secret = client_secret) 
access_token = auth.get_token(api)
access_token = access_token.token 

## This is where I store my header with the Access Token, because this is required when authenticating 
## to the Power BI Admin APIs 
header = {'Authorization': f'Bearer {access_token}'}  

print('\nSuccessfully authenticated.')

In [None]:
# Download every item returned by the Fabric Admin "List Items" API and save the result
# as a dated JSON file in the lakehouse.
# Some names below (GRAPH_API_URL, all_users) still mention Graph/users; they are kept
# unchanged so anything else referring to them keeps working, but they hold Fabric items.

import requests
import msal
import datetime
from datetime import datetime,date,timedelta

# Fabric Admin REST endpoint for listing items (first page)
GRAPH_API_URL = 'https://api.fabric.microsoft.com/v1/admin/items'


# Define headers for API request
headers = {
    'Authorization': 'Bearer ' + access_token,
    'Content-Type': 'application/json'
}

# Accumulates the item records from every page
all_users = []

# Pagination loop. A separate cursor variable is used so the endpoint constant above is
# not overwritten, and each page is requested exactly once.
next_page_url = GRAPH_API_URL
while next_page_url:
    response = requests.get(next_page_url, headers=headers)
    # Fail here with the HTTP status instead of a KeyError on an error payload
    response.raise_for_status()
    data = response.json()

    # Collect the items of this page
    all_users.extend(data['itemEntities'])

    # The Fabric API returns the next page URL in 'continuationUri'; it is absent/null
    # on the last page. '@odata.nextLink' is kept only as a fallback for the key the
    # previous version of this loop looked at.
    next_page_url = data.get('continuationUri') or data.get('@odata.nextLink')

# Create Directory if it does not exist
notebookutils.fs.mkdirs("Files/Fabric_Items/")

fileName = 'Fabric_Items_' + (datetime.today()).strftime('%Y%m%d') + '.json'

# Write the output to a JSON file
with open(f"/lakehouse/default/Files/Fabric_Items/{fileName}", "w") as json_file:
    json.dump(all_users, json_file, indent=4)



In [None]:
# Load today's Fabric items JSON file into a DataFrame and overwrite the staging Delta table.
# REFERENCE: https://medium.com/@mariusz_kujawski/python-in-microsoft-fabric-data-factory-vs-polars-and-duckdb-06dc4383475b
import duckdb
from deltalake import write_deltalake, DeltaTable
import pyarrow
import pandas as pd
import time

# The file name carries today's date, matching the naming used when the file is written
date_stamp = datetime.today().strftime('%Y%m%d')
fileName = 'Fabric_Items_' + date_stamp + '.json'

with open(f'/lakehouse/default/Files/Fabric_Items/{fileName}') as items_file:
    data = json.load(items_file)

# Flatten the loaded JSON records into a tabular DataFrame
df = pd.json_normalize(data)

# Options handed to write_deltalake, including a storage token obtained from notebookutils
storage_options = {
    "use_fabric_endpoint": "true",
    "allow_unsafe_rename": "true",
    "bearer_token": notebookutils.credentials.getToken('storage'),
}

# Staging table location; each run replaces its contents
path_table = "/lakehouse/default/Tables/staging_all_fabric_items"
write_deltalake(
    path_table,
    df,
    mode="overwrite",
    engine='rust',
    storage_options=storage_options,
)
 


In [None]:
# Merge the staging items into a copy of the target table using an in-memory DuckDB
# database, then write the merged result out as a Delta table.
import duckdb
from deltalake import DeltaTable, write_deltalake

# Connect to DuckDB
con = duckdb.connect()

# Install the delta extension
con.execute("INSTALL delta;")

# Load the delta extension
con.execute("LOAD delta;")

# Load source and target as Delta tables (replace paths with your lakehouse paths)
source_path = "/lakehouse/default/Tables/staging_all_fabric_items"
target_path = "/lakehouse/default/Tables/all_fabric_items"

# Copy both Delta tables into DuckDB tables so they can be modified with SQL
con.execute(f"CREATE TABLE target_table AS SELECT * FROM delta_scan('{target_path}')")
con.execute(f"CREATE TABLE source_table AS SELECT * FROM delta_scan('{source_path}')")

# Merge step 1: refresh name and type on rows whose id already exists in the target
con.execute("""
    UPDATE target_table AS t
    SET name = s.name,
        type = s.type
    FROM source_table AS s
    WHERE t.id = s.id
""")
# Merge step 2: append source rows whose id is not yet present in the target
con.execute("""
    INSERT INTO target_table
    SELECT s.*
    FROM source_table AS s
    LEFT JOIN target_table AS t ON s.id = t.id
    WHERE t.id IS NULL
""")

# Verify and create a dataframe with the resulting target table.
result = con.execute("SELECT * FROM target_table").fetchdf()
display(result)

# Write the merged result back to Delta.
# Reference: https://datamonkeysite.com/2023/11/01/loading-delta-table-to-fabric-onelake-using-delta-rust/
# NOTE(review): the output goes to 'all_fabric_items_New', not to target_path, so the
# table read as the merge target above is never updated by this cell - confirm intent.
try:
    write_deltalake(f"/lakehouse/default/Tables/all_fabric_items_New",result,engine='rust',mode="overwrite",  storage_options={"allow_unsafe_rename":"true"})
except Exception as e:
    print("Error:", e)
    # Re-raise so a failed write fails the notebook run instead of looking like success
    raise

