In [1]:
# Credit to the person who I got the original code from
# https://github.com/dataxbi/powerbi-api-admin-scan/blob/main/admin-scan.py 

import os
import time
import json

import msal
import requests
import pandas as pd

#########################################################################################
# Read secrets from Azure Key Vault
#########################################################################################
## URL of the Azure Key Vault that holds the secrets read below
key_vault = "https://my-keyvault.vault.azure.net/"
## Tenant id, stored as a secret so it is easy to reuse when needed
tenant = mssparkutils.credentials.getSecret(key_vault , "tenantid") 
## Application (client) id of the service principal account
client = mssparkutils.credentials.getSecret(key_vault , "powerbi-applicationid") 
## Client secret of the service principal account
client_secret = mssparkutils.credentials.getSecret(key_vault , "powerbi-clientsecret")  


# Upper-case aliases of the secrets; these are the names get_access_token() reads
PBI_TENANT_NAME = tenant
PBI_ADMIN_API_CLIENT_ID = client
PBI_ADMIN_API_SECRET = client_secret

# Authority URL and scope list handed to MSAL when the token is requested
PBI_AUTHORITY = f'https://login.microsoftonline.com/{PBI_TENANT_NAME}'
PBI_SCOPES = ['https://analysis.windows.net/powerbi/api/.default']

# NOTE(review): not referenced by any visible cell, and the name looks like a typo
# for TENANT_DIRECTORY - confirm nothing else uses it before renaming or removing.
TENANT_DIRECTOY = f'MyDomainName'

# Number of workspace ids sent in one getInfo request
WORKSPACES_PER_CHUNK = 100
# Seconds to sleep before each scan-status poll
SCAN_TIMEOUT = 30
# Maximum number of scan-status polls per chunk
MAX_SCAN_STATUS_POLL = 10


def get_access_token():
    '''Returns an AAD access token for the Power BI API using MSAL.

    The token is requested with the client-credentials flow of the service
    principal described by PBI_ADMIN_API_CLIENT_ID / PBI_ADMIN_API_SECRET
    against PBI_AUTHORITY, for the scopes in PBI_SCOPES.

    Returns:
        The access token string.

    Raises:
        Exception: if MSAL fails or the response carries no access token. The
            message starts with 'Error retrieving Access token' followed by the
            error description (or, when absent, the error code) from MSAL.
    '''

    try:
        clientapp = msal.ConfidentialClientApplication(
            PBI_ADMIN_API_CLIENT_ID, authority=PBI_AUTHORITY, client_credential=PBI_ADMIN_API_SECRET)
        response = clientapp.acquire_token_silent(PBI_SCOPES, account=None)
        if not response:
            response = clientapp.acquire_token_for_client(scopes=PBI_SCOPES)
    except Exception as ex:
        # Keep the original exception chained so the real cause stays visible
        raise Exception('Error retrieving Access token\n' + str(ex)) from ex

    response = response or {}
    if 'access_token' in response:
        return response['access_token']

    # An error response is not guaranteed to contain 'error_description';
    # fall back to the error code instead of failing with a bare KeyError.
    reason = (
        response.get('error_description')
        or response.get('error')
        or 'no access_token in the response'
    )
    raise Exception('Error retrieving Access token\n' + str(reason))


def scan_worspaces(access_token, excludePersonalWorkspaces=True, timeout=120):
    '''Gets a list of workspace IDs in the organization.

    Calls the admin "workspaces/modified" endpoint.

    Args:
        access_token: bearer token sent in the Authorization header.
        excludePersonalWorkspaces: forwarded as the query parameter of the
            same name.
        timeout: seconds passed to requests as the request timeout, so a
            stalled connection raises instead of hanging the notebook.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    '''

    api_url = 'https://api.powerbi.com/v1.0/myorg/admin/workspaces/modified'

    headers = {
        'Content-Type': 'application/json',
        'Authorization': 'Bearer ' + access_token
    }

    # Let requests build (and URL-encode) the query string
    r = requests.get(
        api_url,
        headers=headers,
        params={'excludePersonalWorkspaces': excludePersonalWorkspaces},
        timeout=timeout,
    )

    r.raise_for_status()
    # utf-8-sig makes the decoding tolerate a leading byte-order mark
    r.encoding = 'utf-8-sig'
    return r.json()


def get_workspace_info(access_token, workspaces, timeout=120):
    '''Initiate a call to receive metadata for the requested list of workspaces.

    Args:
        access_token: bearer token sent in the Authorization header.
        workspaces: workspace ids, posted as the 'workspaces' field of the
            JSON request body.
        timeout: seconds passed to requests as the request timeout, so a
            stalled connection raises instead of hanging the notebook.

    Returns:
        The value of the response's 'location' header, or None when the
        header is absent.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    '''

    api_url = 'https://api.powerbi.com/v1.0/myorg/admin/workspaces/getInfo'
    api_url += '?datasetExpressions=True&datasetSchema=True&datasourceDetails=True&getArtifactUsers=True&lineage=True'

    headers = {'Authorization': f'Bearer {access_token}'}

    r = requests.post(
        api_url,
        headers=headers,
        json={
            'workspaces': workspaces
        },
        timeout=timeout,
    )

    r.raise_for_status()
    # Only the header is needed; the response body is never decoded here
    return r.headers.get('location')


def get_scan_status(access_token, api_url, timeout=120):
    '''Gets scan status for the specified scan.

    Args:
        access_token: bearer token sent in the Authorization header.
        api_url: URL of the scan-status resource to query.
        timeout: seconds passed to requests as the request timeout, so a
            stalled connection raises instead of hanging the notebook.

    Returns:
        The decoded JSON body, with an extra 'location' key holding the
        response's 'location' header (None when the header is absent).

    Raises:
        requests.HTTPError: if the API answers with an error status.
    '''

    headers = {'Authorization': f'Bearer {access_token}'}

    r = requests.get(api_url, headers=headers, timeout=timeout)

    r.raise_for_status()
    # Same BOM-tolerant decoding the other API helpers in this file use;
    # without it a body starting with a byte-order mark cannot be parsed.
    r.encoding = 'utf-8-sig'
    response_data = r.json()
    response_data['location'] = r.headers.get('location')
    return response_data


def get_scan_result(access_token, api_url, timeout=120):
    '''Gets scan result for the specified scan (should be called only after getting status Succeeded in the scan status API).
    Scan result will be available for up to 24 hours.

    Args:
        access_token: bearer token sent in the Authorization header.
        api_url: URL of the scan-result resource to download.
        timeout: seconds passed to requests as the request timeout, so a
            stalled connection raises instead of hanging the notebook.

    Returns:
        The decoded JSON body of the response.

    Raises:
        requests.HTTPError: if the API answers with an error status.
    '''

    headers = {'Authorization': f'Bearer {access_token}'}

    r = requests.get(api_url, headers=headers, timeout=timeout)

    r.raise_for_status()
    # utf-8-sig makes the decoding tolerate a leading byte-order mark
    r.encoding = 'utf-8-sig'
    return r.json()

# Acquire the access token once; the same token is passed to every API call that follows
access_token = get_access_token()

# List the workspaces that need to be scanned (called with the default arguments)
workspaces = scan_worspaces(access_token)



StatementMeta(, b0b2ceee-c9e9-40d9-8087-f16b1e6e8fdf, 3, Finished, Available, Finished)

In [2]:
# Scan all workspaces in chunks of WORKSPACES_PER_CHUNK and collect the metadata
# returned for each chunk into scan_results['workspaces'].

scan_results = {'workspaces': []}

ws_len = len(workspaces)
ws_index = 0
print(f'Workspaces to scan: {ws_len}')
while ws_index < ws_len:
    # Clamp the end index so the progress message reports the real range
    ws_index_end = min(ws_index + WORKSPACES_PER_CHUNK, ws_len)
    print(f'Scanning workspaces {ws_index} to {ws_index_end}')
    workspaces_ids = [ws['id'] for ws in workspaces[ws_index:ws_index_end]]
    ws_index = ws_index_end
    scan_api_url = get_workspace_info(access_token, workspaces_ids)

    # Poll the scan status until it reports 'Succeeded' or the poll budget runs out
    finish_poll = False
    poll_index = 0
    status_response = None
    while not finish_poll and poll_index < MAX_SCAN_STATUS_POLL:
        print(f'Waiting {SCAN_TIMEOUT} seconds')
        time.sleep(SCAN_TIMEOUT)
        status_response = get_scan_status(access_token, scan_api_url)
        finish_poll = status_response['status'] == 'Succeeded'
        poll_index += 1

    # Only fetch the result of a scan that actually finished. Asking for the
    # result of an unfinished scan would fail later with an unrelated error.
    if not finish_poll:
        last_status = status_response['status'] if status_response else 'not polled'
        raise RuntimeError(
            f'Scan did not succeed after {poll_index} status polls '
            f'(last status: {last_status}); scan status URL: {scan_api_url}'
        )

    scan_results_partial = get_scan_result(access_token, status_response['location'])
    scan_results['workspaces'].extend(
        scan_results_partial['workspaces'])



StatementMeta(, b0b2ceee-c9e9-40d9-8087-f16b1e6e8fdf, 4, Finished, Available, Finished)

Workspaces to scan: 53
Scanning workspaces 0 to 100
Waiting 30 seconds


In [7]:
import datetime
from datetime import datetime,date,timedelta

# Persist the collected scan results as a dated JSON file in the Lakehouse.

# Ensure the target folder exists before writing into it
mssparkutils.fs.mkdirs("Files/Scanner_API/")

# The file name carries today's date as YYYYMMDD
date_stamp = datetime.today().strftime('%Y%m%d')
output_path = '/lakehouse/default/Files/Scanner_API/pbi_scannerapi_output_' + date_stamp + '.json'

# Serialize to JSON text and write it out as UTF-8 bytes
with open(output_path, 'wb') as out_file:
    out_file.write(json.dumps(scan_results).encode('utf-8'))



StatementMeta(, b0b2ceee-c9e9-40d9-8087-f16b1e6e8fdf, 9, Finished, Available, Finished)