**Helper notebook**

In [47]:
%run nb_helper

StatementMeta(, 220708dc-1330-4d2e-a379-bb193810ff23, 93, Finished, Available, Finished)



**Define a logging dataframe**

In [48]:
# in-memory log table; later cells append one row per processed item and the last cell persists it
dfLogging = pd.DataFrame(
    columns=[
        'LoadId',
        'NotebookId',
        'NotebookName',
        'WorkspaceId',
        'SourceWorkspaceName',
        'TargetWorkspaceName',
        'Item',
        'CellId',
        'Timestamp',
        'ElapsedTime',
        'Message',
        'ErrorMessage',
    ]
)

# identify the running notebook from the runtime context; these values tag every log row
vContext = mssparkutils.runtime.context
vNotebookId = vContext["currentNotebookId"]
vLogNotebookName = vContext["currentNotebookName"]
# workspace where the notebook is running, not to be confused with the source and target workspaces
vWorkspaceId = vContext["currentWorkspaceId"]

StatementMeta(, 220708dc-1330-4d2e-a379-bb193810ff23, 94, Finished, Available, Finished)

**Parameters --> convert this cell to a code cell when debugging the notebook; otherwise keep it as a non-code cell, because the parameters are passed in from the DevOps pipelines**

pSourceWorkspaceId = "35f100c1-d910-482c-9763-aaf500918816"
pTargetWorkspaceId = "2fc80f23-1f8f-4c00-b2de-507863e8def4"
pTargetStage = "Stage3"
pDebugMode = "yes"
pProjectName = "fabric-cicd"
pMappingConnections = '[{"ConnectionStage0":"0c0702d4-9c1d-435a-a03e-9635e1fbded8","ConnectionStage1":"0c0702d4-9c1d-435a-a03e-9635e1fbded8","ConnectionStage2":"feb079dc-6fe7-4f0c-9537-33d7fa72fcb4","ConnectionStage3":"feb079dc-6fe7-4f0c-9537-33d7fa72fcb4"},{"ConnectionStage0":"a24fefc1-e5f4-4606-a3e1-a337b7056627","ConnectionStage1":"a24fefc1-e5f4-4606-a3e1-a337b7056627","ConnectionStage2":null,"ConnectionStage3":null},{"ConnectionStage0":"0c0702d4-9c1d-435a-a03e-9635e1fbded8","ConnectionStage1":"0c0702d4-9c1d-435a-a03e-9635e1fbded8","ConnectionStage2":null,"ConnectionStage3":null},{"ConnectionStage0":"b8d19a81-9f45-4eed-aef0-314a28c1b16f","ConnectionStage1":"b8d19a81-9f45-4eed-aef0-314a28c1b16f","ConnectionStage2":null,"ConnectionStage3":null},{"ConnectionStage0":"2c52c32b-1d27-4de6-852c-9fd8be27cad1","ConnectionStage1":"39e95e92-8338-4ad9-8a97-14b39388349b","ConnectionStage2":null,"ConnectionStage3":null},{"ConnectionStage0":"Sql.Database(''rs-synapse-dev-ondemand.sql.azuresynapse.net'', ''misc'')","ConnectionStage1":"Sql.Database(''rs-synapse-dev-ondemand.sql.azuresynapse.net'', ''misc'')","ConnectionStage2":"Sql.Database(''rs-synapse-dev-ondemand.sql.azuresynapse.net'', ''misc_new'')","ConnectionStage3":"Sql.Database(''rs-synapse-dev-ondemand.sql.azuresynapse.net'', ''misc_new'')"}]'


**Resolve source and target workspace**

In [50]:
# resolve the display names of the source and target workspaces from the ids received as parameters
vSourceWorkspaceName = fabric.resolve_workspace_name(pSourceWorkspaceId)
vTargetWorkspaceName = fabric.resolve_workspace_name(pTargetWorkspaceId)
# keep the parameter ids under v*-prefixed names (vTargetWorkspaceId is used later when checking for default semantic models)
vSourceWorkspaceId = pSourceWorkspaceId
vTargetWorkspaceId = pTargetWorkspaceId

StatementMeta(, 220708dc-1330-4d2e-a379-bb193810ff23, 96, Finished, Available, Finished)

**List of semantic models in source workspace**

In [52]:
# list the semantic models of the source workspace; the emptiness check that follows decides whether the notebook has anything to do
df_source_semantic_models = fabric.list_datasets(workspace=vSourceWorkspaceName) 

StatementMeta(, 220708dc-1330-4d2e-a379-bb193810ff23, 98, Finished, Available, Finished)

**Verify that there is at least one semantic model in the source workspace**

In [53]:
# stop early when the source workspace has no semantic model: there is nothing to update
if df_source_semantic_models.empty:
    # the workspace name is interpolated with braces; without them the message
    # carried the literal text "vSourceWorkspaceName" instead of the actual name
    vMessage = f"workspace <{vSourceWorkspaceName}> have 0 semantic model. post-update is not required."

    # Display an exit message
    display(Markdown("### ✅ Notebook execution stopped successfully!"))

    # Exit without error, handing the message over as the notebook exit value
    mssparkutils.notebook.exit(vMessage)

StatementMeta(, 220708dc-1330-4d2e-a379-bb193810ff23, 99, Finished, Available, Finished)

**Get the connections mapping between Stages and list existing fabric connections**

In [54]:
# parse the JSON mapping received in pMappingConnections into a dataframe
# (in the sample parameter value: one object per connection, one "ConnectionStage<N>" key per stage)
# NOTE(review): the sample value writes M expressions with doubled single quotes (''...'');
# confirm these compare equal to the expressions extracted from the model definitions
mapping_connections_json = json.loads(pMappingConnections)
df_mapping_connections = pd.DataFrame(mapping_connections_json)

# get the list of existing connections; validate_stage_connection_id looks ids up in its 'Connection Id' column
# NOTE(review): presumably tenant-wide, as the original comment stated - confirm against labs.list_connections
df_existing_connections = labs.list_connections()

StatementMeta(, 220708dc-1330-4d2e-a379-bb193810ff23, 100, Finished, Available, Finished)

**Functions**
- validate_stage_connection_id
- find_connection_id
- update_partition_source_expression

In [55]:
def validate_stage_connection_id(connectionId):
    """Tell whether a connection id is present in df_existing_connections.

    Args:
        connectionId: value looked up in the 'Connection Id' column of the
            notebook-level dataframe df_existing_connections.

    Returns:
        The string "valid" when the id is found, "unvalid" otherwise. Callers
        compare against these exact strings.
    """
    known_ids = df_existing_connections['Connection Id'].values

    # guard clause: unknown id
    if connectionId not in known_ids:
        print(f"connection id <{connectionId}> is unvalid>")
        return "unvalid"

    print(f"connection id <{connectionId}> is valid>")
    return "valid"

StatementMeta(, 220708dc-1330-4d2e-a379-bb193810ff23, 101, Finished, Available, Finished)

In [56]:
# function to find a connection id based on the target stage
# the JSON mapping between stages (df_mapping_connections) is used; it holds one "ConnectionStage<N>" column per stage
def find_connection_id(devConnectionId, targetStage, validateConnection):
    """Return the connection to use in the target stage for a given dev connection.

    The mapping row is selected by matching ``devConnectionId`` against the
    'ConnectionStage1' column of the notebook-level dataframe
    ``df_mapping_connections``; only the first matching row is used.

    The dev connection is returned unchanged whenever no usable target can be
    determined, so that the definition being rewritten is never broken:
      * no row of the mapping matches the dev connection,
      * the stage column is absent from the mapping,
      * the stage column is empty (null / NaN / ""),
      * validation is requested and the mapped connection does not exist.

    Args:
        devConnectionId: connection id (or full M source expression) as found
            in the dev definition.
        targetStage: "Stage2" or "Stage3" select the matching column; any other
            value selects 'ConnectionStage4'.
        validateConnection: "yes" to check the mapped connection with
            validate_stage_connection_id; any other value skips the check.

    Returns:
        The mapped connection for the target stage, or ``devConnectionId``.
    """
    vMessage = f"dev connection id is <{devConnectionId}>"
    print(f"{vMessage}")

    # rows of the mapping whose dev column matches the given connection
    df_mapping_connections_filtered = df_mapping_connections[(df_mapping_connections['ConnectionStage1'] == devConnectionId)]

    # nothing mapped for this connection: keep the dev connection
    if df_mapping_connections_filtered.empty:
        vMessage = "no valid connection found in the mapping matching the condition, source connection will be kept."
        print(f"{vMessage}")
        return devConnectionId

    first_row = df_mapping_connections_filtered.iloc[0]  # Get the first matching row

    # column holding the connection of the requested stage
    if targetStage == "Stage2":
        vStageColumn = "ConnectionStage2"
    elif targetStage == "Stage3":
        vStageColumn = "ConnectionStage3"
    else:
        vStageColumn = "ConnectionStage4"

    # .get returns None instead of raising KeyError when the mapping has no such column
    targetConnectionId = first_row.get(vStageColumn)

    # an absent or empty mapping keeps the dev connection; returning a placeholder
    # here would end up being written into the definition by the caller
    if pd.isna(targetConnectionId) or targetConnectionId == "":
        vMessage = f"no connection defined for <{targetStage}> in the mapping, source connection will be kept."
        print(f"{vMessage}")
        return devConnectionId

    if validateConnection == "yes":
        # if the validation of the connection fails, keep the dev connection
        if validate_stage_connection_id(targetConnectionId) == "unvalid":
            targetConnectionId = devConnectionId

    return targetConnectionId

StatementMeta(, 220708dc-1330-4d2e-a379-bb193810ff23, 102, Finished, Available, Finished)

In [57]:
# function for semantic models where the connection to the source system and database is an M code
def update_partition_source_expression(obj, targetStage):
    """Rewrite the ``Source = ...`` step of every partition expression for the target stage.

    For each partition of each table under ``obj["model"]["tables"]``, the first
    expression line matching ``Source = <expr>,`` is located and ``<expr>`` is
    looked up with find_connection_id (without validation). The line is then
    replaced by ``Source = <mapped expr>,`` indented with four spaces.

    The line is left untouched when the lookup yields no usable replacement
    (None or the "NA" placeholder), so an invalid source is never written.

    Args:
        obj: model definition as a dict; it is modified in place.
        targetStage: stage name forwarded to find_connection_id.

    Returns:
        The same ``obj`` instance that was passed in.
    """
    # indentation used for the rewritten line
    vIndentation = "    "

    # iterate on tables
    for table in obj.get("model", {}).get("tables", []):

        # iterate on partitions
        for partition in table.get("partitions", []):

            # extract the source and from the source the expression
            source = partition.get("source", {})
            expression = source.get("expression", [])

            # only expressions stored as a list of lines are rewritten; a plain string
            # would be iterated character by character and could never match
            if not isinstance(expression, list):
                continue

            # M expressions are multi-line: look for the line that matches "Source = ...,"
            for i, line in enumerate(expression):

                # the group is greedy: it captures everything up to the last comma of the line
                vMatch = re.match(r'\s*Source\s*=\s*(.*),', line)
                if not vMatch:
                    continue

                # Power BI has hundreds of connectors, each with its own expression syntax;
                # for simplicity the full expression is used as the lookup key
                devConnectionId = vMatch.group(1).strip()

                # get the mapped expression for the target stage
                targetConnectionId = find_connection_id(devConnectionId=devConnectionId, targetStage=targetStage, validateConnection = 'no')
                print(f"devConnectionId <{devConnectionId}>, targetConnectionId <{targetConnectionId}>")

                # no usable mapping: keep the original line rather than writing a placeholder
                if targetConnectionId is not None and targetConnectionId != "NA":
                    expression[i] = f'{vIndentation}Source = {targetConnectionId},'

                break  # only the first matching line of a partition is handled

    # return the (in-place modified) model definition
    return obj

StatementMeta(, 220708dc-1330-4d2e-a379-bb193810ff23, 103, Finished, Available, Finished)

**Update direct lake model lakehouse connection**

In [59]:
# get the list of semantic models in the target workspace
df_target_semantic_models = fabric.list_datasets(workspace=vTargetWorkspaceName)

# iterate over each dataset in the dataframe; every model produces exactly one log row
for index, row in df_target_semantic_models.iterrows():

    # get the semantic model name
    vSemanticModelName = row['Dataset Name']

    # update the connection of the semantic model; any error is logged and the loop continues
    try:

        # default semantic models are skipped
        if not labs.is_default_semantic_model(vSemanticModelName, vTargetWorkspaceId):

            print(f'updating semantic model <{vSemanticModelName}> connection in workspace <{vTargetWorkspaceName}>.')

            # check if the semantic model has a direct lake lakehouse
            # the flag is reset for every model so the result of a previous iteration can never leak into this one
            vValidationDirectLake = "unvalid"
            try:
                vDatasetDirectLakehouse=labs.directlake.get_direct_lake_lakehouse(
                    dataset=vSemanticModelName,
                    workspace= vTargetWorkspaceName,
                )
                vValidationDirectLake = "valid"

            except Exception as e:
                # only this specific error is treated as "no direct lake lakehouse";
                # anything else is a real failure and is handed to the outer handler to be logged
                if "SQL Endpoint not found" not in str(e):
                    raise

            # if the semantic model has a direct lake lakehouse, update the connection and refresh it
            if vValidationDirectLake == "valid":

                print(f'semantic model <{vSemanticModelName}> has a direct lake connection. using model.bim instead')

                # look the direct lake source up once: element 0 is passed as the source type, element 1 as the source
                vDirectLakeSource = labs.directlake.get_direct_lake_source(vSemanticModelName, workspace=vTargetWorkspaceName)

                # update the connection
                labs.directlake.update_direct_lake_model_connection(
                    dataset=vSemanticModelName,
                    workspace= vTargetWorkspaceName,
                    source=vDirectLakeSource[1],
                    source_type=vDirectLakeSource[0],
                    source_workspace=vTargetWorkspaceName
                )

                # refresh the semantic model
                labs.refresh_semantic_model(dataset=vSemanticModelName, workspace=vTargetWorkspaceName)

            else:
                print(f'semantic model <{vSemanticModelName}> has no direct lake connection. using the json structure instead')

                # get the current definition as in the source workspace
                semantic_model_json = labs.get_semantic_model_bim(dataset=vSemanticModelName, workspace=vSourceWorkspaceName)

                # replace M expressions using the connection mapping
                semantic_model_json_new = update_partition_source_expression(semantic_model_json, pTargetStage)

                # update the semantic model in the target workspace from the new json structure
                labs.update_semantic_model_from_bim(
                    dataset=vSemanticModelName,
                    bim_file=semantic_model_json_new,
                    workspace=vTargetWorkspaceName
                )

        vMessage = f"succeeded"
        dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, vSourceWorkspaceName, vTargetWorkspaceName, vSemanticModelName, 'update semantic model connection', datetime.now(), None, vMessage, '']

    except Exception as e:
        vMessage = f"failed"
        dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, vSourceWorkspaceName, vTargetWorkspaceName, vSemanticModelName, 'update semantic model connection', datetime.now(), None, vMessage, str(e) ]
        if pDebugMode == "yes":
            print(str(e))


StatementMeta(, 220708dc-1330-4d2e-a379-bb193810ff23, 105, Finished, Available, Finished)

updating semantic model <AdventureWorks> connection in workspace <ci-cd-prod-05>.
semantic model <AdventureWorks> has no direct lake connection. using the json structure instead
None
<re.Match object; span=(0, 51), match='    Source = Sql.Database(".", "AdventureWorksDW">
dev connection id is <Sql.Database(".", "AdventureWorksDW")>
Sql.Database(".", "AdventureWorksDW") Sql.Database("localhost", "AdventureWorksDW")
None
<re.Match object; span=(0, 51), match='    Source = Sql.Database(".", "AdventureWorksDW">
dev connection id is <Sql.Database(".", "AdventureWorksDW")>
Sql.Database(".", "AdventureWorksDW") Sql.Database("localhost", "AdventureWorksDW")
None
<re.Match object; span=(0, 51), match='    Source = Sql.Database(".", "AdventureWorksDW">
dev connection id is <Sql.Database(".", "AdventureWorksDW")>
Sql.Database(".", "AdventureWorksDW") Sql.Database("localhost", "AdventureWorksDW")
None
<re.Match object; span=(0, 51), match='    Source = Sql.Database(".", "AdventureWorksDW">
dev con

**Rebind reports**

In [None]:
# get the list of reports in the target workspace
df_target_reports = fabric.list_reports(workspace=vTargetWorkspaceName)

# iterate over the reports; every report produces exactly one log row
for index, row in df_target_reports.iterrows():

    # get the report name and the id of the semantic model it is bound to
    vReportName = row['Name']
    vSemanticModelId = row['Dataset Id']

    # update report connection; any error is logged and the loop continues
    try:
        # resolve the semantic model name inside the try block: a dataset id that cannot be
        # resolved in the target workspace is logged as a failure for this report only,
        # instead of aborting the loop for all remaining reports
        vSemanticModelName = fabric.resolve_item_name(item_id=vSemanticModelId, workspace=vTargetWorkspaceName)

        print(f'rebinding report <{vReportName}> to semantic model <{vSemanticModelName}> in workspace <{vTargetWorkspaceName}>.')

        labs.report.report_rebind(
            report=vReportName,
            dataset=vSemanticModelName,
            report_workspace=vTargetWorkspaceName,
            dataset_workspace=vTargetWorkspaceName
        )

        vMessage = f"succeeded"
        dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, vSourceWorkspaceName, vTargetWorkspaceName, vReportName, 'update report connection', datetime.now(), None, vMessage, '']
    except Exception as e:
        vMessage = f"failed"
        dfLogging.loc[len(dfLogging.index)] = [None, vNotebookId, vLogNotebookName, vWorkspaceId, vSourceWorkspaceName, vTargetWorkspaceName, vReportName, 'update report connection', datetime.now(), None, vMessage, str(e) ]
        if pDebugMode == "yes":
            print(str(e))


**Logging**

In [None]:
# persist the rows collected in dfLogging; a failure here is reported but never stops the notebook
try:
    # cast every column to an explicit type before handing the frame to spark
    dfLogging = dfLogging.astype({
            "LoadId": "string",
            "NotebookId": "string",
            "NotebookName": "string",
            "WorkspaceId": "string",
            "SourceWorkspaceName" : "string",
            "TargetWorkspaceName" : "string",
            "Item":"string",
            "CellId": "string",
            "Timestamp": "datetime64[ns]",
            "ElapsedTime": "string",
            "Message": "string",
            "ErrorMessage" : "string"
        })

    # convert the pandas dataframe to a spark dataframe
    sparkDF_Logging = spark.createDataFrame(dfLogging)

    # append to the delta table, allowing the table schema to be merged with the dataframe schema
    sparkDF_Logging.write.mode("append").format("delta").option("mergeSchema", "true").saveAsTable("staging.notebook_logging_cicd")

except Exception as e:
    vMessage = "saving logs to the lakehouse failed"
    # always surface the failure; previously the message was built but never emitted,
    # so a failed save was silent unless debug mode was on
    print(vMessage)
    if pDebugMode == "yes":
        print(str(e))