**Helper notebook**

In [None]:
%run nb_helper

**Define a logging dataframe**

In [None]:
# empty log table: the cells below append one row per logged step
dfLogging = pd.DataFrame(
    columns=[
        'LoadId',
        'NotebookId',
        'NotebookName',
        'WorkspaceId',
        'SourceWorkspaceName',
        'TargetWorkspaceName',
        'Item',
        'CellId',
        'Timestamp',
        'ElapsedTime',
        'Message',
        'ErrorMessage',
    ]
)

# runtime context of the notebook that is currently executing
vContext = mssparkutils.runtime.context
vNotebookId = vContext["currentNotebookId"]
vLogNotebookName = vContext["currentNotebookName"]
# workspace where this notebook runs -- not to be confused with the source and target workspaces
vWorkspaceId = vContext["currentWorkspaceId"]

**Parameters --> convert to a code cell when debugging the notebook. Otherwise keep them commented out, as the parameters are passed in by the DevOps pipelines**

pSourceWorkspaceId = ""
pTargetWorkspaceId = ""
pTargetStage = "Stage2"
pDebugMode = "yes"
pProjectName = "fabric-cicd"
pMappingConnections = ''

**Resolve source and target workspace**

In [None]:
# the workspace ids are taken as-is from the notebook parameters
vSourceWorkspaceId, vTargetWorkspaceId = pSourceWorkspaceId, pTargetWorkspaceId

# the workspace names are resolved from those ids
vSourceWorkspaceName = fabric.resolve_workspace_name(pSourceWorkspaceId)
vTargetWorkspaceName = fabric.resolve_workspace_name(pTargetWorkspaceId)

**List of data pipelines in source workspace**

In [None]:
# list the data pipelines of the source workspace (looked up by workspace name)
# NOTE(review): the result is presumably a pandas DataFrame -- confirm against the labs helper
df_source_data_pipelines = labs.list_data_pipelines(workspace=vSourceWorkspaceName)

**Verify that there is at least one data pipeline in the source workspace**

In [None]:
# stop early when the source workspace has no data pipeline: there is nothing to post-update
if df_source_data_pipelines.empty:
    # the braces are required so that the workspace name is interpolated into the exit value
    vMessage = f"workspace <{vSourceWorkspaceName}> have 0 data pipeline. post-update is not required."

    # Display an exit message
    display(Markdown("### ✅ Notebook execution stopped successfully!"))

    # Exit without error, handing the message back as the notebook exit value
    mssparkutils.notebook.exit(vMessage)

**Get the connections mapping between Stages and list existing fabric connections**

In [None]:
# existing connections, fetched once and used for lookups when validating target connection ids
df_existing_connections = labs.list_connections()

# connection mapping between stages: parsed from the JSON text parameter, then loaded in a dataframe
mapping_connections_json = json.loads(pMappingConnections)
df_mapping_connections = pd.DataFrame(data=mapping_connections_json)

**Functions**
- validate_stage_connection_id
- find_connection_id
- update_pipeline_connections
- update_linked_services
- update_notebooks
- update_fabric_pipelines
- update_semantic_models
- update_data_pipeline_definition

In [None]:
def validate_stage_connection_id(connectionId):
    """
    Check whether a connection id is present in the list of existing connections.

    The id is looked up in the 'Connection Id' column of the global
    df_existing_connections and the outcome is printed.

    Parameters
    ----------
    connectionId : str
        The connection id to look up.

    Returns
    -------
    str
        "valid" when the id is found, "unvalid" otherwise.
    """
    is_known = connectionId in df_existing_connections['Connection Id'].values

    # guard clause: report the unknown id and leave
    if not is_known:
        print(f"connection id <{connectionId}> is unvalid>")
        return "unvalid"

    print(f"connection id <{connectionId}> is valid>")
    return "valid"

In [None]:
# function to find a connection id based on the target stage
# the csv provided with the mapping between stages is used, with the assumption of 4 stages maximum (dev, test, uat, prod)
def find_connection_id(devConnectionId, targetStage):
    """
    Find the connection id to use in the target stage for a given dev (Stage1) connection id.

    The global df_mapping_connections is filtered on its 'ConnectionStage1' column
    and the first matching row is used. The dev connection id is returned unchanged
    whenever no usable target connection can be determined, so that the json
    definition of the pipeline is never broken:
      - no row of the mapping matches the dev connection id,
      - the column of the requested stage is absent from the mapping,
      - the mapped value is missing or empty,
      - the mapped connection id fails validate_stage_connection_id.

    Parameters
    ----------
    devConnectionId : str
        The connection id found in the pipeline definition (Stage1).
    targetStage : str
        "Stage2" or "Stage3"; any other value is looked up in 'ConnectionStage4'.

    Returns
    -------
    str
        The target stage connection id, or devConnectionId as a fallback.
    """
    print(f"dev connection id is <{devConnectionId}>")

    # rows of the mapping whose Stage1 connection is the dev connection
    df_mapping_connections_filtered = df_mapping_connections[
        df_mapping_connections['ConnectionStage1'] == devConnectionId
    ]

    if df_mapping_connections_filtered.empty:
        print("no valid connection found in the mapping matching the condition, source connection will be kept")
        return devConnectionId

    first_row = df_mapping_connections_filtered.iloc[0]  # first matching row

    # column holding the connection id of the requested stage (Stage4 is the fallback, as before)
    stage_columns = {"Stage2": "ConnectionStage2", "Stage3": "ConnectionStage3"}
    stage_column = stage_columns.get(targetStage, "ConnectionStage4")

    # .get returns None instead of raising a KeyError when the mapping has fewer stage columns,
    # so a missing stage column is handled like a missing value
    targetConnectionId = first_row.get(stage_column)

    # if the stage column in the mapping has no value, assign NA
    if pd.isna(targetConnectionId) or targetConnectionId == "":
        targetConnectionId = "NA"

    # if the validation of the stage connection fails, keep the dev connection
    if validate_stage_connection_id(targetConnectionId) == "unvalid":
        return devConnectionId

    return targetConnectionId

In [None]:
# function to parse the json of the pipeline and update connections
def update_pipeline_connections(obj, stage):
    """
    Walk a pipeline json structure and replace the value of every "connection" key in place.

    Each "connection" value is treated as the dev (Stage1) connection id and is
    replaced by the result of find_connection_id for the requested stage. The
    value of a "connection" key is not searched any further; all other values are
    walked recursively. Objects that are neither a dict nor a list are ignored.

    Parameters
    ----------
    obj : dict | list | any
        The (part of the) pipeline definition to process; modified in place.
    stage : str
        The target stage handed over to find_connection_id.
    """
    # lists: process every element
    if isinstance(obj, list):
        for element in obj:
            update_pipeline_connections(element, stage)
        return

    # scalars and anything else: nothing to do
    if not isinstance(obj, dict):
        return

    for name in obj:
        if name != "connection":
            update_pipeline_connections(obj[name], stage)
            continue

        # overwriting an existing key does not resize the dict, so this is safe while iterating
        obj[name] = find_connection_id(devConnectionId=obj[name], targetStage=stage)

In [None]:
# function to parse the json of the pipeline and update the Warehouse and Lakehouse linked services
def update_linked_services(obj):
    """
    Walk a pipeline json structure and repoint Warehouse / Lakehouse linked services in place.

    For every dict holding a "linkedService" dict whose properties type is
    "DataWarehouse" or "Lakehouse":
      - the item name is resolved from the current artifactId in the workspace
        given by the global vSourceWorkspaceId,
      - the id of the item with the same name is resolved in the workspace given
        by the global vTargetWorkspaceId,
      - artifactId and workspaceId of the typeProperties are overwritten with the
        target values (as strings).
    For a DataWarehouse, the endpoint is also overwritten with the connectionString
    returned by the warehouse REST call issued through the global client.

    Parameters
    ----------
    obj : dict | list | any
        The (part of the) pipeline definition to process; modified in place.
        Objects that are neither a dict nor a list are ignored.
    """
    # lists: process every element
    if isinstance(obj, list):
        for element in obj:
            update_linked_services(element)
        return

    if not isinstance(obj, dict):
        return

    linked_service = obj.get("linkedService")
    if isinstance(linked_service, dict):
        properties = linked_service.get("properties", {})
        service_type = properties.get("type")

        if service_type == "DataWarehouse":
            type_properties = properties.get("typeProperties", {})

            # source item -> name -> id of the item with the same name in the target workspace
            current_artifact_id = type_properties.get("artifactId", "Not Found")
            artifact_name = fabric.resolve_item_name(item_id=current_artifact_id, workspace=vSourceWorkspaceId)
            target_artifact_id = fabric.resolve_item_id(item_name=artifact_name, type='Warehouse', workspace=vTargetWorkspaceId)

            # the endpoint of the target warehouse is read from its REST properties
            response = client.get(f"v1/workspaces/{vTargetWorkspaceId}/warehouses/{target_artifact_id}")
            target_endpoint = response.json()['properties']['connectionString']

            type_properties["endpoint"] = f"{target_endpoint}"
            type_properties["artifactId"] = f"{target_artifact_id}"
            type_properties["workspaceId"] = f"{vTargetWorkspaceId}"

        elif service_type == "Lakehouse":
            type_properties = properties.get("typeProperties", {})

            # source item -> name -> id of the item with the same name in the target workspace
            current_artifact_id = type_properties.get("artifactId", "Not Found")
            artifact_name = fabric.resolve_item_name(item_id=current_artifact_id, workspace=vSourceWorkspaceId)
            target_artifact_id = fabric.resolve_item_id(item_name=artifact_name, type='Lakehouse', workspace=vTargetWorkspaceId)

            type_properties["artifactId"] = f"{target_artifact_id}"
            type_properties["workspaceId"] = f"{vTargetWorkspaceId}"

    # keep searching in every value of the dictionary
    for child in obj.values():
        update_linked_services(child)

In [None]:
# function to parse the json of the pipeline and update notebooks
def update_notebooks(obj):
    """
    Walk a pipeline json structure and repoint notebook activities in place.

    For every dict whose "type" is "TridentNotebook":
      - the notebook name is resolved from the notebookId, in the workspace given
        by the workspaceId of the activity itself (not the global source workspace),
      - the id of the notebook with the same name is resolved in the workspace
        given by the global vTargetWorkspaceId,
      - notebookId and workspaceId of the typeProperties are overwritten with the
        target values (as strings).

    Parameters
    ----------
    obj : dict | list | any
        The (part of the) pipeline definition to process; modified in place.
        Objects that are neither a dict nor a list are ignored.
    """
    # lists: process every element
    if isinstance(obj, list):
        for element in obj:
            update_notebooks(element)
        return

    if not isinstance(obj, dict):
        return

    if obj.get("type") == "TridentNotebook":
        type_properties = obj.get("typeProperties", {})

        # values currently stored in the activity
        current_notebook_id = type_properties.get("notebookId", "Not Found")
        activity_workspace_id = type_properties.get("workspaceId", "Not Found")

        # source notebook -> name -> id of the notebook with the same name in the target workspace
        notebook_name = fabric.resolve_item_name(item_id=current_notebook_id, workspace=activity_workspace_id)
        target_notebook_id = fabric.resolve_item_id(item_name=notebook_name, type='Notebook', workspace=vTargetWorkspaceId)

        type_properties["notebookId"] = f"{target_notebook_id}"
        type_properties["workspaceId"] = f"{vTargetWorkspaceId}"

    # keep searching in every value of the dictionary
    for child in obj.values():
        update_notebooks(child)



In [None]:
# function to parse the json of the pipeline and update invoked fabric pipelines
def update_fabric_pipelines(obj):
    """
    Walk a pipeline json structure and repoint invoked fabric pipelines in place.

    For every dict whose "type" is "InvokePipeline" and whose typeProperties
    operationType is "InvokeFabricPipeline":
      - the pipeline name is resolved from the pipelineId, in the workspace given
        by the workspaceId of the activity itself (not the global source workspace),
      - the id of the pipeline with the same name is resolved in the workspace
        given by the global vTargetWorkspaceId,
      - pipelineId and workspaceId of the typeProperties are overwritten with the
        target values (as strings).

    Parameters
    ----------
    obj : dict | list | any
        The (part of the) pipeline definition to process; modified in place.
        Objects that are neither a dict nor a list are ignored.
    """
    # lists: process every element
    if isinstance(obj, list):
        for element in obj:
            update_fabric_pipelines(element)
        return

    if not isinstance(obj, dict):
        return

    if obj.get("type") == "InvokePipeline":
        type_properties = obj.get("typeProperties", {})

        # only invocations of fabric pipelines are repointed
        if type_properties.get("operationType", "Not Found") == "InvokeFabricPipeline":
            current_pipeline_id = type_properties.get("pipelineId", "Not Found")
            activity_workspace_id = type_properties.get("workspaceId", "Not Found")

            # source pipeline -> name -> id of the pipeline with the same name in the target workspace
            pipeline_name = fabric.resolve_item_name(item_id=current_pipeline_id, workspace=activity_workspace_id)
            target_pipeline_id = fabric.resolve_item_id(item_name=pipeline_name, type='DataPipeline', workspace=vTargetWorkspaceId)

            type_properties["pipelineId"] = f"{target_pipeline_id}"
            type_properties["workspaceId"] = f"{vTargetWorkspaceId}"

    # keep searching in every value of the dictionary
    for child in obj.values():
        update_fabric_pipelines(child)

In [None]:
# function to parse the json of the pipeline and update semantic models
def update_semantic_models(obj):
    """
    Walk a pipeline json structure and repoint semantic model refresh activities in place.

    For every dict whose "type" is "PBISemanticModelRefresh":
      - the semantic model name is resolved from the datasetId, in the workspace
        given by the groupId of the activity itself (not the global source workspace),
      - the id of the semantic model with the same name is resolved in the
        workspace given by the global vTargetWorkspaceId,
      - datasetId and groupId of the typeProperties are overwritten with the
        target values (as strings).

    Parameters
    ----------
    obj : dict | list | any
        The (part of the) pipeline definition to process; modified in place.
        Objects that are neither a dict nor a list are ignored.
    """
    # lists: process every element
    if isinstance(obj, list):
        for element in obj:
            update_semantic_models(element)
        return

    if not isinstance(obj, dict):
        return

    if obj.get("type") == "PBISemanticModelRefresh":
        type_properties = obj.get("typeProperties", {})

        # values currently stored in the activity
        current_dataset_id = type_properties.get("datasetId", "Not Found")
        activity_group_id = type_properties.get("groupId", "Not Found")

        # source model -> name -> id of the model with the same name in the target workspace
        dataset_name = fabric.resolve_item_name(item_id=current_dataset_id, workspace=activity_group_id)
        target_dataset_id = fabric.resolve_item_id(item_name=dataset_name, type='SemanticModel', workspace=vTargetWorkspaceId)

        type_properties["datasetId"] = f"{target_dataset_id}"
        type_properties["groupId"] = f"{vTargetWorkspaceId}"

    # keep searching in every value of the dictionary
    for child in obj.values():
        update_semantic_models(child)

In [None]:
# function to update the data pipeline definition
def update_data_pipeline_definition(
    name: str, pipelineContent: dict, workspace: Optional[str] = None
):
    """
    Push a new definition to an existing data pipeline.

    The content is serialised to json, base64-encoded and posted to the
    updateDefinition endpoint of the item as a single "pipeline-content.json"
    part. The response is then handed to lro and a confirmation line is printed.

    Parameters
    ----------
    name : str
        The name of the data pipeline.
    pipelineContent : dict
        The data pipeline content (not in Base64 format).
    workspace : str, default=None
        The name of the workspace, passed to resolve_workspace_name_and_id.
        NOTE(review): None presumably resolves to the workspace of the attached
        lakehouse or of the notebook -- confirm against the helper.
    """

    # name and id of the workspace holding the pipeline
    vWorkspace, vWorkspaceId = resolve_workspace_name_and_id(workspace)

    # dict -> json text -> utf-8 bytes -> base64 text
    definition_bytes = json.dumps(pipelineContent).encode('utf-8')
    encoded_definition = base64.b64encode(definition_bytes).decode('utf-8')

    # id of the pipeline to update
    vPipelineId = fabric.resolve_item_id(item_name=name, type="DataPipeline", workspace=vWorkspace)

    # the definition is sent as one inline base64 part
    definition_part = {
        "path": "pipeline-content.json",
        "payload": encoded_definition,
        "payloadType": "InlineBase64",
    }

    update_response = client.post(
        f"v1/workspaces/{vWorkspaceId}/items/{vPipelineId}/updateDefinition",
        json={"definition": {"parts": [definition_part]}},
    )

    # hand the response over to the long running operation helper
    lro(client, update_response, return_status_code=True)

    print(f"{icons.green_dot} The '{name}' pipeline was updated within the '{vWorkspace}' workspace.")


**Replacement of linked services, connections, notebooks, fabric pipelines, etc.**

In [None]:

def _log_pipeline_step(pipelineName, cellId, message, errorMessage=''):
    """Append one row to the global logging dataframe for a step executed on a pipeline."""
    dfLogging.loc[len(dfLogging.index)] = [
        None, vNotebookId, vLogNotebookName, vWorkspaceId,
        vSourceWorkspaceName, vTargetWorkspaceName, pipelineName,
        cellId, datetime.now(), None, message, errorMessage,
    ]


def _log_pipeline_failure(pipelineName, cellId, error):
    """Log a failed step and, in debug mode, print the error."""
    _log_pipeline_step(pipelineName, cellId, "failed", str(error))
    if pDebugMode == "yes":
        print(str(error))


def _run_pipeline_step(pipelineName, cellId, action):
    """Run one update step; log "succeeded" or "failed" without interrupting the notebook."""
    try:
        action()
        _log_pipeline_step(pipelineName, cellId, "succeeded")
    except Exception as e:
        _log_pipeline_failure(pipelineName, cellId, e)


# get the list of data pipelines in the target workspace
df_pipeline = labs.list_data_pipelines(vTargetWorkspaceName)

# iterate over the data pipelines
for index, row in df_pipeline.iterrows():

    vPipelineName = row['Data Pipeline Name']

    # retrieve the pipeline json definition from the source workspace
    # a pipeline that cannot be read there (e.g. it only exists in the target workspace) is logged
    # and skipped, so that it neither stops the remaining pipelines nor prevents the logs from being saved
    try:
        vPipelineJson = json.loads(labs.get_data_pipeline_definition(vPipelineName, vSourceWorkspaceName))
    except Exception as e:
        _log_pipeline_failure(vPipelineName, 'get pipeline definition', e)
        continue

    # the steps run in this order; a failing step is logged and the following steps still run
    # lambdas defer the evaluation so that any error is raised inside the try of _run_pipeline_step
    vUpdateSteps = [
        ('update linked services', lambda: update_linked_services(vPipelineJson.get("properties", {}).get("activities", []))),
        ('update connections', lambda: update_pipeline_connections(vPipelineJson, pTargetStage)),
        ('update notebooks', lambda: update_notebooks(vPipelineJson)),
        ('update fabric pipeline', lambda: update_fabric_pipelines(vPipelineJson)),
        ('update semantic models', lambda: update_semantic_models(vPipelineJson)),
        ('update pipeline definition', lambda: update_data_pipeline_definition(name=vPipelineName, pipelineContent=vPipelineJson, workspace=vTargetWorkspaceName)),
    ]

    for vStepName, vStepAction in vUpdateSteps:
        _run_pipeline_step(vPipelineName, vStepName, vStepAction)



**Logging**

In [None]:
# persist the collected log rows; a failure here must not fail the notebook, but it must be visible
try:
    # perform the conversion of columns
    dfLogging = dfLogging.astype({
            "LoadId": "string",
            "NotebookId": "string",
            "NotebookName": "string",
            "WorkspaceId": "string",
            "CellId": "string",
            "Timestamp": "datetime64[ns]",
            "ElapsedTime": "string",
            "Message": "string",
            "ErrorMessage" : "string"
        })

    # save panda dataframe to a spark dataframe
    sparkDF_Logging = spark.createDataFrame(dfLogging)

    # save to the lakehouse
    sparkDF_Logging.write.mode("append").format("delta").option("mergeSchema", "true").saveAsTable("staging.notebook_logging_cicd")

except Exception as e:
    vMessage = "saving logs to the lakehouse failed"
    # always report the failure, otherwise lost logs go unnoticed outside debug mode
    print(vMessage)
    # the error details are only printed in debug mode
    if pDebugMode == "yes":
        print(str(e))