# DAG silver tables update

The main goal of this notebook is to orchestrate multiple runs of the silver-tables-update notebook (Workspaces/Silver layer/silver-tables-update). It does this by retrieving all Bronze layer tables that are linked (via shortcuts) to the silver lakehouse and distributing them across multiple runs of the silver-tables-update notebook.

In [None]:
import json

StatementMeta(, 51b5d780-d173-4323-9bb3-d6077e118848, 3, Finished, Available)

In [None]:
# Read the distinct TABLE_NAME values from the tableschema_silver table and
# collect them on the driver as a list of lower-cased strings.
df_table_name = spark.sql(
    "SELECT DISTINCT(TABLE_NAME) FROM silver_protheus.tableschema_silver"
)
column_values = [
    record['TABLE_NAME'].lower()
    for record in df_table_name.collect()
]

StatementMeta(, 51b5d780-d173-4323-9bb3-d6077e118848, 4, Finished, Available)

In [None]:
# This function takes a list and splits it into n parts.

def split_list_into_n_parts(lst, n):
    """Split ``lst`` into ``n`` contiguous parts of near-equal size.

    The first ``len(lst) % n`` parts receive one extra element, so part sizes
    differ by at most one and the original order is preserved. When ``n`` is
    larger than ``len(lst)`` the trailing parts are empty lists.

    Args:
        lst: Sequence to split (anything supporting ``len`` and slicing).
        n: Number of parts to produce; must be a positive integer.

    Returns:
        A list containing exactly ``n`` slices of ``lst``.

    Raises:
        ValueError: If ``n`` is less than 1.
    """
    if n < 1:
        # n == 0 used to fail with an opaque ZeroDivisionError, and a negative
        # n silently returned [] (dropping every item); reject both up front.
        raise ValueError(f"n must be a positive integer, got {n!r}")

    part_len, remainder = divmod(len(lst), n)

    parts = []
    start = 0
    for i in range(n):
        # The first `remainder` parts absorb the leftover elements.
        end = start + part_len + (1 if i < remainder else 0)
        parts.append(lst[start:end])
        start = end

    return parts




StatementMeta(, 51b5d780-d173-4323-9bb3-d6077e118848, 6, Finished, Available)

In [None]:
# Partition column_values into 10 sub-lists of near-equal size, so the table
# names can be distributed across multiple notebook runs.
splited_lists = split_list_into_n_parts(lst=column_values, n=10)


StatementMeta(, 51b5d780-d173-4323-9bb3-d6077e118848, 7, Finished, Available)

In [None]:

# Build and run the DAG that fans the "silver-tables-update" notebook out over
# the table partitions held in splited_lists.
#
# One activity is generated per partition, named "<k>-silver-ingestion" with k
# counting from 1. Each activity carries the notebook path, a per-cell timeout
# and the notebook parameters; the partition is passed through the
# "tables_names" parameter as the string form of the Python list.
# No "dependencies" entries are declared between the activities.
#
# Generating the activities from splited_lists (instead of hand-writing one
# dict per index) keeps the number of activities in step with the number of
# partitions produced by split_list_into_n_parts.
DAG = {
    "activities": [
        {
            "name": f"{position}-silver-ingestion",  # activity name, must be unique
            "path": "silver-tables-update",  # notebook path
            "timeoutPerCellInSeconds": 800,  # max timeout for each cell
            "args": {"tables_names": f"{table_batch}"},  # notebook parameters
        }
        for position, table_batch in enumerate(splited_lists, start=1)
    ],
    "timeoutInSeconds": 5000,  # timeout for the DAG as a whole
    "concurrency": 10,  # upper bound on activities running at the same time
}


# Execute the DAG and keep the per-activity result log in log_mensage.
# The second argument asks for the DAG to be rendered via Graphviz with a
# spectral layout.
log_mensage = mssparkutils.notebook.runMultiple(
    DAG,
    {"displayDAGViaGraphviz": True, "DAGLayout": "spectral", "DAGSize": 15},
)


StatementMeta(, 51b5d780-d173-4323-9bb3-d6077e118848, 8, Finished, Available)

In [None]:
# Hand the runMultiple log back as this notebook's exit value so that it can be
# used by the calling pipeline. The log is stored in its string form.
output = {'successes': f'{log_mensage}'}

mssparkutils.notebook.exit(output)


StatementMeta(, 51b5d780-d173-4323-9bb3-d6077e118848, 9, Finished, Available)

ExitValue: {'successes': "{'1-silver-ingestion': {'exitVal': '', 'exception': None}, '2-silver-ingestion': {'exitVal': '', 'exception': None}, '3-silver-ingestion': {'exitVal': '', 'exception': None}, '4-silver-ingestion': {'exitVal': '', 'exception': None}, '5-silver-ingestion': {'exitVal': '', 'exception': None}}"}

In [None]:
# Utility cell: list all available notebooks in this workspace.
from sempy import fabric

notebooks = (
    fabric.list_items()
    .query("Type == 'Notebook'")
)
notebooks

StatementMeta(, , , Cancelled, )