### **1. Importing required PySpark and user-defined modules**

In [None]:
# Importing SparkSession to build the session and AnalysisException for handling Spark analysis errors
from pyspark.sql import SparkSession
from pyspark.sql.utils import AnalysisException

In [None]:
%run "./custom_logging"

In [None]:
%run "./metadata"

In [None]:
%run "./spark_utils"

### **2. Creating widgets to receive the parameters from ADF**

In [None]:
# Parameters supplied by ADF. Each one is declared as a text widget with an
# empty default and then read straight back into a notebook variable of the
# same name.

# job_name: passed to the custom logger and echoed in the pipeline log.
dbutils.widgets.text("job_name", "")
job_name = dbutils.widgets.get("job_name")

# file_location: handed to ReadJson when the metadata is loaded in main().
dbutils.widgets.text("file_location", "")
file_location = dbutils.widgets.get("file_location")

# log_target_dir: destination the log file is moved to at the end of the run.
dbutils.widgets.text("log_target_dir", "")
log_target_dir = dbutils.widgets.get("log_target_dir")

### **3. Configuring the custom logger from custom_logging module**

In [None]:
# Creating object for the CustomLogger class
# NOTE(review): CustomLogger is not defined in this notebook; presumably it is
# brought into scope by the %run of "./custom_logging" above — confirm.
cust_log = CustomLogger()
# custom_logger(job_name) yields two values: `logger`, used for every log call
# in this notebook, and `log_file`, which is later interpolated into the
# source path of the log-file move to `log_target_dir`.
logger, log_file = cust_log.custom_logger(job_name)

### **4. Starting pipeline log**

In [None]:
# Pipeline log: record which job is running (the `job_name` widget value) and
# mark the start of the gold dimensions delta data load.
logger.info(f"JOB_NAME : {job_name}")
logger.info(f"The {job_name} for gold dimensions delta data load is started.")

### **5. Establish SparkSession**

In [None]:
logger.info("Establishing sparksession with service principal authentication configs.")

# Environment-specific placeholders, kept in one place so they only need to be
# filled in once. The resulting config keys are identical to spelling the
# host/scope out in every .config() call.
_adls_host = "<storage-account-name>.dfs.core.windows.net"
_secret_scope = "<secret-scope-name>"

try:
    # Service-principal (OAuth client-credentials) settings for the ADLS
    # account. Secrets are fetched inline so they are never bound to a
    # notebook variable.
    spark = (
        SparkSession.builder
        .appName("Olist_Gold_Dims")
        .config(f"fs.azure.account.auth.type.{_adls_host}", "OAuth")
        .config(
            f"fs.azure.account.oauth.provider.type.{_adls_host}",
            "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
        )
        .config(
            f"fs.azure.account.oauth2.client.id.{_adls_host}",
            dbutils.secrets.get(scope=_secret_scope, key="sp-client-id"),
        )
        .config(
            f"fs.azure.account.oauth2.client.secret.{_adls_host}",
            dbutils.secrets.get(scope=_secret_scope, key="sp-client-pwd"),
        )
        .config(
            f"fs.azure.account.oauth2.client.endpoint.{_adls_host}",
            f"https://login.microsoftonline.com/{dbutils.secrets.get(scope=_secret_scope, key='sp-directory-id')}/oauth2/token",
        )
        .getOrCreate()
    )
except Exception as e:
    # Logging Exception details, then re-raising: without these configs the
    # rest of the notebook cannot authenticate, so continuing would only
    # produce misleading downstream errors.
    logger.critical(f"An unexpected error occurred: {str(e)}")
    raise
else:
    # Only reported when the session was actually built.
    logger.info("Sparksession with service principal authentication configs established successfully.")

### **6. Main function definition**

In [None]:
def main():
    """Load every Gold-layer dimension table described in the metadata file.

    Reads the metadata via ``ReadJson(file_location).get_metadata()`` and, for
    each entry, obtains a loader from ``Factory.get_gold_dims``, reads the
    silver source with ``read_silver`` and writes the dimension with
    ``load_dimension``. A failure on one table is logged and does not stop the
    remaining tables from being processed.

    Relies on notebook-level names: ``logger``, ``spark``, ``file_location``,
    ``ReadJson``, ``Factory`` and ``AnalysisException``.

    Returns:
        list: the ``tgt_table_name`` of every entry whose load raised an
        exception (empty when all loads succeeded).

    Raises:
        Whatever ``ReadJson`` / ``get_metadata`` raise, and ``KeyError`` if a
        metadata entry lacks one of the expected keys; these occur outside the
        per-table error handling.
    """
    # Local import so logging.shutdown() does not depend on `logging` having
    # leaked into the notebook namespace from a %run notebook.
    import logging

    logger.info("In the main function.")

    # Creating metadata variable from the user defined modules
    logger.info("Creating metadata variable from the user defined modules.")

    meta_data = ReadJson(file_location)
    gold_dims_meta = meta_data.get_metadata()

    logger.info("Variable created successfully.")

    # Starting the Gold dims delta table data load
    logger.info("Starting data load for the dimension delta tables in Gold layer.")

    # Target tables whose load failed; reported at the end and returned.
    failed_tables = []

    # Looping through the gold dims metadata variable 'gold_dims_meta' for performing the data load
    for item in gold_dims_meta:

        # Pipeline variables from gold_dims_meta metadata variable
        app_nm = item["app_nm"]
        src_table_name = item["src_table_name"]
        src_table_path = item["src_table_path"]
        tgt_table_name = item["tgt_table_name"]
        basePath = item["basePath"]

        logger.info(f"Data load started for dimension table '{tgt_table_name}'.")

        logger.info(f"""
                        app_nm      :   {app_nm}
                        src_table_name   :   {src_table_name}
                        src_table_path   :   {src_table_path}
                        tgt_table_name  :   {tgt_table_name}
                        basePath    :   {basePath}     
                        """)

        try:
            # Calling the factory method from spark_utils module to instantiate the object based on the tgt_table_name
            logger.info(f"Calling the factory method from spark_utils module to instantiate the object based on the target table name '{tgt_table_name}'.")
            factory = Factory.get_gold_dims(tgt_table_name)
            logger.info(f"Object instantiated successfully based on the target table name '{tgt_table_name}'.")

            # Creating pyspark dataframe from silver delta table
            logger.info(f"Creating pyspark dataframe from silver delta table '{src_table_name}'.")
            silver_df = factory.read_silver(spark, src_table_path)
            logger.info(f"Dataframe has been created successfully from silver delta table '{src_table_name}'.")

            # Performing SCD2/merge data load to the dimension delta table in Gold layer
            logger.info(f"Performing SCD2 data load using merge to the dimension delta table '{tgt_table_name}' in Gold layer.")
            factory.load_dimension(spark, basePath, silver_df)
            logger.info(f"Data loaded successfully to the Gold dimension delta table '{tgt_table_name}'.")

        except AnalysisException as e:
            # Logging AnalysisException details
            logger.error(f"AnalysisException Message : {str(e)}")
            failed_tables.append(tgt_table_name)

        except Exception as e:
            # Logging Exception details (with traceback, since the cause is unknown)
            logger.critical(f"An unexpected error occurred: {str(e)}", exc_info=True)
            failed_tables.append(tgt_table_name)

    # Make partial failures visible in one place instead of only inline above.
    if failed_tables:
        logger.error(f"Data load failed for {len(failed_tables)} dimension table(s): {failed_tables}")

    # Shutting down the logger
    logger.info("Data load for the Gold dimension delta tables has been completed.")
    logging.shutdown()

    return failed_tables

### **7. Executing the main function**

In [None]:
if __name__ == "__main__":
    try:
        main()
    finally:
        # Moving log file from DBFS to ADLS.
        # Done in `finally` so the log is still shipped when main() raises —
        # that is exactly the run whose log is needed for diagnosis. Note that
        # if the move itself fails, its exception is the one that surfaces.
        dbutils.fs.mv(f"file:/dbfs/{log_file}", log_target_dir)