### **1. Importing required pyspark and user defined modules**

In [None]:
# Importing SparkSession to build the session and AnalysisException for handling exceptions
from pyspark.sql import SparkSession
from pyspark.sql.utils import AnalysisException

In [None]:
%run "./custom_logging"

In [None]:
%run "./metadata"

In [None]:
%run "./spark_utils"

### **2. Creating widgets to receive the parameters from ADF**

In [None]:
# ADF passes its parameters in through text widgets; each one is registered
# with an empty-string default before its value is read back.
for _widget_name in ("job_name", "file_location", "log_target_dir"):
    dbutils.widgets.text(_widget_name, "")

# Notebook-level names consumed by the cells below.
job_name = dbutils.widgets.get("job_name")
file_location = dbutils.widgets.get("file_location")
log_target_dir = dbutils.widgets.get("log_target_dir")

### **3. Configuring the custom logger from custom_logging module**

In [None]:
# Creating object for the CustomLogger class
# (presumably defined by the "./custom_logging" notebook run above - verify).
cust_log = CustomLogger()
# custom_logger(job_name) returns a pair: `logger`, used for every log call in
# this notebook, and `log_file`, which the final cell interpolates into the
# path of the log file it moves to log_target_dir.
logger, log_file = cust_log.custom_logger(job_name)

### **4. Starting pipeline log**

In [None]:
# Pipeline log: record which job is running before any Spark work starts,
# so the log file identifies the run even if a later cell fails.
logger.info(f"JOB_NAME : {job_name}")
logger.info(f"The {job_name} for gold facts delta data load is started.")

### **5.  Establish SparkSession**

In [None]:
logger.info("Establishing sparksession with service principal authentication configs.")

# Every ABFS OAuth setting is keyed by the storage account's DFS endpoint, and
# every credential comes from the same secret scope; naming them once keeps the
# five config keys in sync when the placeholders are filled in.
_adls_host = "<storage-account-name>.dfs.core.windows.net"
_secret_scope = "<secret-scope-name>"

try:
    # Secret lookups stay inside the try so a missing scope or key is logged
    # the same way as any other session-setup failure.
    _sp_client_id = dbutils.secrets.get(scope=_secret_scope, key="sp-client-id")
    _sp_client_secret = dbutils.secrets.get(scope=_secret_scope, key="sp-client-pwd")
    _sp_tenant_id = dbutils.secrets.get(scope=_secret_scope, key="sp-directory-id")

    spark = (
        SparkSession.builder
        .appName("Olist_Gold_Facts")
        .config(f"fs.azure.account.auth.type.{_adls_host}", "OAuth")
        .config(
            f"fs.azure.account.oauth.provider.type.{_adls_host}",
            "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider",
        )
        .config(f"fs.azure.account.oauth2.client.id.{_adls_host}", _sp_client_id)
        .config(f"fs.azure.account.oauth2.client.secret.{_adls_host}", _sp_client_secret)
        .config(
            f"fs.azure.account.oauth2.client.endpoint.{_adls_host}",
            f"https://login.microsoftonline.com/{_sp_tenant_id}/oauth2/token",
        )
        .getOrCreate()
    )
except Exception as e:
    # Logging Exception details, then re-raising: the cells below cannot load
    # anything without the authenticated session, so the run must stop here
    # instead of continuing and reporting success.
    logger.critical(f"An unexpected error occurred: {str(e)}")
    raise
else:
    # Only reached when the session was actually created.
    logger.info("Sparksession with service principal authentication configs established successfully.")

### **6. Main function definition**

In [None]:
def main():
    """Load every Gold-layer fact Delta table listed in the metadata file.

    Reads the metadata at the notebook-level ``file_location`` through
    ``ReadJson``, then for each metadata entry asks ``Factory`` for the object
    matching the entry's ``fact`` name and runs its three steps in order:
    ``get_src_df_dict`` -> ``transform_src_df`` -> ``load_fact``.

    Failures are handled per table: an exception raised by any of the three
    steps is logged and the loop moves on to the next entry. The names of the
    tables that failed are collected and reported in one summary error line at
    the end of the run.

    Relies on notebook-level names: ``logger``, ``file_location``, ``spark``,
    ``ReadJson``, ``Factory`` and ``AnalysisException``.

    Returns:
        None.

    Raises:
        Any exception raised while reading the metadata, or a ``KeyError`` when
        a metadata entry lacks one of the expected keys; these are not caught
        here. The logging system is shut down before such an exception
        propagates.
    """
    # Imported locally: this notebook never imports `logging` itself, so the
    # shutdown call must not depend on a name leaked by one of the %run notebooks.
    import logging

    # Target table names whose load raised; reported once after the loop.
    failed_tables = []

    try:
        logger.info(f"In the {__name__} function.")

        # Creating metadata variable from the user defined modules
        logger.info("Creating metadata variable from the user defined modules.")

        meta_data = ReadJson(file_location)
        gold_facts_meta = meta_data.get_metadata()

        logger.info("Variable created successfully.")

        # Starting the Gold facts delta table data load
        logger.info("Starting data load for the fact delta tables in Gold layer.")

        # Looping through the gold facts metadata variable 'gold_facts_meta' for performing the data load
        for item in gold_facts_meta:

            # Pipeline variables from gold_facts_meta metadata variable
            fact = item["fact"]
            silver_src_tables = item["silver_src_tables"]
            dim_src_tables = item["dim_src_tables"]
            tgt_table_name = item["tgt_table_name"]
            basePath = item["basePath"]

            logger.info(f"Data load started for fact table '{tgt_table_name}'.")

            logger.info(f"""
                        fact      :   {fact}
                        silver_src_tables   :   {silver_src_tables}
                        dim_src_tables   :   {dim_src_tables}
                        tgt_table_name  :   {tgt_table_name}
                        basePath    :   {basePath}     
                        """)

            try:
                # Calling the factory method from spark_utils module to instantiate the object based on the fact
                logger.info(f"Calling the factory method from spark_utils module to instantiate the object based on the fact '{fact}'.")
                factory = Factory.get_gold_facts(fact)
                logger.info(f"Object instantiated successfully based on the fact name '{fact}'.")

                # Creating dictionary of pyspark dataframes from the source Delta tables in Silver & Gold layer
                logger.info("Creating dictionary of pyspark dataframes from the source Delta tables in Silver & Gold layer using silver_src_tables & dim_src_tables.")
                silver_df_dict, dims_df_dict = factory.get_src_df_dict(spark, silver_src_tables, dim_src_tables)
                logger.info("Dictionary of pyspark dataframes has been created successfully.")

                # Performing the necessary transformation for the fact table with the two dataframe dictionaries
                logger.info("Performing the necessary tranformation for the fact table with silver_df_dict, dims_df_dict dictionary of pyspark dataframes.")
                src_df = factory.transform_src_df(silver_df_dict, dims_df_dict)
                logger.info("Tranformation for the fact table has been done successfully.")

                # Performing upsert data load to the fact delta table in Gold layer using merge
                logger.info(f"Performing upsert data load using merge to the fact delta table '{tgt_table_name}' in Gold layer.")
                factory.load_fact(spark, basePath, src_df)
                logger.info(f"Data loaded successfully to the Gold fact delta table '{tgt_table_name}'.")

            except AnalysisException as e:
                # Logging AnalysisException details; the remaining tables are still attempted
                logger.error(f"AnalysisException Message : {str(e)}")
                failed_tables.append(tgt_table_name)

            except Exception as e:
                # Logging Exception details; the remaining tables are still attempted
                logger.critical(f"An unexpected error occurred: {str(e)}")
                failed_tables.append(tgt_table_name)

        logger.info("Data load for the Gold fact delta tables has been completed.")

        # Without this line a run in which tables failed ends on the same
        # "completed" message as a fully successful one.
        if failed_tables:
            logger.error(f"Data load failed for {len(failed_tables)} fact table(s): {failed_tables}")

    finally:
        # Shutting down the logger, also when the metadata could not be read,
        # so buffered records are flushed before the log file is moved.
        logging.shutdown()

### **7. Executing the main function**

In [None]:
if __name__ == "__main__":
    try:
        main()
    finally:
        # Moving log file from DBFS to ADLS.
        # Done in `finally` so the log still reaches log_target_dir when
        # main() raises - a failed run is when the log is needed most.
        dbutils.fs.mv(f"file:/dbfs/{log_file}", log_target_dir)