### **1. Importing required pyspark and user defined modules**

In [None]:
# Importing SparkSession to build the session and AnalysisException for handling exceptions
from pyspark.sql import SparkSession
from pyspark.sql.utils import AnalysisException

In [None]:
%run "./custom_logging"

In [None]:
%run "./metadata"

In [None]:
%run "./spark_utils"

### **2. Creating widgets to receive the parameters from ADF**

In [None]:
# Each ADF parameter arrives through a text widget: the widget is declared
# with an empty default and its current value is read straight afterwards.

# Name of the job being run
dbutils.widgets.text("job_name", "")
job_name = dbutils.widgets.get("job_name")

# Location handed to ReadJson for the metadata lookup
dbutils.widgets.text("file_location", "")
file_location = dbutils.widgets.get("file_location")

# Destination the log file is moved to once the run is over
dbutils.widgets.text("log_target_dir", "")
log_target_dir = dbutils.widgets.get("log_target_dir")

### **3. Configuring the custom logger from custom_logging module**

In [None]:
# Creating object for the CustomLogger class
# NOTE(review): CustomLogger is not defined in this notebook; presumably it is
# provided by the "./custom_logging" notebook executed via %run — confirm.
cust_log = CustomLogger()
# custom_logger(job_name) is unpacked into `logger`, which every later cell logs
# through, and `log_file`, which the last cell uses to build the source path
# when moving the log file.
logger, log_file = cust_log.custom_logger(job_name)

### **4. Starting pipeline log**

In [None]:
# Pipeline log: record which job is running and mark the start of the
# bronze archive delta data load in the job log.
logger.info(f"JOB_NAME : {job_name}")
logger.info(f"The {job_name} for bronze archive delta data load is started.")

### **5.  Establish SparkSession**

In [None]:
logger.info("Establishing sparksession with service principal authentication configs.")

# Build (or reuse) the SparkSession with the OAuth client-credentials settings
# for the storage account. The client id, client secret and directory (tenant)
# id are pulled from the secret scope at call time, so nothing sensitive is
# hardcoded in the notebook.
try:
    spark = (
        SparkSession.builder
        .appName("Olist_Bronze_Archive")
        .config("fs.azure.account.auth.type.<storage-account-name>.dfs.core.windows.net", "OAuth")
        .config("fs.azure.account.oauth.provider.type.<storage-account-name>.dfs.core.windows.net", "org.apache.hadoop.fs.azurebfs.oauth2.ClientCredsTokenProvider")
        .config("fs.azure.account.oauth2.client.id.<storage-account-name>.dfs.core.windows.net", dbutils.secrets.get(scope="<secret-scope-name>", key="sp-client-id"))
        .config("fs.azure.account.oauth2.client.secret.<storage-account-name>.dfs.core.windows.net", dbutils.secrets.get(scope="<secret-scope-name>", key="sp-client-pwd"))
        .config("fs.azure.account.oauth2.client.endpoint.<storage-account-name>.dfs.core.windows.net", f"https://login.microsoftonline.com/{dbutils.secrets.get(scope='<secret-scope-name>', key='sp-directory-id')}/oauth2/token")
        .getOrCreate()
    )
except Exception as e:
    # Logging Exception details
    # NOTE(review): the error is logged but not re-raised, so later cells still
    # run after a failure here — confirm that this best-effort behaviour is intended.
    logger.critical(f"An unexpected error occurred: {str(e)}")
else:
    # Report success only when the builder chain actually completed; previously
    # this message was also written right after a logged failure.
    logger.info("Sparksession with service principal authentication configs established successfully.")

### **6. Main function definition**

In [None]:
def main():
    """Run the bronze archive delta load for every entry in the archive metadata.

    The metadata is obtained from ``ReadJson(file_location).get_metadata()`` and
    is iterated item by item. Each item must provide the keys ``app_nm``,
    ``file_name``, ``file_path``, ``columns``, ``table_name`` and ``basePath``.
    For each item the ``SparkUtils`` helpers are called in this order:
    ``read_parquet_file`` -> ``add_audit_column`` -> ``del_archive_records`` ->
    ``load_archive_table``.

    A failure while processing one item is logged and does not stop the
    remaining items; the tables that failed are summarised in one error-level
    log line at the end of the run.

    Relies on the notebook-level names ``logger``, ``spark``, ``file_location``,
    ``ReadJson``, ``SparkUtils`` and ``AnalysisException``.

    Returns:
        None
    """
    # Imported locally so that the final logging.shutdown() does not depend on
    # another notebook having placed `logging` into the shared namespace.
    import logging

    # `__name__` is the module name ("__main__"), not the function name, so the
    # function name is spelled out explicitly.
    logger.info("In the main function.")

    #Creating objects for the user defined modules
    logger.info("Creating objects for the user defined modules.")

    meta_data = ReadJson(file_location)
    spark_utils = SparkUtils()
    archive_meta = meta_data.get_metadata()

    logger.info("Objects created successfully.")

    # Starting the bronze archive delta table data load
    logger.info("Starting data load for the Bronze archive delta tables.")

    # Tables whose load raised, reported together once the loop is finished
    failed_tables = []

    # Looping through the archive metadata variable 'archive_meta' for performing the data load
    for item in archive_meta:

        # Pipeline variables from archive_meta metadata variable
        app_nm = item["app_nm"]
        file_name = item["file_name"]
        file_path = item["file_path"]
        columns = item["columns"]
        table_name = item["table_name"]
        basePath = item["basePath"]

        logger.info(f"Data load started for application '{app_nm}'.")

        logger.info(f"""
                        app_nm      :   {app_nm}
                        file_name   :   {file_name}
                        file_path   :   {file_path}
                        columns     :   {columns}
                        table_name  :   {table_name}
                        basePath    :   {basePath}     
                        """)

        try:
            # Creating pyspark dataframe from bronze staging table
            logger.info(f"Creating pyspark dataframe from bronze staging parquet file '{file_name}'.")
            df = spark_utils.read_parquet_file(spark, file_path, columns)
            logger.info(f"Dataframe has been created successfully from bronze staging parquet file '{file_name}'.")

            # Adding load_date audit column to the dataframe
            logger.info("Adding load_date audit column to the dataframe.")
            bronze_df = spark_utils.add_audit_column(df)
            logger.info("Audit column added to the dataframe successfully.")

            # Deleting 1 year older and current day records if any from the bronze archive delta table.
            logger.info(f"Deleting 1 year older records and current day records if any from the '{table_name}' table.")
            spark_utils.del_archive_records(spark, basePath)
            logger.info(f"Deleted 1 year older records and current day records if any from the '{table_name}' table.")

            # Loading data to the bronze archive delta table
            logger.info(f"Loading data to the bronze archive delta table '{table_name}'.")
            spark_utils.load_archive_table(bronze_df, basePath)
            logger.info(f"Data loaded successfully to the bronze archive delta table '{table_name}'.")

        except AnalysisException as e:
            # Logging AnalysisException details
            logger.error(f"AnalysisException Message : {str(e)}")
            failed_tables.append(table_name)

        except Exception as e:
            # Logging Exception details
            logger.critical(f"An unexpected error occurred: {str(e)}")
            failed_tables.append(table_name)

    # Make per-table failures visible at the end of the log instead of leaving
    # only the generic completion message.
    if failed_tables:
        failed_names = ", ".join(str(name) for name in failed_tables)
        logger.error(f"Data load failed for {len(failed_tables)} table(s): {failed_names}")

    # Shutting down the logger
    logger.info("Data load for the Bronze archive delta tables has been completed.")
    logging.shutdown()

### **7. Executing the main function**

In [None]:
if __name__ == "__main__":
    try:
        main()
    finally:
        # Moving log file from DBFS to ADLS
        # Done in `finally` so the log is still shipped to log_target_dir when
        # main() raises; the original exception then propagates as before.
        dbutils.fs.mv(f"file:/dbfs/{log_file}", log_target_dir)

'\nif __name__ == "__main__":\n    main()\n    \n    # Moving log file from DBFS to ADLS\n    dbutils.fs.mv(f"file:/dbfs/{log_file}", log_target_dir)\n'