### **1. Importing the required modules and functions.**

In [0]:
# Importing the required modules and functions
from pyspark.sql.functions import col, date_trunc, to_date, to_timestamp, current_timestamp
from delta.tables import DeltaTable
from abc import ABC, abstractmethod

### **2. Abstract base class for factory interface.**

In [0]:
# Creating abstract base class for factory interface
class Gold_Facts(ABC):
    """Interface for the extract / transform / load steps of a Gold-layer fact table.

    Concrete fact classes must override all three methods below; the base
    implementations only raise ``NotImplementedError``.
    """

    @abstractmethod
    def get_src_df_dict(self, spark, silver_src_tables, dim_src_tables):
        """Read the source Delta tables (Silver & Gold layer) into dictionaries of PySpark DataFrames."""
        raise NotImplementedError(
            "This method must be overridden by subclasses"
        )

    @abstractmethod
    def transform_src_df(self, silver_df_dict, dims_df_dict):
        """Compute the fact-table transformation from the source DataFrame dictionaries."""
        raise NotImplementedError(
            "This method must be overridden by subclasses"
        )

    @abstractmethod
    def load_fact(self, spark, basePath, src_df):
        """Load the source PySpark DataFrame into the fact Delta table in the Gold layer."""
        raise NotImplementedError(
            "This method must be overridden by subclasses"
        )

### **3. Concrete classes implementing the abstract methods.**

In [0]:
class Sales(Gold_Facts):
    """Extracts, transforms and upserts the Sales fact Delta table in the Gold layer."""

    def get_src_df_dict(self, spark, silver_src_tables, dim_src_tables):
        """Read the source Delta tables into two dictionaries of PySpark DataFrames.

        Args:
            spark: Spark session handed to ``DeltaTable.forPath``.
            silver_src_tables: Mapping of table name -> Delta table path. Each
                table is read in full.
            dim_src_tables: Mapping of table name -> Delta table path. Each
                table is filtered to rows where ``is_current == 1``.

        Returns:
            Tuple ``(silver_df_dict, dims_df_dict)`` keyed by the same names as
            the input mappings.
        """
        # Tables read without any row filter
        silver_df_dict = {key : DeltaTable.forPath(spark, value).toDF() for (key, value) in silver_src_tables.items()}

        # Dimension tables: keep only the rows flagged as current
        dims_df_dict = {key : DeltaTable.forPath(spark, value).toDF().filter(col("is_current") == 1) for (key, value) in dim_src_tables.items()}

        return silver_df_dict, dims_df_dict


    def transform_src_df(self, silver_df_dict, dims_df_dict):
        """Join the source DataFrames and project the Sales fact columns.

        Args:
            silver_df_dict: Must contain ``"dim_olist_date"`` and
                ``"silver_olist_order_items"``.
            dims_df_dict: Must contain ``"dim_olist_orders"``,
                ``"dim_olist_customers"``, ``"dim_olist_products"`` and
                ``"dim_olist_sellers"``.

        Returns:
            DataFrame with one row per joined order item of orders whose
            ``order_status`` is ``"Delivered"``.
        """
        # Date -> orders (on purchase day) -> customers -> order items -> products -> sellers.
        # NOTE(review): the date table is looked up in silver_df_dict, presumably
        # because it carries no is_current flag - confirm against the caller.
        sales_df = silver_df_dict["dim_olist_date"].alias("dt") \
            .join(dims_df_dict["dim_olist_orders"].alias("o"), (to_date(col("dt.date_skey"), "yyyyMMdd") == date_trunc("day", col("o.order_purchase_timestamp").cast("date")))) \
            .join(dims_df_dict["dim_olist_customers"].alias("c"), (col("o.customer_id") == col("c.customer_id"))) \
            .join(silver_df_dict["silver_olist_order_items"].alias("oi"), (col("o.order_id") == col("oi.order_id"))) \
            .join(dims_df_dict["dim_olist_products"].alias("p"), (col("oi.product_id") == col("p.product_id"))) \
            .join(dims_df_dict["dim_olist_sellers"].alias("s"), (col("p.seller_id") == col("s.seller_id"))) \
            .where(col("o.order_status") == "Delivered")

        sales_df = sales_df.select(
            col("dt.date_skey").alias("date_skey"),
            col("c.customer_skey").alias("customer_skey"),
            col("o.order_skey").alias("order_skey"),
            col("p.product_skey").alias("product_skey"),
            col("s.seller_skey").alias("seller_skey"),
            # Spark datetime patterns: HH = hour-of-day (0-23), mm = minute-of-hour.
            # The previous "hh:mi:ss" pattern is not valid in Spark ("mi" is not a
            # pattern, and "hh" is the 12-hour clock-hour).
            to_timestamp(col("o.order_purchase_timestamp"), "yyyy-MM-dd HH:mm:ss").alias("order_purchase_timestamp"),
            col("dt.cal_month_name").alias("order_purchase_month"),
            col("p.product_category_name_english").alias("product_category_name_english"),
            col("oi.item_quantity").cast("int").alias("sales_quantity"),
            col("p.price").cast("float").alias("price_per_unit"),
            col("p.freight_value").cast("float").alias("freight_value_per_unit"),
            # Net sales = (unit price + unit freight) * quantity, written as two products
            ((col("p.price") * col("oi.item_quantity")) + ((col("p.freight_value") * col("oi.item_quantity")))).alias("net_sales_amount"),
            col("c.customer_state").alias("customer_state"),
            col("c.customer_state_code").alias("customer_state_code")
            )

        return sales_df


    def load_fact(self, spark, basePath, src_df):
        """Upsert ``src_df`` into the Sales fact Delta table.

        Args:
            spark: Spark session handed to ``DeltaTable.forPath``.
            basePath: Table location without the scheme; ``abfss://`` is
                prepended here.
            src_df: DataFrame to merge, with the columns produced by
                ``transform_src_df``.

        Returns:
            ``True`` once the merge has executed.
        """
        # Reading sales fact delta table from gold layer
        fact_olist_sales = DeltaTable.forPath(spark, f"abfss://{basePath}")

        # Rows match on the five surrogate keys; matched rows are updated,
        # unmatched source rows are inserted. load_date is stamped in both cases.
        fact_olist_sales.alias("tgt").merge(
        src_df.alias("sales_df"),
        """((
            (sales_df.date_skey = tgt.date_skey) AND
            (sales_df.customer_skey = tgt.customer_skey) AND
            (sales_df.order_skey = tgt.order_skey) AND
            (sales_df.product_skey = tgt.product_skey) AND
            (sales_df.seller_skey = tgt.seller_skey)
        ))""") \
        .whenMatchedUpdate(
        set = {
            "order_purchase_timestamp": "sales_df.order_purchase_timestamp",
            "order_purchase_month": "sales_df.order_purchase_month",
            "product_category_name_english": "sales_df.product_category_name_english",
            "sales_quantity": "sales_df.sales_quantity",
            "price_per_unit": "sales_df.price_per_unit",
            "freight_value_per_unit": "sales_df.freight_value_per_unit",
            "net_sales_amount": "sales_df.net_sales_amount",
            "customer_state": "sales_df.customer_state",
            "customer_state_code": "sales_df.customer_state_code",
            "load_date": current_timestamp()
        }
        ).whenNotMatchedInsert(
        values = {
            "date_skey": "sales_df.date_skey",
            "customer_skey": "sales_df.customer_skey",
            "order_skey": "sales_df.order_skey",
            "product_skey": "sales_df.product_skey",
            "seller_skey": "sales_df.seller_skey",
            "order_purchase_timestamp": "sales_df.order_purchase_timestamp",
            "order_purchase_month": "sales_df.order_purchase_month",
            "product_category_name_english": "sales_df.product_category_name_english",
            "sales_quantity": "sales_df.sales_quantity",
            "price_per_unit": "sales_df.price_per_unit",
            "freight_value_per_unit": "sales_df.freight_value_per_unit",
            "net_sales_amount": "sales_df.net_sales_amount",
            "customer_state": "sales_df.customer_state",
            "customer_state_code": "sales_df.customer_state_code",
            "load_date": current_timestamp()
        }
        ).execute()

        return True

In [0]:
class Payments(Gold_Facts):
    """Extracts, transforms and upserts the Payments fact Delta table in the Gold layer."""

    def get_src_df_dict(self, spark, silver_src_tables, dim_src_tables):
        """Read the source Delta tables into two dictionaries of PySpark DataFrames.

        Tables in ``silver_src_tables`` are read in full; tables in
        ``dim_src_tables`` are filtered to rows where ``is_current == 1``.
        Returns the tuple ``(silver_df_dict, dims_df_dict)``.
        """
        silver_df_dict = {
            name: DeltaTable.forPath(spark, path).toDF()
            for name, path in silver_src_tables.items()
        }

        dims_df_dict = {
            name: DeltaTable.forPath(spark, path).toDF().filter(col("is_current") == 1)
            for name, path in dim_src_tables.items()
        }

        return silver_df_dict, dims_df_dict


    def transform_src_df(self, silver_df_dict, dims_df_dict):
        """Join the source DataFrames and project the Payments fact columns.

        Expects ``"dim_olist_date"`` and ``"silver_olist_order_payments"`` in
        ``silver_df_dict`` and ``"dim_olist_orders"`` / ``"dim_olist_customers"``
        in ``dims_df_dict``.
        """
        # Aliased inputs
        dates = silver_df_dict["dim_olist_date"].alias("dt")
        orders = dims_df_dict["dim_olist_orders"].alias("o")
        customers = dims_df_dict["dim_olist_customers"].alias("c")
        order_payments = silver_df_dict["silver_olist_order_payments"].alias("op")

        # Join conditions: calendar day of approval, then customer, then order
        on_approval_day = (
            to_date(col("dt.date_skey"), "yyyyMMdd")
            == date_trunc("day", col("o.order_approved_at").cast("date"))
        )
        on_customer = col("o.customer_id") == col("c.customer_id")
        on_order = col("o.order_id") == col("op.order_id")

        joined_df = dates.join(orders, on_approval_day)
        joined_df = joined_df.join(customers, on_customer)
        joined_df = joined_df.join(order_payments, on_order)

        # Output columns of the fact table, in order
        fact_columns = [
            col("dt.date_skey").alias("date_skey"),
            col("c.customer_skey").alias("customer_skey"),
            col("o.order_skey").alias("order_skey"),
            col("o.order_approved_at").alias("payment_approved_at"),
            col("dt.cal_month_name").alias("order_purchase_month"),
            col("op.payment_sequential").alias("payment_sequential"),
            col("op.payment_type").alias("payment_type"),
            col("op.payment_value").alias("payment_value"),
            col("o.order_status").alias("order_status"),
        ]

        return joined_df.select(*fact_columns)


    def load_fact(self, spark, basePath, src_df):
        """Upsert ``src_df`` into the Payments fact Delta table at ``abfss://{basePath}``.

        Returns ``True`` once the merge has executed.
        """
        target_table = DeltaTable.forPath(spark, f"abfss://{basePath}")

        # Columns that identify a fact row vs. columns refreshed on a match
        key_cols = ("date_skey", "customer_skey", "order_skey", "payment_sequential")
        attr_cols = (
            "payment_approved_at",
            "order_purchase_month",
            "payment_type",
            "payment_value",
            "order_status",
        )

        merge_condition = """((
            (payments_df.date_skey = tgt.date_skey) AND
            (payments_df.customer_skey = tgt.customer_skey) AND
            (payments_df.order_skey = tgt.order_skey) AND
            (payments_df.payment_sequential = tgt.payment_sequential)
        ))"""

        # Matched rows: refresh the non-key columns and stamp the load time
        update_map = {name: f"payments_df.{name}" for name in attr_cols}
        update_map["load_date"] = current_timestamp()

        # New rows: insert every column and stamp the load time
        insert_map = {name: f"payments_df.{name}" for name in key_cols + attr_cols}
        insert_map["load_date"] = current_timestamp()

        merge_builder = target_table.alias("tgt").merge(src_df.alias("payments_df"), merge_condition)
        merge_builder = merge_builder.whenMatchedUpdate(set = update_map)
        merge_builder = merge_builder.whenNotMatchedInsert(values = insert_map)
        merge_builder.execute()

        return True

### **4. Factory class with static method**

In [0]:
# Factory class with static method
class Factory:
    """Factory that maps a fact-table name to its ``Gold_Facts`` implementation."""

    # Method to return the instance based on the target fact table
    @staticmethod
    def get_gold_facts(fact: str) -> Gold_Facts:
        """Return a new instance of the concrete class registered for ``fact``.

        Args:
            fact: Name of the target fact table; ``"Sales"`` or ``"Payments"``.

        Returns:
            An instance of the matching ``Gold_Facts`` subclass.

        Raises:
            ValueError: If ``fact`` is not a registered fact name. The previous
                implementation looped forever printing a message in this case.
        """
        # Map names to classes (not instances) so only the requested one is built
        facts = {
            "Sales": Sales,
            "Payments": Payments,
        }

        try:
            fact_cls = facts[fact]
        except KeyError:
            raise ValueError(
                f"Unknown fact : {fact}. Expected one of: {', '.join(facts)}."
            ) from None

        return fact_cls()