## Create Dimension and Fact tables (Gold layer)

In [None]:
import dlt # Delta Live Tables (DLT) module: provides the @dlt.table decorator and dlt.read used to declare pipeline tables
from pyspark.sql.functions import col, expr # col and expr build column expressions for Spark DataFrame transformations

In [None]:
# Gold-layer dimension table Dim1: passes the cleansed silver-layer data through unchanged.
@dlt.table (
    {Dim1 table Schema} # Template placeholder: replace with the schema of the Dim1 table (not valid Python until filled in)
) 
def Dim1():
    return dlt.read("Dim1_cleansed") # Reads the "Dim1_cleansed" dataset (silver layer) with dlt.read and returns it as-is.

# Gold-layer dimension table Dim2: passes the cleansed silver-layer data through unchanged.
@dlt.table (
    {Dim2 table Schema} # Template placeholder: replace with the schema of the Dim2 table (not valid Python until filled in)
) 
def Dim2():
    return dlt.read("Dim2_cleansed") # Reads the "Dim2_cleansed" dataset (silver layer) with dlt.read and returns it as-is.

In [None]:
@dlt.table
def Fact_table():
    """Build the gold-layer fact table from Dim1, Dim2 and Fact_cleansed.

    Dim1 (alias ``s``), Dim2 (alias ``p``) and the silver-layer
    ``Fact_cleansed`` dataset (alias ``c``) are inner-joined on the shared
    ``key_column``; the required columns are then selected, together with
    two derived columns.

    All column names below (``key_column``, ``column1`` ... ``column5``,
    ``old_column_name`` and the output aliases) are template placeholders
    to be replaced with the actual column names.

    Returns:
        The joined and projected DataFrame.
    """
    s = dlt.read("Dim1").alias("s")  # Dim1 dimension table
    p = dlt.read("Dim2").alias("p")  # Dim2 dimension table
    c = dlt.read("Fact_cleansed").alias("c")  # cleansed fact data from the silver layer
    return (
        # Inner joins: only rows whose key_column is present in all three inputs survive.
        s.join(p, s.key_column == p.key_column, "inner")
        .join(c, s.key_column == c.key_column, "inner")
        .select(
            "s.column1",  # from Dim1
            "c.column2",  # from Fact_cleansed
            "p.column3",  # from Dim2
            # Derived column: old_column_name cast to a date type.
            col("s.old_column_name").cast("date").alias("new_column_name"),
            "s.column4",  # from Dim1
            "s.column5",  # from Dim1
            # Derived column: product of column4 and column5. It needs its own
            # alias: reusing "new_column_name" would put two identically named
            # columns in the output, which cannot be stored as a Delta table.
            expr("s.column4 * s.column5").alias("new_product_column_name"),
        )
    )
     
     

The DataFrame returned by the `Fact_table` function contains the joined and transformed data from the `Dim1`, `Dim2`, and `Fact_cleansed` tables.