# Create Fact Table

## Reading Silver Data

In [0]:
# Query the silver-layer car sales Parquet files directly by their storage path.
silver_query = "SELECT * FROM parquet.`abfss://silver@datalake.dfs.core.windows.net/carsales`"
df_silver = spark.sql(silver_query)

# Render the loaded rows so the input can be inspected before it is joined.
display(df_silver)

## Reading all the Dimensions (DIMS)

In [0]:
# Load the four gold-layer dimension tables in one pass; the queries are
# evaluated in the listed order and bound to the matching DataFrame names.
df_dealer, df_branch, df_model, df_date = (
    spark.sql(dim_query)
    for dim_query in (
        "SELECT * FROM carsproject_catalog.gold.dim_dealer",
        "SELECT * FROM carsproject_catalog.gold.dim_branch",
        "SELECT * FROM carsproject_catalog.gold.dim_model",
        "SELECT * FROM carsproject_catalog.gold.dim_date2",
    )
)

## Bringing Keys to the FACT table

In [0]:
# Left-join each dimension onto the silver rows by its ID column so that every
# silver row is kept, then project only the measure columns and the key column
# taken from each dimension.
df_fact = (
    df_silver
    .join(df_branch, df_silver["Branch_ID"] == df_branch["Branch_ID"], "left")
    .join(df_dealer, df_silver["Dealer_ID"] == df_dealer["Dealer_ID"], "left")
    .join(df_model, df_silver["Model_ID"] == df_model["Model_ID"], "left")
    .join(df_date, df_silver["Date_ID"] == df_date["Date_ID"], "left")
    .select(
        df_silver["Revenue"],
        df_silver["Units_Sold"],
        df_silver["Revenue_Per_Unit"],
        df_branch["dim_branch_key"],
        df_dealer["dim_dealer_key"],
        df_model["dim_model_key"],
        df_date["dim_date_key"],
    )
)

In [0]:
# Preview the assembled fact rows (measures plus dimension keys) before writing them.
display(df_fact)

## Writing Fact Table

In [0]:
from delta.tables import DeltaTable

In [0]:
# Upsert the fact rows into the gold fact table, creating the table on the
# first run.
#
# The same fully qualified name is used for the existence check, the merge and
# the initial write. An unqualified name in `tableExists` is resolved against
# the session's current catalog/schema, so the check could disagree with the
# table that is actually merged into or written.
fact_table_name = 'carsproject_catalog.gold.fact_sales'

# A fact row is matched on the combination of all four dimension keys.
merge_keys = ['dim_branch_key', 'dim_dealer_key', 'dim_model_key', 'dim_date_key']
merge_condition = ' AND '.join(
    f'target.{key} = source.{key}' for key in merge_keys
)

if spark.catalog.tableExists(fact_table_name):
    # Incremental run: update rows whose keys already exist, insert the rest.
    # NOTE(review): source rows with a NULL key never satisfy the equality
    # condition, so they are inserted on every run; confirm the left joins
    # that build df_fact cannot leave keys unmatched.
    deltatbl = DeltaTable.forName(spark, fact_table_name)

    deltatbl.alias('target').merge(df_fact.alias('source'), merge_condition)\
        .whenMatchedUpdateAll()\
        .whenNotMatchedInsertAll()\
        .execute()

else:
    # Initial run: materialise the fact table as a Delta table at the gold
    # container path and register it under the catalog name.
    df_fact.write.format('delta')\
        .mode('overwrite')\
        .option("path", "abfss://gold@datalake.dfs.core.windows.net/fact_sales")\
        .saveAsTable(fact_table_name)

In [0]:
%sql
-- Spot-check the gold fact table after the write/merge step.
SELECT * FROM carsproject_catalog.gold.fact_sales