## Customers

In [None]:
# Read the CRM customers from the bronze layer; no transformation is applied.
df_customers = spark.sql(
    "SELECT * FROM AdventureWorks_Lakehouse.Ops_Bronze.crm_customers"
)

# Replace the contents of Ops_Silver.Customers with the rows read above.
(
    df_customers.write
    .mode("overwrite")
    .saveAsTable("Ops_Silver.Customers")
)

## Companies

In [None]:
# Read the CRM companies from the bronze layer; no transformation is applied.
df_companies = spark.sql(
    "SELECT * FROM AdventureWorks_Lakehouse.Ops_Bronze.crm_companies"
)

# Replace the contents of Ops_Silver.Companies with the rows read above.
(
    df_companies.write
    .mode("overwrite")
    .saveAsTable("Ops_Silver.Companies")
)

## Products

In [18]:
from pyspark.sql.functions import trim,col,expr

# Only finished goods without a sell-end date are kept (filter is in the SQL).
df_product = spark.sql("SELECT * FROM AdventureWorks_Lakehouse.Ops_Bronze.erp_product WHERE FinishedGoodsFlag=1 and SellEndDate IS NULL")

# Single pipeline over the filtered products:
#   1. rename Name -> ProductName
#   2. trim surrounding whitespace from the Style / ProductLine codes
#   3. expand the codes into readable labels; any value not listed in a CASE
#      (including NULL) falls through to the ELSE label.
df_product = (
    df_product
    .withColumnsRenamed({"Name": "ProductName"})
    .withColumn("Style", trim(col("Style")))
    .withColumn("ProductLine", trim(col("ProductLine")))
    .withColumn(
        "Style",
        expr("""
            CASE 
            WHEN Style = 'U' THEN 'Universal'
            WHEN Style = 'M' THEN 'Mens'
            WHEN Style = 'W' THEN 'Womens'
            ELSE 'None'
            END
            """),
    )
    .withColumn(
        "ProductLine",
        expr("""
            CASE 
            WHEN ProductLine = 'R' THEN 'Road'
            WHEN ProductLine = 'M' THEN 'Mountain'
            WHEN ProductLine = 'T' THEN 'Touring'
            WHEN ProductLine = 'S' THEN 'Standard'
            ELSE 'Not for Sale'
            END
            """),
    )
)

StatementMeta(, 7364c774-5593-43ab-84b2-8eb29830778c, 20, Finished, Available, Finished)

In [19]:
# Product categories from bronze, with the generic "Name" column exposed as
# "ProductCategory" (the name the later join/select cells refer to).
df_productcategory = (
    spark.sql("SELECT * FROM AdventureWorks_Lakehouse.Ops_Bronze.erp_productcategory")
    .withColumnsRenamed({"Name": "ProductCategory"})
)

StatementMeta(, 7364c774-5593-43ab-84b2-8eb29830778c, 21, Finished, Available, Finished)

In [20]:
# Product subcategories from bronze, with the generic "Name" column exposed as
# "ProductSubCategory" (the name the later join/select cells refer to).
df_productsubcategory = (
    spark.sql("SELECT * FROM AdventureWorks_Lakehouse.Ops_Bronze.erp_productsubcategory")
    .withColumnsRenamed({"Name": "ProductSubCategory"})
)

StatementMeta(, 7364c774-5593-43ab-84b2-8eb29830778c, 22, Finished, Available, Finished)

In [21]:
# Product models from bronze, with the generic "Name" column exposed as
# "ProductModelName" (the name the later join/select cells refer to).
df_productmodel = (
    spark.sql("SELECT * FROM AdventureWorks_Lakehouse.Ops_Bronze.erp_productmodel")
    .withColumnsRenamed({"Name": "ProductModelName"})
)

StatementMeta(, 7364c774-5593-43ab-84b2-8eb29830778c, 23, Finished, Available, Finished)

In [22]:
# Pair every subcategory with its parent category. The join condition is an
# explicit column comparison (not a shared-name join), so the joined frame
# still carries ProductCategoryID from both sides.
df_product_category_joined = df_productcategory.join(
    df_productsubcategory,
    on=df_productcategory["ProductCategoryID"] == df_productsubcategory["ProductCategoryID"],
    how="inner",
)

# Keep only the two labels plus the subcategory key used to join onto products.
df_product_category_final = df_product_category_joined.select(
    "ProductCategory",
    "ProductSubCategory",
    "ProductSubcategoryID",
)

StatementMeta(, 7364c774-5593-43ab-84b2-8eb29830778c, 24, Finished, Available, Finished)

In [23]:
# Left joins: a product is kept even when it has no matching subcategory
# or product model.
df_product_joined = df_product.join(
    df_product_category_final,
    on=df_product["ProductSubcategoryID"] == df_product_category_final["ProductSubcategoryID"],
    how="left",
)
df_product_joined_final = df_product_joined.join(
    df_productmodel,
    on=df_product_joined["ProductModelID"] == df_productmodel["ProductModelID"],
    how="left",
)

# Columns published to the silver Products table, in output order.
df_product_final = df_product_joined_final.select(
    "ProductID",
    "ProductNumber",
    "ProductName",
    "ProductCategory",
    "ProductSubCategory",
    "ProductLine",
    "Style",
    "Color",
    "SellStartDate",
    "MakeFlag",
    "ProductModelName",
)


StatementMeta(, 7364c774-5593-43ab-84b2-8eb29830778c, 25, Finished, Available, Finished)

In [24]:
# Persist the transformed products dataframe to Ops_Silver.Products,
# replacing whatever the table currently holds.
(
    df_product_final.write
    .mode("overwrite")
    .saveAsTable("Ops_Silver.Products")
)

StatementMeta(, 7364c774-5593-43ab-84b2-8eb29830778c, 26, Finished, Available, Finished)

## Sales

In [1]:
# Sales order headers straight from bronze.
df_salesorderheader = spark.sql("SELECT * FROM AdventureWorks_Lakehouse.Ops_Bronze.erp_salesorderheader")

# Sales territories, with their columns prefixed "SalesTerritory..." so they
# are unambiguous once joined to the order header.
df_salesterritory = (
    spark.sql("SELECT * FROM AdventureWorks_Lakehouse.Ops_Bronze.erp_salesterritory")
    .withColumnsRenamed({
        "TerritoryID": "SalesTerritoryID",
        "Name": "SalesTerritoryName",
        "CountryRegionCode": "SalesTerritoryCountry",
        "Group": "SalesTerritoryGroup",
    })
)

# Left join keeps orders without a matching territory; afterwards only the
# order identifiers/dates and the territory descriptors are retained.
df_salesorderheader_join_salesterritory = (
    df_salesorderheader
    .join(
        df_salesterritory,
        on=df_salesorderheader["TerritoryID"] == df_salesterritory["SalesTerritoryID"],
        how="left",
    )
    .select(
        "SalesOrderID",
        "OrderDate",
        "ShipDate",
        "OnlineOrderFlag",
        "AccountNumber",
        "CustomerID",
        "SalesOrderNumber",
        "SalesTerritoryID",
        "SalesTerritoryName",
        "SalesTerritoryCountry",
        "SalesTerritoryGroup",
    )
)

# Add PersonID / StoreID for each order's customer (left join on CustomerID).
df_salescustomer = spark.sql("SELECT CustomerID,PersonID,StoreID FROM AdventureWorks_Lakehouse.Ops_Bronze.erp_salescustomer")
df_salesorderheader_final = df_salesorderheader_join_salesterritory.join(
    df_salescustomer,
    on='CustomerID',
    how="left",
)

StatementMeta(, 1edca27b-aa7f-4414-9a41-9a9003330092, 3, Finished, Available, Finished)

In [2]:
# Order line items from bronze.
df_salesorderdetail = spark.sql("SELECT * FROM AdventureWorks_Lakehouse.Ops_Bronze.erp_salesorderdetail")

# Columns removed from the joined sales frame before it is published.
columns_to_drop = ['CarrierTrackingNumber', 'rowguid', 'ModifiedDate']

# Inner join on SalesOrderID: only line items with a matching enriched header
# (built in the previous cell) survive.
df_sales = (
    df_salesorderdetail
    .join(df_salesorderheader_final, on='SalesOrderID', how="inner")
    .drop(*columns_to_drop)
)

StatementMeta(, 1edca27b-aa7f-4414-9a41-9a9003330092, 4, Finished, Available, Finished)

In [3]:
# Persist the joined sales dataframe (order details + enriched headers) to
# Ops_Silver.Sales, replacing whatever the table currently holds.
(
    df_sales.write
    .mode("overwrite")
    .saveAsTable("Ops_Silver.Sales")
)

StatementMeta(, 1edca27b-aa7f-4414-9a41-9a9003330092, 5, Finished, Available, Finished)

## Sales Reason

In [140]:
# Order-to-reason link table and the sales reason lookup, both from bronze.
df_salesorderheadersalesreason = spark.sql("SELECT * FROM AdventureWorks_Lakehouse.Ops_Bronze.erp_salesorderheadersalesreason")
df_salesreason = spark.sql("SELECT * FROM AdventureWorks_Lakehouse.Ops_Bronze.erp_salesreason")

# Columns removed from the joined frame before it is published.
columns_to_drop = ['ModifiedDate']

# Left join on SalesReasonID keeps every link row even when the reason lookup
# has no matching entry.
df_salesreason_join = (
    df_salesorderheadersalesreason
    .join(df_salesreason, on='SalesReasonID', how="left")
    .drop(*columns_to_drop)
)

StatementMeta(, , , Waiting, , Waiting)

In [None]:
# Persist the joined sales reason dataframe to Ops_Silver.SalesReason,
# replacing whatever the table currently holds.
(
    df_salesreason_join.write
    .mode("overwrite")
    .saveAsTable("Ops_Silver.SalesReason")
)

StatementMeta(, , , Waiting, , Waiting)

In [None]:
# NOTE(review): this cell repeats the customers load from the "## Customers"
# section at the top of the notebook (same query, same target table); it
# appears redundant — confirm whether it can be removed.
df_customers = spark.sql(
    "SELECT * FROM AdventureWorks_Lakehouse.Ops_Bronze.crm_customers"
)

# Replace the contents of Ops_Silver.Customers with the rows read above.
(
    df_customers.write
    .mode("overwrite")
    .saveAsTable("Ops_Silver.Customers")
)

StatementMeta(, , , Waiting, , Waiting)