# *Silver Sales Tables Transformations*

In [4]:
from pyspark.sql.functions import *
from pyspark.sql.types import *
from delta.tables import DeltaTable

### Sales Header

In [None]:
# Load the SalesOrderHeader parquet data from the bronze container.
header_source = "abfss://bronze@dlcontoso.dfs.core.windows.net/sales/SalesOrderHeader"
df = spark.read.format("parquet").load(header_source)

In [None]:
# Remove the SalesOrderHeader columns that are not written to the silver table.
unwanted_header_columns = (
    col("CreditCardApprovalCode"),
    col("CurrencyRateID"),
    col("Comment"),
    col("rowguid"),
)
df = df.drop(*unwanted_header_columns)

In [None]:
# Upsert the header rows into the Silver_Sales.SalesOrderHeader Delta table.
# NOTE(review): the schema is spelled "Silver_Sales" in this cell and
# "silver_sales" in the other cells of this notebook -- presumably the catalog
# resolves both spellings to the same schema; confirm.
header_table = "Silver_Sales.SalesOrderHeader"
header_path = "abfss://silver@dlcontoso.dfs.core.windows.net/sales/SalesOrderHeader"

if not spark.catalog.tableExists(header_table):
    # Table is not registered yet: write the DataFrame as a Delta table at header_path.
    (
        df.write.format("delta")
        .mode("append")
        .option("path", header_path)
        .saveAsTable(header_table)
    )
else:
    # Table exists: rows matching on SalesOrderID are overwritten only when the
    # source ModifiedDate is newer; source rows without a match are inserted.
    (
        DeltaTable.forName(spark, header_table)
        .alias("trg")
        .merge(df.alias("src"), "trg.SalesOrderID = src.SalesOrderID")
        .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")
        .whenNotMatchedInsertAll()
        .execute()
    )

### Sales Details

In [None]:
# Load the SalesOrderDetail parquet data from the bronze container.
detail_source = "abfss://bronze@dlcontoso.dfs.core.windows.net/sales/SalesOrderDetail"
df = spark.read.format("parquet").load(detail_source)

In [None]:
# rowguid is not written to the silver SalesOrderDetail table.
rowguid_column = col("rowguid")
df = df.drop(rowguid_column)

In [None]:
# Upsert the order-detail rows into the silver_sales.SalesOrderDetail Delta table.
detail_table = "silver_sales.SalesOrderDetail"
detail_path = "abfss://silver@dlcontoso.dfs.core.windows.net/sales/SalesOrderDetail"
# A detail row is identified by its order id together with its detail id.
detail_match = (
    "trg.SalesOrderID = src.SalesOrderID "
    "AND trg.SalesOrderDetailID = src.SalesOrderDetailID"
)

if not spark.catalog.tableExists(detail_table):
    # Table is not registered yet: write the DataFrame as a Delta table at detail_path.
    (
        df.write.format("delta")
        .mode("append")
        .option("path", detail_path)
        .saveAsTable(detail_table)
    )
else:
    # Table exists: update matched rows only when the source ModifiedDate is
    # newer, and insert source rows that have no match.
    (
        DeltaTable.forName(spark, detail_table)
        .alias("trg")
        .merge(df.alias("src"), detail_match)
        .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")
        .whenNotMatchedInsertAll()
        .execute()
    )

### Customers

In [None]:
# Load the Customer parquet data from the bronze container; the following
# cells split it into reseller and retail-customer DataFrames.
customer_source = "abfss://bronze@dlcontoso.dfs.core.windows.net/sales/Customer"
df = spark.read.format("parquet").load(customer_source)

In [None]:
# Reseller rows are the Customer rows that carry a StoreID; rowguid is dropped.
has_store = col("StoreID").isNotNull()
df_reseller = df.filter(has_store).drop(col("rowguid"))

In [None]:
# Retail-customer rows are the Customer rows without a StoreID; rowguid is dropped.
has_no_store = col("StoreID").isNull()
df_retailCust = df.filter(has_no_store).drop(col("rowguid"))

In [None]:
# Upsert the reseller rows into the silver_sales.Reseller Delta table.
reseller_table = "silver_sales.Reseller"
reseller_path = "abfss://silver@dlcontoso.dfs.core.windows.net/sales/Reseller"

if not spark.catalog.tableExists(reseller_table):
    # Table is not registered yet: write df_reseller as a Delta table at reseller_path.
    (
        df_reseller.write.format("delta")
        .mode("append")
        .option("path", reseller_path)
        .saveAsTable(reseller_table)
    )
else:
    # Table exists: rows matching on CustomerID are updated only when the source
    # ModifiedDate is newer; source rows without a match are inserted.
    (
        DeltaTable.forName(spark, reseller_table)
        .alias("trg")
        .merge(df_reseller.alias("src"), "trg.CustomerID = src.CustomerID")
        .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")
        .whenNotMatchedInsertAll()
        .execute()
    )

In [None]:
# Upsert the retail-customer rows (Customer rows without a StoreID) into the
# silver_sales.RetailCustomer Delta table.
if spark.catalog.tableExists("silver_sales.RetailCustomer"):
    # Table exists: rows matching on CustomerID are updated only when the source
    # ModifiedDate is newer; source rows without a match are inserted.
    dlt_obj = DeltaTable.forName(spark, "silver_sales.RetailCustomer")
    dlt_obj.alias("trg").merge(df_retailCust.alias("src"), "trg.CustomerID = src.CustomerID")\
                .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")\
                .whenNotMatchedInsertAll()\
                .execute()
else:
    # First load: write df_retailCust. This branch previously wrote df_reseller,
    # which seeded the RetailCustomer table with reseller rows.
    df_retailCust.write.format("delta")\
            .mode("append")\
            .option("path", "abfss://silver@dlcontoso.dfs.core.windows.net/sales/RetailCustomer")\
            .saveAsTable("silver_sales.RetailCustomer")

### BusinessEntityAddress

In [None]:
# Silver load for BusinessEntityAddress: read the bronze parquet data, drop
# rowguid, then upsert into the silver_sales.BusinessEntityAddress Delta table.
df = spark.read.format('parquet')\
            .load('abfss://bronze@dlcontoso.dfs.core.windows.net/person/BusinessEntityAddress')

df = df.drop(col("rowguid"))

# BusinessEntityAddress links an entity to an address of a given type, so a row
# is identified by all three ids. Matching on BusinessEntityID alone pairs every
# source row of an entity with every target row of that entity, which makes the
# merge ambiguous as soon as an entity has more than one address.
# NOTE(review): AddressID and AddressTypeID are assumed to be columns of this
# dataset (they are the merge keys of the Address and AddressType loads in this
# notebook) -- confirm against the bronze schema.
merge_keys = ("BusinessEntityID", "AddressID", "AddressTypeID")
merge_condition = " AND ".join(f"trg.{key} = src.{key}" for key in merge_keys)

if spark.catalog.tableExists("silver_sales.BusinessEntityAddress"):
    # Table exists: update matched rows only when the source ModifiedDate is
    # newer, and insert source rows that have no match.
    dlt_obj = DeltaTable.forName(spark, "silver_sales.BusinessEntityAddress")
    dlt_obj.alias("trg").merge(df.alias("src"), merge_condition)\
                .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")\
                .whenNotMatchedInsertAll()\
                .execute()
else:
    # First load: write the DataFrame as a Delta table at the silver path.
    df.write.format("delta")\
        .mode("append")\
        .option("path", "abfss://silver@dlcontoso.dfs.core.windows.net/person/BusinessEntityAddress")\
        .saveAsTable("silver_sales.BusinessEntityAddress")


In [None]:
# Silver load for Address: read the bronze vw_Address_Clean parquet data, drop
# the columns that are not kept, then upsert into silver_sales.Address.
address_table = "silver_sales.Address"
address_path = "abfss://silver@dlcontoso.dfs.core.windows.net/person/Address"

df = spark.read.format("parquet").load(
    "abfss://bronze@dlcontoso.dfs.core.windows.net/person/vw_Address_Clean"
)
df = df.drop(col("rowguid"), col("AddressLine2"), col("SpatialLocation"))

if not spark.catalog.tableExists(address_table):
    # Table is not registered yet: write the DataFrame as a Delta table at address_path.
    (
        df.write.format("delta")
        .mode("append")
        .option("path", address_path)
        .saveAsTable(address_table)
    )
else:
    # Table exists: rows matching on AddressID are updated only when the source
    # ModifiedDate is newer; source rows without a match are inserted.
    (
        DeltaTable.forName(spark, address_table)
        .alias("trg")
        .merge(df.alias("src"), "trg.AddressID = src.AddressID")
        .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")
        .whenNotMatchedInsertAll()
        .execute()
    )


In [None]:
# Silver load for AddressType: read the bronze parquet data, drop rowguid, then
# upsert into silver_sales.AddressType.
address_type_table = "silver_sales.AddressType"
address_type_path = "abfss://silver@dlcontoso.dfs.core.windows.net/person/AddressType"

df = spark.read.format("parquet").load(
    "abfss://bronze@dlcontoso.dfs.core.windows.net/person/AddressType"
)
df = df.drop(col("rowguid"))

if not spark.catalog.tableExists(address_type_table):
    # Table is not registered yet: write the DataFrame as a Delta table.
    (
        df.write.format("delta")
        .mode("append")
        .option("path", address_type_path)
        .saveAsTable(address_type_table)
    )
else:
    # Table exists: rows matching on AddressTypeID are updated only when the
    # source ModifiedDate is newer; source rows without a match are inserted.
    (
        DeltaTable.forName(spark, address_type_table)
        .alias("trg")
        .merge(df.alias("src"), "trg.AddressTypeID = src.AddressTypeID")
        .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")
        .whenNotMatchedInsertAll()
        .execute()
    )


In [None]:
# Silver load for StateProvince: read the bronze parquet data, drop rowguid,
# then upsert into silver_sales.StateProvince.
state_province_table = "silver_sales.StateProvince"
state_province_path = "abfss://silver@dlcontoso.dfs.core.windows.net/person/StateProvince"

df = spark.read.format("parquet").load(
    "abfss://bronze@dlcontoso.dfs.core.windows.net/person/StateProvince"
)
df = df.drop(col("rowguid"))

if not spark.catalog.tableExists(state_province_table):
    # Table is not registered yet: write the DataFrame as a Delta table.
    (
        df.write.format("delta")
        .mode("append")
        .option("path", state_province_path)
        .saveAsTable(state_province_table)
    )
else:
    # Table exists: rows matching on StateProvinceID are updated only when the
    # source ModifiedDate is newer; source rows without a match are inserted.
    (
        DeltaTable.forName(spark, state_province_table)
        .alias("trg")
        .merge(df.alias("src"), "trg.StateProvinceID = src.StateProvinceID")
        .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")
        .whenNotMatchedInsertAll()
        .execute()
    )


In [None]:
# Silver load for CountryRegion: read the bronze parquet data as-is and upsert
# it into silver_sales.CountryRegion.
country_region_table = "silver_sales.CountryRegion"
country_region_path = "abfss://silver@dlcontoso.dfs.core.windows.net/person/CountryRegion"

df = spark.read.format("parquet").load(
    "abfss://bronze@dlcontoso.dfs.core.windows.net/person/CountryRegion"
)

if not spark.catalog.tableExists(country_region_table):
    # Table is not registered yet: write the DataFrame as a Delta table.
    (
        df.write.format("delta")
        .mode("append")
        .option("path", country_region_path)
        .saveAsTable(country_region_table)
    )
else:
    # Table exists: rows matching on CountryRegionCode are updated only when the
    # source ModifiedDate is newer; source rows without a match are inserted.
    (
        DeltaTable.forName(spark, country_region_table)
        .alias("trg")
        .merge(df.alias("src"), "trg.CountryRegionCode = src.CountryRegionCode")
        .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")
        .whenNotMatchedInsertAll()
        .execute()
    )


In [None]:
# Silver load for SalesTerritory: read the bronze parquet data, drop the
# year-to-date / last-year figures and rowguid, then upsert into
# silver_sales.SalesTerritory.
territory_table = "silver_sales.SalesTerritory"
territory_path = "abfss://silver@dlcontoso.dfs.core.windows.net/sales/SalesTerritory"

df = spark.read.format("parquet").load(
    "abfss://bronze@dlcontoso.dfs.core.windows.net/sales/SalesTerritory"
)
territory_columns_to_drop = (
    col("SalesYTD"),
    col("SalesLastYear"),
    col("CostYTD"),
    col("CostLastYear"),
    col("rowguid"),
)
df = df.drop(*territory_columns_to_drop)

if not spark.catalog.tableExists(territory_table):
    # Table is not registered yet: write the DataFrame as a Delta table.
    (
        df.write.format("delta")
        .mode("append")
        .option("path", territory_path)
        .saveAsTable(territory_table)
    )
else:
    # Table exists: rows matching on TerritoryID are updated only when the
    # source ModifiedDate is newer; source rows without a match are inserted.
    (
        DeltaTable.forName(spark, territory_table)
        .alias("trg")
        .merge(df.alias("src"), "trg.TerritoryID = src.TerritoryID")
        .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")
        .whenNotMatchedInsertAll()
        .execute()
    )


In [3]:
# Silver load for EmailAddress: read the bronze parquet data, drop rowguid,
# then upsert into silver_sales.EmailAddress.
email_table = "silver_sales.EmailAddress"
email_path = "abfss://silver@dlcontoso.dfs.core.windows.net/person/EmailAddress"
# An email row is matched on its entity id together with its email-address id.
email_match = (
    "trg.BusinessEntityID = src.BusinessEntityID "
    "AND trg.EmailAddressID = src.EmailAddressID"
)

df = spark.read.format("parquet").load(
    "abfss://bronze@dlcontoso.dfs.core.windows.net/person/EmailAddress"
)
df = df.drop(col("rowguid"))

if not spark.catalog.tableExists(email_table):
    # Table is not registered yet: write the DataFrame as a Delta table.
    (
        df.write.format("delta")
        .mode("append")
        .option("path", email_path)
        .saveAsTable(email_table)
    )
else:
    # Table exists: update matched rows only when the source ModifiedDate is
    # newer, and insert source rows that have no match.
    (
        DeltaTable.forName(spark, email_table)
        .alias("trg")
        .merge(df.alias("src"), email_match)
        .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")
        .whenNotMatchedInsertAll()
        .execute()
    )


### Store

In [None]:
# Silver load for Store: read the bronze parquet data, drop rowguid and
# Demographics, then upsert into silver_sales.Store.
store_table = "silver_sales.Store"
store_path = "abfss://silver@dlcontoso.dfs.core.windows.net/sales/Store"

df = spark.read.format("parquet").load(
    "abfss://bronze@dlcontoso.dfs.core.windows.net/sales/Store"
)
df = df.drop(col("rowguid"), col("Demographics"))

if not spark.catalog.tableExists(store_table):
    # Table is not registered yet: write the DataFrame as a Delta table.
    (
        df.write.format("delta")
        .mode("append")
        .option("path", store_path)
        .saveAsTable(store_table)
    )
else:
    # Table exists: rows matching on BusinessEntityID are updated only when the
    # source ModifiedDate is newer; source rows without a match are inserted.
    (
        DeltaTable.forName(spark, store_table)
        .alias("trg")
        .merge(df.alias("src"), "trg.BusinessEntityID = src.BusinessEntityID")
        .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")
        .whenNotMatchedInsertAll()
        .execute()
    )


### SalesReason

In [None]:
# Silver load for OrderSalesReason: read the bronze SalesOrderHeaderSalesReason
# parquet data and upsert it into silver_sales.OrderSalesReason.
order_reason_table = "silver_sales.OrderSalesReason"
order_reason_path = "abfss://silver@dlcontoso.dfs.core.windows.net/sales/OrderSalesReason"
# A row is matched on its order id together with its sales-reason id.
order_reason_match = (
    "trg.SalesOrderID = src.SalesOrderID "
    "AND trg.SalesReasonID = src.SalesReasonID"
)

df = spark.read.format("parquet").load(
    "abfss://bronze@dlcontoso.dfs.core.windows.net/sales/SalesOrderHeaderSalesReason"
)

if not spark.catalog.tableExists(order_reason_table):
    # Table is not registered yet: write the DataFrame as a Delta table.
    (
        df.write.format("delta")
        .mode("append")
        .option("path", order_reason_path)
        .saveAsTable(order_reason_table)
    )
else:
    # Table exists: update matched rows only when the source ModifiedDate is
    # newer, and insert source rows that have no match.
    (
        DeltaTable.forName(spark, order_reason_table)
        .alias("trg")
        .merge(df.alias("src"), order_reason_match)
        .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")
        .whenNotMatchedInsertAll()
        .execute()
    )

In [None]:
# Silver load for SalesReason: read the bronze parquet data as-is and upsert it
# into silver_sales.SalesReason.
sales_reason_table = "silver_sales.SalesReason"
sales_reason_path = "abfss://silver@dlcontoso.dfs.core.windows.net/sales/SalesReason"

df = spark.read.format("parquet").load(
    "abfss://bronze@dlcontoso.dfs.core.windows.net/sales/SalesReason"
)

if not spark.catalog.tableExists(sales_reason_table):
    # Table is not registered yet: write the DataFrame as a Delta table.
    (
        df.write.format("delta")
        .mode("append")
        .option("path", sales_reason_path)
        .saveAsTable(sales_reason_table)
    )
else:
    # Table exists: rows matching on SalesReasonID are updated only when the
    # source ModifiedDate is newer; source rows without a match are inserted.
    (
        DeltaTable.forName(spark, sales_reason_table)
        .alias("trg")
        .merge(df.alias("src"), "trg.SalesReasonID = src.SalesReasonID")
        .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")
        .whenNotMatchedInsertAll()
        .execute()
    )

### Special Offer

In [None]:
# Silver load for SpecialOffer: read the bronze parquet data, drop rowguid,
# then upsert into silver_sales.SpecialOffer.
special_offer_table = "silver_sales.SpecialOffer"
special_offer_path = "abfss://silver@dlcontoso.dfs.core.windows.net/sales/SpecialOffer"

df = spark.read.format("parquet").load(
    "abfss://bronze@dlcontoso.dfs.core.windows.net/sales/SpecialOffer"
)
df = df.drop(col("rowguid"))

if not spark.catalog.tableExists(special_offer_table):
    # Table is not registered yet: write the DataFrame as a Delta table.
    (
        df.write.format("delta")
        .mode("append")
        .option("path", special_offer_path)
        .saveAsTable(special_offer_table)
    )
else:
    # Table exists: rows matching on SpecialOfferID are updated only when the
    # source ModifiedDate is newer; source rows without a match are inserted.
    (
        DeltaTable.forName(spark, special_offer_table)
        .alias("trg")
        .merge(df.alias("src"), "trg.SpecialOfferID = src.SpecialOfferID")
        .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")
        .whenNotMatchedInsertAll()
        .execute()
    )

### Person

In [18]:
# Silver load for Person: read the bronze parquet data, drop the columns that
# are not kept, expand the PersonType codes into readable labels, then upsert
# into the silver_sales.Person Delta table.
df = spark.read.format("parquet")\
            .load("abfss://bronze@dlcontoso.dfs.core.windows.net/person/Person")

df = df.drop(col("rowguid"), col("AdditionalContactInfo"), col("Demographics"), col("Suffix"))

# Replace each two-letter PersonType code with its label.
person_type = col("PersonType")
df = df.withColumn(
    "PersonType",
    when(person_type == "IN", "Individual Retail Customer")
    .when(person_type == "SC", "Store Contact")
    .when(person_type == "EM", "Employee")
    .when(person_type == "SP", "Sales Person")
    .when(person_type == "VC", "Vendor Contact")
    .when(person_type == "GC", "General Contact")
    # Without a fallback, a code missing from the list above becomes NULL and
    # the original value is lost; keep the raw code instead.
    .otherwise(person_type),
)

if spark.catalog.tableExists("silver_sales.Person"):
    # Table exists: rows matching on BusinessEntityID are updated only when the
    # source ModifiedDate is newer; source rows without a match are inserted.
    dlt_obj = DeltaTable.forName(spark, "silver_sales.Person")
    dlt_obj.alias("trg").merge(df.alias("src"), "trg.BusinessEntityID = src.BusinessEntityID")\
                        .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")\
                        .whenNotMatchedInsertAll()\
                        .execute()

else:
    # First load: write the DataFrame as a Delta table at the silver path.
    df.write.format("delta")\
            .mode("append")\
            .option("path", "abfss://silver@dlcontoso.dfs.core.windows.net/person/Person")\
            .saveAsTable("silver_sales.Person")

In [8]:
# Silver load for PersonPhone: read the bronze parquet data and upsert it into
# the silver_sales.PersonPhone Delta table.
df = spark.read.format("parquet")\
            .load("abfss://bronze@dlcontoso.dfs.core.windows.net/person/PersonPhone")

# Matching on BusinessEntityID alone pairs every source phone row of a person
# with every target row of that person, which makes the merge ambiguous as soon
# as a person has more than one phone number. Match on the full row identity.
# NOTE(review): PhoneNumber and PhoneNumberTypeID are assumed to be columns of
# this dataset (PhoneNumberTypeID is the merge key of the PhoneNumberType load
# in this notebook; PhoneNumber follows the AdventureWorks Person.PersonPhone
# schema) -- confirm against the bronze schema.
merge_keys = ("BusinessEntityID", "PhoneNumber", "PhoneNumberTypeID")
merge_condition = " AND ".join(f"trg.{key} = src.{key}" for key in merge_keys)

if spark.catalog.tableExists("silver_sales.PersonPhone"):
    # Table exists: update matched rows only when the source ModifiedDate is
    # newer, and insert source rows that have no match.
    dlt_obj = DeltaTable.forName(spark, "silver_sales.PersonPhone")
    dlt_obj.alias("trg").merge(df.alias("src"), merge_condition)\
                        .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")\
                        .whenNotMatchedInsertAll()\
                        .execute()

else:
    # First load: write the DataFrame as a Delta table at the silver path.
    df.write.format("delta")\
            .mode("append")\
            .option("path", "abfss://silver@dlcontoso.dfs.core.windows.net/person/PersonPhone")\
            .saveAsTable("silver_sales.PersonPhone")

In [9]:
# Silver load for PhoneNumberType: read the bronze parquet data as-is and
# upsert it into silver_sales.PhoneNumberType.
phone_type_table = "silver_sales.PhoneNumberType"
phone_type_path = "abfss://silver@dlcontoso.dfs.core.windows.net/person/PhoneNumberType"

df = spark.read.format("parquet").load(
    "abfss://bronze@dlcontoso.dfs.core.windows.net/person/PhoneNumberType"
)

if not spark.catalog.tableExists(phone_type_table):
    # Table is not registered yet: write the DataFrame as a Delta table.
    (
        df.write.format("delta")
        .mode("append")
        .option("path", phone_type_path)
        .saveAsTable(phone_type_table)
    )
else:
    # Table exists: rows matching on PhoneNumberTypeID are updated only when the
    # source ModifiedDate is newer; source rows without a match are inserted.
    (
        DeltaTable.forName(spark, phone_type_table)
        .alias("trg")
        .merge(df.alias("src"), "trg.PhoneNumberTypeID = src.PhoneNumberTypeID")
        .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")
        .whenNotMatchedInsertAll()
        .execute()
    )

In [8]:
# Silver load for SalesPerson: read the bronze parquet data, drop SalesYTD,
# SalesLastYear and rowguid, then upsert into silver_sales.SalesPerson.
sales_person_table = "silver_sales.SalesPerson"
sales_person_path = "abfss://silver@dlcontoso.dfs.core.windows.net/sales/SalesPerson"

df = spark.read.format("parquet").load(
    "abfss://bronze@dlcontoso.dfs.core.windows.net/sales/SalesPerson"
)
df = df.drop(col("SalesYTD"), col("SalesLastYear"), col("rowguid"))

if not spark.catalog.tableExists(sales_person_table):
    # Table is not registered yet: write the DataFrame as a Delta table.
    (
        df.write.format("delta")
        .mode("append")
        .option("path", sales_person_path)
        .saveAsTable(sales_person_table)
    )
else:
    # Table exists: rows matching on BusinessEntityID are updated only when the
    # source ModifiedDate is newer; source rows without a match are inserted.
    (
        DeltaTable.forName(spark, sales_person_table)
        .alias("trg")
        .merge(df.alias("src"), "trg.BusinessEntityID = src.BusinessEntityID")
        .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")
        .whenNotMatchedInsertAll()
        .execute()
    )

### Product

In [31]:
# Silver load for Product: read the bronze parquet data, drop rowguid, expand
# the ProductLine / Class / Style codes into labels, then upsert into
# silver_sales.Product.
product_table = "silver_sales.Product"
product_path = "abfss://silver@dlcontoso.dfs.core.windows.net/production/Product"

df = spark.read.format("parquet").load(
    "abfss://bronze@dlcontoso.dfs.core.windows.net/production/Product"
)
df = df.drop(col("rowguid"))

# Per coded column: (code -> label) pairs in evaluation order, plus the label
# used when no code matches.
product_decodings = {
    "ProductLine": (
        {"R": "Road", "M": "Mountain", "T": "Touring", "S": "Standard"},
        "General",
    ),
    "Class": (
        {"H": "High", "M": "Medium", "L": "Low"},
        "Other",
    ),
    "Style": (
        {"M": "Men", "W": "Women", "U": "Universal"},
        "No-Style",
    ),
}

# Strip surrounding whitespace from the codes before they are compared.
for coded_column in product_decodings:
    df = df.withColumn(coded_column, trim(col(coded_column)))

# Build one when(...).when(...).otherwise(...) expression per coded column.
for coded_column, (labels, fallback_label) in product_decodings.items():
    decoded = None
    for code, label in labels.items():
        is_code = col(coded_column) == code
        decoded = when(is_code, label) if decoded is None else decoded.when(is_code, label)
    df = df.withColumn(coded_column, decoded.otherwise(fallback_label))

if not spark.catalog.tableExists(product_table):
    # Table is not registered yet: write the DataFrame as a Delta table.
    (
        df.write.format("delta")
        .mode("append")
        .option("path", product_path)
        .saveAsTable(product_table)
    )
else:
    # Table exists: rows matching on ProductID are updated only when the source
    # ModifiedDate is newer; source rows without a match are inserted.
    (
        DeltaTable.forName(spark, product_table)
        .alias("trg")
        .merge(df.alias("src"), "trg.ProductID = src.ProductID")
        .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")
        .whenNotMatchedInsertAll()
        .execute()
    )

In [32]:
# Silver load for ProductSubcategory: read the bronze parquet data, drop
# rowguid, then upsert into silver_sales.ProductSubcategory.
subcategory_table = "silver_sales.ProductSubcategory"
subcategory_path = "abfss://silver@dlcontoso.dfs.core.windows.net/production/ProductSubcategory"

df = spark.read.format("parquet").load(
    "abfss://bronze@dlcontoso.dfs.core.windows.net/production/ProductSubcategory"
)
df = df.drop(col("rowguid"))

if not spark.catalog.tableExists(subcategory_table):
    # Table is not registered yet: write the DataFrame as a Delta table.
    (
        df.write.format("delta")
        .mode("append")
        .option("path", subcategory_path)
        .saveAsTable(subcategory_table)
    )
else:
    # Table exists: rows matching on ProductSubcategoryID are updated only when
    # the source ModifiedDate is newer; source rows without a match are inserted.
    (
        DeltaTable.forName(spark, subcategory_table)
        .alias("trg")
        .merge(df.alias("src"), "trg.ProductSubcategoryID = src.ProductSubcategoryID")
        .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")
        .whenNotMatchedInsertAll()
        .execute()
    )

In [33]:
# Silver load for ProductCategory: read the bronze parquet data, drop rowguid,
# then upsert into silver_sales.ProductCategory.
category_table = "silver_sales.ProductCategory"
category_path = "abfss://silver@dlcontoso.dfs.core.windows.net/production/ProductCategory"

df = spark.read.format("parquet").load(
    "abfss://bronze@dlcontoso.dfs.core.windows.net/production/ProductCategory"
)
df = df.drop(col("rowguid"))

if not spark.catalog.tableExists(category_table):
    # Table is not registered yet: write the DataFrame as a Delta table.
    (
        df.write.format("delta")
        .mode("append")
        .option("path", category_path)
        .saveAsTable(category_table)
    )
else:
    # Table exists: rows matching on ProductCategoryID are updated only when the
    # source ModifiedDate is newer; source rows without a match are inserted.
    (
        DeltaTable.forName(spark, category_table)
        .alias("trg")
        .merge(df.alias("src"), "trg.ProductCategoryID = src.ProductCategoryID")
        .whenMatchedUpdateAll(condition="src.ModifiedDate > trg.ModifiedDate")
        .whenNotMatchedInsertAll()
        .execute()
    )