SalesLT_ProductModel 

In [0]:
# Enable Delta schema auto-merge by setting
# spark.databricks.delta.schema.autoMerge.enabled to true.
# NOTE(review): presumably needed so the upsert at the end of this notebook can
# add new columns to the target table — confirm against _upsert_silver_table.

spark.sql("SET spark.databricks.delta.schema.autoMerge.enabled = true")

In [0]:
%run "/Workspace/Utils/Utils"

In [0]:
from pyspark.sql import DataFrame, Window
from pyspark.sql import functions as F
from pyspark.sql.types import (
    IntegerType, StringType, TimestampType, StructType, StructField
)
from pyspark.sql.functions import count
from pyspark.sql.functions import col, count
from pyspark.sql.functions import count, desc

In [0]:
# Load the bronze-layer ProductModel table
# (catalog adlslmcompany_bronze, schema managed_bronze) as a DataFrame.

df = spark.table("adlslmcompany_bronze.managed_bronze.saleslt_productmodel")

In [0]:
# Preview the bronze data in the notebook output.
# NOTE(review): the positional 10 is presumably meant as a row limit — confirm
# that display() honours it; df.limit(10).display() would make the limit explicit.
df.display(10)

In [0]:
# Check the ProductModelID column for duplicate values.
# checkduplicates is not defined in this notebook (presumably provided by the
# Utils notebook loaded via %run — confirm what it reports or raises).

checkduplicates(df, "ProductModelID" )

In [0]:
# Check the Name column for duplicate values.
# checkduplicates is not defined in this notebook (presumably provided by the
# Utils notebook loaded via %run — confirm what it reports or raises).

checkduplicates(df, "Name" )

In [0]:
def silver_clean_productmodel(df):
    """Shape the bronze ProductModel DataFrame for the silver layer.

    The transformation:
      1. drops the ``CatalogDescription`` column,
      2. adds ``silves_transformed_timestamp`` set to ``current_timestamp()``,
      3. projects the remaining columns in a fixed order, casting each one
         to its target Spark type.

    Args:
        df: DataFrame that provides the ProductModelID, Name, rowguid,
            ModifiedDate and bronze_ingestion_timestamp columns.

    Returns:
        A DataFrame with exactly six columns: ProductModelID (integer),
        Name and rowguid (string), and ModifiedDate,
        bronze_ingestion_timestamp and silves_transformed_timestamp
        (timestamp).
    """
    # The "silves" spelling is intentional here: the column name is part of
    # the table contract and is also used by the expected schema defined
    # later in this notebook, so it must not be "corrected" in isolation.
    transformed_at = "silves_transformed_timestamp"

    # Output columns, in order, paired with the type each one is cast to.
    target_types = (
        ("ProductModelID", IntegerType()),
        ("Name", StringType()),
        ("rowguid", StringType()),
        ("ModifiedDate", TimestampType()),
        ("bronze_ingestion_timestamp", TimestampType()),
        (transformed_at, TimestampType()),
    )

    # Remove the unused column and stamp the rows with the transformation time.
    stamped = df.drop("CatalogDescription").withColumn(
        transformed_at, F.current_timestamp()
    )

    # Cast every output column and keep its original name.
    return stamped.select(
        *[
            F.col(column).cast(spark_type).alias(column)
            for column, spark_type in target_types
        ]
    )

In [0]:
# Schema the silver DataFrame is validated against.
# Each entry is (column name, Spark type, nullable); only Name is declared nullable.
expected_schema = StructType(
    [
        StructField(column, spark_type, nullable)
        for column, spark_type, nullable in (
            ("ProductModelID", IntegerType(), False),
            ("Name", StringType(), True),
            ("rowguid", StringType(), False),
            ("ModifiedDate", TimestampType(), False),
            ("bronze_ingestion_timestamp", TimestampType(), False),
            ("silves_transformed_timestamp", TimestampType(), False),
        )
    ]
)


In [0]:
# Apply the silver-layer cleaning function to the bronze DataFrame.

silver_df = silver_clean_productmodel(df)

In [0]:
# Compare the bronze and silver DataFrames' lengths.
# compare_lengths is not defined in this notebook; presumably it compares row
# counts to confirm the transformation dropped no rows — confirm in Utils.

compare_lengths(df, silver_df)

In [0]:
# Check the silver DataFrame against expected_schema.
# _validate_schema is not defined in this notebook; NOTE(review): confirm
# whether it also compares nullability and what it does on a mismatch.
_validate_schema(silver_df, expected_schema)

**IMPORTANT: This is a simulated project, so the upsert operation is executed directly within this notebook. In a production environment, a dedicated notebook containing only the transformation function and its validations would be developed, and all such function notebooks would be orchestrated by Azure Data Factory (ADF) pipelines or Azure Databricks (ADB) workflows. The upsert method may vary depending on whether Auto Loader, streaming, or Change Data Feed (CDF) is used.**

In [0]:
# Load the transformed DataFrame into the silver layer.
# _upsert_silver_table is not defined in this notebook (presumably provided by
# the Utils notebook loaded via %run — confirm its parameter order and merge
# semantics before changing the call below).

# Name of the target table.
target_table= "saleslt_ProductModel"   

# Schema that holds the target table.
schema = "managed_silver"

# Catalog that holds the schema.
catalog = "adlslmcompany_silver"

# Presumably the column(s) used to match existing rows during the upsert — confirm.
primary_keys = ["ProductModelID"]


_upsert_silver_table(silver_df, target_table, primary_keys, schema, catalog )