## Transformations (Silver layer)

In [None]:
# Target schema for the silver layer. Replace the "Schema" place-holder with
# the structure the data should have; it is passed to from_json() in the
# table definitions of this notebook.
new_schema = {Schema}

In [None]:
import dlt  # Delta Live Tables module: provides the @dlt.table decorator and dlt.read() used in this notebook
from pyspark.sql.functions import *  # Spark SQL column functions (col, explode, from_json, from_unixtime, regexp_replace, ...)

In [None]:
@dlt.table
def Dim1_cleansed():
    return (
        dlt.read("Dim1_raw") # dlt.read() function is used to read data from a source i.e. bronze layer.
        .select(*) # selects all columns from the DataFrame read from the source.
        .withColumnRenamed('OldColumnName', 'New_Column_Name') #changing column name
        .withColumn("column_name", regexp_replace("column_name", "string_value", "new_string_value")) #replace part of a string with another string
        .withColumn("datetime_comun", from_unixtime("formatted_datettime_column")) #changes date-time column to unix date-time format
        .withColumn("column_name", from_json(col("column_name"), new_schema)) #changing the schema of a column in json 
        .withColumn("column_name", explode("column_name")) #exploding the array to get the individual rows
        {Tranformations} # This place-holder represents additional transformations that you want to make with respect to the data.
        )

In [None]:
@dlt.table
def Dim2_cleansed():
    return (
        dlt.read("Dim2_raw") # dlt.read() function is used to read data from a source i.e. bronze layer.
        .select(*) # selects all columns from the DataFrame read from the source.
        .withColumnRenamed('OldColumnName', 'New_Column_Name') #changing column name
        .withColumn("datetime_comun", from_unixtime("formatted_datettime_column")) #changes date-time column to unix date-time format 
        .withColumn("column_name", regexp_replace("column_name", "string_value", "new_string_value")) #replace part of a string with another string 
        .withColumn("column_name", from_json(col("column_name"), new_schema)) #changing the schema of a column in json 
        .withColumn("column_name", explode("column_name")) #exploding the array to get the individual rows
        {Tranformations} # This place-holder represents additional transformations that you want to make with respect to the data.
        )

In [None]:
@dlt.table
def Fact_cleansed():
    return (
        dlt.read("Fact_raw") # dlt.read() function is used to read data from a source i.e. bronze layer.
        .select(*) # selects all columns from the DataFrame read from the source.
        .withColumnRenamed('OldColumnName', 'New_Column_Name') #changing column name
        .withColumn("datetime_comun", from_unixtime("formatted_datettime_column")) #changes date-time column to unix date-time format 
        .withColumn("column_name", regexp_replace("column_name", "string_value", "new_string_value")) #replace part of a string with another string 
        .withColumn("column_name", from_json(col("column_name"), new_schema)) #changing the schema of a column in json 
        .withColumn("column_name", explode("column_name")) #exploding the array to get the individual rows
        {Tranformations} # This place-holder represents additional transformations that you want to make with respect to the data.
        )

> Note: Every transformation you apply to a table must be consistent with the new schema that you declared at the beginning of this notebook.