The silver layer is an intermediate layer where data is refined, cleansed, and transformed for analysis.
The code below defines three functions (`data1_cleansed`, `data2_cleansed`, and `data3_cleansed`) that load and transform data into respective Delta tables (`data1_cleansed`, `data2_cleansed`, and `data3_cleansed`). Each function follows a similar structure and performs the following steps:

1. Reads the data as a stream from the corresponding raw table (`data1_raw`, `data2_raw`, or `data3_raw`) using `dlt.read_stream`.
2. Selects all columns from the source data using `.select("*")` (the `*` must be passed as a string; a bare `*` is not valid Python).
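3. Converts a Unix epoch-seconds column to a formatted datetime string using `withColumn("datetime_comun", from_unixtime("formatted_datettime_column"))`. Note that `from_unixtime` goes from Unix time to a formatted timestamp, not the other way around.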
4. Renames a column from 'OldColumnName' to 'New_Column_Name' using `withColumnRenamed`.
5. Replaces a specific string value in a column with a new string value using `withColumn("column_name", regexp_replace("column_name", "string_value", "new_string_value"))`.
6. Parses a JSON string column into a structured column that follows the supplied schema using `withColumn("column_name", from_json(col("column_name"), new_schema))`.
7. Explodes an array column to obtain individual rows using `withColumn("column_name", explode("column_name"))`.
8. Applies additional transformations specific to each table (`{Transformations}`).

Each function is decorated with `@dlt.table`, which defines it as a Delta table and configures properties such as the table comment, table properties, Spark configuration, and whether the table is temporary (`temporary=False`).

In [None]:
data1_schema = {new_schema}

@dlt.table(
    comment="Load data to data1_cleansed table",
    table_properties={"pipelines.reset.allowed": "true"},
    spark_conf={"pipelines.trigger.interval": "60 seconds"},
    temporary=False,
)
def data1_cleansed():
    return (
        dlt.read_stream("data1_raw")
        #         spark.readStream.format("delta").table("data1_raw")
        .select(*)
        .withColumn("datetime_comun", from_unixtime("formatted_datettime_column")) #changes date-time column to unix date-time format
        .withColumnRenamed('OldColumnName', 'New_Column_Name') #changing column name
        .withColumn("column_name", regexp_replace("column_name", "string_value", "new_string_value")) #replace part of a string with another string
        .withColumn("column_name", from_json(col("column_name"), new_schema)) #changing the schema of a column in json # parse JSON string column and extract its fields into separate columns in the DataFrame.
        .withColumn("column_name", explode("column_name")) #exploding the array to get the individual rows
        {Tranformations}
    )


data2_schema = {new_schema}
           
@dlt.table(
    comment="Load data to data2_cleansed table",
    table_properties={"pipelines.reset.allowed": "true"},
    spark_conf={"pipelines.trigger.interval": "60 seconds"},
    temporary=False,
)
def data2_cleansed():
    return (
        dlt.read_stream("data2_raw")
        #         spark.readStream.format("delta").table("data2_raw")
        #          spark.read.format("delta").table("data2_raw")
        .select(*)
        .withColumn("datetime_comun", from_unixtime("formatted_datettime_column")) #changes date-time column to unix date-time format
        .withColumnRenamed('OldColumnName', 'New_Column_Name') #changing column name
        .withColumn("column_name", regexp_replace("column_name", "string_value", "new_string_value")) #replace part of a string with another string
        .withColumn("column_name", from_json(col("column_name"), new_schema)) #changing the schema of a column in json
        .withColumn("column_name", explode("column_name")) #exploding the array to get the individual rows
        {Tranformations}
    )


data3_schema = {new_schema}

@dlt.table(
    comment="Load data to a data3_cleansed table",
    table_properties={"pipelines.reset.allowed": "true"},
    spark_conf={"pipelines.trigger.interval": "60 seconds"},
    temporary=False,
)
def data3_cleansed():
    return (
        dlt.read_stream("data3_raw")
        #         spark.readStream.format("delta").table("data3_raw")
        #         spark.read.format("delta").table("data3_raw")
        .select(*)
        .withColumn("datetime_comun", from_unixtime("formatted_datettime_column")) #changes date-time column to unix date-time format
        .withColumnRenamed('OldColumnName', 'New_Column_Name') #changing column name
        .withColumn("column_name", regexp_replace("column_name", "string_value", "new_string_value")) #replace part of a string with another string
        .withColumn("column_name", from_json(col("column_name"), new_schema)) #changing the schema of a column in json
        .withColumn("column_name", explode("column_name")) #exploding the array to get the individual rows
        {Tranformations}
    )
