In [0]:
%run "/Workspace/Project/04.Common Notebook"

In [0]:
# Create a free-text notebook widget (empty default) and read its current
# value; `env` is later used as the catalog-name prefix
# (e.g. "<env>_catalog.bronze.raw_traffic").
_ENV_WIDGET_LABEL = "Enter the environment"
dbutils.widgets.text(_ENV_WIDGET_LABEL, "")
env = dbutils.widgets.get(_ENV_WIDGET_LABEL)

In [0]:
from pyspark.sql.functions import *

In [0]:
def read_bronze_table(environment):
    """Open a streaming read over the bronze traffic table.

    Args:
        environment: Catalog-name prefix; the table that is read is
            ``{environment}_catalog.bronze.raw_traffic``.

    Returns:
        The object returned by ``spark.readStream.table`` for that table.
    """
    bronze_table_name = f"{environment}_catalog.bronze.raw_traffic"
    return spark.readStream.table(bronze_table_name)

In [0]:
def renaming_columns(df, columns):
    """Rename every listed column to its upper-case form.

    Args:
        df: DataFrame-like object exposing ``withColumnRenamed``.
        columns: Iterable of column-name strings to rename.

    Returns:
        The DataFrame obtained after one ``withColumnRenamed`` call per
        entry in ``columns`` (``df`` itself when ``columns`` is empty).
    """
    print('Renaming columns (converting to UPPERCASE only): ', end=' ')

    renamed = df
    for original_name in columns:
        renamed = renamed.withColumnRenamed(original_name, original_name.upper())

    print('Success !!')
    print("****************************************************************************************")
    return renamed



In [0]:
def adding_columns(df):
    """Add the derived vehicle-count columns and a transformation timestamp.

    Columns added via ``withColumn``, in this order:
      * ``ELECTRIC_VEHICLAS_COUNT`` = ``EV_CAR`` + ``EV_BIKE``
      * ``MOTER_VEHICLAS_COUNT`` = ``TWO_WHEELED_MOTOR_VEHICLES`` +
        ``CARS_AND_TAXIS`` + ``BUSES_AND_COACHES`` + ``LGV_TYPE`` +
        ``HGV_TYPE`` + ``ELECTRIC_VEHICLAS_COUNT``
      * ``TRANSFORMED_TIME`` = ``current_timestamp()``

    Args:
        df: DataFrame containing the source columns referenced above.

    Returns:
        The DataFrame with the three columns added.
    """
    print("Adding columns:", end=' ')

    # Column names (including their spelling) are emitted as-is into the
    # output table, so they are reproduced exactly here.
    motor_source_columns = (
        'TWO_WHEELED_MOTOR_VEHICLES',
        'CARS_AND_TAXIS',
        'BUSES_AND_COACHES',
        'LGV_TYPE',
        'HGV_TYPE',
        'ELECTRIC_VEHICLAS_COUNT',
    )

    # Total electric vehicles: EV cars plus EV bikes.
    electric_total = col('EV_CAR') + col('EV_BIKE')

    # Total motor vehicles: left-to-right sum over the listed columns. The
    # last term is the electric total, which is added to the frame first.
    motor_total = col(motor_source_columns[0])
    for source_name in motor_source_columns[1:]:
        motor_total = motor_total + col(source_name)

    enriched = (
        df.withColumn('ELECTRIC_VEHICLAS_COUNT', electric_total)
        .withColumn('MOTER_VEHICLAS_COUNT', motor_total)
        # Timestamp of this transformation, kept for incremental loading.
        .withColumn('TRANSFORMED_TIME', current_timestamp())
    )

    print("success!!")
    print("****************************************************************************************")
    return enriched


In [0]:
def write_Traffic_SilverTable(df, environment):
    """Stream ``df`` into the silver traffic Delta table and wait for completion.

    The query is started in append mode with an ``availableNow`` trigger,
    named ``SilverTrafficWriteStream``, and checkpointed under
    ``checkpoint_path + "/SilverTrafficLoad/Checkpt"`` (``checkpoint_path``
    is a global defined outside this cell).

    Args:
        df: Streaming DataFrame to persist.
        environment: Catalog-name prefix; the target table is
            ``{environment}_catalog.silver.silver_traffic``.

    Returns:
        None.

    Raises:
        Whatever ``awaitTermination()`` raises when the streaming query fails.
    """
    print('Starting to write the Silver Traffic Table:', end=' ')

    #  Write the streaming DataFrame to the silver Delta table
    silver_query = (
        df.writeStream
        .format("delta")
        .option("checkpointLocation", checkpoint_path + "/SilverTrafficLoad/Checkpt")
        .outputMode("append")
        .queryName("SilverTrafficWriteStream")
        .trigger(availableNow=True)
        .toTable(f"{environment}_catalog.silver.silver_traffic")
    )

    # toTable() only *starts* the query and returns its handle. Block until
    # the run has finished so that "Success" is reported only after the data
    # is committed, and so that a failed query raises here instead of being
    # silently ignored.
    silver_query.awaitTermination()

    print('Success !!')
    print("****************************************************************************************")

In [0]:
#  Step 0: Read the bronze table based on the environment
df_traffic = read_bronze_table(env)

#  Step 1: Remove duplicate records from the original DataFrame
remove_df = remove_duplicates(df_traffic)
all_columns = remove_df.columns

#  Step 2: Handle NULL values for both string and numeric columns
null_df = handling_nulls(remove_df, all_columns)

#  Step 3: Rename columns to UPPERCASE (names are otherwise unchanged)
renamed_df = renaming_columns(null_df, all_columns)

#  Step 4: Add new calculated columns (e.g., Electric and Motor Vehicle counts)
add_df = adding_columns(renamed_df)

#  Step 5: Write the transformed DataFrame to the Silver table.
#  write_Traffic_SilverTable returns nothing, so its result is not kept.
write_Traffic_SilverTable(add_df, env)

#  Step 6: Make sure the write stream has finished before reading the table
#  back. The query is looked up by the name it is started with
#  ("SilverTrafficWriteStream"); if it has already stopped, this is a no-op.
for active_query in spark.streams.active:
    if active_query.name == "SilverTrafficWriteStream":
        active_query.awaitTermination()

#  Step 7: Display the Silver table that was just written
result_df = spark.read.table(f"{env}_catalog.silver.silver_traffic")
display(result_df)


In [0]:
# Spot-check two columns of the Silver table for the environment selected in
# the widget (rather than a hard-coded catalog), using the DataFrame API so no
# SQL text is assembled from widget input.
display(
    spark.read.table(f"{env}_catalog.silver.silver_traffic")
    .select("MOTER_VEHICLAS_COUNT", "LINK_LENGTH_KM")
)