In [0]:
%skip
# Release everything currently held in the Spark in-memory cache
spark.catalog.clearCache()

# Optional: also drop the temporary views returned by listTables()
temp_view_names = [table.name for table in spark.catalog.listTables() if table.isTemporary]
for view_name in temp_view_names:
    spark.catalog.dropTempView(view_name)


In [0]:
%skip
%sql

-- Make `dev_catalog` the current catalog for subsequent SQL in this session.
-- NOTE(review): the catalog name is hard-coded here, while the Python cells build
-- it from the `env` widget (`{environment}_catalog`) -- confirm this is intentional.
USE CATALOG `dev_catalog`;

In [0]:
%skip
# Look up the `url` of each external location and bind it to the matching global.
# NOTE(review): this cell is marked %skip, so these names are presumably supplied
# elsewhere (e.g. by the %run'd Common notebook) -- confirm.
checkpoint, landing, silver = (
    spark.sql(f"""DESCRIBE EXTERNAL LOCATION {location_name}""")
    .select("url")
    .collect()[0]
    .url
    for location_name in ("checkpoints", "landing", "silver")
)

In [0]:
%run "/Workspace/Dev/04. Common"


In [0]:
# Text widget through which the caller supplies the target environment
dbutils.widgets.text(
    name="env",
    defaultValue='',
    label='Enter the environment in lower case',
)

# Read the widget value once; `environment` is kept as a second name for it
env = dbutils.widgets.get("env")
environment = env

In [0]:
def read_BronzeTrafficTable(environment):
    """Open a streaming read on the bronze ``raw_traffic`` table.

    Args:
        environment: Catalog prefix; the table read is
            ``{environment}_catalog.bronze.raw_traffic``.

    Returns:
        The DataFrame returned by ``spark.readStream.table(...)``.
    """
    print("Reading the Bronze Table Data : ", end='')

    # Each part of the three-level name is back-tick quoted
    source_table = f"`{environment}_catalog`.`bronze`.`raw_traffic`"
    bronze_stream = spark.readStream.table(source_table)

    print(f'Reading {environment}_catalog.bronze.raw_traffic Success!')
    return bronze_stream

In [0]:
def remove_duplicates(df):
    """Return ``df`` with duplicate rows dropped.

    ``dropDuplicates()`` is called without a column subset, so rows are
    compared across all columns.

    Args:
        df: DataFrame to de-duplicate.

    Returns:
        The result of ``df.dropDuplicates()``.
    """
    print('Removing Duplicate values: ', end='')
    deduplicated = df.dropDuplicates()
    print('Success!! ')
    return deduplicated

In [0]:
def handle_nulls(df,columns):
    """Fill NULLs in ``columns`` in two passes: ``'Unknown'`` first, then ``0``.

    Both passes use the same ``subset``. Per the PySpark ``fillna`` contract a
    fill value is only applied to columns of a compatible type, which is what
    lets one column list serve both the string and the numeric pass.

    Args:
        df: DataFrame to clean.
        columns: Column names handed to ``fillna`` as ``subset``.

    Returns:
        The DataFrame after both fills have been applied.
    """
    # (progress message, fill value) for each pass, in execution order
    fill_passes = (
        ('Replacing NULL values on String Columns with "Unknown" ', 'Unknown'),
        ('Replacing NULL values on Numeric Columns with "0" ', 0),
    )

    cleaned = df
    for message, fill_value in fill_passes:
        print(message, end='')
        cleaned = cleaned.fillna(fill_value, subset=columns)
        print('Successs!! ')

    return cleaned

In [0]:
def EV_Count(df):
    """Add an ``Electric_Vehicles_Count`` column equal to ``EV_Car + EV_Bike``.

    Args:
        df: DataFrame that has ``EV_Car`` and ``EV_Bike`` columns.

    Returns:
        ``df`` with the extra ``Electric_Vehicles_Count`` column.
    """
    print('Creating Electric Vehicles Count Column : ', end='')
    from pyspark.sql.functions import col

    electric_total = col("EV_Car") + col("EV_Bike")
    with_ev_count = df.withColumn("Electric_Vehicles_Count", electric_total)

    print('Success!! ')
    return with_ev_count

In [0]:
def Motor_Count(df):
    """Add a ``Motor_Vehicle_Count`` column totalling all motor-vehicle categories.

    The total is the sum of the five non-electric category columns plus
    ``Electric_Vehicles_Count``. ``EV_Car`` is intentionally not added on its
    own: ``Electric_Vehicles_Count`` is computed as ``EV_Car + EV_Bike``, so
    including ``EV_Car`` again would count electric cars twice.

    Args:
        df: DataFrame that already has ``Electric_Vehicles_Count`` (i.e. the
            output of ``EV_Count``) along with the category columns below.

    Returns:
        ``df`` with the extra ``Motor_Vehicle_Count`` column.
    """
    print('Creating Motor Vehicles Count Column : ', end='')
    from pyspark.sql.functions import col

    # Every column that contributes to the total, each counted exactly once
    component_columns = (
        "Two_wheeled_motor_vehicles",
        "Cars_and_taxis",
        "Buses_and_coaches",
        "LGV_Type",
        "HGV_Type",
        "Electric_Vehicles_Count",
    )

    motor_total = col(component_columns[0])
    for column_name in component_columns[1:]:
        motor_total = motor_total + col(column_name)

    df_motor = df.withColumn("Motor_Vehicle_Count", motor_total)
    print('Success!! ')
    return df_motor

In [0]:
def create_transformed_time(df):
    """Add a ``Transformed_Time`` column populated with ``current_timestamp()``.

    Args:
        df: DataFrame to stamp.

    Returns:
        ``df`` with the extra ``Transformed_Time`` column.
    """
    print('Creating Transformed_Time column : ', end='')
    from pyspark.sql.functions import current_timestamp

    load_time = current_timestamp()
    stamped = df.withColumn("Transformed_Time", load_time)

    print('Success!! ')
    return stamped

In [0]:
def write_SilverTrafficTable(streamingdf,environment):
    """Start an append-mode Delta stream from ``streamingdf`` into ``silver_traffic``.

    The ``checkpoint`` name is read from the enclosing notebook scope and the
    checkpoint path is built by plain string concatenation, so that value is
    presumably expected to end with a path separator -- TODO confirm.

    Nothing is returned and ``awaitTermination`` is not called here, so the
    closing message is printed as soon as ``toTable`` returns.

    Args:
        streamingdf: Streaming DataFrame to write.
        environment: Catalog prefix; the target table is
            ``{environment}_catalog.silver.silver_traffic``.
    """
    print('Writing the Silver_Traffic Table : ', end='')

    # Build the writer step by step, in the same order as a single fluent chain
    writer = streamingdf.writeStream.format("delta")
    writer = writer.option("checkpointLocation", checkpoint + "SilverTrafficLoad/Checkpt")
    writer = writer.outputMode("append")
    writer = writer.queryName("SilverTrafficWriteStream")
    writer = writer.trigger(availableNow=True)

    target_table = f"`{environment}_catalog`.`silver`.`silver_traffic`"
    writer.toTable(target_table)

    print(f'{target_table} Success!!')

In [0]:

## Driver cell: bronze -> silver traffic pipeline. Each step consumes the previous step's output.

## Open the streaming read on the bronze traffic table for the selected environment
df_trafficdata = read_BronzeTrafficTable(env)

# Remove duplicate rows
df_dups = remove_duplicates(df_trafficdata)

# Replace any NULL values; every column in the schema is passed as the subset
Allcolumns =df_dups.schema.names
df_nulls = handle_nulls(df_dups,Allcolumns)

## Add the Electric_Vehicles_Count column
df_ev = EV_Count(df_nulls)


## Add the Motor_Vehicle_Count column (must run after EV_Count: it reads Electric_Vehicles_Count)
df_motor = Motor_Count(df_ev)

## Stamp each row with the Transformed_Time column
df_final = create_transformed_time(df_motor)

## Write the result to the silver_traffic table of the same environment
write_SilverTrafficTable(df_final, env)

In [0]:
# spark.sql("""Select * from dev_catalog.silver.Silver_Traffic""").display()
# spark.sql("""Select count(1) from dev_catalog.silver.Silver_Traffic""").display()

In [0]:
# spark.sql("""Select * from dev_catalog.silver.Silver_Traffic where Record_ID between 74174 and 74190 """).display()