In [0]:
# Retrieve the values published by the upstream "Bronze" task.
# The result is accessed with `.get(...)` below, so it is expected to be a dict.
bronze_output = dbutils.jobs.taskValues.get(taskKey="Bronze", key="bronze_output")

start_date = bronze_output.get("start_date", "")
end_date = bronze_output.get("end_date", "")
bronze_dbfs = bronze_output.get("bronze_dbfs", "")
silver_dbfs = bronze_output.get("silver_dbfs", "")

# Fail fast when a value that later cells build paths from is missing or empty.
# Without this check an empty `silver_dbfs` would make the write target
# "/earthquake_events_silver/" and an empty `start_date`/`bronze_dbfs` would
# produce a meaningless input file path.
# `end_date` is not used in this notebook, so it keeps its lenient default.
missing_values = [
    name
    for name, value in (
        ("start_date", start_date),
        ("bronze_dbfs", bronze_dbfs),
        ("silver_dbfs", silver_dbfs),
    )
    if not value
]
if missing_values:
    raise ValueError(
        "Bronze task output is missing required value(s): "
        + ", ".join(missing_values)
    )

print(f" start date: {start_date} , bronze_dbfs : { bronze_dbfs}")

In [0]:
from pyspark.sql.functions import col, when, isnull
from pyspark.sql.types import TimestampType

In [0]:
# Read the raw earthquake file for `start_date` from the bronze location.
# The "multiline" option lets the reader accept a JSON document that spans
# several lines instead of requiring one record per line.
bronze_file_path = f"{bronze_dbfs}/{start_date}_earthquake_data.json"
json_reader = spark.read.option("multiline","true")
df = json_reader.json(bronze_file_path)

# Print the first 10 rows as a quick look at what was loaded.
df.show(10)

In [0]:
# Flatten the nested records into one row of plain columns per event.
# The first three entries of `geometry.coordinates` become separate columns.
# NOTE(review): if the input is USGS GeoJSON, the third coordinate is depth
# (km) rather than elevation — confirm before relying on the column name.
coordinates = col("geometry.coordinates")

# (source field, output column name) pairs taken from the `properties` struct.
property_columns = [
    ("properties.mag", "magnitude"),
    ("properties.magType", "magnitude_type"),
    ("properties.place", "location"),
    ("properties.time", "time"),
    ("properties.updated", "updated"),
    ("properties.title", "title"),
    ("properties.status", "status"),
    ("properties.sig", "sig"),
]

df = df.select(
    "id",
    coordinates.getItem(0).alias("longitude"),
    coordinates.getItem(1).alias("latitude"),
    coordinates.getItem(2).alias("elevation"),
    *[col(source).alias(output) for source, output in property_columns],
)



In [0]:
# Replace nulls in the position and event-time columns with 0.
# All other columns are left as they are.
for null_checked_column in ('longitude', 'latitude', 'time'):
    current_value = col(null_checked_column)
    df = df.withColumn(
        null_checked_column,
        when(current_value.isNull(), 0).otherwise(current_value),
    )

In [0]:
# Convert the epoch-based `time` and `updated` columns to Spark timestamps.
# Each value is divided by 1000 before the cast, i.e. the inputs are treated
# as epoch milliseconds and the cast receives epoch seconds.
for epoch_column in ("time", "updated"):
    epoch_seconds = col(epoch_column) / 1000
    df = df.withColumn(epoch_column, epoch_seconds.cast(TimestampType()))

In [0]:
# Destination directory for the transformed events inside the silver container.
SILVER_EVENTS_DIR = "earthquake_events_silver"
silver_output_path = f"{silver_dbfs}/{SILVER_EVENTS_DIR}/"

In [0]:
# Write the transformed DataFrame as Parquet under the silver output path.
# Append mode adds new files next to whatever is already there; it never
# replaces existing data.
# NOTE(review): because of that, re-running this notebook for the same input
# file writes the same rows again — confirm duplicates are handled downstream.
df.write.parquet(silver_output_path, mode="append")

In [0]:
# Publish the silver output path as the "silver_output" task value so that
# later tasks in the same job run can look it up.
dbutils.jobs.taskValues.set(
    key="silver_output",
    value=silver_output_path,
)