In [0]:
# --- Notebook parameters -----------------------------------------------------
# Location of the source folder, supplied through the notebook widgets.
storage_account_name = dbutils.widgets.get("storage_account_name")
container_name = dbutils.widgets.get("container_name")
base_folder_name = dbutils.widgets.get("base_folder_name")
main_folder_price = dbutils.widgets.get("main_folder_price")

# Catalog and schema names; later cells use them to build the target table name.
catalog = dbutils.widgets.get("catalog")
schema = dbutils.widgets.get("schema")

# The storage account key is read from the "zillowsecrets" secret scope
# rather than from a widget.
storage_account_key = dbutils.secrets.get(
    scope="zillowsecrets",
    key="storage_account_key",
)

# --- Storage access ----------------------------------------------------------
# Register the account key on the Spark session for this account's dfs endpoint.
account_key_conf = f"fs.azure.account.key.{storage_account_name}.dfs.core.windows.net"
spark.conf.set(account_key_conf, storage_account_key)

# abfss:// URI of the folder the rest of the notebook lists and reads.
folder_path = f"abfss://{container_name}@{storage_account_name}.dfs.core.windows.net/{base_folder_name}/{main_folder_price}"

In [0]:
# Gate the rest of the job on whether the source folder contains anything.
# The folder is listed exactly once and that listing is reused for the check,
# so the decision is based on a single snapshot of the folder.
# NOTE(review): dbutils.fs.ls is expected to raise when the path does not
# exist; confirm whether a missing folder should be treated as "no files".
items = dbutils.fs.ls(folder_path)

if not items:
    # Publish the decision as a task value, then stop this notebook early.
    dbutils.jobs.taskValues.set("run_now", "false")
    dbutils.notebook.exit("No new files to process")
else:
    print("files are available")
    dbutils.jobs.taskValues.set("run_now", "true")

In [0]:
from pyspark.sql.functions import *
from pyspark.sql.types import *

# Storage credentials are already registered on the Spark session by the first
# cell (and the folder listing above has already used them), so they are not
# set again here.

# Explicit schema for the price files: all six fields are nullable strings.
# NOTE(review): presumably typed casting happens in a later layer; confirm.
StructSchema_price = StructType([
    StructField('zpid', StringType(), True),
    StructField('date', StringType(), True),
    StructField('event', StringType(), True),
    StructField('price', StringType(), True),
    StructField('priceChangeRate', StringType(), True),
    StructField('pricePerSquareFoot', StringType(), True),
])

# Read every JSON file under folder_path with the explicit schema.
# "multiline" allows one JSON document to span several lines of a file.
# The CSV-only "header" option was dropped: the JSON reader has no such option.
df = (
    spark.read
    .schema(StructSchema_price)
    .option("multiline", "true")
    .json(folder_path)
)

display(df)

In [0]:
import urllib.parse
# Python UDF that percent-decodes a string (e.g. "%20" becomes a space).
# Nulls are passed through unchanged: urllib.parse.unquote(None) raises a
# TypeError, which would otherwise fail the whole Spark task.
decode = udf(
    lambda x: urllib.parse.unquote(x) if x is not None else None,
    StringType(),
)

# Tag each row with the file it was read from: the regex keeps the last
# path segment of input_file_name(), and the UDF then percent-decodes it.
df = df.withColumn("file_name", decode(regexp_extract(input_file_name(), r"([^/]+$)", 1)))
display(df)

In [0]:
# Append the current DataFrame to the price_bronze Delta table in the
# configured catalog and schema, with the "mergeSchema" write option enabled.
(
    df.write
    .format('delta')
    .option("mergeSchema", "true")
    .mode("append")
    .saveAsTable(f'{catalog}.{schema}.price_bronze')
)

In [0]:
%sql
-- Preview the bronze table that the previous cell appended to.
SELECT * FROM ${catalog}.${schema}.price_bronze;