In [0]:
from pyspark.sql.functions import current_timestamp, input_file_name, lit
from pyspark.sql.functions import col
from pyspark.sql.types import *
import json, uuid
# Databricks notebook: 00_Config_and_01_Bronze_Raw
# Purpose: read validated data from Cosmos DB plus KYC data from ADLS and land it in the Bronze raw layer as plain files (no Delta).


In [0]:
# env = dbutils.widgets.get("env")

# --- ADLS Gen2 ---------------------------------------------------------------
storage_account = "banksourcedata"
adls_container = "raw"

# Credentials are resolved from a Databricks secret scope at run time instead of
# being embedded in the notebook source, where they would be exposed to anyone
# who can read the notebook or its version history.
# NOTE(review): the scope and key names below are placeholders — create them (or
# point these at an existing scope) before running. Any account key that was
# previously committed in plaintext should be rotated.
secret_scope = "bank-etl"
storage_key = dbutils.secrets.get(scope=secret_scope, key="adls-account-key")

# Root of the "etl" container on the same storage account; bronze output lives below it.
root_path = f"abfss://etl@{storage_account}.dfs.core.windows.net"
bronze_root = f"{root_path}/bronze"

# --- Cosmos DB ---------------------------------------------------------------
cosmos_endpoint = "https://bankoperatondb.documents.azure.com:443/"
cosmos_key = dbutils.secrets.get(scope=secret_scope, key="cosmos-account-key")
cosmos_db = "OPERATION_DB"

# Source container names inside cosmos_db.
cosmos_atm = "ATMTransactions"
cosmos_upi = "UPIEvents"
cosmos_profile = "AccountProfile"
cosmos_fraud = "FraudAlerts"


In [0]:
# Set the account-key configuration entry for the storage account on the Spark session.
adls_key_conf = f"fs.azure.account.key.{storage_account}.dfs.core.windows.net"
spark.conf.set(adls_key_conf, storage_key)

# Root URI of the raw container on the same storage account.
raw_root = f"abfss://{adls_container}@{storage_account}.dfs.core.windows.net"

print("ADLS configured successfully via Account Key.")
print("RAW root =", raw_root)


ADLS configured successfully via Account Key.
RAW root = abfss://raw@banksourcedata.dfs.core.windows.net


In [0]:
# Apply the Cosmos connection settings on the Spark session, in the same order as before.
for conf_name, conf_value in (
    ("spark.cosmos.accountEndpoint", cosmos_endpoint),
    ("spark.cosmos.accountKey", cosmos_key),
    ("spark.cosmos.database", cosmos_db),
):
    spark.conf.set(conf_name, conf_value)

print("Cosmos DB configured using Account Key.")


Cosmos DB configured using Account Key.


In [0]:
def read_cosmos(container_name):
    """Load one Cosmos DB container and tag the rows with ingest metadata.

    The endpoint, account key and database are taken from the module-level
    ``cosmos_endpoint``, ``cosmos_key`` and ``cosmos_db`` variables.

    Args:
        container_name: Value passed as the ``spark.cosmos.container`` option.

    Returns:
        The DataFrame loaded through the ``cosmos.oltp`` format, with two added
        columns: ``ingest_time`` (``current_timestamp()``) and
        ``ingest_source`` (the literal ``"cosmos"``).
    """
    print(f"Reading Cosmos container: {container_name}")

    reader = (
        spark.read.format("cosmos.oltp")
        .option("spark.cosmos.accountEndpoint", cosmos_endpoint)
        .option("spark.cosmos.accountKey", cosmos_key)
        .option("spark.cosmos.database", cosmos_db)
        .option("spark.cosmos.container", container_name)
    )
    loaded = reader.load()

    # Stamp each row with when and from where it was ingested.
    stamped = loaded.withColumn("ingest_time", current_timestamp())
    return stamped.withColumn("ingest_source", lit("cosmos"))


In [0]:
# Dataset key -> folder name under bronze_root (keys and folder names differ for some datasets).
bronze_folders = {
    "atm": "atm",
    "upi": "upi",
    "profile": "accountprofile",
    "fraud": "fraudalerts",
    "kyc": "kyc",
}
bronze_paths = {
    dataset: f"{bronze_root}/{folder}" for dataset, folder in bronze_folders.items()
}

# Create each target folder up front.
for p in bronze_paths.values():
    dbutils.fs.mkdirs(p)


In [0]:
# Read the container named by cosmos_atm and write it as JSON to the "atm"
# bronze path, overwriting any existing output there.
df_atm = read_cosmos(cosmos_atm)
output_path = bronze_paths["atm"]

df_atm.write.mode("overwrite").json(output_path)

print("✔ ATM Bronze RAW saved at:", output_path)
# Optional preview: display(df_atm.limit(5))


Reading Cosmos container: ATMTransactions
✔ ATM Bronze RAW saved at: abfss://etl@banksourcedata.dfs.core.windows.net/bronze/atm


In [0]:
# Read the container named by cosmos_upi and write it as JSON to the "upi"
# bronze path, overwriting any existing output there.
df_upi = read_cosmos(cosmos_upi)
output_path = bronze_paths["upi"]

df_upi.write.mode("overwrite").json(output_path)

print("✔ UPI Bronze RAW saved at:", output_path)
# Optional preview: display(df_upi.limit(5))


Reading Cosmos container: UPIEvents
✔ UPI Bronze RAW saved at: abfss://etl@banksourcedata.dfs.core.windows.net/bronze/upi


In [0]:
# Read the container named by cosmos_profile and write it as JSON to the
# "profile" bronze path, overwriting any existing output there.
df_prof = read_cosmos(cosmos_profile)
output_path = bronze_paths["profile"]

df_prof.write.mode("overwrite").json(output_path)

print("✔ AccountProfile Bronze RAW saved at:", output_path)
# Optional preview: display(df_prof.limit(5))


Reading Cosmos container: AccountProfile
✔ AccountProfile Bronze RAW saved at: abfss://etl@banksourcedata.dfs.core.windows.net/bronze/accountprofile


In [0]:
# Read the container named by cosmos_fraud and write it as JSON to the
# "fraud" bronze path, overwriting any existing output there.
df_fraud = read_cosmos(cosmos_fraud)
output_path = bronze_paths["fraud"]

df_fraud.write.mode("overwrite").json(output_path)

print("✔ FraudAlerts Bronze RAW saved at:", output_path)
# Optional preview: display(df_fraud.limit(5))


Reading Cosmos container: FraudAlerts
✔ FraudAlerts Bronze RAW saved at: abfss://etl@banksourcedata.dfs.core.windows.net/bronze/fraudalerts
