In [0]:
# Smoke test: confirm the three secrets under scope "finlake_scope" can be read.
# Only presence is reported. No value is echoed, and nothing derived from a
# value (such as a mask as wide as the secret) is written to the cell output,
# because derived strings reveal information about the secret.
try:
    client_id = dbutils.secrets.get(scope="finlake_scope", key="sp-client-id")
    client_secret = dbutils.secrets.get(scope="finlake_scope", key="sp-client-secret")
    tenant_id = dbutils.secrets.get(scope="finlake_scope", key="tenant-id")

    print("Secrets retrieved successfully:")
    for label, value in (
        ("Client ID", client_id),
        ("Client Secret", client_secret),
        ("Tenant ID", tenant_id),
    ):
        # An empty string means the key exists but holds no usable value.
        status = "set" if value else "EMPTY"
        print(f"   {label}: {status}")

except Exception as e:
    # Diagnostic cell: report the failure rather than aborting the notebook run.
    print(f"Error retrieving secrets: {e}")

In [0]:
# --- 01_ingest_to_delta ---
# Databricks notebook (Python)

# =============================
# 1 WIDGETS / PARAMETERS
# =============================
# Widget name -> default value, declared in the order the widgets are created.
_WIDGET_DEFAULTS = {
    "input_path": "",
    "raw_delta_path": "abfss://raw@finlakeadlsa3b3.dfs.core.windows.net/delta/raw_transactions",
    "ingest_date": "",
    "file_pattern": "*.csv",
}

for _widget_name, _widget_default in _WIDGET_DEFAULTS.items():
    dbutils.widgets.text(_widget_name, _widget_default)


def _read_param(name):
    """Return the current value of text widget `name` with surrounding whitespace removed."""
    return dbutils.widgets.get(name).strip()


input_path = _read_param("input_path")
raw_delta_path = _read_param("raw_delta_path")
ingest_date = _read_param("ingest_date")
file_pattern = _read_param("file_pattern")

# Echo the resolved parameters so each run records what it was invoked with.
print("=== PARAMETERS ===")
for _label, _value in (
    ("input_path", input_path),
    ("raw_delta_path", raw_delta_path),
    ("ingest_date", ingest_date),
    ("file_pattern", file_pattern),
):
    print(f"{_label}: {_value}")
print("===================")

# =============================
# 2 IMPORTS & CONFIG
# =============================
from pyspark.sql import functions as F
from pyspark.sql.utils import AnalysisException

# Storage account and container that relative input paths are resolved against.
storage_account = "finlakeadlsa3b3"
container = "raw"

# Register the storage account access key for this Spark session. The key is
# fetched from the "finlake_scope" secret scope at runtime, so it never appears
# in the notebook source. The config name is derived from `storage_account`
# so the account is declared in exactly one place.
spark.conf.set(
    f"fs.azure.account.key.{storage_account}.dfs.core.windows.net",
    dbutils.secrets.get(scope="finlake_scope", key="adls-key"),
)

# =============================
# 3 BUILD INPUT PATH (Auto-detect)
# =============================

# Resolve the folder to read from, based on what `input_path` contains:
#   * a full abfss:// URI                -> used as given
#   * "", "delta", "raw" or "input"      -> the `ingest_date` folder in the container
#   * anything else (a date such as "2025-10-10", or a sub-folder path)
#                                         -> that folder inside the container
# `csv_dir` always ends with exactly one "/" before the file pattern is appended.
_container_root = f"abfss://{container}@{storage_account}.dfs.core.windows.net"
_fallback_keywords = ("", "delta", "raw", "input")

if input_path.startswith("abfss://"):
    csv_dir = input_path if input_path.endswith("/") else input_path + "/"

elif input_path.lower() in _fallback_keywords:
    # The fallback location is the ingest_date folder. Without a date the path
    # would collapse to the container root and glob every file there.
    _date_folder = ingest_date.strip("/")
    if not _date_folder:
        raise ValueError(
            "Cannot resolve an input folder: 'input_path' is empty (or a "
            "fallback keyword) and 'ingest_date' is not set."
        )
    csv_dir = f"{_container_root}/{_date_folder}/"

else:
    # Date-only and sub-folder inputs resolve identically. Stray leading or
    # trailing slashes are dropped so the URI never contains "//".
    _sub_folder = input_path.strip("/")
    csv_dir = f"{_container_root}/{_sub_folder}/"

# A leading slash on the pattern would also produce "//".
csv_path = csv_dir + file_pattern.lstrip("/")
print(f"Resolved CSV path: {csv_path}")

# =============================
# 4 READ CSV INTO DATAFRAME
# =============================
# Load every file matching `csv_path`. The first row is treated as the header
# and column types are inferred from the data.
try:
    csv_reader = spark.read.options(header="true", inferSchema="true")
    df = csv_reader.csv(csv_path)
    print("Successfully read input files")
except AnalysisException as read_error:
    # Print the resolved path next to the Spark error, then propagate so the
    # run stops here.
    print(f"ERROR: Could not read input files. Check if folder exists: {csv_path}")
    print(read_error)
    raise

# Quick sanity check: total row count and a small preview.
row_count = df.count()
print(f"Row count: {row_count}")
preview_df = df.limit(5)
display(preview_df)

# =============================
# 5 ADD METADATA COLUMNS
# =============================
# Add `ingest_date` only when the source files do not already carry it.
# The widget value is a string, so the fallback (today's date) is formatted as
# a "yyyy-MM-dd" string too. Otherwise the partition column would be a string
# on some runs and a DateType on others, and appended batches would disagree.
if "ingest_date" not in df.columns:
    if ingest_date:
        ingest_date_col = F.lit(ingest_date)
    else:
        ingest_date_col = F.date_format(F.current_date(), "yyyy-MM-dd")
    df = df.withColumn("ingest_date", ingest_date_col)

# Timestamp recording when this batch was loaded.
df = df.withColumn("load_ts", F.current_timestamp())

# =============================
# 6 WRITE TO RAW DELTA TABLE
# =============================
# Append this batch to the raw Delta location, partitioned by ingest_date.
raw_writer = df.write.format("delta").mode("append")
raw_writer = raw_writer.partitionBy("ingest_date")
raw_writer.save(raw_delta_path)

print(f"Wrote raw Delta table to: {raw_delta_path}")
print("Ingestion completed successfully.")

# Show a few of the rows that were just written, metadata columns included.
written_preview = df.limit(5)
display(written_preview)