In [0]:
from pyspark.sql.types import StructType, StructField, IntegerType, StringType
from pyspark.sql.functions import current_timestamp, lit

In [0]:
# Explicit schema for the customer files read below.
# Every column is declared nullable; created_date is kept as a plain string
# here rather than being parsed into a date type.
schema = (
    StructType()
    .add("customer_id", IntegerType(), True)
    .add("first_name", StringType(), True)
    .add("last_name", StringType(), True)
    .add("email", StringType(), True)
    .add("country", StringType(), True)
    .add("created_date", StringType(), True)
)

In [0]:
# Directory that holds the incoming customer CSV files.
customer_path = "/FileStore/tables/customers"

# Streaming read of the customer files through Auto Loader.
# The source is referenced by its documented name, `cloudFiles`, which matches
# the casing of the `cloudFiles.*` option keys used below.
df_customers = (
    spark.readStream
    .format("cloudFiles")
    # Underlying file format of the files being picked up.
    .option("cloudFiles.format", "csv")
    # The first line of each CSV file is a header row, not data.
    .option("header", "true")
    .option("cloudFiles.schemaLocation", "/tmp/customer_schema")
    # The schema is supplied explicitly instead of being inferred.
    .schema(schema)
    .load(customer_path)
)

In [0]:
# Bronze layer: keep the source columns as read and append two audit columns.
# 1) the timestamp at which the row was processed by this pipeline
_customers_with_ts = df_customers.withColumn(
    "ingestion_timestamp", current_timestamp()
)
# 2) a constant tag identifying the originating system
df_customers_bronze = _customers_with_ts.withColumn(
    "source_system", lit("crm")
)

In [0]:
# Configure the streaming sink: append-only Delta output, with the stream's
# progress tracked under the given checkpoint location.
# No trigger is configured here, so the writer's default trigger applies.
_bronze_writer = (
    df_customers_bronze.writeStream
    .format("delta")
    .outputMode("append")
    .option("checkpointLocation", "/tmp/customer_checkpoint")
)

# Start the stream into the bronze table; `query` is the handle to the
# running streaming query.
query = _bronze_writer.toTable("migration_project_db_ws.bronze.customers")

In [0]:
%sql
-- Inspect the rows currently stored in the bronze customers table.
SELECT
  *
FROM
  migration_project_db_ws.bronze.customers;

customer_id,first_name,last_name,email,country,created_date,ingestion_timestamp,source_system
101,Chirag,Venkateshaiah,chiragvenkateshaiah@gmail.com,India,2024-01-01,2026-01-15T03:50:49.544Z,crm
102,Joel,Marsh,joelmarsh@outlook.com,USA,2024-01-02,2026-01-15T03:52:54.897Z,crm
