#Reading from source CSV File.

# Define Ingestion Configuration

# Ingest Files into Bronze Tables
## path: location of the source CSV file
## source: label of the originating system, e.g. crm/erp
## table: name of the target Bronze table

# Import config and functions

In [0]:
%run ./ingestion_config
from pyspark.sql import functions as F
#from bronze.ingestion_config import INGESTION_CONFIG

In [0]:
# path: location of the source CSV file
# source: label of the originating system, e.g. crm/erp
# table: name of the target Bronze table

def ingest_csv_to_bronze(path: str, source: str, table: str):
    """Load a CSV file into a Bronze Delta table, replacing its contents.

    The CSV is read with a header row and without schema inference, so the
    file's columns stay as raw strings. Three metadata columns are appended
    and the result is saved as ``workspace.bronze.<table>``, overwriting
    both the existing data and the existing schema.

    Args:
        path: Location of the CSV file to read.
        source: Label of the originating system (e.g. crm/erp); stored in
            the ``_source`` column.
        table: Name of the target table inside ``workspace.bronze``.
    """
    reader_options = {
        "header": "true",        # first line holds the column names
        "mode": "PERMISSIVE",    # a bad row must not fail the whole load
        "inferSchema": "false",  # keep raw strings in Bronze
    }
    raw_df = spark.read.options(**reader_options).csv(path)

    # Bronze metadata columns (see README.md).
    bronze_df = raw_df.withColumn("_source", F.lit(source))
    bronze_df = bronze_df.withColumn("_source_file", F.col("_metadata.file_path"))
    bronze_df = bronze_df.withColumn("_ingest_timestamp", F.current_timestamp())

    # Full overwrite of data and schema; production usually uses append/merge.
    writer = (
        bronze_df.write
        .format("delta")
        .mode("overwrite")
        .option("overwriteSchema", "true")
    )
    # Saved as a managed table in the metastore.
    writer.saveAsTable(f"workspace.bronze.{table}")

# Ingest every configured file: one INGESTION_CONFIG entry per CSV/table pair.
for item in INGESTION_CONFIG:
    # Announce the load first so a failing entry is easy to spot in the output.
    print(f"Ingesting {item['source']} → workspace.bronze.{item['table']}")
    ingest_csv_to_bronze(
        path=item["path"],
        source=item["source"],
        table=item["table"],
    )


# Read the Delta table to ensure that the data exists

In [0]:
%sql
-- Preview a few rows to confirm the ingestion populated the table.
-- (Previously this cell ran DROP TABLE, which removed the freshly ingested
-- table and made the DESCRIBE TABLE check in the next cell fail.)
SELECT * FROM workspace.bronze.crm_cust_info LIMIT 10;

# Check that the Bronze table columns are typed as STRING

In [0]:
%sql
-- List the table's columns and data types. The CSV is read with
-- inferSchema=false, so the source columns should be reported as STRING;
-- _ingest_timestamp comes from current_timestamp() and is the exception.
DESCRIBE TABLE workspace.bronze.crm_cust_info;