In [0]:
from pyspark.sql.functions import *;
from pyspark.sql.types import *

In [0]:
# Get current date using current_timestamp

ingest_date = spark.sql("SELECT current_date()").collect()[0][0]

print(ingest_date)

2026-01-05


### Load Companies data

In [0]:
companies_inc_path=f"/Volumes/capstone/default/raw_data/companies/incremental/{ingest_date}/"

print(companies_inc_path)

/Volumes/capstone/default/raw_data/companies/incremental/2026-01-05/


In [0]:
from pyspark.sql.functions import *
from pyspark.sql.utils import AnalysisException

In [0]:
from pyspark.sql.functions import lit, current_timestamp
import uuid

def write_log(table_name, source_system, log_table):
    spark.createDataFrame(
        [(str(uuid.uuid4()), table_name, source_system)],
        ["log_id", "table_name", "source_system"]
    ) \
    .withColumn("ingestion_timestamp", current_timestamp()) \
    .withColumn("created_at", current_timestamp()) \
    .write \
    .format("delta") \
    .mode("append") \
    .saveAsTable(log_table)

In [0]:
try:
    companies_inc = (
        spark.read
        .option("header", True)
        .option("inferSchema", True)
        .csv(companies_inc_path)
    )

    companies_bronze = (
        companies_inc
        .withColumn("ingestion_timestamp", current_timestamp())
        .withColumn("source_file", lit(f"{companies_inc_path}/companies.csv"))
        .withColumn("load_type", lit("incremental"))
    )

    companies_bronze.write \
        .format("delta") \
        .mode("append") \
        .option("mergeSchema", "true") \
        .saveAsTable("capstone.default.bronze_companies")

    write_log("capstone.default.bronze_companies", "default", "capstone.default.bronze_log")
except AnalysisException as e:
    print(f"No incremental data found at {companies_inc_path}. Skipping ingestion.")

No incremental data found at /Volumes/capstone/default/raw_data/companies/incremental/2026-01-05/. Skipping ingestion.


### Load Daily prices data

In [0]:
prices_inc_path=f"/Volumes/capstone/default/raw_data/daily_prices/incremental/{ingest_date}/"

In [0]:
try:
    daily_prices_inc = (
        spark.read
        .option("header", True)
        .option("inferSchema", True)
        .csv(prices_inc_path)
    )

    daily_prices_bronze = (
        daily_prices_inc
        .withColumn("ingestion_timestamp", current_timestamp())
        .withColumn("source_file", lit(f"{prices_inc_path}/daily_prices.csv"))
        .withColumn("load_type", lit("incremental"))
    )

    daily_prices_bronze.write \
        .format("delta") \
        .mode("append") \
        .option("mergeSchema", "true") \
        .saveAsTable("capstone.default.bronze_daily_prices")

    write_log("capstone.default.bronze_daily_prices", "default", "capstone.default.bronze_log")
except AnalysisException as e:
    print(f"No incremental data found at {prices_inc_path}. Skipping ingestion.")

### Load Traders data

In [0]:
traders_inc_path=f"/Volumes/capstone/default/raw_data/traders/incremental/{ingest_date}/"

In [0]:
try:
    traders_inc=(
        spark.read
        .option("header", True)
        .option("inferSchema", True)
        .csv(traders_inc_path)
    )

    traders_bronze=(
        traders_inc
        .withColumn("ingestion_timestamp", current_timestamp())
        .withColumn("source_file", lit(f"{traders_inc_path}/traders.csv"))
        .withColumn("load_type", lit("incremental"))
    )

    traders_bronze.write \
        .format("delta") \
        .mode("append") \
        .option("mergeSchema", "true") \
        .saveAsTable("capstone.default.bronze_traders")
    
    write_log("capstone.default.bronze_traders", "default", "capstone.default.bronze_log")
except AnalysisException as e:
    print(f"No incremental data found at {traders_inc_path}. Skipping ingestion.")

No incremental data found at /Volumes/capstone/default/raw_data/traders/incremental/2026-01-05/. Skipping ingestion.


### Load Trades data

In [0]:
trades_inc_path=f"/Volumes/capstone/default/raw_data/trades/incremental/{ingest_date}/"

In [0]:
try:
    trades_inc=(
        spark.read
        .option("header", True)
        .option("inferSchema", True)
        .csv(trades_inc_path)
    )

    trades_bronze=(
        trades_inc
        .withColumn("ingestion_timestamp", current_timestamp())
        .withColumn("source_file", lit(f"{trades_inc_path}/trades.csv"))
        .withColumn("load_type", lit("incremental"))
    )

    trades_bronze.write \
        .format("delta") \
        .mode("append") \
        .option("mergeSchema", "true") \
        .saveAsTable("capstone.default.bronze_trades")

    write_log("capstone.default.bronze_trades", "default", "capstone.default.bronze_log")
except AnalysisException as e:
    print(f"No incremental data found at {trades_inc_path}. Skipping ingestion.")

In [0]:
fx_rates_inc_path=f"/Volumes/capstone/default/raw_data/fx_rates/incremental/{ingest_date}/"

In [0]:
try:
    fx_rates_inc=(
        spark.read
        .option("header", True)
        .option("inferSchema", True)
        .csv(fx_rates_inc_path)
    )

    fx_rates_bronze=(
        fx_rates_inc
        .withColumn("ingestion_timestamp", current_timestamp())
        .withColumn("source_file", lit(f"{fx_rates_inc_path}/fx_rates.csv"))
        .withColumn("load_type", lit("incremental"))
    )

    fx_rates_bronze.write \
        .format("delta") \
        .mode("append") \
        .option("mergeSchema", "true") \
        .saveAsTable("capstone.default.bronze_fx_rates")
    
    write_log("capstone.default.bronze_fx_rates", "default", "capstone.default.bronze_log")
except AnalysisException as e:
    print(f"No incremental data found at {fx_rates_inc_path}. Skipping ingestion.")

In [0]:
%sql
SELECT * FROM capstone.default.bronze_log;

log_id,table_name,source_system,ingestion_timestamp,created_at
756f1c5a-c9a7-482f-93d1-bb9dee6ad1b8,capstone.default.bronze_daily_prices,default,2026-01-05T09:36:24.392Z,2026-01-05T09:36:24.392Z
0f1748f8-225b-48c3-919b-a434a8880b1b,capstone.default.bronze_fx_rates,default,2026-01-05T09:36:35.544Z,2026-01-05T09:36:35.544Z
16eb7e48-bc38-4bd5-8a1a-1224d961c756,capstone.default.bronze_trades,default,2026-01-05T09:36:30.721Z,2026-01-05T09:36:30.721Z
