In [0]:
from pyspark.sql.functions import (
    current_timestamp,
    input_file_name,
    lit,
    col,
    to_timestamp,
    concat,
    split
)

### Companies Ingestion

In [0]:
# Path of the historical companies CSV inside the raw-data volume.
companies_historical_src = "/Volumes/capstone/default/raw_data/companies/historical/companies.csv"

In [0]:
# Load the historical companies CSV as-is: the first row provides the column
# names and Spark infers each column's type from the file contents.
companies_raw = (
    spark.read
    .format("csv")
    .option("header", True)
    .option("inferSchema", True)
    .load(companies_historical_src)
)

In [0]:
# Add the bronze audit columns to the raw companies rows:
#   ingestion_timestamp - time of this load
#   source_file         - path the rows were read from
#   load_type           - constant marker "historical"
companies_bronze = (
    companies_raw
    # current_timestamp() already returns a Column; wrapping it in lit() is a
    # no-op, so it is used directly.
    .withColumn("ingestion_timestamp", current_timestamp())
    .withColumn("source_file", lit(companies_historical_src))
    .withColumn("load_type", lit("historical"))
)



In [0]:
# Render the bronze companies DataFrame in the notebook output.
companies_bronze.display()

ticker,company_name,sector,industry,country,exchange,currency,is_active,ingestion_timestamp,source_file,load_type
AAPL,Apple Inc,Technology,Consumer Electronics,US,NASDAQ,USD,True,2026-01-05T09:00:33.831Z,/Volumes/capstone/default/raw_data/companies/historical/companies.csv,historical
MSFT,Microsoft Corp,Technology,Software,US,NASDAQ,USD,True,2026-01-05T09:00:33.831Z,/Volumes/capstone/default/raw_data/companies/historical/companies.csv,historical
GOOGL,Alphabet Inc,Technology,Internet,US,NASDAQ,USD,True,2026-01-05T09:00:33.831Z,/Volumes/capstone/default/raw_data/companies/historical/companies.csv,historical
AMZN,Amazon.com Inc,Consumer,E-Commerce,US,NASDAQ,USD,True,2026-01-05T09:00:33.831Z,/Volumes/capstone/default/raw_data/companies/historical/companies.csv,historical
TSLA,Tesla Inc,Automobile,EV,US,NASDAQ,USD,True,2026-01-05T09:00:33.831Z,/Volumes/capstone/default/raw_data/companies/historical/companies.csv,historical
RELIANCE,Reliance Industries,Energy,Oil & Gas,IN,NSE,INR,True,2026-01-05T09:00:33.831Z,/Volumes/capstone/default/raw_data/companies/historical/companies.csv,historical
HDFCBANK,HDFC Bank,Banking,Financial Services,IN,NSE,INR,True,2026-01-05T09:00:33.831Z,/Volumes/capstone/default/raw_data/companies/historical/companies.csv,historical


In [0]:
# Append the bronze companies rows to the Delta table
# capstone.default.bronze_companies, partitioned by ingestion_timestamp.
# NOTE(review): with mode("append"), re-running this cell inserts the same
# historical rows again. Partitioning on a full-precision timestamp also
# creates a new partition for every load - confirm both are intended.
(
    companies_bronze.write
    .format("delta")
    .mode("append")
    .partitionBy("ingestion_timestamp")
    .saveAsTable("capstone.default.bronze_companies")
)
                


### Daily Prices Ingestion

In [0]:
# Path of the historical daily-prices CSV inside the raw-data volume.
daily_prices_src = "/Volumes/capstone/default/raw_data/daily_prices/historical/daily_prices.csv"

In [0]:
# Load the historical daily-prices CSV as-is: the first row provides the
# column names and Spark infers each column's type from the file contents.
daily_prices_reader = (
    spark.read
    .format("csv")
    .option("header", True)
    .option("inferSchema", True)
)
daily_prices_raw = daily_prices_reader.load(daily_prices_src)

In [0]:
# Add the bronze audit columns to the raw daily-prices rows:
#   ingestion_timestamp - time of this load
#   source_file         - path the rows were read from
#   load_type           - constant marker "historical"
daily_prices_bronze = (
    daily_prices_raw
    # current_timestamp() already returns a Column; wrapping it in lit() is a
    # no-op, so it is used directly.
    .withColumn("ingestion_timestamp", current_timestamp())
    .withColumn("source_file", lit(daily_prices_src))
    .withColumn("load_type", lit("historical"))
)

In [0]:
# Render the bronze daily-prices DataFrame in the notebook output.
daily_prices_bronze.display()

ticker,trade_date,open_price,high_price,low_price,close_price,adjusted_close,volume,ingestion_timestamp,source_file,load_type
AAPL,2020-12-25,596.84,599.59,568.55,574.41,574.41,728951,2026-01-05T09:00:53.083Z,/Volumes/capstone/default/raw_data/daily_prices/historical/daily_prices.csv,historical
AAPL,2020-12-28,574.17,581.44,573.26,574.42,574.42,1163240,2026-01-05T09:00:53.083Z,/Volumes/capstone/default/raw_data/daily_prices/historical/daily_prices.csv,historical
AAPL,2020-12-29,572.66,589.48,569.68,581.46,581.46,832196,2026-01-05T09:00:53.083Z,/Volumes/capstone/default/raw_data/daily_prices/historical/daily_prices.csv,historical
AAPL,2020-12-30,581.44,583.02,561.5,569.37,569.37,638556,2026-01-05T09:00:53.083Z,/Volumes/capstone/default/raw_data/daily_prices/historical/daily_prices.csv,historical
AAPL,2020-12-31,569.49,574.2,567.52,569.32,569.32,542503,2026-01-05T09:00:53.083Z,/Volumes/capstone/default/raw_data/daily_prices/historical/daily_prices.csv,historical
AAPL,2021-01-01,569.02,578.79,561.1,572.11,572.11,200432,2026-01-05T09:00:53.083Z,/Volumes/capstone/default/raw_data/daily_prices/historical/daily_prices.csv,historical
AAPL,2021-01-04,571.92,583.89,564.93,579.26,579.26,527663,2026-01-05T09:00:53.083Z,/Volumes/capstone/default/raw_data/daily_prices/historical/daily_prices.csv,historical
AAPL,2021-01-05,578.72,592.46,577.29,589.06,589.06,809060,2026-01-05T09:00:53.083Z,/Volumes/capstone/default/raw_data/daily_prices/historical/daily_prices.csv,historical
AAPL,2021-01-06,589.46,621.13,588.01,612.8,612.8,407399,2026-01-05T09:00:53.083Z,/Volumes/capstone/default/raw_data/daily_prices/historical/daily_prices.csv,historical
AAPL,2021-01-07,610.77,617.11,604.81,608.62,608.62,435829,2026-01-05T09:00:53.083Z,/Volumes/capstone/default/raw_data/daily_prices/historical/daily_prices.csv,historical


In [0]:
# Append the bronze daily-prices rows to the Delta table
# capstone.default.bronze_daily_prices, partitioned by ingestion_timestamp.
# NOTE(review): with mode("append"), re-running this cell inserts the same
# historical rows again. Partitioning on a full-precision timestamp also
# creates a new partition for every load - confirm both are intended.
(
    daily_prices_bronze.write
    .format("delta")
    .mode("append")
    .partitionBy("ingestion_timestamp")
    .saveAsTable("capstone.default.bronze_daily_prices")
)

### Traders Ingestion

In [0]:
# Path of the historical traders CSV inside the raw-data volume.
traders_src = "/Volumes/capstone/default/raw_data/traders/historical/traders.csv"

In [0]:
# Load the historical traders CSV as-is: the first row provides the column
# names and Spark infers each column's type from the file contents.
traders_reader = (
    spark.read
    .format("csv")
    .option("header", True)
    .option("inferSchema", True)
)
traders_raw = traders_reader.load(traders_src)

In [0]:
# Add the bronze audit columns to the raw traders rows:
#   ingestion_timestamp - time of this load
#   source_file         - path the rows were read from
#   load_type           - constant marker "historical"
traders_bronze = (
    traders_raw
    # current_timestamp() already returns a Column; wrapping it in lit() is a
    # no-op, so it is used directly.
    .withColumn("ingestion_timestamp", current_timestamp())
    .withColumn("source_file", lit(traders_src))
    .withColumn("load_type", lit("historical"))
)

In [0]:
# Append the bronze traders rows to the Delta table
# capstone.default.bronze_traders, partitioned by ingestion_timestamp.
# NOTE(review): with mode("append"), re-running this cell inserts the same
# historical rows again. Partitioning on a full-precision timestamp also
# creates a new partition for every load - confirm both are intended.
(
    traders_bronze.write
    .format("delta")
    .mode("append")
    .partitionBy("ingestion_timestamp")
    .saveAsTable("capstone.default.bronze_traders")
)

### Trades Ingestion

In [0]:
# Path of the historical trades CSV inside the raw-data volume.
trades_src = "/Volumes/capstone/default/raw_data/trades/historical/trades.csv"

In [0]:
# Load the historical trades CSV as-is: the first row provides the column
# names and Spark infers each column's type from the file contents.
trades_reader = (
    spark.read
    .format("csv")
    .option("header", True)
    .option("inferSchema", True)
)
trades_raw = trades_reader.load(trades_src)

In [0]:
# Add the bronze audit columns to the raw trades rows:
#   ingestion_timestamp - time of this load
#   source_file         - path the rows were read from
#   load_type           - constant marker "historical"
trades_bronze = (
    trades_raw
    # current_timestamp() already returns a Column; wrapping it in lit() is a
    # no-op, so it is used directly.
    .withColumn("ingestion_timestamp", current_timestamp())
    .withColumn("source_file", lit(trades_src))
    .withColumn("load_type", lit("historical"))
)

In [0]:
# Append the bronze trades rows to the Delta table
# capstone.default.bronze_trades, partitioned by ingestion_timestamp.
# NOTE(review): with mode("append"), re-running this cell inserts the same
# historical rows again. Partitioning on a full-precision timestamp also
# creates a new partition for every load - confirm both are intended.
(
    trades_bronze.write
    .format("delta")
    .mode("append")
    .partitionBy("ingestion_timestamp")
    .saveAsTable("capstone.default.bronze_trades")
)

In [0]:
# Path of the historical FX-rates CSV inside the raw-data volume.
fx_rates_src = "/Volumes/capstone/default/raw_data/fx_rates/historical/fx_rates_usd_inr_5y.csv"

# Load the historical FX-rates CSV as-is: the first row provides the column
# names and Spark infers each column's type from the file contents.
fx_rates_reader = (
    spark.read
    .format("csv")
    .option("header", True)
    .option("inferSchema", True)
)
fx_rates_raw = fx_rates_reader.load(fx_rates_src)

### FX Rates Ingestion

In [0]:
# Add the bronze audit columns to the raw FX-rates rows:
#   ingestion_timestamp - time of this load
#   source_file         - path the rows were read from
#   load_type           - constant marker "historical"
fx_rates_bronze = (
    fx_rates_raw
    # current_timestamp() already returns a Column; wrapping it in lit() is a
    # no-op, so it is used directly.
    .withColumn("ingestion_timestamp", current_timestamp())
    .withColumn("source_file", lit(fx_rates_src))
    .withColumn("load_type", lit("historical"))
)

In [0]:
# Append the bronze FX-rates rows to the Delta table
# capstone.default.bronze_fx_rates, partitioned by ingestion_timestamp.
# NOTE(review): with mode("append"), re-running this cell inserts the same
# historical rows again. Partitioning on a full-precision timestamp also
# creates a new partition for every load - confirm both are intended.
(
    fx_rates_bronze.write
    .format("delta")
    .mode("append")
    .partitionBy("ingestion_timestamp")
    .saveAsTable("capstone.default.bronze_fx_rates")
)