In [0]:
# Notebook parameters. Both widgets default to an empty string and their
# values are read back as plain strings.
dbutils.widgets.text(
    name="env",
    defaultValue="",
    label=" Enter the environment in lower case",
)
dbutils.widgets.text(
    name="load_type",
    defaultValue="",
    label=" Enter the load_type (price/fundamentals) in lower case",
)
env, load_type = (dbutils.widgets.get(key) for key in ("env", "load_type"))

from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType, LongType, TimestampType

# Root of the raw-data area for the selected environment.
path = f"abfss://stock-project-container@bgardzinski1stock.dfs.core.windows.net/{env}/raw-data"

# Source folders read by the streaming readers below.
input_path_fund = f"{path}/daily-fundamentals"
input_path_pric = f"{path}/hourly_prices"

# Locations passed as `cloudFiles.schemaLocation`, one per stream.
schema_loc_fund = f"{path}/_schema_autoloader/fundamentals/"
schema_loc_pric = f"{path}/_schema_autoloader/prices/"

# Streaming checkpoint locations, one per stream.
checkpoint_loc_fund = f"{path}/_checkpoints/fundamentals_stream/"
checkpoint_loc_pric = f"{path}/_checkpoints/prices_stream/"



def read_fundamentals_stream():
    """Open a streaming read over the daily fundamentals CSV files.

    The files under ``input_path_fund`` are read with the ``cloudFiles``
    source in CSV format, with a header row and an explicit schema in which
    every column is nullable.

    Returns:
        A streaming DataFrame with the fundamentals columns.
    """
    # (column name, Spark type) in file order.
    columns = [
        ("symbol", StringType),
        ("shortName", StringType),
        ("sector", StringType),
        ("industry", StringType),
        ("country", StringType),
        ("currency", StringType),
        # The next columns are kept as strings; earlier versions of this
        # schema declared them as IntegerType / LongType.
        ("fullTimeEmployees", StringType),   # was IntegerType
        ("marketCap", StringType),           # was LongType
        ("enterpriseValue", StringType),     # was LongType
        ("totalRevenue", StringType),        # was LongType
        ("netIncomeToCommon", StringType),   # was LongType
        ("profitMargins", DoubleType),
        ("revenueGrowth", DoubleType),
        ("ebitda", StringType),              # was LongType
        ("enterpriseToRevenue", DoubleType),
        ("enterpriseToEbitda", DoubleType),
        ("bookValue", DoubleType),
        ("priceToBook", DoubleType),
        ("trailingPE", DoubleType),
        ("forwardPE", DoubleType),
        ("trailingEps", DoubleType),
        ("forwardEps", DoubleType),
        ("returnOnAssets", DoubleType),
        ("returnOnEquity", DoubleType),
        ("earningsQuarterlyGrowth", DoubleType),
        ("ipoExpectedDate", StringType),
        ("extract_time", TimestampType),
    ]
    schema = StructType(
        [StructField(col_name, col_type(), True) for col_name, col_type in columns]
    )

    reader = (
        spark.readStream.format("cloudFiles")
        .option("cloudFiles.format", "csv")
        .option("cloudFiles.schemaLocation", schema_loc_fund)
        .option('header', 'true')
        .schema(schema)
    )
    return reader.load(input_path_fund)

def write_fundamentals(df):
    """Write the fundamentals stream to the bronze Delta table and wait for it.

    Args:
        df: Streaming DataFrame to persist, e.g. the result of
            ``read_fundamentals_stream()``.

    Returns:
        None. The call blocks until the streaming query has terminated.
    """
    query = (
        df.writeStream
        .format("delta")
        .option("checkpointLocation", checkpoint_loc_fund)
        # `once=True` is deprecated; `availableNow=True` likewise processes
        # all data available at start and then stops, so the notebook still
        # behaves like an incremental batch job.
        .trigger(availableNow=True)
        .toTable(f"hive_metastore.{env}_bronze.fundamentals")
    )
    query.awaitTermination()



def read_prices_stream():
    """Open a streaming read over the hourly prices CSV files.

    The files under ``input_path_pric`` are read with the ``cloudFiles``
    source in CSV format, with a header row and an explicit schema in which
    every column is nullable.

    Returns:
        A streaming DataFrame with the prices columns.
    """
    # Columns between `symbol` and `extract_time` are all doubles.
    double_columns = (
        "current_price",
        "open",
        "day_high",
        "day_low",
        "previous_close",
        "volume",
        "market_cap",
    )
    fields = [StructField("symbol", StringType(), True)]
    fields += [StructField(col_name, DoubleType(), True) for col_name in double_columns]
    fields.append(StructField("extract_time", TimestampType(), True))
    schema = StructType(fields)

    reader = (
        spark.readStream.format("cloudFiles")
        .option("cloudFiles.format", "csv")
        .option("cloudFiles.schemaLocation", schema_loc_pric)
        .option("header", "true")
        .schema(schema)
    )
    return reader.load(input_path_pric)

def write_prices(df):
    """Write the prices stream to the bronze Delta table and wait for it.

    Args:
        df: Streaming DataFrame to persist, e.g. the result of
            ``read_prices_stream()``.

    Returns:
        None. The call blocks until the streaming query has terminated.
    """
    query = (
        df.writeStream
        .format("delta")
        .option("checkpointLocation", checkpoint_loc_pric)
        # `once=True` is deprecated; `availableNow=True` likewise processes
        # all data available at start and then stops, so the notebook still
        # behaves like an incremental batch job.
        .trigger(availableNow=True)
        .toTable(f"hive_metastore.{env}_bronze.prices")
    )
    query.awaitTermination()


# Run the ingestion selected by the `load_type` widget.
# The short flags ("pric"/"fund") and the full words are both accepted, and
# surrounding whitespace / letter case are ignored, so a value typed by hand
# is not rejected for a trivial mismatch.
_requested_load = load_type.strip().lower()
if _requested_load in ("pric", "price", "prices"):
    df = read_prices_stream()
    write_prices(df)
elif _requested_load in ("fund", "fundamentals"):
    df = read_fundamentals_stream()
    write_fundamentals(df)
else:
    print(
        "Please enter a valid flag "
        f"(price/pric or fundamentals/fund); got {load_type!r}"
    )

    

In [0]:
# Preview the bronze prices table for the environment selected in the `env`
# widget (previously hard-coded to the dev schema).
display(spark.sql(f"SELECT * FROM hive_metastore.{env}_bronze.prices"))

In [0]:
# Print the column listing of the bronze fundamentals table without
# truncating long values.
(
    spark.sql(f"DESCRIBE hive_metastore.{env}_bronze.fundamentals")
    .show(truncate=False)
)