In [0]:
# Notebook parameter: the environment name typed into the "env" widget.
# It is interpolated into the storage path and the target table name further down.
# NOTE(review): defaultValue is "", so if the widget is left blank `env` is an
# empty string and the derived path / table name will be malformed — confirm
# callers always supply a value.
dbutils.widgets.text(name="env",defaultValue="",label=" Enter the environment in lower case")
env = dbutils.widgets.get("env")

from pyspark.sql.types import StructType, StructField, StringType, IntegerType, DoubleType, LongType, TimestampType, DateType

# Environment-specific root inside the storage container; everything below hangs off it.
path = f"abfss://stock-project-container@bgardzinski1stock.dfs.core.windows.net/{env}"

# Directory the fundamentals stream reads from.
input_path_fund = f"{path}/raw-data/daily-fundamentals"
# Passed to the reader as `cloudFiles.schemaLocation`.
schema_loc_fund = f"{path}/raw-data/_schema_autoloader/fundamentals/"
# Passed to the writer as `checkpointLocation`.
checkpoint_loc_fund = f"{path}/bronze/_checkpoints/fundamentals_stream/"



def read_fundamentals_stream():
    """Open a `cloudFiles` CSV stream over the raw fundamentals directory.

    The stream reads from ``input_path_fund`` with an explicit schema (no
    inference), treats the first line of each file as a header, and uses
    ``schema_loc_fund`` as the ``cloudFiles.schemaLocation``.

    Returns:
        The streaming DataFrame returned by ``spark.readStream ... .load()``.
    """
    # (column name, Spark type class) in declaration order. Every column is
    # nullable. Several count/currency-amount columns are deliberately kept as
    # strings here, exactly as declared by the original schema.
    column_types = [
        ("symbol", StringType),
        ("shortName", StringType),
        ("sector", StringType),
        ("industry", StringType),
        ("country", StringType),
        ("currency", StringType),
        ("fullTimeEmployees", StringType),
        ("marketCap", StringType),
        ("enterpriseValue", StringType),
        ("totalRevenue", StringType),
        ("netIncomeToCommon", StringType),
        ("profitMargins", DoubleType),
        ("revenueGrowth", DoubleType),
        ("ebitda", StringType),
        ("enterpriseToRevenue", DoubleType),
        ("enterpriseToEbitda", DoubleType),
        ("bookValue", DoubleType),
        ("priceToBook", DoubleType),
        ("trailingPE", DoubleType),
        ("forwardPE", DoubleType),
        ("trailingEps", DoubleType),
        ("forwardEps", DoubleType),
        ("returnOnAssets", DoubleType),
        ("returnOnEquity", DoubleType),
        ("earningsQuarterlyGrowth", DoubleType),
        ("ipoExpectedDate", StringType),
        ("extract_time", TimestampType),
        ("extract_date", DateType),
        ("year", IntegerType),
        ("month", IntegerType),
        ("day", IntegerType),
    ]
    fundamentals_schema = StructType(
        [StructField(col_name, col_type(), True) for col_name, col_type in column_types]
    )

    # Configure the reader one step at a time, in the same order as before.
    reader = spark.readStream.format("cloudFiles")
    reader = reader.option("cloudFiles.format", "csv")
    reader = reader.option("cloudFiles.schemaLocation", schema_loc_fund)
    reader = reader.option('header', 'true')
    reader = reader.schema(fundamentals_schema)
    return reader.load(input_path_fund)

def write_fundamentals(df):
    """Write the fundamentals stream to the bronze Delta table and wait for it.

    The stream is written in Delta format to
    ``hive_metastore.{env}_bronze.fundamentals`` using ``checkpoint_loc_fund``
    as the checkpoint location, with an ``availableNow`` trigger. The call
    blocks until the streaming query terminates.

    Args:
        df: Streaming DataFrame to persist (for example the result of
            ``read_fundamentals_stream``).

    Returns:
        None.
    """
    # Keep the StreamingQuery handle separate from the blocking wait: the
    # previous code bound the result of awaitTermination() (always None) to
    # `query`, which misleadingly looked like a query handle.
    query = (
        df.writeStream
        .format("delta")
        .option("checkpointLocation", checkpoint_loc_fund)
        .trigger(availableNow=True)
        .toTable(f"hive_metastore.{env}_bronze.fundamentals")
    )
    query.awaitTermination()


# Run the ingestion: open the raw fundamentals stream, then write it to the
# bronze table (write_fundamentals blocks until the query terminates).
df = read_fundamentals_stream()
write_fundamentals(df)


    

In [0]:
# Drops the silver fundamentals table in the dev schema if it exists.
# NOTE(review): this is hard-coded to `dev_silver`, whereas the rest of this
# notebook targets `{env}_bronze` — looks like a leftover maintenance cell;
# confirm it is intended to run regardless of the selected `env`.
spark.sql("DROP TABLE IF EXISTS hive_metastore.dev_silver.fundamentals")

In [0]:
# Print the column layout of the bronze fundamentals table for the selected
# environment, without truncating long values.
spark.sql(f"DESCRIBE hive_metastore.{env}_bronze.fundamentals").show(truncate=False)