In [0]:
# Notebook parameter: the catalog to run against (widget default is "ptd_dev").
dbutils.widgets.text("catalog", "ptd_dev")

# Strip stray whitespace and fail fast with a readable message; an empty value
# would otherwise surface as an opaque SQL parse error from USE CATALOG.
catalog = dbutils.widgets.get("catalog").strip()
if not catalog:
    raise ValueError("Widget 'catalog' must not be empty")

schema_bronze = "bronze"
schema_silver = "silver"

# Make the selected catalog current so the two-part <schema>.<table> names
# used throughout the notebook resolve inside it.
spark.sql(f"USE CATALOG {catalog}")

# Create the target table on first run. The schema name comes from
# `schema_silver` so that changing the config variable actually takes effect.
spark.sql(f"""
CREATE TABLE IF NOT EXISTS {schema_silver}.ticker_news_tone (
    ticker STRING,
    date DATE,
    weighted_gdelt_tone_avg DOUBLE,
    news_count LONG,
    ingestion_ts TIMESTAMP
)
USING DELTA
""")

In [0]:
from pyspark.sql import functions as F

# Source rows to be aggregated (read lazily; nothing is computed here).
tonechart = spark.table(f"{schema_bronze}.gdelt_tonechart")

# (company_name, ticker) lookup, upper-cased on both columns so the name match
# in the join is case-insensitive.
# distinct() collapses repeated pairs: if the snapshot table holds the same
# company/ticker more than once, the join would otherwise emit every tone row
# once per duplicate and inflate the summed counts. A name that legitimately
# maps to several tickers is still kept as several rows.
sp100_universe = (
    spark.table(f"{schema_bronze}.universe_sp100_snapshot")
    .select(
        F.upper(F.col("company_name")).alias("company_name"),
        F.upper(F.col("ticker")).alias("ticker"),
    )
    .distinct()
)

# Current contents of the target table; used to skip keys that are already loaded.
ticker_news_tone = spark.table(f"{schema_silver}.ticker_news_tone")

In [0]:
# Attach a ticker to each tone row by matching the upper-cased company name
# against the (already upper-cased) universe lookup. Rows without a match are
# dropped by the inner join.
company_name_matches = F.upper(tonechart["company_name"]) == sp100_universe["company_name"]
tone_with_ticker = tonechart.join(sp100_universe, on=company_name_matches, how="inner")

# Keep only rows whose (ticker, date) key is not yet present in the target table.
already_loaded_keys = ticker_news_tone.select("ticker", "date")
to_process = tone_with_ticker.join(
    already_loaded_keys,
    on=["ticker", "date"],
    how="left_anti",
)

In [0]:
# Total article count per group; reused as the weighted-mean denominator and as news_count.
total_count = F.sum(F.col("count"))

agg = (
    to_process
    # Normalise to a calendar DATE *before* grouping so the grouping key has the
    # same type and grain as the target table's (ticker, date) key. Grouping on
    # the raw value and converting afterwards could emit several rows for one day.
    .groupBy("ticker", F.to_date("date").alias("date"))
    .agg(
        # Count-weighted mean of `bin`. A zero total makes the denominator NULL,
        # so the result is NULL rather than a divide-by-zero error under ANSI mode.
        (F.sum(F.col("bin") * F.col("count")) / F.when(total_count != 0, total_count))
        .cast("double")
        .alias("weighted_gdelt_tone_avg"),
        total_count.cast("long").alias("news_count"),
    )
    .withColumn("ingestion_ts", F.current_timestamp())
    # Re-check against the target on the normalised key: the write below is an
    # append, so a key that slipped past the earlier filter (e.g. because the raw
    # date type did not compare equal to DATE) must not be inserted twice.
    .join(ticker_news_tone.select("ticker", "date"), on=["ticker", "date"], how="left_anti")
    # Column order and types mirror the CREATE TABLE definition.
    .select("ticker", "date", "weighted_gdelt_tone_avg", "news_count", "ingestion_ts")
)

# Append, not overwrite: `agg` contains only keys that are NOT yet in the table,
# so overwriting would replace all previously loaded history with just this
# batch (and empty the table on a run with no new data).
# NOTE(review): if earlier overwrite+overwriteSchema runs changed the table's
# column types, the first append may need a one-off schema fix — confirm.
agg.write.mode("append").saveAsTable(f"{schema_silver}.ticker_news_tone")
