In [0]:
from pyspark.sql import functions as F
from delta.tables import DeltaTable

# --- Configuration ---------------------------------------------------------
# Catalog and schema names, plus the schema-qualified source and target tables.
catalog_name = "electricity-project"
silver_schema = "silver"
gold_schema = "gold"
input_table = "silver.price_features"
output_table = "gold.actual_prices"

# --- Session context -------------------------------------------------------
# Switch to the project catalog (the name contains a hyphen, hence the
# backticks), make sure the gold schema exists, then make it the current one.
spark.sql(f"USE CATALOG `{catalog_name}`")
spark.sql(f"CREATE SCHEMA IF NOT EXISTS {gold_schema}")
spark.sql(f"USE SCHEMA {gold_schema}")

# --- Source data -----------------------------------------------------------
# Load the silver feature table by its schema-qualified name.
df = spark.table(input_table)

# --- Actuals projection ----------------------------------------------------
# Keep only the timestamp key and the two columns written to the gold table.
actuals_df = df.select("datetime", "price_nok", "temperature")

# =========================
# MERGE INTO GOLD ACTUALS
# =========================
# Upsert the selected actuals into the gold table, keyed on `datetime`.
#
# The source is cleaned on the merge key first, because:
#   * a NULL key never satisfies `t.datetime = s.datetime`, so such rows would
#     be inserted again on every run and pile up in the target;
#   * several source rows matching the same target row make a Delta MERGE
#     fail, and unmatched duplicates would be inserted as duplicates.
# NOTE(review): dropDuplicates keeps an arbitrary row per key. If the silver
# table can legitimately hold several rows per datetime, replace this with a
# deterministic rule (e.g. an aggregate or a window ordered by load time).
merge_key = "datetime"
value_columns = ["price_nok", "temperature"]

merge_source_df = (
    actuals_df
    .where(F.col(merge_key).isNotNull())
    .dropDuplicates([merge_key])
)

if spark.catalog.tableExists(output_table):

    delta_out = DeltaTable.forName(spark, output_table)

    (
        delta_out.alias("t")
        .merge(
            merge_source_df.alias("s"),
            f"t.{merge_key} = s.{merge_key}"
        )
        # Existing timestamps: refresh the measured values only.
        .whenMatchedUpdate(set={
            column: f"s.{column}" for column in value_columns
        })
        # New timestamps: insert the key together with the values.
        .whenNotMatchedInsert(values={
            column: f"s.{column}" for column in [merge_key, *value_columns]
        })
        .execute()
    )

else:
    # First run: the target does not exist yet, so create it from the same
    # cleaned source that later merges use.
    (
        merge_source_df
        .write
        .format("delta")
        .mode("overwrite")
        .saveAsTable(output_table)
    )


In [0]:
%sql
-- Sanity check on the gold actuals table: earliest timestamp, latest
-- timestamp, and total row count.
SELECT min(datetime), max(datetime), count(*)
FROM `electricity-project`.gold.actual_prices;

In [0]:
%sql
-- Preview the 10 rows with the earliest timestamps.
SELECT *
FROM `electricity-project`.gold.actual_prices
ORDER BY datetime ASC
LIMIT 10;

In [0]:
%sql
-- Preview the 10 rows with the latest timestamps.
SELECT * FROM `electricity-project`.gold.actual_prices
ORDER BY datetime DESC
LIMIT 10;
