# Silver Layer: Posts ETL
This section merges bronze posts with their patch data (patch values take precedence), enriches each post with its publish timestamp from the post details table, and upserts the result into the silver posts table. Ingestion events are logged for traceability.

In [0]:
%sql
-- Build exactly one merged row per post_url from the bronze layer.
--   * posts        : base rows (post_url, publish date, link, title, content)
--   * patch        : optional corrections; patch values take priority over posts values
--   * post_details : supplies the publish timestamp
-- Both joins are LEFT JOINs, so every posts row is kept even when it has no
-- patch or post_details match. Either join may also fan one post out into
-- several rows, which is why the rows are ranked and only rn = 1 is kept.
CREATE OR REPLACE TEMP VIEW merged_posts_dedup AS
SELECT
  post_url,
  post_publish_date,
  post_publish_timestamp,
  link,
  title,
  content
FROM (
  SELECT
    posts.post_url AS post_url,
    CAST(posts.post_publish_date AS DATE) AS post_publish_date,
    post_details.post_timestamp AS post_publish_timestamp,
    COALESCE(patch.true_url, posts.link) AS link,
    COALESCE(patch.title, posts.title) AS title,
    -- Last resort for content is the unpatched posts.title.
    COALESCE(patch.content, posts.content, posts.title) AS content,
    ROW_NUMBER() OVER (
      PARTITION BY posts.post_url
      ORDER BY
        -- Rows that carry patch values rank first. NULLS LAST is already the
        -- default for DESC in Spark SQL; it is spelled out to make the intent visible.
        patch.true_url DESC NULLS LAST,
        patch.title DESC NULLS LAST,
        patch.content DESC NULLS LAST,
        -- Tie-breakers: without them the winning row is arbitrary whenever
        -- post_details or posts holds several rows for the same post_url, and
        -- the view could return different values on different runs. Together
        -- with the patch columns above, every input that feeds an output column
        -- is part of the ordering, so any remaining ties are between rows that
        -- produce identical output. Preferring the latest timestamp/date is a
        -- convention chosen here, not a documented business rule.
        post_details.post_timestamp DESC NULLS LAST,
        posts.post_publish_date DESC NULLS LAST,
        posts.link DESC NULLS LAST,
        posts.title DESC NULLS LAST,
        posts.content DESC NULLS LAST
    ) AS rn
  FROM
    IDENTIFIER (:BRONZE_CATALOG || "." || :BRONZE_SCHEMA || "." || :BRONZE_POSTS_TABLE) AS posts
  LEFT JOIN
    IDENTIFIER (:BRONZE_CATALOG || "." || :BRONZE_SCHEMA || "." || :BRONZE_POST_PATCH_TABLE) AS patch
  ON
    posts.post_url = patch.post_url
  LEFT JOIN
    IDENTIFIER (:BRONZE_CATALOG || "." || :BRONZE_SCHEMA || "." || :BRONZE_POST_DETAILS_TABLE) AS post_details
  ON
    posts.post_url = post_details.post_url
)
WHERE rn = 1;

-- Ensure the silver posts Delta table exists. IF NOT EXISTS makes this a no-op
-- when the table is already present, so the cell can be re-run safely.
-- Column names match the columns exposed by the merged_posts_dedup view.
CREATE TABLE IF NOT EXISTS
  IDENTIFIER (:SILVER_CATALOG || "." || :SILVER_SCHEMA || "." || :SILVER_POSTS_TABLE)
(
  post_url               STRING,     -- key the MERGE statement matches on
  post_publish_date      DATE,
  post_publish_timestamp TIMESTAMP,
  link                   STRING,
  title                  STRING,
  content                STRING
)
USING DELTA;

-- Upsert the deduplicated posts into the silver posts table, keyed on post_url.
-- The source view yields one row per post_url, so no target row can be matched
-- by more than one source row.
MERGE INTO IDENTIFIER (:SILVER_CATALOG || "." || :SILVER_SCHEMA || "." || :SILVER_POSTS_TABLE) AS t
USING merged_posts_dedup AS s
-- Null-safe equality: with plain `=`, a source row whose post_url is NULL can
-- never match, so it would be inserted again on every run.
ON t.post_url <=> s.post_url
-- Only rewrite rows whose payload actually changed. IS DISTINCT FROM treats
-- NULL as a comparable value, so NULL -> value and value -> NULL changes are
-- detected while NULL -> NULL is not. This avoids rewriting unchanged rows
-- (and their data files) on every run; the resulting table contents are the same.
WHEN MATCHED AND (
     t.post_publish_date      IS DISTINCT FROM s.post_publish_date
  OR t.post_publish_timestamp IS DISTINCT FROM s.post_publish_timestamp
  OR t.link                   IS DISTINCT FROM s.link
  OR t.title                  IS DISTINCT FROM s.title
  OR t.content                IS DISTINCT FROM s.content
) THEN
  -- post_url is the match key and therefore already equal; it is not reassigned.
  UPDATE SET
    post_publish_date = s.post_publish_date,
    post_publish_timestamp = s.post_publish_timestamp,
    link = s.link,
    title = s.title,
    content = s.content
WHEN NOT MATCHED THEN
  INSERT (post_url, post_publish_date, post_publish_timestamp, link, title, content)
  VALUES (s.post_url, s.post_publish_date, s.post_publish_timestamp, s.link, s.title, s.content);