In [None]:
def scd2_upsert_dim_video():
    """Apply a Type-2 slowly-changing-dimension upsert to ``dim_video``.

    Reads the distinct ``(video, video_title)`` pairs from
    ``staging_aggregated_video`` and runs two DML statements inside a single
    BigQuery multi-statement transaction:

    1. Expire the current ``dim_video`` row of every video whose title differs
       from staging (``is_current = FALSE``, ``effective_to`` = run timestamp).
    2. Insert a new current row for every staged video that has no current
       row, i.e. brand-new videos and the ones just expired in step 1.

    Differences from a plain MERGE + INSERT pair, and why they matter:

    * Both statements sit in one transaction, so a failure in step 2 cannot
      leave videos expired without a replacement current row.
    * Both steps read the same de-duplicated staging projection, so repeated
      staging rows cannot insert several current rows for one video.
    * Titles are compared with ``IS DISTINCT FROM`` so a change to or from a
      NULL title is detected (``!=`` evaluates to NULL and is skipped).
    * One timestamp is captured per run, so the expired version's
      ``effective_to`` equals the new version's ``effective_from``.

    Uses ``FULL_DATASET`` and ``run_query``, which are defined elsewhere in
    the notebook. Returns ``None``.

    NOTE(review): this still assumes staging holds at most one title per
    video. Two different titles for the same video would yield two current
    rows or a MERGE multi-match error - confirm the upstream guarantee.
    """
    print("Running SCD2 MERGE → preserving history...")

    merge_query = f"""
    -- One timestamp for the whole run, shared by both steps, so version
    -- intervals are contiguous (old effective_to = new effective_from).
    DECLARE scd2_run_ts TIMESTAMP DEFAULT CURRENT_TIMESTAMP();

    -- Both steps commit together; an error before COMMIT rolls both back.
    BEGIN TRANSACTION;

    -- STEP 1: Expire old versions when title changed
    MERGE `{FULL_DATASET}.dim_video` T
    USING (
      SELECT
        video AS video_id,
        video_title
      FROM `{FULL_DATASET}.staging_aggregated_video`
      GROUP BY video, video_title
    ) S
    ON T.video_id = S.video_id
   AND T.is_current = TRUE
   AND T.video_title IS DISTINCT FROM S.video_title

    WHEN MATCHED THEN
      UPDATE SET
        is_current = FALSE,
        effective_to = scd2_run_ts;

    -- STEP 2: Insert new current version (for changed OR new videos).
    -- Changed videos no longer have a current row after step 1, so the
    -- "no current row" test covers both cases.
    INSERT INTO `{FULL_DATASET}.dim_video` (
      surrogate_key,
      video_id,
      video_title,
      effective_from,
      effective_to,
      is_current
    )
    SELECT
      GENERATE_UUID(),
      src.video_id,
      src.video_title,
      scd2_run_ts,
      NULL,
      TRUE
    FROM (
      SELECT
        video AS video_id,
        video_title
      FROM `{FULL_DATASET}.staging_aggregated_video`
      GROUP BY video, video_title
    ) src
    LEFT JOIN `{FULL_DATASET}.dim_video` dim
      ON src.video_id = dim.video_id
     AND dim.is_current = TRUE
    WHERE dim.video_id IS NULL;

    COMMIT TRANSACTION;
    """

    print("Running SCD2 MERGE...")
    run_query(merge_query)
    print("SCD2 complete! History preserved!")

In [None]:
# Demo: push one title change for a single video through the SCD2 upsert.

# Step 1 - restore the original title in the staging and bronze tables.
# Note the key column differs: `video` in staging, `video_id` in bronze.
reset_title_statements = (
    f"""
UPDATE `{FULL_DATASET}.staging_aggregated_video`
SET video_title = 'How I Would Learn Data Science (If I Had to Start Over)'
WHERE video = '4OZip0cgOho'
""",
    f"""
UPDATE `{FULL_DATASET}.bronze_aggregated_video`
SET video_title = 'How I Would Learn Data Science (If I Had to Start Over)'
WHERE video_id = '4OZip0cgOho'
""",
)
for reset_sql in reset_title_statements:
    run_query(reset_sql)

# Step 2 - first upsert. It writes a version carrying the original title when
# dim_video has no current row for this video, or when the current row's
# title differs from staging.
scd2_upsert_dim_video()

# Step 3 - change the title in staging only, to simulate an upstream rename.
demo_title_sql = f"""
UPDATE `{FULL_DATASET}.staging_aggregated_video`
SET video_title = 'santhkumar_demo_v22'
WHERE video = '4OZip0cgOho'
"""
run_query(demo_title_sql)

# Step 4 - second upsert: expires the version from step 2 and adds a new
# current row with the demo title, which is the history being demonstrated.
scd2_upsert_dim_video()

Running query...
Done. Rows affected: 1
Running query...
Done. Rows affected: 1
Running SCD2 MERGE → preserving history...
Running SCD2 MERGE...
Running query...
Done. Rows affected: N/A
SCD2 complete! History preserved!
Running query...
Done. Rows affected: 1
Running SCD2 MERGE → preserving history...
Running SCD2 MERGE...
Running query...
Done. Rows affected: N/A
SCD2 complete! History preserved!


In [None]:
# List every dim_video version stored for the demo video, newest first.
print("SCD2 HISTORY")

history_sql = f"""
    SELECT
        video_title,
        is_current,
        effective_from,
        effective_to
    FROM `{FULL_DATASET}.dim_video`
    WHERE video_id = '4OZip0cgOho'
    ORDER BY effective_from DESC
"""
history_job = client.query(history_sql)
# Kept under the name `df` because later cells may read it.
df = history_job.to_dataframe()

display(df)

SCD2 HISTORY


Unnamed: 0,video_title,is_current,effective_from,effective_to
0,santhkumar_demo_v22,True,2025-11-11 05:48:37.783750+00:00,NaT
1,How I Would Learn Data Science (If I Had to St...,False,2025-11-11 05:48:31.111132+00:00,2025-11-11 05:48:35.796193+00:00
2,santhkumar_demo_v22,False,2025-11-11 05:47:16.136900+00:00,2025-11-11 05:48:29.198607+00:00
3,How I Would Learn Data Science (If I Had to St...,False,2025-11-11 05:42:31.833588+00:00,2025-11-11 05:47:14.286448+00:00
