<a href="https://colab.research.google.com/github/ShikharV010/gist_daily_runs/blob/main/Gush_SEO_Tracker.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
pip install pandas sqlalchemy psycopg2-binary python-dateutil gspread oauth2client

In [None]:
# --- CONFIG ---
import os
import pandas as pd
from sqlalchemy import create_engine, text

# Connection string: the DB_URL environment variable overrides the built-in
# default, matching the query-level cell further down in this notebook.
# NOTE(review): the default embeds database credentials in source; rotate
# them and supply DB_URL through the environment instead.
DB_URL = os.getenv(
    "DB_URL",
    "postgresql+psycopg2://airbyte_user:airbyte_user_password@"
    "gw-rds-prod.celzx4qnlkfp.us-east-1.rds.amazonaws.com:5432/gw_prod",
)

SCHEMA_GSC  = "airbyte_ingestion"
TBL_GSC_PQD = "gush_gsc_page_query_daily"  # (kept if you still need query-level)
TBL_GSC_PD  = "gush_gsc_page_daily"        # <-- new: page-level table

SCHEMA_DICT = "airbyte_ingestion"
TABLE_DICT  = "gtm_seo_gush_seo_pages"

engine = create_engine(DB_URL)

# All three source reads share one connection instead of opening a new one
# per query; the SQL text and the resulting DataFrames are unchanged.
with engine.connect() as con:
    # --- READ GSC: page_query_daily (unchanged; optional) ---
    gsc_pqd_sql = text(f"""
        SELECT
            date::date          AS date,
            page                AS page,
            query               AS query,
            clicks::bigint      AS clicks,
            impressions::bigint AS impressions,
            ctr::numeric        AS ctr,
            position::numeric   AS position
        FROM {SCHEMA_GSC}.{TBL_GSC_PQD}
        WHERE date IS NOT NULL
    """)
    gsc_page_query_daily_df = pd.read_sql(gsc_pqd_sql, con)

    # --- READ GSC: page_daily (use this for page-level performance) ---
    gsc_pd_sql = text(f"""
        SELECT
            date::date          AS date,
            page                AS page,
            clicks::bigint      AS clicks,
            impressions::bigint AS impressions
        FROM {SCHEMA_GSC}.{TBL_GSC_PD}
        WHERE date IS NOT NULL
    """)
    gsc_page_daily_df = pd.read_sql(gsc_pd_sql, con)

    # --- READ Pages/Keywords (only the columns you need; exact hero_url strings) ---
    # Placeholder hero_url values ('', 'New', a lone backslash) are excluded.
    dict_sql = text(f"""
        SELECT
            primary_keyword,
            secondary_keyword,
            hero_url,
            volume
        FROM {SCHEMA_DICT}.{TABLE_DICT}
        WHERE hero_url IS NOT NULL
          AND hero_url <> ''
          AND hero_url <> 'New'
          AND hero_url <> '\\'
    """)
    dict_raw_df = pd.read_sql(dict_sql, con)

#display(gsc_page_daily_df.head(3))
#display(dict_raw_df.head(3))


GET QUERY AND PAGE PERFORMANCE

In [None]:
# -*- coding: utf-8 -*-
"""
Weekly 28-day SEO performance (daily sums) wide table

Joins (EXACT string equality):
  - airbyte_ingestion.gush_gsc_page_query_daily  (date, page, query, clicks, impressions)
    ×
  - airbyte_ingestion.gtm_seo_gush_seo_pages     (primary_keyword, secondary_keyword, hero_url, volume)
"""

import os
import pandas as pd
from sqlalchemy import create_engine, text

# ─────────────────────────────────────────────────────────────
# CONFIG
# ─────────────────────────────────────────────────────────────
# Connection string; the DB_URL environment variable takes precedence over
# the built-in default below.
# NOTE(review): the default embeds database credentials in source — consider
# removing it and requiring the environment variable.
DB_URL = os.getenv(
    "DB_URL",
    "postgresql+psycopg2://airbyte_user:airbyte_user_password@"
    "gw-rds-prod.celzx4qnlkfp.us-east-1.rds.amazonaws.com:5432/gw_prod"
)

# Query-level GSC table read in section 1.
SCHEMA_GSC = "airbyte_ingestion"
TABLE_GSC  = "gush_gsc_page_query_daily"

# Keyword dictionary table (primary/secondary keyword, hero_url, volume).
SCHEMA_DICT = "airbyte_ingestion"
TABLE_DICT  = "gtm_seo_gush_seo_pages"

# Weekly anchors starting from 29 Jun (change if needed).
# START_ANCHOR_STR is the first anchor date; ANCHOR_FREQ is the pandas
# frequency string used as the spacing between anchors in pd.date_range.
START_ANCHOR_STR = "2025-06-29"
ANCHOR_FREQ      = "7D"

# When set to a path, the final wide table is also written there as CSV;
# None skips the export.
OUTPUT_CSV = None  # e.g., "weekly_28d_sums_wide.csv"

# ─────────────────────────────────────────────────────────────
# Helpers
# ─────────────────────────────────────────────────────────────
def label_for_anchor(ts: pd.Timestamp) -> str:
    """Return the column-label prefix for an anchor date, e.g. ``29Jun``.

    The label is the day of month without zero padding followed by the
    abbreviated month name from ``strftime('%b')``. The year is not part of
    the label.
    """
    day_part = str(ts.day)
    month_part = ts.strftime("%b")
    return day_part + month_part

def compute_28d_sums(joined_df: pd.DataFrame, anchor: pd.Timestamp) -> pd.DataFrame:
    """
    Sum clicks and impressions over the 28 days ending on ``anchor``.

    Args:
        joined_df: Frame with ``date``, ``primary_keyword``,
            ``secondary_keyword``, ``hero_url``, ``clicks`` and
            ``impressions`` columns.
        anchor: Last day (inclusive) of the 28-day window.

    Returns:
        One row per (primary_keyword, secondary_keyword, hero_url) seen in
        the window, with the two metric columns named
        ``<label>_clicks`` and ``<label>_impressions`` where ``<label>``
        comes from ``label_for_anchor(anchor)``. When no rows fall in the
        window, an empty frame with those five columns is returned.
    """
    key_cols = ["primary_keyword", "secondary_keyword", "hero_url"]
    prefix = label_for_anchor(anchor)
    clicks_col = f"{prefix}_clicks"
    impressions_col = f"{prefix}_impressions"
    out_cols = key_cols + [clicks_col, impressions_col]

    # anchor - 27 days through anchor, both ends included, is 28 calendar days.
    first_day = anchor - pd.Timedelta(days=27)
    dates = pd.to_datetime(joined_df["date"])
    in_window = dates.between(first_day, anchor)
    window = joined_df.loc[in_window, key_cols + ["clicks", "impressions"]].copy()

    if window.empty:
        return pd.DataFrame(columns=out_cols)

    totals = window.groupby(key_cols, as_index=False).agg(
        **{
            clicks_col: ("clicks", "sum"),
            impressions_col: ("impressions", "sum"),
        }
    )
    return totals[out_cols]

# ─────────────────────────────────────────────────────────────
# 1) Read data
# ─────────────────────────────────────────────────────────────
engine = create_engine(DB_URL)

# Both statements are built up front; each read then runs on its own
# short-lived connection.
gsc_sql = text(f"""
        SELECT
            date::date          AS date,
            page                AS page,
            query               AS query,
            clicks::bigint      AS clicks,
            impressions::bigint AS impressions
        FROM {SCHEMA_GSC}.{TABLE_GSC}
        WHERE date IS NOT NULL
    """)

# Dictionary rows with placeholder hero_url values ('', 'New', a lone
# backslash) or an empty secondary_keyword are filtered out in SQL.
dict_sql = text(f"""
        SELECT
            primary_keyword,
            secondary_keyword,
            hero_url,
            volume
        FROM {SCHEMA_DICT}.{TABLE_DICT}
        WHERE hero_url IS NOT NULL AND hero_url <> '' AND hero_url <> 'New' AND hero_url <> '\\'
          AND secondary_keyword IS NOT NULL AND secondary_keyword <> ''
    """)

with engine.connect() as con:
    gsc_raw_df = pd.read_sql(gsc_sql, con)

with engine.connect() as con:
    dict_raw_df = pd.read_sql(dict_sql, con)

# ─────────────────────────────────────────────────────────────
# 2) Prep: standardize names (values remain unmodified / exact)
#    + DEDUP dictionary to one row per (primary, secondary, hero_url)
# ─────────────────────────────────────────────────────────────
# Work on copies whose column names are trimmed and lower-cased; the cell
# values themselves are left untouched so the later join stays exact.
gsc = gsc_raw_df.copy()
gsc.columns = [name.strip().lower() for name in gsc.columns]
kw = dict_raw_df.copy()
kw.columns = [name.strip().lower() for name in kw.columns]

# One dictionary row per (primary, secondary, hero_url) so the join cannot
# multiply GSC rows; when volumes differ within a key the largest is kept.
# NOTE(review): groupby's default dropna=True excludes rows whose key columns
# are null (e.g. a missing primary_keyword) — confirm that is intended.
kw_key_cols = ["primary_keyword", "secondary_keyword", "hero_url"]
kw_dedup = kw.groupby(kw_key_cols, as_index=False).agg({"volume": "max"})

# One GSC row per (date, page, query): duplicate source rows are summed.
gsc_daily = gsc.groupby(["date", "page", "query"], as_index=False)[
    ["clicks", "impressions"]
].sum()

# Every dictionary key, kept so keywords without any traffic still get a row.
base_keys = kw_dedup.loc[
    :, ["primary_keyword", "secondary_keyword", "hero_url", "volume"]
].copy()

# ─────────────────────────────────────────────────────────────
# 3) EXACT join: page == hero_url AND query == secondary_keyword
#    + re-aggregate after join to collapse any residual dupes
# ─────────────────────────────────────────────────────────────
# Inner join on exact string equality: GSC page == hero_url and
# GSC query == secondary_keyword. Only the dictionary columns plus the
# daily metrics are carried forward.
dict_cols = ["primary_keyword", "secondary_keyword", "hero_url", "volume"]
joined = gsc_daily.merge(
    kw_dedup[dict_cols],
    left_on=["page", "query"],
    right_on=["hero_url", "secondary_keyword"],
    how="inner",
)[["date", *dict_cols, "clicks", "impressions"]]

# Safety: collapse to one row per (date, primary, secondary, hero).
# dropna=False is required because `volume` is one of the grouping keys:
# with the default dropna=True every row whose dictionary volume is NULL
# would be discarded here, and that keyword would show zero traffic.
joined = (
    joined.groupby(["date", *dict_cols], as_index=False, dropna=False)
          .agg(clicks=("clicks", "sum"), impressions=("impressions", "sum"))
)

# Stop early with a clear message instead of producing an all-zero table.
if joined.empty:
    raise SystemExit("No rows after exact join. Verify hero_url and secondary_keyword match GSC page/query exactly.")

# ─────────────────────────────────────────────────────────────
# 4) Build weekly anchors (29 Jun → latest available date)
# ─────────────────────────────────────────────────────────────
# Anchor dates run from START_ANCHOR_STR in ANCHOR_FREQ steps and never go
# past the newest date present in the joined data.
start_anchor = pd.to_datetime(START_ANCHOR_STR)
max_date = pd.to_datetime(joined["date"]).max()
anchors = pd.date_range(start=start_anchor, end=max_date, freq=ANCHOR_FREQ)
anchors = anchors[anchors <= max_date]

# ─────────────────────────────────────────────────────────────
# 5) Compute weekly 28D sums and assemble wide
# ─────────────────────────────────────────────────────────────
static_cols = ["primary_keyword", "secondary_keyword", "volume", "hero_url"]
merge_keys = ["primary_keyword", "secondary_keyword", "hero_url"]

# Start from every dictionary key and left-join one pair of metric columns
# per anchor, so keys without traffic in a window keep their row.
wide = base_keys[static_cols].copy()
for anchor in anchors:
    wide = wide.merge(compute_28d_sums(joined, anchor), on=merge_keys, how="left")

# Windows without data come back as NaN from the left joins; report them as
# integer zeros.
metric_cols = [c for c in wide.columns if c.endswith(("_clicks", "_impressions"))]
if metric_cols:
    wide[metric_cols] = wide[metric_cols].fillna(0).astype("int64")

# ─────────────────────────────────────────────────────────────
# 6) Order columns: static first, then weekly pairs in *reverse chronological* order
# ─────────────────────────────────────────────────────────────
labels_latest_first = [label_for_anchor(anchor) for anchor in anchors[::-1]]
ordered_cols = static_cols + [
    col
    for lbl in labels_latest_first
    for col in (f"{lbl}_clicks", f"{lbl}_impressions")
]

final_wide_df = wide.reindex(columns=ordered_cols)

print(f"Rows: {final_wide_df.shape[0]}, Cols: {final_wide_df.shape[1]}")
print("First columns:", final_wide_df.columns[:8].tolist())
print("Last columns :", final_wide_df.columns[-6:].tolist())
final_wide_df.head(5)

# Optional CSV export, skipped while OUTPUT_CSV is falsy.
if OUTPUT_CSV:
    final_wide_df.to_csv(OUTPUT_CSV, index=False)
    print(f"Saved: {OUTPUT_CSV}")


In [None]:
# Render the query-level wide table built in the previous cell.
display(final_wide_df)

GET RANK

In [None]:
import requests
import pandas as pd

import os

# config
# The SERPER_API_KEY environment variable overrides the built-in default.
# NOTE(review): the default embeds an API key in source; rotate it and supply
# the key through the environment instead.
SERPER_API_KEY = os.getenv("SERPER_API_KEY", "6769b8e78f7e96c5ff1793582bebbe532085d6be")
API_URL = "https://google.serper.dev/search"

# Number of organic results requested per keyword, and the single sentinel
# stored when the hero URL is not among them or the lookup failed. The
# sentinel names the same limit that is actually requested.
MAX_RESULTS = 100
NOT_RANKED = "None or >100"

headers = {
    "X-API-KEY": SERPER_API_KEY,
    "Content-Type": "application/json"
}

# One lookup per distinct (secondary_keyword, hero_url) pair: the dictionary
# is not de-duplicated on these two columns, and the merge cell keeps only
# the first rank per pair anyway, so repeated pairs would only cost API calls.
df_keywords = dict_raw_df.drop_duplicates(
    subset=["secondary_keyword", "hero_url"]
).copy()

results = []

for idx, row in df_keywords.iterrows():
    keyword = str(row["secondary_keyword"]).strip()
    hero_url = str(row["hero_url"]).strip()

    payload = {
        "q": keyword,
        "gl": "us",
        "hl": "en",
        "num": MAX_RESULTS,
    }

    rank_value = NOT_RANKED
    try:
        res = requests.post(API_URL, headers=headers, json=payload, timeout=20)
        print(f"{idx}: status {res.status_code}")   # print status for debugging

        res.raise_for_status()
        organic = res.json().get("organic", [])

        # 1-based position of the first organic result whose link contains
        # the hero URL.
        # NOTE(review): this is substring containment, not equality, so a
        # hero_url that is a prefix of another page's URL also matches.
        rank_value = next(
            (
                pos
                for pos, hit in enumerate(organic, start=1)
                if hero_url in hit.get("link", "")
            ),
            NOT_RANKED,
        )
    except Exception as e:
        # Deliberate best-effort boundary: one failed keyword must not abort
        # the whole run, so the error is reported and the sentinel is stored.
        print(f"Error for {keyword}: {e}")

    results.append({
        "secondary_keyword": keyword,
        "hero_url": hero_url,
        "rank": rank_value
    })

# Explicit columns keep the frame usable by the merge cell even when there
# were no keywords to look up.
rank_df = pd.DataFrame(results, columns=["secondary_keyword", "hero_url", "rank"])
print(rank_df)


MERGE RANK AND WEEK ON WEEK QUERY & PAGE PERFORMANCE

In [None]:
# Columns shared by the rank table and the weekly wide table.
join_keys = ["secondary_keyword", "hero_url"]

# 1) Reduce the rank table to the join keys plus rank, one row per key pair
#    (the first occurrence wins).
rank_clean = rank_df.loc[:, join_keys + ["rank"]].drop_duplicates(
    subset=join_keys, keep="first"
)

# 2) Left join so every row of the wide table survives; pairs without a
#    rank entry get a missing rank.
merged = final_wide_df.merge(rank_clean, on=join_keys, how="left")

# 3) Put the descriptive columns and rank first, then the weekly metric
#    columns in their existing order.
front = ["primary_keyword", "secondary_keyword", "hero_url", "volume", "rank"]
rest = [col for col in merged.columns if col not in front]
merged = merged[[*front, *rest]]

merged.head()


WRITE QUERY AND PAGE PERFORMANCE

In [None]:
# --- Write merged to Postgres safely (no DROP) + refresh view ---

import pandas as pd
from sqlalchemy import create_engine, text, inspect

# ───────────── DB config ─────────────
import os

# The DB_URL environment variable overrides the built-in default, matching
# the query-level read cell earlier in this notebook.
# NOTE(review): the default embeds database credentials in source; rotate
# them and supply DB_URL through the environment instead.
engine = create_engine(
    os.getenv(
        "DB_URL",
        "postgresql+psycopg2://airbyte_user:airbyte_user_password@"
        "gw-rds-prod.celzx4qnlkfp.us-east-1.rds.amazonaws.com:5432/gw_prod",
    )
)

TABLE_SCHEMA = "gist"
TABLE_NAME   = "gist_gush_query_page_seo"
VIEW_NAME    = "vw_gist_gush_query_page_seo"

# ───────────── DataFrame to load ─────────────
assert 'merged' in globals(), "merged not found. Run the transform cell first."
df = merged.copy()
if df.empty:
    print("🛑 merged is empty; nothing to load.")
    engine.dispose()
    raise SystemExit

# Ensure schema exists
with engine.begin() as conn:
    conn.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{TABLE_SCHEMA}";'))

insp = inspect(engine)
table_exists = insp.has_table(TABLE_NAME, schema=TABLE_SCHEMA)

if not table_exists:
    # First run: create table from scratch based on DataFrame
    df.to_sql(
        name=TABLE_NAME,
        con=engine,
        schema=TABLE_SCHEMA,
        if_exists="replace",  # safe: no dependent view yet
        index=False,
        method="multi",
        chunksize=5_000,
    )
    print(f"✅ created {TABLE_SCHEMA}.{TABLE_NAME} with {len(df)} rows")
else:
    # Subsequent runs: handle new columns (weekly anchors), then TRUNCATE + APPEND
    # 1) Read existing column names (lowercased for comparison)
    with engine.connect() as conn:
        existing_cols = pd.read_sql(
            text("""
                SELECT column_name
                FROM information_schema.columns
                WHERE table_schema = :schema AND table_name = :table
                ORDER BY ordinal_position
            """),
            conn,
            params={"schema": TABLE_SCHEMA, "table": TABLE_NAME},
        )["column_name"].str.lower().tolist()

    # 2) Identify DataFrame columns the table does not have yet
    existing_lookup = set(existing_cols)
    missing = [c for c in df.columns if c.lower() not in existing_lookup]

    # 3) Add any missing columns:
    #    - text columns: primary_keyword, secondary_keyword, hero_url, and
    #      rank (rank holds integers and the "None or >..." strings, so it
    #      cannot be a numeric column)
    #    - static numeric column: volume (BIGINT)
    #    - metric columns (weekly *_clicks / *_impressions): BIGINT DEFAULT 0
    if missing:
        text_cols    = {"primary_keyword", "secondary_keyword", "hero_url", "rank"}
        numeric_cols = {"volume"}  # add more static numeric fields here if needed

        with engine.begin() as conn:
            for col in missing:
                col_l = col.lower()
                if col_l in text_cols:
                    col_type = "text"
                elif col_l in numeric_cols:
                    col_type = "bigint"
                else:
                    # Treat all metric columns as BIGINT with default 0
                    col_type = "bigint DEFAULT 0"
                conn.execute(text(
                    f'ALTER TABLE "{TABLE_SCHEMA}"."{TABLE_NAME}" '
                    f'ADD COLUMN IF NOT EXISTS "{col}" {col_type};'
                ))
        print(f"🧩 added {len(missing)} new column(s): {missing}")

    # 4) TRUNCATE target (keeps dependent view intact), then APPEND all rows.
    #    Both steps run on one connection inside a single transaction, so a
    #    failed load rolls the TRUNCATE back instead of leaving the table
    #    (and the view on top of it) empty.
    with engine.begin() as conn:
        conn.execute(text(f'TRUNCATE TABLE "{TABLE_SCHEMA}"."{TABLE_NAME}";'))
        df.to_sql(
            name=TABLE_NAME,
            con=conn,
            schema=TABLE_SCHEMA,
            if_exists="append",   # we just truncated; do not replace
            index=False,
            method="multi",
            chunksize=5_000,
        )
    print(f"✅ truncated & loaded {len(df)} rows into {TABLE_SCHEMA}.{TABLE_NAME}")

# 5) (Re)create passthrough view (idempotent)
with engine.begin() as conn:
    conn.execute(text(f'''
        CREATE OR REPLACE VIEW "{TABLE_SCHEMA}"."{VIEW_NAME}" AS
        SELECT * FROM "{TABLE_SCHEMA}"."{TABLE_NAME}";
    '''))

print(f"🪟 view {TABLE_SCHEMA}.{VIEW_NAME} refreshed.")
engine.dispose()


GET PAGE PERFORMANCE

In [None]:
# -*- coding: utf-8 -*-
"""
Exact-URL page performance (28-day sums, weekly anchors)
Uses gush_gsc_page_daily for page-level parity with GSC "Page" view.
"""

import pandas as pd

# ---- Params ----
# These rebind the same names used by the query-level cell, with the same
# values. START_ANCHOR_STR is the first anchor date; ANCHOR_FREQ is the
# pandas frequency string used as the spacing between anchors.
START_ANCHOR_STR = "2025-06-29"  # first weekly anchor (e.g., Sunday)
ANCHOR_FREQ      = "7D"          # weekly cadence

# ---- Helpers ----
def label_for_anchor(ts: pd.Timestamp) -> str:
    """Return the column-label prefix for an anchor date, e.g. ``6Jul``.

    Day of month (no zero padding) followed by the abbreviated month name
    from ``strftime('%b')``; the year is not included.
    """
    month_abbrev = ts.strftime("%b")
    return str(ts.day) + month_abbrev

def compute_28d_sums(pages_join_df: pd.DataFrame, anchor: pd.Timestamp) -> pd.DataFrame:
    """
    Sum clicks and impressions per hero_url over the 28 days ending on ``anchor``.

    Args:
        pages_join_df: Frame with ``date``, ``hero_url``, ``clicks`` and
            ``impressions`` columns.
        anchor: Last day (inclusive) of the 28-day window.

    Returns:
        One row per hero_url seen in the window with columns ``hero_url``,
        ``<label>_clicks`` and ``<label>_impressions``, where ``<label>``
        comes from ``label_for_anchor(anchor)``. When no rows fall in the
        window, an empty frame with those three columns is returned.
    """
    prefix = label_for_anchor(anchor)
    clicks_col = f"{prefix}_clicks"
    impressions_col = f"{prefix}_impressions"

    # anchor - 27 days through anchor, both ends included, is 28 calendar days.
    window_start = anchor - pd.Timedelta(days=27)
    dates = pd.to_datetime(pages_join_df["date"])
    in_window = dates.between(window_start, anchor)
    window = pages_join_df.loc[in_window, ["hero_url", "clicks", "impressions"]].copy()

    if window.empty:
        return pd.DataFrame(columns=["hero_url", clicks_col, impressions_col])

    return window.groupby("hero_url", as_index=False).agg(
        **{
            clicks_col: ("clicks", "sum"),
            impressions_col: ("impressions", "sum"),
        }
    )

# ---- EXACT join: hero_url == page (no normalization) ----
# Distinct, non-null hero URLs from the dictionary, minus the placeholder
# values ('', 'New', a lone backslash).
# NOTE(review): dict_raw_df is assigned by both the first cell and the
# query-level cell (which additionally requires a secondary_keyword), so the
# page list depends on which of those cells ran last — confirm the intent.
hero_urls = dict_raw_df[["hero_url"]].dropna().drop_duplicates()
base_pages_df = hero_urls.query(
    "hero_url != '' and hero_url != 'New' and hero_url != '\\\\'"
).copy()

# One GSC row per (date, page): duplicate source rows are summed.
gsc_day = gsc_page_daily_df.groupby(["date", "page"], as_index=False)[
    ["clicks", "impressions"]
].sum()

# Inner join on exact string equality; the dictionary column is renamed to
# `page` for the join and the result is renamed back to `hero_url`.
pages_as_page = base_pages_df.rename(columns={"hero_url": "page"})
pages_join_df = gsc_day.merge(pages_as_page, on="page", how="inner")
pages_join_df = pages_join_df.rename(columns={"page": "hero_url"})

# Stop early with a clear message instead of producing an all-zero table.
if pages_join_df.empty:
    raise SystemExit("No exact matches between hero_url and GSC.page. Check strings in the sheet vs GSC.")

# ---- Build anchors up to latest weekly anchor ≤ today (UTC) ----
# pd.Timestamp.now(tz="UTC") replaces pd.Timestamp.utcnow(), which is
# deprecated as of pandas 2.2; both yield the current UTC time, so the
# derived calendar date is the same.
start_anchor_d = pd.to_datetime(START_ANCHOR_STR).date()
today_utc_d    = pd.Timestamp.now(tz="UTC").date()

# Whole weeks elapsed since the first anchor; clamped at 0 so a start date
# in the future still yields that single first anchor.
weeks_since_start = max(0, (today_utc_d - start_anchor_d).days // 7)
anchor_end = pd.Timestamp(start_anchor_d) + pd.Timedelta(days=7 * weeks_since_start)
anchors = pd.date_range(start=pd.Timestamp(start_anchor_d), end=anchor_end, freq=ANCHOR_FREQ)

# ---- Compute week-on-week 28D sums → wide ----
# Start from every hero URL and left-join one pair of metric columns per
# anchor, so pages without traffic in a window keep their row.
wide = base_pages_df[["hero_url"]].copy()
for anchor in anchors:
    wide = wide.merge(compute_28d_sums(pages_join_df, anchor), on="hero_url", how="left")

# Windows without data come back as NaN from the left joins; report them as
# integer zeros (no decimals).
metric_cols = [c for c in wide.columns if c.endswith(("_clicks", "_impressions"))]
if metric_cols:
    wide[metric_cols] = wide[metric_cols].fillna(0).astype("int64")

# ---- Order columns: hero_url + weekly pairs in *reverse chronological* order ----
labels_latest_first = [label_for_anchor(anchor) for anchor in anchors[::-1]]
ordered_cols = ["hero_url"] + [
    col
    for lbl in labels_latest_first
    for col in (f"{lbl}_clicks", f"{lbl}_impressions")
]

final_pages_wide_df = wide.reindex(columns=ordered_cols)

print(f"Rows: {final_pages_wide_df.shape[0]}, Cols: {final_pages_wide_df.shape[1]}")
print("Sample columns:", final_pages_wide_df.columns[:6].tolist())
final_pages_wide_df.head(5)


WRITE PAGE PERFORMANCE

In [None]:
# --- Write final_pages_wide_df to Postgres safely (no DROP on target) + refresh view ---

import pandas as pd
from sqlalchemy import create_engine, text, inspect

# ───────────── DB config ─────────────
import os

# The DB_URL environment variable overrides the built-in default, matching
# the query-level read cell earlier in this notebook.
# NOTE(review): the default embeds database credentials in source; rotate
# them and supply DB_URL through the environment instead.
engine = create_engine(
    os.getenv(
        "DB_URL",
        "postgresql+psycopg2://airbyte_user:airbyte_user_password@"
        "gw-rds-prod.celzx4qnlkfp.us-east-1.rds.amazonaws.com:5432/gw_prod",
    )
)

TABLE_SCHEMA = "gist"
TABLE_NAME   = "gist_gush_page_seo"
VIEW_NAME    = "vw_gist_gush_page_seo"

# ───────────── DataFrame to load ─────────────
assert 'final_pages_wide_df' in globals(), "final_pages_wide_df not found. Run the transform cell first."
df = final_pages_wide_df.copy()
if df.empty:
    print("🛑 final_pages_wide_df is empty; nothing to load.")
    engine.dispose()
    raise SystemExit

# ensure schema exists
with engine.begin() as conn:
    conn.execute(text(f'CREATE SCHEMA IF NOT EXISTS "{TABLE_SCHEMA}";'))

insp = inspect(engine)
table_exists = insp.has_table(TABLE_NAME, schema=TABLE_SCHEMA)

if not table_exists:
    # First run: create table from scratch
    df.to_sql(
        name=TABLE_NAME,
        con=engine,
        schema=TABLE_SCHEMA,
        if_exists="replace",  # safe: no dependent view yet
        index=False,
        method="multi",
        chunksize=5_000,
    )
    print(f"✅ created {TABLE_SCHEMA}.{TABLE_NAME} with {len(df)} rows")
else:
    # Subsequent runs: handle new weekly columns (schema drift), then TRUNCATE + APPEND
    # 1) fetch existing column names (lowercased for comparison)
    with engine.connect() as conn:
        existing_cols = pd.read_sql(
            text("""
                SELECT column_name
                FROM information_schema.columns
                WHERE table_schema = :schema AND table_name = :table
                ORDER BY ordinal_position
            """),
            conn,
            params={"schema": TABLE_SCHEMA, "table": TABLE_NAME},
        )["column_name"].str.lower().tolist()

    existing_lookup = set(existing_cols)
    missing = [c for c in df.columns if c.lower() not in existing_lookup]

    # 2) add any missing columns (new weekly anchors).
    #    hero_url stays text; metric columns are BIGINT DEFAULT 0 because the
    #    DataFrame metrics are cast to int64 upstream, which keeps later
    #    columns the same type as the query-level write cell uses.
    if missing:
        with engine.begin() as conn:
            for col in missing:
                col_type = "text" if col.lower() == "hero_url" else "bigint DEFAULT 0"
                conn.execute(text(
                    f'ALTER TABLE "{TABLE_SCHEMA}"."{TABLE_NAME}" '
                    f'ADD COLUMN IF NOT EXISTS "{col}" {col_type};'
                ))
        print(f"🧩 added {len(missing)} new column(s): {missing}")

    # 3) TRUNCATE target (keeps view intact), then APPEND all rows.
    #    Both steps run on one connection inside a single transaction, so a
    #    failed load rolls the TRUNCATE back instead of leaving the table
    #    (and the view on top of it) empty.
    with engine.begin() as conn:
        conn.execute(text(f'TRUNCATE TABLE "{TABLE_SCHEMA}"."{TABLE_NAME}";'))
        df.to_sql(
            name=TABLE_NAME,
            con=conn,
            schema=TABLE_SCHEMA,
            if_exists="append",   # <- do NOT replace; we just truncated
            index=False,
            method="multi",
            chunksize=5_000,
        )
    print(f"✅ truncated & loaded {len(df)} rows into {TABLE_SCHEMA}.{TABLE_NAME}")

# (re)create passthrough view (idempotent)
with engine.begin() as conn:
    conn.execute(text(f'''
        CREATE OR REPLACE VIEW "{TABLE_SCHEMA}"."{VIEW_NAME}" AS
        SELECT * FROM "{TABLE_SCHEMA}"."{TABLE_NAME}";
    '''))

print(f"🪟 view {TABLE_SCHEMA}.{VIEW_NAME} refreshed.")
engine.dispose()
