<a href="https://colab.research.google.com/github/ShikharV010/gist_daily_runs/blob/main/PagePerformance_TrendsCalculations.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [111]:
!pip install psycopg2-binary sqlalchemy pandas



In [112]:
import os
from getpass import getpass

import numpy as np
import pandas as pd
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

# Connection settings come from the standard libpq environment variables so
# that credentials are not committed with the notebook. Non-secret settings
# fall back to the values this notebook has always used. The password has no
# default: set PGPASSWORD for unattended runs, otherwise it is prompted for.
db_config = {
    'user': os.environ.get('PGUSER', 'airbyte_user'),
    'password': os.environ.get('PGPASSWORD') or getpass('PostgreSQL password: '),
    'host': os.environ.get('PGHOST', 'gw-postgres-dev.celzx4qnlkfp.us-east-1.rds.amazonaws.com'),
    'port': os.environ.get('PGPORT', '5432'),
    'database': os.environ.get('PGDATABASE', 'gw_prod')
}

# Create SQLAlchemy engine.
# The URL is assembled from components instead of an f-string so that special
# characters in the user name or password (e.g. "@", "/", ":") are escaped.
engine = create_engine(
    URL.create(
        drivername="postgresql+psycopg2",
        username=db_config['user'],
        password=db_config['password'],
        host=db_config['host'],
        port=int(db_config['port']),
        database=db_config['database'],
    )
)


In [113]:
# Load the page-performance rows used by every later cell.
# The query reads gist.matv_gist_pageperformance and returns, per row, the
# campaign id, the period's start/end (cast to DATE), and the impressions,
# clicks, ctr and position metrics. No filtering is applied here.
sql = """
SELECT
  campaign_id,
  start_date::date AS start_date,
  end_date::date AS end_date,
  impressions,
  clicks,
  ctr,
  position
FROM gist.matv_gist_pageperformance
"""
# Run the query through the SQLAlchemy engine created above.
df_raw = pd.read_sql(sql, engine)


In [114]:
# Every distinct (start_date, end_date) pair is treated as one reporting week.
# Weeks are ordered by start_date and numbered from 1, so week 1 is the earliest.
week_ranks = (
    df_raw.loc[:, ['start_date', 'end_date']]
    .drop_duplicates()
    .sort_values(by='start_date')
    .reset_index(drop=True)
    .assign(week_no=lambda weeks: weeks.index + 1)
)

# Attach the week number to every raw row
df_ranked = pd.merge(df_raw, week_ranks, how='left', on=['start_date', 'end_date'])


In [115]:
def _mean_ignoring_zero(values):
    """Average of `values` with zeros treated as missing."""
    return values.replace(0, np.nan).mean()


# One row per (campaign, week): totals for impressions/clicks, a plain
# (unweighted) mean for ctr, and a mean position that ignores zero positions.
df_agg = df_ranked.groupby(['campaign_id', 'week_no'], as_index=False).agg(
    impressions=('impressions', 'sum'),
    clicks=('clicks', 'sum'),
    ctr=('ctr', 'mean'),
    position=('position', _mean_ignoring_zero),
)


In [116]:
# Reshape to one row per campaign with a column per (metric, week).
wide = df_agg.pivot(index='campaign_id', columns='week_no')

# Collapse the two-level column labels, e.g. ('clicks', 12) → clicks_week_12
flat_names = []
for metric, week_no in wide.columns:
    flat_names.append(f"{metric}_week_{week_no}")
wide.columns = flat_names

# campaign_id becomes an ordinary column again
df_pivot = wide.reset_index()

In [117]:
import pandas as pd
import numpy as np

# 1. Helper function
def get_trend_label(metric, change):
    """Turn a relative change into a bucketed "gain"/"drop" label.

    Parameters
    ----------
    metric : str
        Metric name. Trailing "s" characters are stripped, so "impressions"
        is reported as "impression".
    change : float
        Relative change, e.g. 0.12 for +12%.

    Returns
    -------
    str
        "<metric> gain > N%" or "<metric> drop > N%", where N is the largest
        of 30, 20, 10, 5 that the change strictly exceeds in magnitude.
        An empty string when the change is within ±5% (or is NaN).
    """
    name = metric.rstrip("s")
    # Largest bucket first, so the first match is the strongest label.
    buckets = ((0.30, "30%"), (0.20, "20%"), (0.10, "10%"), (0.05, "5%"))

    for threshold, pct in buckets:
        if change > threshold:
            return f"{name} gain > {pct}"

    for threshold, pct in buckets:
        if change < -threshold:
            return f"{name} drop > {pct}"

    return ""

# 2. Get week numbers from columns (newest first)
week_nums = sorted([
    int(col.split("_week_")[1])
    for col in df_pivot.columns
    if "_week_" in col and col.startswith("impressions")
], reverse=True)

# 3. Keep at most the 11 latest weeks, which gives at most 10 comparisons
latest_weeks = week_nums[:11]

# 4. Initialize output container
performance_columns = {"campaign_id": df_pivot["campaign_id"]}

# 5. Compare every week with the one before it.
# Pairing the list with itself shifted by one yields exactly the comparisons
# that exist, so a history shorter than 11 weeks produces fewer columns
# instead of raising IndexError.
for week_n, week_prev in zip(latest_weeks, latest_weeks[1:]):
    label_list = []

    for metric in ["impressions", "clicks", "ctr"]:
        col_curr = f"{metric}_week_{week_n}"
        col_prev = f"{metric}_week_{week_prev}"

        # Relative change; a zero or missing previous value (inf/NaN result)
        # is treated as "no change" rather than as an infinite gain.
        change = (
            (df_pivot[col_curr] - df_pivot[col_prev]) / df_pivot[col_prev]
        ).replace([np.inf, -np.inf], np.nan).fillna(0)

        label_series = change.apply(lambda x: get_trend_label(metric, x))
        label_list.append(label_series)

    # Combine all metric labels for this comparison: one column per metric,
    # then one comma-separated string per campaign ("Stagnant" if all empty).
    combined = pd.DataFrame(label_list).T
    combined[f"performance_week_{week_n}"] = combined.apply(
        lambda row: "Stagnant" if all(v == "" for v in row) else ", ".join(filter(None, row)),
        axis=1
    )

    performance_columns[f"performance_week_{week_n}"] = combined[f"performance_week_{week_n}"]

# 6. Final weekly performance DataFrame
df_weekly_perf = pd.DataFrame(performance_columns)


In [118]:
# Week numbers that have an impressions column, newest first
week_nums = sorted(
    (
        int(col.split("_week_")[1])
        for col in df_pivot.columns
        if col.startswith("impressions_week_")
    ),
    reverse=True,
)

# "Month" = the four newest weeks, compared with the four weeks before them
recent_4, past_4 = week_nums[:4], week_nums[4:8]


def _monthly_cols(metric, weeks):
    """Names of the pivot columns for `metric` over `weeks` that actually exist."""
    wanted = [f"{metric}_week_{w}" for w in weeks]
    return [name for name in wanted if name in df_pivot.columns]


# Column lists are the same for every campaign, so resolve them once.
monthly_windows = {
    metric: (_monthly_cols(metric, recent_4), _monthly_cols(metric, past_4))
    for metric in ("impressions", "clicks", "ctr")
}

monthly_labels = []
for _, row in df_pivot.iterrows():
    recent_impr_cols, past_impr_cols = monthly_windows["impressions"]
    recent_click_cols, past_click_cols = monthly_windows["clicks"]
    recent_ctr_cols, past_ctr_cols = monthly_windows["ctr"]

    # Impressions and clicks are totalled over each window; ctr is averaged
    comparisons = (
        ("impression", row[recent_impr_cols].sum(), row[past_impr_cols].sum()),
        ("clicks", row[recent_click_cols].sum(), row[past_click_cols].sum()),
        ("ctr", row[recent_ctr_cols].mean(), row[past_ctr_cols].mean()),
    )

    labels = []
    for metric, recent, past in comparisons:
        # A zero baseline is reported as "no change"
        change = (recent - past) / past if past != 0 else 0
        label = get_trend_label(metric, change)
        if label:
            labels.append(label)

    final_label = ", ".join(labels) if labels else "Stagnant"
    monthly_labels.append(final_label)

# One monthly label per campaign
df_monthly_perf = pd.DataFrame({
    "campaign_id": df_pivot["campaign_id"],
    "performance_monthly": monthly_labels
})


In [119]:
quarterly_labels = []
quarter_metrics = ["impressions", "clicks", "ctr"]

# Get valid weeks where all required metrics are present
valid_weeks = [
    w for w in week_nums
    if all(f"{metric}_week_{w}" in df_pivot.columns for metric in quarter_metrics)
]

latest_week = valid_weeks[0] if valid_weeks else 0

# Current quarter: latest 12 weeks
recent_12 = [latest_week - i for i in range(12)]

# Past quarter: up to 12 weeks before the recent block, skipping 1 week in between
past_start = latest_week - 13
past_weeks = [past_start - i for i in range(12)]
past_weeks_available = [w for w in past_weeks if w in valid_weeks]

has_full_recent_quarter = all(w in valid_weeks for w in recent_12)

# A comparison needs 12 recent weeks + the 1-week gap + at least 1 past week,
# i.e. 14 weeks of history. Checking the past window directly (instead of
# `len(valid_weeks) >= 13`) stops a 13-week history from being labelled
# "Stagnant" for every campaign when there is nothing to compare against.
if has_full_recent_quarter and past_weeks_available:
    # The past window may be shorter than 12 weeks. Totals over windows of
    # different length are not comparable, so past totals are scaled up to the
    # recent window's length. With a full 12-week past window the factor is
    # exactly 1.0 and the result is unchanged.
    window_scale = len(recent_12) / len(past_weeks_available)

    recent_cols = {m: [f"{m}_week_{w}" for w in recent_12] for m in quarter_metrics}
    past_cols = {m: [f"{m}_week_{w}" for w in past_weeks_available] for m in quarter_metrics}

    for _, row in df_pivot.iterrows():
        labels = []

        # Totals for impressions and clicks (past totals length-normalised)
        recent_impr = row[recent_cols["impressions"]].sum()
        past_impr = row[past_cols["impressions"]].sum() * window_scale

        recent_clicks = row[recent_cols["clicks"]].sum()
        past_clicks = row[past_cols["clicks"]].sum() * window_scale

        # ctr is already a per-week average, so no scaling is needed
        recent_ctr = row[recent_cols["ctr"]].mean()
        past_ctr = row[past_cols["ctr"]].mean()

        for metric, recent, past in zip(
            ["impression", "clicks", "ctr"],
            [recent_impr, recent_clicks, recent_ctr],
            [past_impr, past_clicks, past_ctr]
        ):
            # A zero baseline is reported as "no change"
            change = 0 if past == 0 else (recent - past) / past
            label = get_trend_label(metric, change)
            if label:
                labels.append(label)

        final = "Stagnant" if not labels else ", ".join(labels)
        quarterly_labels.append(final)

    df_quarterly_perf = pd.DataFrame({
        "campaign_id": df_pivot["campaign_id"],
        "performance_quarterly": quarterly_labels
    })

else:
    print("Not enough weeks to compute quarterly performance (need ≥14 weeks)")
    df_quarterly_perf = pd.DataFrame({
        "campaign_id": df_pivot["campaign_id"],
        "performance_quarterly": ["N/A"] * len(df_pivot)
    })


In [120]:
# Combine the monthly, quarterly and weekly labels into one row per campaign.
# Left joins keep exactly the campaigns present in the monthly frame.
df_perf_all = (
    df_monthly_perf
    .merge(df_quarterly_perf, how="left", on="campaign_id")
    .merge(df_weekly_perf, how="left", on="campaign_id")
)

# Do not truncate columns when a DataFrame is displayed
pd.set_option('display.max_columns', None)

In [121]:
# Weekly label columns currently carry absolute week numbers
week_perf_cols = [
    name for name in df_perf_all.columns
    if name.startswith("performance_week_")
]

# Absolute week numbers, newest first
original_week_nums = sorted(
    (int(name.split("_")[-1]) for name in week_perf_cols),
    reverse=True,
)

# Renumber relative to "now": the newest week becomes 10, the next 9, and so
# on down to 1. At most ten columns are renamed.
rename_map = {}
for old, new in zip(original_week_nums, reversed(range(1, 11))):
    rename_map[f"performance_week_{old}"] = f"performance_week_{new}"

df_perf_all_final = df_perf_all.rename(columns=rename_map)


In [122]:
# Week numbers that have an impressions column, newest first
week_nums_all = sorted(
    (
        int(name.split("_week_")[1])
        for name in df_pivot.columns
        if name.startswith("impressions_week_")
    ),
    reverse=True,
)

# The ten most recent weeks
top_10_weeks = week_nums_all[:10]

# Column order: campaign_id, then every week of impressions, then clicks,
# ctr and position
metrics = ['impressions', 'clicks', 'ctr', 'position']
ordered_cols = ['campaign_id']
for metric in metrics:
    ordered_cols.extend(f"{metric}_week_{w}" for w in top_10_weeks)

# Independent copy restricted to those columns
df_pivot_filtered = df_pivot.loc[:, ordered_cols].copy()


In [123]:
# Renumber the metric columns relative to the newest week: the newest week in
# top_10_weeks becomes week_10, the next week_9, and so on.
rename_mapping = {
    f"{metric}_week_{week}": f"{metric}_week_{relative_no}"
    for relative_no, week in zip(range(10, 0, -1), top_10_weeks)
    for metric in metrics
}

df_pivot_filtered_renamed = df_pivot_filtered.rename(columns=rename_mapping)


In [124]:
# Final table: trend labels first, then the renumbered weekly metrics.
# The left join keeps every campaign present in the label frame.
df_final = pd.merge(
    df_perf_all_final,
    df_pivot_filtered_renamed,
    how='left',
    on='campaign_id',
)


In [125]:
# Render the merged table (trend labels + weekly metrics) for a visual check
# before it is written to the database in the next cell.
display(df_final)

Unnamed: 0,campaign_id,performance_monthly,performance_quarterly,performance_week_10,performance_week_9,performance_week_8,performance_week_7,performance_week_6,performance_week_5,performance_week_4,performance_week_3,performance_week_2,performance_week_1,impressions_week_10,impressions_week_9,impressions_week_8,impressions_week_7,impressions_week_6,impressions_week_5,impressions_week_4,impressions_week_3,impressions_week_2,impressions_week_1,clicks_week_10,clicks_week_9,clicks_week_8,clicks_week_7,clicks_week_6,clicks_week_5,clicks_week_4,clicks_week_3,clicks_week_2,clicks_week_1,ctr_week_10,ctr_week_9,ctr_week_8,ctr_week_7,ctr_week_6,ctr_week_5,ctr_week_4,ctr_week_3,ctr_week_2,ctr_week_1,position_week_10,position_week_9,position_week_8,position_week_7,position_week_6,position_week_5,position_week_4,position_week_3,position_week_2,position_week_1
0,00d856fb-4297-480c-b2d5-c13701deffe0,"impression drop > 5%, click gain > 30%, ctr ga...",impression gain > 30%,"impression drop > 10%, click gain > 10%","impression drop > 20%, click drop > 5%, ctr ga...","impression drop > 5%, click gain > 30%, ctr ga...","click gain > 30%, ctr drop > 5%","impression gain > 5%, ctr gain > 30%","impression gain > 30%, click gain > 30%, ctr g...","click gain > 30%, ctr drop > 30%","impression gain > 5%, ctr drop > 30%",impression gain > 10%,impression gain > 10%,5429.0,6075.0,7964.0,8753.0,9162.0,8415.0,6448.0,6224.0,5808.0,5093.0,12.0,10.0,11.0,6.0,3.0,3.0,2.0,1.0,1.0,0.0,0.028871,0.028203,0.026638,0.002620,0.002891,0.002061,0.000424,0.001134,0.002646,0.000000,36.574189,39.534627,39.040883,40.925245,40.752444,36.934806,34.151639,33.339555,29.677715,40.054128
1,013300d9-d7e1-4cf7-8b88-e16f02d5c600,"impression gain > 30%, click gain > 5%, ctr ga...","impression gain > 30%, click gain > 30%, ctr g...","impression gain > 5%, ctr gain > 10%","impression gain > 5%, click gain > 10%","click gain > 5%, ctr gain > 5%","impression gain > 5%, ctr gain > 5%","impression gain > 10%, ctr gain > 10%","impression gain > 5%, click drop > 5%, ctr dro...","impression gain > 10%, ctr drop > 10%","impression gain > 10%, click gain > 10%","impression gain > 10%, click gain > 30%, ctr g...","impression gain > 5%, click gain > 10%, ctr ga...",73890.0,69546.0,64877.0,62527.0,56988.0,51569.0,48380.0,41102.0,35130.0,31021.0,499.0,500.0,430.0,392.0,397.0,409.0,440.0,424.0,354.0,268.0,0.006314,0.005429,0.005337,0.004909,0.004521,0.003844,0.004500,0.005552,0.005644,0.004910,28.967623,28.465315,29.689879,30.196801,30.644211,28.212768,25.305498,24.192853,23.152534,23.960647
2,01c697d0-b570-491b-bd5b-192820325bb5,"impression gain > 30%, click gain > 10%, ctr d...","impression gain > 30%, click gain > 30%, ctr g...","impression gain > 10%, ctr drop > 20%",impression gain > 5%,impression gain > 5%,"impression gain > 5%, ctr drop > 30%",impression gain > 10%,"impression gain > 10%, click gain > 10%, ctr d...","impression gain > 10%, click gain > 20%, ctr g...","impression gain > 5%, click gain > 20%, ctr ga...","click gain > 30%, ctr drop > 30%","click gain > 10%, ctr gain > 30%",105229.0,95037.0,86973.0,81186.0,74623.0,66852.0,59001.0,50542.0,47635.0,48723.0,307.0,302.0,297.0,301.0,305.0,309.0,261.0,204.0,158.0,107.0,0.003235,0.004363,0.004277,0.004255,0.010842,0.010961,0.011791,0.008667,0.005981,0.009226,22.227618,22.603664,22.355546,24.926428,25.295423,26.917456,24.730066,27.059547,28.991401,31.181495
3,02344b65-6cd1-401c-89be-d69221aa428e,Stagnant,Stagnant,"impression gain > 30%, click gain > 30%, ctr g...",Stagnant,Stagnant,Stagnant,Stagnant,Stagnant,Stagnant,Stagnant,Stagnant,Stagnant,55.0,32.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,3.0,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.018182,0.010417,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,12.563636,12.250000,,,,,,,,
4,056320a0-86a4-4463-a77b-353b3a3af3b1,"impression gain > 30%, click gain > 30%, ctr g...",impression gain > 30%,"impression gain > 30%, click gain > 30%, ctr g...","impression gain > 5%, click gain > 10%, ctr dr...","impression drop > 10%, click gain > 30%, ctr g...","impression gain > 30%, click gain > 20%, ctr g...","impression gain > 5%, click gain > 30%, ctr ga...","impression gain > 20%, click gain > 20%, ctr g...","impression gain > 30%, ctr drop > 10%","impression gain > 10%, click gain > 30%, ctr g...","impression gain > 20%, ctr gain > 5%",ctr gain > 30%,2816.0,1804.0,1658.0,1992.0,1524.0,1431.0,1176.0,405.0,346.0,283.0,22.0,14.0,12.0,9.0,7.0,5.0,4.0,4.0,1.0,1.0,0.020815,0.007719,0.010193,0.008192,0.007692,0.002419,0.002007,0.002367,0.000947,0.000893,30.397179,31.186312,31.493036,32.479834,28.440571,24.513343,23.017466,16.062969,12.571839,15.377743
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
184,f8f9569a-ef60-4b64-96e5-11ca0ad14ad7,"impression gain > 30%, click gain > 20%","impression gain > 30%, click gain > 30%, ctr g...","impression gain > 10%, click gain > 10%, ctr g...","impression gain > 5%, ctr drop > 20%","impression gain > 5%, click drop > 10%, ctr dr...","impression gain > 5%, click gain > 10%","impression gain > 10%, click gain > 10%, ctr g...","impression gain > 10%, click gain > 10%, ctr d...","impression gain > 10%, click gain > 20%, ctr g...","impression gain > 30%, click gain > 10%, ctr g...","impression gain > 5%, click drop > 5%, ctr dro...",ctr gain > 20%,29759.0,26093.0,23829.0,22364.0,21226.0,18454.0,15528.0,13066.0,10013.0,9404.0,70.0,61.0,60.0,71.0,64.0,56.0,49.0,39.0,33.0,36.0,0.011877,0.008466,0.011629,0.015079,0.014543,0.012308,0.014271,0.007261,0.006613,0.009303,31.138230,30.008081,26.290549,24.241504,25.660369,24.314314,23.645243,23.463624,21.780488,23.255495
185,fd481f70-23c5-4501-b084-2518731e4aeb,"impression gain > 20%, click gain > 20%, ctr d...","impression gain > 30%, click gain > 30%, ctr g...",impression gain > 5%,Stagnant,Stagnant,"impression gain > 5%, ctr gain > 5%","impression gain > 5%, click gain > 10%","impression gain > 10%, click gain > 5%, ctr dr...","impression gain > 20%, click gain > 20%, ctr d...","impression gain > 20%, click gain > 10%, ctr g...","impression gain > 10%, click gain > 10%, ctr d...","impression gain > 20%, click gain > 30%, ctr g...",107775.0,99313.0,99741.0,102892.0,96417.0,88063.0,75013.0,60292.0,48580.0,40609.0,593.0,580.0,572.0,573.0,552.0,499.0,468.0,389.0,332.0,296.0,0.002983,0.003066,0.003205,0.003302,0.003132,0.003272,0.003783,0.005329,0.002544,0.002705,22.882659,23.877080,23.057894,23.465126,23.754975,24.282229,25.548147,26.427859,24.360482,22.658170
186,fdf60792-f202-4746-b7a7-d52813374cef,"impression gain > 30%, click gain > 30%, ctr g...","impression gain > 30%, click gain > 30%, ctr g...",Stagnant,"impression gain > 5%, ctr gain > 20%","impression gain > 5%, click gain > 5%","impression gain > 10%, click gain > 10%, ctr g...","impression gain > 10%, click gain > 10%","impression gain > 5%, ctr drop > 10%","impression gain > 10%, click gain > 10%, ctr d...","impression gain > 5%, ctr drop > 5%","impression gain > 5%, click gain > 10%, ctr dr...","impression gain > 10%, click gain > 20%, ctr d...",151295.0,152383.0,144083.0,136532.0,122413.0,105192.0,96393.0,86131.0,80293.0,73024.0,451.0,465.0,448.0,415.0,372.0,335.0,324.0,292.0,282.0,251.0,0.004295,0.004250,0.003304,0.003229,0.002898,0.002830,0.003429,0.003937,0.004366,0.004947,25.214389,25.756545,25.828760,25.861542,25.696084,25.855855,26.263462,26.203525,25.189833,25.049121
187,fe2dc8c0-eae4-4e20-9cee-3c95adce12f1,"impression gain > 30%, click gain > 30%, ctr g...",Stagnant,"impression gain > 20%, click gain > 30%","impression gain > 30%, click gain > 30%, ctr g...","impression gain > 30%, click gain > 30%, ctr g...","impression gain > 30%, click gain > 30%, ctr g...",Stagnant,Stagnant,Stagnant,Stagnant,Stagnant,Stagnant,144263.0,113643.0,79720.0,18806.0,14022.0,0.0,0.0,0.0,0.0,0.0,152.0,113.0,53.0,3.0,2.0,0.0,0.0,0.0,0.0,0.0,0.001722,0.001655,0.001343,0.000077,0.000030,0.000000,0.000000,0.000000,0.000000,0.000000,49.203805,57.278836,62.177367,50.783153,51.164917,,,,,


In [134]:
from sqlalchemy import create_engine, text

# Step 0: Reuse the `engine` created in the connection cell at the top of the
# notebook. This cell needs `df_final`, so the earlier cells have already run,
# and reusing the engine avoids repeating the credentials here.

# Steps 1-3 run in ONE transaction: engine.begin() commits when the block
# succeeds and rolls back on any error. If writing the table fails, the
# materialized view (and anything removed by CASCADE) is therefore left
# untouched instead of staying dropped.
with engine.begin() as conn:
    # Step 1: Force drop the materialized view WITH CASCADE
    print("⏳ Dropping materialized view...")
    conn.execute(text("DROP MATERIALIZED VIEW IF EXISTS gist.matv_gist_pageperformancetrends CASCADE;"))
    print("✅ Dropped materialized view")

    # Step 2: Replace the base table, on the same connection/transaction
    df_final.to_sql(
        name="gist_pageperformancetrends",
        con=conn,
        schema="gist",
        if_exists="replace",  # Replace table
        index=False,
        method="multi"
    )
    print("✅ Table 'gist_pageperformancetrends' written successfully")

    # Step 3: Recreate the materialized view (adjust if needed)
    print("⏳ Creating materialized view...")
    conn.execute(text("""
        CREATE MATERIALIZED VIEW gist.matv_gist_pageperformancetrends
        TABLESPACE pg_default
        AS
        SELECT * FROM gist.gist_pageperformancetrends
        WITH DATA;
    """))
    conn.execute(text("ALTER TABLE gist.matv_gist_pageperformancetrends OWNER TO airbyte_user;"))
    print("✅ Recreated materialized view")


⏳ Dropping materialized view...
✅ Dropped materialized view
✅ Table 'gist_pageperformancetrends' written successfully
⏳ Creating materialized view...
✅ Recreated materialized view
