# Merged Ads Final Validation with 2% Threshold

This notebook performs **segment-level validation** for Merged Ads Final data with a **2% tolerance threshold**.

**Files:**
- Target: `growth_team/merged_ads_final.csv`
- Gold: `merged_ads_final_gold.xlsx`

**Column Mapping:**
- `Day` → date column
- `Campaign` → campaign column
- `Cost` → cost metric
- `Impr.` → impressions metric
- `Clicks` → clicks metric

**Validation Segments:**
- Overall Totals
- By Date
- By Campaign

## Configuration: Set Threshold

In [19]:
# CONFIGURATION: Set your threshold here
THRESHOLD_PERCENT = 2.0  # Accept differences up to 2%

print("="*80)
print("MERGED ADS FINAL DATA VALIDATION CONFIGURATION")
print("="*80)
print(f"\nThreshold: {THRESHOLD_PERCENT}%")
print(f"Differences under {THRESHOLD_PERCENT}% will be marked as MATCHED")
print("\nYou can change THRESHOLD_PERCENT above to adjust tolerance")

# Interactive visualization libraries
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
import plotly.io as pio

# Set Plotly theme
pio.templates.default = 'plotly_white'

MERGED ADS FINAL DATA VALIDATION CONFIGURATION

Threshold: 2.0%
Differences under 2.0% will be marked as MATCHED

You can change THRESHOLD_PERCENT above to adjust tolerance


## Step 1: Import Libraries

In [20]:
# Install openpyxl if needed
import sys
!{sys.executable} -m pip install openpyxl -q

import pandas as pd
import numpy as np
from datetime import datetime

# Visualization libraries
import matplotlib.pyplot as plt
import matplotlib.patches as mpatches
from matplotlib.gridspec import GridSpec
import base64
from io import BytesIO

# Display settings
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)
pd.set_option('display.float_format', '{:.2f}'.format)

print("✓ Libraries imported successfully")
print(f"Analysis started: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

✓ Libraries imported successfully
Analysis started: 2025-12-20 18:30:25


## Step 2: Load and Prepare Data

In [21]:
# Load Target CSV (skip first 2 metadata rows)
print("Loading Target CSV...")
growth_df = pd.read_csv("growth_team/merged_ads_final.csv", skiprows=2)

# IMPORTANT: Select only the columns we need (CSV has extra trailing columns)
growth_df = growth_df[['Day', 'Campaign', 'Currency code', 'Cost', 'Impr.', 'Clicks']]

# Rename columns to match expected names
growth_df.columns = growth_df.columns.str.strip()
growth_df.rename(columns={
    'Day': 'Day',
    'Campaign': 'Campaign name',
    'Currency code': 'Currency code',
    'Cost': 'Cost',
    'Impr.': 'Impressions',
    'Clicks': 'Clicks'
}, inplace=True)

# Clean numeric columns (remove commas)
for col in ['Cost', 'Impressions', 'Clicks']:
    if growth_df[col].dtype == 'object':
        growth_df[col] = growth_df[col].str.replace(',', '')
    growth_df[col] = pd.to_numeric(growth_df[col], errors='coerce').fillna(0)

# Convert Day to string format for consistent comparison
growth_df['Day'] = pd.to_datetime(growth_df['Day'], dayfirst=True).dt.strftime('%Y-%m-%d')

print(f"✓ Target loaded: {len(growth_df):,} rows")
print(f"  Columns: {growth_df.columns.tolist()}")

# Load Gold Excel
print("\nLoading Gold Excel...")
gold_df = pd.read_excel("merged_ads_final_gold.xlsx")

# Normalize column names
gold_df.columns = gold_df.columns.str.strip()
# Rename to match target structure
gold_df.rename(columns={
    'date': 'Day',
    'campaign_name': 'Campaign name',
    'cost': 'Cost',
    'impressions': 'Impressions',
    'clicks': 'Clicks'
}, inplace=True)

# Convert Day to string format for consistent comparison
gold_df['Day'] = pd.to_datetime(gold_df['Day'], dayfirst=True).dt.strftime('%Y-%m-%d')

print(f"✓ Gold loaded: {len(gold_df):,} rows")
print(f"  Columns: {gold_df.columns.tolist()}")

print("\n" + "="*80)
print("DATA SUMMARY")
print("="*80)
print(f"\nTarget Date Range: {growth_df['Day'].min()} to {growth_df['Day'].max()}")
print(f"Gold Date Range: {gold_df['Day'].min()} to {gold_df['Day'].max()}")
print(f"\nTarget Unique Campaigns: {growth_df['Campaign name'].nunique()}")
print(f"Gold Unique Campaigns: {gold_df['Campaign name'].nunique()}")
print(f"\nTarget Campaign List: {sorted(growth_df['Campaign name'].unique())}")
print(f"\nGold Campaign List: {sorted(gold_df['Campaign name'].unique())}")

Loading Target CSV...
✓ Target loaded: 163 rows
  Columns: ['Day', 'Campaign name', 'Currency code', 'Cost', 'Impressions', 'Clicks']

Loading Gold Excel...
✓ Gold loaded: 974 rows
  Columns: ['Day', 'Campaign name', 'Cost', 'Clicks', 'Impressions', 'Unnamed: 5', 'Unnamed: 6']

DATA SUMMARY

Target Date Range: 2025-11-01 to 2025-11-30
Gold Date Range: 2025-11-01 to 2025-11-30

Target Unique Campaigns: 7
Gold Unique Campaigns: 7

Target Campaign List: ['Cadiveu_Instamart_External_20th_Nov_2025', 'IKONIC-AMZ-Glide-Peach-14-Oct-2025', 'ME_Search_|_Oct_25', 'Me_Sales_P-Max_Oct25', 'Nykaa_Black_Friday_Traffic', 'PRO_Search_|_Oct_25', 'Pro_Sales_P-Max_Oct25']

Gold Campaign List: ['Cadiveu_Instamart_External_20th_Nov_2025', 'IKONIC-AMZ-Glide-Peach-14-Oct-2025', 'ME_Search_|_Oct_25', 'Me_Sales_P-Max_Oct25', 'Nykaa_Black_Friday_Traffic', 'PRO_Search_|_Oct_25', 'Pro_Sales_P-Max_Oct25']


## Step 3: Overall Totals Comparison

In [22]:
print("="*80)
print(f"OVERALL TOTALS COMPARISON (with {THRESHOLD_PERCENT}% threshold)")
print("="*80)

# Calculate totals
growth_totals = growth_df[['Cost', 'Impressions', 'Clicks']].sum()
gold_totals = gold_df[['Cost', 'Impressions', 'Clicks']].sum()

# Create comparison dataframe
overall_comparison = pd.DataFrame({
    'Metric': ['Cost', 'Impressions', 'Clicks'],
    'Target': [growth_totals['Cost'], growth_totals['Impressions'], growth_totals['Clicks']],
    'Gold': [gold_totals['Cost'], gold_totals['Impressions'], gold_totals['Clicks']],
})

overall_comparison['Difference'] = overall_comparison['Target'] - overall_comparison['Gold']
overall_comparison['Diff %'] = (overall_comparison['Difference'] / overall_comparison['Gold'] * 100).round(2)
overall_comparison['Match'] = overall_comparison['Diff %'].abs() <= THRESHOLD_PERCENT
overall_comparison['Status'] = overall_comparison['Match'].apply(lambda x: '✓ PASS' if x else '✗ FAIL')

display(overall_comparison)

# Summary
matches = overall_comparison['Match'].sum()
print(f"\n✓ Matches (within {THRESHOLD_PERCENT}%): {matches}/3 metrics")
if matches == 3:
    print(f"✓✓✓ ALL OVERALL TOTALS MATCH (within {THRESHOLD_PERCENT}% threshold)! ✓✓✓")
else:
    print(f"⚠ {3-matches} metric(s) exceed {THRESHOLD_PERCENT}% threshold")

OVERALL TOTALS COMPARISON (with 2.0% threshold)


Unnamed: 0,Metric,Target,Gold,Difference,Diff %,Match,Status
0,Cost,401869.13,451154.45,-49285.32,-10.92,False,✗ FAIL
1,Impressions,3324957.0,3561483.0,-236526.0,-6.64,False,✗ FAIL
2,Clicks,101520.0,120558.0,-19038.0,-15.79,False,✗ FAIL



✓ Matches (within 2.0%): 0/3 metrics
⚠ 3 metric(s) exceed 2.0% threshold


## Step 4: Validation by Date

In [23]:
print("="*80)
print(f"SEGMENT VALIDATION: BY DATE (with {THRESHOLD_PERCENT}% threshold)")
print("="*80)

# Aggregate by date
growth_by_date = growth_df.groupby('Day').agg({
    'Cost': 'sum',
    'Impressions': 'sum',
    'Clicks': 'sum'
}).reset_index()
growth_by_date.columns = ['Day', 'cost_target', 'impressions_target', 'clicks_target']

gold_by_date = gold_df.groupby('Day').agg({
    'Cost': 'sum',
    'Impressions': 'sum',
    'Clicks': 'sum'
}).reset_index()
gold_by_date.columns = ['Day', 'cost_gold', 'impressions_gold', 'clicks_gold']

# Merge and compare
date_comparison = pd.merge(growth_by_date, gold_by_date, on='Day', how='inner')

# Calculate percentage differences
date_comparison['cost_diff_pct'] = ((date_comparison['cost_target'] - date_comparison['cost_gold']) / date_comparison['cost_gold'] * 100).round(2)
date_comparison['impr_diff_pct'] = ((date_comparison['impressions_target'] - date_comparison['impressions_gold']) / date_comparison['impressions_gold'] * 100).round(2)
date_comparison['clicks_diff_pct'] = ((date_comparison['clicks_target'] - date_comparison['clicks_gold']) / date_comparison['clicks_gold'] * 100).round(2)

# Apply threshold matching
date_comparison['perfect_match'] = (
    (date_comparison['cost_diff_pct'].abs() <= THRESHOLD_PERCENT) & 
    (date_comparison['impr_diff_pct'].abs() <= THRESHOLD_PERCENT) & 
    (date_comparison['clicks_diff_pct'].abs() <= THRESHOLD_PERCENT)
)
date_comparison['status'] = date_comparison['perfect_match'].apply(lambda x: '✓ PASS' if x else '✗ FAIL')

print(f"\nTotal dates compared: {len(date_comparison)}")
print(f"✓ Matches (within {THRESHOLD_PERCENT}%): {date_comparison['perfect_match'].sum()}")
print(f"✗ Exceeds threshold: {(~date_comparison['perfect_match']).sum()}")

print("\nDetailed comparison:")
display(date_comparison[['Day', 'cost_target', 'cost_gold', 'cost_diff_pct',
                          'impressions_target', 'impressions_gold', 'impr_diff_pct',
                          'clicks_target', 'clicks_gold', 'clicks_diff_pct', 'status']].sort_values('Day'))

SEGMENT VALIDATION: BY DATE (with 2.0% threshold)

Total dates compared: 29
✓ Matches (within 2.0%): 24
✗ Exceeds threshold: 5

Detailed comparison:


Unnamed: 0,Day,cost_target,cost_gold,cost_diff_pct,impressions_target,impressions_gold,impr_diff_pct,clicks_target,clicks_gold,clicks_diff_pct,status
0,2025-11-01,10925.15,10925.15,-0.0,236435,236435,0.0,7798,7798,0.0,✓ PASS
1,2025-11-03,1096.78,1096.78,0.0,10160,10160,0.0,254,254,0.0,✓ PASS
2,2025-11-04,11854.49,11854.49,-0.0,131891,131891,0.0,3100,3100,0.0,✓ PASS
3,2025-11-05,1770.35,1770.34,0.0,20631,20631,0.0,930,930,0.0,✓ PASS
4,2025-11-06,5881.04,5881.83,-0.01,85302,85302,0.0,3149,3150,-0.03,✓ PASS
5,2025-11-07,12110.28,12113.55,-0.03,147661,147661,0.0,6123,6129,-0.1,✓ PASS
6,2025-11-08,14108.52,14108.91,-0.0,147160,147166,-0.0,4239,4241,-0.05,✓ PASS
7,2025-11-09,22861.72,22876.05,-0.06,192827,192866,-0.02,6082,6091,-0.15,✓ PASS
8,2025-11-10,20059.33,20059.34,-0.0,136257,136257,0.0,2761,2761,0.0,✓ PASS
9,2025-11-11,15728.38,15728.39,-0.0,140885,140885,0.0,2570,2570,0.0,✓ PASS


## Step 5: Validation by Campaign

In [24]:
print("="*80)
print(f"SEGMENT VALIDATION: BY CAMPAIGN (with {THRESHOLD_PERCENT}% threshold)")
print("="*80)

# Aggregate by campaign
growth_by_campaign = growth_df.groupby('Campaign name').agg({
    'Cost': 'sum',
    'Impressions': 'sum',
    'Clicks': 'sum'
}).reset_index()
growth_by_campaign.columns = ['Campaign name', 'cost_target', 'impressions_target', 'clicks_target']

gold_by_campaign = gold_df.groupby('Campaign name').agg({
    'Cost': 'sum',
    'Impressions': 'sum',
    'Clicks': 'sum'
}).reset_index()
gold_by_campaign.columns = ['Campaign name', 'cost_gold', 'impressions_gold', 'clicks_gold']

# Merge and compare - USE OUTER to show ALL campaigns
campaign_comparison = pd.merge(growth_by_campaign, gold_by_campaign, on='Campaign name', how='outer', indicator=True)

# Fill NaN with 0 for campaigns only in one file
for col in ['cost_target', 'impressions_target', 'clicks_target', 'cost_gold', 'impressions_gold', 'clicks_gold']:
    campaign_comparison[col] = campaign_comparison[col].fillna(0)

# Calculate percentage differences with safe division
def safe_pct_diff(target, gold):
    if gold == 0 and target == 0:
        return 0
    elif gold == 0:
        return 999
    else:
        return round((target - gold) / gold * 100, 2)

campaign_comparison['cost_diff_pct'] = campaign_comparison.apply(
    lambda row: safe_pct_diff(row['cost_target'], row['cost_gold']), axis=1)
campaign_comparison['impr_diff_pct'] = campaign_comparison.apply(
    lambda row: safe_pct_diff(row['impressions_target'], row['impressions_gold']), axis=1)
campaign_comparison['clicks_diff_pct'] = campaign_comparison.apply(
    lambda row: safe_pct_diff(row['clicks_target'], row['clicks_gold']), axis=1)

# Apply threshold matching
campaign_comparison['perfect_match'] = (
    (campaign_comparison['cost_diff_pct'].abs() <= THRESHOLD_PERCENT) & 
    (campaign_comparison['impr_diff_pct'].abs() <= THRESHOLD_PERCENT) & 
    (campaign_comparison['clicks_diff_pct'].abs() <= THRESHOLD_PERCENT)
)
campaign_comparison['status'] = campaign_comparison['perfect_match'].apply(lambda x: '✓ PASS' if x else '✗ FAIL')

# Add source indicator
campaign_comparison['source'] = campaign_comparison['_merge'].map({
    'both': 'Both',
    'left_only': 'Target Only',
    'right_only': 'Gold Only'
})

print(f"\nTotal campaigns: {len(campaign_comparison)}")
print(f"  In both files: {(campaign_comparison['_merge'] == 'both').sum()}")
print(f"  Only in target: {(campaign_comparison['_merge'] == 'left_only').sum()}")
print(f"  Only in gold: {(campaign_comparison['_merge'] == 'right_only').sum()}")
print(f"\n✓ Matches (within {THRESHOLD_PERCENT}%): {campaign_comparison['perfect_match'].sum()}")
print(f"✗ Exceeds threshold: {(~campaign_comparison['perfect_match']).sum()}")

print("\nDetailed comparison:")
display(campaign_comparison[['Campaign name', 'source', 'cost_target', 'cost_gold', 'cost_diff_pct',
                              'impressions_target', 'impressions_gold', 'impr_diff_pct',
                              'clicks_target', 'clicks_gold', 'clicks_diff_pct', 'status']].sort_values('Campaign name'))

SEGMENT VALIDATION: BY CAMPAIGN (with 2.0% threshold)

Total campaigns: 7
  In both files: 7
  Only in target: 0
  Only in gold: 0

✓ Matches (within 2.0%): 2
✗ Exceeds threshold: 5

Detailed comparison:


Unnamed: 0,Campaign name,source,cost_target,cost_gold,cost_diff_pct,impressions_target,impressions_gold,impr_diff_pct,clicks_target,clicks_gold,clicks_diff_pct,status
0,Cadiveu_Instamart_External_20th_Nov_2025,Both,5499.5,7830.87,-29.77,342,463,-26.13,26,39,-33.33,✗ FAIL
1,IKONIC-AMZ-Glide-Peach-14-Oct-2025,Both,30429.57,35091.47,-13.29,287837,324314,-11.25,10622,12189,-12.86,✗ FAIL
2,ME_Search_|_Oct_25,Both,111296.43,131696.03,-15.49,646629,665130,-2.78,13091,15462,-15.33,✗ FAIL
3,Me_Sales_P-Max_Oct25,Both,58665.01,58677.53,-0.02,950096,950106,-0.0,24400,24411,-0.05,✓ PASS
4,Nykaa_Black_Friday_Traffic,Both,3499.35,6058.63,-42.24,216816,376666,-42.44,16089,28485,-43.52,✗ FAIL
5,PRO_Search_|_Oct_25,Both,109587.92,128901.3,-14.98,186906,208435,-10.33,12231,14904,-17.93,✗ FAIL
6,Pro_Sales_P-Max_Oct25,Both,82891.35,82898.63,-0.01,1036331,1036369,-0.0,25061,25068,-0.03,✓ PASS


## Step 6: Final Summary Report

In [25]:
print("="*80)
print(f"MERGED ADS FINAL VALIDATION SUMMARY (with {THRESHOLD_PERCENT}% threshold)")
print("="*80)
print(f"\nAnalysis completed: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

# Create summary table
summary_data = [
    ['Overall Totals', 3, overall_comparison['Match'].sum(), 3 - overall_comparison['Match'].sum()],
    ['By Date', len(date_comparison), date_comparison['perfect_match'].sum(), 
     (~date_comparison['perfect_match']).sum()],
    ['By Campaign', len(campaign_comparison), campaign_comparison['perfect_match'].sum(), 
     (~campaign_comparison['perfect_match']).sum()]
]

summary_df = pd.DataFrame(summary_data, 
                         columns=['Segment Type', 'Total Segments', 'Matches', 'Exceeds Threshold'])
summary_df['Match %'] = (summary_df['Matches'] / summary_df['Total Segments'] * 100).round(2)

print("\n")
display(summary_df)

# Overall assessment
total_segments = summary_df['Total Segments'].sum()
total_matches = summary_df['Matches'].sum()
overall_match_pct = (total_matches / total_segments * 100)

print("\n" + "="*80)
print(f"OVERALL MATCH RATE (within {THRESHOLD_PERCENT}%): {total_matches}/{total_segments} ({overall_match_pct:.1f}%)")
print("="*80)

if overall_match_pct == 100:
    print(f"\n✓✓✓ PERFECT VALIDATION! All segments within {THRESHOLD_PERCENT}% threshold! ✓✓✓")
elif overall_match_pct >= 95:
    print(f"\n✓ EXCELLENT! {overall_match_pct:.1f}% of segments within {THRESHOLD_PERCENT}% threshold")
elif overall_match_pct >= 80:
    print(f"\n⚠ GOOD: {overall_match_pct:.1f}% within threshold. Some segments need review.")
else:
    print(f"\n⚠ ATTENTION: Only {overall_match_pct:.1f}% within {THRESHOLD_PERCENT}% threshold. Review required.")

print("\n" + "-"*80)
print("KEY INSIGHTS:")
print("-"*80)
print(f"• Threshold used: {THRESHOLD_PERCENT}%")
print(f"• Segments passing: {total_matches}/{total_segments}")
print(f"• Segments exceeding threshold: {total_segments - total_matches}")
print(f"• Target rows: {len(growth_df):,}")
print(f"• Gold rows: {len(gold_df):,}")

print("\n" + "="*80)
print("VALIDATION COMPLETE")
print("="*80)

MERGED ADS FINAL VALIDATION SUMMARY (with 2.0% threshold)

Analysis completed: 2025-12-20 18:30:26




Unnamed: 0,Segment Type,Total Segments,Matches,Exceeds Threshold,Match %
0,Overall Totals,3,0,3,0.0
1,By Date,29,24,5,82.76
2,By Campaign,7,2,5,28.57



OVERALL MATCH RATE (within 2.0%): 26/39 (66.7%)

⚠ ATTENTION: Only 66.7% within 2.0% threshold. Review required.

--------------------------------------------------------------------------------
KEY INSIGHTS:
--------------------------------------------------------------------------------
• Threshold used: 2.0%
• Segments passing: 26/39
• Segments exceeding threshold: 13
• Target rows: 163
• Gold rows: 974

VALIDATION COMPLETE


## Step 7: Create Visual Dashboard

Generate a comprehensive 6-panel visualization dashboard.

In [26]:
print("=" * 80)
print("CREATING INTERACTIVE VISUAL DASHBOARD")
print("=" * 80)

# -----------------------------
# Safety checks
# -----------------------------
date_total = len(date_comparison)
campaign_total = len(campaign_comparison)

date_pass = date_comparison['perfect_match'].sum() if date_total > 0 else 0
date_fail = date_total - date_pass

campaign_pass = campaign_comparison['perfect_match'].sum() if campaign_total > 0 else 0
campaign_fail = campaign_total - campaign_pass

# -----------------------------
# Create Subplots
# -----------------------------
fig = make_subplots(
    rows=3, cols=2,
    subplot_titles=(
        f'Overall Totals Comparison (Threshold: {THRESHOLD_PERCENT}%)',
        f'Date Validation Status ({date_pass}/{date_total} Pass)',
        f'Campaign Validation Status ({campaign_pass}/{campaign_total} Pass)',
        'Cost Difference Trend Over Time',
        'Impressions Difference Trend Over Time',
        'Summary Statistics'
    ),
    specs=[
        [{'type': 'bar'}, {'type': 'pie'}],
        [{'type': 'pie'}, {'type': 'scatter'}],
        [{'type': 'scatter'}, {'type': 'table'}]
    ],
    vertical_spacing=0.12,
    horizontal_spacing=0.10
)

# ============================================================================
# PANEL 1: OVERALL TOTALS COMPARISON (BAR)
# ============================================================================
metrics = overall_comparison['Metric'].tolist()
colors_target = ['#2ecc71' if m else '#e74c3c' for m in overall_comparison['Match']]

fig.add_trace(
    go.Bar(
        name='Target',
        x=metrics,
        y=overall_comparison['Target'],
        marker_color=colors_target,
        text=overall_comparison['Target'].apply(lambda x: f'{x:,.0f}'),
        textposition='auto',
        hovertemplate='<b>%{x}</b><br>Target: %{y:,.2f}<extra></extra>'
    ),
    row=1, col=1
)

fig.add_trace(
    go.Bar(
        name='Gold',
        x=metrics,
        y=overall_comparison['Gold'],
        marker_color='#3498db',
        text=overall_comparison['Gold'].apply(lambda x: f'{x:,.0f}'),
        textposition='auto',
        hovertemplate='<b>%{x}</b><br>Gold: %{y:,.2f}<extra></extra>'
    ),
    row=1, col=1
)

# ============================================================================
# PANEL 2: DATE VALIDATION STATUS (PIE)
# ============================================================================
fig.add_trace(
    go.Pie(
        labels=['Pass', 'Fail'],
        values=[date_pass, date_fail],
        marker=dict(colors=['#2ecc71', '#e74c3c']),
        pull=[0.05, 0],
        hovertemplate='<b>%{label}</b><br>Count: %{value}<br>%{percent}<extra></extra>'
    ),
    row=1, col=2
)

# ============================================================================
# PANEL 3: CAMPAIGN VALIDATION STATUS (PIE)
# ============================================================================
fig.add_trace(
    go.Pie(
        labels=['Pass', 'Fail'],
        values=[campaign_pass, campaign_fail],
        marker=dict(colors=['#2ecc71', '#e74c3c']),
        pull=[0.05, 0],
        hovertemplate='<b>%{label}</b><br>Count: %{value}<br>%{percent}<extra></extra>'
    ),
    row=2, col=1
)

# ============================================================================
# PANEL 4: COST DIFFERENCE TREND (LINE)
# ============================================================================
date_sorted = date_comparison.sort_values('Day')
x_dates = date_sorted['Day']

fig.add_trace(
    go.Scatter(
        x=x_dates,
        y=date_sorted['cost_diff_pct'],
        mode='lines+markers',
        name='Cost Diff %',
        line=dict(color='#3498db', width=3),
        marker=dict(size=8),
        hovertemplate='Date: %{x}<br>Diff: %{y:.2f}%<extra></extra>'
    ),
    row=2, col=2
)

# Threshold lines (SAFE)
for y_val in [THRESHOLD_PERCENT, -THRESHOLD_PERCENT, 0]:
    fig.add_trace(
        go.Scatter(
            x=x_dates,
            y=[y_val] * len(x_dates),
            mode='lines',
            line=dict(
                dash='dash' if y_val != 0 else 'solid',
                color='#2ecc71' if y_val != 0 else 'gray',
                width=2 if y_val != 0 else 1
            ),
            showlegend=False
        ),
        row=2, col=2
    )

# Threshold band
fig.add_trace(
    go.Scatter(
        x=list(x_dates) + list(x_dates[::-1]),
        y=[THRESHOLD_PERCENT] * len(x_dates) + [-THRESHOLD_PERCENT] * len(x_dates),
        fill='toself',
        fillcolor='rgba(46, 204, 113, 0.15)',
        line=dict(width=0),
        hoverinfo='skip',
        showlegend=False
    ),
    row=2, col=2
)

# ============================================================================
# PANEL 5: IMPRESSIONS DIFFERENCE TREND (LINE)
# ============================================================================
fig.add_trace(
    go.Scatter(
        x=x_dates,
        y=date_sorted['impr_diff_pct'],
        mode='lines+markers',
        name='Impressions Diff %',
        line=dict(color='#e67e22', width=3),
        marker=dict(size=8, symbol='square'),
        hovertemplate='Date: %{x}<br>Diff: %{y:.2f}%<extra></extra>'
    ),
    row=3, col=1
)

# Threshold lines (SAFE)
for y_val in [THRESHOLD_PERCENT, -THRESHOLD_PERCENT, 0]:
    fig.add_trace(
        go.Scatter(
            x=x_dates,
            y=[y_val] * len(x_dates),
            mode='lines',
            line=dict(
                dash='dash' if y_val != 0 else 'solid',
                color='#2ecc71' if y_val != 0 else 'gray',
                width=2 if y_val != 0 else 1
            ),
            showlegend=False
        ),
        row=3, col=1
    )

# ============================================================================
# PANEL 6: SUMMARY TABLE
# ============================================================================
summary_text = f"""
VALIDATION SUMMARY
{'='*40}

Threshold: {THRESHOLD_PERCENT}%

Overall Totals:
  Matches: {overall_comparison['Match'].sum()}/3

By Date:
  Total: {date_total}
  Matches: {date_pass}

By Campaign:
  Total: {campaign_total}
  Matches: {campaign_pass}

Overall Match Rate:
  {overall_match_pct:.1f}%
"""

fig.add_trace(
    go.Table(
        header=dict(
            values=['<b>Validation Summary</b>'],
            fill_color='#34495e',
            font=dict(color='white', size=16),
            align='center'
        ),
        cells=dict(
            values=[summary_text.split('\n')],
            fill_color='#ecf0f1',
            font=dict(family='Courier New', size=13),
            align='left'
        )
    ),
    row=3, col=2
)

# ============================================================================
# LAYOUT
# ============================================================================
fig.update_layout(
    title=dict(
        text='<b>Merged Ads Validation Dashboard - Interactive</b>',
        x=0.5,
        font=dict(size=24)
    ),
    height=1400,
    hovermode='closest',
    plot_bgcolor='white',
    paper_bgcolor='#f8f9fa',
    font=dict(family='Arial', size=12),
    showlegend=True
)

fig.update_xaxes(showgrid=True, gridcolor='#ecf0f1')
fig.update_yaxes(showgrid=True, gridcolor='#ecf0f1')

# Save & Show
fig.write_html("validation_dashboard_interactive.html")
print("\n✓ Interactive dashboard saved as 'validation_dashboard_interactive.html'")

fig.show()

print("\n" + "=" * 80)
print("INTERACTIVE DASHBOARD CREATION COMPLETE")
print("=" * 80)

CREATING INTERACTIVE VISUAL DASHBOARD

✓ Interactive dashboard saved as 'validation_dashboard_interactive.html'



INTERACTIVE DASHBOARD CREATION COMPLETE


## Step 8: Export HTML Report

Generate a professional HTML report with embedded visualizations.

In [27]:
# ============================================================================
# STEP 8: EXPORT ENHANCED INTERACTIVE HTML REPORT
# ============================================================================

print("="*80)
print("EXPORTING ENHANCED HTML REPORT")
print("="*80)

from datetime import datetime
import plotly.io as pio
import webbrowser
import os

# Convert Plotly figure to HTML div
dashboard_html = pio.to_html(fig, include_plotlyjs='cdn', div_id='dashboard')

# Create enhanced HTML report
html_content = f'''
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<title>Merged Ads Validation Report</title>
<style>
body {{
    font-family: Segoe UI, Arial, sans-serif;
    background: #f4f6fb;
    margin: 0;
}}
.container {{
    max-width: 1600px;
    margin: auto;
    background: white;
    border-radius: 15px;
    box-shadow: 0 20px 50px rgba(0,0,0,.2);
    overflow: hidden;
}}
.header {{
    background: linear-gradient(135deg,#667eea,#764ba2);
    color: white;
    padding: 40px;
    text-align: center;
}}
.metrics-grid {{
    display: grid;
    grid-template-columns: repeat(auto-fit,minmax(250px,1fr));
    gap: 20px;
    padding: 30px;
}}
.metric-card {{
    background: white;
    padding: 25px;
    border-radius: 12px;
    text-align: center;
    box-shadow: 0 5px 15px rgba(0,0,0,.1);
}}
.metric-card h3 {{
    color:#667eea;
    font-size:14px;
    text-transform:uppercase;
}}
.metric-card .value {{
    font-size:36px;
    font-weight:bold;
}}
.section {{
    padding: 30px;
}}
table {{
    width: 100%;
    border-collapse: collapse;
}}
th {{
    background:#667eea;
    color:white;
    padding:12px;
    text-align:left;
}}
td {{
    padding:10px;
    border-bottom:1px solid #eee;
}}
.pass {{ color:#27ae60; font-weight:bold; }}
.fail {{ color:#e74c3c; font-weight:bold; }}
.footer {{
    background:#2c3e50;
    color:white;
    text-align:center;
    padding:20px;
}}
</style>
</head>
<body>

<div class="container">
<div class="header">
    <h1>📊 Merged Ads Validation Report</h1>
    <p>Generated: {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}</p>
    <p>Threshold: ±{THRESHOLD_PERCENT}%</p>
</div>

<div class="metrics-grid">
    <div class="metric-card">
        <h3>Overall Match Rate</h3>
        <div class="value">{overall_match_pct:.1f}%</div>
    </div>
    <div class="metric-card">
        <h3>Dates Matched</h3>
        <div class="value">{date_pass}/{len(date_comparison)}</div>
    </div>
    <div class="metric-card">
        <h3>Campaigns Matched</h3>
        <div class="value">{campaign_pass}/{len(campaign_comparison)}</div>
    </div>
    <div class="metric-card">
        <h3>Threshold</h3>
        <div class="value">±{THRESHOLD_PERCENT}%</div>
    </div>
</div>

<div class="section">
<h2>📈 Interactive Dashboard</h2>
{dashboard_html}
</div>

<div class="section">
<h2>📊 Overall Totals Comparison</h2>
<table>
<tr>
<th>Metric</th><th>Target(growth team data)</th><th>Gold(fabric gold)</th>
<th>Difference</th><th>Diff %</th><th>Status</th>
</tr>
'''

# ---------------- OVERALL COMPARISON ----------------
for _, row in overall_comparison.iterrows():
    status_class = 'pass' if row['Match'] else 'fail'
    html_content += f'''
    <tr>
        <td>{row['Metric']}</td>
        <td>{row['Target']:,.2f}</td>
        <td>{row['Gold']:,.2f}</td>
        <td>{row['Difference']:,.2f}</td>
        <td>{row['Diff %']:.2f}%</td>
        <td class="{status_class}">{row['Status']}</td>
    </tr>
    '''

html_content += '''
</table>
</div>

<div class="section">
<h2>📅 Validation by Date</h2>
<table>
<tr>
<th>Date</th>
<th>Cost Target(growth team data)</th><th>Cost Gold(fabric gold)</th><th>Cost Diff %</th>
<th>Impr Target(growth team data)</th><th>Impr Gold(fabric gold)</th><th>Impr Diff %</th>
<th>Clicks Target(growth team data)</th><th>Clicks Gold(fabric gold)</th><th>Clicks Diff %</th>
<th>Status</th>
</tr>
'''

# ---------------- DATE COMPARISON (FIXED COLUMN NAMES) ----------------
for _, row in date_comparison.sort_values('Day').iterrows():
    status_class = 'pass' if row['perfect_match'] else 'fail'
    html_content += f'''
    <tr>
        <td>{row['Day']}</td>
        <td>{row['cost_target']:,.2f}</td>
        <td>{row['cost_gold']:,.2f}</td>
        <td>{row['cost_diff_pct']:.2f}%</td>
        <td>{row['impressions_target']:,.0f}</td>
        <td>{row['impressions_gold']:,.0f}</td>
        <td>{row['impr_diff_pct']:.2f}%</td>
        <td>{row['clicks_target']:,.0f}</td>
        <td>{row['clicks_gold']:,.0f}</td>
        <td>{row['clicks_diff_pct']:.2f}%</td>
        <td class="{status_class}">{row['status']}</td>
    </tr>
    '''

html_content += '''
</table>
</div>

<div class="section">
<h2>🎯 Validation by Campaign</h2>
<table>
<tr>
<th>Campaign</th><th>Source</th>
<th>Cost Target(growth team data)</th><th>Cost Gold(fabric gold)</th><th>Cost Diff %</th>
<th>Impr Target(growth team data)</th><th>Impr Gold(fabric gold)</th><th>Impr Diff %</th>
<th>Clicks Target(growth team data)</th><th>Clicks Gold(fabric gold)</th><th>Clicks Diff %</th>
<th>Status</th>
</tr>
'''

# ---------------- CAMPAIGN COMPARISON ----------------
for _, row in campaign_comparison.sort_values('Campaign name').iterrows():
    status_class = 'pass' if row['perfect_match'] else 'fail'
    html_content += f'''
    <tr>
        <td>{row['Campaign name']}</td>
        <td>{row['source']}</td>
        <td>{row['cost_target']:,.2f}</td>
        <td>{row['cost_gold']:,.2f}</td>
        <td>{row['cost_diff_pct']:.2f}%</td>
        <td>{row['impressions_target']:,.0f}</td>
        <td>{row['impressions_gold']:,.0f}</td>
        <td>{row['impr_diff_pct']:.2f}%</td>
        <td>{row['clicks_target']:,.0f}</td>
        <td>{row['clicks_gold']:,.0f}</td>
        <td>{row['clicks_diff_pct']:.2f}%</td>
        <td class="{status_class}">{row['status']}</td>
    </tr>
    '''

html_content += f'''
</table>
</div>

<div class="footer">
<p><strong>Merged Ads Validation Report</strong></p>
<p>Threshold: ±{THRESHOLD_PERCENT}% | Overall Match: {overall_match_pct:.1f}%</p>
</div>
</div>

</body>
</html>
'''

# Save HTML
with open("validation_report_interactive.html", "w", encoding="utf-8") as f:
    f.write(html_content)

print("✓ Enhanced HTML report saved: validation_report_interactive.html")

# Open automatically
html_path = os.path.abspath("validation_report_interactive.html")
webbrowser.open("file://" + html_path)

print("="*80)
print("HTML EXPORT COMPLETE")
print("="*80)


EXPORTING ENHANCED HTML REPORT
✓ Enhanced HTML report saved: validation_report_interactive.html
HTML EXPORT COMPLETE
