# Notebook 03 — Performance Issue Identification
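#### This notebook flags performance issues in the Facebook Ads dataset by applying threshold rules to derived metrics (CPC, CPM, CTR, ROAS, purchases), compares mean ROAS by platform and country, and saves the results to a JSON report.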

In [1]:
import json
import os
import sys

import numpy as np
import pandas as pd

# Make the parent directory importable so `src.analysis_utils` can be resolved.
# ".." is relative to the kernel's working directory.
project_root = os.path.abspath("..")
# Guard the append so re-running this cell does not stack duplicate entries.
if project_root not in sys.path:
    sys.path.append(project_root)

from src.analysis_utils import (
    detect_high_cpc,
    detect_low_roas,
    summarize_metrics
)

In [2]:
# Load the cleaned intermediate dataset.
input_path = "../outputs/intermediate_files/cleaned_data.csv"

df = pd.read_csv(input_path)
# The CSV carries a saved row index that pandas reads back as a spurious
# "Unnamed: 0" data column; drop any such column so it cannot leak into
# later numeric summaries. This is a no-op if the file has no such column.
df = df.drop(columns=[col for col in df.columns if col.startswith("Unnamed")])
df.head()

Unnamed: 0,campaign_name,adset_name,date,spend,impressions,clicks,ctr,purchases,revenue,roas,creative_type,creative_message,audience_type,platform,country,cpc,cpm,revenue_per_purchase
0,Men ComfortMax Launch,Adset-1 Retarget,2025-01-01,640.09,235597,4313.0,0.0183,80,1514.28,2.37,Image,Breathable organic cotton that moves with you ...,Broad,Facebook,US,0.148409,2.716885,18.9285
1,Men ComfortMax Launch,Adset-1 Retarget,2025-01-02,373.75,276194,5429.0,0.0197,94,4152.81,11.11,Video,No ride‑up guarantee — best‑selling men briefs...,Broad,Facebook,US,0.068843,1.353215,44.17883
2,Men ComfortMax Launch,Adset-1 Retarget,2025-01-03,703.79,466572,9830.0,0.0211,240,4893.43,6.95,UGC,Cooling mesh panels for workouts — men boxers ...,Broad,Facebook,US,0.071596,1.508427,20.389292
3,Men_ComfortMax_Launch,Adset-1 Retarget,2025-01-04,441.5,193230,2424.0,0.0125,61,1330.1,3.01,Image,Cooling mesh panels for workouts — men athleti...,Lookalike,Facebook,US,0.182137,2.284842,21.804918
4,Men Comfortmax Launch,Adset-1 Retarget,2025-01-06,579.03,180096,2356.0,0.0131,41,1545.67,2.67,Image,Breathable bamboo that moves with you — limite...,Lookalike,Instagram,UK,0.245768,3.215119,37.699268


In [3]:
# Headline averages computed by the project helper from src.analysis_utils.
# The recorded run returned a dict with avg_cpc, avg_cpm, avg_ctr and avg_roas.
summary = summarize_metrics(df)
summary

{'avg_cpc': np.float64(0.17670642254889576),
 'avg_cpm': np.float64(1.9906662925160878),
 'avg_ctr': np.float64(0.012594870978018476),
 'avg_roas': np.float64(6.57754380375916)}

In [4]:
# High-CPC rule: the cutoff is 1.5x the dataset-wide mean CPC.
# The comparison against the cutoff happens inside detect_high_cpc.
cpc_cutoff = 1.5 * df["cpc"].mean()
high_cpc_df = detect_high_cpc(df, threshold=cpc_cutoff)
len(high_cpc_df)

624

In [5]:
# Flag low-ROAS entries with the project helper, using a threshold of 1.
# NOTE(review): presumably this means revenue below spend; whether the
# comparison is strict is decided inside detect_low_roas — confirm there.
low_roas_df = detect_low_roas(df, threshold=1)
len(low_roas_df)

192

In [6]:
# Low-CTR rule: keep rows whose click-through rate is strictly under 0.01.
ctr_is_low = df["ctr"].lt(0.01)
low_ctr_df = df.loc[ctr_is_low]
low_ctr_df.shape[0]

1014

In [7]:
# High-CPM rule: keep rows whose CPM is strictly above 1.5x the mean CPM.
cpm_cutoff = 1.5 * df["cpm"].mean()
high_cpm_df = df.loc[df["cpm"].gt(cpm_cutoff)]
high_cpm_df.shape[0]

599

In [8]:
# Low-purchase rule: keep rows strictly below the median purchase count.
median_purchases = df["purchases"].median()
low_purchase_df = df.loc[df["purchases"].lt(median_purchases)]
low_purchase_df.shape[0]

1562

In [9]:
# Mean of the per-row ROAS values, grouped by platform.
df["roas"].groupby(df["platform"]).mean()

platform
Facebook     6.697510
Instagram    6.459323
Name: roas, dtype: float64

In [10]:
# Mean of the per-row ROAS values, grouped by country.
df["roas"].groupby(df["country"]).mean()

country
IN    6.175955
UK    6.843732
US    6.668622
Name: roas, dtype: float64

In [11]:
# Map each report label to the flagged subset it summarises.
flagged_frames = {
    "high_cpc_campaigns": high_cpc_df,
    "low_roas_campaigns": low_roas_df,
    "low_ctr_creatives": low_ctr_df,
    "high_cpm_campaigns": high_cpm_df,
    "low_purchase_campaigns": low_purchase_df,
}

# Each count is len() of the flagged subset, in the order listed above.
issues = {label: len(frame) for label, frame in flagged_frames.items()}

# Append mean ROAS per platform and per country as plain dicts.
for dimension in ("platform", "country"):
    issues[f"{dimension}_roas"] = df.groupby(dimension)["roas"].mean().to_dict()

issues

{'high_cpc_campaigns': 624,
 'low_roas_campaigns': 192,
 'low_ctr_creatives': 1014,
 'high_cpm_campaigns': 599,
 'low_purchase_campaigns': 1562,
 'platform_roas': {'Facebook': 6.697509627727856,
  'Instagram': 6.459323213156231},
 'country_roas': {'IN': 6.175955334987592,
  'UK': 6.8437322834645675,
  'US': 6.668621908127208}}

In [12]:
# Persist the issue summary as an indented JSON file.
output_path = "../reports/structured_outputs/issues_report.json"

# Create the target directory first: on a fresh checkout it may not exist,
# and open(..., "w") would then raise FileNotFoundError.
os.makedirs(os.path.dirname(output_path), exist_ok=True)

with open(output_path, "w", encoding="utf-8") as f:
    json.dump(issues, f, indent=4)

print("Issue report saved to:", output_path)

Issue report saved to: ../reports/structured_outputs/issues_report.json
