In [None]:
import pandas as pd
import json
import os

# Load JSON data (path is relative to the directory the notebook runs in)
with open("../output/accounts_master.json", "r", encoding="utf-8") as f:
    data = json.load(f)

accounts = data["accounts"]
# json_normalize flattens nested objects into dotted column names,
# e.g. {"stats": {"followers": 1}} becomes the column "stats.followers".
df = pd.json_normalize(accounts)

# Prepare fields
# errors="coerce" turns unparseable values into NaN instead of raising.
df["followers"] = pd.to_numeric(df["stats.followers"], errors="coerce")
df["follower_growth_rate"] = pd.to_numeric(df["stats.follower_growth_rate"], errors="coerce")
# A missing ad status is treated as "has not run ads". The conversion is done
# value by value rather than with fillna(False): on an object-dtype column
# fillna relies on silent downcasting, which pandas 2.2 deprecates (it emits
# a FutureWarning). Mapping yields an explicit boolean for every row.
df["ad_status.has_run"] = df["ad_status.has_run"].map(
    lambda flag: False if pd.isna(flag) else bool(flag)
)
df["days_since_creation"] = pd.to_numeric(df["days_since_creation"], errors="coerce")

# Define suspicion score
def compute_suspicion_score(row):
    """Return how many suspicion signals (0-3) an account row triggers.

    One point is awarded for each condition that holds:

    * ``row["followers"]`` is at least 100,000
    * ``row["days_since_creation"]`` is at most 180
    * ``row["ad_status.has_run"]`` is falsy

    ``row`` may be any mapping-like object indexable by those three keys
    (for example a DataFrame row). Comparisons against NaN evaluate to
    False, so a missing numeric value simply earns no point.
    """
    signals = (
        row["followers"] >= 100_000,
        row["days_since_creation"] <= 180,
        not row["ad_status.has_run"],
    )
    # Count the signals that fired.
    return sum(1 for fired in signals if fired)

# Filter for high engagement (follower count is the criterion used here)
high_engagement = df[df["followers"] > 50000].copy()
# Score row by row with a plain loop rather than DataFrame.apply(axis=1):
# on an empty frame apply() hands back a DataFrame instead of a Series,
# which cannot be assigned to a single column.
high_engagement["suspicion_score"] = [
    compute_suspicion_score(row) for _, row in high_engagement.iterrows()
]


def _markdown_link(label, url):
    """Return ``[label](url)`` with markdown-breaking characters in the label escaped."""
    text = str(label)
    # Escape the backslash first so the escapes added afterwards are not
    # themselves re-escaped. "|" would otherwise split the table cell and
    # "[" / "]" would terminate the link label early.
    for char in ("\\", "[", "]", "|"):
        text = text.replace(char, "\\" + char)
    return f"[{text}]({url})"


# Build clickable name column
high_engagement["name"] = [
    _markdown_link(name, url)
    for name, url in zip(high_engagement["name"], high_engagement["profile_url"])
]

# Select fields for the report
output_df = high_engagement[[
    "name", "followers", "days_since_creation", "follower_growth_rate", "ad_status.has_run", "suspicion_score"
]]

# Sort by suspicion score (highest first), then by recency (newest first)
# and followers (largest first)
output_df = output_df.sort_values(
    by=["suspicion_score", "days_since_creation", "followers"],
    ascending=[False, True, False],
)

# Output directory
os.makedirs("../reports", exist_ok=True)

# Write markdown report
with open("../reports/high_engagement_no_ads.md", "w", encoding="utf-8") as report:
    report.write("# High Engagement Pages With Suspicion Score\n\n")
    report.write(
        "This report highlights high-follower pages (over 50,000) that may require further analysis.\n\n"
        "**Suspicion Score Criteria:**\n"
        "\n"
        # The scoring function awards the point at exactly 100,000 as well,
        # so the criterion is stated as ">=" to match it.
        "- +1 if followers >= 100,000\n"
        "- +1 if created within the last 180 days\n"
        "- +1 if they have never run ads\n\n"
    )
    if output_df.empty:
        # Nothing passed the follower filter; say so instead of emitting a
        # table with no rows.
        report.write("_No pages exceed the 50,000-follower threshold._\n")
    else:
        report.write(output_df.to_markdown(index=False))
        # End the file with a newline.
        report.write("\n")


  df["ad_status.has_run"] = df["ad_status.has_run"].fillna(False)
