In [59]:
import glob
import json

# Collect every parsed profile dump. glob returns matches in arbitrary,
# filesystem-dependent order, so the paths are sorted to keep the order of
# `profiles` (and anything derived from it) reproducible between runs.
file_paths = sorted(glob.glob('../output/profiles/parsed*.json'))

# profiles: every decoded JSON document, in file_paths order.
# max_date: latest 'date' found across all files as a (year, month, day)
#           tuple of ints, or None when no file carries a parseable date.
max_date = None
profiles = []
for file_path in file_paths:
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    profiles.append(data)

    # The date is expected to look like "2025-04-30".
    date_str = data.get('date', None)
    if not date_str or not isinstance(date_str, str):
        continue

    date_parts = date_str.split('-')
    if len(date_parts) != 3:
        continue

    try:
        # Integer tuples compare chronologically, element by element.
        date_tuple = tuple(int(part) for part in date_parts)
    except ValueError:
        # A non-numeric component is skipped, the same way a date with the
        # wrong number of parts is skipped, instead of aborting the load.
        continue

    if max_date is None or date_tuple > max_date:
        max_date = date_tuple

In [60]:
# Keep only the profiles that are running ads. A profile is dropped when
# 'ad_status' is missing or null, or when 'ad_status.running' is missing,
# null or False. `or {}` covers an explicit JSON null, which the default
# argument of dict.get does not (it only applies to a missing key).
top_profiles = [
    profile for profile in profiles
    if (profile.get('ad_status') or {}).get('running')
]

# Load the account records from ../output/augmented_accounts.json
with open('../output/augmented_accounts.json', 'r', encoding='utf-8') as f:
    accounts_data = json.load(f)

# Fall back to an empty list when 'accounts' is absent or null.
accounts = accounts_data.get('accounts') or []

# Each profile in top_profiles is matched to an account through
# profile['url'] == account['profile_url'].
# The accounts are indexed by URL once, so each profile costs one dict lookup
# instead of a scan over the whole list. setdefault keeps the first account
# seen for a URL, i.e. the one a front-to-back search would return.
accounts_by_url = {}
for acc in accounts:
    acc_url = acc.get('profile_url')
    if acc_url is not None:
        accounts_by_url.setdefault(acc_url, acc)

# Number of rows kept in the table. The report text written further down in
# this cell also says "Top 15", so keep the two in sync when changing this.
TOP_ACCOUNT_LIMIT = 15

# Markdown table header; the two numeric columns are right-aligned.
markdown_table =  "| Current Account Name | Creation Date | Follower Count |\n"
markdown_table += "|----------------------|--------------:|---------------:|\n"

cells = []

# Collect one row per ad-running profile that has a matching account.
for profile in top_profiles:
    account = accounts_by_url.get(profile.get('url'))
    if account is None:
        continue

    # A missing 'name' key is shown as a linked "N/A"; a null or empty name
    # is shown as a plain "N/A" without a link.
    account_name = account.get('name', 'N/A')
    if account_name:
        # An unescaped pipe inside a cell would be read as a column separator.
        safe_name = str(account_name).replace('|', '\\|')
        account_name_with_url = f"[{safe_name}]({account['profile_url']})"
    else:
        account_name_with_url = "N/A"

    # Only the first 10 characters of the creation date are kept.
    raw_creation_date = account.get('creation_date')
    creation_date = raw_creation_date[:10] if raw_creation_date else "N/A"

    # A missing or null 'stats' / 'followers' counts as 0 so that sorting and
    # the thousands-separator format below always receive a number.
    follower_count = (account.get('stats') or {}).get('followers') or 0

    cells.append({
        'account_name_with_url': account_name_with_url,
        'creation_date': creation_date,
        'follower_count': follower_count
    })

# Largest follower counts first. list.sort is stable, so equal counts keep
# the order in which they were collected.
cells.sort(key=lambda x: x['follower_count'], reverse=True)
top_cells = cells[:TOP_ACCOUNT_LIMIT]

# Append one table row per selected account.
markdown_table += "".join(
    f"| {cell['account_name_with_url']} | {cell['creation_date']} | {cell['follower_count']:,} |\n"
    for cell in top_cells
)

# Format the latest acquisition date found while loading the profiles.
# max_date is a (year, month, day) tuple, or None when no profile carried a
# usable date; in that case "N/A" is written instead of failing on max_date[0].
if max_date is None:
    acquisition_date = "N/A"
else:
    acquisition_date = f"{max_date[0]}-{max_date[1]:02d}-{max_date[2]:02d}"

# Save the report (title, acquisition date, table, summary) as Markdown.
with open('../reports/accounts-with-ads.md', 'w', encoding='utf-8') as md_file:
    md_file.write("# Accounts Running Ads\n")
    md_file.write(f"**Acquisition date:** {acquisition_date}\n\n")

    md_file.write("## Top 15 accounts running ads\n")
    md_file.write("This table shows the top 15 accounts that are running ads, sorted by follower count.\n\n")
    md_file.write(markdown_table)

    # The leading blank line ends the table before the next heading, so a
    # renderer cannot read the heading as one more table row.
    md_file.write("\n## Summary\n")
    md_file.write(f"**Total Accounts Processed:** {len(profiles)}\n\n")