In [11]:
import glob
import json

# Collect every parsed-profile JSON file.
# glob returns matches in an arbitrary, filesystem-dependent order; sorting the
# paths makes the order of `profiles` (and therefore tie-breaking in any later
# stable sort) reproducible between runs and machines.
file_paths = sorted(glob.glob('../output/profiles/parsed*.json'))

# profiles: every decoded JSON document, in path order.
# max_date: latest (year, month, day) found in a profile's 'date' field,
#           or None when no profile carries a usable date.
max_date = None
profiles = []
for file_path in file_paths:
    # Keep the file open only for as long as decoding needs it.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)
    profiles.append(data)

    # The date is expected in the form "2025-04-30".
    date_str = data.get('date', None)
    if not date_str:
        continue

    try:
        # Unpacking raises ValueError when there are not exactly three
        # components, and int() raises ValueError for non-numeric ones.
        year, month, day = (int(part) for part in date_str.split('-'))
    except ValueError:
        # Malformed date: the profile is still kept, it just does not
        # take part in the max-date computation.
        continue

    # Tuples compare element-wise, so (year, month, day) orders chronologically.
    date_tuple = (year, month, day)
    if max_date is None or date_tuple > max_date:
        max_date = date_tuple

In [None]:
# Rank the profiles from most to fewest history events (a missing 'history'
# counts as zero events) and keep the 15 highest-ranked ones.
ranked_profiles = sorted(
    profiles,
    key=lambda profile: len(profile.get('history', [])),
    reverse=True,
)
top_profiles = ranked_profiles[:15]

# Read the augmented account records from ../output/augmented_accounts.json.
with open('../output/augmented_accounts.json', 'r', encoding='utf-8') as accounts_file:
    accounts_data = json.load(accounts_file)

# The account list lives under the 'accounts' key; default to an empty list.
accounts = accounts_data.get('accounts', [])

# Each profile in top_profiles is paired with the account whose 'profile_url'
# equals the profile's 'url'.
#
# Index the accounts by URL once instead of rescanning the whole list for
# every profile. setdefault keeps the first account seen for a URL, which is
# the same account a first-match linear search would return. Accounts without
# a 'profile_url' cannot be matched and are left out of the index.
accounts_by_url = {}
for acc in accounts:
    acc_url = acc.get('profile_url')
    if acc_url is not None:
        accounts_by_url.setdefault(acc_url, acc)

# Collect the table line by line and join once at the end.
table_rows = [
    "| Current Account Name | Creation Date | Name Change Count | Merge Count | Event Count |",
    "|----------------------|--------------:|------------------:|------------:|------------:|",
]

for profile in top_profiles:
    account = accounts_by_url.get(profile.get('url'))
    if account is None:
        # No matching account: the profile is left out of the table, so the
        # table can end up with fewer rows than len(top_profiles).
        continue

    account_name = account.get('name', 'N/A')
    if account_name:
        # A literal '|' inside a cell would be read as a column separator and
        # break the row, so escape it.
        safe_name = str(account_name).replace('|', '\\|')
        account_name_with_url = f"[{safe_name}]({account['profile_url']})"
    else:
        account_name_with_url = "N/A"

    # Only the first 10 characters of the creation date are shown.
    raw_creation_date = account.get('creation_date')
    creation_date = raw_creation_date[:10] if raw_creation_date else "N/A"

    # Treat a missing or null history as "no events".
    history = profile.get('history') or []
    name_change_count = sum(1 for event in history if event.get('event_type') == 'NameChange')
    merge_count = sum(1 for event in history if event.get('event_type') == 'Merge')
    event_count = len(history)

    table_rows.append(
        f"| {account_name_with_url} | {creation_date} | {name_change_count} | {merge_count} | {event_count} |"
    )

# Every line, including the last one, is newline-terminated.
markdown_table = "\n".join(table_rows) + "\n"


# Resolve the acquisition date BEFORE opening the report. max_date is None
# when no profile carried a usable date; formatting it inside the `with`
# block would raise only after the file had already been truncated, leaving
# a half-written report behind.
if max_date is None:
    acquisition_date = "N/A"
else:
    acquisition_date = "{}-{:02d}-{:02d}".format(*max_date)

# Save the report (title, acquisition date, top-15 table, summary) to a file.
with open('../reports/accounts-top-15-events.md', 'w', encoding='utf-8') as md_file:
    md_file.write("# Most Undecided Accounts\n")
    md_file.write(f"**Acquisition date:** {acquisition_date}\n\n")

    md_file.write("## Top 15 accounts by event count\n")
    md_file.write("This table shows the top 15 accounts with the most events in their history.\n\n")
    md_file.write(markdown_table)

    # Blank line between the last table row and the next heading, so Markdown
    # parsers that split blocks on blank lines do not pull the heading into
    # the table.
    md_file.write("\n## Summary\n")
    md_file.write(f"**Total Accounts Processed:** {len(profiles)}\n\n")