# User vs. Organization Comparison

This notebook compares sponsorship outcomes between Users and Organizations.

It summarizes:
- Mean and median of `total_sponsors` and `estimated_earnings` by account type
- Mean `followers` and `public_repos` by type

In [None]:
import pandas as pd
from data_loader import load_sample_data

In [None]:
def summarize_by_type(df: pd.DataFrame, min_accounts: int = 5) -> pd.DataFrame:
    """Aggregate sponsorship and profile metrics per account type.

    The input frame is copied, so the caller's data is never modified.
    Rows whose ``type`` is missing (or every row, when there is no
    ``type`` column at all) are grouped under the label ``"Unknown"``.

    Args:
        df: Account-level data. Must provide the ``username``,
            ``total_sponsors``, ``estimated_earnings``, ``followers`` and
            ``public_repos`` columns; ``type`` is optional.
        min_accounts: Smallest ``n_accounts`` a type needs in order to be
            kept in the result.

    Returns:
        One row per retained account type with the columns ``type``,
        ``n_accounts`` (number of non-null usernames), ``mean_sponsors``,
        ``median_sponsors``, ``mean_earnings``, ``median_earnings``,
        ``mean_followers`` and ``mean_public_repos``, ordered by
        ``mean_sponsors`` from highest to lowest. The index keeps the
        positional labels assigned before filtering and sorting.
    """
    accounts = df.copy()

    # Guarantee a fully populated grouping key before aggregating.
    if "type" in accounts.columns:
        accounts["type"] = accounts["type"].fillna("Unknown")
    else:
        accounts["type"] = "Unknown"

    # Output column -> (source column, aggregation function).
    aggregations = {
        "n_accounts": ("username", "count"),
        "mean_sponsors": ("total_sponsors", "mean"),
        "median_sponsors": ("total_sponsors", "median"),
        "mean_earnings": ("estimated_earnings", "mean"),
        "median_earnings": ("estimated_earnings", "median"),
        "mean_followers": ("followers", "mean"),
        "mean_public_repos": ("public_repos", "mean"),
    }
    per_type = accounts.groupby("type").agg(**aggregations).reset_index()

    # Drop account types that are too small to compare meaningfully.
    has_enough_accounts = per_type["n_accounts"] >= min_accounts
    retained = per_type[has_enough_accounts]
    return retained.sort_values(by="mean_sponsors", ascending=False)

## Load Data and Compute Type Summary

In [None]:
# Load the sample dataset through the project's data_loader helper
# (presumably a pandas DataFrame of accounts -- confirm against data_loader).
df = load_sample_data()
# Aggregate per account type; types with fewer than 5 counted accounts are
# dropped from the summary.
summary = summarize_by_type(df, min_accounts=5)

## Display Results

In [None]:
# Report the per-type comparison, or explain why there is nothing to show.
if summary.empty:
    print("No account types with sufficient number of accounts for analysis.")
else:
    # Session-wide pandas display setting, retained from the original cell.
    pd.set_option("display.max_columns", 20)
    print("\nComparison of sponsorship outcomes by account type:\n")

    # Columns of the report, in presentation order.
    report_columns = [
        "type",
        "n_accounts",
        "mean_sponsors",
        "median_sponsors",
        "mean_earnings",
        "median_earnings",
        "mean_followers",
        "mean_public_repos",
    ]
    # Jupyter only auto-renders the last *top-level* expression of a cell.
    # A bare DataFrame expression nested inside this else-branch would be
    # evaluated and silently discarded, so the table is printed explicitly.
    # The index is hidden because it only holds leftover group positions.
    print(summary[report_columns].to_string(index=False))