## Analyze codonors

In [1]:
import datetime
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib import ticker
%matplotlib inline

Time the notebook

In [2]:
start = datetime.datetime.now()

Set some viewing options

In [3]:
pd.set_option('display.max_colwidth', 200)
pd.set_option('display.max_columns', 40)
pd.set_option('display.max_rows', 100)

Load candidates file

In [4]:
candidates = pd.read_csv(
    "../data/candidates.csv"
)

Read dataframe of all actblue contributions

In [5]:
actblue = pd.read_csv(
    "../output/clean_actblue.csv",
    dtype = {
        "filing_id": "object",
        "contributor_organization_name": "object"
    },
    parse_dates = ['contribution_date']
)

actblue.head(3)

Unnamed: 0,candidate_name,committee_name,Candidate ID,committee_id,entity_type,filer_committee_id_number,transaction_id,contribution_date,contribution_amount,contribution_aggregate,contributor_organization_name,contributor_first_name,contributor_last_name,contributor_street_1,contributor_street_2,contributor_city,contributor_zip_code,contributor_state,contributor_employer,contributor_occupation,contribution_purpose_descrip,memo_code,memo_text_description,filing_id,donor_id,actblue_committee_name,latest_contribution_aggregate
0,Cory Booker,Cory 2020,P00009795,C00695510,IND,C00401224,SA11AI_157915681,2019-05-16,5.0,5.0,,DAVID,A. HEIFETZ,214 RHODE ISLAND AVE NW,,WASHINGTON,20001,DC,NEW POLITICS,CHIEF COMMUNICATIONS OFFICER,Earmark,,Earmarked for CORY 2020 (C00695510),1344765,DAVID|A HEIFETZ|20001,CORY 2020,5.0
1,Cory Booker,Cory 2020,P00009795,C00695510,IND,C00401224,SA11AI_151300127,2019-03-22,20.0,20.0,,CAROL,A.FOSTER,93 HORNE WAY,,MILLBURY,1527,MA,NOT EMPLOYED,NOT EMPLOYED,Earmark,,Earmarked for CORY 2020 (C00695510),1344765,CAROL|A FOSTER|01527,CORY 2020,20.0
2,Cory Booker,Cory 2020,P00009795,C00695510,IND,C00401224,SA11AI_160483217,2019-06-07,5.0,5.0,,REET,AADOSON-PALLADINO,587 WATERCOLOR LN,,WEST SACRAMENTO,95605,CA,NOT EMPLOYED,NOT EMPLOYED,Earmark,,Earmarked for CORY 2020 (C00695510),1344765,REET|AADOSON-PALLADINO|95605,CORY 2020,5.0


Load DataFrame of latest contributions

In [6]:
latest_contribs = (
    pd
    .read_csv(
        "../output/latest_contribs.csv",
        dtype = {
            "filing_id": "str"
        }
    )
)

latest_contribs.head(3)

Unnamed: 0,donor_id,candidate_name,latest_contribution_aggregate
0,!HERBERT|FREEMAN|08831,Beto O'Rourke,7.0
1,!HERBERT|FREEMAN|08831,Joe Biden,122.0
2,"""FITZ"" JOHN|FITZGERALD|15201",Jay Inslee,10.0


Convenience functions for readable output

In [7]:
def dollar(num):
    return '${:,.2f}'.format(num)

def comma(num):
    return '{:,}'.format(num)

## Topline stats

In [8]:
avg_donation = (
    actblue
    .groupby(['donor_id', 'candidate_name'])
    ['latest_contribution_aggregate']
    .max()
    .mean()
)

avg_donation

f"The average aggregate donation is ${round(avg_donation, 2)}."

'The average aggregate donation is $52.68.'

In [9]:
donors_per_candidate = (
    actblue
    ['candidate_name']
    .value_counts()
    .to_frame("Donations")
)

donors_per_candidate.head()

Unnamed: 0,Donations
Bernie Sanders,1820719
Elizabeth Warren,812628
Pete Buttigieg,624204
Kamala Harris,504750
Joe Biden,422720


## Codonors

Find donors who gave to two or more candidates

In [10]:
candidate_pairs = (
    latest_contribs
    .rename(columns = {
        "candidate_name": "candidate"
    })
    [[
        "donor_id",
        "candidate"
    ]]
    # merge with itself
    .pipe(lambda df: (
        df
        .merge(
            df,
            how = "left",
            on = "donor_id",
            suffixes = [ "_x", "_y" ],
        )
    ))
    # This filter prevents us from double-counting candidate-combinations
    .loc[lambda df: df["candidate_x"] < df["candidate_y"]]
    .sort_values([
        "candidate_x",
        "candidate_y",
        "donor_id"
    ])
)

candidate_pairs.head(5)

Unnamed: 0,donor_id,candidate_x,candidate_y
1397,A ROBERT|BAKER|03590,Amy Klobuchar,Andrew Yang
2880,AARON|BOTWICK|11225,Amy Klobuchar,Andrew Yang
4014,AARON|CONRAD|72015,Amy Klobuchar,Andrew Yang
4720,AARON|DIXON|78756,Amy Klobuchar,Andrew Yang
15836,ABBY|ROZA|55418,Amy Klobuchar,Andrew Yang


In [11]:
num_unique = actblue['donor_id'].nunique()

sharing = (candidate_pairs['donor_id'].nunique() / num_unique ) * 100


f"Out of {comma(num_unique)} unique ActBlue donors, {round(sharing)}% of them gave to more than one candidate."

'Out of 2,410,438 unique ActBlue donors, 19% of them gave to more than one candidate.'

Count the distinct donors per candidate-pairs

In [12]:
pair_counts = (
    candidate_pairs
    .groupby([
        "candidate_x",
        "candidate_y",
    ])
    .size()
    .to_frame("count")
    .sort_values("count", ascending = False)
    .reset_index()
    .sort_values("count", ascending = False)
)

# write out
pair_counts.to_csv(
    "../output/candidate-pair-counts.csv",
    index = False
)

count_pairs = candidate_pairs['donor_id'].nunique()

print(f"{comma(count_pairs)} unique donors gave to more than one candidate.")

pair_counts.head()

455,267 unique donors gave to more than one candidate.


Unnamed: 0,candidate_x,candidate_y,count
0,Elizabeth Warren,Kamala Harris,61792
1,Bernie Sanders,Elizabeth Warren,60749
2,Elizabeth Warren,Pete Buttigieg,53802
3,Kamala Harris,Pete Buttigieg,45085
4,Bernie Sanders,Tulsi Gabbard,31068


Find donors who gave to three or more candidates

In [13]:
candidate_triplets = (
    latest_contribs
    .rename(columns = {
        "candidate_name": "candidate"
    })
    [[
        "donor_id",
        "candidate"
    ]]
    .pipe(lambda df: (
        df
        .merge(
            df,
            how = "left",
            on = "donor_id",
            suffixes = [ "_x", "_y" ],
        )
        .merge(
            df.rename(columns = { "candidate": "candidate_z" }),
            how = "left",
            on = "donor_id",
        )
    ))
    # This filter prevents us from double-counting candidate-combinations
    .loc[lambda df: df["candidate_x"] < df["candidate_y"]]
    .loc[lambda df: df["candidate_y"] < df["candidate_z"]]
    .sort_values([
        "candidate_x",
        "candidate_y",
        "candidate_z",
        "donor_id"
    ])
)

candidate_triplets.head(5)

Unnamed: 0,donor_id,candidate_x,candidate_y,candidate_z
127091,ADRIENNE|LEVINSON|22202,Amy Klobuchar,Andrew Yang,Bernie Sanders
145113,AHNI|SALLAWAY|21409,Amy Klobuchar,Andrew Yang,Bernie Sanders
157704,AIMIN|WALSH|49085,Amy Klobuchar,Andrew Yang,Bernie Sanders
184806,ALAN|BELLOMO|15931,Amy Klobuchar,Andrew Yang,Bernie Sanders
190871,ALAN|BRITTENHAM|98058,Amy Klobuchar,Andrew Yang,Bernie Sanders


In [14]:
count_triplets = candidate_triplets['donor_id'].nunique()

print(f"{comma(count_triplets)} unique donors gave to three or more candidates.")

154,122 unique donors gave to three or more candidates.


Count the distinct donors per candidate-triples

In [15]:
triplet_counts = (
    candidate_triplets
    .groupby([
        "candidate_x",
        "candidate_y",
        "candidate_z",
    ])
    .size()
    .sort_values(ascending = False)
    .to_frame("count")
    .reset_index()
)
# write out
triplet_counts.to_csv(
    "../output/candidate-triplet-counts.csv",
    index = False
)

triplet_counts.head()

Unnamed: 0,candidate_x,candidate_y,candidate_z,count
0,Elizabeth Warren,Kamala Harris,Pete Buttigieg,18675
1,Elizabeth Warren,Julián Castro,Kamala Harris,12458
2,Cory Booker,Elizabeth Warren,Kamala Harris,10823
3,Elizabeth Warren,Julián Castro,Pete Buttigieg,9469
4,Julián Castro,Kamala Harris,Pete Buttigieg,9287


## How often do candidates share?

In [16]:
def sharing(candidate):
    # donors who give to this candidate
    candidate_donors = (
        actblue
        .loc[ lambda x: x["candidate_name"] == candidate]
        .groupby('donor_id')
        .first()
        .reset_index()
    )

    # count candidate unique donors
    num_unique_donors = len(candidate_donors)

    print(f"There are {comma(num_unique_donors)} unique ActBlue donors to {candidate}'s campaign.")

    # total unique donors
    actblue_total = actblue['donor_id'].nunique()

    # get percent
    percent_actblue = (num_unique_donors / actblue_total) * 100

    print(f"That's {round(percent_actblue)}% of the entire {comma(actblue_total)} ActBlue donor pool.")

    # number of candidate donors that show up in the codonor list
    candidate_sharing = candidate_donors.loc[
        lambda x: x['donor_id'].isin(candidate_pairs['donor_id'])
    ].pipe(len)

    percent_share = round(candidate_sharing / len(candidate_pairs) * 100)

    print(f"{percent_share}% of ActBlue donors who gave to {candidate} also gave to another candidate.")


In [17]:
sharing("Elizabeth Warren")

There are 413,479 unique ActBlue donors to Elizabeth Warren's campaign.
That's 17% of the entire 2,410,438 ActBlue donor pool.
15% of ActBlue donors who gave to Elizabeth Warren also gave to another candidate.


In [18]:
sharing("Kamala Harris")

There are 271,506 unique ActBlue donors to Kamala Harris's campaign.
That's 11% of the entire 2,410,438 ActBlue donor pool.
11% of ActBlue donors who gave to Kamala Harris also gave to another candidate.


In [19]:
sharing("Pete Buttigieg")

There are 387,081 unique ActBlue donors to Pete Buttigieg's campaign.
That's 16% of the entire 2,410,438 ActBlue donor pool.
11% of ActBlue donors who gave to Pete Buttigieg also gave to another candidate.


In [20]:
sharing("Julián Castro")

There are 108,752 unique ActBlue donors to Julián Castro's campaign.
That's 5% of the entire 2,410,438 ActBlue donor pool.
5% of ActBlue donors who gave to Julián Castro also gave to another candidate.


## Who owns the codonor pool?

In [21]:
codonor_percents = (
    candidate_pairs
    .melt(
        id_vars = [ "donor_id" ],
        value_name = "candidate",
    )
    .groupby("candidate")
    ["donor_id"]
    .nunique()
    .to_frame("codonors")
    .assign(
        pct = lambda df: (100 * df["codonors"] / candidate_pairs["donor_id"].nunique()).round(1)
    )
    .sort_values("codonors", ascending = False)
)

codonor_percents.head(10)

Unnamed: 0_level_0,codonors,pct
candidate,Unnamed: 1_level_1,Unnamed: 2_level_1
Elizabeth Warren,188935,41.5
Bernie Sanders,154564,34.0
Pete Buttigieg,141367,31.1
Kamala Harris,135772,29.8
Joe Biden,71970,15.8
Julián Castro,67198,14.8
Beto O'Rourke,64472,14.2
Cory Booker,55228,12.1
Tulsi Gabbard,42840,9.4
Kirsten Gillibrand,42737,9.4


Check notebook timing

In [22]:
end = datetime.datetime.now()

d = (end - start)

f"The notebook ran for {round(d.total_seconds() / 60, 2) } minutes"

'The notebook ran for 2.21 minutes'

---

---

---