<center>
    <h2>Individual Contributions</h2>
    <h3>Exploratory Data Analysis</h3><br>
</center>

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [2]:
%time df = pd.read_csv('../data/individual_contributions.csv', low_memory=False)

CPU times: user 2min 53s, sys: 1min 7s, total: 4min
Wall time: 4min 33s


In [3]:
def add_commas(number):
    n = str(number)
    
    if len(n)>6:
        return n[:-6]+','+n[-6:-3]+','+n[-3:]
    elif len(n)>3:
        return n[:-3]+','+n[-3:]
    else:
        return number

In [4]:
(add_commas(df.shape[0]), df.shape[1])

('24,446,378', 25)

In [5]:
df.columns

Index(['id', 'cycle', 'fec_trans_id', 'contributor_id', 'contributor_name',
       'recipient_id', 'org_name', 'ult_org', 'real_code', 'date', 'amount',
       'street', 'city', 'state', 'zip', 'recip_code', 'type', 'committee_id',
       'other_id', 'gender', 'old_format_employer_occupation', 'microfilm',
       'occupation', 'employer', 'source'],
      dtype='object')

In [6]:
df['amount_mil'] = df['amount'] / 1_000_000

df.sort_values(by='amount_mil', ascending=False)[[
    'contributor_name', 'amount_mil']].drop_duplicates('contributor_name').head(20)

Unnamed: 0,contributor_name,amount_mil
5457875,"MERCER, LEE L MR JR",24.0
5636195,"STEYER, THOMAS F",16.0
24037919,UNITEMIZED TOTAL,10.465912
23958197,"NEUGEBAUER, TOBY",10.0
5905890,,9.350125
828934,AMERICANS FOR TAX REFORM (GENERAL,7.674142
8191697,"SABAN, HAIM MR",7.0
10766216,VICTORY CAMPAIGN 2004,6.15
18705225,"MCMAHON, LINDA",6.0
8346487,DEMOCRATIC SENATORIAL CAMPAIGN COM,5.958333


---
## Party Committees (including loans)

In [7]:
party = []

for code in df['recip_code']:
    if str(code).upper()[0] == 'D':
        party.append('D')
    elif str(code).upper()[0] == 'R':
        party.append('R')
    else:
        party.append(np.nan)
        
df['party'] = party

In [8]:
df['party'].value_counts(normalize=True) # Near-even split between parties

R    0.501228
D    0.498772
Name: party, dtype: float64

In [9]:
df['committee'] = [1 if str(code).upper()[-1] == 'P' else 0 for code in df['recip_code']]

In [10]:
cols_to_filter = [
    'cycle', 'contributor_name', 'org_name', 'ult_org', 'amount', 'city', 'state', 'recip_code', 'type',
    'employer', 'source']

In [11]:
# Top Donations from Republican Committees
df[(df['committee']==1)&(df['party']=='R')][cols_to_filter].sort_values(
    by='amount', ascending=False).drop_duplicates('contributor_name').head(10)

Unnamed: 0,cycle,contributor_name,org_name,ult_org,amount,city,state,recip_code,type,employer,source
19748559,1992,SIGNET BANK/VIRGINIA,Loan Proceeds,,4500000,VIENNA,VA,RP,10,,Rept
8970891,2002,TEXANS FOR JOHN CORNYN,Texans for John Cornyn,,3100000,AUSTIN,TX,RP,10,,Name
7609801,2002,PRESIDENTIAL INAUGURAL COMM,Presidential Inaugural Cmte,,2057560,WASHINGTON,DC,RP,10,,Rept
8902096,2002,REPUBLICAN GOVERNORS ASSOCATION,Republican Governors Assn,,2000000,WASHINGTON,DC,RP,10,,Name
7844441,2002,REPUBLICAN NATIONAL COMMITTEE,Republican National Cmte,,2000000,WASHINGTON,DC,RP,10,,Rept
7833448,2002,NATIONAL REPUBLICAN,National Republican,,1700000,WASHINGTON,DC,RP,10,,Rept
21287964,1994,AMWAY CORPORATION,Amway Corp,Amway,1700000,ADA,MI,RP,10,,PAC
22536993,1996,SIGNET BANK,Signet Bank,,1700000,RICHMOND,VA,RP,10,,Rept
22614768,1996,NEW YORK REP STATE CMTE,New York Republican State Cmte,,1500000,ALBANY,NY,RP,10,,Name
8490580,2002,2002 PRESIDENT'S DINNER CMTE,2002 President's Dinner Cmte,,1061400,WASHINGTON,DC,RP,10,,Rept


In [12]:
# Top Donations from Democrat Committees
df[(df['committee']==1)&(df['party']=='D')][cols_to_filter].sort_values(
    by='amount', ascending=False).drop_duplicates('contributor_name').head(10)

Unnamed: 0,cycle,contributor_name,org_name,ult_org,amount,city,state,recip_code,type,employer,source
8191697,2002,"SABAN, HAIM MR",Saban Capital Group,,7000000,LOS ANGELES,CA,DP,10,,WebSK
8346487,2002,DEMOCRATIC SENATORIAL CAMPAIGN COM,Democratic Senatorial Campaign Cmte,,5958333,WASHINGTON,DC,DP,10,,Rept
8615952,2002,"EYCHANER, FRED MR",Newsweb Corp,,3000000,CHICAGO,IL,DP,10,,WebPN
8191690,2002,"BING, STEPHEN L MR",Shangri-La Entertainment,,3000000,LOS ANGELES,CA,DP,10,,I/Nam
7469205,2000,BANK OF AMERICA,Bank of America,,2500000,WASHINGTON,DC,DP,10,,Rept
7248396,2000,DCCC UNINCORPORATED RECEIPTS ACCT,DCCC Unincorporated Receipts Acct,,2000000,WASHINGTON,DC,DP,10,,Rept
7280392,2000,DCCC NONFEDERAL RECEIPTS-L,DCCC/Non-Federal Account L,,2000000,WASHINGTON,DC,DP,10,,Rept
7469206,2000,DNC NON-FEDERAL UNINCORPORATE,DNC Non-Federal Unincorporated,Democratic National Cmte,2000000,,,DP,10,,Rept
7247944,2000,DCCC NONFEDERAL RECEIPTS-C,DCCC/Non-Federal Account C,,2000000,WASHINGTON,DC,DP,10,,Rept
21147284,1994,NATIONSBANK N A,NationsBank LOAN,,1750000,WASHINGTON,DC,DP,10,,Rept


In [13]:
df[df['committee']==1].groupby(
    by=['party', 'cycle'])[['amount_mil']].sum().sort_values(by=['cycle', 'party'], ascending=False)

Unnamed: 0_level_0,Unnamed: 1_level_0,amount_mil
party,cycle,Unnamed: 2_level_1
R,2016,131.972892
D,2016,68.568677
R,2014,334.311565
D,2014,278.87491
R,2012,1001.230353
D,2012,778.328186
R,2010,217.838172
D,2010,271.818905
R,2008,599.295368
D,2008,575.606451


---
## Corporate Donations (Business, Labor, Single-Issue)

In [14]:
df['recip_code'].unique()

array(['DI', 'DP', 'RP', 'DL', 'DW', 'RN', 'PI', 'RL', 'RW', '3N', 'PB',
       'DC', '3L', '3W', 'DN', 'RI', 'OI', 'UN', 'RC', '3C', 'PL', 'PU',
       '3P', '3O', 'RO', 'DO', '3I', 'OB', 'OU', nan, 'PO', 'OL', 'R ',
       'Pi', '  ', 'UO', 'Rw', 'Dw', '3l', 'UL'], dtype=object)

In [15]:
df['pac'] = [1 if str(code).upper()[0] == 'P' else 0 for code in df['recip_code']]

In [16]:
df['pac'].sum() / len(df['pac']) # 17% of donations are PACs

0.17190730667749635

In [17]:
issue = []

for code in df['recip_code']:
    if str(code).upper()[-1] == 'B':
        issue.append('business')
    elif (str(code).upper() == 'PL') or (str(code).upper() == 'OL'):
        issue.append('labor')
    elif str(code).upper()[-1] == 'I':
        issue.append('ideological')
    elif str(code).upper()[-1] == 'O':
        issue.append('other')
    else:
        issue.append(np.nan)
        
df['issue'] = issue

In [18]:
df['issue'].value_counts(normalize=True)

business       0.504732
ideological    0.388650
other          0.072068
labor          0.034550
Name: issue, dtype: float64

In [19]:
# 23 % of donations are issue-based
df['issue'].notna().sum() / len(df['issue'])

0.22983515185766987

In [20]:
# Top Donations in Business Category
df[df['issue']=='business'][cols_to_filter].sort_values(
    by='amount', ascending=False).drop_duplicates('contributor_name').head(10)

Unnamed: 0,cycle,contributor_name,org_name,ult_org,amount,city,state,recip_code,type,employer,source
5333041,2014,"ASSOCIATION OF REALTORS, NATIONAL",National Assn of Realtors,,3881731,CHICAGO,IL,OB,10,CORPORATION,Name
6855819,2000,AMERICAN MEDICAL ASSOCIATION PAC,[24T Contribution],,1500150,,,PB,24T,,Rept
1168737,2012,COOPERATIVE OF AMERICAN PHYSICIANS,Cooperative of American Physicians,,1169070,LOS ANGELES,CA,OB,10,,PAC
18733061,2010,REPUBLICAN GOVERNORS ASSOCIATION,,,1138000,WASHINGTON,DC,OB,10,,P/PAC
2578104,2012,BIPAC - BUSINESS INSTITUTE FOR POLITICAL ANALYSIS,,,250000,WASHINGTON,DC,PB,10,,P/PAC
7539846,2000,CALIFORNIA MEDICAL PAC STATE PAC,[24T Contribution],,215000,,,PB,24T,,Rept
6253430,2000,AMPAC,[24T Contribution],,176650,,,PB,24T,,Rept
19716727,1992,,[24T Contribution],,164680,,,PB,24T,,Rept
2401259,2012,ADPAC EDUCATION FUND,American Dental Assn,,126500,WASHINGTON,DC,OB,10,,PAC
21557074,1996,AMERICAN DENTAL PAC EDUCATION FUND,[24T Contribution],,125000,,,PB,24T,,Rept


In [21]:
# Top Donations in Labor Category
df[df['issue']=='labor'][cols_to_filter].sort_values(
    by='amount', ascending=False).drop_duplicates('contributor_name').head(10)

Unnamed: 0,cycle,contributor_name,org_name,ult_org,amount,city,state,recip_code,type,employer,source
5392909,2014,NATIONAL EDUCATION ASSOCIATION,National Education Assn,,5000000,WASHINGTON,DC,OL,10,NEA,PAC
5720193,2014,AFL-CIO COPE TREASURY,AFL-CIO,,3000000,WASHINGTON,DC,OL,10,,PAC
3497633,2012,AFSCME SPECIAL ACCOUNT,American Fedn of St/Cnty/Munic Employees,,1832111,WASHINGTON,DC,OL,10,,PAC
3464897,2012,"INTERNATIONAL UNION, UAW",United Auto Workers,,1402000,DETROIT,MI,OL,10,,PAC
5554058,2014,NATIONAL NURSES UNITED,National Nurses United,,1367000,SILVER SPRING,MD,OL,10,,PAC
5390949,2014,NEA FUND FOR CHILDREN & PUBLIC EDUCATION,NEA Fund for Children & Public Education,National Education Assn,1200000,WASHINGTON,DC,OL,10,,Rept
3475506,2012,AFL-CIO,AFL-CIO,,1002868,WASHINGTON,DC,OL,10,,PAC
18736746,2010,CARPENTERS DISTRICT COUNCIL OF KAN,,,972578,KANSAS CITY,MO,OL,10,,P/PAC
4592181,2014,UNITED BROTHERHOOD OF CARPENTERS AND JOINERS,Carpenters & Joiners Union,,757236,WASHINGTON,DC,OL,10,,PAC
955488,2012,COMMITTEE ON LETTER CARRIERS POLITICAL EDUCATION,National Assn of Letter Carriers,,713919,WASHINGTON,DC,OL,10,,PAC
