In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import os

In [2]:
root_folder = '../data'

# Collect the base name (extension stripped) of every CSV file in the data
# folder.  Only entries that really end in ``.csv`` are kept, so stray items
# such as ``.DS_Store`` or ``.ipynb_checkpoints`` never end up in the list, and
# os.path.splitext removes just the trailing extension instead of cutting the
# name at the first occurrence of '.csv'.
# NOTE: os.listdir returns entries in arbitrary order, so the position of a
# name inside file_list is not guaranteed to be the same on every machine.
file_list = [
    os.path.splitext(entry)[0]
    for entry in os.listdir(root_folder)
    if entry.endswith('.csv')
]

file_list

['fec_api_committees',
 'pac_to_pacs',
 'individual_contributions',
 'committees',
 'pacs',
 'candidates',
 'backers',
 'industry_codes',
 'politicians',
 'pac_records']

In [3]:
# Load every table by its explicit file name instead of by position in
# file_list: os.listdir returns entries in arbitrary order, so index-based
# lookups can silently bind a DataFrame to the wrong CSV on another machine.
# low_memory=False is kept on the two files where it was originally set.
fec_api_committees = pd.read_csv(f"{root_folder}/fec_api_committees.csv")
pac_to_pacs = pd.read_csv(f"{root_folder}/pac_to_pacs.csv", low_memory=False)
# individual_contributions is intentionally an empty placeholder frame here;
# its CSV is not read from disk.
individual_contributions = pd.DataFrame()
committees = pd.read_csv(f"{root_folder}/committees.csv")
pacs = pd.read_csv(f"{root_folder}/pacs.csv", low_memory=False)
candidates = pd.read_csv(f"{root_folder}/candidates.csv")
backers = pd.read_csv(f"{root_folder}/backers.csv")
industry_codes = pd.read_csv(f"{root_folder}/industry_codes.csv")
politicians = pd.read_csv(f"{root_folder}/politicians.csv")
pac_records = pd.read_csv(f"{root_folder}/pac_records.csv")

In [4]:
# All loaded tables gathered in one list so they can be summarised in a loop.
# The summary loop pairs each entry with file_list by position, so the order
# here matters.
DB_list = [
    fec_api_committees,
    pac_to_pacs,
    individual_contributions,
    committees,
    pacs,
    candidates,
    backers,
    industry_codes,
    politicians,
    pac_records,
]

In [5]:
def add_commas(number):
    """Return ``number`` rendered as text with comma thousands separators.

    The previous hand-rolled slicing only handled up to nine digits, produced
    output such as ``'-,123'`` for negative values, and returned the raw
    number (not a string) for short inputs.  The built-in ``,`` format option
    handles any magnitude, sign, and floats, and the result is always a string.

    Parameters
    ----------
    number : int, float or str
        Value to format.  A string holding an integer literal is converted
        first; any other string is returned unchanged.

    Returns
    -------
    str
        The formatted value, e.g. ``1083525`` -> ``'1,083,525'``.
    """
    if isinstance(number, str):
        try:
            number = int(number)
        except ValueError:
            # Not an integer literal: nothing sensible to group, hand it back.
            return number
    try:
        return f"{number:,}"
    except (TypeError, ValueError):
        # Objects that do not support the ',' format option fall back to str().
        return str(number)

In [6]:
# Print a numbered summary of each table: its name (looked up in file_list at
# the same position), its row/column counts, and its column labels.
for position, table in enumerate(DB_list, start=1):
    row_count, col_count = table.shape
    print(f"{position}. {file_list[position - 1]}")
    print(f"[{add_commas(row_count)} rows - {col_count} cols]")
    print('--------')
    print(list(table.columns))
    print()

1. fec_api_committees
[45,507 rows - 13 cols]
--------
['id', 'cid', 'created_at', 'updated_at', 'cycle', 'individual_unitemized_contributions', 'individual_itemized_contributions', 'individual_contributions', 'designation', 'organization_type', 'name', 'committee_id', 'committee_type']

2. pac_to_pacs
[1,083,525 rows - 25 cols]
--------
['id', 'cycle', 'fec_rec_no', 'filer_id', 'donor_committee', 'contrib_lend_trans', 'city', 'state', 'zip', 'fec_occ_emp', 'prim_code', 'date', 'amount', 'recipient_id', 'party', 'other_id', 'recip_code', 'recip_prim_code', 'amend', 'report', 'pg', 'microfilm', 'type', 'real_code', 'source']

3. individual_contributions
[0 rows - 0 cols]
--------
[]

4. committees
[157,542 rows - 15 cols]
--------
['id', 'cycle', 'committee_id', 'pac_short', 'affiliate', 'ultorg', 'recip_id', 'recip_code', 'fec_cand_id', 'party', 'prim_code', 'source', 'sensitive', 'foreign_owned', 'active']

5. pacs
[3,539,657 rows - 11 cols]
--------
['id', 'cycle', 'fec_rec_no', 'pac

In [7]:
# Count how many category rows belong to each sector, largest sector first.
# The row labelled 0 is dropped before counting.
sector_counts = industry_codes.drop(index=0)['sector'].value_counts()
sector_counts.sort_values(ascending=False)

Misc Business            102
Ideology/Single-Issue     45
Transportation            37
Finance/Insur/RealEst     35
Energy/Nat Resource       30
Health                    30
Agribusiness              30
Labor                     30
Communic/Electronics      27
Other                     23
Construction              22
Defense                    8
Lawyers & Lobbyists        6
Non-contribution           5
Unknown                    5
Party Cmte                 4
Joint Candidate Cmtes      3
Candidate                  1
Name: sector, dtype: int64

In [8]:
# Preview the first rows of industry_codes.  In the recorded output, row 0
# holds header-like labels (Catcode, Catname, ...) rather than a real category.
industry_codes.head()

Unnamed: 0,id,category_code,category_name,industry_code,industry_name,sector,sector_long
0,1,Catcode,Catname,Catorder,Industry,Sector,Sector Long
1,2,F2600,Private Equity & Investment Firms,F07,Securities & Investment,Finance/Insur/RealEst,"Finance, Insurance & Real Estate"
2,3,D6000,Homeland Security contractors,D03,Misc Defense,Defense,Defense
3,4,X9000,Foreign Governments,W07,Other,Other,Other
4,5,F2700,Hedge Funds,F07,Securities & Investment,Finance/Insur/RealEst,"Finance, Insurance & Real Estate"


In [11]:
# Number of backers at each backer_level, most common level first.
backer_level_counts = backers['backer_level'].value_counts()
backer_level_counts.sort_values(ascending=False)

kickstarter_backer    248
upstart                50
house                  48
senate                 22
leadership              2
Name: backer_level, dtype: int64

In [13]:
# Boolean masks over `candidates`; later cells combine them with `&` to slice
# the table.
#
# The outcome masks use the vectorised .str accessor, so they are boolean
# Series aligned with `candidates` rather than plain Python lists.  na=False
# makes a missing recip_code count as "no match" instead of raising on a
# non-string value.
win = candidates['recip_code'].str.endswith('W', na=False)
lose = candidates['recip_code'].str.endswith('L', na=False)

# Party masks.
repub = candidates['party'] == 'R'
dem = candidates['party'] == 'D'

# Race masks: `cong` is simply "not PRES", so any row whose dist_id_run_for is
# missing also falls under `cong`.
pres = candidates['dist_id_run_for'] == 'PRES'
cong = candidates['dist_id_run_for'] != 'PRES'

In [26]:
# Ten highest-raising Republican congressional candidates whose recip_code
# marks a loss, ordered by raised_total (largest first).
(
    candidates.loc[repub & lose & cong]
    .sort_values('raised_total', ascending=False)
    .head(10)
)

Unnamed: 0,id,cycle,fec_cand_id,cid,first_last_party,party,dist_id_run_for,dist_id_currently_held,current_candidate,cycle_candidate,crpico,recip_code,nopacs,raised_from_pacs,raised_from_individuals,raised_total,raised_unitemized
12345,1202,2000,S0NY00303,N00001176,Rick A Lazio (R),R,NYS1,NY02,Y,Y,O,RL,,4322641.0,20425037.0,60633230,35885552.0
15515,24484,2010,S0CT00151,N00031165,Linda McMahon (R),R,CTS2,,Y,Y,O,RL,,18663.0,53105984.0,53124647,0.0
36004,29890,2012,S0CT00151,N00031165,Linda McMahon (R),R,CTS1,,Y,Y,O,RL,,1044005.0,40295112.0,41597048,257931.0
3983,62510,1998,S0NY00048,N00001158,Alfonse M D'Amato (R),R,NYS2,NYS2,Y,Y,I,RL,,4226025.0,12763172.0,31500718,14511521.0
37704,21251,2010,S0NV00138,N00027581,Sharron Angle (R),R,NVS2,,Y,Y,C,RL,,3327104.0,10674490.0,29622426,15620832.0
11394,30498,2012,S2OH00170,N00033174,Josh Mandel (R),R,OHS1,,Y,Y,C,RL,,7818716.0,16055250.0,27915260,4041294.0
12265,24747,2010,S0CA00330,N00031348,Carly Fiorina (R),R,CAS1,,Y,Y,C,RL,,7187249.0,16999197.0,27399423,3212977.0
9519,30834,2012,S2TX00361,N00033445,David H. Dewhurst (R),R,TXS2,,,Y,O,RL,,1841775.0,23258414.0,25269758,169569.0
17584,13919,2006,S4PA00063,N00001380,Rick Santorum (R),R,PAS2,PAS2,Y,Y,I,RL,,6285363.0,14125686.0,24979491,4568442.0
36589,51518,1994,S4VA00056,N00002032,Oliver North (R),R,VAS1,,Y,Y,C,RL,,868741.0,3089134.0,24513075,20555200.0


In [29]:
# Ten highest-raising Republican congressional candidates whose recip_code
# marks a win, ordered by raised_total (largest first).
(
    candidates.loc[repub & win & cong]
    .sort_values('raised_total', ascending=False)
    .head(10)
)

Unnamed: 0,id,cycle,fec_cand_id,cid,first_last_party,party,dist_id_run_for,dist_id_currently_held,current_candidate,cycle_candidate,crpico,recip_code,nopacs,raised_from_pacs,raised_from_individuals,raised_total,raised_unitemized
4094,52083,1994,S4TX00086,N00005675,Kay Bailey Hutchison (R),R,TXS2,TXS2,Y,Y,I,RW,,4817703.0,7492333.0,23337780,11027744.0
26157,36876,2014,S2KY00012,N00003389,Mitch McConnell (R),R,KYS1,KYS1,Y,Y,I,RW,,8372640.0,13470866.0,22776329,932823.0
30409,7117,2002,S2NC00083,N00008071,Elizabeth Dole (R),R,NCS1,,Y,Y,O,RW,,3145058.0,7345289.0,21640133,11149786.0
33562,28075,2012,H0OH08029,N00003675,John Boehner (R),R,OH08,OH08,Y,Y,I,RW,,2784227.0,12692665.0,21344947,5868055.0
7772,23930,2010,S4PA00121,N00001489,Pat Toomey (R),R,PAS1,,Y,Y,O,RW,,6001714.0,11054077.0,21294090,4238299.0
35307,7115,2002,S2TX00106,N00024852,John Cornyn (R),R,TXS1,,Y,Y,O,RW,,4192569.0,7060450.0,19158296,7905277.0
44611,21780,2010,S0FL00338,N00030612,Marco Rubio (R),R,FLS2,,Y,Y,O,RW,,6688686.0,11933201.0,18621887,
2082,32540,2014,H0OH08029,N00003675,John Boehner (R),R,OH08,OH08,Y,Y,I,RW,,3232599.0,12092238.0,18573508,3248671.0
9100,22988,2010,H0IL10120,N00012539,Mark Kirk (R),R,ILS2,IL10,,,O,RW,,6540608.0,10385171.0,18228030,1302251.0
42955,24352,2010,S0IL00261,N00012539,Mark Kirk (R),R,ILS2,IL10,Y,Y,O,RW,,6540608.0,10385171.0,18228030,1302251.0


In [30]:
# Ten highest-raising Democratic congressional candidates whose recip_code
# marks a loss, ordered by raised_total (largest first).
(
    candidates.loc[dem & lose & cong]
    .sort_values('raised_total', ascending=False)
    .head(10)
)

Unnamed: 0,id,cycle,fec_cand_id,cid,first_last_party,party,dist_id_run_for,dist_id_currently_held,current_candidate,cycle_candidate,crpico,recip_code,nopacs,raised_from_pacs,raised_from_individuals,raised_total,raised_unitemized
14938,10982,2004,S4IL00172,N00025738,Blair Hull (D),D,ILS2,,,Y,O,DL,,179.0,22124300.0,22188238,63759.0
4432,10922,2004,S2NC00109,N00024858,Erskine B Bowles (D),D,NCS2,,Y,Y,O,DL,,5353192.0,8069240.0,21927894,8505462.0
28603,37503,2014,S8NC00239,N00029617,Kay R. Hagan (D),D,NCS1,NCS1,Y,Y,I,DL,,5480904.0,10953528.0,21064974,4630542.0
16980,37473,2014,S8CO00172,N00008051,Mark Udall (D),D,COS2,COS2,Y,Y,I,DL,,6334617.0,10519469.0,20135736,3281650.0
7753,24398,2010,S0PA00434,N00028049,"Joseph A. Sestak, Jr (D)",D,PAS1,PA07,Y,Y,O,DL,,6015497.0,10289564.0,18208093,1903032.0
7752,22875,2010,H6PA07105,N00028049,"Joseph A. Sestak, Jr (D)",D,PAS1,PA07,,,O,DL,,6015497.0,10289564.0,18208093,1903032.0
5119,14175,2006,S6TN00240,N00003218,Harold E Ford Jr (D),D,TNS1,TN09,Y,Y,O,DL,,5352292.0,11250219.0,18004627,1402116.0
35557,13683,2006,H6TN09043,N00003218,Harold E Ford Jr (D),D,TNS1,TN09,,,O,DL,,5352292.0,11250219.0,18004627,1402116.0
7358,15545,2006,S6CT05066,N00028063,Ned Lamont (D),D,CTS1,,Y,Y,C,DL,,418474.0,16079735.0,17925934,1427725.0
20797,37114,2014,S4KY00091,N00035486,Alison Grimes (D),D,KYS1,,Y,Y,C,DL,,1363499.0,9160205.0,17144152,6620448.0


In [31]:
# Ten highest-raising Democratic congressional candidates whose recip_code
# marks a win, ordered by raised_total (largest first).
(
    candidates.loc[dem & win & cong]
    .sort_values('raised_total', ascending=False)
    .head(10)
)

Unnamed: 0,id,cycle,fec_cand_id,cid,first_last_party,party,dist_id_run_for,dist_id_currently_held,current_candidate,cycle_candidate,crpico,recip_code,nopacs,raised_from_pacs,raised_from_individuals,raised_total,raised_unitemized
11322,30892,2012,S2MA00170,N00033492,Elizabeth Warren (D),D,MAS1,,Y,Y,C,DW,,3203496.0,22485898.0,44724683,19035289.0
26812,13908,2006,S0NY00188,N00000019,Hillary Clinton (D),D,NYS1,NYS1,Y,Y,I,DW,,1668082.0,25363388.0,33690345,6658875.0
26811,15501,2006,P00003392,N00000019,Hillary Clinton (D),D,NYS1,NYS1,,,I,DW,,1668082.0,25363388.0,33690345,6658875.0
22395,22618,2010,S2CA00286,N00006692,Barbara Boxer (D),D,CAS1,CAS1,Y,Y,I,DW,,7719079.0,12692881.0,24179959,3767999.0
5880,28585,2012,H2OH13033,N00003535,Sherrod Brown (D),D,OHS1,OHS1,,,I,DW,,6565947.0,10663027.0,24082184,6853210.0
5879,30259,2012,S6OH00163,N00003535,Sherrod Brown (D),D,OHS1,OHS1,Y,Y,I,DW,,6565947.0,10663027.0,24082184,6853210.0
32170,1858,2000,S0NY00188,N00000019,Hillary Clinton (D),D,NYS1,,Y,Y,O,DW,,3343382.0,20262712.0,23768527,162433.0
13297,9252,2004,S2CA00286,N00006692,Barbara Boxer (D),D,CAS1,CAS1,Y,Y,I,DW,,4800003.0,7963428.0,23363069,10599638.0
24124,37136,2014,S4MA00028,N00000270,Ed Markey (D),D,MAS2,MAS2,Y,Y,I,DW,,5826539.0,13264679.0,22241873,3150655.0
16556,30501,2012,S2VA00142,N00033177,Tim Kaine (D),D,VAS1,,Y,Y,O,DW,,4977456.0,13525638.0,21562195,3059101.0


In [24]:
# Ten Democratic presidential-race rows with the largest raised_total,
# regardless of outcome.
(
    candidates.loc[dem & pres]
    .sort_values('raised_total', ascending=False)
    .head(10)
)

Unnamed: 0,id,cycle,fec_cand_id,cid,first_last_party,party,dist_id_run_for,dist_id_currently_held,current_candidate,cycle_candidate,crpico,recip_code,nopacs,raised_from_pacs,raised_from_individuals,raised_total,raised_unitemized
33242,19999,2008,P80003205,N00028836,"Lee L Mercer, Jr (D)",D,PRES,,,,,DN,,1000.0,729005408.0,729006408,
23231,29707,2012,P80003338,N00009638,Barack Obama (D),D,PRES,PRES,Y,Y,I,DW,,55911229.0,322775065.0,613095984,234409690.0
14922,20091,2008,S4IL00180,N00009638,Barack Obama (D),D,PRES,ILS2,,,O,DW,,65836708.0,389386184.0,455230591,7699.0
14921,20009,2008,P80003338,N00009638,Barack Obama (D),D,PRES,ILS2,Y,Y,O,DW,,65836708.0,389386184.0,455230591,7699.0
9695,10986,2004,S4MA00069,N00000245,John Kerry (D),D,PRES,MAS2,,,C,DC,Y,116445199.0,160067123.0,276512322,0.0
9694,12806,2004,P80000235,N00000245,John Kerry (D),D,PRES,MAS2,Y,Y,C,DL,Y,116445199.0,160067123.0,276512322,0.0
32023,64372,2016,S0NY00188,N00000019,Hillary Clinton (D),D,PRES,,,,O,DO,,426399.0,60025985.0,82192326,21739942.0
410,65622,2016,P00003392,N00000019,Hillary Clinton (D),D,PRES,,Y,Y,O,DO,,426399.0,60025985.0,82192326,21739942.0
32102,67930,2016,P60007168,N00000528,Bernie Sanders (D),D,PRES,VTS1,Y,Y,O,DO,,86649.0,19169576.0,74172060,54915835.0
67396,36504,2014,P80003205,N00028836,"Lee L Mercer, Jr (D)",D,PRES,,,,,DN,,,72000000.0,72000000,


In [23]:
# Ten Republican presidential-race rows with the largest raised_total,
# regardless of outcome.
(
    candidates.loc[repub & pres]
    .sort_values('raised_total', ascending=False)
    .head(10)
)

Unnamed: 0,id,cycle,fec_cand_id,cid,first_last_party,party,dist_id_run_for,dist_id_currently_held,current_candidate,cycle_candidate,crpico,recip_code,nopacs,raised_from_pacs,raised_from_individuals,raised_total,raised_unitemized
43213,29709,2012,P80003353,N00000286,Mitt Romney (R),R,PRES,,Y,Y,C,RL,,92835231.0,355605910.0,473940398,25499257.0
37682,19970,2008,P80002801,N00006424,John McCain (R),R,PRES,AZS1,Y,Y,O,RL,,26767631.0,166651506.0,243052415,49633278.0
42665,20147,2008,S6AZ00019,N00006424,John McCain (R),R,PRES,AZS1,,,O,RL,,26767631.0,166651506.0,243052415,49633278.0
11245,12906,2004,P00003335,N00008072,George W Bush (R),R,PRES,PRES,Y,Y,I,RW,,37621914.0,186886583.0,224508497,0.0
39207,1590,2000,P00003335,N00008072,George W Bush (R),R,PRES,,Y,Y,O,RW,,23158024.0,81353631.0,104511655,
14064,1750,2000,P60003852,N00000802,Steve Forbes (R),R,PRES,,,Y,O,RL,,1723.0,82263302.0,82265025,0.0
3036,20103,2008,S4MA00143,N00000286,Mitt Romney (R),R,PRES,,,,O,RO,,464481.0,54484163.0,54948644,0.0
3037,17470,2008,P80003353,N00000286,Mitt Romney (R),R,PRES,,,Y,O,RL,,464481.0,54484163.0,54948644,0.0
35715,20027,2008,S0NY00170,N00009908,Rudolph W Giuliani (R),R,PRES,,,,O,RO,,2310906.0,48170423.0,50481329,0.0
6620,19883,2008,P00003251,N00009908,Rudolph W Giuliani (R),R,PRES,,,Y,O,RL,,2310906.0,48170423.0,50481329,0.0


In [21]:
# Rank donor_committee values by their average record amount, expressed in
# millions of dollars, and show the 20 largest.
# NOTE(review): this aggregates with mean (average per record), not sum, so it
# does not rank donors by total giving -- confirm that this is the intent.
pac_to_pacs['amount_mil'] = pac_to_pacs['amount'].div(1_000_000)
(
    pac_to_pacs
    .groupby('donor_committee')[['amount_mil']]
    .mean()
    .sort_values('amount_mil', ascending=False)
    .head(20)
)

Unnamed: 0_level_0,amount_mil
donor_committee,Unnamed: 1_level_1
"BANK OF AMERICA, NA",12.0
BANK OF GEORGETOWN,6.666667
AMALGAMATED BANK OF NEW YORK,6.166667
WELLS FARGO,6.0
SUMMIT BANK,5.781316
"SMITH, THOMAS",4.225
"SOROS, GEORGE",3.275
"Feinstein, Dianne",3.18
"Watts, Mikal",2.797334
"BING, STEVE",2.746194
