In [1]:
import fecfile
import pandas as pd
import os
import datetime
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker
from dateutil.parser import parse
from IPython.display import display
import geopandas as gpd
import us

Time the notebook

In [2]:
start = datetime.datetime.now()

Define logging function for writing output

In [3]:
def log(obj, description):
    print(description + ":")
    with open("../output/log.txt", "a") as f:
        f.write(f"{description}:\n\n{obj}\n\n---\n")
    return obj

Set some viewing options

In [4]:
pd.set_option('display.max_colwidth', 200)
pd.set_option('display.max_columns', 40)
pd.set_option('display.max_rows', 500)

Read dataframe of filings

In [5]:
filings = (
    pd.read_csv("../data/filings.csv")
)

filings.head(3)

Unnamed: 0,committee_id,filing_id,cycle,form_type,date_filed,date_coverage_to,date_coverage_from,report_title,report_period,fec_uri,paper,amended,amended_uri,is_amendment,original_filing,original_uri,committee_type,contributions_total,cash_on_hand,disbursements_total,receipts_total
0,C00401224,1190211,2016,F3,2017-11-03,2016-11-28,2016-10-20,POST-GENERAL,PG,http://docquery.fec.gov/cgi-bin/dcdev/forms/C00401224/1190211/,False,False,,True,1132265.0,http://docquery.fec.gov/cgi-bin/dcdev/forms/C00401224/1132265/,W,69306148.22,24023158.64,70473625.32,70884967.84
1,C00401224,1167570,2016,F3,2017-06-27,2016-10-19,2016-10-01,PRE-GENERAL,PREG,http://docquery.fec.gov/cgi-bin/dcdev/forms/C00401224/1167570/,False,False,,True,1118321.0,http://docquery.fec.gov/cgi-bin/dcdev/forms/C00401224/1118321/,W,38988748.3,23611816.12,36653226.26,41086481.96
2,C00401224,1166534,2016,F3,2017-06-19,2016-12-31,2016-11-29,YEAR-END,YE,http://docquery.fec.gov/cgi-bin/dcdev/forms/C00401224/1166534/,False,False,,True,1144458.0,http://docquery.fec.gov/cgi-bin/dcdev/forms/C00401224/1144458/,W,14665549.11,24071137.23,15378810.13,15426788.72


Get only the monthlies

In [6]:
monthlies = (
    filings
    # remove filings that have been superceded by subsequent filings
    .loc[
        lambda x: x['amended'] == False
    ]
    .loc[
        lambda x: x["report_period"].str.contains("M", na = False)
    ]
)

monthlies.head(3)

Unnamed: 0,committee_id,filing_id,cycle,form_type,date_filed,date_coverage_to,date_coverage_from,report_title,report_period,fec_uri,paper,amended,amended_uri,is_amendment,original_filing,original_uri,committee_type,contributions_total,cash_on_hand,disbursements_total,receipts_total
5,C00401224,1148387,2016,F3,2017-02-09,2016-07-31,2016-07-01,AUG MONTHLY,M8,http://docquery.fec.gov/cgi-bin/dcdev/forms/C00401224/1148387/,False,False,,True,1096262.0,http://docquery.fec.gov/cgi-bin/dcdev/forms/C00401224/1096262/,W,27941112.01,20424886.9,28214365.19,28693496.34
6,C00401224,1148219,2016,F3,2017-02-08,2016-05-31,2016-05-01,JUN MONTHLY,M6,http://docquery.fec.gov/cgi-bin/dcdev/forms/C00401224/1148219/,False,False,,True,1079540.0,http://docquery.fec.gov/cgi-bin/dcdev/forms/C00401224/1079540/,W,36415717.72,20262477.34,37970358.57,37010389.15
7,C00401224,1148088,2016,F3,2017-02-08,2016-04-30,2016-04-01,MAY MONTHLY,M5,http://docquery.fec.gov/cgi-bin/dcdev/forms/C00401224/1148088/,False,False,,True,1074053.0,http://docquery.fec.gov/cgi-bin/dcdev/forms/C00401224/1074053/,W,45279893.8,21222446.76,42975277.54,45917148.95


Load candidates file

In [7]:
candidates = pd.read_csv(
    "../data/candidates.csv"
)

Read dataframe of all actblue contributions

In [8]:
actblue = pd.read_csv(
    "../output/merged_actblue.csv",
    dtype = {
        "filing_id": "object",
        "contributor_organization_name": "object"
    },
    parse_dates = ['contribution_date',]
)

actblue.head(3)

Unnamed: 0,entity_type,filer_committee_id_number,filing_id,transaction_id,contribution_date,contribution_amount,contribution_aggregate,contributor_organization_name,contributor_first_name,contributor_last_name,contributor_street_1,contributor_street_2,contributor_state,contributor_zip_code,contributor_state.1,contributor_employer,contributor_occupation,contribution_purpose_descrip,memo_text_description,donor_id,committee_id,candidate_name,latest_contribution_aggregate
0,IND,C00401224,1148387,SA11AI_52399442,2016-07-04,10.0,293.0,,BARBARA,A COLOE,5043 ENGLISH CREEK AVE,,NJ,8234,NJ,USER NAME: COLOEBARBAR,CLINICAL S.W. SEMI-RETIRED,Earmark,Earmarked for END CITIZENS UNITED PAC (C00573261),BARBARA|A COLOE|08234,C00573261,END CITIZENS UNITED PAC,293.0
1,IND,C00401224,1148387,SA11AI_52534596,2016-07-07,3.0,293.0,,BARBARA,A COLOE,5043 ENGLISH CREEK AVE,,NJ,8234,NJ,USER NAME: COLOEBARBAR,CLINICAL S.W. SEMI-RETIRED,Earmark,Earmarked for END CITIZENS UNITED PAC (C00573261),BARBARA|A COLOE|08234,C00573261,END CITIZENS UNITED PAC,293.0
2,IND,C00401224,1148387,SA11AI_53732042,2016-07-28,15.0,293.0,,BARBARA,A COLOE,5043 ENGLISH CREEK AVE,,NJ,8234,NJ,USER NAME: COLOEBARBAR,CLINICAL S.W. SEMI-RETIRED,Earmark,Earmarked for END CITIZENS UNITED PAC (C00573261),BARBARA|A COLOE|08234,C00573261,END CITIZENS UNITED PAC,293.0


Read zip code populations from census

In [35]:
zcta_pop = (
    pd
    .read_csv(
        "../data/census/ACS_17_5YR_B01003_COUNTY/ACS_17_5YR_B01003_with_ann.csv",
        encoding = "ISO-8859-1",
        skiprows = 1, 
        dtype = {
            "Id2": str
        }
    )
)

zcta_pop.head(3)

Unnamed: 0,Id,Id2,Geography,Estimate; Total,Margin of Error; Total
0,0500000US01001,1001,"Autauga County, Alabama",55036,*****
1,0500000US01003,1003,"Baldwin County, Alabama",203360,*****
2,0500000US01005,1005,"Barbour County, Alabama",26201,*****


Read zip to county crosswalk from census

In [36]:
zcta_counties = (
    pd
    .read_csv(
        "../data/zcta_county.csv", 
        dtype = { 
            "ZCTA5": str, 
            "STATE": str
        }
    )
)

zcta_counties.head(3)

Unnamed: 0,ZCTA5,STATE,COUNTY,GEOID,POPPT,HUPT,AREAPT,AREALANDPT,ZPOP,ZHU,ZAREA,ZAREALAND,COPOP,COHU,COAREA,COAREALAND,ZPOPPCT,ZHUPCT,ZAREAPCT,ZAREALANDPCT,COPOPPCT,COHUPCT,COAREAPCT,COAREALANDPCT
0,601,72,1,72001,18465,7695,165132671,164333375,18570,7744,167459085,166659789,19483,8125,173777444,172725651,99.43,99.37,98.61,98.6,94.77,94.71,95.03,95.14
1,601,72,141,72141,105,49,2326414,2326414,18570,7744,167459085,166659789,33149,14192,298027589,294039825,0.57,0.63,1.39,1.4,0.32,0.35,0.78,0.79
2,602,72,3,72003,41520,18073,83734431,79288158,41520,18073,83734431,79288158,41959,18258,117948080,79904246,100.0,100.0,100.0,100.0,98.95,98.99,70.99,99.23


Some ZIP codes cross state boundaries. The code below finds the main state, by population, associated with any given ZIP Code Tabulation Area.

In [37]:
zcta_states = zcta_counties.groupby([ "ZCTA5", "STATE" ])["ZPOPPCT"].sum()\
    .reset_index().sort_values("ZPOPPCT", ascending=False)\
    .groupby("ZCTA5")["STATE"]\
    .first().apply(lambda x: us.states.lookup(x).name)

zcta_states.head()

ZCTA5
00601    Puerto Rico
00602    Puerto Rico
00603    Puerto Rico
00606    Puerto Rico
00610    Puerto Rico
Name: STATE, dtype: object

In [113]:
zip_totals = pd.DataFrame({
    "contributors": actblue.groupby(["contributor_zip_code"]).size(),
    "state": zcta_states,
    "population": zcta_pop.set_index("Id2")["Estimate; Total"],
})


In [129]:
donations_per_zip = (
    actblue
    .groupby(
        ['candidate_name', 'contributor_zip_code']
    )
    ['filing_id']
    .count()
    .reset_index()
    .pivot(
        columns = "candidate_name",
        values = 'filing_id',
        index = 'contributor_zip_code'
    )
    .reset_index()
)

donations_per_zip.head(3)

candidate_name,contributor_zip_code,A NEW DIRECTION PAC,A WHOLE LOT OF PEOPLE FOR GRIJALVA CONGRESSIONAL COMMITTEE,AAPI VICTORY FUND,ADAM SMITH FOR CONGRESS,AKINYEMI AGBEDE FOR U,AL FRANKEN FOR SENATE,AL LAWSON FOR CONGRESS,ALAMEDA COUNTY DEMOCRATIC CENTRAL COMMITTEE,ALAN LOWENTHAL FOR CONGRESS,ALEXANDRIA DEMOCRATIC COMMITTEE,ALIDA SKOLD FOR CONGRESS,ALINA VALDES FOR CONGRESS,ALL AMERICA PAC,ALMA ADAMS FOR CONGRESS,AMERICA,AMERICA VOTES ACTION FUND,AMERICA WORKS PAC,AMERICANS FOR RESPONSIBLE SOLUTIONS PAC,AMERIPAC,...,WEST VIRGINIA STATE DEMOCRATIC EXECUTIVE COMMITTEE,WESTCHESTER,WETHEPEOPLE FOUNDATION,WHARTON FOR CONGRESS,WHITEHOUSE FOR SENATE,WICKLUND FOR CONGRESS,WILL YANDIK FOR CONGRESS,WILLIAM MATTA FOR CONGRESS,WIRTH FOR CONGRESS,WOMENS POLITICAL COMMITTEE,WORKERS,WORKING FAMILIES PARTY NATIONAL POLITICAL ACTION COMMITTEE,WRIGHT,WY DEMOCRATIC STATE CENTRAL COMMITTEE,WYDEN FOR SENATE,YARMUTH FOR CONGRESS,YOLO COUNTY DEMOCRATIC CENTRAL COMMITTEE,YOUNG AWARE AMERICANS PAC,YOUNG FOR CONGRESS,YOUTH PROGRESS PAC
0,0,,,,,,,,,,,,2.0,,,,,,3.0,,...,,,,,,,,,,,,,,,,,,,,
1,0,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,
2,4,,,,,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,


In [130]:
zip_candidates = zip_totals.reset_index().merge(
    donations_per_zip,
    left_on = 'index',
    right_on = 'contributor_zip_code'
)

zip_candidates.head()

Unnamed: 0,index,contributors,state,population,contributor_zip_code,A NEW DIRECTION PAC,A WHOLE LOT OF PEOPLE FOR GRIJALVA CONGRESSIONAL COMMITTEE,AAPI VICTORY FUND,ADAM SMITH FOR CONGRESS,AKINYEMI AGBEDE FOR U,AL FRANKEN FOR SENATE,AL LAWSON FOR CONGRESS,ALAMEDA COUNTY DEMOCRATIC CENTRAL COMMITTEE,ALAN LOWENTHAL FOR CONGRESS,ALEXANDRIA DEMOCRATIC COMMITTEE,ALIDA SKOLD FOR CONGRESS,ALINA VALDES FOR CONGRESS,ALL AMERICA PAC,ALMA ADAMS FOR CONGRESS,AMERICA,...,WEST VIRGINIA STATE DEMOCRATIC EXECUTIVE COMMITTEE,WESTCHESTER,WETHEPEOPLE FOUNDATION,WHARTON FOR CONGRESS,WHITEHOUSE FOR SENATE,WICKLUND FOR CONGRESS,WILL YANDIK FOR CONGRESS,WILLIAM MATTA FOR CONGRESS,WIRTH FOR CONGRESS,WOMENS POLITICAL COMMITTEE,WORKERS,WORKING FAMILIES PARTY NATIONAL POLITICAL ACTION COMMITTEE,WRIGHT,WY DEMOCRATIC STATE CENTRAL COMMITTEE,WYDEN FOR SENATE,YARMUTH FOR CONGRESS,YOLO COUNTY DEMOCRATIC CENTRAL COMMITTEE,YOUNG AWARE AMERICANS PAC,YOUNG FOR CONGRESS,YOUTH PROGRESS PAC
0,0,209.0,,,0,,,,,,,,,,,,2.0,,,,...,,,,,,,,,,,,,,,,,,,,
1,0,1.0,,,0,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,
2,4,1.0,,,4,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,
3,11,1.0,,,11,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,
4,15,1.0,,,15,,,,,,,,,,,,,,,,...,,,,,,,,,,,,,,,,,,,,


In [131]:
def calculate_per_capita(row):
    if row["population"] == 0: return None
    return round(row["contributors"] * 1000.0 / row["population"], 1)

In [132]:
zip_candidates["donors_per_1000_people"] = zip_totals.apply(calculate_per_capita, axis=1)

Look at Bernie's

In [133]:
zip_candidates.head(1)

Unnamed: 0,index,contributors,state,population,contributor_zip_code,A NEW DIRECTION PAC,A WHOLE LOT OF PEOPLE FOR GRIJALVA CONGRESSIONAL COMMITTEE,AAPI VICTORY FUND,ADAM SMITH FOR CONGRESS,AKINYEMI AGBEDE FOR U,AL FRANKEN FOR SENATE,AL LAWSON FOR CONGRESS,ALAMEDA COUNTY DEMOCRATIC CENTRAL COMMITTEE,ALAN LOWENTHAL FOR CONGRESS,ALEXANDRIA DEMOCRATIC COMMITTEE,ALIDA SKOLD FOR CONGRESS,ALINA VALDES FOR CONGRESS,ALL AMERICA PAC,ALMA ADAMS FOR CONGRESS,AMERICA,...,WESTCHESTER,WETHEPEOPLE FOUNDATION,WHARTON FOR CONGRESS,WHITEHOUSE FOR SENATE,WICKLUND FOR CONGRESS,WILL YANDIK FOR CONGRESS,WILLIAM MATTA FOR CONGRESS,WIRTH FOR CONGRESS,WOMENS POLITICAL COMMITTEE,WORKERS,WORKING FAMILIES PARTY NATIONAL POLITICAL ACTION COMMITTEE,WRIGHT,WY DEMOCRATIC STATE CENTRAL COMMITTEE,WYDEN FOR SENATE,YARMUTH FOR CONGRESS,YOLO COUNTY DEMOCRATIC CENTRAL COMMITTEE,YOUNG AWARE AMERICANS PAC,YOUNG FOR CONGRESS,YOUTH PROGRESS PAC,donors_per_1000_people
0,0,209.0,,,0,,,,,,,,,,,,2.0,,,,...,,,,,,,,,,,,,,,,,,,,


In [135]:
cols = [
    "contributors",
    "state",
    "population",
    "contributor_zip_code",
    "donors_per_1000_people",
    "BERNIE"
]

zip_candidates[
    cols
].sort_values("BERNIE", ascending=False).head(100)

Unnamed: 0,contributors,state,population,contributor_zip_code,donors_per_1000_people,BERNIE
33414,1016.0,California,,94110,,95.0
36429,925.0,Washington,,98103,,87.0
2316,392.0,Vermont,,5401,,84.0
2280,389.0,Vermont,,5301,,73.0
33898,1371.0,California,,94611,,73.0
30858,446.0,California,,90026,,71.0
33467,745.0,California,,94117,,70.0
34698,803.0,California,,95472,,70.0
33613,918.0,California,,94501,,70.0
35769,394.0,Oregon,,97214,,67.0


In [None]:
end = datetime.datetime.now()

d = (end - start)

f"The notebook ran for {round(d.total_seconds() / 60, 2) } minutes"

---

---

---