# Analyzing Tech Employees' Donations To The Clinton and Trump Campaigns

Please see the [main page](https://github.com/BuzzFeedNews/2016-10-tech-employee-contributions) for details.

## Load the data

In [1]:
import pandas as pd
import glob
import re

In [2]:
def read_contributions(path):
    """Load one contributions CSV and return only the individual-donor rows.

    The ``contributor_employer`` column is cleaned before filtering: missing
    values become empty strings, surrounding whitespace is removed, and the
    text is upper-cased. Only rows whose ``entity_type`` is ``"IND"`` are kept,
    restricted to the columns used by the rest of the analysis.
    """
    keep_columns = [
        "filer_committee_id_number", "transaction_id", "contribution_date",
        "contributor_last_name", "contributor_first_name", "contributor_middle_name",
        "contributor_state", "contributor_employer", "contribution_amount", "contribution_aggregate"
    ]
    raw = pd.read_csv(path, low_memory=False)
    employer = raw["contributor_employer"].fillna("")
    raw["contributor_employer"] = employer.str.strip().str.upper()
    is_individual = raw["entity_type"] == "IND"
    return raw.loc[is_individual, keep_columns]

In [3]:
# Load every contributions CSV and stack the individual-donor rows into one frame.
# glob.glob returns paths in an arbitrary, filesystem-dependent order, so the paths
# are sorted to keep the row order (and every .head() shown later) reproducible.
# Each file keeps its own row labels, so the combined index may contain duplicates.
contribution_paths = sorted(glob.glob("../data/contributions/*.csv"))
contributions = pd.concat([read_contributions(path) for path in contribution_paths])

In [4]:
# Translate each filer committee ID into its human-readable committee name.
# IDs that are not listed here end up as None.
committee_names = {
    "C00580100": "Donald J. Trump For President",
    "C00575795": "Hillary For America"
}
contributions["committee"] = contributions["filer_committee_id_number"].apply(committee_names.get)

In [5]:
# Preview the first rows, transposed so each column reads as a row.
contributions.head().transpose()

Unnamed: 0,0,1,2,3,4
filer_committee_id_number,C00575795,C00575795,C00575795,C00575795,C00575795
transaction_id,C5,C2,C947,C183810,C135530
contribution_date,20150407,20150407,20150412,20150601,20150513
contributor_last_name,McAuliffe,McAuliffe,Gensler,Macchio,Benson
contributor_first_name,Terence,Dorothy,Gary,Steven,Barbara
contributor_middle_name,R.,S.,,,
contributor_state,VA,VA,MD,NY,CO
contributor_employer,COMMONWEALTH OF VIRGINIA,,HILLARY FOR AMERICA,CLEARFLO TECHNOLOGIES,
contribution_amount,2700,2700,2700,2700,1000
contribution_aggregate,2700,2700,2700,2700,1000


## Identify contributions from major technology companies

The code below identifies alternative spellings and aliases for 20 major technology companies. For each company, we include a "matching" pattern and, where needed, an "anti-matching" pattern (i.e., similar company names to exclude, e.g., "Amazon Consulting").

In [6]:
# Lookup table of the tracked tech companies. Each entry is a tuple of
#   (canonical company name, pattern that must match, pattern that must NOT match or None).
# Patterns are searched against `contributor_employer`, which read_contributions has
# already stripped and upper-cased -- hence the all-caps literals.
# When an employer string matches several entries, the tagging loop assigns names in
# list order, so the entry that appears last in this list wins.
employers = [
    ("Adobe", re.compile(r"^ADOBE$|ADOBE, INC|ADOBE SYSTEMS"), None),
    ("Airbnb", re.compile(r"AIRBNB"), None),
    ("Amazon", re.compile(r"AMAZON\b"), re.compile(r"AMAZON CONSULTING|AMAZON PRODUCE")), # Ignore Amazon Consulting/Produce
    ("Apple", re.compile(r"^APPLE$|^APPLE,? INC|\bAPPLE TECH"), None),
    ("Box", re.compile(r"^BOX, INC|^BOX$"), None),
    ("Cisco", re.compile(r"^CISCO,? INC|^CISCO$|^CISCO SYSTEMS"), None),
    ("Dell", re.compile(r"^DELL,? INC|^DELL$|^DELL COMPUTER|^DELL SOFTWARE|^DELL SERVICES"), None),
    ("Ebay", re.compile(r"^EBAY\b"), None),
    ("Facebook", re.compile(r"^FACEBOOK"), None),
    ("Google/Alphabet", re.compile(r"\bGOOGLE|ALPHABET"), re.compile(r"ALPHABET ENERGY")), # Ignore Alphabet Energy
    ("Hewlett-Packard", re.compile(r"HEWLETT.PACKARD|\bHP,? INC|\bHP ENTERPRISE|\bHP LABS"), None),
    ("IBM", re.compile(r"^IBM|\bIBM CO"), None),
    ("Intel", re.compile(r"^INTEL CORP|^INTEL,? INC|^INTEL$"), None),
    ("Microsoft", re.compile(r"MICROSOFT"), None),
    ("Netflix", re.compile(r"NETFLIX"), None),
    ("Oracle", re.compile(r"\bORACLE\b"), None),
    ("Salesforce", re.compile(r"SALESFORCE"), None),
    ("Square", re.compile(r"^SQUARE$|^SQUARE,? INC"), None),
    ("Twitter", re.compile(r"\bTWITTER\b"), None),
    ("Uber", re.compile(r"\bUBER\b"), re.compile(r"AIRBNB")), # Ignore Airbnb to avoid double-counting
]

In [7]:
# Tag each contribution with the canonical tech-company name whose pattern matches
# its employer string. Rows that match no company keep meta_employer = None.
# Entries are applied in list order, so a later match overwrites an earlier one.
contributions["meta_employer"] = None
employer_names = contributions["contributor_employer"]
for name, pat, anti_pat in employers:
    # True where the company's pattern is found anywhere in the employer string.
    is_match = employer_names.apply(lambda x: pat.search(x) is not None)
    if anti_pat is not None:
        # Drop look-alike employers that the anti-pattern explicitly excludes.
        is_match = is_match & employer_names.apply(lambda x: anti_pat.search(x) is None)
    contributions.loc[is_match, "meta_employer"] = name

In [8]:
# Keep only the rows that were tagged with one of the tracked tech companies,
# then preview the first few (transposed for readability).
has_tech_employer = contributions["meta_employer"].notnull()
tech_contributions = contributions[has_tech_employer]
tech_contributions.head().T

Unnamed: 0,48,72,80,96,200
filer_committee_id_number,C00575795,C00575795,C00575795,C00575795,C00575795
contribution_date,20150530,20150414,20150603,20150529,20150420
contributor_last_name,Burns,Thomas,Windsheimer,Martin,Fuller
contributor_first_name,Bonnie,Kristin,Marci,Brendon,Jacquelline
contributor_middle_name,,,,,
contributor_state,GA,SC,CA,IL,CA
contributor_employer,IBM,MICROSOFT,APPLE INC.,GOOGLE,GOOGLE
contribution_amount,158.92,50,200.12,10,2700
contribution_aggregate,208.92,277,1480.82,2710,2700
committee,Hillary For America,Hillary For America,Hillary For America,Hillary For America,Hillary For America


## Group contributions by employer

In [9]:
# Total contribution amount per (company, committee) pair, ordered by the index.
tech_contributions_by_employer = (
    tech_contributions
    .groupby(["meta_employer", "committee"])["contribution_amount"]
    .sum()
    .to_frame()
    .sort_index()
)
tech_contributions_by_employer

Unnamed: 0_level_0,Unnamed: 1_level_0,contribution_amount
meta_employer,committee,Unnamed: 2_level_1
Adobe,Donald J. Trump For President,1000.0
Adobe,Hillary For America,52199.1
Airbnb,Hillary For America,26192.0
Amazon,Donald J. Trump For President,3084.0
Amazon,Hillary For America,165156.37
Apple,Donald J. Trump For President,2844.45
Apple,Hillary For America,255614.08
Box,Hillary For America,13185.0
Cisco,Donald J. Trump For President,18043.38
Cisco,Hillary For America,109981.5


## Total estimated contributions from the 20 companies, by committee

In [10]:
# Total contribution amount per committee across all tracked companies, largest first.
committee_totals = (
    tech_contributions
    .groupby(["committee"])["contribution_amount"]
    .sum()
    .to_frame()
)
committee_totals.sort_values("contribution_amount", ascending=False)

Unnamed: 0_level_0,contribution_amount
committee,Unnamed: 1_level_1
Hillary For America,3108877.47
Donald J. Trump For President,110099.44


## Save results

In [11]:
# Write out every distinct raw employer spelling alongside the company it was mapped to.
employer_spellings = tech_contributions[["meta_employer", "contributor_employer"]].drop_duplicates()
employer_spellings.sort_values(["meta_employer", "contributor_employer"]).to_csv(
    "../output/raw-employer-names.csv", index=False
)

In [12]:
# Write out the tagged contributions, ordered by company, raw employer spelling,
# contributor name, and finally contribution date.
sort_columns = [
    "meta_employer", "contributor_employer",
    "contributor_last_name", "contributor_first_name", "contributor_middle_name",
    "contribution_date"
]
tech_contributions.sort_values(sort_columns).to_csv("../output/tech-contributions.csv", index=False)

In [13]:
# Write the per-company, per-committee totals; the index is kept so the
# (meta_employer, committee) keys are written as columns.
tech_contributions_by_employer.to_csv("../output/by-employer.csv")

---

# Rough estimate of companies whose employees have donated the most to each campaign

Note: The numbers below are meant only as rough estimates. They do not, for example, attempt to merge sibling/parent companies or misspellings.

### Take basic steps to normalize employer names

- Remove "LLC", "INC", "INCORPORATED", "CORPORATION" (and the misspelling "CORPRATION"), "CORP", "CO", and "LLP" endings.
- Convert all letters to lowercase.
- Convert each run of non-alphanumeric characters into a single underscore, then trim leading and trailing underscores.

In [6]:
import namestand

In [13]:
# Matches a corporate-form suffix at the very end of an employer name: a space
# (optionally preceded by a comma) followed by one of the listed endings, e.g.
# " LLC", ", INC", " CORP". "CORPRATION" is presumably a deliberate catch for a
# misspelling of CORPORATION -- confirm. A trailing period is not matched, so
# periods must be stripped before this pattern is applied.
cruft_pat = re.compile(r",? (LLC|INC|INCORPORATED|CORPORATION|CORPRATION|CORP|CO|LLP)$")

In [14]:
def normalize_name(name):
    """Reduce a raw employer string to a comparable key.

    Trims surrounding whitespace and then leading/trailing periods, drops a
    trailing corporate suffix matched by ``cruft_pat``, passes the remainder
    through ``namestand.downscore``, and finally trims underscores from both
    ends of the result.
    """
    trimmed = name.strip().strip(".")
    without_suffix = cruft_pat.sub("", trimmed)
    downscored = namestand.downscore(without_suffix)
    return downscored.strip("_")

In [15]:
# Derive a normalized employer key for every contribution.
raw_employers = contributions["contributor_employer"]
contributions["employer_norm"] = raw_employers.map(normalize_name)

In [16]:
# For every (committee, normalized employer) pair, compute the total amount given
# and collect the distinct raw spellings that collapsed into that key.
grp_employer_norm = contributions.groupby(["committee", "employer_norm"])
amount_by_employer = grp_employer_norm["contribution_amount"].sum()
spellings_by_employer = grp_employer_norm["contributor_employer"].apply(
    lambda spellings: " • ".join(sorted(spellings.unique()))
)
by_employer_norm = pd.DataFrame({
    "amount": amount_by_employer,
    "spellings": spellings_by_employer
}).sort_values("amount", ascending=False)

### Hillary For America

In [17]:
# Top 50 normalized employers by total amount given to Hillary For America.
clinton_by_employer = by_employer_norm.loc["Hillary For America"]
clinton_by_employer.head(50)

Unnamed: 0_level_0,amount,spellings
employer_norm,Unnamed: 1_level_1,Unnamed: 2_level_1
,51124352.63,• - • -- • --- • ----- • --------------------...
self_employed,35461657.47,SELF EMPLOYED • SELF EMPLOYED • SELF- EMPLOYE...
retired,9498222.64,RETIRED • RETIRED.
information_requested,4303868.14,INFORMATION REQUESTED
not_employed,2088412.01,NOT EMPLOYED • NOT-EMPLOYED
google,807016.23,"GOOGLE • GOOGLE INC. • GOOGLE, INC • GOOGLE, INC."
morgan_morgan,382953.75,MORGAN & MORGAN
stanford_university,371950.99,STANFORD UNIVERSITY
none,367971.03,--NONE-- • NONE • NONE.
microsoft,351649.11,MICROSOFT • MICROSOFT CORP. • MICROSOFT CORPOR...


### Donald J. Trump For President

In [12]:
# Top 50 normalized employers by total amount given to Donald J. Trump For President.
trump_by_employer = by_employer_norm.loc["Donald J. Trump For President"]
trump_by_employer.head(50)

Unnamed: 0_level_0,amount,spellings
employer_norm,Unnamed: 1_level_1,Unnamed: 2_level_1
retired,9720710.68,RETIRED • RETIRED.
self_employed,4793521.79,SELF -EMPLOYED • SELF EMPLOYED • SELF EMPLOYED...
information_requested,2493563.66,INFORMATION REQUESTED
homemaker,271160.07,HOMEMAKER
self,158114.5,SELF • SELF LLC
boch_automotive_group,86936.8,BOCH AUTOMOTIVE GROUP
not_employed,34832.67,NOT EMPLOYED
owner,34389.19,OWNER
,28875.44,
american_airlines,25798.38,AMERICAN AIRLINES


In [22]:
# Spot check: look up the single "murray_energy" row among Trump-committee employers.
# NOTE(review): looks like an exploratory one-off rather than part of the main analysis.
by_employer_norm.loc["Donald J. Trump For President"].loc["murray_energy"]

amount               32.35
spellings    MURRAY ENERGY
Name: murray_energy, dtype: object

In [23]:
# Show every contribution whose employer string contains "MURRAY ENER".
is_murray_energy = contributions["contributor_employer"].str.contains(r"MURRAY ENER")
contributions[is_murray_energy]

Unnamed: 0,filer_committee_id_number,transaction_id,contribution_date,contributor_last_name,contributor_first_name,contributor_middle_name,contributor_state,contributor_employer,contribution_amount,contribution_aggregate,committee,employer_norm
16381,C00580100,SA17A.1310961,20160721,MATOSZKIA,JOSEPH,,OH,MURRAY ENERGY,32.35,1032.35,Donald J. Trump For President,murray_energy


In [24]:
# NOTE(review): scratch cell. It passes a list of field labels (they read like
# expenditure/payee fields, not contribution fields) through namestand.downscore
# and displays the result. Nothing else in the visible notebook uses the output;
# consider removing this cell from the published analysis.
namestand.downscore("""FORM TYPE
FILER COMMITTEE ID NUMBER
TRANSACTION ID NUMBER
BACK REFERENCE TRAN ID NUMBER
BACK REFERENCE SCHED NAME
ENTITY TYPE
PAYEE ORGANIZATION NAME
PAYEE LAST NAME
PAYEE FIRST NAME
PAYEE MIDDLE NAME
PAYEE PREFIX
PAYEE SUFFIX
PAYEE STREET 1
PAYEE STREET 2
PAYEE CITY
PAYEE STATE
PAYEE ZIP
ELECTION CODE
ELECTION OTHER DESCRIPTION
EXPENDITURE DATE
EXPENDITURE AMOUNT {F3L Bundled}
SEMI-ANNUAL REFUNDED BUNDLED AMT
EXPENDITURE PURPOSE DESCRIP
CATEGORY CODE
BENEFICIARY COMMITTEE FEC ID
BENEFICIARY COMMITTEE NAME
BENEFICIARY CANDIDATE FEC ID
BENEFICIARY CANDIDATE LAST NAME
BENEFICIARY CANDIDATE FIRST NAME
BENEFICIARY CANDIDATE MIDDLE NAME
BENEFICIARY CANDIDATE PREFIX
BENEFICIARY CANDIDATE SUFFIX
BENEFICIARY CANDIDATE OFFICE
BENEFICIARY CANDIDATE STATE
BENEFICIARY CANDIDATE DISTRICT
CONDUIT NAME
CONDUIT STREET 1
CONDUIT STREET 2
CONDUIT CITY
CONDUIT STATE
CONDUIT ZIP
MEMO CODE
MEMO TEXT/DESCRIPTION
Reference to SI or SL system code that identifies the Account""".strip().split("\n"))

['form_type',
 'filer_committee_id_number',
 'transaction_id_number',
 'back_reference_tran_id_number',
 'back_reference_sched_name',
 'entity_type',
 'payee_organization_name',
 'payee_last_name',
 'payee_first_name',
 'payee_middle_name',
 'payee_prefix',
 'payee_suffix',
 'payee_street_1',
 'payee_street_2',
 'payee_city',
 'payee_state',
 'payee_zip',
 'election_code',
 'election_other_description',
 'expenditure_date',
 'expenditure_amount_f3l_bundled',
 'semi_annual_refunded_bundled_amt',
 'expenditure_purpose_descrip',
 'category_code',
 'beneficiary_committee_fec_id',
 'beneficiary_committee_name',
 'beneficiary_candidate_fec_id',
 'beneficiary_candidate_last_name',
 'beneficiary_candidate_first_name',
 'beneficiary_candidate_middle_name',
 'beneficiary_candidate_prefix',
 'beneficiary_candidate_suffix',
 'beneficiary_candidate_office',
 'beneficiary_candidate_state',
 'beneficiary_candidate_district',
 'conduit_name',
 'conduit_street_1',
 'conduit_street_2',
 'conduit_city',


---

---

---