# Contributions from out-of-state donors

In [27]:
import pandas as pd
import us

%load_ext jupyternotify

# Widen pandas' display limits so wide/long frames are not truncated in cell output.
pd.options.display.max_columns = 50
pd.options.display.max_rows = 500

The jupyternotify extension is already loaded. To reload it, use:
  %reload_ext jupyternotify


Load the [National Institute on Money in Politics](https://www.followthemoney.org/) API key from a local text file.

In [7]:
# Read the National Institute on Money in Politics API key from a local text file.
# The context manager closes the file handle, and strip() removes the trailing
# newline that readline() keeps, so the key can be concatenated into a URL safely.
with open("nimp_api_key.txt", "r") as key_file:
    nimp_key = key_file.readline().strip()

Load the [Census Bureau](https://www.census.gov/) API key from a local text file.

In [8]:
# Read the Census Bureau API key from a local text file.
# The context manager closes the file handle, and strip() removes the trailing
# newline that readline() keeps, so the key can be appended to a query URL safely.
with open("census_api_key.txt", "r") as key_file:
    census_key = key_file.readline().strip()

## Import and format the data

### Import data on contributions to state legislative races in 2018

In [3]:
%%notify
contribs_18 = pd.read_csv("https://www.followthemoney.org/aaengine/aafetch.php?dt=1&y=2018&c-exi=1&c-r-ot=S,H&gro=c-t-id,d-eid,d-ins&APIKey="+nimp_key+"&mode=csv", encoding="ISO-8859-1")
contribs_18.to_csv("data/contribs_18.csv", index=False)
contribs_18 = pd.read_csv("data/contribs_18.csv", encoding="ISO-8859-1")
contribs_18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 665936 entries, 0 to 665935
Data columns (total 45 columns):
request                        665936 non-null object
Candidate:token                665936 non-null object
Candidate:id                   665936 non-null int64
Candidate                      665936 non-null object
Candidate_Entity:token         665936 non-null object
Candidate_Entity:id            665936 non-null int64
Candidate_Entity               665936 non-null object
Election_Status:token          665936 non-null object
Election_Status:id             665936 non-null object
Election_Status                665936 non-null object
Status_of_Candidate:token      665936 non-null object
Status_of_Candidate:id         665936 non-null int64
Status_of_Candidate            665936 non-null object
Specific_Party:token           665936 non-null object
Specific_Party:id              665936 non-null int64
Specific_Party                 665936 non-null object
General_Party:token          

<IPython.core.display.Javascript object>

In [4]:
# Show a single record to inspect the raw column layout.
contribs_18.head(n=1)

Unnamed: 0,request,Candidate:token,Candidate:id,Candidate,Candidate_Entity:token,Candidate_Entity:id,Candidate_Entity,Election_Status:token,Election_Status:id,Election_Status,Status_of_Candidate:token,Status_of_Candidate:id,Status_of_Candidate,Specific_Party:token,Specific_Party:id,Specific_Party,General_Party:token,General_Party:id,General_Party,Election_Jurisdiction:token,Election_Jurisdiction:id,Election_Jurisdiction,Election_Year:token,Election_Year:id,Election_Year,Election_Type:token,Election_Type:id,Election_Type,Office_Sought:token,Office_Sought:id,Office_Sought,Incumbency_Status:token,Incumbency_Status:id,Incumbency_Status,Contributor:token,Contributor:id,Contributor,Type_of_Contributor:token,Type_of_Contributor:id,Type_of_Contributor,In-State:token,In-State:id,In-State,#_of_Records,Total_$
0,"dt=1&y=2018&c-exi=1&c-r-ot=S,H&c-t-id=229342&d...",c-t-id,229342,"HUFFINES, PHILLIP WAYNE",c-t-eid,11168692,"HUFFINES, PHILLIP WAYNE",c-t-ftsts,Lost-Primary,Lost-Primary,c-t-sts,2,Lost,c-t-pt,139,REPUBLICAN,c-t-p,2,Republican,s,TX,TX,y,2018,2018,c-r-t,1,Standard,c-r-osid,7531,SENATE DISTRICT 008,c-t-ico,O,Open,d-eid,11168692,"HUFFINES, PHILLIP WAYNE",d-et,2,Individual,d-ins,1,1,5,5250000.0


In [48]:
# NOTE(review): `donors_18` is not defined in any cell visible above this one;
# presumably it is derived from `contribs_18` elsewhere. Confirm before a
# Restart & Run All.

# Columns kept for the analysis.
kept_fields = [
    "Candidate",
    "Candidate:id",
    "General_Party",
    "Election_Jurisdiction",
    "Election_Year",
    "Incumbency_Status",
    "Contributor",
    "Contributor:id",
    "Type_of_Contributor",
    "In-State",
    "Total_$",
]

# Short snake_case replacements. "General_Party" has no entry here, so it keeps
# its original name.
snake_case_names = {
    "Candidate": "candidate",
    "Candidate:id": "candidate_id",
    "Election_Jurisdiction": "state",
    "Election_Year": "year",
    "Incumbency_Status": "incumbency_status",
    "Contributor": "contributor",
    "Contributor:id": "contributor_id",
    "Type_of_Contributor": "type_of_contributor",
    "In-State": "in_state",
    "Total_$": "contributions",
}

donors_18 = donors_18[kept_fields].rename(columns=snake_case_names)
donors_18.head(1)

Unnamed: 0,candidate,candidate_id,General_Party,state,year,incumbency_status,contributor,contributor_id,type_of_contributor,in_state,contributions
0,"HUFFINES, PHILLIP WAYNE",229342,Republican,TX,2018,Open,"HUFFINES, PHILLIP WAYNE",11168692,Individual,1,5250000.0


Filter out unitemized donations as it is impossible to determine where those contributions originated.

In [49]:
# Flag the aggregate "UNITEMIZED DONATIONS" rows and keep everything else.
# Rows with a missing contributor name compare as not-equal and are therefore kept.
is_unitemized = donors_18["contributor"] == "UNITEMIZED DONATIONS"
donors_18 = donors_18[~is_unitemized]
donors_18.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 629441 entries, 0 to 631990
Data columns (total 11 columns):
candidate              629441 non-null object
candidate_id           629441 non-null int64
General_Party          629441 non-null object
state                  629441 non-null object
year                   629441 non-null int64
incumbency_status      629441 non-null object
contributor            629417 non-null object
contributor_id         629441 non-null int64
type_of_contributor    629441 non-null object
in_state               629441 non-null int64
contributions          629441 non-null float64
dtypes: float64(1), int64(4), object(6)
memory usage: 57.6+ MB


### Import and format state population data

Import 2017 population estimates from the Census Bureau.

In [85]:
# Request the 2017 population estimates for every state from the Census API.
query = "https://api.census.gov/data/2017/pep/population?get=POP,GEONAME&for=state:*&key=" + census_key

# The first row of the response holds the column names rather than data,
# so skip it and renumber the remaining rows from zero.
population = pd.read_json(query).iloc[1:].reset_index(drop=True)
population.columns = ["population", "state", "fips"]

# Convert the population counts to a numeric dtype.
population = population.assign(population=pd.to_numeric(population["population"]))
population.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 3 columns):
population    52 non-null int64
state         52 non-null object
fips          52 non-null object
dtypes: int64(1), object(2)
memory usage: 1.3+ KB


In [86]:
# Show a single row to check the renamed columns.
population.head(n=1)

Unnamed: 0,population,state,fips
0,4874747,Alabama,1


We need to add state abbreviations so that the population data can later be joined with the campaign contributions data. To do so, we will use the [`us` package](https://github.com/unitedstates/python-us).

In [87]:
# Build a two-column lookup table (abbreviation, fips) from the `us` package's
# abbreviation -> FIPS mapping.
abbr_to_fips = us.states.mapping("abbr", "fips")
states = (
    pd.DataFrame.from_dict(abbr_to_fips, orient="index", columns=["fips"])
    .rename_axis("abbreviation")  # name the index so reset_index() yields this column
    .reset_index()
)
states.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59 entries, 0 to 58
Data columns (total 2 columns):
abbreviation    59 non-null object
fips            56 non-null object
dtypes: object(2)
memory usage: 1.0+ KB


In [88]:
# Attach each state's abbreviation by matching on the FIPS code, then drop the
# "state" column.
population = population.merge(states, on="fips").drop(columns="state")
population.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 52 entries, 0 to 51
Data columns (total 3 columns):
population      52 non-null int64
fips            52 non-null object
abbreviation    52 non-null object
dtypes: int64(1), object(2)
memory usage: 1.6+ KB


In [89]:
# Show a single row to confirm the abbreviation column was added.
population.head(n=1)

Unnamed: 0,population,fips,abbreviation
0,4874747,1,AL


## Join and analyze the data