# Contributions from out-of-state donors

In [2]:
from functools import reduce
import numpy as np
import pandas as pd

%load_ext jupyternotify

# Display settings: show up to 100 columns and 500 rows, and render floats
# with thousands separators and two decimal places.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 500
pd.set_option("display.float_format", "{:,.2f}".format)

The jupyternotify extension is already loaded. To reload it, use:
  %reload_ext jupyternotify


Import [National Institute on Money in Politics](https://www.followthemoney.org/) API key.

In [2]:
# Read the API key from the first line of a local text file. The context manager
# closes the file handle, and strip() removes the trailing newline so it is not
# embedded in the request URLs that are built from this key.
with open("nimp_api_key.txt", "r") as key_file:
    nimp_key = key_file.readline().strip()

## Import and format the data

### Import contribution-level data for gubernatorial, attorney general, secretary of state and state legislative races in 2018

In [None]:
%%notify
contribs_18 = pd.read_csv("https://www.followthemoney.org/aaengine/aafetch.php?dt=1&y=2018&c-exi=1&c-r-oc=Z10,Z70&c-r-ot=G,S,H&gro=s,d-id&APIKey="+nimp_key+"&mode=csv",
                          encoding="ISO-8859-1", error_bad_lines=False)
contribs_18.to_csv("data/contribs_18.csv", index=False)

In [3]:
# Load the cached 2018 contributions; malformed rows are skipped, not raised.
contribs_18 = pd.read_csv(
    "data/contribs_18.csv",
    encoding="ISO-8859-1",
    error_bad_lines=False,
)
contribs_18.info()

b'Skipping line 1099594: expected 49 fields, saw 50\n'
  interactivity=interactivity, compiler=compiler, result=result)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2118571 entries, 0 to 2118570
Data columns (total 49 columns):
request                        object
Election_Jurisdiction:token    object
Election_Jurisdiction:id       object
Election_Jurisdiction          object
Original_Name:token            object
Original_Name:id               int64
Original_Name                  object
Contributor:token              object
Contributor:id                 object
Contributor                    object
Type_of_Contributor:token      object
Type_of_Contributor:id         object
Type_of_Contributor            object
Specific_Business:token        object
Specific_Business:id           object
Specific_Business              object
General_Industry:token         object
General_Industry:id            object
General_Industry               object
Broad_Sector:token             object
Broad_Sector:id                object
Broad_Sector                   object
Amount:token                   object
Amount:id     

Convert the contribution amount column to numeric (float) data type.

In [4]:
# Parse the amount column as numbers; values that cannot be parsed become NaN.
contribs_18["Amount"] = contribs_18["Amount"].pipe(pd.to_numeric, errors="coerce")
contribs_18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2118571 entries, 0 to 2118570
Data columns (total 49 columns):
request                        object
Election_Jurisdiction:token    object
Election_Jurisdiction:id       object
Election_Jurisdiction          object
Original_Name:token            object
Original_Name:id               int64
Original_Name                  object
Contributor:token              object
Contributor:id                 object
Contributor                    object
Type_of_Contributor:token      object
Type_of_Contributor:id         object
Type_of_Contributor            object
Specific_Business:token        object
Specific_Business:id           object
Specific_Business              object
General_Industry:token         object
General_Industry:id            object
General_Industry               object
Broad_Sector:token             object
Broad_Sector:id                object
Broad_Sector                   object
Amount:token                   object
Amount:id     

Convert the contribution date column to datetime data type.

In [5]:
# Parse the date column as datetimes; values that cannot be parsed become NaT.
contribs_18["Date"] = contribs_18["Date"].pipe(pd.to_datetime, errors="coerce")
contribs_18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 2118571 entries, 0 to 2118570
Data columns (total 49 columns):
request                        object
Election_Jurisdiction:token    object
Election_Jurisdiction:id       object
Election_Jurisdiction          object
Original_Name:token            object
Original_Name:id               int64
Original_Name                  object
Contributor:token              object
Contributor:id                 object
Contributor                    object
Type_of_Contributor:token      object
Type_of_Contributor:id         object
Type_of_Contributor            object
Specific_Business:token        object
Specific_Business:id           object
Specific_Business              object
General_Industry:token         object
General_Industry:id            object
General_Industry               object
Broad_Sector:token             object
Broad_Sector:id                object
Broad_Sector                   object
Amount:token                   object
Amount:id     

Filter out unitemized donations as it is impossible to determine where those contributions originated. Then filter the data to just the state, contribution amount, contribution date and in-vs.-out-of-state columns.

In [6]:
# Keep itemized contributions only, narrow to the four analysis columns and
# give those columns short snake_case names.
contribs_18 = contribs_18.loc[
    lambda frame: frame["Contributor"] != "UNITEMIZED DONATIONS",
    ["Election_Jurisdiction", "Amount", "Date", "In-State"],
].rename(
    columns={
        "Election_Jurisdiction": "state",
        "Amount": "amount",
        "Date": "date",
        "In-State": "in_out_state",
    }
)
contribs_18.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2089975 entries, 0 to 2118570
Data columns (total 4 columns):
state           object
amount          float64
date            datetime64[ns]
in_out_state    float64
dtypes: datetime64[ns](1), float64(2), object(1)
memory usage: 79.7+ MB


Rename the categories in the in-vs.-out-of-state column.

In [7]:
# Swap the numeric codes for readable labels:
# 0 -> out-of-state, 1 -> in-state, 2 -> unknown.
contribs_18["in_out_state"] = contribs_18["in_out_state"].replace(
    to_replace={0: "out-of-state", 1: "in-state", 2: "unknown"}
)
contribs_18.head(n=1)

Unnamed: 0,state,amount,date,in_out_state
0,IL,50000000.0,2016-12-20,in-state


Extract the month and year from the contribution date column.

In [8]:
# Derive a monthly period (year and month) from each contribution date.
contribs_18["month"] = contribs_18["date"].dt.to_period(freq="M")
contribs_18.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2089975 entries, 0 to 2118570
Data columns (total 5 columns):
state           object
amount          float64
date            datetime64[ns]
in_out_state    object
month           object
dtypes: datetime64[ns](1), float64(1), object(3)
memory usage: 95.7+ MB


Group the contributions by state and month.

In [9]:
# Total contribution amount for every state/month pair.
grouped_by_month = (
    contribs_18
    .groupby(["state", "month"])["amount"]
    .sum()
    .reset_index()
)
# The month column was only needed for the grouping above.
contribs_18 = contribs_18.drop("month", axis=1)
grouped_by_month.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 3 columns):
state     1460 non-null object
month     1460 non-null object
amount    1460 non-null float64
dtypes: float64(1), object(2)
memory usage: 34.3+ KB


Because we eventually want to use each state's month column as the cut-off date for contributions, we need to append a day (the 28th) to the year-month value and then convert the column to the datetime data type.

In [10]:
# Turn each monthly period into a concrete date on the 28th of that month
# (a day that exists in every month) so it can serve as a cut-off date.
# NOTE(review): a cut-off on the 28th means a later `date <= cut-off` comparison
# leaves out contributions dated the 29th-31st of that month - confirm intended.
grouped_by_month["month"] = pd.to_datetime(
    grouped_by_month["month"].astype(str) + "-28",
    errors="coerce",
)
grouped_by_month.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1460 entries, 0 to 1459
Data columns (total 3 columns):
state     1460 non-null object
month     1460 non-null datetime64[ns]
amount    1460 non-null float64
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 34.3+ KB


In [11]:
# Preview the first five state/month totals.
grouped_by_month.head(n=5)

Unnamed: 0,state,month,amount
0,AK,2017-04-28,223.93
1,AK,2017-05-28,1177.04
2,AK,2017-06-28,1750.0
3,AK,2017-07-28,9684.21
4,AK,2017-08-28,63999.19


We know some of the contribution dates are wrong. We know this because some of the dates occur in the future and, unless we've got some time-travelling campaign donors, these are data entry errors. To eliminate this noise, we will filter out months after August 2018 and months with less than $2,500 in contributions. The latter filter will catch contributions that were miscoded as occurring during or before August 2018.

In [12]:
# Keep months no later than August 2018 that total at least $2,500.
grouped_by_month = grouped_by_month.loc[
    lambda frame: (frame["month"] <= "2018-08-28") & (frame["amount"] >= 2500)
]
grouped_by_month.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1173 entries, 3 to 1459
Data columns (total 3 columns):
state     1173 non-null object
month     1173 non-null datetime64[ns]
amount    1173 non-null float64
dtypes: datetime64[ns](1), float64(1), object(1)
memory usage: 36.7+ KB


Return the most recent month with at least $2,500 in contributions for each state.

In [13]:
# For each state, take the latest month that survived the filter above.
latest_month = (
    grouped_by_month
    .groupby("state")["month"]
    .max()
    .rename("latest_month")
    .reset_index()
)
latest_month

Unnamed: 0,state,latest_month
0,AK,2018-07-28
1,AL,2018-07-28
2,AR,2018-03-28
3,AZ,2017-12-28
4,CA,2018-07-28
5,CO,2018-06-28
6,CT,2018-03-28
7,FL,2018-08-28
8,GA,2018-07-28
9,HI,2017-12-28


### Import contribution-level data for gubernatorial, attorney general, secretary of state and state legislative races in 2014

In [None]:
%%notify
contribs_14 = pd.read_csv("https://www.followthemoney.org/aaengine/aafetch.php?dt=1&y=2014&c-exi=1&c-r-oc=Z10,Z70&c-r-ot=G,S,H&gro=s,d-id&APIKey="+nimp_key+"&mode=csv",
                          encoding="ISO-8859-1", error_bad_lines=False)
contribs_14.to_csv("data/contribs_14.csv", index=False)

In [14]:
# Load the cached 2014 contributions from disk.
contribs_14 = pd.read_csv(
    "data/contribs_14.csv",
    encoding="ISO-8859-1",
)
contribs_14.info()

  interactivity=interactivity, compiler=compiler, result=result)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3525121 entries, 0 to 3525120
Data columns (total 49 columns):
request                        object
Election_Jurisdiction:token    object
Election_Jurisdiction:id       object
Election_Jurisdiction          object
Original_Name:token            object
Original_Name:id               int64
Original_Name                  object
Contributor:token              object
Contributor:id                 int64
Contributor                    object
Type_of_Contributor:token      object
Type_of_Contributor:id         int64
Type_of_Contributor            object
Specific_Business:token        object
Specific_Business:id           int64
Specific_Business              object
General_Industry:token         object
General_Industry:id            int64
General_Industry               object
Broad_Sector:token             object
Broad_Sector:id                int64
Broad_Sector                   object
Amount:token                   object
Amount:id          

Convert contribution amount column to numeric (float) data type.

In [15]:
# Parse the amount column as numbers; values that cannot be parsed become NaN.
contribs_14["Amount"] = contribs_14["Amount"].pipe(pd.to_numeric, errors="coerce")
contribs_14.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3525121 entries, 0 to 3525120
Data columns (total 49 columns):
request                        object
Election_Jurisdiction:token    object
Election_Jurisdiction:id       object
Election_Jurisdiction          object
Original_Name:token            object
Original_Name:id               int64
Original_Name                  object
Contributor:token              object
Contributor:id                 int64
Contributor                    object
Type_of_Contributor:token      object
Type_of_Contributor:id         int64
Type_of_Contributor            object
Specific_Business:token        object
Specific_Business:id           int64
Specific_Business              object
General_Industry:token         object
General_Industry:id            int64
General_Industry               object
Broad_Sector:token             object
Broad_Sector:id                int64
Broad_Sector                   object
Amount:token                   object
Amount:id          

Convert the contribution date column to datetime data type.

In [16]:
# Parse the date column as datetimes; values that cannot be parsed become NaT.
contribs_14["Date"] = contribs_14["Date"].pipe(pd.to_datetime, errors="coerce")
contribs_14.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3525121 entries, 0 to 3525120
Data columns (total 49 columns):
request                        object
Election_Jurisdiction:token    object
Election_Jurisdiction:id       object
Election_Jurisdiction          object
Original_Name:token            object
Original_Name:id               int64
Original_Name                  object
Contributor:token              object
Contributor:id                 int64
Contributor                    object
Type_of_Contributor:token      object
Type_of_Contributor:id         int64
Type_of_Contributor            object
Specific_Business:token        object
Specific_Business:id           int64
Specific_Business              object
General_Industry:token         object
General_Industry:id            int64
General_Industry               object
Broad_Sector:token             object
Broad_Sector:id                int64
Broad_Sector                   object
Amount:token                   object
Amount:id          

Filter out unitemized donations as it is impossible to determine where those contributions originated. Then filter the data to just the state, contribution amount, contribution date and in-vs.-out-of-state columns.

In [17]:
# Keep itemized contributions only, narrow to the four analysis columns and
# give those columns short snake_case names.
contribs_14 = contribs_14.loc[
    lambda frame: frame["Contributor"] != "UNITEMIZED DONATIONS",
    ["Election_Jurisdiction", "Amount", "Date", "In-State"],
].rename(
    columns={
        "Election_Jurisdiction": "state",
        "Amount": "amount",
        "Date": "date",
        "In-State": "in_out_state",
    }
)
contribs_14.head(n=1)

Unnamed: 0,state,amount,date,in_out_state
0,IL,10000000.0,2014-12-31,1


Rename the categories in the in-vs.-out-of-state column.

In [18]:
# Swap the numeric codes for readable labels:
# 0 -> out-of-state, 1 -> in-state, 2 -> unknown.
contribs_14["in_out_state"] = contribs_14["in_out_state"].replace(
    to_replace={0: "out-of-state", 1: "in-state", 2: "unknown"}
)
contribs_14.head(n=1)

Unnamed: 0,state,amount,date,in_out_state
0,IL,10000000.0,2014-12-31,in-state


### Import contribution-level data for gubernatorial, attorney general, secretary of state and state legislative races in 2010

In [None]:
%%notify
contribs_10 = pd.read_csv("https://www.followthemoney.org/aaengine/aafetch.php?dt=1&y=2010&c-exi=1&c-r-oc=Z10,Z70&c-r-ot=G,S,H&gro=s,d-id&APIKey="+nimp_key+"&mode=csv",
                          encoding="ISO-8859-1")
contribs_10.to_csv("data/contribs_10.csv", index=False)

In [19]:
# Load the cached 2010 contributions from disk.
contribs_10 = pd.read_csv(
    "data/contribs_10.csv",
    encoding="ISO-8859-1",
)
contribs_10.info()

  interactivity=interactivity, compiler=compiler, result=result)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3459166 entries, 0 to 3459165
Data columns (total 49 columns):
request                        object
Election_Jurisdiction:token    object
Election_Jurisdiction:id       object
Election_Jurisdiction          object
Original_Name:token            object
Original_Name:id               int64
Original_Name                  object
Contributor:token              object
Contributor:id                 int64
Contributor                    object
Type_of_Contributor:token      object
Type_of_Contributor:id         int64
Type_of_Contributor            object
Specific_Business:token        object
Specific_Business:id           int64
Specific_Business              object
General_Industry:token         object
General_Industry:id            int64
General_Industry               object
Broad_Sector:token             object
Broad_Sector:id                int64
Broad_Sector                   object
Amount:token                   object
Amount:id          

Convert contribution amount column to numeric (float) data type.

In [20]:
# Parse the amount column as numbers; values that cannot be parsed become NaN.
contribs_10["Amount"] = contribs_10["Amount"].pipe(pd.to_numeric, errors="coerce")
contribs_10.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3459166 entries, 0 to 3459165
Data columns (total 49 columns):
request                        object
Election_Jurisdiction:token    object
Election_Jurisdiction:id       object
Election_Jurisdiction          object
Original_Name:token            object
Original_Name:id               int64
Original_Name                  object
Contributor:token              object
Contributor:id                 int64
Contributor                    object
Type_of_Contributor:token      object
Type_of_Contributor:id         int64
Type_of_Contributor            object
Specific_Business:token        object
Specific_Business:id           int64
Specific_Business              object
General_Industry:token         object
General_Industry:id            int64
General_Industry               object
Broad_Sector:token             object
Broad_Sector:id                int64
Broad_Sector                   object
Amount:token                   object
Amount:id          

Convert the contribution date column to datetime data type.

In [21]:
# Parse the date column as datetimes; values that cannot be parsed become NaT.
contribs_10["Date"] = contribs_10["Date"].pipe(pd.to_datetime, errors="coerce")
contribs_10.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3459166 entries, 0 to 3459165
Data columns (total 49 columns):
request                        object
Election_Jurisdiction:token    object
Election_Jurisdiction:id       object
Election_Jurisdiction          object
Original_Name:token            object
Original_Name:id               int64
Original_Name                  object
Contributor:token              object
Contributor:id                 int64
Contributor                    object
Type_of_Contributor:token      object
Type_of_Contributor:id         int64
Type_of_Contributor            object
Specific_Business:token        object
Specific_Business:id           int64
Specific_Business              object
General_Industry:token         object
General_Industry:id            int64
General_Industry               object
Broad_Sector:token             object
Broad_Sector:id                int64
Broad_Sector                   object
Amount:token                   object
Amount:id          

Filter out unitemized donations as it is impossible to determine where those contributions originated. Then filter the data to just the state, contribution amount, contribution date and in-vs.-out-of-state columns.

In [22]:
# Keep itemized contributions only, narrow to the four analysis columns and
# give those columns short snake_case names.
contribs_10 = contribs_10.loc[
    lambda frame: frame["Contributor"] != "UNITEMIZED DONATIONS",
    ["Election_Jurisdiction", "Amount", "Date", "In-State"],
].rename(
    columns={
        "Election_Jurisdiction": "state",
        "Amount": "amount",
        "Date": "date",
        "In-State": "in_out_state",
    }
)
contribs_10.head(n=1)

Unnamed: 0,state,amount,date,in_out_state
0,CA,20000000.0,2010-01-19,1


Rename the categories in the in-vs.-out-of-state column.

In [23]:
# Swap the numeric codes for readable labels:
# 0 -> out-of-state, 1 -> in-state, 2 -> unknown.
contribs_10["in_out_state"] = contribs_10["in_out_state"].replace(
    to_replace={0: "out-of-state", 1: "in-state", 2: "unknown"}
)
contribs_10.head(n=1)

Unnamed: 0,state,amount,date,in_out_state
0,CA,20000000.0,2010-01-19,in-state


### Import contributor-level data for gubernatorial, attorney general, secretary of state and state legislative races in 2018

In [None]:
%%notify
contributors_18 = pd.read_csv("https://www.followthemoney.org/aaengine/aafetch.php?dt=1&y=2018&c-exi=1&c-r-oc=Z10,Z70&c-r-ot=G,S,H&gro=c-t-id,d-eid,d-ins&APIKey="+nimp_key+"&mode=csv",
                          encoding="ISO-8859-1", error_bad_lines=False)
contributors_18.to_csv("data/candidates_18.csv", index=False)

In [24]:
# Load the cached 2018 contributor-level data from disk.
contributors_18 = pd.read_csv(
    "data/contributors_18.csv",
    encoding="ISO-8859-1",
)
contributors_18.info()

  interactivity=interactivity, compiler=compiler, result=result)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1510733 entries, 0 to 1510732
Data columns (total 45 columns):
request                        1510733 non-null object
Candidate:token                1510733 non-null object
Candidate:id                   1510733 non-null int64
Candidate                      1510733 non-null object
Candidate_Entity:token         1510733 non-null object
Candidate_Entity:id            1510733 non-null object
Candidate_Entity               1510733 non-null object
Election_Status:token          1510733 non-null object
Election_Status:id             1510733 non-null object
Election_Status                1510733 non-null object
Status_of_Candidate:token      1510733 non-null object
Status_of_Candidate:id         1510733 non-null int64
Status_of_Candidate            1510733 non-null object
Specific_Party:token           1510733 non-null object
Specific_Party:id              1510733 non-null int64
Specific_Party                 1510733 non-null object
General_Pa

Filter out unitemized donations as it is impossible to determine where those contributions originated. Then filter the data to just the candidate, candidate ID, state, district, in-vs.-out-of-state and total contributions columns.

In [25]:
# Keep itemized contributions only, narrow to the six analysis columns and
# give those columns short snake_case names.
contributors_18 = contributors_18.loc[
    lambda frame: frame["Contributor"] != "UNITEMIZED DONATIONS",
    ["Candidate", "Candidate:id", "Election_Jurisdiction", "Office_Sought", "In-State", "Total_$"],
].rename(
    columns={
        "Candidate": "candidate",
        "Candidate:id": "candidate_id",
        "Election_Jurisdiction": "state",
        "Office_Sought": "district",
        "In-State": "in_out_state",
        "Total_$": "contributions",
    }
)
contributors_18.head(n=1)

Unnamed: 0,candidate,candidate_id,state,district,in_out_state,contributions
0,"PRITZKER, JAY ROBERT (J B) & STRATTON, JULIANA W",223590,IL,GOVERNOR / LIEUTENANT GOVERNOR,1,106500034.95


Rename the categories in the in-vs.-out-of-state column.

In [26]:
# Swap the numeric codes for readable labels:
# 0 -> out-of-state, 1 -> in-state, 2 -> unknown.
contributors_18["in_out_state"] = contributors_18["in_out_state"].replace(
    to_replace={0: "out-of-state", 1: "in-state", 2: "unknown"}
)
contributors_18.head(n=1)

Unnamed: 0,candidate,candidate_id,state,district,in_out_state,contributions
0,"PRITZKER, JAY ROBERT (J B) & STRATTON, JULIANA W",223590,IL,GOVERNOR / LIEUTENANT GOVERNOR,in-state,106500034.95


## Join and filter the data

### Join the table of the 2018 cycle's latest contribution months with the 2018, 2014 and 2010 contribution-level data

In [27]:
# Attach each state's cut-off month to its contributions. The default inner
# join keeps only states that appear in both tables.
contribs_18 = pd.merge(contribs_18, latest_month, on="state")
contribs_18.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2089946 entries, 0 to 2089945
Data columns (total 5 columns):
state           object
amount          float64
date            datetime64[ns]
in_out_state    object
latest_month    datetime64[ns]
dtypes: datetime64[ns](2), float64(1), object(2)
memory usage: 95.7+ MB


In [28]:
# Spot-check the first merged row.
contribs_18.head(n=1)

Unnamed: 0,state,amount,date,in_out_state,latest_month
0,IL,50000000.0,2016-12-20,in-state,2018-06-28


In [29]:
# Attach each state's cut-off month to its contributions. The default inner
# join keeps only states that appear in both tables.
contribs_14 = pd.merge(contribs_14, latest_month, on="state")
contribs_14.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3399817 entries, 0 to 3399816
Data columns (total 5 columns):
state           object
amount          float64
date            datetime64[ns]
in_out_state    object
latest_month    datetime64[ns]
dtypes: datetime64[ns](2), float64(1), object(2)
memory usage: 155.6+ MB


Convert the year in the latest month column to its equivalent in the relevant election cycle.

In [30]:
# Move each 2018-cycle cut-off back four years to its 2014-cycle equivalent
# (2017 -> 2013, 2018 -> 2014). A relative offset covers every cut-off year,
# not just 2017 and 2018, so no state is left with an unshifted cut-off that
# would let the later date filter keep its whole 2014 cycle.
# The `year > 2014` guard leaves already-shifted dates alone, so re-running
# this cell does not shift them a second time.
contribs_14["latest_month"] = contribs_14["latest_month"].mask(
    contribs_14["latest_month"].dt.year > 2014,
    contribs_14["latest_month"] - pd.offsets.DateOffset(years=4),
)
contribs_14.head(1)



Unnamed: 0,state,amount,date,in_out_state,latest_month
0,IL,10000000.0,2014-12-31,in-state,2014-06-28


In [31]:
# Attach each state's cut-off month to its contributions. The default inner
# join keeps only states that appear in both tables.
contribs_10 = pd.merge(contribs_10, latest_month, on="state")
contribs_10.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 3329803 entries, 0 to 3329802
Data columns (total 5 columns):
state           object
amount          float64
date            datetime64[ns]
in_out_state    object
latest_month    datetime64[ns]
dtypes: datetime64[ns](2), float64(1), object(2)
memory usage: 152.4+ MB


Convert the year in the latest month column to its equivalent in the relevant election cycle.

In [32]:
# Move each 2018-cycle cut-off back eight years to its 2010-cycle equivalent
# (2017 -> 2009, 2018 -> 2010). A relative offset covers every cut-off year,
# not just 2017 and 2018, so no state is left with an unshifted cut-off that
# would let the later date filter keep its whole 2010 cycle.
# The `year > 2010` guard leaves already-shifted dates alone, so re-running
# this cell does not shift them a second time.
contribs_10["latest_month"] = contribs_10["latest_month"].mask(
    contribs_10["latest_month"].dt.year > 2010,
    contribs_10["latest_month"] - pd.offsets.DateOffset(years=8),
)
contribs_10.head(1)



Unnamed: 0,state,amount,date,in_out_state,latest_month
0,CA,20000000.0,2010-01-19,in-state,2010-07-28


In [33]:
# Show the first row of the 2010 data once more.
contribs_10.head(n=1)

Unnamed: 0,state,amount,date,in_out_state,latest_month
0,CA,20000000.0,2010-01-19,in-state,2010-07-28


### Filter the data to eliminate contributions after the 2018 cycle's latest contribution month in each state

In [34]:
# Drop contributions dated after the state's cut-off date.
contribs_18 = contribs_18.loc[
    lambda frame: frame["date"] <= frame["latest_month"]
]
contribs_18.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2073429 entries, 0 to 2089944
Data columns (total 5 columns):
state           object
amount          float64
date            datetime64[ns]
in_out_state    object
latest_month    datetime64[ns]
dtypes: datetime64[ns](2), float64(1), object(2)
memory usage: 94.9+ MB


In [35]:
# Spot-check the first remaining row.
contribs_18.head(n=1)

Unnamed: 0,state,amount,date,in_out_state,latest_month
0,IL,50000000.0,2016-12-20,in-state,2018-06-28


In [36]:
# Drop contributions dated after the state's cut-off date.
contribs_14 = contribs_14.loc[
    lambda frame: frame["date"] <= frame["latest_month"]
]
contribs_14.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1802201 entries, 3 to 3399729
Data columns (total 5 columns):
state           object
amount          float64
date            datetime64[ns]
in_out_state    object
latest_month    datetime64[ns]
dtypes: datetime64[ns](2), float64(1), object(2)
memory usage: 82.5+ MB


In [37]:
# Spot-check the first remaining row.
contribs_14.head(n=1)

Unnamed: 0,state,amount,date,in_out_state,latest_month
3,IL,2500000.0,2014-06-11,in-state,2014-06-28


In [38]:
# Drop contributions dated after the state's cut-off date.
contribs_10 = contribs_10.loc[
    lambda frame: frame["date"] <= frame["latest_month"]
]
contribs_10.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1748378 entries, 0 to 3329802
Data columns (total 5 columns):
state           object
amount          float64
date            datetime64[ns]
in_out_state    object
latest_month    datetime64[ns]
dtypes: datetime64[ns](2), float64(1), object(2)
memory usage: 80.0+ MB


In [39]:
# Spot-check the first remaining row.
contribs_10.head(n=1)

Unnamed: 0,state,amount,date,in_out_state,latest_month
0,CA,20000000.0,2010-01-19,in-state,2010-07-28


## Analyze the data

### Calculate the proportion of contributions from out-of-state each election cycle

### Calculate out-of-state contributions by state in 2018

Calculate contributions by in-vs.-out-of-state status and group by state.

In [77]:
# Sum contribution amounts for every state / in-vs.-out-of-state pair and
# store the totals in a column named contributions_18.
contribs_by_state_18 = (
    contribs_18
    .groupby(["state", "in_out_state"])["amount"]
    .sum()
    .reset_index(name="contributions_18")
)
contribs_by_state_18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 120 entries, 0 to 119
Data columns (total 3 columns):
state               120 non-null object
in_out_state        120 non-null object
contributions_18    120 non-null float64
dtypes: float64(1), object(2)
memory usage: 2.9+ KB


In [78]:
# Preview the first five state totals.
contribs_by_state_18.head(n=5)

Unnamed: 0,state,in_out_state,contributions_18
0,AK,in-state,3761979.27
1,AK,out-of-state,185829.48
2,AK,unknown,9343.86
3,AL,in-state,38606724.15
4,AL,out-of-state,3509914.99


Pivot dataframe to aggregate each state's data in a single row.

In [79]:
# One row per state, with one column per in-vs.-out-of-state category.
contribs_by_state_18 = (
    contribs_by_state_18
    .pivot_table(index=["state"], columns=["in_out_state"])
    .reset_index()
)
contribs_by_state_18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42 entries, 0 to 41
Data columns (total 4 columns):
(state, )                           42 non-null object
(contributions_18, in-state)        41 non-null float64
(contributions_18, out-of-state)    41 non-null float64
(contributions_18, unknown)         38 non-null float64
dtypes: float64(3), object(1)
memory usage: 1.4+ KB


It appears that some states have no contributions categorized as "unknown." Let's set those values equal to zero to be sure any calculations we run on them are correct.

In [80]:
# Treat a missing category total as zero dollars.
contribs_by_state_18 = contribs_by_state_18.fillna(value=0)
contribs_by_state_18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 42 entries, 0 to 41
Data columns (total 4 columns):
(state, )                           42 non-null object
(contributions_18, in-state)        42 non-null float64
(contributions_18, out-of-state)    42 non-null float64
(contributions_18, unknown)         42 non-null float64
dtypes: float64(3), object(1)
memory usage: 1.4+ KB


Flatten the resulting dataframe's multi-index columns, then join each state's latest contribution month from the 2018 cycle.

In [81]:
# Collapse the two-level column labels into single snake_case names,
# e.g. ("contributions_18", "in-state") -> "contributions_18_in_state".
contribs_by_state_18.columns = [
    "_".join(level_names).replace("-","_").strip("_")
    for level_names in contribs_by_state_18.columns.values
]
# Attach each state's cut-off month under the name latest_month_18.
contribs_by_state_18 = pd.merge(
    contribs_by_state_18,
    latest_month.rename(columns={"latest_month": "latest_month_18"}),
    on="state",
)
contribs_by_state_18.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 42 entries, 0 to 41
Data columns (total 5 columns):
state                            42 non-null object
contributions_18_in_state        42 non-null float64
contributions_18_out_of_state    42 non-null float64
contributions_18_unknown         42 non-null float64
latest_month_18                  42 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(3), object(1)
memory usage: 2.0+ KB


Calculate the proportion of out-of-state contributions.

In [82]:
# Out-of-state share = out-of-state dollars / (in-state + out-of-state + unknown).
contribs_by_state_18["pct_18_out_of_state"] = contribs_by_state_18["contributions_18_out_of_state"].div(
    contribs_by_state_18["contributions_18_in_state"]
    + contribs_by_state_18["contributions_18_out_of_state"]
    + contribs_by_state_18["contributions_18_unknown"]
)
contribs_by_state_18.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 42 entries, 0 to 41
Data columns (total 6 columns):
state                            42 non-null object
contributions_18_in_state        42 non-null float64
contributions_18_out_of_state    42 non-null float64
contributions_18_unknown         42 non-null float64
latest_month_18                  42 non-null datetime64[ns]
pct_18_out_of_state              42 non-null float64
dtypes: datetime64[ns](1), float64(4), object(1)
memory usage: 2.3+ KB


### Calculate out-of-state contributions by state in 2014

Calculate contributions by in-vs.-out-of-state status and group by state.

In [89]:
# Sum contribution amounts for every state / in-vs.-out-of-state pair and
# store the totals in a column named contributions_14.
contribs_by_state_14 = (
    contribs_14
    .groupby(["state", "in_out_state"])["amount"]
    .sum()
    .reset_index(name="contributions_14")
)
contribs_by_state_14.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 121 entries, 0 to 120
Data columns (total 3 columns):
state               121 non-null object
in_out_state        121 non-null object
contributions_14    121 non-null float64
dtypes: float64(1), object(2)
memory usage: 2.9+ KB


In [90]:
# Preview the first five state totals.
contribs_by_state_14.head(n=5)

Unnamed: 0,state,in_out_state,contributions_14
0,AK,in-state,4317169.35
1,AK,out-of-state,225928.05
2,AK,unknown,200.0
3,AL,in-state,34227125.33
4,AL,out-of-state,2813671.25


Pivot dataframe to aggregate each state's data in a single row.

In [91]:
# One row per state, with one column per in-vs.-out-of-state category.
contribs_by_state_14 = (
    contribs_by_state_14
    .pivot_table(index=["state"], columns=["in_out_state"])
    .reset_index()
)
contribs_by_state_14.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 4 columns):
(state, )                           41 non-null object
(contributions_14, in-state)        41 non-null float64
(contributions_14, out-of-state)    41 non-null float64
(contributions_14, unknown)         39 non-null float64
dtypes: float64(3), object(1)
memory usage: 1.4+ KB


It appears that some states have no contributions categorized as "unknown." Let's set those values equal to zero to be sure any calculations we run on them are correct.

In [92]:
# Treat a missing category total as zero dollars.
contribs_by_state_14 = contribs_by_state_14.fillna(value=0)
contribs_by_state_14.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 4 columns):
(state, )                           41 non-null object
(contributions_14, in-state)        41 non-null float64
(contributions_14, out-of-state)    41 non-null float64
(contributions_14, unknown)         41 non-null float64
dtypes: float64(3), object(1)
memory usage: 1.4+ KB


Flatten the resulting dataframe's multi-index columns, then merge in each state's `latest_month` value and rename it for the 2014 cycle.

In [93]:
# Collapse each (value, status) column tuple into a single snake_case name:
# join the levels, turn hyphens into underscores, trim stray underscores.
contribs_by_state_14.columns = [
    "_".join(level_names).replace("-", "_").strip("_")
    for level_names in contribs_by_state_14.columns.values
]

# Attach the per-state latest month and tag that column with the cycle suffix.
contribs_by_state_14 = (
    contribs_by_state_14
    .merge(latest_month, on="state")
    .rename(columns={"latest_month": "latest_month_14"})
)
contribs_by_state_14.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 41 entries, 0 to 40
Data columns (total 5 columns):
state                            41 non-null object
contributions_14_in_state        41 non-null float64
contributions_14_out_of_state    41 non-null float64
contributions_14_unknown         41 non-null float64
latest_month_14                  41 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(3), object(1)
memory usage: 1.9+ KB


Calculate the proportion of out-of-state contributions.

In [94]:
# Out-of-state share = out-of-state dollars / (in-state + out-of-state + unknown).
contribs_by_state_14 = contribs_by_state_14.assign(
    pct_14_out_of_state=lambda frame: frame["contributions_14_out_of_state"] / (
        frame["contributions_14_in_state"]
        + frame["contributions_14_out_of_state"]
        + frame["contributions_14_unknown"]
    )
)
contribs_by_state_14.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 41 entries, 0 to 40
Data columns (total 6 columns):
state                            41 non-null object
contributions_14_in_state        41 non-null float64
contributions_14_out_of_state    41 non-null float64
contributions_14_unknown         41 non-null float64
latest_month_14                  41 non-null datetime64[ns]
pct_14_out_of_state              41 non-null float64
dtypes: datetime64[ns](1), float64(4), object(1)
memory usage: 2.2+ KB


### Calculate out-of-state contributions by state in 2010

Calculate contributions by in-vs.-out-of-state status and group by state.

In [95]:
# Sum 2010 contribution amounts for every (state, in/out-of-state status) pair
# and label the summed column with the cycle suffix.
contribs_by_state_10 = (
    contribs_10
    .groupby(["state", "in_out_state"])["amount"]
    .sum()
    .reset_index()
    .rename(columns={"amount": "contributions_10"})
)
contribs_by_state_10.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 117 entries, 0 to 116
Data columns (total 3 columns):
state               117 non-null object
in_out_state        117 non-null object
contributions_10    117 non-null float64
dtypes: float64(1), object(2)
memory usage: 2.8+ KB


In [96]:
# Preview the first five state/status rows of the 2010 totals.
contribs_by_state_10.head(n=5)

Unnamed: 0,state,in_out_state,contributions_10
0,AK,in-state,4453472.71
1,AK,out-of-state,220586.95
2,AK,unknown,3417.35
3,AL,in-state,49265134.88
4,AL,out-of-state,2549822.9


Pivot dataframe to aggregate each state's data in a single row.

In [97]:
# One row per state; each in/out-of-state status becomes its own column
# under a (value, status) column MultiIndex.
contribs_by_state_10 = (
    contribs_by_state_10
    .pivot_table(index=["state"], columns=["in_out_state"])
    .reset_index()
)
contribs_by_state_10.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 4 columns):
(state, )                           41 non-null object
(contributions_10, in-state)        41 non-null float64
(contributions_10, out-of-state)    41 non-null float64
(contributions_10, unknown)         35 non-null float64
dtypes: float64(3), object(1)
memory usage: 1.4+ KB


It appears that some states have no contributions categorized as "unknown." Let's set those values equal to zero to be sure any calculations we run on them are correct.

In [98]:
# A status missing for a state shows up as NaN after the pivot; treat it as $0.
contribs_by_state_10 = contribs_by_state_10.fillna(value=0)
contribs_by_state_10.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 4 columns):
(state, )                           41 non-null object
(contributions_10, in-state)        41 non-null float64
(contributions_10, out-of-state)    41 non-null float64
(contributions_10, unknown)         41 non-null float64
dtypes: float64(3), object(1)
memory usage: 1.4+ KB


Flatten the resulting dataframe's multi-index columns, then merge in each state's `latest_month` value and rename it for the 2010 cycle.

In [99]:
# Collapse each (value, status) column tuple into a single snake_case name:
# join the levels, turn hyphens into underscores, trim stray underscores.
contribs_by_state_10.columns = [
    "_".join(level_names).replace("-", "_").strip("_")
    for level_names in contribs_by_state_10.columns.values
]

# Attach the per-state latest month and tag that column with the cycle suffix.
contribs_by_state_10 = (
    contribs_by_state_10
    .merge(latest_month, on="state")
    .rename(columns={"latest_month": "latest_month_10"})
)
contribs_by_state_10.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 41 entries, 0 to 40
Data columns (total 5 columns):
state                            41 non-null object
contributions_10_in_state        41 non-null float64
contributions_10_out_of_state    41 non-null float64
contributions_10_unknown         41 non-null float64
latest_month_10                  41 non-null datetime64[ns]
dtypes: datetime64[ns](1), float64(3), object(1)
memory usage: 1.9+ KB


Calculate the proportion of out-of-state contributions.

In [100]:
# Out-of-state share = out-of-state dollars / (in-state + out-of-state + unknown).
contribs_by_state_10 = contribs_by_state_10.assign(
    pct_10_out_of_state=lambda frame: frame["contributions_10_out_of_state"] / (
        frame["contributions_10_in_state"]
        + frame["contributions_10_out_of_state"]
        + frame["contributions_10_unknown"]
    )
)
contribs_by_state_10.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 41 entries, 0 to 40
Data columns (total 6 columns):
state                            41 non-null object
contributions_10_in_state        41 non-null float64
contributions_10_out_of_state    41 non-null float64
contributions_10_unknown         41 non-null float64
latest_month_10                  41 non-null datetime64[ns]
pct_10_out_of_state              41 non-null float64
dtypes: datetime64[ns](1), float64(4), object(1)
memory usage: 2.2+ KB


### Join the 2018, 2014 and 2010 contributions by state data and calculate out-of-state figures

In [101]:
# Fold the three per-cycle tables into one wide table, joining on state.
# pd.merge defaults to an inner join, so only states present in every cycle remain.
list_of_contribs_by_state = [contribs_by_state_18, contribs_by_state_14, contribs_by_state_10]
contribs_by_state = list_of_contribs_by_state[0]
for cycle_frame in list_of_contribs_by_state[1:]:
    contribs_by_state = pd.merge(contribs_by_state, cycle_frame, on="state")
contribs_by_state

Unnamed: 0,state,contributions_18_in_state,contributions_18_out_of_state,contributions_18_unknown,latest_month_18,pct_18_out_of_state,contributions_14_in_state,contributions_14_out_of_state,contributions_14_unknown,latest_month_14,pct_14_out_of_state,contributions_10_in_state,contributions_10_out_of_state,contributions_10_unknown,latest_month_10,pct_10_out_of_state
0,AK,3761979.27,185829.48,9343.86,2018-07-28,0.05,4317169.35,225928.05,200.0,2018-07-28,0.05,4453472.71,220586.95,3417.35,2018-07-28,0.05
1,AL,38606724.15,3509914.99,750342.89,2018-07-28,0.08,34227125.33,2813671.25,28102.53,2018-07-28,0.08,49265134.88,2549822.9,203308.72,2018-07-28,0.05
2,AR,8041656.93,1028099.01,101023.95,2018-03-28,0.11,10815624.44,1540814.5,108411.96,2018-03-28,0.12,6519555.62,812923.84,39828.32,2018-03-28,0.11
3,AZ,6393754.83,1485774.55,112341.49,2017-12-28,0.19,4636616.0,761785.23,2950.0,2017-12-28,0.14,1646015.09,159145.71,0.0,2017-12-28,0.09
4,CA,161377400.97,21986862.92,206490.11,2018-07-28,0.12,97202680.89,13749369.08,32338.83,2018-07-28,0.12,244977981.11,15029650.99,22382.81,2018-07-28,0.06
5,CO,29453109.87,3803974.94,129842.15,2018-06-28,0.11,9441948.59,1378513.97,5186.67,2018-06-28,0.13,7057970.04,908339.73,-5119.09,2018-06-28,0.11
6,CT,2712839.12,119512.0,150.0,2018-03-28,0.04,851386.61,76075.0,1621.82,2018-03-28,0.08,3263032.53,354954.55,50.0,2018-03-28,0.1
7,FL,102715412.45,6938527.22,2520372.21,2018-08-28,0.06,39664617.0,3839833.59,69790.88,2018-08-28,0.09,96462601.21,4768849.13,170402.38,2018-08-28,0.05
8,GA,37067696.47,5873099.41,13850.0,2018-07-28,0.14,26639081.37,5655233.06,1000.0,2018-07-28,0.18,35195924.45,5324416.95,1925.0,2018-07-28,0.13
9,HI,3384761.81,521055.0,191284.31,2017-12-28,0.13,3832319.31,728060.12,0.0,2017-12-28,0.16,1712958.67,275440.0,0.0,2017-12-28,0.14


Calculate the change in out-of-state spending, in absolute and proportional terms.

In [102]:
# Out-of-state dollars in the current cycle and in the two earlier cycles.
out_of_state_18 = contribs_by_state["contributions_18_out_of_state"]
out_of_state_14 = contribs_by_state["contributions_14_out_of_state"]
out_of_state_10 = contribs_by_state["contributions_10_out_of_state"]

# Absolute change in out-of-state dollars versus each earlier cycle.
contribs_by_state["change_from_14"] = out_of_state_18 - out_of_state_14
contribs_by_state["change_from_10"] = out_of_state_18 - out_of_state_10

# The same change expressed as a fraction of the earlier cycle's out-of-state dollars.
contribs_by_state["pct_change_from_14"] = contribs_by_state["change_from_14"] / out_of_state_14
contribs_by_state["pct_change_from_10"] = contribs_by_state["change_from_10"] / out_of_state_10

Convert the year in the latest month column to its equivalent in the relevant election cycle.

In [104]:
# (column, year currently stored, year to substitute), applied in this order.
# DateOffset's singular `year=` keyword replaces the year component of the date
# instead of adding to it, so month and day are kept.
latest_month_year_remaps = [
    ("latest_month_14", 2017, 2013),
    ("latest_month_14", 2018, 2014),
    ("latest_month_10", 2017, 2009),
    ("latest_month_10", 2018, 2010),
]
for month_column, stored_year, cycle_year in latest_month_year_remaps:
    month_values = contribs_by_state[month_column]
    contribs_by_state[month_column] = month_values.mask(
        month_values.dt.year == stored_year,
        month_values + pd.offsets.DateOffset(year=cycle_year),
    )
contribs_by_state.head(1)



Unnamed: 0,state,contributions_18_in_state,contributions_18_out_of_state,contributions_18_unknown,latest_month_18,pct_18_out_of_state,contributions_14_in_state,contributions_14_out_of_state,contributions_14_unknown,latest_month_14,pct_14_out_of_state,contributions_10_in_state,contributions_10_out_of_state,contributions_10_unknown,latest_month_10,pct_10_out_of_state,change_from_14,change_from_10,pct_change_from_14,pct_change_from_10
0,AK,3761979.27,185829.48,9343.86,2018-07-28,0.05,4317169.35,225928.05,200.0,2014-07-28,0.05,4453472.71,220586.95,3417.35,2010-07-28,0.05,-40098.57,-34757.47,-0.18,-0.16


Export the data to Excel.

In [105]:
# Save the combined state-level table as an Excel workbook, without the row index.
contribs_by_state.to_excel(excel_writer="data/contribs_by_state.xlsx", index=False)

### Calculate out-of-state contributions by candidate in 2018

In [106]:
# Sum 2018 contributions for every (candidate, state, district, in/out-of-state status) group.
contribs_by_candidate_18 = (
    contributors_18
    .groupby(by=["candidate", "state", "district", "in_out_state"])["contributions"]
    .sum()
    .reset_index()
)
contribs_by_candidate_18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 14697 entries, 0 to 14696
Data columns (total 5 columns):
candidate        14697 non-null object
state            14697 non-null object
district         14697 non-null object
in_out_state     14697 non-null object
contributions    14697 non-null float64
dtypes: float64(1), object(4)
memory usage: 574.2+ KB


In [107]:
# Preview the first five candidate/status rows.
contribs_by_candidate_18.head(n=5)

Unnamed: 0,candidate,state,district,in_out_state,contributions
0,"ABBOTT, DAVID H",IN,HOUSE DISTRICT 082,in-state,26065.0
1,"ABBOTT, GHERT",AK,HOUSE DISTRICT 036,in-state,45.9
2,"ABBOTT, GREG",TX,GOVERNOR,in-state,61189628.95
3,"ABBOTT, GREG",TX,GOVERNOR,out-of-state,4590344.58
4,"ABBOTT, GREG",TX,GOVERNOR,unknown,1020.0


Pivot dataframe to aggregate each candidate's data in a single row.

In [108]:
# One row per (candidate, state, district); each in/out-of-state status
# becomes its own column under a (value, status) column MultiIndex.
contribs_by_candidate_18 = (
    contribs_by_candidate_18
    .pivot_table(index=["candidate", "state", "district"], columns=["in_out_state"])
    .reset_index()
)
contribs_by_candidate_18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7398 entries, 0 to 7397
Data columns (total 6 columns):
(candidate, )                    7398 non-null object
(state, )                        7398 non-null object
(district, )                     7398 non-null object
(contributions, in-state)        7354 non-null float64
(contributions, out-of-state)    5804 non-null float64
(contributions, unknown)         1539 non-null float64
dtypes: float64(3), object(3)
memory usage: 346.9+ KB


Again, some records have no contributions for certain categories. Let's set those values equal to zero to be sure any calculations we run on them are correct.

In [109]:
# A status missing for a candidate shows up as NaN after the pivot; treat it as $0.
contribs_by_candidate_18 = contribs_by_candidate_18.fillna(value=0)
contribs_by_candidate_18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7398 entries, 0 to 7397
Data columns (total 6 columns):
(candidate, )                    7398 non-null object
(state, )                        7398 non-null object
(district, )                     7398 non-null object
(contributions, in-state)        7398 non-null float64
(contributions, out-of-state)    7398 non-null float64
(contributions, unknown)         7398 non-null float64
dtypes: float64(3), object(3)
memory usage: 346.9+ KB


Flatten the resulting dataframe's multi-index columns.

In [110]:
# Collapse each (value, status) column tuple into a single snake_case name:
# join the levels, turn hyphens into underscores, trim stray underscores.
contribs_by_candidate_18.columns = [
    "_".join(level_names).replace("-", "_").strip("_")
    for level_names in contribs_by_candidate_18.columns.values
]
contribs_by_candidate_18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7398 entries, 0 to 7397
Data columns (total 6 columns):
candidate                     7398 non-null object
state                         7398 non-null object
district                      7398 non-null object
contributions_in_state        7398 non-null float64
contributions_out_of_state    7398 non-null float64
contributions_unknown         7398 non-null float64
dtypes: float64(3), object(3)
memory usage: 346.9+ KB


Calculate the proportion of each candidate's contributions that came from out of state.

In [111]:
# Out-of-state share = out-of-state dollars / (in-state + out-of-state + unknown).
contribs_by_candidate_18 = contribs_by_candidate_18.assign(
    pct_out_of_state=lambda frame: frame["contributions_out_of_state"] / (
        frame["contributions_in_state"]
        + frame["contributions_out_of_state"]
        + frame["contributions_unknown"]
    )
)
contribs_by_candidate_18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7398 entries, 0 to 7397
Data columns (total 7 columns):
candidate                     7398 non-null object
state                         7398 non-null object
district                      7398 non-null object
contributions_in_state        7398 non-null float64
contributions_out_of_state    7398 non-null float64
contributions_unknown         7398 non-null float64
pct_out_of_state              7398 non-null float64
dtypes: float64(4), object(3)
memory usage: 404.7+ KB


Export the data to Excel.

In [112]:
# Save the candidate-level table as an Excel workbook, without the row index.
contribs_by_candidate_18.to_excel(excel_writer="data/contribs_by_candidate_18.xlsx", index=False)

## Key findings

In [129]:
# Total dollars across all 2018 contribution records.
contribs_18.loc[:, "amount"].sum()

1438439125.7900002

In [126]:
# 2018 dollars split by in-state / out-of-state / unknown status.
contribs_18["amount"].groupby(contribs_18["in_out_state"]).sum()

in_out_state
in-state        1.265455e+09
out-of-state    1.604762e+08
unknown         1.250780e+07
Name: amount, dtype: float64

In [131]:
# Total dollars across all 2014 contribution records.
contribs_14.loc[:, "amount"].sum()

966983285.76999962

In [132]:
# 2014 dollars split by in-state / out-of-state / unknown status.
contribs_14["amount"].groupby(contribs_14["in_out_state"]).sum()

in_out_state
in-state        8.109896e+08
out-of-state    1.371495e+08
unknown         1.884424e+07
Name: amount, dtype: float64

In [133]:
# Total dollars across all 2010 contribution records.
contribs_10.loc[:, "amount"].sum()

1126987443.3900001

In [134]:
# 2010 dollars split by in-state / out-of-state / unknown status.
contribs_10["amount"].groupby(contribs_10["in_out_state"]).sum()

in_out_state
in-state        1.022590e+09
out-of-state    1.009673e+08
unknown         3.430096e+06
Name: amount, dtype: float64

In [135]:
# Rank states by their 2018 out-of-state share, largest first.
contribs_by_state.sort_values(
    by="pct_18_out_of_state",
    ascending=False,
)

Unnamed: 0,state,contributions_18_in_state,contributions_18_out_of_state,contributions_18_unknown,pct_18_out_of_state,contributions_14_in_state,contributions_14_out_of_state,contributions_14_unknown,pct_14_out_of_state,contributions_10_in_state,contributions_10_out_of_state,contributions_10_unknown,pct_10_out_of_state,change_from_14,change_from_10,pct_change_from_14,pct_change_from_10
37,VT,9.99,4000.0,0.0,0.997509,1260367.0,911995.8,5620.0,0.418734,3836951.0,1192212.15,7385.83,0.236712,-907995.8,-1188212.15,-0.995614,-0.996645
32,RI,2651880.0,2459387.77,98549.45,0.472068,3684281.0,1699150.13,23100.0,0.314277,2307368.0,424549.59,8450.0,0.154924,760237.64,2034838.18,0.447422,4.792934
39,WI,23028800.0,13290424.62,576157.06,0.360219,17002200.0,10926356.89,23416.32,0.390898,16635770.0,1296884.24,18562.0,0.072245,2364067.73,11993540.38,0.216364,9.247965
23,NE,3656762.0,1554718.25,2093.77,0.298206,10504220.0,2231676.23,17918.0,0.174981,3164239.0,1126667.91,-396088.72,0.289274,-676957.98,428050.34,-0.303341,0.379926
10,IA,20638010.0,8581899.73,14446.39,0.293555,10798730.0,3307637.45,100.0,0.234477,15307590.0,4803934.4,166051.18,0.236909,5274262.28,3777965.33,1.594571,0.786431
24,NH,747706.2,306858.75,1570.0,0.290549,932764.5,434858.03,11175.0,0.315389,380930.8,134310.0,6560.0,0.257397,-127999.28,172548.75,-0.294347,1.284705
14,KS,4196275.0,1258336.03,20493.02,0.229829,3258274.0,1013394.73,19633.38,0.236151,2133664.0,945861.8,3257.0,0.306821,244941.3,312474.23,0.241704,0.330359
17,MD,43937580.0,13076554.31,2293750.12,0.220486,38309120.0,8532791.38,1212248.96,0.177566,25725630.0,6099789.23,23866.6,0.19152,4543762.93,6976765.08,0.532506,1.143772
38,WA,2505002.0,702800.1,16364.36,0.217979,3996551.0,865847.06,6328.92,0.177838,3960453.0,841414.25,9874.09,0.174867,-163046.96,-138614.15,-0.188309,-0.164739
26,NV,4472228.0,1243646.0,15305.0,0.216997,7816388.0,1883495.77,16275.0,0.193852,4115241.0,698652.96,52485.0,0.143567,-639849.77,544993.04,-0.339714,0.780063


### In which states has the dollar amount of out-of-state funding increased the most, in percentage terms, relative to 2014?

In [114]:
# Rank states by percent change in out-of-state dollars since 2014, largest first.
contribs_by_state.sort_values(
    by="pct_change_from_14",
    ascending=False,
)

Unnamed: 0,state,contributions_18_in_state,contributions_18_out_of_state,contributions_18_unknown,pct_18_out_of_state,contributions_14_in_state,contributions_14_out_of_state,contributions_14_unknown,pct_14_out_of_state,contributions_10_in_state,contributions_10_out_of_state,contributions_10_unknown,pct_10_out_of_state,change_from_14,change_from_10,pct_change_from_14,pct_change_from_10
5,CO,29453110.0,3803974.94,129842.15,0.113936,9441949.0,1378513.97,5186.67,0.127338,7057970.0,908339.73,-5119.09,0.114096,2425460.97,2895635.21,1.759475,3.187833
10,IA,20638010.0,8581899.73,14446.39,0.293555,10798730.0,3307637.45,100.0,0.234477,15307590.0,4803934.4,166051.18,0.236909,5274262.28,3777965.33,1.594571,0.786431
30,OR,12724110.0,3445710.62,166815.0,0.210919,9610847.0,1473960.91,7081.34,0.132886,9392901.0,1773034.32,3390.0,0.158741,1971749.71,1672676.3,1.337722,0.943398
40,WY,334150.2,33650.0,293.0,0.091417,37112.14,15375.0,250.0,0.29154,288423.3,26915.0,0.0,0.085353,18275.0,6735.0,1.188618,0.250232
19,MI,50110970.0,5249713.25,115489.04,0.09463,34605940.0,2525524.46,20428.25,0.067978,38169800.0,1739881.69,41641.22,0.04355,2724188.79,3509831.56,1.078663,2.017282
3,AZ,6393755.0,1485774.55,112341.49,0.185911,4636616.0,761785.23,2950.0,0.141036,1646015.0,159145.71,0.0,0.088162,723989.32,1326628.84,0.950385,8.335938
7,FL,102715400.0,6938527.22,2520372.21,0.061855,39664620.0,3839833.59,69790.88,0.088122,96462600.0,4768849.13,170402.38,0.047029,3098693.63,2169678.09,0.806986,0.454969
4,CA,161377400.0,21986862.92,206490.11,0.119773,97202680.0,13749369.08,32338.83,0.123886,244978000.0,15029650.99,22382.81,0.0578,8237493.84,6957211.93,0.599118,0.462899
6,CT,2712839.0,119512.0,150.0,0.042193,851386.6,76075.0,1621.82,0.081882,3263033.0,354954.55,50.0,0.098107,43437.0,-235442.55,0.570976,-0.663303
17,MD,43937580.0,13076554.31,2293750.12,0.220486,38309120.0,8532791.38,1212248.96,0.177566,25725630.0,6099789.23,23866.6,0.19152,4543762.93,6976765.08,0.532506,1.143772


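The dollar amount of out-of-state funding has more than doubled in five states (Colorado, Iowa, Oregon, Wyoming and Michigan) as compared with this point in the 2014 cycle. Note that this measures the change in out-of-state dollars, not the change in the out-of-state share of all contributions.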

### In which states has the dollar amount of out-of-state funding increased the most, in percentage terms, relative to 2010?

In [143]:
# Rank states by percent change in out-of-state dollars since 2010, largest first.
contribs_by_state.sort_values(
    by="pct_change_from_10",
    ascending=False,
)

Unnamed: 0,state,contributions_18_in_state,contributions_18_out_of_state,contributions_18_unknown,pct_18_out_of_state,contributions_14_in_state,contributions_14_out_of_state,contributions_14_unknown,pct_14_out_of_state,contributions_10_in_state,contributions_10_out_of_state,contributions_10_unknown,pct_10_out_of_state,change_from_14,change_from_10,pct_change_from_14,pct_change_from_10
39,WI,23028798.19,13290424.62,576157.06,0.36,17002198.01,10926356.89,23416.32,0.39,16635774.34,1296884.24,18562.0,0.07,2364067.73,11993540.38,0.22,9.25
3,AZ,6393754.83,1485774.55,112341.49,0.19,4636616.0,761785.23,2950.0,0.14,1646015.09,159145.71,0.0,0.09,723989.32,1326628.84,0.95,8.34
32,RI,2651879.77,2459387.77,98549.45,0.47,3684281.22,1699150.13,23100.0,0.31,2307368.25,424549.59,8450.0,0.15,760237.64,2034838.18,0.45,4.79
5,CO,29453109.87,3803974.94,129842.15,0.11,9441948.59,1378513.97,5186.67,0.13,7057970.04,908339.73,-5119.09,0.11,2425460.97,2895635.21,1.76,3.19
19,MI,50110970.75,5249713.25,115489.04,0.09,34605937.64,2525524.46,20428.25,0.07,38169795.36,1739881.69,41641.22,0.04,2724188.79,3509831.56,1.08,2.02
12,IL,266548804.14,14695149.46,83129.49,0.05,60751842.55,12540388.4,15024.97,0.17,61680202.36,6095747.07,1545525.03,0.09,2154761.06,8599402.39,0.17,1.41
24,NH,747706.24,306858.75,1570.0,0.29,932764.49,434858.03,11175.0,0.32,380930.83,134310.0,6560.0,0.26,-127999.28,172548.75,-0.29,1.28
17,MD,43937575.05,13076554.31,2293750.12,0.22,38309121.21,8532791.38,1212248.96,0.18,25725629.26,6099789.23,23866.6,0.19,4543762.93,6976765.08,0.53,1.14
30,OR,12724108.38,3445710.62,166815.0,0.21,9610846.82,1473960.91,7081.34,0.13,9392900.62,1773034.32,3390.0,0.16,1971749.71,1672676.3,1.34,0.94
9,HI,3384761.81,521055.0,191284.31,0.13,3832319.31,728060.12,0.0,0.16,1712958.67,275440.0,0.0,0.14,-207005.12,245615.0,-0.28,0.89


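The dollar amount of out-of-state funding has more than doubled in eight states (Wisconsin, Arizona, Rhode Island, Colorado, Michigan, Illinois, New Hampshire and Maryland) as compared with this point in the 2010 cycle. Note that this measures the change in out-of-state dollars, not the change in the out-of-state share of all contributions.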

### Hawaii has a law on the books limiting the amount of out-of-state funding candidates can take. How has the proportion of out-of-state funding for Hawaii changed from 2014 and 2010?

In [130]:
# Pull the Hawaii and Alaska rows from the combined state table.
contribs_by_state[contribs_by_state["state"].isin(["HI", "AK"])]

Unnamed: 0,state,contributions_18_in_state,contributions_18_out_of_state,contributions_18_unknown,pct_18_out_of_state,contributions_14_in_state,contributions_14_out_of_state,contributions_14_unknown,pct_14_out_of_state,contributions_10_in_state,contributions_10_out_of_state,contributions_10_unknown,pct_10_out_of_state,change_from_14,change_from_10,pct_change_from_14,pct_change_from_10
0,AK,3761979.27,185829.48,9343.86,0.04696,4317169.35,225928.05,200.0,0.049728,4453472.71,220586.95,3417.35,0.047159,-40098.57,-34757.47,-0.177484,-0.157568
9,HI,3384761.81,521055.0,191284.31,0.127177,3832319.31,728060.12,0.0,0.159649,1712958.67,275440.0,0.0,0.138524,-207005.12,245615.0,-0.284324,0.891719


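The dollar amount of out-of-state funding in Hawaii has declined by 28 percent as compared with this point in the 2014 cycle and has increased by 89 percent as compared with this point in the 2010 cycle. As a share of all contributions, out-of-state funding in Hawaii stands at 12.7 percent in 2018, down from 16.0 percent in 2014 and 13.9 percent in 2010.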

### Which candidates are the largest recipients of out-of-state funding this election cycle?

In [125]:
# Keep candidates with at least $1,000 of in-state contributions, then rank
# them by out-of-state share, largest first.
(
    contribs_by_candidate_18
    .loc[lambda frame: frame["contributions_in_state"] >= 1000]
    .sort_values(by="pct_out_of_state", ascending=False)
)

Unnamed: 0,candidate,state,district,contributions_in_state,contributions_out_of_state,contributions_unknown,pct_out_of_state
5323,"POPE, THOMAS E TOMMY",SC,HOUSE DISTRICT 047,3500.00,8000.00,-3650.00,1.019108
6252,"SOHI, PAUL",OH,HOUSE DISTRICT 028,1235.20,36477.43,0.00,0.967247
2472,"GLASSON, CATHY",IA,GOVERNOR,109806.37,2182830.42,0.00,0.952105
5726,"ROSENTHAL, JON E",TX,HOUSE DISTRICT 135,1075.00,11787.13,0.00,0.916421
2692,"GUSTAVSON, ALLISON M",MA,HOUSE DISTRICT FOURTH ESSEX,1550.00,11387.00,0.00,0.880189
6950,"WASDEN, LAWRENCE",ID,ATTORNEY GENERAL,14486.98,91100.00,500.00,0.858729
7385,"ZOKLE, GEORGE",MD,HOUSE DISTRICT 020,3135.00,16821.60,0.00,0.842909
6741,"TURNER, DARRELL",AL,SENATE DISTRICT 013,4530.84,22559.41,0.00,0.832750
5345,"POWDRELL-CULBERT, JANE E",NM,HOUSE DISTRICT 044,3050.00,14550.00,682.81,0.795830
2097,"FERRELL, MARCUS",AZ,HOUSE DISTRICT 024,1186.00,4282.00,0.00,0.783102
