# Out-of-State-Contributions: Candidates Analysis

In [1]:
from functools import reduce
import numpy as np
import pandas as pd

%load_ext jupyternotify

pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 500)
pd.options.display.float_format = "{:,.2f}".format # Format floats

<IPython.core.display.Javascript object>

Import contribution-level data.

In [2]:
contributions = pd.read_csv("data/contributions.csv")
contributions.info()

  interactivity=interactivity, compiler=compiler, result=result)


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6051953 entries, 0 to 6051952
Data columns (total 18 columns):
candidate                 object
year                      int64
state                     object
party                     object
election_status           object
contributor               object
amount                    float64
date                      object
in_out_state              object
no_veto                   object
office                    object
latest_month              object
redistricting_role        object
independent_commission    object
single_house_district     object
standardized_office       object
standardized_status       object
two_year_term             object
dtypes: float64(1), int64(1), object(16)
memory usage: 831.1+ MB


Convert the contribution date and latest month columns to datetime data type.

In [3]:
contributions["date"] = pd.to_datetime(contributions["date"], errors="coerce")
contributions["latest_month"] = pd.to_datetime(contributions["latest_month"], errors="coerce")

## How much out-of-state money are candidates raising in the 2018 election cycle, in absolute and proportional terms, thus far and how does that compare with the 2014 and 2010 cycles?

Separate the data by election cycle.

In [47]:
contributions_18 = contributions[contributions["year"] == 2018]
contributions_14 = contributions[contributions["year"] == 2014]
contributions_10 = contributions[contributions["year"] == 2010]

In [50]:
contributions_by_candidate_18 = contributions_18.groupby(["candidate", "state", "year", "standardized_office", "standardized_status", "redistricting_role", "in_out_state"])["amount"].sum().reset_index()
contributions_by_candidate_18.rename(columns={"amount": "amount_18"}, inplace=True)
contributions_by_candidate_14 = contributions_14.groupby(["candidate", "state", "year", "standardized_office", "standardized_status", "in_out_state"])["amount"].sum().reset_index()
contributions_by_candidate_14.rename(columns={"amount": "amount_14"}, inplace=True)
contributions_by_candidate_10 = contributions_10.groupby(["candidate", "state", "year", "standardized_office", "standardized_status", "in_out_state"])["amount"].sum().reset_index()
contributions_by_candidate_10.rename(columns={"amount": "amount_10"}, inplace=True)

Pivot the dataframes to aggregate each candidate's data in a single row.

In [51]:
contributions_by_candidate_18 = pd.pivot_table(contributions_by_candidate_18, index=["candidate", "state", "year", "standardized_office", "standardized_status", "redistricting_role"], columns=["in_out_state"]).reset_index()
contributions_by_candidate_14 = pd.pivot_table(contributions_by_candidate_14, index=["candidate", "state", "year", "standardized_office", "standardized_status"], columns=["in_out_state"]).reset_index()
contributions_by_candidate_10 = pd.pivot_table(contributions_by_candidate_10, index=["candidate", "state", "year", "standardized_office", "standardized_status"], columns=["in_out_state"]).reset_index()

Some records have no contributions for certain categories. Let's set those values equal to zero to be sure any calculations we run on them are correct.

In [55]:
contributions_by_candidate_18.fillna(0, inplace=True)
contributions_by_candidate_14.fillna(0, inplace=True)
contributions_by_candidate_10.fillna(0, inplace=True)

Flatten the resulting dataframes' multi-index columns.

In [56]:
contributions_by_candidate_18.columns = ["_".join(column).replace("-","_").strip("_") for column in contributions_by_candidate_18.columns.values]
contributions_by_candidate_14.columns = ["_".join(column).replace("-","_").strip("_") for column in contributions_by_candidate_14.columns.values]
contributions_by_candidate_10.columns = ["_".join(column).replace("-","_").strip("_") for column in contributions_by_candidate_10.columns.values]

In [60]:
contributions_by_candidate_18.rename(columns={"standardized_office": "standardized_office_18"}, inplace=True)
contributions_by_candidate_14.rename(columns={"standardized_office": "standardized_office_14"}, inplace=True)
contributions_by_candidate_10.rename(columns={"standardized_office": "standardized_office_10"}, inplace=True)

Calculate the proportion of in-state, out-of-state and unknown contributions.

In [61]:
contributions_by_candidate_18["pct_18_in_state"] = contributions_by_candidate_18["amount_18_in_state"] / (contributions_by_candidate_18["amount_18_in_state"] + contributions_by_candidate_18["amount_18_out_of_state"] + contributions_by_candidate_18["amount_18_unknown"])
contributions_by_candidate_18["pct_18_out_of_state"] = contributions_by_candidate_18["amount_18_out_of_state"] / (contributions_by_candidate_18["amount_18_in_state"] + contributions_by_candidate_18["amount_18_out_of_state"] + contributions_by_candidate_18["amount_18_unknown"])
contributions_by_candidate_18["pct_18_unknown"] = contributions_by_candidate_18["amount_18_unknown"] / (contributions_by_candidate_18["amount_18_in_state"] + contributions_by_candidate_18["amount_18_out_of_state"] + contributions_by_candidate_18["amount_18_unknown"])
contributions_by_candidate_14["pct_14_in_state"] = contributions_by_candidate_14["amount_14_in_state"] / (contributions_by_candidate_14["amount_14_in_state"] + contributions_by_candidate_14["amount_14_out_of_state"] + contributions_by_candidate_14["amount_14_unknown"])
contributions_by_candidate_14["pct_14_out_of_state"] = contributions_by_candidate_14["amount_14_out_of_state"] / (contributions_by_candidate_14["amount_14_in_state"] + contributions_by_candidate_14["amount_14_out_of_state"] + contributions_by_candidate_14["amount_14_unknown"])
contributions_by_candidate_14["pct_14_unknown"] = contributions_by_candidate_14["amount_14_unknown"] / (contributions_by_candidate_14["amount_14_in_state"] + contributions_by_candidate_14["amount_14_out_of_state"] + contributions_by_candidate_14["amount_14_unknown"])
contributions_by_candidate_10["pct_10_in_state"] = contributions_by_candidate_10["amount_10_in_state"] / (contributions_by_candidate_10["amount_10_in_state"] + contributions_by_candidate_10["amount_10_out_of_state"] + contributions_by_candidate_10["amount_10_unknown"])
contributions_by_candidate_10["pct_10_out_of_state"] = contributions_by_candidate_10["amount_10_out_of_state"] / (contributions_by_candidate_10["amount_10_in_state"] + contributions_by_candidate_10["amount_10_out_of_state"] + contributions_by_candidate_10["amount_10_unknown"])
contributions_by_candidate_10["pct_10_unknown"] = contributions_by_candidate_10["amount_10_unknown"] / (contributions_by_candidate_10["amount_10_in_state"] + contributions_by_candidate_10["amount_10_out_of_state"] + contributions_by_candidate_10["amount_10_unknown"])

Join the 2018, 2014 and 2010 contributions by candidate data

In [63]:
list_of_contributions_by_candidate = [contributions_by_candidate_18, contributions_by_candidate_14, contributions_by_candidate_10]
contributions_by_candidate = reduce(lambda left, right: pd.merge(left, right, on=["candidate", "state"], how="outer"), list_of_contributions_by_candidate)
contributions_by_candidate.drop(["year_x", "year_y", "year"], axis=1, inplace=True)
contributions_by_candidate.rename(columns={"standardized_status_x": "18_standardized_status", "standardized_status_y": "14_standardized_status"})
contributions_by_candidate.head()

Unnamed: 0,candidate,state,standardized_office_18,standardized_status_x,redistricting_role,amount_18_in_state,amount_18_out_of_state,amount_18_unknown,pct_18_in_state,pct_18_out_of_state,pct_18_unknown,standardized_office_14,standardized_status_y,amount_14_in_state,amount_14_out_of_state,amount_14_unknown,pct_14_in_state,pct_14_out_of_state,pct_14_unknown,standardized_office_10,standardized_status,amount_10_in_state,amount_10_out_of_state,amount_10_unknown,pct_10_in_state,pct_10_out_of_state,pct_10_unknown
0,"ABBOTT, DAVID H",IN,STATE HOUSE/ASSEMBLY,ADVANCED TO GENERAL,Y,26065.0,0.0,0.0,1.0,0.0,0.0,,,,,,,,,,,,,,,,
1,"ABBOTT, GHERT",AK,STATE HOUSE/ASSEMBLY,ADVANCED TO GENERAL,N,45.9,0.0,0.0,1.0,0.0,0.0,,,,,,,,,,,,,,,,
2,"ABBOTT, GREG",TX,GOVERNOR/LIEUTENANT GOVERNOR,ADVANCED TO GENERAL,Y,59614107.87,4528077.58,1020.0,0.93,0.07,0.0,GOVERNOR/LIEUTENANT GOVERNOR,ADVANCED TO GENERAL,22988410.36,1488413.64,4791017.12,0.79,0.05,0.16,,,,,,,,
3,"ABDUL-RAHIM, ANEES",MD,STATE HOUSE/ASSEMBLY,DID NOT ADVANCE,Y,8841.07,0.0,0.0,1.0,0.0,0.0,,,,,,,,,,,,,,,,
4,"ABERCROMBIE, CATHERINE F",CT,STATE HOUSE/ASSEMBLY,ADVANCED TO GENERAL,N,2485.0,0.0,0.0,1.0,0.0,0.0,STATE HOUSE/ASSEMBLY,ADVANCED TO GENERAL,5130.0,7.0,20.0,0.99,0.0,0.0,,,,,,,,


Export the data to Excel.

In [65]:
%%notify
contributions_by_candidate.to_excel("data/contributions_by_candidate.xlsx", index=False)

<IPython.core.display.Javascript object>