# Out-of-State-Contributions: National Analysis

How much out-of-state money have candidates nationally raised in the 2018 election cycle, in absolute and proportional terms, thus far and how does that compare with this point in the 2014 and 2010 cycles?

In [1]:
from functools import reduce
import numpy as np
import pandas as pd

%load_ext jupyternotify

pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 500)
pd.options.display.float_format = "{:,.2f}".format # Format floats

<IPython.core.display.Javascript object>

Import contributions data.

In [2]:
%%notify
contributions = pd.read_csv("data/contributions.csv")
contributions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 6040385 entries, 0 to 6040384
Data columns (total 13 columns):
candidate              object
election_status        object
party                  object
state                  object
year                   int64
office                 object
contributor            object
amount                 float64
date                   object
in_out_state           object
standardized_office    object
standardized_status    object
latest_month           object
dtypes: float64(1), int64(1), object(11)
memory usage: 599.1+ MB


<IPython.core.display.Javascript object>

## Calculate out-of-state contributions by party and year

Group by year and party and in-vs.-out-of-state contribution status and sum contributions.

In [95]:
contributions_by_party = contributions.groupby(["year", "party", "in_out_state"])["amount"].sum().reset_index()
contributions_by_party

Unnamed: 0,year,party,in_out_state,amount
0,2010,Democratic,in-state,440437500.65
1,2010,Democratic,out-of-state,57510341.18
2,2010,Democratic,unknown,1245732.64
3,2010,Nonpartisan,in-state,942676.0
4,2010,Nonpartisan,out-of-state,136753.7
5,2010,Nonpartisan,unknown,-19688.72
6,2010,Republican,in-state,633194243.7
7,2010,Republican,out-of-state,48646339.25
8,2010,Republican,unknown,663662.68
9,2010,Third-Party,in-state,5471020.1


Pivot dataframe to aggregate each party's data in a single row.

In [96]:
contributions_by_party = pd.pivot_table(contributions_by_party, index=["party"], columns=["year", "in_out_state"]).reset_index()
contributions_by_party

Unnamed: 0_level_0,party,amount,amount,amount,amount,amount,amount,amount,amount,amount
year,Unnamed: 1_level_1,2010,2010,2010,2014,2014,2014,2018,2018,2018
in_out_state,Unnamed: 1_level_2,in-state,out-of-state,unknown,in-state,out-of-state,unknown,in-state,out-of-state,unknown
0,Democratic,440437500.65,57510341.18,1245732.64,382680047.01,68220969.39,5995721.92,598188686.04,91865905.61,6668177.58
1,Nonpartisan,942676.0,136753.7,-19688.72,1617096.45,156743.05,4068.0,1376332.38,310506.3,-3156.23
2,Republican,633194243.7,48646339.25,663662.68,455243717.29,74940741.84,10255204.82,697440803.82,68007772.51,10607929.23
3,Third-Party,5471020.1,658788.33,1530159.31,8860109.22,531708.04,36375.05,3722193.74,333749.0,114731.23
4,Unknown,,,,,,,28042.01,7944.09,


Some records have no contributions for certain categories. Let's set those values equal to zero to be sure any calculations we run on them are correct.

In [97]:
contributions_by_party.fillna(0, inplace=True)

Flatten the resulting dataframe's multi-index columns.

In [98]:
contributions_by_party.columns = ["party", "2010_in_state", "2010_out_of_state", "2010_unknown",
                                  "2014_in_state", "2014_out_of_state", "2014_unknown",
                                  "2018_in_state", "2018_out_of_state", "2018_unknown"]
contributions_by_party

Unnamed: 0,party,2010_in_state,2010_out_of_state,2010_unknown,2014_in_state,2014_out_of_state,2014_unknown,2018_in_state,2018_out_of_state,2018_unknown
0,Democratic,440437500.65,57510341.18,1245732.64,382680047.01,68220969.39,5995721.92,598188686.04,91865905.61,6668177.58
1,Nonpartisan,942676.0,136753.7,-19688.72,1617096.45,156743.05,4068.0,1376332.38,310506.3,-3156.23
2,Republican,633194243.7,48646339.25,663662.68,455243717.29,74940741.84,10255204.82,697440803.82,68007772.51,10607929.23
3,Third-Party,5471020.1,658788.33,1530159.31,8860109.22,531708.04,36375.05,3722193.74,333749.0,114731.23
4,Unknown,0.0,0.0,0.0,0.0,0.0,0.0,28042.01,7944.09,0.0


Calculate the proportion of in-state, out-of-state and unknown contributions.

In [99]:
contributions_by_party["pct_18_in_state"] = contributions_by_party["2018_in_state"] / (contributions_by_party["2018_in_state"] + contributions_by_party["2018_out_of_state"] + contributions_by_party["2018_unknown"])
contributions_by_party["pct_18_out_of_state"] = contributions_by_party["2018_out_of_state"] / (contributions_by_party["2018_in_state"] + contributions_by_party["2018_out_of_state"] + contributions_by_party["2018_unknown"])
contributions_by_party["pct_18_unknown"] = contributions_by_party["2018_unknown"] / (contributions_by_party["2018_in_state"] + contributions_by_party["2018_out_of_state"] + contributions_by_party["2018_unknown"])
contributions_by_party["pct_14_in_state"] = contributions_by_party["2014_in_state"] / (contributions_by_party["2014_in_state"] + contributions_by_party["2014_out_of_state"] + contributions_by_party["2014_unknown"])
contributions_by_party["pct_14_out_of_state"] = contributions_by_party["2014_out_of_state"] / (contributions_by_party["2014_in_state"] + contributions_by_party["2014_out_of_state"] + contributions_by_party["2014_unknown"])
contributions_by_party["pct_14_unknown"] = contributions_by_party["2010_unknown"] / (contributions_by_party["2014_in_state"] + contributions_by_party["2014_out_of_state"] + contributions_by_party["2014_unknown"])
contributions_by_party["pct_10_in_state"] = contributions_by_party["2010_in_state"] / (contributions_by_party["2010_in_state"] + contributions_by_party["2010_out_of_state"] + contributions_by_party["2010_unknown"])
contributions_by_party["pct_10_out_of_state"] = contributions_by_party["2014_out_of_state"] / (contributions_by_party["2010_in_state"] + contributions_by_party["2010_out_of_state"] + contributions_by_party["2010_unknown"])
contributions_by_party["pct_10_unknown"] = contributions_by_party["2010_unknown"] / (contributions_by_party["2010_in_state"] + contributions_by_party["2010_out_of_state"] + contributions_by_party["2010_unknown"])
contributions_by_party

Unnamed: 0,party,2010_in_state,2010_out_of_state,2010_unknown,2014_in_state,2014_out_of_state,2014_unknown,2018_in_state,2018_out_of_state,2018_unknown,pct_18_in_state,pct_18_out_of_state,pct_18_unknown,pct_14_in_state,pct_14_out_of_state,pct_14_unknown,pct_10_in_state,pct_10_out_of_state,pct_10_unknown
0,Democratic,440437500.65,57510341.18,1245732.64,382680047.01,68220969.39,5995721.92,598188686.04,91865905.61,6668177.58,0.86,0.13,0.01,0.84,0.15,0.0,0.88,0.14,0.0
1,Nonpartisan,942676.0,136753.7,-19688.72,1617096.45,156743.05,4068.0,1376332.38,310506.3,-3156.23,0.82,0.18,-0.0,0.91,0.09,-0.01,0.89,0.15,-0.02
2,Republican,633194243.7,48646339.25,663662.68,455243717.29,74940741.84,10255204.82,697440803.82,68007772.51,10607929.23,0.9,0.09,0.01,0.84,0.14,0.0,0.93,0.11,0.0
3,Third-Party,5471020.1,658788.33,1530159.31,8860109.22,531708.04,36375.05,3722193.74,333749.0,114731.23,0.89,0.08,0.03,0.94,0.06,0.16,0.71,0.07,0.2
4,Unknown,0.0,0.0,0.0,0.0,0.0,0.0,28042.01,7944.09,0.0,0.78,0.22,0.0,,,,,,


Export the data.

In [100]:
contributions_by_party.to_csv("contributions_by_party.csv", index=False)

## Calculate 2018 out-of-state contributions by redistricting status

Filter the contributions data to the 2018 cycle.

In [12]:
contributions_18 = contributions[contributions["year"] == 2018]
contributions_18.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 2132284 entries, 0 to 2132283
Data columns (total 16 columns):
candidate                 object
election_status           object
party                     object
state                     object
year                      int64
office                    object
contributor               object
amount                    float64
date                      object
in_out_state              object
standardized_office       object
single_house_district     object
independent_commission    object
no_veto                   object
two_year_term             object
latest_month              object
dtypes: float64(1), int64(1), object(14)
memory usage: 276.6+ MB


Filter 2018 contributions to those in races where the office plays a role in redistricting.

In [20]:
redistricting_contributions = contributions_18[
    (
        (contributions_18["standardized_office"] == "GOVERNOR/LIEUTENANT GOVERNOR") &
        (contributions_18["single_house_district"] != "X") &
        (contributions_18["independent_commission"] != "X") &
        (contributions_18["no_veto"] != "X")
    )
    |
    (
        (
            (contributions_18["standardized_office"] == "STATE HOUSE/ASSEMBLY") |
            (contributions_18["standardized_office"] == "STATE SENATE")
        ) &
        (contributions_18["single_house_district"] != "X") &
        (contributions_18["independent_commission"] != "X") &
        (contributions_18["two_year_term"] != "X")
    )
].reset_index()
redistricting_contributions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1737332 entries, 0 to 1737331
Data columns (total 17 columns):
index                     int64
candidate                 object
election_status           object
party                     object
state                     object
year                      int64
office                    object
contributor               object
amount                    float64
date                      object
in_out_state              object
standardized_office       object
single_house_district     object
independent_commission    object
no_veto                   object
two_year_term             object
latest_month              object
dtypes: float64(1), int64(2), object(14)
memory usage: 225.3+ MB


In [21]:
non_redistricting_contributions = contributions_18[
    (
        (contributions_18["standardized_office"] == "GOVERNOR/LIEUTENANT GOVERNOR") &
        (contributions_18["single_house_district"] == "X") |
        (contributions_18["independent_commission"] == "X") |
        (contributions_18["no_veto"] == "X")
    )
    |
    (
        (
            (contributions_18["standardized_office"] == "STATE HOUSE/ASSEMBLY") |
            (contributions_18["standardized_office"] == "STATE SENATE")
        ) &
        (contributions_18["single_house_district"] == "X") |
        (contributions_18["independent_commission"] == "X") |
        (contributions_18["two_year_term"] == "X")
    )
].reset_index()
non_redistricting_contributions.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 383465 entries, 0 to 383464
Data columns (total 17 columns):
index                     383465 non-null int64
candidate                 383465 non-null object
election_status           383465 non-null object
party                     383465 non-null object
state                     383465 non-null object
year                      383465 non-null int64
office                    383465 non-null object
contributor               383465 non-null object
amount                    383465 non-null float64
date                      380656 non-null object
in_out_state              383465 non-null object
standardized_office       336387 non-null object
single_house_district     20823 non-null object
independent_commission    227702 non-null object
no_veto                   134940 non-null object
two_year_term             107850 non-null object
latest_month              0 non-null object
dtypes: float64(1), int64(2), object(14)
memory usage: 49.7+ MB

In [23]:
contributions_18.groupby("standardized_office").size()

standardized_office
GOVERNOR/LIEUTENANT GOVERNOR    1108991
STATE HOUSE                      586715
STATE SENATE                     350923
dtype: int64

In [24]:
contributions_18[contributions_18["standardized_office"].isna()]

Unnamed: 0,candidate,election_status,party,state,year,office,contributor,amount,date,in_out_state,standardized_office,single_house_district,independent_commission,no_veto,two_year_term,latest_month
656000,"HASKIN, GREG",Lost-Top Two Primary,Republican,CA,2018,ASSEMBLY DISTRICT 072,"HASKIN, GREG",120000.00,2018-04-17,in-state,,,X,,,
656007,"JEANDRON, GARY",Lost-Top Two Primary,Republican,CA,2018,ASSEMBLY DISTRICT 042,"JEANDRON, GARY",100000.00,2018-02-19,in-state,,,X,,,
656008,"HASKIN, GREG",Lost-Top Two Primary,Republican,CA,2018,ASSEMBLY DISTRICT 072,"HASKIN, GREG",100000.00,2017-12-07,in-state,,,X,,,
656018,"CHOI, STEVEN S",Pending-General,Republican,CA,2018,ASSEMBLY DISTRICT 068,"CHOI, STEVEN S",60000.00,2017-12-31,in-state,,,X,,,
656019,"HASKIN, GREG",Lost-Top Two Primary,Republican,CA,2018,ASSEMBLY DISTRICT 072,"HASKIN, GREG",60000.00,2018-05-15,in-state,,,X,,,
656020,"HASKIN, GREG",Lost-Top Two Primary,Republican,CA,2018,ASSEMBLY DISTRICT 072,"HASKIN, GREG",60000.00,2018-05-18,in-state,,,X,,,
656021,"ACOSTA, DANTE",Pending-General,Republican,CA,2018,ASSEMBLY DISTRICT 038,CALIFORNIA REPUBLICAN PARTY,60000.00,2018-04-18,in-state,,,X,,,
656024,"GABRIEL, JESSE",Won-General,Democratic,CA,2018,ASSEMBLY DISTRICT 045,"GABRIEL, JESSE",50000.00,2018-01-03,in-state,,,X,,,
656025,"KATZ, ANDY",Lost-Top Two Primary,Democratic,CA,2018,ASSEMBLY DISTRICT 015,"KATZ, ANDY",50000.00,2017-12-30,in-state,,,X,,,
656029,"GUBLER, WARREN",Lost-Top Two Primary,Republican,CA,2018,ASSEMBLY DISTRICT 026,"GUBLER, WARREN",50000.00,2017-09-28,in-state,,,X,,,


## Export the data

In [None]:
contributions_by_year.to_csv("contributions_by_year.csv", index=False)
contributions_by_party.to_csv("contributions_by_party.csv", index=False)
contributions_by_redistricting_status.to_csv("contributions_by_redistricting_status.csv", index=False)