# Contributions from out-of-state donors

In [1]:
import pandas as pd
import us

%load_ext jupyternotify

pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 500)

<IPython.core.display.Javascript object>

Import [National Institute on Money in Politics](https://www.followthemoney.org/) API key.

In [2]:
nimp_key = open("nimp_api_key.txt", "r").readline()

Import [Census Bureau](https://www.census.gov/) API key.

In [3]:
census_key = open("census_api_key.txt", "r").readline()

## Import and format the data

### Import data on donors to state legislative races in 2018

In [4]:
%%notify
contribs_18 = pd.read_csv("https://www.followthemoney.org/aaengine/aafetch.php?dt=1&y=2018&c-exi=1&c-r-ot=S,H&gro=c-t-id,d-eid,d-ccb,d-ins,d-empl,d-occupation&APIKey="+nimp_key+"&mode=csv", encoding="ISO-8859-1")
contribs_18.to_csv("data/contribs_18.csv", index=False)
contribs_18 = pd.read_csv("data/contribs_18.csv", encoding="ISO-8859-1")
contribs_18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 686826 entries, 0 to 686825
Data columns (total 60 columns):
request                        686826 non-null object
Candidate:token                686826 non-null object
Candidate:id                   686826 non-null int64
Candidate                      686826 non-null object
Candidate_Entity:token         686826 non-null object
Candidate_Entity:id            686826 non-null int64
Candidate_Entity               686826 non-null object
Election_Status:token          686826 non-null object
Election_Status:id             686826 non-null object
Election_Status                686826 non-null object
Status_of_Candidate:token      686826 non-null object
Status_of_Candidate:id         686826 non-null int64
Status_of_Candidate            686826 non-null object
Specific_Party:token           686826 non-null object
Specific_Party:id              686826 non-null int64
Specific_Party                 686826 non-null object
General_Party:token          

<IPython.core.display.Javascript object>

In [5]:
contribs_18.head(1)

Unnamed: 0,request,Candidate:token,Candidate:id,Candidate,Candidate_Entity:token,Candidate_Entity:id,Candidate_Entity,Election_Status:token,Election_Status:id,Election_Status,Status_of_Candidate:token,Status_of_Candidate:id,Status_of_Candidate,Specific_Party:token,Specific_Party:id,Specific_Party,General_Party:token,General_Party:id,General_Party,Election_Jurisdiction:token,Election_Jurisdiction:id,Election_Jurisdiction,Election_Year:token,Election_Year:id,Election_Year,Election_Type:token,Election_Type:id,Election_Type,Office_Sought:token,Office_Sought:id,Office_Sought,Incumbency_Status:token,Incumbency_Status:id,Incumbency_Status,Contributor:token,Contributor:id,Contributor,Type_of_Contributor:token,Type_of_Contributor:id,Type_of_Contributor,Specific_Business:token,Specific_Business:id,Specific_Business,General_Industry:token,General_Industry:id,General_Industry,Broad_Sector:token,Broad_Sector:id,Broad_Sector,In-State:token,In-State:id,In-State,Employer:token,Employer:id,Employer,Occupation:token,Occupation:id,Occupation,#_of_Records,Total_$
0,show-me?t=1&y=2018&c-exi=1&c-r-ot=S%2CH&c-t-id...,c-t-id,229342,"HUFFINES, PHILLIP WAYNE",c-t-eid,11168692,"HUFFINES, PHILLIP WAYNE",c-t-ftsts,Lost-Primary,Lost-Primary,c-t-sts,2,Lost,c-t-pt,139,REPUBLICAN,c-t-p,2,Republican,s,TX,TX,y,2018,2018,c-r-t,1,Standard,c-r-osid,7531,SENATE DISTRICT 008,c-t-ico,O,Open,d-eid,11168692,"HUFFINES, PHILLIP WAYNE",d-et,2,Individual,d-ccb,450,Candidate Self Finance,d-cci,133,Candidate Contributions,d-ccg,17,Candidate Contributions,d-ins,1,1,d-empl,HUFFINES COMMUNITIES,HUFFINES COMMUNITIES,d-occupation,REAL ESTATE INVESTMENT AND DEVLOPMENT,REAL ESTATE INVESTMENT AND DEVLOPMENT,5,5250000.0


In [6]:
contribs_18 = contribs_18[["Candidate", "Candidate:id", "Election_Jurisdiction", "Office_Sought", "Election_Year", "General_Party", "Incumbency_Status", "Contributor", "Contributor:id", "Type_of_Contributor", "Specific_Business", "General_Industry", "Broad_Sector", "Employer", "Employer:id", "Occupation", "Occupation:id", "In-State", "Total_$", "#_of_Records"]]
contribs_18.rename(columns={"Candidate": "candidate", "Candidate:id": "candidate_id", "Election_Jurisdiction": "state", "Office_Sought": "district", "Election_Year": "year", "General_Party": "party", "Incumbency_Status": "incumbency_status", "Contributor": "contributor", "Contributor:id": "contributor_id", "Type_of_Contributor": "contributor_type", "Specific_Business": "business", "General_Industry": "industry", "Broad_Sector": "sector", "Employer": "employer", "Employer:id": "employer_id", "Occupation": "occupation", "Occupation:id": "occupation_id", "In-State": "in_out_state", "Total_$": "contributions", "#_of_Records": "records"}, inplace=True)
contribs_18.head(1)

Unnamed: 0,candidate,candidate_id,state,district,year,party,incumbency_status,contributor,contributor_id,contributor_type,business,industry,sector,employer,employer_id,occupation,occupation_id,in_out_state,contributions,records
0,"HUFFINES, PHILLIP WAYNE",229342,TX,SENATE DISTRICT 008,2018,Republican,Open,"HUFFINES, PHILLIP WAYNE",11168692,Individual,Candidate Self Finance,Candidate Contributions,Candidate Contributions,HUFFINES COMMUNITIES,HUFFINES COMMUNITIES,REAL ESTATE INVESTMENT AND DEVLOPMENT,REAL ESTATE INVESTMENT AND DEVLOPMENT,1,5250000.0,5


Rename the categories in the in-vs.-out-of-state column.

In [7]:
# 0 = out-of-state, 1 = in-state, 2 = unknown
contribs_18["in_out_state"] = contribs_18["in_out_state"].replace({0: "out-of-state", 1: "in-state", 2: "unknown"})
contribs_18.head(1)

Unnamed: 0,candidate,candidate_id,state,district,year,party,incumbency_status,contributor,contributor_id,contributor_type,business,industry,sector,employer,employer_id,occupation,occupation_id,in_out_state,contributions,records
0,"HUFFINES, PHILLIP WAYNE",229342,TX,SENATE DISTRICT 008,2018,Republican,Open,"HUFFINES, PHILLIP WAYNE",11168692,Individual,Candidate Self Finance,Candidate Contributions,Candidate Contributions,HUFFINES COMMUNITIES,HUFFINES COMMUNITIES,REAL ESTATE INVESTMENT AND DEVLOPMENT,REAL ESTATE INVESTMENT AND DEVLOPMENT,in-state,5250000.0,5


Filter out unitemized donations as it is impossible to determine where those contributions originated.

In [8]:
contribs_18 = contribs_18[contribs_18["contributor"] != "UNITEMIZED DONATIONS"]
contribs_18.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 684229 entries, 0 to 686825
Data columns (total 20 columns):
candidate            684229 non-null object
candidate_id         684229 non-null int64
state                684229 non-null object
district             684229 non-null object
year                 684229 non-null int64
party                684229 non-null object
incumbency_status    684229 non-null object
contributor          684204 non-null object
contributor_id       684229 non-null int64
contributor_type     684229 non-null object
business             684229 non-null object
industry             684229 non-null object
sector               684229 non-null object
employer             554998 non-null object
employer_id          554998 non-null object
occupation           292097 non-null object
occupation_id        292097 non-null object
in_out_state         684229 non-null object
contributions        684229 non-null float64
records              684229 non-null int64
dtypes: floa

### Import data on donors to state legislative races in 2016

In [9]:
%%notify
contribs_16 = pd.read_csv("https://www.followthemoney.org/aaengine/aafetch.php?dt=1&y=2016&c-exi=1&c-r-ot=S,H&gro=c-t-id,d-eid,d-ccb,d-ins,d-empl,d-occupation&APIKey="+nimp_key+"&mode=csv")
contribs_16.to_csv("data/contribs_16.csv", index=False)
contribs_16 = pd.read_csv("data/contribs_16.csv", encoding="ISO-8859-1")
contribs_16.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1466910 entries, 0 to 1466909
Data columns (total 60 columns):
request                        1466910 non-null object
Candidate:token                1466910 non-null object
Candidate:id                   1466910 non-null int64
Candidate                      1466910 non-null object
Candidate_Entity:token         1466910 non-null object
Candidate_Entity:id            1466910 non-null int64
Candidate_Entity               1466910 non-null object
Election_Status:token          1466910 non-null object
Election_Status:id             1466910 non-null object
Election_Status                1466910 non-null object
Status_of_Candidate:token      1466910 non-null object
Status_of_Candidate:id         1466910 non-null int64
Status_of_Candidate            1466910 non-null object
Specific_Party:token           1466910 non-null object
Specific_Party:id              1466910 non-null int64
Specific_Party                 1466910 non-null object
General_Par

<IPython.core.display.Javascript object>

In [10]:
contribs_16.head(1)

Unnamed: 0,request,Candidate:token,Candidate:id,Candidate,Candidate_Entity:token,Candidate_Entity:id,Candidate_Entity,Election_Status:token,Election_Status:id,Election_Status,Status_of_Candidate:token,Status_of_Candidate:id,Status_of_Candidate,Specific_Party:token,Specific_Party:id,Specific_Party,General_Party:token,General_Party:id,General_Party,Election_Jurisdiction:token,Election_Jurisdiction:id,Election_Jurisdiction,Election_Year:token,Election_Year:id,Election_Year,Election_Type:token,Election_Type:id,Election_Type,Office_Sought:token,Office_Sought:id,Office_Sought,Incumbency_Status:token,Incumbency_Status:id,Incumbency_Status,Contributor:token,Contributor:id,Contributor,Type_of_Contributor:token,Type_of_Contributor:id,Type_of_Contributor,Specific_Business:token,Specific_Business:id,Specific_Business,General_Industry:token,General_Industry:id,General_Industry,Broad_Sector:token,Broad_Sector:id,Broad_Sector,In-State:token,In-State:id,In-State,Employer:token,Employer:id,Employer,Occupation:token,Occupation:id,Occupation,#_of_Records,Total_$
0,show-me?t=1&y=2016&c-exi=1&c-r-ot=S%2CH&c-t-id...,c-t-id,195402,"DURKIN, JAMES B (JIM)",c-t-eid,3105296,"DURKIN, JAMES B (JIM)",c-t-ftsts,Won-General,Won-General,c-t-sts,1,Won,c-t-pt,139,REPUBLICAN,c-t-p,2,Republican,s,IL,IL,y,2016,2016,c-r-t,1,Standard,c-r-osid,2268,HOUSE DISTRICT 082,c-t-ico,I,Incumbent,d-eid,14414073,"RAUNER, BRUCE VINCENT & DIANA MENDLEY",d-et,2,Individual,d-ccb,439,Candidate Contribution,d-cci,133,Candidate Contributions,d-ccg,17,Candidate Contributions,d-ins,1,1,d-empl,STATE OF ILLINOIS,STATE OF ILLINOIS,d-occupation,GOVERNOR,GOVERNOR,1,9000000.0


In [11]:
contribs_16 = contribs_16[["Candidate", "Candidate:id", "Election_Jurisdiction", "Office_Sought", "Election_Year", "General_Party", "Incumbency_Status", "Contributor", "Contributor:id", "Type_of_Contributor", "Specific_Business", "General_Industry", "Broad_Sector", "Employer", "Employer:id", "Occupation", "Occupation:id", "In-State", "Total_$", "#_of_Records"]]
contribs_16.rename(columns={"Candidate": "candidate", "Candidate:id": "candidate_id", "Election_Jurisdiction": "state", "Office_Sought": "district", "Election_Year": "year", "General_Party": "party", "Incumbency_Status": "incumbency_status", "Contributor": "contributor", "Contributor:id": "contributor_id", "Type_of_Contributor": "contributor_type", "Specific_Business": "business", "General_Industry": "industry", "Broad_Sector": "sector", "Employer": "employer", "Employer:id": "employer_id", "Occupation": "occupation", "Occupation:id": "occupation_id", "In-State": "in_out_state", "Total_$": "contributions", "#_of_Records": "records"}, inplace=True)
contribs_16.head(1)

Unnamed: 0,candidate,candidate_id,state,district,year,party,incumbency_status,contributor,contributor_id,contributor_type,business,industry,sector,employer,employer_id,occupation,occupation_id,in_out_state,contributions,records
0,"DURKIN, JAMES B (JIM)",195402,IL,HOUSE DISTRICT 082,2016,Republican,Incumbent,"RAUNER, BRUCE VINCENT & DIANA MENDLEY",14414073,Individual,Candidate Contribution,Candidate Contributions,Candidate Contributions,STATE OF ILLINOIS,STATE OF ILLINOIS,GOVERNOR,GOVERNOR,1,9000000.0,1


Again rename the categories in the in-vs.-out-of-state column.

In [12]:
# 0 = out-of-state, 1 = in-state, 2 = unknown
contribs_16["in_out_state"] = contribs_16["in_out_state"].replace({0: "out-of-state", 1: "in-state", 2: "unknown"})
contribs_16.head(1)

Unnamed: 0,candidate,candidate_id,state,district,year,party,incumbency_status,contributor,contributor_id,contributor_type,business,industry,sector,employer,employer_id,occupation,occupation_id,in_out_state,contributions,records
0,"DURKIN, JAMES B (JIM)",195402,IL,HOUSE DISTRICT 082,2016,Republican,Incumbent,"RAUNER, BRUCE VINCENT & DIANA MENDLEY",14414073,Individual,Candidate Contribution,Candidate Contributions,Candidate Contributions,STATE OF ILLINOIS,STATE OF ILLINOIS,GOVERNOR,GOVERNOR,in-state,9000000.0,1


Again filter out unitemized donations.

In [13]:
contribs_16 = contribs_16[contribs_16["contributor"] != "UNITEMIZED DONATIONS"]
contribs_16.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 1460407 entries, 0 to 1466909
Data columns (total 20 columns):
candidate            1460407 non-null object
candidate_id         1460407 non-null int64
state                1460407 non-null object
district             1460407 non-null object
year                 1460407 non-null int64
party                1460407 non-null object
incumbency_status    1460407 non-null object
contributor          1460402 non-null object
contributor_id       1460407 non-null int64
contributor_type     1460407 non-null object
business             1460407 non-null object
industry             1460407 non-null object
sector               1460407 non-null object
employer             1094163 non-null object
employer_id          1094163 non-null object
occupation           629497 non-null object
occupation_id        629497 non-null object
in_out_state         1460407 non-null object
contributions        1460407 non-null float64
records              1460407 non-nul

### Import and format state population data

Import 2017 population estimates from the Census Bureau.

In [14]:
query = "https://api.census.gov/data/2017/pep/population?get=POP,GEONAME&for=state:*&key="+census_key
population = pd.read_json(query)
population = population.drop(population.index[0]).reset_index(drop=True) # Drop first row containing column names
population.columns = ["population", "state", "fips"]
population["population"] = pd.to_numeric(population["population"])
population.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 52 entries, 0 to 51
Data columns (total 3 columns):
population    52 non-null int64
state         52 non-null object
fips          52 non-null object
dtypes: int64(1), object(2)
memory usage: 1.3+ KB


In [15]:
population.head(1)

Unnamed: 0,population,state,fips
0,4874747,Alabama,1


We need to add state abbreviations in order to later join the population data with the campaign contributions data. To do so, we will use the [US package](https://github.com/unitedstates/python-us).

In [16]:
states = pd.DataFrame.from_dict(us.states.mapping("abbr", "fips"), orient="index", columns=["fips"])
states.reset_index(inplace=True)
states.rename(columns={"index": "abbreviation"}, inplace=True)
states.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 59 entries, 0 to 58
Data columns (total 2 columns):
abbreviation    59 non-null object
fips            56 non-null object
dtypes: object(2)
memory usage: 1.0+ KB


In [17]:
population = population.merge(states, on="fips")
population.drop("state", axis=1, inplace=True)
population.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 52 entries, 0 to 51
Data columns (total 3 columns):
population      52 non-null int64
fips            52 non-null object
abbreviation    52 non-null object
dtypes: int64(1), object(2)
memory usage: 1.6+ KB


In [18]:
population.head(1)

Unnamed: 0,population,fips,abbreviation
0,4874747,1,AL


## Join and analyze the data

### Calculate out-of-state contributions by state.

Calculate contributions by in-vs.-out-of-state status and group by state.

In [19]:
contribs_by_state_18 = contribs_18.groupby(["state", "in_out_state"])["contributions"].sum().reset_index()
contribs_by_state_18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 116 entries, 0 to 115
Data columns (total 3 columns):
state            116 non-null object
in_out_state     116 non-null object
contributions    116 non-null float64
dtypes: float64(1), object(2)
memory usage: 2.8+ KB


In [20]:
contribs_by_state_18.head()

Unnamed: 0,state,in_out_state,contributions
0,AK,in-state,2423533.25
1,AK,out-of-state,110922.49
2,AK,unknown,9285.0
3,AL,in-state,5185371.17
4,AL,out-of-state,532085.12


Pivot dataframe to aggregate each state's data in a single row.

In [21]:
contribs_by_state_18 = pd.pivot_table(contribs_by_state_18, index=["state"], columns=["in_out_state"]).reset_index()
contribs_by_state_18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 4 columns):
(state, )                        41 non-null object
(contributions, in-state)        41 non-null float64
(contributions, out-of-state)    41 non-null float64
(contributions, unknown)         34 non-null float64
dtypes: float64(3), object(1)
memory usage: 1.4+ KB


It appears that seven states have no contributions categorized as "unknown." Let's set those values equal to zero to be sure any calculations we run on them are correct.

In [22]:
contribs_by_state_18.fillna(0, inplace=True)
contribs_by_state_18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 4 columns):
(state, )                        41 non-null object
(contributions, in-state)        41 non-null float64
(contributions, out-of-state)    41 non-null float64
(contributions, unknown)         41 non-null float64
dtypes: float64(3), object(1)
memory usage: 1.4+ KB


Flatten the resulting dataframe's multi-index columns.

In [23]:
contribs_by_state_18.columns = ["_".join(column).replace("-","_").strip("_") for column in contribs_by_state_18.columns.values]
contribs_by_state_18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 41 entries, 0 to 40
Data columns (total 4 columns):
state                         41 non-null object
contributions_in_state        41 non-null float64
contributions_out_of_state    41 non-null float64
contributions_unknown         41 non-null float64
dtypes: float64(3), object(1)
memory usage: 1.4+ KB


Join state population and campaign contributions data.

In [24]:
contribs_by_state_18 = contribs_by_state_18.merge(population, left_on="state", right_on="abbreviation")
contribs_by_state_18.drop("abbreviation", axis=1, inplace=True)
contribs_by_state_18 = contribs_by_state_18[["state", "fips", "contributions_in_state", "contributions_out_of_state", "contributions_unknown", "population"]]
contribs_by_state_18.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 41 entries, 0 to 40
Data columns (total 6 columns):
state                         41 non-null object
fips                          41 non-null object
contributions_in_state        41 non-null float64
contributions_out_of_state    41 non-null float64
contributions_unknown         41 non-null float64
population                    41 non-null int64
dtypes: float64(3), int64(1), object(2)
memory usage: 2.2+ KB


In [25]:
contribs_by_state_18.head(1)

Unnamed: 0,state,fips,contributions_in_state,contributions_out_of_state,contributions_unknown,population
0,AK,2,2423533.25,110922.49,9285.0,739795


Calculate the proportion of in-vs.-out-of-state contributions and the contributions per 1,000 residents.

In [26]:
contribs_by_state_18["pct_out_of_state"] = contribs_by_state_18["contributions_out_of_state"] / (contribs_by_state_18["contributions_in_state"] + contribs_by_state_18["contributions_out_of_state"] + contribs_by_state_18["contributions_unknown"])
contribs_by_state_18["out_of_state_per_thousand_pop"] = contribs_by_state_18["contributions_out_of_state"] / contribs_by_state_18["population"] * 1000
contribs_by_state_18.info()

<class 'pandas.core.frame.DataFrame'>
Int64Index: 41 entries, 0 to 40
Data columns (total 8 columns):
state                            41 non-null object
fips                             41 non-null object
contributions_in_state           41 non-null float64
contributions_out_of_state       41 non-null float64
contributions_unknown            41 non-null float64
population                       41 non-null int64
pct_out_of_state                 41 non-null float64
out_of_state_per_thousand_pop    41 non-null float64
dtypes: float64(5), int64(1), object(2)
memory usage: 2.9+ KB


Which states are seeing the highest proportion of out-of-state fundings, both as a proportion of all contributions and relative to their populations?

In [27]:
contribs_by_state_18.sort_values("pct_out_of_state", ascending=False).head()

Unnamed: 0,state,fips,contributions_in_state,contributions_out_of_state,contributions_unknown,population,pct_out_of_state,out_of_state_per_thousand_pop
40,WY,56,10068.82,11025.0,0.0,579315,0.522665,19.031097
28,NV,32,204373.96,128025.0,0.0,2998039,0.385155,42.702913
14,KS,20,839569.09,391701.06,21063.02,2913123,0.312777,134.460872
27,NM,35,2283457.57,762145.22,21297.19,2088070,0.248507,364.999842
11,ID,16,864658.82,250755.78,1520.0,1716943,0.224504,146.047819


In [28]:
contribs_by_state_18.sort_values("out_of_state_per_thousand_pop", ascending=False).head()

Unnamed: 0,state,fips,contributions_in_state,contributions_out_of_state,contributions_unknown,population,pct_out_of_state,out_of_state_per_thousand_pop
12,IL,17,50311839.99,11161066.39,78679.49,12802023,0.181329,871.820523
18,MD,24,23892706.98,5202013.18,2014270.47,6052177,0.167219,859.527601
27,NM,35,2283457.57,762145.22,21297.19,2088070,0.248507,364.999842
32,OR,41,6611843.89,1431338.82,170698.23,4142776,0.174259,345.502344
4,CA,6,78068725.68,13343195.37,79475.13,39536653,0.145841,337.48925


### Calculate out-of-state contributions by race.

Calculate contributions by in-vs.-out-of-state status and group by district.

In [29]:
contribs_by_race_18 = contribs_18.groupby(["state", "district", "in_out_state"])["contributions"].sum().reset_index()
contribs_by_race_18["state_district"] = contribs_by_race_18["state"] + " " + contribs_by_race_18["district"] # Create unique cross-state district identifier
contribs_by_race_18.drop(["state", "district"], axis=1, inplace=True)
contribs_by_race_18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 8221 entries, 0 to 8220
Data columns (total 3 columns):
in_out_state      8221 non-null object
contributions     8221 non-null float64
state_district    8221 non-null object
dtypes: float64(1), object(2)
memory usage: 192.8+ KB


In [30]:
contribs_by_race_18.head(10)

Unnamed: 0,in_out_state,contributions,state_district
0,in-state,135899.42,AK HOUSE DISTRICT 001
1,out-of-state,1350.0,AK HOUSE DISTRICT 001
2,in-state,20618.19,AK HOUSE DISTRICT 002
3,out-of-state,1801.0,AK HOUSE DISTRICT 002
4,in-state,1100.0,AK HOUSE DISTRICT 003
5,in-state,71984.34,AK HOUSE DISTRICT 004
6,out-of-state,3969.0,AK HOUSE DISTRICT 004
7,unknown,55.0,AK HOUSE DISTRICT 004
8,in-state,16708.9,AK HOUSE DISTRICT 005
9,out-of-state,20.0,AK HOUSE DISTRICT 005


Pivot dataframe to aggregate each district's data in a single row.

In [31]:
contribs_by_race_18 = pd.pivot_table(contribs_by_race_18, index=["state_district"], columns=["in_out_state"]).reset_index()
contribs_by_race_18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3746 entries, 0 to 3745
Data columns (total 4 columns):
(state_district, )               3746 non-null object
(contributions, in-state)        3726 non-null float64
(contributions, out-of-state)    3339 non-null float64
(contributions, unknown)         1156 non-null float64
dtypes: float64(3), object(1)
memory usage: 117.1+ KB


Again, some records have no contributions for certain categories. Let's set those values equal to zero to be sure any calculations we run on them are correct.

In [32]:
contribs_by_race_18.fillna(0, inplace=True)
contribs_by_race_18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3746 entries, 0 to 3745
Data columns (total 4 columns):
(state_district, )               3746 non-null object
(contributions, in-state)        3746 non-null float64
(contributions, out-of-state)    3746 non-null float64
(contributions, unknown)         3746 non-null float64
dtypes: float64(3), object(1)
memory usage: 117.1+ KB


Flatten the resulting dataframe's multi-index columns.

In [33]:
contribs_by_race_18.columns = ["_".join(column).replace("-","_").strip("_") for column in contribs_by_race_18.columns.values]
contribs_by_race_18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3746 entries, 0 to 3745
Data columns (total 4 columns):
state_district                3746 non-null object
contributions_in_state        3746 non-null float64
contributions_out_of_state    3746 non-null float64
contributions_unknown         3746 non-null float64
dtypes: float64(3), object(1)
memory usage: 117.1+ KB


Calculate the proportion of in-vs.-out-of-state contributions.

In [34]:
contribs_by_race_18["pct_out_of_state"] = contribs_by_race_18["contributions_out_of_state"] / (contribs_by_race_18["contributions_in_state"] + contribs_by_race_18["contributions_out_of_state"] + contribs_by_race_18["contributions_unknown"])
contribs_by_race_18.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3746 entries, 0 to 3745
Data columns (total 5 columns):
state_district                3746 non-null object
contributions_in_state        3746 non-null float64
contributions_out_of_state    3746 non-null float64
contributions_unknown         3746 non-null float64
pct_out_of_state              3746 non-null float64
dtypes: float64(4), object(1)
memory usage: 146.4+ KB


Which races are seeing the highest proportion of out-of-state fundings, both in raw numbers and as a proportion of all contributions?

In [35]:
contribs_by_race_18.sort_values("contributions_out_of_state", ascending=False).head(25)

Unnamed: 0,state_district,contributions_in_state,contributions_out_of_state,contributions_unknown,pct_out_of_state
1212,IL SENATE DISTRICT 006,4388978.15,1500250.63,0.0,0.254745
1112,IL HOUSE DISTRICT 022,1848372.73,785900.66,0.0,0.298337
1840,MD HOUSE DISTRICT 046,567605.85,486782.69,74030.03,0.431385
1234,IL SENATE DISTRICT 039,1678836.52,457602.64,0.0,0.214189
1172,IL HOUSE DISTRICT 082,2231549.1,416484.37,700.0,0.157239
3078,PA SENATE DISTRICT 034,1305371.64,412200.0,0.0,0.23999
1815,MD HOUSE DISTRICT 030A,1249223.71,407824.5,4350.0,0.245471
419,CA SENATE DISTRICT 034,1787526.3,333430.0,0.0,0.157207
342,CA ASSEMBLY DISTRICT 021,769408.91,326000.0,0.0,0.297606
420,CA SENATE DISTRICT 036,1031454.3,320500.0,0.0,0.237064


In [36]:
contribs_by_race_18.sort_values("pct_out_of_state", ascending=False).head(25)

Unnamed: 0,state_district,contributions_in_state,contributions_out_of_state,contributions_unknown,pct_out_of_state
1050,ID HOUSE DISTRICT 031-POSITION B,-750.0,1050.0,0.0,3.5
3163,SC HOUSE DISTRICT 047,3500.0,8000.0,-3650.0,1.019108
1037,ID HOUSE DISTRICT 024-POSITION B,0.0,100.0,0.0,1.0
3732,WY HOUSE DISTRICT 007,0.0,550.0,0.0,1.0
311,AZ SENATE DISTRICT 019,0.0,200.0,0.0,1.0
780,GA HOUSE DISTRICT 170,0.0,500.0,0.0,1.0
3737,WY HOUSE DISTRICT 032,0.0,200.0,0.0,1.0
138,AL SENATE DISTRICT 020,0.0,1500.0,0.0,1.0
1004,ID HOUSE DISTRICT 007-POSITION B,0.0,250.0,0.0,1.0
3738,WY HOUSE DISTRICT 047,0.0,350.0,0.0,1.0


### Calculate the largest out-of-state donors in 2018 and compare them with the largest out-of-state donors in 2016.

Calculate 2018 out-of-state contributions by donor.