In [1]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns

### Part 2: Exploratory Data Analysis
Using your scraped data, investigates different relationships between candidates and the amount of money they raised. Here are some suggestions to get you started, but feel free to pose you own questions or do additional exploration:  
    a. How often does the candidate who raised more money win a race?  
    b. How often does the candidate who spent more money win a race?  
    c. Does the difference between either money raised or money spent seem to influence the likelihood of a candidate winning a race?  
    d. How often does the incumbent candidate win a race?  
    e. Can you detect any relationship between amount of money raised and the incumbent status of a candidate?

In [2]:
House_of_Rep = pd.read_csv('../data/HOR_2020.CSV')
House_of_Rep

Unnamed: 0,Candidate,Party,State,District,Incumbent,Winner,Percentage of Vote,Raised,Spent,Funds Remaining
0,Jerry Carl,(R),Alabama,District 01,False,True,64.9,1971321,1859349,111973
1,James Averhart,(D),Alabama,District 01,False,False,35.0,80095,78973,1122
2,Barry Moore,(R),Alabama,District 02,False,True,65.3,650807,669368,-13633
3,Phyllis Harvey-Hall,(D),Alabama,District 02,False,False,34.6,56050,55988,0
4,Mike D Rogers,(R),Alabama,District 03,True,True,67.5,1193111,1218564,502234
...,...,...,...,...,...,...,...,...,...,...
877,Tricia Zunker,(D),Wisconsin,District 07,False,False,39.2,1261957,1232690,29267
878,Mike Gallagher,(R),Wisconsin,District 08,True,True,64.0,3202905,2841801,1169362
879,Amanda Stuck,(D),Wisconsin,District 08,False,False,36.0,416978,399916,2165
880,Liz Cheney,(R),Wyoming,District 01,True,True,68.6,3003883,3060167,153567


In [3]:
House_of_Rep['State'].nunique()

50

a. How often does the candidate who raised more money win a race?

In [4]:
House_of_Rep[House_of_Rep['Winner']==True]

Unnamed: 0,Candidate,Party,State,District,Incumbent,Winner,Percentage of Vote,Raised,Spent,Funds Remaining
0,Jerry Carl,(R),Alabama,District 01,False,True,64.9,1971321,1859349,111973
2,Barry Moore,(R),Alabama,District 02,False,True,65.3,650807,669368,-13633
4,Mike D Rogers,(R),Alabama,District 03,True,True,67.5,1193111,1218564,502234
6,Robert B Aderholt,(R),Alabama,District 04,True,True,82.5,1255076,1323812,647004
7,Mo Brooks,(R),Alabama,District 05,True,True,95.8,655365,210045,1137501
...,...,...,...,...,...,...,...,...,...,...
872,Scott Fitzgerald,(R),Wisconsin,District 05,False,True,60.2,1155721,968406,187315
874,Glenn Grothman,(R),Wisconsin,District 06,True,True,59.3,1815756,1736659,269431
876,Tom Tiffany,(R),Wisconsin,District 07,True,True,60.8,2637459,2514740,122719
878,Mike Gallagher,(R),Wisconsin,District 08,True,True,64.0,3202905,2841801,1169362


In [5]:
House_of_Rep.sort_values(['State','District','Raised'], ascending = False)

Unnamed: 0,Candidate,Party,State,District,Incumbent,Winner,Percentage of Vote,Raised,Spent,Funds Remaining
880,Liz Cheney,(R),Wyoming,District 01,True,True,68.6,3003883,3060167,153567
881,Lynnette Grey Bull,(D),Wyoming,District 01,False,False,24.6,134597,132235,2363
878,Mike Gallagher,(R),Wisconsin,District 08,True,True,64.0,3202905,2841801,1169362
879,Amanda Stuck,(D),Wisconsin,District 08,False,False,36.0,416978,399916,2165
876,Tom Tiffany,(R),Wisconsin,District 07,True,True,60.8,2637459,2514740,122719
...,...,...,...,...,...,...,...,...,...,...
5,Adia Winfrey,(D),Alabama,District 03,False,False,32.4,50273,40971,43
2,Barry Moore,(R),Alabama,District 02,False,True,65.3,650807,669368,-13633
3,Phyllis Harvey-Hall,(D),Alabama,District 02,False,False,34.6,56050,55988,0
0,Jerry Carl,(R),Alabama,District 01,False,True,64.9,1971321,1859349,111973


In [6]:
House_of_Rep['Raised Ranked'] = House_of_Rep.groupby(['State','District'])['Raised'].rank(ascending = False)
House_of_Rep

Unnamed: 0,Candidate,Party,State,District,Incumbent,Winner,Percentage of Vote,Raised,Spent,Funds Remaining,Raised Ranked
0,Jerry Carl,(R),Alabama,District 01,False,True,64.9,1971321,1859349,111973,1.0
1,James Averhart,(D),Alabama,District 01,False,False,35.0,80095,78973,1122,2.0
2,Barry Moore,(R),Alabama,District 02,False,True,65.3,650807,669368,-13633,1.0
3,Phyllis Harvey-Hall,(D),Alabama,District 02,False,False,34.6,56050,55988,0,2.0
4,Mike D Rogers,(R),Alabama,District 03,True,True,67.5,1193111,1218564,502234,1.0
...,...,...,...,...,...,...,...,...,...,...,...
877,Tricia Zunker,(D),Wisconsin,District 07,False,False,39.2,1261957,1232690,29267,2.0
878,Mike Gallagher,(R),Wisconsin,District 08,True,True,64.0,3202905,2841801,1169362,1.0
879,Amanda Stuck,(D),Wisconsin,District 08,False,False,36.0,416978,399916,2165,2.0
880,Liz Cheney,(R),Wyoming,District 01,True,True,68.6,3003883,3060167,153567,1.0


In [7]:
House_of_Rep['Top Raised'] = np.where(House_of_Rep['Raised Ranked'] == 1.0, True, False)
House_of_Rep

Unnamed: 0,Candidate,Party,State,District,Incumbent,Winner,Percentage of Vote,Raised,Spent,Funds Remaining,Raised Ranked,Top Raised
0,Jerry Carl,(R),Alabama,District 01,False,True,64.9,1971321,1859349,111973,1.0,True
1,James Averhart,(D),Alabama,District 01,False,False,35.0,80095,78973,1122,2.0,False
2,Barry Moore,(R),Alabama,District 02,False,True,65.3,650807,669368,-13633,1.0,True
3,Phyllis Harvey-Hall,(D),Alabama,District 02,False,False,34.6,56050,55988,0,2.0,False
4,Mike D Rogers,(R),Alabama,District 03,True,True,67.5,1193111,1218564,502234,1.0,True
...,...,...,...,...,...,...,...,...,...,...,...,...
877,Tricia Zunker,(D),Wisconsin,District 07,False,False,39.2,1261957,1232690,29267,2.0,False
878,Mike Gallagher,(R),Wisconsin,District 08,True,True,64.0,3202905,2841801,1169362,1.0,True
879,Amanda Stuck,(D),Wisconsin,District 08,False,False,36.0,416978,399916,2165,2.0,False
880,Liz Cheney,(R),Wyoming,District 01,True,True,68.6,3003883,3060167,153567,1.0,True


In [8]:
House_of_Rep

Unnamed: 0,Candidate,Party,State,District,Incumbent,Winner,Percentage of Vote,Raised,Spent,Funds Remaining,Raised Ranked,Top Raised
0,Jerry Carl,(R),Alabama,District 01,False,True,64.9,1971321,1859349,111973,1.0,True
1,James Averhart,(D),Alabama,District 01,False,False,35.0,80095,78973,1122,2.0,False
2,Barry Moore,(R),Alabama,District 02,False,True,65.3,650807,669368,-13633,1.0,True
3,Phyllis Harvey-Hall,(D),Alabama,District 02,False,False,34.6,56050,55988,0,2.0,False
4,Mike D Rogers,(R),Alabama,District 03,True,True,67.5,1193111,1218564,502234,1.0,True
...,...,...,...,...,...,...,...,...,...,...,...,...
877,Tricia Zunker,(D),Wisconsin,District 07,False,False,39.2,1261957,1232690,29267,2.0,False
878,Mike Gallagher,(R),Wisconsin,District 08,True,True,64.0,3202905,2841801,1169362,1.0,True
879,Amanda Stuck,(D),Wisconsin,District 08,False,False,36.0,416978,399916,2165,2.0,False
880,Liz Cheney,(R),Wyoming,District 01,True,True,68.6,3003883,3060167,153567,1.0,True


In [9]:
CrosstabResult = pd.crosstab(index=House_of_Rep['Top Raised'], columns= House_of_Rep['Winner'])
CrosstabResult

Winner,False,True
Top Raised,Unnamed: 1_level_1,Unnamed: 2_level_1
False,402,47
True,51,382


In [10]:
382/433

0.8822170900692841

**88% of the time, the candidate that raised the most money is the candidate that wins the race.**

b. How often does the candidate who spent more money win a race?

In [11]:
House_of_Rep['Spent Ranked'] = House_of_Rep.groupby(['State','District'])['Spent'].rank(ascending = False)

In [12]:
House_of_Rep

Unnamed: 0,Candidate,Party,State,District,Incumbent,Winner,Percentage of Vote,Raised,Spent,Funds Remaining,Raised Ranked,Top Raised,Spent Ranked
0,Jerry Carl,(R),Alabama,District 01,False,True,64.9,1971321,1859349,111973,1.0,True,1.0
1,James Averhart,(D),Alabama,District 01,False,False,35.0,80095,78973,1122,2.0,False,2.0
2,Barry Moore,(R),Alabama,District 02,False,True,65.3,650807,669368,-13633,1.0,True,1.0
3,Phyllis Harvey-Hall,(D),Alabama,District 02,False,False,34.6,56050,55988,0,2.0,False,2.0
4,Mike D Rogers,(R),Alabama,District 03,True,True,67.5,1193111,1218564,502234,1.0,True,1.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,Tricia Zunker,(D),Wisconsin,District 07,False,False,39.2,1261957,1232690,29267,2.0,False,2.0
878,Mike Gallagher,(R),Wisconsin,District 08,True,True,64.0,3202905,2841801,1169362,1.0,True,1.0
879,Amanda Stuck,(D),Wisconsin,District 08,False,False,36.0,416978,399916,2165,2.0,False,2.0
880,Liz Cheney,(R),Wyoming,District 01,True,True,68.6,3003883,3060167,153567,1.0,True,1.0


In [13]:
House_of_Rep['Top Spent'] = np.where(House_of_Rep['Spent Ranked'] == 1.0, True, False)
House_of_Rep

Unnamed: 0,Candidate,Party,State,District,Incumbent,Winner,Percentage of Vote,Raised,Spent,Funds Remaining,Raised Ranked,Top Raised,Spent Ranked,Top Spent
0,Jerry Carl,(R),Alabama,District 01,False,True,64.9,1971321,1859349,111973,1.0,True,1.0,True
1,James Averhart,(D),Alabama,District 01,False,False,35.0,80095,78973,1122,2.0,False,2.0,False
2,Barry Moore,(R),Alabama,District 02,False,True,65.3,650807,669368,-13633,1.0,True,1.0,True
3,Phyllis Harvey-Hall,(D),Alabama,District 02,False,False,34.6,56050,55988,0,2.0,False,2.0,False
4,Mike D Rogers,(R),Alabama,District 03,True,True,67.5,1193111,1218564,502234,1.0,True,1.0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
877,Tricia Zunker,(D),Wisconsin,District 07,False,False,39.2,1261957,1232690,29267,2.0,False,2.0,False
878,Mike Gallagher,(R),Wisconsin,District 08,True,True,64.0,3202905,2841801,1169362,1.0,True,1.0,True
879,Amanda Stuck,(D),Wisconsin,District 08,False,False,36.0,416978,399916,2165,2.0,False,2.0,False
880,Liz Cheney,(R),Wyoming,District 01,True,True,68.6,3003883,3060167,153567,1.0,True,1.0,True


In [14]:
raised_winner_crosstab = pd.crosstab(index=House_of_Rep['Top Spent'], columns= House_of_Rep['Winner'])
raised_winner_crosstab

Winner,False,True
Top Spent,Unnamed: 1_level_1,Unnamed: 2_level_1
False,399,50
True,54,379


In [15]:
379/433

0.8752886836027713

**87% of the time the candidate that spent the most has one the race**

c. Does the difference between either money raised or money spent seem to influence the likelihood of a candidate winning a race?


In [16]:
House_of_Rep['Raised'] - House_of_Rep['Spent']

0      111972
1        1122
2      -18561
3          62
4      -25453
        ...  
877     29267
878    361104
879     17062
880    -56284
881      2362
Length: 882, dtype: int64

d. How often does the incumbent candidate win a race?

In [17]:
Incumbent_winner_crosstab = pd.crosstab(index=House_of_Rep['Incumbent'], columns= House_of_Rep['Winner'])
Incumbent_winner_crosstab

Winner,False,True
Incumbent,Unnamed: 1_level_1,Unnamed: 2_level_1
False,438,59
True,15,370


In [18]:
370/385

0.961038961038961

96% of the time where an incumbent was present in the race they were the winner.

e. Can you detect any relationship between amount of money raised and the incumbent status of a candidate?

In [19]:
House_of_Rep[['Incumbent', 'Raised']].corr()

Unnamed: 0,Incumbent,Raised
Incumbent,1.0,0.271196
Raised,0.271196,1.0


In [20]:
House_of_Rep.head(10)

Unnamed: 0,Candidate,Party,State,District,Incumbent,Winner,Percentage of Vote,Raised,Spent,Funds Remaining,Raised Ranked,Top Raised,Spent Ranked,Top Spent
0,Jerry Carl,(R),Alabama,District 01,False,True,64.9,1971321,1859349,111973,1.0,True,1.0,True
1,James Averhart,(D),Alabama,District 01,False,False,35.0,80095,78973,1122,2.0,False,2.0,False
2,Barry Moore,(R),Alabama,District 02,False,True,65.3,650807,669368,-13633,1.0,True,1.0,True
3,Phyllis Harvey-Hall,(D),Alabama,District 02,False,False,34.6,56050,55988,0,2.0,False,2.0,False
4,Mike D Rogers,(R),Alabama,District 03,True,True,67.5,1193111,1218564,502234,1.0,True,1.0,True
5,Adia Winfrey,(D),Alabama,District 03,False,False,32.4,50273,40971,43,2.0,False,2.0,False
6,Robert B Aderholt,(R),Alabama,District 04,True,True,82.5,1255076,1323812,647004,1.0,True,1.0,True
7,Mo Brooks,(R),Alabama,District 05,True,True,95.8,655365,210045,1137501,1.0,True,1.0,True
8,Gary Palmer,(R),Alabama,District 06,True,True,97.1,907219,909082,370688,1.0,True,1.0,True
9,Terri Sewell,(D),Alabama,District 07,True,True,97.2,2168165,1495957,2243480,1.0,True,1.0,True


In [22]:
House_of_Rep[['Candidate','State', 'District', 'Raised']]

Unnamed: 0,Candidate,State,District,Raised
0,Jerry Carl,Alabama,District 01,1971321
1,James Averhart,Alabama,District 01,80095
2,Barry Moore,Alabama,District 02,650807
3,Phyllis Harvey-Hall,Alabama,District 02,56050
4,Mike D Rogers,Alabama,District 03,1193111
...,...,...,...,...
877,Tricia Zunker,Wisconsin,District 07,1261957
878,Mike Gallagher,Wisconsin,District 08,3202905
879,Amanda Stuck,Wisconsin,District 08,416978
880,Liz Cheney,Wyoming,District 01,3003883


In [23]:
District_Average_Raised = House_of_Rep.groupby(['State', 'District'])['Raised'].mean().reset_index()
District_Average_Raised

Unnamed: 0,State,District,Raised
0,Alabama,District 01,1025708.0
1,Alabama,District 02,353428.5
2,Alabama,District 03,621692.0
3,Alabama,District 04,1255076.0
4,Alabama,District 05,655365.0
...,...,...,...
428,Wisconsin,District 05,763056.5
429,Wisconsin,District 06,1051754.5
430,Wisconsin,District 07,1949708.0
431,Wisconsin,District 08,1809941.5


In [24]:
House_of_Rep_Winners = House_of_Rep[House_of_Rep['Winner']== True].reset_index()
House_of_Rep_Winners

Unnamed: 0,index,Candidate,Party,State,District,Incumbent,Winner,Percentage of Vote,Raised,Spent,Funds Remaining,Raised Ranked,Top Raised,Spent Ranked,Top Spent
0,0,Jerry Carl,(R),Alabama,District 01,False,True,64.9,1971321,1859349,111973,1.0,True,1.0,True
1,2,Barry Moore,(R),Alabama,District 02,False,True,65.3,650807,669368,-13633,1.0,True,1.0,True
2,4,Mike D Rogers,(R),Alabama,District 03,True,True,67.5,1193111,1218564,502234,1.0,True,1.0,True
3,6,Robert B Aderholt,(R),Alabama,District 04,True,True,82.5,1255076,1323812,647004,1.0,True,1.0,True
4,7,Mo Brooks,(R),Alabama,District 05,True,True,95.8,655365,210045,1137501,1.0,True,1.0,True
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
424,872,Scott Fitzgerald,(R),Wisconsin,District 05,False,True,60.2,1155721,968406,187315,1.0,True,1.0,True
425,874,Glenn Grothman,(R),Wisconsin,District 06,True,True,59.3,1815756,1736659,269431,1.0,True,1.0,True
426,876,Tom Tiffany,(R),Wisconsin,District 07,True,True,60.8,2637459,2514740,122719,1.0,True,1.0,True
427,878,Mike Gallagher,(R),Wisconsin,District 08,True,True,64.0,3202905,2841801,1169362,1.0,True,1.0,True


In [25]:
House_of_Rep_Winners[['Candidate','State', 'District', 'Winner', 'Raised']]

Unnamed: 0,Candidate,State,District,Winner,Raised
0,Jerry Carl,Alabama,District 01,True,1971321
1,Barry Moore,Alabama,District 02,True,650807
2,Mike D Rogers,Alabama,District 03,True,1193111
3,Robert B Aderholt,Alabama,District 04,True,1255076
4,Mo Brooks,Alabama,District 05,True,655365
...,...,...,...,...,...
424,Scott Fitzgerald,Wisconsin,District 05,True,1155721
425,Glenn Grothman,Wisconsin,District 06,True,1815756
426,Tom Tiffany,Wisconsin,District 07,True,2637459
427,Mike Gallagher,Wisconsin,District 08,True,3202905


In [26]:
District_Total_Raised = House_of_Rep.groupby(['State', 'District'])['Raised'].sum().reset_index()
District_Total_Raised

Unnamed: 0,State,District,Raised
0,Alabama,District 01,2051416
1,Alabama,District 02,706857
2,Alabama,District 03,1243384
3,Alabama,District 04,1255076
4,Alabama,District 05,655365
...,...,...,...
428,Wisconsin,District 05,1526113
429,Wisconsin,District 06,2103509
430,Wisconsin,District 07,3899416
431,Wisconsin,District 08,3619883


In [27]:
#The amount raised by the winners divided by the total amount raised in their district. 
District_Total_Raised['Winning Proportion'] = (House_of_Rep_Winners['Raised'])/(District_Total_Raised['Raised'])
District_Total_Raised

Unnamed: 0,State,District,Raised,Winning Proportion
0,Alabama,District 01,2051416,0.960956
1,Alabama,District 02,706857,0.920705
2,Alabama,District 03,1243384,0.959568
3,Alabama,District 04,1255076,1.000000
4,Alabama,District 05,655365,1.000000
...,...,...,...,...
428,Wisconsin,District 05,1526113,1.968323
429,Wisconsin,District 06,2103509,
430,Wisconsin,District 07,3899416,
431,Wisconsin,District 08,3619883,


In [28]:
District_Total_Raised.groupby(['State'])['Winning Proportion'].mean()

State
Alabama           0.977318
Alaska            0.270780
Arizona           0.796277
Arkansas          0.795217
California        0.835730
Colorado          0.755657
Connecticut       0.846201
Delaware          0.936723
Florida           0.750551
Georgia           0.899397
Hawaii            1.408929
Idaho             0.825037
Illinois          1.880896
Indiana           2.186328
Iowa              0.416298
Kansas            0.756123
Kentucky          2.774939
Louisiana         1.067038
Maine             1.373631
Maryland          1.692236
Massachusetts     1.271646
Michigan          1.380479
Minnesota         0.716306
Mississippi       2.536614
Missouri          1.063819
Montana           0.379617
Nebraska          0.452629
Nevada            2.517471
New Hampshire     0.861488
New Jersey        1.415509
New Mexico        0.793445
New York          1.225459
North Carolina    1.215625
North Dakota      1.724911
Ohio              1.035046
Oklahoma          1.144465
Oregon            1.92