In [1]:
import pandas as pd
from sqlalchemy import create_engine

## Extract CSVs into DataFrames

In [2]:
# File paths, grouped by race

# Governor
gov_cn_path, gov_cand_path, gov_ste_path = (
    'Resources/governors_county.csv',
    'Resources/governors_county_candidate.csv',
    'Resources/governors_state.csv',
)

# House
hse_cand_path, hse_ste_path = (
    'Resources/house_candidate.csv',
    'Resources/house_state.csv',
)

# President
psd_cn_path, psd_cand_path, psd_ste_path = (
    'Resources/president_county.csv',
    'Resources/president_county_candidate.csv',
    'Resources/president_state.csv',
)

# Senate
sen_cn_path, sen_cand_path, sen_ste_path = (
    'Resources/senate_county.csv',
    'Resources/senate_county_candidate.csv',
    'Resources/senate_state.csv',
)

In [3]:
# Read csv files: load each path, in order, into its matching DataFrame
(
    gov_cn_df, gov_cand_df, gov_ste_df,
    hse_cand_df, hse_ste_df,
    psd_cn_df, psd_cand_df, psd_ste_df,
    sen_cn_df, sen_cand_df, sen_ste_df,
) = (
    pd.read_csv(csv_path)
    for csv_path in (
        gov_cn_path, gov_cand_path, gov_ste_path,
        hse_cand_path, hse_ste_path,
        psd_cn_path, psd_cand_path, psd_ste_path,
        sen_cn_path, sen_cand_path, sen_ste_path,
    )
)

### Governor

In [4]:
# Preview the first rows of the governor county-level vote totals
gov_cn_df.head()

Unnamed: 0,state,county,current_votes,total_votes,percent
0,Delaware,Kent County,85415,87025,100
1,Delaware,New Castle County,280039,287633,100
2,Delaware,Sussex County,127181,129352,100
3,Indiana,Adams County,14154,14238,99
4,Indiana,Allen County,168312,164793,95


In [5]:
# Preview the first rows of the governor per-county candidate results
gov_cand_df.head()

Unnamed: 0,state,county,candidate,party,votes,won
0,Delaware,Kent County,John Carney,DEM,44352,True
1,Delaware,Kent County,Julianne Murray,REP,39332,False
2,Delaware,Kent County,Kathy DeMatteis,IPD,1115,False
3,Delaware,Kent County,John Machurek,LIB,616,False
4,Delaware,New Castle County,John Carney,DEM,191678,True


In [6]:
# Preview the first rows of the governor state-level vote counts
gov_ste_df.head()

Unnamed: 0,state,votes
0,Delaware,492635
1,Indiana,3020514
2,Missouri,3010911
3,Montana,603587
4,New Hampshire,792804


### House

In [7]:
# Preview the first rows of the house per-district candidate results
hse_cand_df.head()

Unnamed: 0,district,candidate,party,total_votes,won
0,Delaware at large,Lisa Blunt Rochester,DEM,281382,True
1,Delaware at large,Lee Murphy,REP,196392,False
2,Delaware at large,Catherine Purcell,IND,6682,False
3,Delaware at large,David Rogers,LIB,3814,False
4,Florida’s 1st district,Matt Gaetz,REP,283352,True


In [8]:
# Preview the first rows of the house vote totals (keyed by district, despite the `_ste` name)
hse_ste_df.head()

Unnamed: 0,district,current_votes,total_votes,percent
0,Delaware at large,488270,488270,100
1,Florida’s 1st district,438562,438562,100
2,Florida’s 2nd district,311999,311999,100
3,Florida’s 3rd district,390401,390401,100
4,Florida’s 4th district,504940,504940,100


### President

In [9]:
# Preview the first rows of the presidential county-level vote totals
psd_cn_df.head()

Unnamed: 0,state,county,current_votes,total_votes,percent
0,Delaware,Kent County,87025,87025,100
1,Delaware,New Castle County,287633,287633,100
2,Delaware,Sussex County,129352,129352,100
3,District of Columbia,District of Columbia,40631,45392,90
4,District of Columbia,Ward 2,31929,37140,86


In [10]:
# Preview the first rows of the presidential per-county candidate results
psd_cand_df.head()

Unnamed: 0,state,county,candidate,party,total_votes,won
0,Delaware,Kent County,Joe Biden,DEM,44552,True
1,Delaware,Kent County,Donald Trump,REP,41009,False
2,Delaware,Kent County,Jo Jorgensen,LIB,1044,False
3,Delaware,Kent County,Howie Hawkins,GRN,420,False
4,Delaware,New Castle County,Joe Biden,DEM,195034,True


In [11]:
# Preview the first rows of the presidential state-level vote totals
psd_ste_df.head() 

Unnamed: 0,state,total_votes
0,Delaware,504010
1,District of Columbia,333682
2,Florida,11067456
3,Georgia,4998566
4,Hawaii,574469


### Senate

In [12]:
# Preview the first rows of the senate county-level vote totals
sen_cn_df.head()

Unnamed: 0,state,county,current_votes,total_votes,percent
0,Delaware,Kent County,84975,87025,100
1,Delaware,New Castle County,279462,287633,100
2,Delaware,Sussex County,126498,129352,100
3,Georgia,Appling County,8187,8341,100
4,Georgia,Atkinson County,3097,3155,100


In [13]:
# Count the rows at each distinct `percent` value, to help decide which
# completion percentages to include.
# NOTE(review): `percent` presumably is the share of votes counted per county — confirm.
sen_cn_df.groupby("percent").count()

Unnamed: 0_level_0,state,county,current_votes,total_votes
percent,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
0,148,148,148,148
14,1,1,1,1
40,1,1,1,1
48,1,1,1,1
54,1,1,1,1
59,2,2,2,2
63,1,1,1,1
64,1,1,1,1
66,1,1,1,1
67,2,2,2,2


In [14]:
# Preview the first rows of the senate per-county candidate results
# (unlike the governor and president candidate tables, no `won` column is present)
sen_cand_df.head()

Unnamed: 0,state,county,candidate,party,total_votes
0,Delaware,Kent County,Lauren Witzke,REP,38571
1,Delaware,Kent County,Mark Turley,IPD,1553
2,Delaware,Kent County,Nadine Frost,LIB,958
3,Delaware,New Castle County,Lauren Witzke,REP,80081
4,Delaware,New Castle County,Mark Turley,IPD,4277


In [15]:
# Preview the first rows of the senate state-level vote totals
sen_ste_df.head()

Unnamed: 0,state,total_votes
0,Delaware,490935
1,Georgia,4952440
2,Idaho,858826
3,Illinois,5860394
4,Iowa,1670616


In [16]:
# Determine how many states had senate races for the 2020 election.
# This is the row count of the state-level table, so it assumes one row per
# state — TODO confirm `state` is unique in sen_ste_df.
len(sen_ste_df)

35

## Completed Results

In [28]:
# Establish who won each county: keep the row(s) whose vote count equals the
# highest count within their (state, county) group. Tied leaders are all kept.
county_max_votes = sen_cand_df.groupby(['state', 'county'])['total_votes'].transform('max')
is_county_leader = sen_cand_df['total_votes'] == county_max_votes
sen_cand_win_df = sen_cand_df.loc[is_county_leader]

# Index labels of the winning rows, used later to flag winners in sen_cand_df
sen_winner_index = sen_cand_win_df.index

In [31]:
# Flag every row whose index label is one of the county winners.
# `Index.isin` matches on labels, so the result stays correct even if
# `sen_cand_df` does not carry a default 0..n-1 index, and it does not depend
# on the position of the `won` column. It also replaces a per-row Python loop
# with a single vectorized call. The column is boolean: True for winners,
# False otherwise.
sen_cand_df['won'] = sen_cand_df.index.isin(sen_winner_index)

sen_cand_df.head()

Unnamed: 0,state,county,candidate,party,total_votes,won
0,Delaware,Kent County,Lauren Witzke,REP,38571,True
1,Delaware,Kent County,Mark Turley,IPD,1553,False
2,Delaware,Kent County,Nadine Frost,LIB,958,False
3,Delaware,New Castle County,Lauren Witzke,REP,80081,True
4,Delaware,New Castle County,Mark Turley,IPD,4277,False


## By County

In [38]:
# Work on a renamed copy so the source frame keeps its `total_votes` column
sen_result_df = sen_cand_df.rename(columns={'total_votes': 'candidate_votes'}).copy()

# Sum of candidate votes within each (state, county), aligned to every candidate row
county_vote_totals = sen_result_df.groupby(['state', 'county'])['candidate_votes'].transform('sum')

# Each candidate's share of their county's votes, as a percentage rounded to 2 decimals
sen_result_df = sen_result_df.assign(
    percent_votes=(sen_result_df['candidate_votes'] / county_vote_totals * 100).round(2)
)

# Rearrange columns
column_order = ['state', 'county', 'candidate', 'party',
                'candidate_votes', 'percent_votes', 'won']
sen_result_cleaned_df = sen_result_df.loc[:, column_order].copy()

# Display dataframe
sen_result_cleaned_df.head()

Unnamed: 0,state,county,candidate,party,candidate_votes,percent_votes,won
0,Delaware,Kent County,Lauren Witzke,REP,38571,93.89,True
1,Delaware,Kent County,Mark Turley,IPD,1553,3.78,False
2,Delaware,Kent County,Nadine Frost,LIB,958,2.33,False
3,Delaware,New Castle County,Lauren Witzke,REP,80081,91.32,True
4,Delaware,New Castle County,Mark Turley,IPD,4277,4.88,False


## By State

In [40]:
# Sum of votes for each candidate for each state.
# The aggregation is named with the string 'sum' instead of the Python builtin
# `sum`: recent pandas versions emit a FutureWarning when given the builtin,
# because they plan to stop mapping it to the optimized groupby sum.
sen_st_votes = (
    sen_result_cleaned_df
    .groupby(['state', 'candidate'])
    .agg({'candidate_votes': 'sum'})
    .reset_index()  # turn the (state, candidate) group keys back into columns
)

# Sum of candidate votes within each state, aligned to every candidate row
state_vote_totals = sen_st_votes.groupby(['state'])['candidate_votes'].transform('sum')

# Each candidate's share of their state's votes, as a percentage rounded to 2 decimals
sen_st_votes = sen_st_votes.assign(
    percent_votes=(sen_st_votes['candidate_votes'] / state_vote_totals * 100).round(2)
)

# Display dataframe
sen_st_votes.head()

Unnamed: 0,state,candidate,candidate_votes,percent_votes
0,Alabama,Write-ins,3869,0.49
1,Alabama,Doug Jones,590953,74.48
2,Alabama,Tommy Tuberville,198603,25.03
3,Alaska,Al Gross,86471,59.11
4,Alaska,Dan Sullivan,43150,29.5


In [None]:
# NOTE(review): `engine` is not created in any cell visible here (only
# `create_engine` is imported) — it must be defined before this cell can run.

# Options shared by every write: replace an existing table and skip the index
to_sql_opts = dict(con=engine, if_exists='replace', index=False)

# Store election result by county to database
sen_result_cleaned_df.to_sql(name='senate_result_county', **to_sql_opts)

# Store election result by state to database
sen_st_votes.to_sql(name='senate_result_state', **to_sql_opts)

# Store election's total votes by county to database
sen_cn_df.to_sql(name='senate_total_votes_county', **to_sql_opts)

# Store election's total votes by state to database
sen_ste_df.to_sql(name='senate_total_votes_state', **to_sql_opts)