# Federal Election Results 2018
### Non-Presidential year

In [1]:
import warnings
# Installed right after `import warnings` so that warnings emitted while the
# libraries below are being imported are silenced as well.
warnings.simplefilter(action='ignore', category=Warning)
import os
from urllib.parse import quote

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import cartopy.crs as ccrs
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

import cartopy.io.shapereader as shpreader
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
import matplotlib.patches as mpatches
import geopandas as gpd

import psycopg2
from sqlalchemy import create_engine

In [2]:
# Data source (FEC, Federal Elections 2018):
# https://www.fec.gov/introduction-campaign-finance/election-and-voting-information/federal-elections-2018/
#
# Each requested sheet of the workbook is loaded into its own pandas data
# frame. The frames are kept in a dictionary keyed by sheet name, which makes
# it easy to look one up by name in the cells that follow.

# Path to the Excel workbook (relative to the notebook's working directory)
file_path = 'Election_Results/federalelections2018.xlsx'

# Names of the sheets to extract
sheet_names = ['2018 US Senate Results by State',
               '2018 US House Results by State']

# Passing the list of names makes pandas open the workbook once and return a
# {sheet name: DataFrame} dictionary, instead of re-opening and re-parsing the
# file for every sheet as a per-sheet loop would.
data_frames = pd.read_excel(file_path, sheet_name=sheet_names)

# To eyeball a sheet after loading: data_frames[sheet_names[0]].head()


2018 US House Results by State
2018 US House Results by State


In [3]:
# Look up the raw Senate sheet in the dictionary of loaded sheets.
df_senate_2018 = data_frames['2018 US Senate Results by State']
# Preview the first 20 rows to see how the sheet is laid out.
df_senate_2018.head(20)
# 681 rows 18 columns

Unnamed: 0,1,STATE ABBREVIATION,STATE,DISTRICT,FEC ID#,(I),CANDIDATE NAME (First),CANDIDATE NAME (Last),CANDIDATE NAME,TOTAL VOTES,...,RUNOFF VOTES,RUNOFF %,GENERAL VOTES,GENERAL %,GE RUNOFF ELECTION VOTES (MS Senate),GE RUNOFF ELECTION % (MS Senate),"COMBINED GE PARTY TOTALS (CT, NY)","COMBINED % (CT, NY)",GE WINNER INDICATOR,FOOTNOTES
0,2,,,,,,,,,,...,,,,,,,,,,
1,3,AZ,Arizona,S,S8AZ00197,,Kyrsten,Sinema,"Sinema, Kyrsten",,...,,,1191100.0,0.4995579,,,,,W,
2,4,AZ,Arizona,S,S8AZ00122,,Deedra,Abboud,"Abboud, Deedra",,...,,,,,,,,,,
3,5,AZ,Arizona,S,,,,,,Party Votes:,...,,,,,,,,,,
4,6,AZ,Arizona,S,S8AZ00221,,Martha,McSally,"McSally, Martha",,...,,,1135200.0,0.476113,,,,,,
5,7,AZ,Arizona,S,S6AZ00233,,Kelli,Ward,"Ward, Kelli",,...,,,,,,,,,,
6,8,AZ,Arizona,S,S8AZ00247,,Joe,Arpaio,"Arpaio, Joe",,...,,,,,,,,,,
7,9,AZ,Arizona,S,,,Nicholas N.,Glenn,"Glenn, Nicholas N.",,...,,,,,,,,,,
8,10,AZ,Arizona,S,,,William,Gonzales,"Gonzales, William",,...,,,,,,,,,,
9,11,AZ,Arizona,S,,,,,,Party Votes:,...,,,,,,,,,,


In [4]:
# Select the wanted columns from the Senate sheet and rename them.
# The mapping is {sheet header: notebook column name}; its key order is also
# the column order of the resulting frame.
# NOTE: the key 'GENERAL VOTES ' is spelled with a trailing space on purpose;
# that is the header text this notebook selects the column by.
senate_column_names = {
    'STATE ABBREVIATION': 'State',
    'DISTRICT': 'Office',
    'CANDIDATE NAME (Last)': 'Last Name',
    'PARTY': 'Party',
    'GENERAL %': 'Vote %',
    'GENERAL VOTES ': 'Vote count',
}

# Read from the untouched sheet held in `data_frames` rather than from
# `df_senate_2018`: this cell rebinds `df_senate_2018` to the renamed frame,
# so selecting from that name would raise a KeyError on the original headers
# the second time the cell is run.
df_selection = data_frames['2018 US Senate Results by State'][list(senate_column_names)]
df_senate_2018 = df_selection.rename(columns=senate_column_names)
df_senate_2018.head()
# gives 681 rows

Unnamed: 0,State,Office,Last Name,Party,Vote %,Vote count
0,,,,,,
1,AZ,S,Sinema,D,0.499558,1191100.0
2,AZ,S,Abboud,D,,
3,AZ,S,,D,,
4,AZ,S,McSally,R,0.476113,1135200.0


In [5]:
# Final column order: Year and Office first.
new_cols = ['Year', 'Office', 'State', 'Last Name', 'Party', 'Vote %', 'Vote count']

# Raw party codes -> labels used in the results table ('IND' is kept as is).
senate_party_labels = {'R': 'REP', 'D': 'DEM'}

# Built as one chain so every step returns a new frame. The previous version
# assigned columns on a boolean-filtered frame, which is the pattern pandas
# flags with SettingWithCopyWarning (hidden here because warnings are ignored).
#
# NOTE: the filter expects the raw codes 'R' / 'D' / 'IND'. Running this cell
# a second time without re-running the column-selection cell first would drop
# every row already relabelled to 'REP' / 'DEM'.
df_senate_2018 = (
    df_senate_2018
    .assign(Year=2018)                     # add year column
    .reindex(columns=new_cols)             # arrange columns
    .loc[lambda frame: frame['Party'].isin(['R', 'D', 'IND'])]  # R, D, IND (independent) only
    .assign(Party=lambda frame: frame['Party'].replace(senate_party_labels))
)

df_senate_2018.head()
# gives 135 rows

Unnamed: 0,Year,Office,State,Last Name,Party,Vote %,Vote count
1,2018,S,AZ,Sinema,DEM,0.499558,1191100.0
2,2018,S,AZ,Abboud,DEM,,
3,2018,S,AZ,,DEM,,
4,2018,S,AZ,McSally,REP,0.476113,1135200.0
5,2018,S,AZ,Ward,REP,,


In [6]:
# Spell out the office code: an 'Office' value of exactly 'S' becomes
# 'Senate'; any other value is left as it is.
df_senate_2018 = df_senate_2018.assign(
    Office=df_senate_2018['Office'].replace({'S': 'Senate'})
)
df_senate_2018

Unnamed: 0,Year,Office,State,Last Name,Party,Vote %,Vote count
1,2018,Senate,AZ,Sinema,DEM,0.499558,1191100.0
2,2018,Senate,AZ,Abboud,DEM,,
3,2018,Senate,AZ,,DEM,,
4,2018,Senate,AZ,McSally,REP,0.476113,1135200.0
5,2018,Senate,AZ,Ward,REP,,
...,...,...,...,...,...,...,...
618,2018,Senate,WY,De La Fuente,REP,,
619,2018,Senate,WY,Van Risseghem,REP,,
621,2018,Senate,WY,,REP,,
622,2018,Senate,WY,Trauner,DEM,0.300988,61227.0


In [7]:
# Keep only the rows that carry both a vote share and a vote count.
df_s_2018 = df_senate_2018.dropna(subset=['Vote %', 'Vote count'])
# Sanity check: each print should show an empty frame if no NaN is left.
print(df_s_2018.loc[df_s_2018['Vote %'].isna()])
print(df_s_2018.loc[df_s_2018['Vote count'].isna()])

Empty DataFrame
Columns: [Year, Office, State, Last Name, Party, Vote %, Vote count]
Index: []
Empty DataFrame
Columns: [Year, Office, State, Last Name, Party, Vote %, Vote count]
Index: []


In [9]:
# Turn 'Vote count' into whole numbers: any remaining non-finite value
# (NaN, +inf, -inf) is first swapped for the sentinel -1, then the column is
# cast to int.
df_s_2018['Vote count'] = (
    df_s_2018['Vote count']
    .replace([np.nan, np.inf, -np.inf], -1)
    .astype(int)
)
# Show the resulting column types
print(df_s_2018.dtypes)

Year            int64
Office         object
State          object
Last Name      object
Party          object
Vote %        float64
Vote count      int32
dtype: object


In [10]:
# Check how many Senate rows are left after the cleaning steps.
df_s_2018.shape
# gives 79 rows.

(79, 7)

In [11]:
# Preview the cleaned Senate rows (the original sheet row index is kept).
df_s_2018.head(5)

Unnamed: 0,Year,Office,State,Last Name,Party,Vote %,Vote count
1,2018,Senate,AZ,Sinema,DEM,0.499558,1191100
4,2018,Senate,AZ,McSally,REP,0.476113,1135200
23,2018,Senate,CA,Feinstein,DEM,0.541638,6019422
24,2018,Senate,CA,De Leon,DEM,0.458362,5093942
61,2018,Senate,CT,Murphy,DEM,0.567971,787685


## 2018 House Data

In [12]:
# Look up the raw House sheet in the dictionary of loaded sheets.
df_house_2018 = data_frames['2018 US House Results by State']
# Preview the first rows to see how the sheet is laid out.
df_house_2018.head()

Unnamed: 0,1,STATE ABBREVIATION,STATE,DISTRICT,FEC ID#,(I),CANDIDATE NAME (First),CANDIDATE NAME (Last),CANDIDATE NAME,TOTAL VOTES,...,RUNOFF VOTES,RUNOFF %,GENERAL VOTES,GENERAL %,GE RUNOFF ELECTION VOTES (MS Senate),GE RUNOFF ELECTION % (MS Senate),"COMBINED GE PARTY TOTALS (CT, NY)","COMBINED % (CT, NY)",GE WINNER INDICATOR,FOOTNOTES
0,2,,,,,,,,,,...,,,,,,,,,,
1,3,AL,Alabama,1.0,H4AL01123,(I),Bradley,Byrne,"Byrne, Bradley",,...,,,153228.0,0.631563,,,,,W,
2,4,AL,Alabama,1.0,H8AL01066,,"Robert, Jr.",Kennedy,"Kennedy, Robert, Jr.",,...,,,89226.0,0.367765,,,,,,
3,5,AL,Alabama,1.0,H8AL01082,,Lizzetta Hill,McConnell,"McConnell, Lizzetta Hill",,...,,,,,,,,,,
4,6,AL,Alabama,1.0,,,,,,Party Votes:,...,,,,,,,,,,


In [13]:
# Pick the wanted House columns and give them the notebook's column names.
# The mapping is {sheet header: notebook column name}; key order is the
# resulting column order. 'GENERAL VOTES ' is spelled with a trailing space.
house_column_names = {
    'STATE ABBREVIATION': 'State',
    'CANDIDATE NAME (Last)': 'Last Name',
    'PARTY': 'Party',
    'GENERAL %': 'Vote %',
    'GENERAL VOTES ': 'Vote count',
}

df_sel_house = df_house_2018.loc[:, list(house_column_names)]
df_hou_2018 = df_sel_house.rename(columns=house_column_names)
df_hou_2018.head()
# gives 541 rows.

Unnamed: 0,State,Last Name,Party,Vote %,Vote count
0,,,,,
1,AL,Byrne,R,0.631563,153228.0
2,AL,Kennedy,D,0.367765,89226.0
3,AL,McConnell,D,,
4,AL,,D,,


In [14]:
# Final column order: Year and Office first.
new_cols = ['Year', 'Office', 'State', 'Last Name', 'Party', 'Vote %', 'Vote count']

# Raw party codes -> labels used in the results table ('IND' is kept as is).
house_party_labels = {'R': 'REP', 'D': 'DEM'}

# Built as one chain so every step returns a new frame. The previous version
# assigned columns on a boolean-filtered frame, which is the pattern pandas
# flags with SettingWithCopyWarning (hidden here because warnings are ignored).
#
# NOTE: the filter expects the raw codes 'R' / 'D' / 'IND'. Running this cell
# a second time without re-running the column-selection cell first would drop
# every row already relabelled to 'REP' / 'DEM'.
df_hou_2018 = (
    df_hou_2018
    .assign(Year=2018, Office='House')     # add year and office columns
    .reindex(columns=new_cols)             # move Year and Office to the front
    .loc[lambda frame: frame['Party'].isin(['R', 'D', 'IND'])]  # R, D, IND (independent) only
    .assign(Party=lambda frame: frame['Party'].replace(house_party_labels))
)

df_hou_2018.head()

Unnamed: 0,Year,Office,State,Last Name,Party,Vote %,Vote count
1,2018,House,AL,Byrne,REP,0.631563,153228.0
2,2018,House,AL,Kennedy,DEM,0.367765,89226.0
3,2018,House,AL,McConnell,DEM,,
4,2018,House,AL,,DEM,,
8,2018,House,AL,Roby,REP,0.613884,138879.0


In [15]:
# Keep only the rows that carry both a vote share and a vote count.
df_h_2018 = df_hou_2018.dropna(subset=['Vote %', 'Vote count'])
# Sanity check: each print should show an empty frame if no NaN is left.
print(df_h_2018.loc[df_h_2018['Vote %'].isna()])
print(df_h_2018.loc[df_h_2018['Vote count'].isna()])

Empty DataFrame
Columns: [Year, Office, State, Last Name, Party, Vote %, Vote count]
Index: []
Empty DataFrame
Columns: [Year, Office, State, Last Name, Party, Vote %, Vote count]
Index: []


In [16]:
# Inspect the column types before 'Vote count' is converted to integers.
df_h_2018.dtypes

Year            int64
Office         object
State          object
Last Name      object
Party          object
Vote %        float64
Vote count     object
dtype: object

In [19]:
# NOTE: disabled on purpose; the cell that follows is the version in use.
# The House 'Vote count' column is object dtype (see the dtypes listing
# above), and the following cell additionally maps the text 'Unopposed' to 0
# before casting. This variant has no such step, so it presumably cannot
# cast the column as is. Kept for reference only.
# # replace non-finite values with -1
# df_h_2018['Vote count'] = df_h_2018['Vote count'].replace([np.nan, np.inf, -np.inf], -1)

# # convert to integer
# df_h_2018['Vote count'] = df_h_2018['Vote count'].astype(int)
# # check data types
# print(df_h_2018.dtypes)

In [18]:
# Turn the House 'Vote count' column into whole numbers, in three steps:
#   1. non-finite values (NaN, +inf, -inf) become the sentinel -1
#   2. the text 'Unopposed' becomes 0
#   3. the column is cast to int
df_h_2018['Vote count'] = (
    df_h_2018['Vote count']
    .replace([np.nan, np.inf, -np.inf], -1)
    .replace('Unopposed', 0)
    .astype(int)
)

# Show the resulting column types
print(df_h_2018.dtypes)


Year            int64
Office         object
State          object
Last Name      object
Party          object
Vote %        float64
Vote count      int32
dtype: object


## Add data to database

In [20]:
# Database settings are read from the standard libpq environment variables
# (PGDATABASE, PGUSER, PGPASSWORD, PGHOST, PGPORT) so that real credentials
# never have to be written into the notebook. When a variable is not set, the
# placeholder value this notebook has always used is taken instead.
DATABASE = os.environ.get("PGDATABASE", "electiondb")
USER = os.environ.get("PGUSER", "postgres")
PASSWORD = os.environ.get("PGPASSWORD", "YourPassword")
HOST = os.environ.get("PGHOST", "localhost")
PORT = os.environ.get("PGPORT", "5432")

# Create a sqlalchemy engine. User name and password are percent-encoded so
# that characters such as '@', ':' or '/' inside them cannot corrupt the URL.
engine = create_engine(
    f"postgresql://{quote(USER, safe='')}:{quote(PASSWORD, safe='')}"
    f"@{HOST}:{PORT}/{DATABASE}"
)

# Upload the data frames to the database using the results table.
# Both appends run inside one transaction: `engine.begin()` commits when the
# block finishes and rolls back if either upload raises, so the table never
# ends up with the Senate rows but not the House rows. The connection is
# released when the block exits. (A separate psycopg2 connection is not
# needed here: pandas writes through SQLAlchemy, so committing on a second,
# unused connection had no effect.)
#
# NOTE: `if_exists="append"` adds the rows again on every run; re-running
# this cell duplicates the 2018 rows in `results`.
# NOTE(review): the table appears to be created by the 2020 presidential
# upload (previously shown here as a commented-out
# `df_p_2020.to_sql("results", engine, if_exists="replace", index=False)`);
# confirm that it has run before appending.
with engine.begin() as connection:
    df_s_2018.to_sql("results", connection, if_exists="append", index=False)
    df_h_2018.to_sql("results", connection, if_exists="append", index=False)


In [21]:
# Database settings are read from the standard libpq environment variables
# (PGDATABASE, PGUSER, PGPASSWORD, PGHOST, PGPORT); the placeholder values
# below are used when a variable is not set. They are defined again here so
# this cell can be run on its own, without re-running the upload cell (which
# would append the 2018 rows a second time).
DATABASE = os.environ.get("PGDATABASE", "electiondb")
USER = os.environ.get("PGUSER", "postgres")
PASSWORD = os.environ.get("PGPASSWORD", "YourPassword")
HOST = os.environ.get("PGHOST", "localhost")
PORT = os.environ.get("PGPORT", "5432")

# pandas documents SQLAlchemy connectables (or sqlite3 connections) as the
# supported `con` types for read_sql_query, so the query goes through an
# engine instead of a raw psycopg2 connection. User name and password are
# percent-encoded so special characters cannot corrupt the URL.
engine = create_engine(
    f"postgresql://{quote(USER, safe='')}:{quote(PASSWORD, safe='')}"
    f"@{HOST}:{PORT}/{DATABASE}"
)

# Query the table and store the results in a Pandas dataframe.
# The `with` block closes the connection even if the query raises.
with engine.connect() as conn:
    df = pd.read_sql_query("SELECT * FROM results", conn)

# Print the dataframe to verify that it contains data
print(df)

# gives 1128 rows for 2020
# gives 2084 total rows for 2020 + 2018

      Year Office State  Last Name Party    Vote %  Vote count
0     2020   Pres    AL      Trump   REP  0.620316     1441170
1     2020   Pres    AL      Biden   DEM  0.365700      849624
2     2020   Pres    AL  Jorgensen   IND  0.010836       25176
3     2020   Pres    AK      Trump   REP  0.528331      189951
4     2020   Pres    AK      Biden   DEM  0.427720      153778
...    ...    ...   ...        ...   ...       ...         ...
2079  2018  House    WI   Driessen   IND  0.013679        4416
2080  2018  House    WI  Gallagher   REP  0.636942      209410
2081  2018  House    WI   Liegeois   DEM  0.362757      119265
2082  2018  House    WY     Cheney   REP  0.635857      127963
2083  2018  House    WY     Hunter   DEM  0.297662       59903

[2084 rows x 7 columns]
