# 2014 Election Data
### Non-Presidential election

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=Warning)
import os
from urllib.parse import quote

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import cartopy.crs as ccrs
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import psycopg2
from sqlalchemy import create_engine

import cartopy.io.shapereader as shpreader
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
import matplotlib.patches as mpatches
import geopandas as gpd

In [2]:
# Extract the Senate and House sheets from the Excel workbook into pandas
# data frames. The frames are stored in a dictionary keyed by sheet name, so
# each one can be looked up by name in later cells.

# Define the file path to the Excel file
file_path = 'Election_Results/results2014.xls'

# Define a list of sheet names to extract
sheet_names = ['2014 US Senate Results by State',
               '2014 US House Results by State']

# Passing the whole list lets pandas read the workbook in a single call and
# hand back a {sheet name: DataFrame} dictionary, rather than re-reading the
# file once per sheet.
data_frames = pd.read_excel(file_path, sheet_name=sheet_names)


In [3]:
# Pull the Senate sheet out of the dictionary of loaded sheets and preview
# its first rows.
df_senate_2014 = data_frames['2014 US Senate Results by State']
df_senate_2014.head()
# 681 rows 18 columns

Unnamed: 0,1,STATE ABBREVIATION,STATE,D,FEC ID#,(I),CANDIDATE NAME (First),CANDIDATE NAME (Last),CANDIDATE NAME,TOTAL VOTES,...,RUNOFF VOTES,RUNOFF %,GENERAL VOTES,GENERAL %,GE RUNOFF ELECTION VOTES (LA),GE RUNOFF ELECTION % (LA),"COMBINED GE PARTY TOTALS (CT, NY, SC)","COMBINED % (CT, NY, SC)",GE WINNER INDICATOR,FOOTNOTES
0,2,AL,Alabama,S,S6AL00195,(I),Jeff,Sessions,"Sessions, Jeff",,...,,,795606.0,0.972516,,,,,W,
1,3,AL,Alabama,S,,,,Scattered,Scattered,,...,,,22484.0,0.173714,,,,,,
2,4,AL,Alabama,S,,,,,,Total State Votes:,...,,,818090.0,,,,,,,
3,5,,,,,,,,,,...,,,,,,,,,,
4,6,AK,Alaska,S,S4AK00214,,Dan,Sullivan,"Sullivan, Dan",,...,,,135445.0,0.479621,,,,,W,


In [4]:
# Source column -> short column name used for the rest of the notebook.
# The key 'GENERAL VOTES ' intentionally ends with a space: that is how the
# header is selected from the sheet.
senate_column_labels = {
    'STATE ABBREVIATION': 'State',
    'CANDIDATE NAME (Last)': 'Last Name',
    'PARTY': 'Party',
    'GENERAL %': 'Vote %',
    'GENERAL VOTES ': 'Vote count',
}

# Keep only the mapped columns (in mapping order), then apply the short names.
df_selection = df_senate_2014[list(senate_column_labels)]
df_sen_2014 = df_selection.rename(columns=senate_column_labels)
df_sen_2014.head()

Unnamed: 0,State,Last Name,Party,Vote %,Vote count
0,AL,Sessions,R,0.972516,795606.0
1,AL,Scattered,W,0.173714,22484.0
2,AL,,,,818090.0
3,,,,,
4,AK,Sullivan,R,0.479621,135445.0


In [5]:
# add year and office columns
df_sen_2014['Year'] = 2014
df_sen_2014['Office'] = 'Senate'

# Move the Year and Office columns to the front
new_cols = ['Year', 'Office', 'State', 'Last Name', 'Party', 'Vote %', 'Vote count']
df_sen_2014 = df_sen_2014.reindex(columns=new_cols)

# Include only Candidates R, D, IND (independent)
df_sen_2014 = df_sen_2014[df_sen_2014['Party'].isin(['R', 'D', 'IND'])]
df_sen_2014['Party'] = df_sen_2014['Party'].replace('R', 'REP')
df_sen_2014['Party'] = df_sen_2014['Party'].replace('D', 'DEM')

df_sen_2014.head()
# give 276 rows and 7 columns

Unnamed: 0,Year,Office,State,Last Name,Party,Vote %,Vote count
0,2014,Senate,AL,Sessions,REP,0.972516,795606.0
4,2014,Senate,AK,Sullivan,REP,0.479621,135445.0
5,2014,Senate,AK,Miller,REP,,
6,2014,Senate,AK,Treadwell,REP,,
7,2014,Senate,AK,Jaramillo,REP,,


In [6]:
# Drop every row that is missing either numeric field.
senate_numeric_fields = ['Vote %', 'Vote count']
df_s_2014 = df_sen_2014.dropna(subset=senate_numeric_fields)

# Confirm the NaN's are gone: each print should show an empty frame.
for numeric_field in senate_numeric_fields:
    print(df_s_2014[df_s_2014[numeric_field].isna()])

Empty DataFrame
Columns: [Year, Office, State, Last Name, Party, Vote %, Vote count]
Index: []
Empty DataFrame
Columns: [Year, Office, State, Last Name, Party, Vote %, Vote count]
Index: []


In [7]:
# Store the vote totals as integers (they are displayed as floats such as
# 795606.0 up to this point), then show the resulting column types.
df_s_2014 = df_s_2014.astype({'Vote count': int})
df_s_2014.dtypes

Year            int64
Office         object
State          object
Last Name      object
Party          object
Vote %        float64
Vote count      int32
dtype: object

In [8]:
# Preview the cleaned Senate rows after the NaN drop and integer conversion.
df_s_2014.head()

Unnamed: 0,Year,Office,State,Last Name,Party,Vote %,Vote count
0,2014,Senate,AL,Sessions,REP,0.972516,795606
4,2014,Senate,AK,Sullivan,REP,0.479621,135445
9,2014,Senate,AK,Begich,DEM,0.458325,129431
23,2014,Senate,AR,Cotton,REP,0.564975,478819
24,2014,Senate,AR,Pryor,DEM,0.394303,334174


## 2014 House Data

In [9]:
# Pull the House sheet out of the dictionary of loaded sheets and preview
# its first rows.
df_house_2014 = data_frames['2014 US House Results by State']
df_house_2014.head()
# 4131 rows 23 columns

Unnamed: 0,1,STATE ABBREVIATION,STATE,D,FEC ID#,(I),CANDIDATE NAME (First),CANDIDATE NAME (Last),CANDIDATE NAME,TOTAL VOTES,...,RUNOFF VOTES,RUNOFF %,GENERAL VOTES,GENERAL %,GE RUNOFF ELECTION VOTES (LA),GE RUNOFF ELECTION % (LA),"COMBINED GE PARTY TOTALS (CT, NY, SC)","COMBINED % (CT, NY, SC)",GE WINNER INDICATOR,FOOTNOTES
0,2,AL,Alabama,1.0,H4AL01123,(I),Bradley,Byrne,"Byrne, Bradley",,...,,,103758.0,0.681569,,,,,W,
1,3,AL,Alabama,1.0,H4AL01156,,Burton R.,LeFlore,"LeFlore, Burton R.",,...,,,48278.0,0.31713,,,,,,
2,4,AL,Alabama,1.0,,,,,Scattered,,...,,,198.0,0.001301,,,,,,
3,5,AL,Alabama,1.0,,,,,,District Votes:,...,,,152234.0,,,,,,,
4,6,AL,Alabama,,,,,,,,...,,,,,,,,,,


In [10]:
# Source column -> short column name used for the rest of the notebook.
# The key 'GENERAL VOTES ' intentionally ends with a space: that is how the
# header is selected from the sheet.
house_column_labels = {
    'STATE ABBREVIATION': 'State',
    'CANDIDATE NAME (Last)': 'Last Name',
    'PARTY': 'Party',
    'GENERAL %': 'Vote %',
    'GENERAL VOTES ': 'Vote count',
}

# Keep only the mapped columns (in mapping order), then apply the short names.
df_selection = df_house_2014[list(house_column_labels)]
df_hou_2014 = df_selection.rename(columns=house_column_labels)
df_hou_2014.head()
# gives 4131 rows

Unnamed: 0,State,Last Name,Party,Vote %,Vote count
0,AL,Byrne,R,0.681569,103758.0
1,AL,LeFlore,D,0.31713,48278.0
2,AL,,W,0.001301,198.0
3,AL,,,,152234.0
4,AL,,,,


In [11]:
# add year and office columns
df_hou_2014['Year'] = 2014
df_hou_2014['Office'] = 'House'

# Move the Year and Office columns to the front
new_cols = ['Year', 'Office', 'State', 'Last Name', 'Party', 'Vote %', 'Vote count']
df_hou_2014 = df_hou_2014.reindex(columns=new_cols)

# Include only Candidates R, D, IND (independent)
df_hou_2014 = df_hou_2014[df_hou_2014['Party'].isin(['R', 'D', 'IND'])]
df_hou_2014['Party'] = df_hou_2014['Party'].replace('R', 'REP')
df_hou_2014['Party'] = df_hou_2014['Party'].replace('D', 'DEM')

df_hou_2014.head()
# 2096 rows 7 columns

Unnamed: 0,Year,Office,State,Last Name,Party,Vote %,Vote count
0,2014,House,AL,Byrne,REP,0.681569,103758
1,2014,House,AL,LeFlore,DEM,0.31713,48278
5,2014,House,AL,Roby,REP,0.673425,113103
6,2014,House,AL,Wright,DEM,0.325641,54692
10,2014,House,AL,Rogers,REP,0.661205,103558


In [12]:
# Drop every row that is missing either numeric field.
house_numeric_fields = ['Vote %', 'Vote count']
df_h_2014 = df_hou_2014.dropna(subset=house_numeric_fields)

# Confirm the NaN's are gone: each print should show an empty frame.
for numeric_field in house_numeric_fields:
    print(df_h_2014[df_h_2014[numeric_field].isna()])

Empty DataFrame
Columns: [Year, Office, State, Last Name, Party, Vote %, Vote count]
Index: []
Empty DataFrame
Columns: [Year, Office, State, Last Name, Party, Vote %, Vote count]
Index: []


In [13]:
# Inspect the column types before converting 'Vote count' in the next cell.
df_h_2014.dtypes

Year            int64
Office         object
State          object
Last Name      object
Party          object
Vote %        float64
Vote count     object
dtype: object

In [14]:
# Store the vote totals as integers so they can be used in arithmetic, then
# show the resulting column types.
df_h_2014 = df_h_2014.astype({'Vote count': int})
df_h_2014.dtypes

Year            int64
Office         object
State          object
Last Name      object
Party          object
Vote %        float64
Vote count      int32
dtype: object

In [15]:
# Preview the cleaned House rows after the NaN drop and integer conversion.
df_h_2014.head()

Unnamed: 0,Year,Office,State,Last Name,Party,Vote %,Vote count
0,2014,House,AL,Byrne,REP,0.681569,103758
1,2014,House,AL,LeFlore,DEM,0.31713,48278
5,2014,House,AL,Roby,REP,0.673425,113103
6,2014,House,AL,Wright,DEM,0.325641,54692
10,2014,House,AL,Rogers,REP,0.661205,103558


# Add to Database

In [16]:
# Append the cleaned 2014 Senate and House rows to the "results" table.

# Database settings come from the standard PostgreSQL environment variables so
# that real credentials never have to be written into the notebook. The
# placeholder values are kept as fallbacks for a local setup.
DATABASE = os.environ.get("PGDATABASE", "electiondb")
USER = os.environ.get("PGUSER", "postgres")
PASSWORD = os.environ.get("PGPASSWORD", "YourPassword")
HOST = os.environ.get("PGHOST", "localhost")
PORT = os.environ.get("PGPORT", "5432")

# Create a sqlalchemy engine. The user name and password are percent-encoded
# because characters such as '@', ':' or '/' would otherwise be read as part
# of the URL structure.
engine = create_engine(
    f"postgresql://{quote(USER, safe='')}:{quote(PASSWORD, safe='')}"
    f"@{HOST}:{PORT}/{DATABASE}"
)

# pandas writes through the SQLAlchemy engine, so no separate psycopg2
# connection is needed in this cell.
# Both uploads run inside one transaction: engine.begin() commits when the
# block finishes and rolls back if either upload raises, so the table never
# ends up with the Senate rows but not the House rows.
# NOTE: if_exists="append" means running this cell again inserts the same rows
# a second time.
try:
    with engine.begin() as connection:
        df_s_2014.to_sql("results", connection, if_exists="append", index=False)
        df_h_2014.to_sql("results", connection, if_exists="append", index=False)
finally:
    # Release the pooled connections whether or not the upload succeeded.
    engine.dispose()


In [17]:
# Read the whole "results" table back to check that the upload worked.

# Database settings come from the standard PostgreSQL environment variables so
# that real credentials never have to be written into the notebook. The
# placeholder values are kept as fallbacks for a local setup.
DATABASE = os.environ.get("PGDATABASE", "electiondb")
USER = os.environ.get("PGUSER", "postgres")
PASSWORD = os.environ.get("PGPASSWORD", "YourPassword")
HOST = os.environ.get("PGHOST", "localhost")
PORT = os.environ.get("PGPORT", "5432")

# Connect to the PostgreSQL database
conn = psycopg2.connect(database=DATABASE, user=USER, password=PASSWORD, host=HOST, port=PORT)
try:
    # Query the table and store the results in a Pandas dataframe
    df = pd.read_sql_query("SELECT * FROM results", conn)
finally:
    # Close the database connection even if the query fails
    conn.close()

# Print the dataframe to verify that it contains data
print(df)
# the recorded run returned 4084 rows (index 0-4083) for 2020-2014

      Year Office State  Last Name Party    Vote %  Vote count
0     2020   Pres    AL      Trump   REP  0.620316     1441170
1     2020   Pres    AL      Biden   DEM  0.365700      849624
2     2020   Pres    AL  Jorgensen   IND  0.010836       25176
3     2020   Pres    AK      Trump   REP  0.528331      189951
4     2020   Pres    AK      Biden   DEM  0.427720      153778
...    ...    ...   ...        ...   ...       ...         ...
4079  2014  House    WI   Westlund   DEM  0.394096      112949
4080  2014  House    WI     Ribble   REP  0.650075      188553
4081  2014  House    WI     Gruett   DEM  0.349408      101345
4082  2014  House    WY     Lummis   REP  0.684664      113038
4083  2014  House    WY    Grayson   DEM  0.228970       37803

[4084 rows x 7 columns]


In [None]:
# Key to the party abbreviations (abbreviation = party name)
# AE	=	 Americans Elect
# AFC 	=	Allen 4 Congress
# AIP 	=	American Independent
# AKI 	=	Alaskan Independence
# ALP 	=	American Labor Party
# AM  	=	American Party
# AMC 	=	American Constitution Party
# BBH 	=	Bullying Breaks Hearts
# BP 	=	By Petition
# BQT	=	 Bob Quast for Term Limits
# CIT	=	Citizens Party
# CN 	=	Change is Needed
# CON 	=	Constitution
# CRV 	=	Conservative
# D 	=	Democratic
# DCG 	=	D.C. Statehood Green
# DFL 	=	Democratic-Farmer-Labor
# DNL 	=	Democratic-Nonpartisan League
# DRP 	=	D-R Party
# EG 	=	Economic Growth
# ENI 	=	Energy Independence
# FA 	=	For Americans
# FEP 	=	Flourish Every Person
# FV 	=	Future.Vision.
# GOP 	=	G.O.P. Party
# GRE	=	Green
# HRP	=	Human Rights Party
# IAP 	=	Independent American Party
# IDP 	=	Independence
# IGR 	=	Independent Green
# IND  	=	Independent
# IP 	=	Independent Party
# JP 	=	José Peñalosa
# LBF 	=	Libertarian Party of Florida
# LBR  	=	Labor
# LBU	=	 Liberty Union
# LIB   	=	Libertarian
# LMP 	=	Legalize Marijuana Party
# MSC 	=	Send Mr. Smith
# MTP 	=	Mountain
# N 	=	Nonpartisan
# NAF 	=	Nonaffiliated
# NLP	=	Natural Law Party
# NNE	=	None
# NOP	=	No Party Preference
# NPA 	=	No Party Affiliation
# NUP	=	National Union Party
# OP 	=	Of The People
# PAC 	=	Politicians are Crooks
# PAF 	=	Peace and Freedom
# PC 	=	Petitioning Candidate
# PET	=	Petition
# PG 	=	Pacific Green
# PRO 	=	Progressive
# R	=	Republican
# REF 	=	Reform
# SBP 	=	Stop Boss Politics
# SC 	=	Start the Conversation
# SI 	=	Seeking Inclusion
# TN 	=	911 Truth Needed
# TRP 	=	Tax Revolt
# TVH 	=	Truth Vision Hope
# UN 	=	Unaffiliated
# UPC 	=	Unity Party of Colorado
# UST 	=	U.S. Taxpayers Party
# W 	=	Write-In
# WDB 	=	We Deserve Better
# WF 	=	Working Families
# WU 	=	Wake Up USA
# WWP	=	Work and Wealth Party
