# 2012 Election Data
### Presidential election

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=Warning)
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import cartopy.crs as ccrs
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import psycopg2
from sqlalchemy import create_engine
import xlrd

import cartopy.io.shapereader as shpreader
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
import matplotlib.patches as mpatches
import geopandas as gpd

In [22]:
"""
Extract multiple sheets from the Excel file and load them into Pandas data frames. 
A dictionary allows you to store each data frame under a unique key (the sheet name), 
making it easy to access and work with each data frame later.
"""
# "C:\Users\Owner\Documents\UC_Irvine\STATS170A\FinalProject\Election_Results\2012pres.xls"
# Define the file path to the Excel file
file_path = 'Election_Results/2012pres.xls'

# Define a list of sheet names to extract
sheet_names = ['2012 Pres General Results']

# Create an empty dictionary to store the data frames
data_frames = {}

# Loop over the sheet names and read them into data frames
for name in sheet_names:
    data_frames[name] = pd.read_excel(file_path, sheet_name=name)

# # Print the data frames to verify that they were loaded correctly
# for name, df in data_frames.items():
#     print(f"{name}:")
#     print(df.head())


In [23]:
df_pres_2012 = data_frames['2012 Pres General Results']
df_pres_2012.head()
# 537 rows 14 columns

Unnamed: 0,1,FEC ID,STATE,STATE ABBREVIATION,GENERAL ELECTION DATE,FIRST NAME,LAST NAME,"LAST NAME, FIRST",TOTAL VOTES,PARTY,GENERAL RESULTS,TOTAL VOTES #,GENERAL %,WINNER INDICATOR
0,2,P80003353,Alabama,AL,2012-11-06,Mitt,Romney,"Romney, Mitt",,R,1255925,,0.605458,W
1,3,P80003338,Alabama,AL,2012-11-06,Barack,Obama,"Obama, Barack",,D,795696,,0.38359,
2,4,P20002671,Alabama,AL,2012-11-06,Gary,Johnson,"Johnson, Gary",,IND,12328,,0.005943,
3,5,,Alabama,AL,2012-11-06,,Scattered,Scattered,,W,4011,,0.001934,
4,6,P20003984,Alabama,AL,2012-11-06,Jill,Stein,"Stein, Jill",,IND,3397,,0.001638,


In [24]:
df_sel_2012 = df_pres_2012[['STATE ABBREVIATION', 'LAST NAME', 'PARTY', 'GENERAL %', 'GENERAL RESULTS']]
df_sel_2012.head()

Unnamed: 0,STATE ABBREVIATION,LAST NAME,PARTY,GENERAL %,GENERAL RESULTS
0,AL,Romney,R,0.605458,1255925
1,AL,Obama,D,0.38359,795696
2,AL,Johnson,IND,0.005943,12328
3,AL,Scattered,W,0.001934,4011
4,AL,Stein,IND,0.001638,3397


In [25]:
# select the columns wanted and rename them
df_selection = df_sel_2012[['STATE ABBREVIATION', 'LAST NAME', 'PARTY', 'GENERAL %', 'GENERAL RESULTS']]
df_pres_2012 = df_selection.rename(columns={
    'STATE ABBREVIATION': 'State',
    'LAST NAME': 'Last Name',
    'PARTY': 'Party',
    'GENERAL %': 'Vote %',
    'GENERAL RESULTS': 'Vote count'
})
df_pres_2012.head()
# gives 681 rows

Unnamed: 0,State,Last Name,Party,Vote %,Vote count
0,AL,Romney,R,0.605458,1255925
1,AL,Obama,D,0.38359,795696
2,AL,Johnson,IND,0.005943,12328
3,AL,Scattered,W,0.001934,4011
4,AL,Stein,IND,0.001638,3397


In [26]:
# add year and office columns
df_pres_2012['Year'] = 2012
df_pres_2012['Office'] = 'Pres'

# Arrange columns: Move the Year and Office columns to the front
new_cols = ['Year', 'Office', 'State', 'Last Name', 'Party', 'Vote %', 'Vote count']
df_pres_2012 = df_pres_2012.reindex(columns=new_cols)

# Include only Candidates R, D, IND (independent)
df_pres_2012 = df_pres_2012[df_pres_2012['Party'].isin(['R', 'D', 'IND'])]
df_pres_2012['Party'] = df_pres_2012['Party'].replace('R', 'REP')
df_pres_2012['Party'] = df_pres_2012['Party'].replace('D', 'DEM')

df_pres_2012.head()
# gives 135 rows

Unnamed: 0,Year,Office,State,Last Name,Party,Vote %,Vote count
0,2012,Pres,AL,Romney,REP,0.605458,1255925
1,2012,Pres,AL,Obama,DEM,0.38359,795696
2,2012,Pres,AL,Johnson,IND,0.005943,12328
4,2012,Pres,AL,Stein,IND,0.001638,3397
5,2012,Pres,AL,Goode,IND,0.001437,2981


In [27]:
# Remove any unseen NaN's from num rows.
df_p_2012 = df_pres_2012.dropna(subset=['Vote %'])
df_p_2012 = df_p_2012.dropna(subset=['Vote count'])
# confirms NaN's are gone
print(df_p_2012[df_p_2012['Vote %'].isna()])
print(df_p_2012[df_p_2012['Vote count'].isna()])

Empty DataFrame
Columns: [Year, Office, State, Last Name, Party, Vote %, Vote count]
Index: []
Empty DataFrame
Columns: [Year, Office, State, Last Name, Party, Vote %, Vote count]
Index: []


In [28]:
df_p_2012.dtypes

Year            int64
Office         object
State          object
Last Name      object
Party          object
Vote %        float64
Vote count     object
dtype: object

In [12]:
# # convert Vote count to float for math
# df_p_2012['Vote count'] = df_p_2012['Vote count'].astype(int)
# df_p_2012.dtypes
# # gives ValueError: invalid literal for int() with base 10: '[4,337,622]'

In [13]:
# # convert Vote count to string
# df_p_2012['Vote count'] = df_p_2012['Vote count'].astype(str)

# # replace commas with empty string
# df_p_2012['Vote count'] = df_p_2012['Vote count'].str.replace(',', '').astype(int)

# # check data types
# print(df_p_2012.dtypes)
# # gives ValueError: invalid literal for int() with base 10: '[4337622]'

In [29]:
# column 'Vote count' contains a value that has 
# brackets around it, indicating that it is a list.
# need to extract the values from a list

df_p_2012['Vote count'] = df_p_2012['Vote count'].apply(lambda x: int(x[1:-1].replace(',', '')) if isinstance(x, str) else x)

# now it can be convert to integer
df_p_2012['Vote count'] = df_p_2012['Vote count'].astype(int)

# check data types
print(df_p_2012.dtypes)


Year            int64
Office         object
State          object
Last Name      object
Party          object
Vote %        float64
Vote count      int32
dtype: object


In [30]:
df_p_2012.shape
# gives 14 rows.

(114, 7)

In [31]:
df_p_2012.head(10)

Unnamed: 0,Year,Office,State,Last Name,Party,Vote %,Vote count
0,2012,Pres,AL,Romney,REP,0.605458,1255925
1,2012,Pres,AL,Obama,DEM,0.38359,795696
2,2012,Pres,AL,Johnson,IND,0.005943,12328
4,2012,Pres,AL,Stein,IND,0.001638,3397
5,2012,Pres,AL,Goode,IND,0.001437,2981
7,2012,Pres,AK,Romney,REP,0.548016,164676
8,2012,Pres,AK,Obama,DEM,0.408127,122640
13,2012,Pres,AZ,Romney,REP,0.536545,1233654
14,2012,Pres,AZ,Obama,DEM,0.445898,1025232
24,2012,Pres,AR,Romney,REP,0.605669,647744


# Congress election results 2012

In [None]:
# path to Congress election results 2012
# "C:\Users\Owner\Documents\UC_Irvine\STATS170A\FinalProject\Election_Results\2012congresults.xls"

# Senate & House file and data

In [32]:
"""
Extract multiple sheets from the Excel file and load them into Pandas data frames. 
A dictionary allows you to store each data frame under a unique key (the sheet name), 
making it easy to access and work with each data frame later.
"""
# "C:\Users\Owner\Documents\UC_Irvine\STATS170A\FinalProject\Election_Results\2012congresults.xls"
# Define the file path to the Excel file
file_path = 'Election_Results/2012congresults.xls'

# Define a list of sheet names to extract
sheet_names = ['2012 US House & Senate Results']

# Create an empty dictionary to store the data frames
data_frames = {}

# Loop over the sheet names and read them into data frames
for name in sheet_names:
    data_frames[name] = pd.read_excel(file_path, sheet_name=name)

# Print the data frames to verify that they were loaded correctly
# for name, df in data_frames.items():
#     print(f"{name}:")
#     print(df.head())

In [33]:
df_congress_2012 = data_frames['2012 US House & Senate Results']
df_congress_2012.head()
# 537 rows 14 columns

Unnamed: 0,1,STATE ABBREVIATION,STATE,D,FEC ID#,(I),CANDIDATE NAME (First),CANDIDATE NAME (Last),CANDIDATE NAME,TOTAL VOTES,...,RUNOFF VOTES,RUNOFF %,GENERAL VOTES,GENERAL %,GE RUNOFF ELECTION VOTES (LA),GE RUNOFF ELECTION % (LA),"COMBINED GE PARTY TOTALS (CT, NY, SC)","COMBINED % (CT, NY, SC)",GE WINNER INDICATOR,FOOTNOTES
0,2,AL,Alabama,,,,,,DISTRICT 1,,...,,,,,,,,,,
1,3,AL,Alabama,1.0,H2AL01077,(I),Jo,Bonner,"Bonner, Jo",,...,,,196374.0,0.978562,,,,,W,
2,4,AL,Alabama,1.0,H2AL01176,,Dean,Young,"Young, Dean",,...,,,,,,,,,,
3,5,AL,Alabama,1.0,H2AL01184,,Pete,Riehm,"Riehm, Pete",,...,,,,,,,,,,
4,6,AL,Alabama,1.0,H0AL01030,,Peter,Gounares,"Gounares, Peter",,...,,,,,,,,,,


In [34]:
df_selection = df_congress_2012[['D', 'STATE ABBREVIATION', 'CANDIDATE NAME (Last)', 'PARTY', 'GENERAL %', 'GENERAL VOTES ']]
df_con_2012 = df_selection.rename(columns={
    'D': 'Office',
    'STATE ABBREVIATION': 'State',
    'CANDIDATE NAME (Last)': 'Last Name',
    'PARTY': 'Party',
    'GENERAL %': 'Vote %',
    'GENERAL VOTES ': 'Vote count'
})
df_con_2012.head()

Unnamed: 0,Office,State,Last Name,Party,Vote %,Vote count
0,,AL,,,,
1,1.0,AL,Bonner,R,0.978562,196374.0
2,1.0,AL,Young,R,,
3,1.0,AL,Riehm,R,,
4,1.0,AL,Gounares,R,,


In [35]:
# Remove any unseen NaN's from num rows.
df_c_2012 = df_con_2012.dropna(subset=['Vote %'])
df_c_2012 = df_c_2012.dropna(subset=['Vote count'])
# confirms NaN's are gone
print(df_c_2012[df_c_2012['Vote %'].isna()])
print(df_c_2012[df_c_2012['Vote count'].isna()])

Empty DataFrame
Columns: [Office, State, Last Name, Party, Vote %, Vote count]
Index: []
Empty DataFrame
Columns: [Office, State, Last Name, Party, Vote %, Vote count]
Index: []


In [36]:
df_c_2012.head()

Unnamed: 0,Office,State,Last Name,Party,Vote %,Vote count
1,1,AL,Bonner,R,0.978562,196374
6,1,AL,Scattered,W,0.021438,4302
9,2,AL,Roby,R,0.635989,180591
10,2,AL,Ford,D,0.36306,103092
11,2,AL,Scattered,W,0.000951,270


In [37]:
# Replace the values in the 'D' column that will become the 'office' column
df_c_2012['Office'] = df_c_2012['Office'].apply(lambda x: 'House' if x.isdigit() else 'Senate')

df_c_2012.head()

Unnamed: 0,Office,State,Last Name,Party,Vote %,Vote count
1,House,AL,Bonner,R,0.978562,196374
6,House,AL,Scattered,W,0.021438,4302
9,House,AL,Roby,R,0.635989,180591
10,House,AL,Ford,D,0.36306,103092
11,House,AL,Scattered,W,0.000951,270


In [41]:
# add year and office columns
df_c_2012['Year'] = 2012

# Move the Year and Office columns to the front
new_cols = ['Year', 'Office', 'State', 'Last Name', 'Party', 'Vote %', 'Vote count']
df_c_2012 = df_c_2012.reindex(columns=new_cols)

# Include only Candidates R, D, IND (independent)
df_c_2012 = df_c_2012[df_c_2012['Party'].isin(['R', 'D', 'IND'])]
df_c_2012['Party'] = df_c_2012['Party'].replace('R', 'REP')
df_c_2012['Party'] = df_c_2012['Party'].replace('D', 'DEM')

df_c_2012.head(10)

Unnamed: 0,Year,Office,State,Last Name,Party,Vote %,Vote count
74,2012,House,AS,Lancaster,IND,0.053304,697
1181,2012,House,ID,Pro-Life,IND,0.024018,7607
1215,2012,House,IL,Lewis,IND,0.134378,40006
1256,2012,House,IL,Monaghan,IND,0.04372,12523
1319,2012,House,IL,Hartman,IND,0.072419,21319
1564,2012,House,KY,Beacham,IND,0.022333,6304
1572,2012,House,KY,Devore,IND,0.014935,4819
1586,2012,Senate,KY,Lewis,IND,0.028967,8674
1591,2012,Senate,KY,Lewis,IND,0.034358,9987
1605,2012,House,KY,Vance,IND,0.027525,8340


In [42]:
df_c_2012.dtypes

Year            int64
Office         object
State          object
Last Name      object
Party          object
Vote %        float64
Vote count     object
dtype: object

In [43]:
# convert Vote count to float for math
df_c_2012['Vote count'] = df_c_2012['Vote count'].astype(int)
df_c_2012.dtypes

Year            int64
Office         object
State          object
Last Name      object
Party          object
Vote %        float64
Vote count      int32
dtype: object

In [44]:
df_c_2012.head()
#gives 935 rows

Unnamed: 0,Year,Office,State,Last Name,Party,Vote %,Vote count
74,2012,House,AS,Lancaster,IND,0.053304,697
1181,2012,House,ID,Pro-Life,IND,0.024018,7607
1215,2012,House,IL,Lewis,IND,0.134378,40006
1256,2012,House,IL,Monaghan,IND,0.04372,12523
1319,2012,House,IL,Hartman,IND,0.072419,21319


# Add to Database

In [None]:
# in postgreSQL created database called: electiondb
# create table in the postgresSQL first then ran code below
# CREATE TABLE results (
#     year INTEGER NOT NULL,
#     office VARCHAR(50) NOT NULL,
#     state VARCHAR(50) NOT NULL,
#     last_name VARCHAR(50) NOT NULL,
#     party VARCHAR(50) NOT NULL,
#     vote_percentage NUMERIC(5, 2) NOT NULL,
#     vote_count BIGINT NOT NULL,
#     PRIMARY KEY (year, office, state, last_name, party)
# );

# run this line in postgresSQL
# GRANT ALL PRIVILEGES ON DATABASE electiondb TO postgres;

In [45]:
# Replace the values below with your database credentials
DATABASE = "electiondb"
USER = "postgres"
PASSWORD = "YourPassword"
HOST = "localhost"
PORT = "5432"

# Connect to your PostgreSQL database
conn = psycopg2.connect(
    database=DATABASE,
    user=USER,
    password=PASSWORD,
    host=HOST,
    port=PORT
)

# Create a sqlalchemy engine
engine = create_engine(f"postgresql://{USER}:{PASSWORD}@{HOST}:{PORT}/{DATABASE}")

# Upload the data frames to the database
df_p_2012.to_sql("results", engine, if_exists="append", index=False)
print("Presidential table loaded successfully")
df_c_2012.to_sql("results", engine, if_exists="append", index=False)

# Commit the changes to the database
conn.commit()

# Close the database connection
conn.close()


Presidential table loaded successfully


In [46]:
# Replace the values below with your database credentials
DATABASE = "electiondb"
USER = "postgres"
PASSWORD = "YourPassword"
HOST = "localhost"
PORT = "5432"

# Connect to the PostgreSQL database
conn = psycopg2.connect(database=DATABASE, user=USER, password=PASSWORD, host=HOST, port=PORT)

# Query the table and store the results in a Pandas dataframe
df = pd.read_sql_query("SELECT * FROM results", conn)

# Print the dataframe to verify that it contains data
print(df)

# Close the database connection
conn.close()
# gives total 5133 rows for 2020-2012

      Year Office State  Last Name Party    Vote %  Vote count
0     2020   Pres    AL      Trump   REP  0.620316     1441170
1     2020   Pres    AL      Biden   DEM  0.365700      849624
2     2020   Pres    AL  Jorgensen   IND  0.010836       25176
3     2020   Pres    AK      Trump   REP  0.528331      189951
4     2020   Pres    AK      Biden   DEM  0.427720      153778
...    ...    ...   ...        ...   ...       ...         ...
4253  2012  House    VA     Howell   IND  0.028987       10180
4254  2012  House    VA   Chisholm   IND  0.026894        9855
4255  2012  House    VA     Gibson   IND  0.011455        3806
4256  2012  House    VA    DeCarlo   IND  0.009111        3027
4257  2012  House    WI    Raymond   IND  0.028476        9277

[4258 rows x 7 columns]
