In [1]:
import warnings
warnings.simplefilter(action='ignore', category=Warning)
import os
from urllib.parse import quote

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import cartopy.crs as ccrs
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

import cartopy.io.shapereader as shpreader
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
import matplotlib.patches as mpatches
import geopandas as gpd

import psycopg2
from sqlalchemy import create_engine

Data source: U.S. Census Bureau voting and registration data tables — https://www.census.gov/topics/public-sector/voting/data/tables.2010.List_1863097513.html#list-tab-List_1863097513

In [2]:
# Load sheet 'Table 4a' of the 2012 workbook; the column labels are taken
# from the row at 0-based position 4.
df_votes_2012 = pd.read_excel(
    io='Election_Results/vote2012.xls',
    sheet_name='Table 4a',
    header=[4],
)

# Show the loaded frame as the cell output
df_votes_2012

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Total registered,Percent registered\n(Total),Margin of Error 1,Percent registered\n(Citizen),Margin of Error,Total voted,Percent voted\n(Total),Margin of Error .1,Percent voted\n(Citizen),Margin of Error .2
0,UNITED STATES,235248.0,215081.0,153157.0,65.1,0.3,71.2,0.3,132948.0,56.5,0.3,61.8,0.3
1,ALABAMA,3594.0,3479.0,2556.0,71.1,2.2,73.5,2.2,2154.0,59.9,2.4,61.9,2.4
2,ALASKA,516.0,495.0,361.0,69.9,2.4,72.8,2.4,289.0,56.0,2.6,58.4,2.6
3,ARIZONA,4863.0,4314.0,2812.0,57.8,2.1,65.2,2.2,2412.0,49.6,2.2,55.9,2.3
4,ARKANSAS,2198.0,2109.0,1376.0,62.6,2.4,65.3,2.4,1124.0,51.1,2.5,53.3,2.6
5,CALIFORNIA,28357.0,23419.0,15356.0,54.2,0.9,65.6,0.9,13462.0,47.5,0.9,57.5,1.0
6,COLORADO,3817.0,3544.0,2635.0,69.0,2.3,74.4,2.2,2495.0,65.4,2.3,70.4,2.3
7,CONNECTICUT,2726.0,2499.0,1760.0,64.6,2.5,70.4,2.5,1568.0,57.5,2.5,62.7,2.6
8,DELAWARE,693.0,641.0,470.0,67.8,2.4,73.3,2.4,431.0,62.2,2.5,67.3,2.5
9,DISTRICT OF COLUMBIA,517.0,461.0,385.0,74.4,2.3,83.4,2.1,350.0,67.7,2.5,75.9,2.4


In [4]:
# Final column order: Year first, then the renamed source columns
new_cols = ['Year', 'State', 'Population', 'Registered', 'Voted']

# Source column label -> readable name (only these four columns are kept)
column_names = {
    'Unnamed: 0': 'State',
    'Unnamed: 1': 'Population',
    'Total registered': 'Registered',
    'Total voted': 'Voted',
}
df_selection = df_votes_2012[list(column_names)]

# Rename, tag every row with the election year, then drop the last 3 rows
# (trailing text rows in the sheet, per the original note - TODO confirm the count).
trimmed = (
    df_selection
    .rename(columns=column_names)
    .assign(Year=2012)
    .iloc[:-3]
)

# The first row (the UNITED STATES total in the loaded sheet) is moved to the
# end so that the individual states come first.
first_row = trimmed.iloc[[0]].copy()
df_votes = (
    pd.concat([trimmed.iloc[1:].copy(), first_row])
    .reset_index(drop=True)      # fresh 0..n-1 index after the reshuffle
    .reindex(columns=new_cols)   # put the Year column in front
)

df_votes
# values are in the thousands

Unnamed: 0,Year,State,Population,Registered,Voted
0,2012,ALABAMA,3594.0,2556.0,2154.0
1,2012,ALASKA,516.0,361.0,289.0
2,2012,ARIZONA,4863.0,2812.0,2412.0
3,2012,ARKANSAS,2198.0,1376.0,1124.0
4,2012,CALIFORNIA,28357.0,15356.0,13462.0
5,2012,COLORADO,3817.0,2635.0,2495.0
6,2012,CONNECTICUT,2726.0,1760.0,1568.0
7,2012,DELAWARE,693.0,470.0,431.0
8,2012,DISTRICT OF COLUMBIA,517.0,385.0,350.0
9,2012,FLORIDA,15034.0,9102.0,8107.0


In [5]:
# Upper-case state name -> two-letter abbreviation: the 50 states, the
# District of Columbia, and 'US' for the UNITED STATES total row.
state_abbr = {
    'ALABAMA': 'AL', 'ALASKA': 'AK', 'ARIZONA': 'AZ', 'ARKANSAS': 'AR',
    'CALIFORNIA': 'CA', 'COLORADO': 'CO', 'CONNECTICUT': 'CT', 'DELAWARE': 'DE',
    'DISTRICT OF COLUMBIA': 'DC', 'FLORIDA': 'FL', 'GEORGIA': 'GA', 'HAWAII': 'HI',
    'IDAHO': 'ID', 'ILLINOIS': 'IL', 'INDIANA': 'IN', 'IOWA': 'IA',
    'KANSAS': 'KS', 'KENTUCKY': 'KY', 'LOUISIANA': 'LA', 'MAINE': 'ME',
    'MARYLAND': 'MD', 'MASSACHUSETTS': 'MA', 'MICHIGAN': 'MI', 'MINNESOTA': 'MN',
    'MISSISSIPPI': 'MS', 'MISSOURI': 'MO', 'MONTANA': 'MT', 'NEBRASKA': 'NE',
    'NEVADA': 'NV', 'NEW HAMPSHIRE': 'NH', 'NEW JERSEY': 'NJ', 'NEW MEXICO': 'NM',
    'NEW YORK': 'NY', 'NORTH CAROLINA': 'NC', 'NORTH DAKOTA': 'ND', 'OHIO': 'OH',
    'OKLAHOMA': 'OK', 'OREGON': 'OR', 'PENNSYLVANIA': 'PA', 'RHODE ISLAND': 'RI',
    'SOUTH CAROLINA': 'SC', 'SOUTH DAKOTA': 'SD', 'TENNESSEE': 'TN', 'TEXAS': 'TX',
    'UTAH': 'UT', 'VERMONT': 'VT', 'VIRGINIA': 'VA', 'WASHINGTON': 'WA',
    'WEST VIRGINIA': 'WV', 'WISCONSIN': 'WI', 'WYOMING': 'WY',
    # National total row
    'UNITED STATES': 'US',
}

In [6]:
# Replace the full state names with their abbreviations.
# Names are stripped and upper-cased first so stray whitespace or casing in
# the sheet cannot cause a silent miss (Series.map yields NaN for any key
# that is not in the dict).
state_names = df_votes['State'].str.strip().str.upper()

# Values that already are abbreviations are kept as they are. Without this,
# running the cell a second time would map 'AL', 'AK', ... to NaN and wipe
# the whole column.
already_abbreviated = state_names.where(state_names.isin(list(state_abbr.values())))

# Anything that is neither a known name nor a known abbreviation stays NaN.
df_votes['State'] = state_names.map(state_abbr).fillna(already_abbreviated)
df_votes

Unnamed: 0,Year,State,Population,Registered,Voted
0,2012,AL,3594.0,2556.0,2154.0
1,2012,AK,516.0,361.0,289.0
2,2012,AZ,4863.0,2812.0,2412.0
3,2012,AR,2198.0,1376.0,1124.0
4,2012,CA,28357.0,15356.0,13462.0
5,2012,CO,3817.0,2635.0,2495.0
6,2012,CT,2726.0,1760.0,1568.0
7,2012,DE,693.0,470.0,431.0
8,2012,DC,517.0,385.0,350.0
9,2012,FL,15034.0,9102.0,8107.0


In [7]:
# Turnout ratios: votes cast as a fraction of registered voters
# (Norm_Voter_Reg) and as a fraction of the population column (Norm_Voter_Pop).
df_votes['Norm_Voter_Reg'] = df_votes['Voted'].div(df_votes['Registered'])
df_votes['Norm_Voter_Pop'] = df_votes['Voted'].div(df_votes['Population'])
df_votes

Unnamed: 0,Year,State,Population,Registered,Voted,Norm_Voter_Reg,Norm_Voter_Pop
0,2012,AL,3594.0,2556.0,2154.0,0.842723,0.599332
1,2012,AK,516.0,361.0,289.0,0.800554,0.560078
2,2012,AZ,4863.0,2812.0,2412.0,0.857752,0.49599
3,2012,AR,2198.0,1376.0,1124.0,0.81686,0.511374
4,2012,CA,28357.0,15356.0,13462.0,0.876661,0.474733
5,2012,CO,3817.0,2635.0,2495.0,0.946869,0.653655
6,2012,CT,2726.0,1760.0,1568.0,0.890909,0.575202
7,2012,DE,693.0,470.0,431.0,0.917021,0.621934
8,2012,DC,517.0,385.0,350.0,0.909091,0.676983
9,2012,FL,15034.0,9102.0,8107.0,0.890683,0.539244


# Add to the SQL database

In [8]:
# Connection settings are read from the standard libpq environment variables
# so real credentials never have to be written into the notebook. The
# fallbacks are the original placeholder values.
DATABASE = os.environ.get("PGDATABASE", "electiondb")
USER = os.environ.get("PGUSER", "postgres")
PASSWORD = os.environ.get("PGPASSWORD", "YourPassword")
HOST = os.environ.get("PGHOST", "localhost")
PORT = os.environ.get("PGPORT", "5432")

# Create a sqlalchemy engine. User and password are percent-encoded because
# characters such as '@', ':' or '/' would otherwise be read as URL delimiters.
engine = create_engine(
    f"postgresql://{quote(USER, safe='')}:{quote(PASSWORD, safe='')}"
    f"@{HOST}:{PORT}/{DATABASE}"
)

try:
    # Upload the data frame to the "voters" table. The write goes through the
    # engine alone, so no separate psycopg2 connection/commit is required.
    # NOTE: if_exists="append" inserts the rows again every time this cell runs.
    df_votes.to_sql("voters", engine, if_exists="append", index=False)
    print("Votes table loaded successfully")
finally:
    # Release the pooled connections even when the upload fails
    engine.dispose()


Votes table loaded successfully


In [9]:
# Connection settings are read from the standard libpq environment variables
# so real credentials never have to be written into the notebook. The
# fallbacks are the original placeholder values.
DATABASE = os.environ.get("PGDATABASE", "electiondb")
USER = os.environ.get("PGUSER", "postgres")
PASSWORD = os.environ.get("PGPASSWORD", "YourPassword")
HOST = os.environ.get("PGHOST", "localhost")
PORT = os.environ.get("PGPORT", "5432")

# pandas documents an SQLAlchemy connectable for read_sql_query, so the query
# runs through an engine rather than a raw psycopg2 connection. User and
# password are percent-encoded so special characters cannot break the URL.
engine = create_engine(
    f"postgresql://{quote(USER, safe='')}:{quote(PASSWORD, safe='')}"
    f"@{HOST}:{PORT}/{DATABASE}"
)

try:
    # Query the table and store the results in a pandas DataFrame
    df = pd.read_sql_query("SELECT * FROM voters", engine)
finally:
    # Release the pooled connections even when the query fails
    engine.dispose()

# Print the dataframe to verify that it contains data
print(df)
# gives 1128 rows for 2020

     Year State  Population  Registered   Voted  Norm_Voter_Reg   
0    2020    AL      3769.0        2527    2247        0.889197  \
1    2020    AK       528.0         383     330        0.861619   
2    2020    AZ      5638.0        3878    3649        0.940949   
3    2020    AR      2283.0        1361    1186        0.871418   
4    2020    CA     30342.0       18001   16893        0.938448   
..    ...   ...         ...         ...     ...             ...   
307  2012    WA      5230.0        3533    3172        0.897821   
308  2012    WV      1452.0         982     690        0.702648   
309  2012    WI      4352.0        3318    3127        0.942435   
310  2012    WY       427.0         268     247        0.921642   
311  2012    US    235248.0      153157  132948        0.868050   

     Norm_Voter_Pop  
0          0.596179  
1          0.625000  
2          0.647215  
3          0.519492  
4          0.556753  
..              ...  
307        0.606501  
308        0.475207