In [11]:
import warnings
# Ignore every warning category (Warning is the base class of all of them).
# Placed ahead of the remaining imports so the filter is already installed
# when they execute.
# NOTE(review): this also hides deprecation and pandas copy warnings for the
# whole notebook - confirm that blanket suppression is intended.
warnings.simplefilter(action='ignore', category=Warning)
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import cartopy.crs as ccrs
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

import cartopy.io.shapereader as shpreader
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
import matplotlib.patches as mpatches
import geopandas as gpd

import psycopg2
from sqlalchemy import create_engine

Data source: U.S. Census Bureau, Voting and Registration tables: https://www.census.gov/topics/public-sector/voting/data/tables.2010.List_1863097513.html#list-tab-List_1863097513

In [12]:
# Load sheet "Table 4a" of the 2018 workbook; row index 4 of the sheet is used
# as the header row.
votes_2018_path = 'Election_Results/vote2018.xlsx'
df_votes_2018 = pd.read_excel(
    votes_2018_path,
    sheet_name='Table 4a',
    header=[4],
)

# Preview the first rows of the raw sheet.
df_votes_2018.head()

Table 4a


Unnamed: 0,STATE,Total population,Total citizen population,Registered,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Voted,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12
0,,,,Total registered,Percent registered\n(Total),Margin of error 1,Percent registered\n(Citizen),Margin of error 1,Total voted,Percent voted\n(Total),Margin of error 1,Percent voted\n(Citizen),Margin of error 1
1,UNITED STATES,249748.0,228832.0,153066,61.3,0.3,66.9,0.3,122281,49,0.3,53.4,0.3
2,ALABAMA,3753.0,3609.0,2490,66.4,2.5,69,2.5,1830,48.8,2.7,50.7,2.7
3,ALASKA,523.0,497.0,337,64.4,2.7,67.7,2.7,263,50.2,2.9,52.8,2.9
4,ARIZONA,5361.0,4757.0,3262,60.8,2.2,68.6,2.2,2800,52.2,2.3,58.9,2.4


In [13]:
# Final column layout, with Year leading.
new_cols = ['Year', 'State', 'Population', 'Registered', 'Voted']

# Keep only the columns of interest from the raw sheet.
df_selection = df_votes_2018.loc[:, ['STATE', 'Total population', 'Registered', 'Voted']]

# Shorten the column names and tag every row with the election year.
df_votes = (
    df_selection
    .rename(columns={'STATE': 'State', 'Total population': 'Population'})
    .assign(Year=2018)
)

# Drop the sub-header row at the top and the 7 trailing rows of footnote text.
df_votes = df_votes.iloc[1:-7]

# The first remaining row (the national total in the displayed data) is moved
# to the bottom so the individual states come first.
first_row = df_votes.iloc[[0]].copy()
df_votes = (
    pd.concat([df_votes.iloc[1:], first_row])
    .reset_index(drop=True)       # renumber rows from 0
    .reindex(columns=new_cols)    # put Year in front
)

df_votes
# values are in the thousands

Unnamed: 0,Year,State,Population,Registered,Voted
0,2018,ALABAMA,3753.0,2490,1830
1,2018,ALASKA,523.0,337,263
2,2018,ARIZONA,5361.0,3262,2800
3,2018,ARKANSAS,2261.0,1262,919
4,2018,CALIFORNIA,30243.0,15690,13240
5,2018,COLORADO,4353.0,2645,2342
6,2018,CONNECTICUT,2834.0,1726,1370
7,2018,DELAWARE,756.0,472,369
8,2018,DISTRICT OF COLUMBIA,567.0,397,313
9,2018,FLORIDA,16845.0,9435,7918


In [14]:
# Lookup from the upper-case names used in the State column to two-letter
# codes: the 50 states, the District of Columbia, and the national total ('US').
state_abbr = dict([
    ('ALABAMA', 'AL'),
    ('ALASKA', 'AK'),
    ('ARIZONA', 'AZ'),
    ('ARKANSAS', 'AR'),
    ('CALIFORNIA', 'CA'),
    ('COLORADO', 'CO'),
    ('CONNECTICUT', 'CT'),
    ('DELAWARE', 'DE'),
    ('DISTRICT OF COLUMBIA', 'DC'),
    ('FLORIDA', 'FL'),
    ('GEORGIA', 'GA'),
    ('HAWAII', 'HI'),
    ('IDAHO', 'ID'),
    ('ILLINOIS', 'IL'),
    ('INDIANA', 'IN'),
    ('IOWA', 'IA'),
    ('KANSAS', 'KS'),
    ('KENTUCKY', 'KY'),
    ('LOUISIANA', 'LA'),
    ('MAINE', 'ME'),
    ('MARYLAND', 'MD'),
    ('MASSACHUSETTS', 'MA'),
    ('MICHIGAN', 'MI'),
    ('MINNESOTA', 'MN'),
    ('MISSISSIPPI', 'MS'),
    ('MISSOURI', 'MO'),
    ('MONTANA', 'MT'),
    ('NEBRASKA', 'NE'),
    ('NEVADA', 'NV'),
    ('NEW HAMPSHIRE', 'NH'),
    ('NEW JERSEY', 'NJ'),
    ('NEW MEXICO', 'NM'),
    ('NEW YORK', 'NY'),
    ('NORTH CAROLINA', 'NC'),
    ('NORTH DAKOTA', 'ND'),
    ('OHIO', 'OH'),
    ('OKLAHOMA', 'OK'),
    ('OREGON', 'OR'),
    ('PENNSYLVANIA', 'PA'),
    ('RHODE ISLAND', 'RI'),
    ('SOUTH CAROLINA', 'SC'),
    ('SOUTH DAKOTA', 'SD'),
    ('TENNESSEE', 'TN'),
    ('TEXAS', 'TX'),
    ('UTAH', 'UT'),
    ('VERMONT', 'VT'),
    ('VIRGINIA', 'VA'),
    ('WASHINGTON', 'WA'),
    ('WEST VIRGINIA', 'WV'),
    ('WISCONSIN', 'WI'),
    ('WYOMING', 'WY'),
    ('UNITED STATES', 'US'),
])



In [15]:
# Convert full state names to their two-letter codes.
# replace() only rewrites values that are keys of state_abbr, so re-running
# this cell leaves already-converted codes intact (the earlier .map() call
# turned every value into NaN on a second run), and a name missing from the
# lookup stays visible instead of silently becoming NaN.
df_votes['State'] = df_votes['State'].replace(state_abbr)
df_votes

Unnamed: 0,Year,State,Population,Registered,Voted
0,2018,AL,3753.0,2490,1830
1,2018,AK,523.0,337,263
2,2018,AZ,5361.0,3262,2800
3,2018,AR,2261.0,1262,919
4,2018,CA,30243.0,15690,13240
5,2018,CO,4353.0,2645,2342
6,2018,CT,2834.0,1726,1370
7,2018,DE,756.0,472,369
8,2018,DC,567.0,397,313
9,2018,FL,16845.0,9435,7918


In [16]:
# Calculate the normalized voter count
df_votes['Norm_Voter_Reg'] = df_votes['Voted'] / df_votes['Registered']
df_votes['Norm_Voter_Pop'] = df_votes['Voted'] / df_votes['Population']
df_votes

Unnamed: 0,Year,State,Population,Registered,Voted,Norm_Voter_Reg,Norm_Voter_Pop
0,2018,AL,3753.0,2490,1830,0.73494,0.48761
1,2018,AK,523.0,337,263,0.780415,0.502868
2,2018,AZ,5361.0,3262,2800,0.858369,0.522291
3,2018,AR,2261.0,1262,919,0.728209,0.406457
4,2018,CA,30243.0,15690,13240,0.84385,0.437787
5,2018,CO,4353.0,2645,2342,0.885444,0.53802
6,2018,CT,2834.0,1726,1370,0.793743,0.483416
7,2018,DE,756.0,472,369,0.78178,0.488095
8,2018,DC,567.0,397,313,0.788413,0.552028
9,2018,FL,16845.0,9435,7918,0.839216,0.47005


# Load the results into the PostgreSQL database

In [17]:
# Connection settings for the PostgreSQL database.
# Replace the values below with your database credentials.
DATABASE = "electiondb"
USER = "postgres"
# Read the password from the PGPASSWORD environment variable when it is set,
# so the real secret never has to be saved in the notebook; the placeholder is
# kept as the fallback.
PASSWORD = os.environ.get("PGPASSWORD", "YourPassword")
HOST = "localhost"
PORT = "5432"

# Create a sqlalchemy engine. to_sql writes through this engine, so the
# separate psycopg2 connection and its commit() were never used and are gone.
engine = create_engine(f"postgresql://{USER}:{PASSWORD}@{HOST}:{PORT}/{DATABASE}")

try:
    # Upload the data frame to the database using the voters table.
    # NOTE(review): if_exists="append" adds the rows again on every run of
    # this cell, so re-running it duplicates the 2018 data in the table.
    df_votes.to_sql("voters", engine, if_exists="append", index=False)
    print("Votes table loaded successfully")
finally:
    # Release the engine's pooled connections even if the upload failed.
    # (The previous version called close() on a `cursor` that was never
    # created, which raises NameError on a fresh kernel.)
    engine.dispose()


Votes table loaded successfully


In [18]:
# Connection settings for the PostgreSQL database.
# Replace the values below with your database credentials.
DATABASE = "electiondb"
USER = "postgres"
# Read the password from the PGPASSWORD environment variable when it is set,
# so the real secret never has to be saved in the notebook; the placeholder is
# kept as the fallback.
PASSWORD = os.environ.get("PGPASSWORD", "YourPassword")
HOST = "localhost"
PORT = "5432"

# Connect to the PostgreSQL database
conn = psycopg2.connect(database=DATABASE, user=USER, password=PASSWORD, host=HOST, port=PORT)
try:
    # Query the table and store the results in a Pandas dataframe
    df = pd.read_sql_query("SELECT * FROM voters", conn)
finally:
    # Always close the connection, even when the query raises.
    conn.close()

# Print the dataframe to verify that it contains data
print(df)
# gives 1128 rows for 2020

     Year State  Population  Registered   Voted  Norm_Voter_Reg   
0    2020    AL      3769.0        2527    2247        0.889197  \
1    2020    AK       528.0         383     330        0.861619   
2    2020    AZ      5638.0        3878    3649        0.940949   
3    2020    AR      2283.0        1361    1186        0.871418   
4    2020    CA     30342.0       18001   16893        0.938448   
..    ...   ...         ...         ...     ...             ...   
99   2018    WA      5775.0        3852    3234        0.839564   
100  2018    WV      1406.0         892     610        0.683857   
101  2018    WI      4436.0        3129    2776        0.887184   
102  2018    WY       430.0         268     220        0.820896   
103  2018    US    249748.0      153066  122281        0.798878   

     Norm_Voter_Pop  
0          0.596179  
1          0.625000  
2          0.647215  
3          0.519492  
4          0.556753  
..              ...  
99         0.560000  
100        0.433855