In [1]:
import warnings
warnings.simplefilter(action='ignore', category=Warning)
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import cartopy.crs as ccrs
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

import cartopy.io.shapereader as shpreader
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
import matplotlib.patches as mpatches
import geopandas as gpd

import psycopg2
from sqlalchemy import create_engine

Data source (U.S. Census Bureau voting data tables): https://www.census.gov/topics/public-sector/voting/data/tables.2010.List_1863097513.html#list-tab-List_1863097513

In [2]:
# Load sheet 'Table 4a' of the 2020 workbook; spreadsheet row index 4 is
# used as the column header row.
df_votes_2020 = pd.read_excel(
    io='Election_Results/vote2020.xlsx',
    sheet_name='Table 4a',
    header=[4],
)

# Preview the first rows of the raw frame
df_votes_2020.head()

Table 4a


Unnamed: 0,STATE,Total population,Total citizen population,Registered,Unnamed: 4,Unnamed: 5,Unnamed: 6,Unnamed: 7,Voted,Unnamed: 9,Unnamed: 10,Unnamed: 11,Unnamed: 12
0,,,,Total registered,Percent registered\n(Total),Margin of error 1,Percent registered\n(Citizen),Margin of error 1,Total voted,Percent voted\n(Total),Margin of error 1,Percent voted\n(Citizen),Margin of error 1
1,UNITED STATES,252274.0,231593.0,168308,66.7,0.4,72.7,0.4,154628,61.3,0.4,66.8,0.4
2,ALABAMA,3769.0,3716.0,2527,67,3.1,68,3.1,2247,59.6,3.3,60.5,3.3
3,ALASKA,528.0,516.0,383,72.6,3.2,74.2,3.1,330,62.4,3.4,63.8,3.4
4,ARIZONA,5638.0,5075.0,3878,68.8,2.5,76.4,2.5,3649,64.7,2.6,71.9,2.6


In [3]:
# Keep only the columns needed for the turnout analysis. In the raw preview
# the 'Registered' and 'Voted' columns sit above the "Total registered" /
# "Total voted" sub-headers, so they carry the absolute counts.
df_selection = df_votes_2020[['STATE', 'Total population', 'Registered', 'Voted']]
df_votes = df_selection.rename(columns={
    'STATE': 'State',
    'Total population': 'Population',
})

# Tag every row with the election year
df_votes['Year'] = 2020

# Final column order: Year first, then the state and its counts
new_cols = ['Year', 'State', 'Population', 'Registered', 'Voted']

df_votes = df_votes.iloc[1:]   # drop the sub-header row that follows the header
df_votes = df_votes.iloc[:-7]  # drop the last 7 rows, which hold text rather than data

# The first remaining row is the UNITED STATES total (see the raw preview);
# set it aside so it can be moved below the per-state rows.
first_row = df_votes.iloc[[0]].copy()

# Per-state rows only
df_votes = df_votes.iloc[1:].copy()

# Re-attach the national total as the last row
df_votes = pd.concat([df_votes, first_row])

df_votes = df_votes.reset_index(drop=True)     # clean 0..n-1 index
df_votes = df_votes.reindex(columns=new_cols)  # apply the final column order

# The text sub-header row presumably left 'Registered' and 'Voted' with a
# non-numeric dtype; convert the count columns explicitly so later arithmetic
# and the SQL export work on real numbers. to_numeric raises if any leftover
# text row slipped through the trimming above.
count_cols = ['Population', 'Registered', 'Voted']
df_votes[count_cols] = df_votes[count_cols].apply(pd.to_numeric)

df_votes
# values are in the thousands

Unnamed: 0,Year,State,Population,Registered,Voted
0,2020,ALABAMA,3769.0,2527,2247
1,2020,ALASKA,528.0,383,330
2,2020,ARIZONA,5638.0,3878,3649
3,2020,ARKANSAS,2283.0,1361,1186
4,2020,CALIFORNIA,30342.0,18001,16893
5,2020,COLORADO,4525.0,2993,2837
6,2020,CONNECTICUT,2777.0,1850,1681
7,2020,DELAWARE,766.0,542,489
8,2020,DISTRICT OF COLUMBIA,576.0,464,448
9,2020,FLORIDA,17244.0,10495,9720


In [4]:
# Upper-case state name -> two-letter code, listed as (name, code) pairs.
# Besides the 50 states this covers the District of Columbia and the
# 'UNITED STATES' total row, which is given the code 'US'.
_STATE_CODE_PAIRS = [
    ('ALABAMA', 'AL'), ('ALASKA', 'AK'), ('ARIZONA', 'AZ'), ('ARKANSAS', 'AR'),
    ('CALIFORNIA', 'CA'), ('COLORADO', 'CO'), ('CONNECTICUT', 'CT'),
    ('DELAWARE', 'DE'), ('DISTRICT OF COLUMBIA', 'DC'), ('FLORIDA', 'FL'),
    ('GEORGIA', 'GA'), ('HAWAII', 'HI'), ('IDAHO', 'ID'), ('ILLINOIS', 'IL'),
    ('INDIANA', 'IN'), ('IOWA', 'IA'), ('KANSAS', 'KS'), ('KENTUCKY', 'KY'),
    ('LOUISIANA', 'LA'), ('MAINE', 'ME'), ('MARYLAND', 'MD'),
    ('MASSACHUSETTS', 'MA'), ('MICHIGAN', 'MI'), ('MINNESOTA', 'MN'),
    ('MISSISSIPPI', 'MS'), ('MISSOURI', 'MO'), ('MONTANA', 'MT'),
    ('NEBRASKA', 'NE'), ('NEVADA', 'NV'), ('NEW HAMPSHIRE', 'NH'),
    ('NEW JERSEY', 'NJ'), ('NEW MEXICO', 'NM'), ('NEW YORK', 'NY'),
    ('NORTH CAROLINA', 'NC'), ('NORTH DAKOTA', 'ND'), ('OHIO', 'OH'),
    ('OKLAHOMA', 'OK'), ('OREGON', 'OR'), ('PENNSYLVANIA', 'PA'),
    ('RHODE ISLAND', 'RI'), ('SOUTH CAROLINA', 'SC'), ('SOUTH DAKOTA', 'SD'),
    ('TENNESSEE', 'TN'), ('TEXAS', 'TX'), ('UTAH', 'UT'), ('VERMONT', 'VT'),
    ('VIRGINIA', 'VA'), ('WASHINGTON', 'WA'), ('WEST VIRGINIA', 'WV'),
    ('WISCONSIN', 'WI'), ('WYOMING', 'WY'),
    ('UNITED STATES', 'US'),
]

# Dictionary form used for lookups; insertion order follows the list above.
state_abbr = dict(_STATE_CODE_PAIRS)



In [5]:
# Translate full state names into their two-letter codes.
# A plain .map() is not safe to re-run: on a second execution the column
# already holds codes, none of which are keys of state_abbr, so every value
# would become NaN. Values that are already a known code are therefore kept;
# names missing from state_abbr still end up as NaN, as before.
state_codes = df_votes['State'].map(state_abbr)
already_coded = df_votes['State'].isin(list(state_abbr.values()))
df_votes['State'] = state_codes.where(~already_coded, df_votes['State'])
df_votes

Unnamed: 0,Year,State,Population,Registered,Voted
0,2020,AL,3769.0,2527,2247
1,2020,AK,528.0,383,330
2,2020,AZ,5638.0,3878,3649
3,2020,AR,2283.0,1361,1186
4,2020,CA,30342.0,18001,16893
5,2020,CO,4525.0,2993,2837
6,2020,CT,2777.0,1850,1681
7,2020,DE,766.0,542,489
8,2020,DC,576.0,464,448
9,2020,FL,17244.0,10495,9720


In [6]:
# Turnout ratios: votes cast divided by registered voters, and votes cast
# divided by total population. Each ratio is stored as a new column.
for ratio_col, base_col in (
    ('Norm_Voter_Reg', 'Registered'),
    ('Norm_Voter_Pop', 'Population'),
):
    df_votes[ratio_col] = df_votes['Voted'].div(df_votes[base_col])
df_votes

Unnamed: 0,Year,State,Population,Registered,Voted,Norm_Voter_Reg,Norm_Voter_Pop
0,2020,AL,3769.0,2527,2247,0.889197,0.596179
1,2020,AK,528.0,383,330,0.861619,0.625
2,2020,AZ,5638.0,3878,3649,0.940949,0.647215
3,2020,AR,2283.0,1361,1186,0.871418,0.519492
4,2020,CA,30342.0,18001,16893,0.938448,0.556753
5,2020,CO,4525.0,2993,2837,0.947878,0.626961
6,2020,CT,2777.0,1850,1681,0.908649,0.605329
7,2020,DE,766.0,542,489,0.902214,0.638381
8,2020,DC,576.0,464,448,0.965517,0.777778
9,2020,FL,17244.0,10495,9720,0.926155,0.563674


# Load the results into the SQL database

In [7]:
# Database connection settings. The password is read from the PGPASSWORD
# environment variable when it is set, so a real secret never has to be saved
# in the notebook; the placeholder is only a fallback for local experiments.
DATABASE = "electiondb"
USER = "postgres"
PASSWORD = os.environ.get("PGPASSWORD", "YourPassword")
HOST = "localhost"
PORT = "5432"

# Create a sqlalchemy engine. to_sql() performs the write through this
# engine, so no separate psycopg2 connection (or commit on it) is needed.
engine = create_engine(f"postgresql://{USER}:{PASSWORD}@{HOST}:{PORT}/{DATABASE}")

# Upload the cleaned per-state frame (df_votes), not the raw spreadsheet
# frame (df_votes_2020), so the "voters" table has the Year / State / count /
# ratio columns. An existing "voters" table is replaced.
df_votes.to_sql("voters", engine, if_exists="replace", index=False)
print("Voters table loaded successfully")


Voters table loaded successfully


In [8]:
# Database connection settings. The password is read from the PGPASSWORD
# environment variable when it is set, so a real secret never has to be saved
# in the notebook; the placeholder is only a fallback for local experiments.
DATABASE = "electiondb"
USER = "postgres"
PASSWORD = os.environ.get("PGPASSWORD", "YourPassword")
HOST = "localhost"
PORT = "5432"

# Query through a SQLAlchemy engine rather than a raw psycopg2 connection:
# pandas documents SQLAlchemy connectables for non-sqlite databases, and
# pandas opens and releases the underlying connection itself, so nothing is
# left open if the query raises.
engine = create_engine(f"postgresql://{USER}:{PASSWORD}@{HOST}:{PORT}/{DATABASE}")

# Read the whole "voters" table back into a DataFrame
df = pd.read_sql_query("SELECT * FROM voters", engine)

# Display the frame to verify that it contains data
df


    Year State  Population  Registered   Voted  Norm_Voter_Reg  Norm_Voter_Pop
0   2020    AL      3769.0        2527    2247        0.889197        0.596179
1   2020    AK       528.0         383     330        0.861619        0.625000
2   2020    AZ      5638.0        3878    3649        0.940949        0.647215
3   2020    AR      2283.0        1361    1186        0.871418        0.519492
4   2020    CA     30342.0       18001   16893        0.938448        0.556753
5   2020    CO      4525.0        2993    2837        0.947878        0.626961
6   2020    CT      2777.0        1850    1681        0.908649        0.605329
7   2020    DE       766.0         542     489        0.902214        0.638381
8   2020    DC       576.0         464     448        0.965517        0.777778
9   2020    FL     17244.0       10495    9720        0.926155        0.563674
10  2020    GA      8032.0        5233    4888        0.934072        0.608566
11  2020    HI      1056.0         673     630      