In [1]:
import warnings
warnings.simplefilter(action='ignore', category=Warning)
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import cartopy.crs as ccrs
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

import cartopy.io.shapereader as shpreader
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
import matplotlib.patches as mpatches
import geopandas as gpd

import psycopg2
from sqlalchemy import create_engine

Data source: U.S. Census Bureau, Voting and Registration tables for 2010: https://www.census.gov/topics/public-sector/voting/data/tables.2010.List_1863097513.html#list-tab-List_1863097513

In [2]:
# Load sheet 'Table 4a' from the 2010 workbook, taking the column names from
# the row at position 3 (0-based) of that sheet.
df_votes_2010 = pd.read_excel(
    'Election_Results/vote2010.xls',
    sheet_name='Table 4a',
    header=[3],
)

# show the frame exactly as it was read
df_votes_2010

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Total registered,Percent registered\n(Total),Margin of Error 1,Percent registered\n(Citizen),Margin of Error 1.1,Total voted,Percent voted\n(Total),Margin of Error 1.2,Percent voted\n(Citizen),Margin of Error 1.3
0,UNITED STATES,229690.0,210800.0,137263.0,59.8,0.3,65.1,0.3,95987.0,41.8,0.3,45.5,0.3
1,ALABAMA,3526.0,3420.0,2224.0,63.1,2.4,65.0,2.4,1509.0,42.8,2.5,44.1,2.5
2,ALASKA,498.0,478.0,317.0,63.8,2.5,66.4,2.6,242.0,48.6,2.6,50.7,2.7
3,ARIZONA,4831.0,4443.0,2934.0,60.7,2.1,66.0,2.1,2170.0,44.9,2.1,48.8,2.3
4,ARKANSAS,2140.0,2068.0,1256.0,58.7,2.5,60.7,2.5,841.0,39.3,2.5,40.7,2.5
5,CALIFORNIA,27381.0,22767.0,13864.0,50.6,0.9,60.9,1.0,10725.0,39.2,0.9,47.1,1.0
6,COLORADO,3768.0,3473.0,2299.0,61.0,2.4,66.2,2.4,1826.0,48.4,2.5,52.6,2.6
7,CONNECTICUT,2648.0,2463.0,1641.0,62.0,2.5,66.6,2.6,1215.0,45.9,2.6,49.3,2.7
8,DELAWARE,667.0,634.0,418.0,62.6,2.5,65.9,2.5,323.0,48.5,2.6,51.0,2.7
9,DISTRICT OF COLUMBIA,489.0,441.0,295.0,60.3,2.7,66.9,2.7,200.0,40.8,2.7,45.3,2.9


In [3]:
# Final column order: Year first, then the state and the three counts.
new_cols = ['Year', 'State', 'Population', 'Registered', 'Voted']

# Keep only the four columns of interest and give them readable names.
df_selection = df_votes_2010[['Unnamed: 0', 'Unnamed: 1', 'Total registered', 'Total voted']]
df_votes = df_selection.rename(
    columns={
        'Unnamed: 0': 'State',
        'Unnamed: 1': 'Population',
        'Total registered': 'Registered',
        'Total voted': 'Voted',
    }
)

# Tag every row with the election year this sheet describes.
df_votes['Year'] = 2010

# Drop the last 4 rows of the sheet (trailing non-data text rows).
df_votes = df_votes.iloc[:-4]

# The first row is the 'UNITED STATES' total; move it from the top to the
# bottom so the individual states come first, then renumber the index and
# put the columns in their final order.
first_row = df_votes.iloc[[0]].copy()
df_votes = (
    pd.concat([df_votes.iloc[1:].copy(), first_row])
    .reset_index(drop=True)
    .reindex(columns=new_cols)
)

df_votes
# values are in the thousands

Unnamed: 0,Year,State,Population,Registered,Voted
0,2010,ALABAMA,3526.0,2224.0,1509.0
1,2010,ALASKA,498.0,317.0,242.0
2,2010,ARIZONA,4831.0,2934.0,2170.0
3,2010,ARKANSAS,2140.0,1256.0,841.0
4,2010,CALIFORNIA,27381.0,13864.0,10725.0
5,2010,COLORADO,3768.0,2299.0,1826.0
6,2010,CONNECTICUT,2648.0,1641.0,1215.0
7,2010,DELAWARE,667.0,418.0,323.0
8,2010,DISTRICT OF COLUMBIA,489.0,295.0,200.0
9,2010,FLORIDA,14227.0,7994.0,5572.0


In [4]:
# Lookup from the upper-case names used in the 'State' column to two-letter
# codes: the 50 states, the District of Columbia, and 'UNITED STATES' -> 'US'
# for the national total row.
state_abbr = {
    'ALABAMA': 'AL', 'ALASKA': 'AK', 'ARIZONA': 'AZ', 'ARKANSAS': 'AR',
    'CALIFORNIA': 'CA', 'COLORADO': 'CO', 'CONNECTICUT': 'CT', 'DELAWARE': 'DE',
    'DISTRICT OF COLUMBIA': 'DC', 'FLORIDA': 'FL', 'GEORGIA': 'GA', 'HAWAII': 'HI',
    'IDAHO': 'ID', 'ILLINOIS': 'IL', 'INDIANA': 'IN', 'IOWA': 'IA',
    'KANSAS': 'KS', 'KENTUCKY': 'KY', 'LOUISIANA': 'LA', 'MAINE': 'ME',
    'MARYLAND': 'MD', 'MASSACHUSETTS': 'MA', 'MICHIGAN': 'MI', 'MINNESOTA': 'MN',
    'MISSISSIPPI': 'MS', 'MISSOURI': 'MO', 'MONTANA': 'MT', 'NEBRASKA': 'NE',
    'NEVADA': 'NV', 'NEW HAMPSHIRE': 'NH', 'NEW JERSEY': 'NJ', 'NEW MEXICO': 'NM',
    'NEW YORK': 'NY', 'NORTH CAROLINA': 'NC', 'NORTH DAKOTA': 'ND', 'OHIO': 'OH',
    'OKLAHOMA': 'OK', 'OREGON': 'OR', 'PENNSYLVANIA': 'PA', 'RHODE ISLAND': 'RI',
    'SOUTH CAROLINA': 'SC', 'SOUTH DAKOTA': 'SD', 'TENNESSEE': 'TN', 'TEXAS': 'TX',
    'UTAH': 'UT', 'VERMONT': 'VT', 'VIRGINIA': 'VA', 'WASHINGTON': 'WA',
    'WEST VIRGINIA': 'WV', 'WISCONSIN': 'WI', 'WYOMING': 'WY',
    'UNITED STATES': 'US',
}



In [5]:
# Replace full state names with their two-letter codes.
# Values that are already one of the codes are left untouched so this cell can
# be re-run safely: a plain .map(state_abbr) on already-abbreviated values
# finds no matching keys and would turn the whole column into NaN.
# Names that are not in state_abbr still become NaN, exactly as before.
df_votes['State'] = df_votes['State'].where(
    df_votes['State'].isin(list(state_abbr.values())),
    df_votes['State'].map(state_abbr),
)
df_votes

Unnamed: 0,Year,State,Population,Registered,Voted
0,2010,AL,3526.0,2224.0,1509.0
1,2010,AK,498.0,317.0,242.0
2,2010,AZ,4831.0,2934.0,2170.0
3,2010,AR,2140.0,1256.0,841.0
4,2010,CA,27381.0,13864.0,10725.0
5,2010,CO,3768.0,2299.0,1826.0
6,2010,CT,2648.0,1641.0,1215.0
7,2010,DE,667.0,418.0,323.0
8,2010,DC,489.0,295.0,200.0
9,2010,FL,14227.0,7994.0,5572.0


In [6]:
# Turnout ratios: votes cast as a fraction of registered voters, and as a
# fraction of the Population column.
df_votes['Norm_Voter_Reg'] = df_votes['Voted'].div(df_votes['Registered'])
df_votes['Norm_Voter_Pop'] = df_votes['Voted'].div(df_votes['Population'])
df_votes

Unnamed: 0,Year,State,Population,Registered,Voted,Norm_Voter_Reg,Norm_Voter_Pop
0,2010,AL,3526.0,2224.0,1509.0,0.678507,0.427964
1,2010,AK,498.0,317.0,242.0,0.763407,0.485944
2,2010,AZ,4831.0,2934.0,2170.0,0.739605,0.449182
3,2010,AR,2140.0,1256.0,841.0,0.669586,0.392991
4,2010,CA,27381.0,13864.0,10725.0,0.773586,0.391695
5,2010,CO,3768.0,2299.0,1826.0,0.794258,0.484607
6,2010,CT,2648.0,1641.0,1215.0,0.740402,0.458837
7,2010,DE,667.0,418.0,323.0,0.772727,0.484258
8,2010,DC,489.0,295.0,200.0,0.677966,0.408998
9,2010,FL,14227.0,7994.0,5572.0,0.697023,0.39165


# Add to the SQL database

In [7]:
# Connection settings. Each value can be overridden with the standard libpq
# environment variables (PGDATABASE, PGUSER, PGPASSWORD, PGHOST, PGPORT) so
# real credentials never have to be saved inside the notebook. The fallbacks
# are the original placeholder values.
DATABASE = os.environ.get("PGDATABASE", "electiondb")
USER = os.environ.get("PGUSER", "postgres")
PASSWORD = os.environ.get("PGPASSWORD", "YourPassword")
HOST = os.environ.get("PGHOST", "localhost")
PORT = os.environ.get("PGPORT", "5432")

# Create a sqlalchemy engine. pandas writes through this engine, which opens
# and commits its own connection, so no separate psycopg2 connection is
# needed for the upload.
engine = create_engine(f"postgresql://{USER}:{PASSWORD}@{HOST}:{PORT}/{DATABASE}")

try:
    # Upload the data frame to the database using the voters table.
    # NOTE(review): if_exists="append" adds these rows again every time this
    # cell is run; re-running it for the same year duplicates that year's
    # rows in the voters table.
    df_votes.to_sql("voters", engine, if_exists="append", index=False)
    print("Votes table loaded successfully")
finally:
    # Release the engine's pooled connections even if the upload failed.
    engine.dispose()


Votes table loaded successfully


In [8]:
# Connection settings. Each value can be overridden with the standard libpq
# environment variables (PGDATABASE, PGUSER, PGPASSWORD, PGHOST, PGPORT) so
# real credentials never have to be saved inside the notebook. The fallbacks
# are the original placeholder values.
DATABASE = os.environ.get("PGDATABASE", "electiondb")
USER = os.environ.get("PGUSER", "postgres")
PASSWORD = os.environ.get("PGPASSWORD", "YourPassword")
HOST = os.environ.get("PGHOST", "localhost")
PORT = os.environ.get("PGPORT", "5432")

# Connect to the PostgreSQL database
conn = psycopg2.connect(database=DATABASE, user=USER, password=PASSWORD, host=HOST, port=PORT)

try:
    # Query the whole voters table into a Pandas dataframe
    df = pd.read_sql_query("SELECT * FROM voters", conn)
finally:
    # Always close the connection, including when the query fails
    conn.close()

# Print the dataframe to verify that it contains data.
# The row count depends on how many yearly loads have been appended to the
# table; the table has 7 columns.
print(df)

     Year State  Population  Registered  Voted  Norm_Voter_Reg  Norm_Voter_Pop
0    2020    AL      3769.0        2527   2247        0.889197        0.596179
1    2020    AK       528.0         383    330        0.861619        0.625000
2    2020    AZ      5638.0        3878   3649        0.940949        0.647215
3    2020    AR      2283.0        1361   1186        0.871418        0.519492
4    2020    CA     30342.0       18001  16893        0.938448        0.556753
..    ...   ...         ...         ...    ...             ...             ...
255  2010    WA      5095.0        3367   2692        0.799525        0.528361
256  2010    WV      1420.0         883    581        0.657984        0.409155
257  2010    WI      4291.0        2908   2258        0.776479        0.526218
258  2010    WY       411.0         239    190        0.794979        0.462287
259  2010    US    229690.0      137263  95987        0.699293        0.417898

[260 rows x 7 columns]
