In [11]:
import warnings
warnings.simplefilter(action='ignore', category=Warning)
import os
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import cartopy.crs as ccrs
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go

import cartopy.io.shapereader as shpreader
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
import matplotlib.patches as mpatches
import geopandas as gpd

import psycopg2
from sqlalchemy import create_engine
from sqlalchemy import inspect, text

Data source: U.S. Census Bureau, Voting and Registration tables — https://www.census.gov/topics/public-sector/voting/data/tables.2010.List_1863097513.html#list-tab-List_1863097513

In [12]:
# Load sheet 'Table 4a' of the 2014 workbook; the column labels come from
# the row given in `header`.
excel_options = {'sheet_name': 'Table 4a', 'header': [4]}
df_votes_2014 = pd.read_excel('Election_Results/vote2014.xls', **excel_options)

# Show the table exactly as it was read
df_votes_2014

Unnamed: 0.1,Unnamed: 0,Unnamed: 1,Unnamed: 2,Total registered,Percent registered\n(Total),Margin of Error 1,Percent registered\n(Citizen),Margin of Error,Total voted,Percent voted\n(Total),Margin of Error .1,Percent voted\n(Citizen),Margin of Error .2
0,UNITED STATES,239874.0,219941.0,142166.0,59.3,0.3,64.6,0.3,92251.0,38.5,0.3,41.9,0.3
1,ALABAMA,3656.0,3519.0,2366.0,64.7,2.4,67.2,2.4,1497.0,40.9,2.4,42.5,2.5
2,ALASKA,521.0,503.0,336.0,64.6,2.5,66.8,2.5,255.0,49.0,2.6,50.7,2.7
3,ARIZONA,4994.0,4397.0,2738.0,54.8,2.1,62.3,2.2,1785.0,35.7,2.1,40.6,2.2
4,ARKANSAS,2189.0,2057.0,1292.0,59.0,2.5,62.8,2.6,798.0,36.5,2.5,38.8,2.6
5,CALIFORNIA,29030.0,24455.0,14113.0,48.6,0.9,57.7,1.0,8949.0,30.8,0.8,36.6,0.9
6,COLORADO,4009.0,3732.0,2654.0,66.2,2.3,71.1,2.3,2211.0,55.2,2.4,59.3,2.4
7,CONNECTICUT,2783.0,2560.0,1668.0,59.9,2.4,65.1,2.5,1195.0,43.0,2.5,46.7,2.6
8,DELAWARE,712.0,661.0,442.0,62.0,2.4,66.8,2.5,299.0,42.0,2.5,45.2,2.6
9,DISTRICT OF COLUMBIA,538.0,480.0,353.0,65.6,2.4,73.5,2.4,247.0,45.8,2.6,51.4,2.7


In [13]:
# Source column -> tidy column name.  Only these four columns are kept, in
# this order.
column_labels = {
    'Unnamed: 0': 'State',
    'Unnamed: 1': 'Population',
    'Total registered': 'Registered',
    'Total voted': 'Voted',
}

# Keep and rename the wanted columns, tag every row with the year, and cut
# the last 3 rows (text, not data).
votes_trimmed = (
    df_votes_2014[list(column_labels)]
    .rename(columns=column_labels)
    .assign(Year=2014)
    .iloc[:-3]
)

# Move the first row to the bottom so it follows the other rows, then
# renumber the rows and put Year in front.
df_votes = (
    pd.concat([votes_trimmed.iloc[1:], votes_trimmed.iloc[[0]]])
    .reset_index(drop=True)
    .reindex(columns=['Year', 'State', 'Population', 'Registered', 'Voted'])
)

# values are in the thousands
df_votes

Unnamed: 0,Year,State,Population,Registered,Voted
0,2014,ALABAMA,3656.0,2366.0,1497.0
1,2014,ALASKA,521.0,336.0,255.0
2,2014,ARIZONA,4994.0,2738.0,1785.0
3,2014,ARKANSAS,2189.0,1292.0,798.0
4,2014,CALIFORNIA,29030.0,14113.0,8949.0
5,2014,COLORADO,4009.0,2654.0,2211.0
6,2014,CONNECTICUT,2783.0,1668.0,1195.0
7,2014,DELAWARE,712.0,442.0,299.0
8,2014,DISTRICT OF COLUMBIA,538.0,353.0,247.0
9,2014,FLORIDA,15414.0,8691.0,6220.0


In [14]:
# Upper-case name -> two-letter code for the 50 states, the District of
# Columbia, and the 'UNITED STATES' entry (coded 'US').
state_abbr = dict([
    ('ALABAMA', 'AL'), ('ALASKA', 'AK'), ('ARIZONA', 'AZ'), ('ARKANSAS', 'AR'),
    ('CALIFORNIA', 'CA'), ('COLORADO', 'CO'), ('CONNECTICUT', 'CT'), ('DELAWARE', 'DE'),
    ('DISTRICT OF COLUMBIA', 'DC'), ('FLORIDA', 'FL'), ('GEORGIA', 'GA'), ('HAWAII', 'HI'),
    ('IDAHO', 'ID'), ('ILLINOIS', 'IL'), ('INDIANA', 'IN'), ('IOWA', 'IA'),
    ('KANSAS', 'KS'), ('KENTUCKY', 'KY'), ('LOUISIANA', 'LA'), ('MAINE', 'ME'),
    ('MARYLAND', 'MD'), ('MASSACHUSETTS', 'MA'), ('MICHIGAN', 'MI'), ('MINNESOTA', 'MN'),
    ('MISSISSIPPI', 'MS'), ('MISSOURI', 'MO'), ('MONTANA', 'MT'), ('NEBRASKA', 'NE'),
    ('NEVADA', 'NV'), ('NEW HAMPSHIRE', 'NH'), ('NEW JERSEY', 'NJ'), ('NEW MEXICO', 'NM'),
    ('NEW YORK', 'NY'), ('NORTH CAROLINA', 'NC'), ('NORTH DAKOTA', 'ND'), ('OHIO', 'OH'),
    ('OKLAHOMA', 'OK'), ('OREGON', 'OR'), ('PENNSYLVANIA', 'PA'), ('RHODE ISLAND', 'RI'),
    ('SOUTH CAROLINA', 'SC'), ('SOUTH DAKOTA', 'SD'), ('TENNESSEE', 'TN'), ('TEXAS', 'TX'),
    ('UTAH', 'UT'), ('VERMONT', 'VT'), ('VIRGINIA', 'VA'), ('WASHINGTON', 'WA'),
    ('WEST VIRGINIA', 'WV'), ('WISCONSIN', 'WI'), ('WYOMING', 'WY'),
    ('UNITED STATES', 'US'),
])



In [15]:
# Convert full state names to their abbreviations.
# A bare `.map()` turns every value that is not a key of `state_abbr` into
# NaN, so re-running this cell (when the column already holds abbreviations)
# would silently blank the whole column.  Falling back to the existing value
# keeps the cell idempotent.
state_names = df_votes['State']
df_votes['State'] = state_names.map(state_abbr).fillna(state_names)

# Fail loudly on anything that is still not a known abbreviation (misspelled
# name, leftover text row, missing value) instead of carrying it downstream.
unrecognized = df_votes.loc[~df_votes['State'].isin(set(state_abbr.values())), 'State']
if not unrecognized.empty:
    raise ValueError(
        f"State values without an abbreviation: {unrecognized.unique().tolist()}"
    )

df_votes

Unnamed: 0,Year,State,Population,Registered,Voted
0,2014,AL,3656.0,2366.0,1497.0
1,2014,AK,521.0,336.0,255.0
2,2014,AZ,4994.0,2738.0,1785.0
3,2014,AR,2189.0,1292.0,798.0
4,2014,CA,29030.0,14113.0,8949.0
5,2014,CO,4009.0,2654.0,2211.0
6,2014,CT,2783.0,1668.0,1195.0
7,2014,DE,712.0,442.0,299.0
8,2014,DC,538.0,353.0,247.0
9,2014,FL,15414.0,8691.0,6220.0


In [16]:
# Turnout ratios: Voted divided by Registered, and Voted divided by
# Population.
votes_cast = df_votes['Voted']
df_votes['Norm_Voter_Reg'] = votes_cast.div(df_votes['Registered'])
df_votes['Norm_Voter_Pop'] = votes_cast.div(df_votes['Population'])
df_votes

Unnamed: 0,Year,State,Population,Registered,Voted,Norm_Voter_Reg,Norm_Voter_Pop
0,2014,AL,3656.0,2366.0,1497.0,0.632713,0.409464
1,2014,AK,521.0,336.0,255.0,0.758929,0.489443
2,2014,AZ,4994.0,2738.0,1785.0,0.651936,0.357429
3,2014,AR,2189.0,1292.0,798.0,0.617647,0.36455
4,2014,CA,29030.0,14113.0,8949.0,0.634096,0.308267
5,2014,CO,4009.0,2654.0,2211.0,0.833082,0.551509
6,2014,CT,2783.0,1668.0,1195.0,0.716427,0.429393
7,2014,DE,712.0,442.0,299.0,0.676471,0.419944
8,2014,DC,538.0,353.0,247.0,0.699717,0.459108
9,2014,FL,15414.0,8691.0,6220.0,0.715683,0.403529


# Load the results into the PostgreSQL database

In [17]:
# Connection settings.  Each value can be overridden through the standard
# libpq environment variables so real credentials never have to be saved in
# the notebook; the fallbacks are the placeholder values.
DATABASE = os.environ.get("PGDATABASE", "electiondb")
USER = os.environ.get("PGUSER", "postgres")
PASSWORD = os.environ.get("PGPASSWORD", "YourPassword")
HOST = os.environ.get("PGHOST", "localhost")
PORT = os.environ.get("PGPORT", "5432")

# Create a sqlalchemy engine.  User and password are percent-encoded because
# characters such as "@", ":" or "/" would otherwise corrupt the URL.
engine = create_engine(
    f"postgresql://{quote_plus(USER)}:{quote_plus(PASSWORD)}@{HOST}:{PORT}/{DATABASE}"
)

# Upload the data frame to the voters table inside a single transaction.
# `engine.begin()` commits on success and rolls back on error, so no separate
# psycopg2 connection or explicit commit is needed.
# Rows already stored for the year(s) being loaded are deleted first, so
# re-running this cell replaces them instead of appending duplicates.
try:
    with engine.begin() as connection:
        if inspect(connection).has_table("voters"):
            for year in df_votes["Year"].unique().tolist():
                # "Year" is quoted: the column name is mixed-case in the table.
                connection.execute(
                    text('DELETE FROM voters WHERE "Year" = :year'),
                    {"year": year},
                )
        df_votes.to_sql("voters", connection, if_exists="append", index=False)
finally:
    # Release pooled connections even when the upload fails.
    engine.dispose()

print("Votes table loaded successfully")


Votes table loaded successfully


In [18]:
# Read the voters table back to confirm the upload.
# pandas supports SQLAlchemy connectables for PostgreSQL (not raw psycopg2
# connections), so the query goes through the `engine` created in the upload
# cell.  pandas opens and releases the connection itself, so nothing is left
# open if the query fails, and the credentials are not repeated here.
df = pd.read_sql_query("SELECT * FROM voters", engine)

# Print the dataframe to verify that it contains data
print(df)


     Year State  Population  Registered  Voted  Norm_Voter_Reg  Norm_Voter_Pop
0    2020    AL      3769.0        2527   2247        0.889197        0.596179
1    2020    AK       528.0         383    330        0.861619        0.625000
2    2020    AZ      5638.0        3878   3649        0.940949        0.647215
3    2020    AR      2283.0        1361   1186        0.871418        0.519492
4    2020    CA     30342.0       18001  16893        0.938448        0.556753
..    ...   ...         ...         ...    ...             ...             ...
203  2014    WA      5333.0        3281   2390        0.728436        0.448153
204  2014    WV      1449.0         896    484        0.540179        0.334023
205  2014    WI      4356.0        3005   2343        0.779700        0.537879
206  2014    WY       441.0         241    172        0.713693        0.390023
207  2014    US    239874.0      142166  92251        0.648896        0.384581

[208 rows x 7 columns]
