# 2010 Election Data
### Non-Presidential election

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=Warning)
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from mpl_toolkits.basemap import Basemap
import cartopy.crs as ccrs
import seaborn as sns
import plotly.express as px
import plotly.graph_objects as go
import psycopg2
from sqlalchemy import create_engine
import xlrd

import cartopy.io.shapereader as shpreader
from matplotlib.patches import Polygon
from matplotlib.collections import PatchCollection
import matplotlib.patches as mpatches
import geopandas as gpd

In [2]:
"""
Extract multiple sheets from the Excel file and load them into Pandas data frames. 
A dictionary allows you to store each data frame under a unique key (the sheet name), 
making it easy to access and work with each data frame later.
"""
# "C:\Users\Owner\Documents\UC_Irvine\STATS170A\FinalProject\Election_Results\results2010.xls"
# Define the file path to the Excel file
file_path = 'Election_Results/results2010.xls'

# Define a list of sheet names to extract
sheet_names = ['2010 US House & Senate Results']

# Create an empty dictionary to store the data frames
data_frames = {}

# Loop over the sheet names and read them into data frames
for name in sheet_names:
    data_frames[name] = pd.read_excel(file_path, sheet_name=name)

# # Print the data frames to verify that they were loaded correctly
# for name, df in data_frames.items():
#     print(f"{name}:")
#     print(df.head())


In [3]:
df_congress_2010 = data_frames['2010 US House & Senate Results']
df_congress_2010.head()
# 537 rows 14 columns

Unnamed: 0,1,STATE,STATE ABBREVIATION,DISTRICT,FEC ID#,INCUMBENT INDICATOR (I),CANDIDATE NAME (First),CANDIDATE NAME (Last),"CANDIDATE NAME (Last, First)",TOTAL VOTES,...,PRIMARY,PRIMARY %,RUNOFF,RUNOFF %,GENERAL,GENERAL %,"COMBINED GE PARTY TOTALS (CT, NY, SC)","COMBINED % (CT, NY, SC)",FOOTNOTES,Unnamed: 20
0,2,Alabama,AL,1,H2AL01077,(I),Jo,Bonner,"Bonner, Jo",,...,56937.0,0.752518,,,129063.0,0.825839,,,,
1,3,Alabama,AL,1,H0AL01030,,Peter,Gounares,"Gounares, Peter",,...,18725.0,0.247482,,,,,,,,
2,4,Alabama,AL,1,,,,,,Party Votes:,...,75662.0,,,,,,,,,
3,5,Alabama,AL,1,H0AL01048,,David,Walter,"Walter, David",,...,,,,,26357.0,0.168651,,,,
4,6,Alabama,AL,1,,,,Scattered,Scattered,,...,,,,,861.0,0.005509,,,,


In [4]:
df_selection = df_congress_2010[['STATE ABBREVIATION', 'DISTRICT', 'CANDIDATE NAME (Last)', 'PARTY', 'GENERAL %', 'GENERAL ']]
df_con_2010 = df_selection.rename(columns={
    'DISTRICT': 'Office',
    'STATE ABBREVIATION': 'State',
    'CANDIDATE NAME (Last)': 'Last Name',
    'PARTY': 'Party',
    'GENERAL %': 'Vote %',
    'GENERAL ': 'Vote count'
})
df_con_2010.head()

Unnamed: 0,State,Office,Last Name,Party,Vote %,Vote count
0,AL,1,Bonner,REP,0.825839,129063.0
1,AL,1,Gounares,REP,,
2,AL,1,,REP,,
3,AL,1,Walter,CPA,0.168651,26357.0
4,AL,1,Scattered,W,0.005509,861.0


In [5]:
# Remove any unseen NaN's from num rows.
df_c_2010 = df_con_2010.dropna(subset=['Vote %'])
df_c_2010 = df_c_2010.dropna(subset=['Vote count'])
# confirms NaN's are gone
print(df_c_2010[df_c_2010['Vote %'].isna()])
print(df_c_2010[df_c_2010['Vote count'].isna()])

Empty DataFrame
Columns: [State, Office, Last Name, Party, Vote %, Vote count]
Index: []
Empty DataFrame
Columns: [State, Office, Last Name, Party, Vote %, Vote count]
Index: []


In [6]:
df_c_2010.tail()

Unnamed: 0,State,Office,Last Name,Party,Vote %,Vote count
5248,WI,S,LaForest,W,5.9e-05,129
5255,WY,00,Lummis,REP,0.704186,131661
5259,WY,00,Wendt,DEM,0.244789,45768
5262,WY,00,Love,LIB,0.049489,9253
5263,WY,00,Scattered,W,0.001535,287


In [7]:
df_c_2010.dtypes

State          object
Office         object
Last Name      object
Party          object
Vote %        float64
Vote count     object
dtype: object

In [8]:
df_c_2010['Office'] = df_c_2010['Office'].astype(str)
df_c_2010['Office'] = df_c_2010['Office'].apply(lambda x: 'House' if x.isdigit() else 'Senate')
df_c_2010.tail()

Unnamed: 0,State,Office,Last Name,Party,Vote %,Vote count
5248,WI,Senate,LaForest,W,5.9e-05,129
5255,WY,House,Lummis,REP,0.704186,131661
5259,WY,House,Wendt,DEM,0.244789,45768
5262,WY,House,Love,LIB,0.049489,9253
5263,WY,House,Scattered,W,0.001535,287


In [9]:
df_c_2010.dtypes

State          object
Office         object
Last Name      object
Party          object
Vote %        float64
Vote count     object
dtype: object

In [10]:
# add year and office columns
df_c_2010['Year'] = 2010

# Move the Year and Office columns to the front
new_cols = ['Year', 'Office', 'State', 'Last Name', 'Party', 'Vote %', 'Vote count']
df_c_2010 = df_c_2010.reindex(columns=new_cols)

# Include only Candidates R, D, IND (independent)
df_c_2010 = df_c_2010[df_c_2010['Party'].isin(['REP', 'DEM', 'IND'])]

df_c_2010.tail()

Unnamed: 0,Year,Office,State,Last Name,Party,Vote %,Vote count
5226,2010,House,WI,Kagen,DEM,0.451232,118646
5237,2010,Senate,WI,Johnson,REP,0.518575,1125999
5243,2010,Senate,WI,Feingold,DEM,0.470199,1020958
5255,2010,House,WY,Lummis,REP,0.704186,131661
5259,2010,House,WY,Wendt,DEM,0.244789,45768


In [11]:
df_c_2010.dtypes

Year            int64
Office         object
State          object
Last Name      object
Party          object
Vote %        float64
Vote count     object
dtype: object

In [12]:
# convert Vote count to float for math
df_c_2010['Vote count'] = df_c_2010['Vote count'].astype(int)
df_c_2010.dtypes

Year            int64
Office         object
State          object
Last Name      object
Party          object
Vote %        float64
Vote count      int32
dtype: object

In [15]:
df_c_2010.head(10)
#gives 979 rows

Unnamed: 0,Year,Office,State,Last Name,Party,Vote %,Vote count
0,2010,House,AL,Bonner,REP,0.825839,129063
7,2010,House,AL,Roby,REP,0.509729,111645
12,2010,House,AL,Bright,DEM,0.487906,106865
16,2010,House,AL,Rogers,REP,0.594209,117736
17,2010,House,AL,Segrest,DEM,0.404787,80204
21,2010,House,AL,Aderholt,REP,0.988175,167714
25,2010,House,AL,Brooks,REP,0.578873,131109
29,2010,House,AL,Raby,DEM,0.420292,95192
37,2010,House,AL,Bachus,REP,0.980532,205288
43,2010,House,AL,Sewell,DEM,0.724317,136696


# Add to database

In [16]:
# Replace the values below with your database credentials
DATABASE = "electiondb"
USER = "postgres"
PASSWORD = "YourPassword"
HOST = "localhost"
PORT = "5432"

# Connect to your PostgreSQL database
conn = psycopg2.connect(
    database=DATABASE,
    user=USER,
    password=PASSWORD,
    host=HOST,
    port=PORT
)

# Create a sqlalchemy engine
engine = create_engine(f"postgresql://{USER}:{PASSWORD}@{HOST}:{PORT}/{DATABASE}")

# Upload the data frames to the database
df_c_2010.to_sql("results", engine, if_exists="append", index=False)

# Commit the changes to the database
conn.commit()

# Close the database connection
conn.close()


In [17]:
# Replace the values below with your database credentials
DATABASE = "electiondb"
USER = "postgres"
PASSWORD = "YourPassword"
HOST = "localhost"
PORT = "5432"

# Connect to the PostgreSQL database
conn = psycopg2.connect(database=DATABASE, user=USER, password=PASSWORD, host=HOST, port=PORT)

# Query the table and store the results in a Pandas dataframe
df = pd.read_sql_query("SELECT * FROM results", conn)

# Print the dataframe to verify that it contains data
print(df)

# Close the database connection
conn.close()
# gives total 5237 rows for 2020-2010 (6112)

      Year  Office State  Last Name Party    Vote %  Vote count
0     2020    Pres    AL      Trump   REP  0.620316     1441170
1     2020    Pres    AL      Biden   DEM  0.365700      849624
2     2020    Pres    AL  Jorgensen   IND  0.010836       25176
3     2020    Pres    AK      Trump   REP  0.528331      189951
4     2020    Pres    AK      Biden   DEM  0.427720      153778
...    ...     ...   ...        ...   ...       ...         ...
5232  2010   House    WI      Kagen   DEM  0.451232      118646
5233  2010  Senate    WI    Johnson   REP  0.518575     1125999
5234  2010  Senate    WI   Feingold   DEM  0.470199     1020958
5235  2010   House    WY     Lummis   REP  0.704186      131661
5236  2010   House    WY      Wendt   DEM  0.244789       45768

[5237 rows x 7 columns]


## Postgres upload to GitHub

In [None]:
'''
Dump your PostgreSQL database to a file using the pg_dump command. 
You can do this from the command line
This command will create a backup of your database and save it to a 
file called database_backup.sql.
Directory location # \! cd
C:\Program Files\PostgreSQL\12\scripts
'''
pg_dump -U your_username your_database_name > database_backup.sql


In [None]:
# Commit the changes to your local git repository:
git commit -m "Initial commit"


In [None]:
# Add the remote repository to your local git repository:
git remote add origin git@github.com:your-username/your-repository-name.git


In [None]:
# Push your changes to GitHub:
git push -u origin master