In [61]:
# Python SQL toolkit and Object Relational Mapper
import sqlalchemy
import sqlalchemy as db
from sqlalchemy.ext.automap import automap_base
from sqlalchemy.orm import Session
from sqlalchemy import create_engine, inspect
# Dependencies
import pandas as pd
import numpy as np
import psycopg2

In [62]:
# Create engine using the `demographics.sqlite` database file
engine = create_engine("sqlite:///data/election/database.sqlite")

In [63]:
# Declare a Base using `automap_base()`
Base = automap_base()

In [64]:
# Use the Base class to reflect the database tables
Base.prepare(engine, reflect=True)

In [65]:
# Print all of the classes mapped to the Base
Base.classes.keys()

['county_facts']

In [66]:
# Create the inspector and connect it to the engine
inspector = inspect(engine)

In [67]:
# Collect the names of tables within the database
inspector.get_table_names()

['county_facts', 'county_facts_dictionary', 'primary_results']

In [68]:
# Assign the demographics class to a variable called `Demographics`
county_facts = Base.classes.county_facts

In [69]:
# Using the inspector to print the column names within the 'Salaries' table and its types
columns = inspector.get_columns('primary_results')
for column in columns:
    print(column["name"], column["type"])

state TEXT
state_abbreviation TEXT
county TEXT
fips INTEGER
party TEXT
candidate TEXT
votes INTEGER
fraction_votes NUMERIC


In [70]:
# Create a session
session = Session(engine)

In [71]:
engine.execute("SELECT * FROM primary_results").fetchall()


[('Alabama', 'AL', 'Autauga', 1001, 'Democrat', 'Bernie Sanders', 544, 0.182),
 ('Alabama', 'AL', 'Autauga', 1001, 'Democrat', 'Hillary Clinton', 2387, 0.8),
 ('Alabama', 'AL', 'Baldwin', 1003, 'Democrat', 'Bernie Sanders', 2694, 0.32899999999999996),
 ('Alabama', 'AL', 'Baldwin', 1003, 'Democrat', 'Hillary Clinton', 5290, 0.647),
 ('Alabama', 'AL', 'Barbour', 1005, 'Democrat', 'Bernie Sanders', 222, 0.078),
 ('Alabama', 'AL', 'Barbour', 1005, 'Democrat', 'Hillary Clinton', 2567, 0.9059999999999999),
 ('Alabama', 'AL', 'Bibb', 1007, 'Democrat', 'Bernie Sanders', 246, 0.19699999999999998),
 ('Alabama', 'AL', 'Bibb', 1007, 'Democrat', 'Hillary Clinton', 942, 0.755),
 ('Alabama', 'AL', 'Blount', 1009, 'Democrat', 'Bernie Sanders', 395, 0.386),
 ('Alabama', 'AL', 'Blount', 1009, 'Democrat', 'Hillary Clinton', 564, 0.551),
 ('Alabama', 'AL', 'Bullock', 1011, 'Democrat', 'Bernie Sanders', 178, 0.066),
 ('Alabama', 'AL', 'Bullock', 1011, 'Democrat', 'Hillary Clinton', 2451, 0.9129999999999999

In [83]:
# returns a list of states with incorrect fips associated to record
engine.execute("SELECT distinct(state_abbreviation) FROM primary_results where fips > 99999").fetchall()

[('AK',),
 ('CT',),
 ('IL',),
 ('KS',),
 ('ME',),
 ('MA',),
 ('NH',),
 ('ND',),
 ('RI',),
 ('WY',)]

In [77]:
# returns a list of states with blank fips associated to record
engine.execute("SELECT * FROM primary_results WHERE fips = ''").fetchall()

[('New Hampshire', 'NH', 'Belknap', '', 'Democrat', 'Bernie Sanders', 5990, 0.6318565400843882),
 ('New Hampshire', 'NH', 'Belknap', '', 'Democrat', 'Hillary Clinton', 3490, 0.3681434599156118),
 ('New Hampshire', 'NH', 'Carroll', '', 'Democrat', 'Bernie Sanders', 5655, 0.6364659538548115),
 ('New Hampshire', 'NH', 'Carroll', '', 'Democrat', 'Hillary Clinton', 3230, 0.3635340461451885),
 ('New Hampshire', 'NH', 'Cheshire', '', 'Democrat', 'Bernie Sanders', 12471, 0.7070930430345297),
 ('New Hampshire', 'NH', 'Cheshire', '', 'Democrat', 'Hillary Clinton', 5166, 0.2929069569654703),
 ('New Hampshire', 'NH', 'Coos', '', 'Democrat', 'Bernie Sanders', 3637, 0.643716814159292),
 ('New Hampshire', 'NH', 'Coos', '', 'Democrat', 'Hillary Clinton', 2013, 0.35628318584070795),
 ('New Hampshire', 'NH', 'Grafton', '', 'Democrat', 'Bernie Sanders', 14258, 0.6733094068757084),
 ('New Hampshire', 'NH', 'Grafton', '', 'Democrat', 'Hillary Clinton', 6918, 0.32669059312429166),
 ('New Hampshire', 'NH', '

In [84]:
# 3 types of dataframes need to be created. 1 for good values, 1 for errorneous values, 1 for blank values.

df_election_all = engine.execute("SELECT * FROM primary_results").fetchall()
df_election_blank = engine.execute("SELECT * FROM primary_results WHERE fips = ''").fetchall()
df_election_fips_error = engine.execute("SELECT distinct(state_abbreviation) FROM primary_results where fips > 99999").fetchall()

In [87]:
type(df_election_fips_error)

list

In [59]:
# We imported FIPS CSV spreadsheet into a PostGres Database & created table via sql.
from sqlalchemy import create_engine
engine = create_engine('postgresql://postgres:password@localhost:5432/etlproject')
connection = engine.connect()

df_FIPS = pd.read_sql_query('''SELECT * FROM public."FIPS" where state = 'AR';''', engine)
df_FIPS

Unnamed: 0,zipcode,county,state,fips,classfips
0,72003,Arkansas County,AR,05001,H1
1,72134,Arkansas County,AR,05001,H1
2,72166,Arkansas County,AR,05001,H1
3,72140,Arkansas County,AR,05001,H1
4,72048,Arkansas County,AR,05001,H1
5,72026,Arkansas County,AR,05001,H1
6,72055,Arkansas County,AR,05001,H1
7,72073,Arkansas County,AR,05001,H1
8,72038,Arkansas County,AR,05001,H1
9,72042,Arkansas County,AR,05001,H1
