In [1]:
from sqlalchemy import create_engine
import pandas as pd 
import json

with open('/home/douglas/postgres_credentials.json') as f:
    data = json.load(f)
    username = data['username']
    password = data['password']
    address = data['address']

conn = create_engine('postgresql://{}:{}@{}:5432/ex_election'.format(username, password, address))

  """)


# Tables

Presidential election results per state from 1952 to 2016

In [2]:
pd.read_sql_query("SELECT * FROM election LIMIT 10", conn)

Unnamed: 0,state,democrat_votes,republican_votes,other_votes,year
0,AL,275075,149231,0,1952
1,AR,226300,177155,0,1952
2,AZ,108528,152042,0,1952
3,CA,2257646,3035587,0,1952
4,CO,245504,379782,0,1952
5,CT,481649,611012,0,1952
6,DE,83315,90059,0,1952
7,FL,444950,544036,0,1952
8,GA,456823,198979,0,1952
9,IA,451513,808906,0,1952


In [3]:
pd.read_sql_query("SELECT COUNT(*) FROM election", conn)

Unnamed: 0,count
0,860


In [4]:
pd.read_sql_query("SELECT * FROM candidate LIMIT 10", conn)

Unnamed: 0,year,party,candidate
0,1952,Democratic,"Stevenson, Adlai"
1,1952,Republican,"Eisenhower, Dwight"
2,1956,Democratic,"Stevenson, Adlai"
3,1956,Republican,"Eisenhower, Dwight"
4,1960,Democratic,"Kennedy, Jack (JFK)"
5,1960,Republican,"Nixon, Richard"
6,1964,Democratic,"Johnson, Lyndon (LBJ)"
7,1964,Republican,"Goldwater, Barry"
8,1968,American Independent,"Wallace, George"
9,1968,Democratic,"Humphrey, Hubert"


In [5]:
# unstack rows for another view
pd.read_sql_query('''
SELECT state, democrat_votes AS votes, 'democrat' AS party, year
FROM election 
UNION ALL
SELECT state, republican_votes AS votes, 'republican' AS party, year
FROM election
UNION ALL
SELECT state, other_votes as votes, 'other' AS party, year
FROM election
ORDER BY year, state
''', conn).head()

Unnamed: 0,state,votes,party,year
0,AL,0,other,1952
1,AL,275075,democrat,1952
2,AL,149231,republican,1952
3,AR,0,other,1952
4,AR,226300,democrat,1952


In [6]:
# save as view
conn.execute('''CREATE OR REPLACE VIEW party_votes AS
SELECT state, democrat_votes AS votes, 'democrat' AS party, year
FROM election 
UNION ALL
SELECT state, republican_votes AS votes, 'republican' AS party, year
FROM election
UNION ALL
SELECT state, other_votes as votes, 'other' AS party, year
FROM election
ORDER BY year, state
''')

<sqlalchemy.engine.result.ResultProxy at 0x7facb82af3c8>

In [7]:
# confirm view created 
pd.read_sql_query('''SELECT * FROM party_votes LIMIT 5''', conn)

Unnamed: 0,state,votes,party,year
0,AL,149231,republican,1952
1,AL,0,other,1952
2,AL,275075,democrat,1952
3,AR,226300,democrat,1952
4,AR,177155,republican,1952


# Problems

How many candidates are in the candidate table for the 2000 election?

In [8]:
pd.read_sql_query("SELECT COUNT(*) FROM candidate WHERE year = 2000", conn)

Unnamed: 0,count
0,3


How many candidates are in the candidate table for each election from 1984 to 2016?

In [9]:
pd.read_sql_query('''
SELECT year, count(candidate)
FROM candidate
WHERE year BETWEEN 1984 AND 2016
GROUP BY year''', conn)

Unnamed: 0,year,count
0,1984,2
1,1988,2
2,1992,3
3,1996,3
4,2000,3
5,2008,2
6,2004,2
7,2012,2
8,2016,3


For each election from 1984 to 2016, give the party that won the popular vote (i.e. the most votes, not the most electoral college seats)

In [10]:
pd.read_sql_query('''
SELECT year, total_democrat, total_republican,total_other,
CASE 
    WHEN total_other > total_democrat AND total_other > total_republican
        THEN 'other' 
        ELSE CASE 
            WHEN total_democrat > total_republican 
            THEN 'democrat'
            ELSE 'republican' END
    END AS winner
FROM (SELECT year, 
    sum(democrat_votes) AS total_democrat,
    sum(republican_votes) AS total_republican,
    sum(other_votes) AS total_other
    FROM election
    WHERE year BETWEEN 1984 AND 2016
    GROUP BY year
    ORDER BY year) s ''', conn)

Unnamed: 0,year,total_democrat,total_republican,total_other,winner
0,1984,37577352,54455472,0,republican
1,1988,41809476,48886597,0,republican
2,1992,44909806,39104550,19743821,democrat
3,1996,47400125,39198755,8085402,democrat
4,2000,51009810,50462412,2883443,democrat
5,2004,59027115,62039572,0,republican
6,2008,69499428,59950323,0,democrat
7,2012,65918507,60934407,0,democrat
8,2016,65853625,62985106,4489233,democrat


Extension of previous question: for each election from 1984 to 2016, give the party that won the popular vote and the margin (i.e. the amount that the winning party got over the party that came in second place). You can assume that the third party votes ("Other") are irrelevant, and just compare Democrats and Republicans.

In [11]:
pd.read_sql_query('''
WITH winner AS (
SELECT year, total_democrat, total_republican,
CASE 
    WHEN total_democrat > total_republican 
    THEN 'democrat'
    ELSE 'republican'
END AS winner
FROM (SELECT year, 
    sum(democrat_votes) AS total_democrat,
    sum(republican_votes) AS total_republican,
    sum(other_votes) AS total_other
    FROM election
    WHERE year BETWEEN 1984 AND 2016
    GROUP BY year
    ORDER BY year) s)
SELECT year,
abs(total_democrat - total_republican) as margin,
winner
FROM winner
    ''', conn)

Unnamed: 0,year,margin,winner
0,1984,16878120,republican
1,1988,7077121,republican
2,1992,5805256,democrat
3,1996,8201370,democrat
4,2000,547398,democrat
5,2004,3012457,republican
6,2008,9549105,democrat
7,2012,4984100,democrat
8,2016,2868519,democrat


Which states have had fewer than 3 democratic victories (i.e. fewer than 3 elections where the democrats got the majority of the votes in that state) since 1952?

In [12]:
pd.read_sql_query('''
SELECT s.state,
count(s.winner) as num_democratic_victories
FROM (SELECT state, 
        CASE WHEN democrat_votes > republican_votes 
            THEN 'democrat'
            ELSE 'republican'
            END AS winner
        FROM election) s
WHERE s.winner = 'democrat'
GROUP BY s.state
HAVING count(s.winner) < 3
ORDER BY state
''', conn)

Unnamed: 0,state,num_democratic_victories
0,AK,1
1,AZ,1
2,ID,1
3,IN,2
4,KS,1
5,MT,2
6,ND,1
7,NE,1
8,OK,1
9,SD,1


Which states have had fewer than 3 republican victories since 1952?

In [13]:
%%time 
pd.read_sql_query('''
SELECT s.state,
count(winner) as num_republican_victories
FROM (SELECT state, 
        CASE WHEN democrat_votes > republican_votes 
            THEN 'democrat'
            ELSE 'republican'
            END AS winner
        FROM election) s
WHERE s.winner != 'democrat'
GROUP BY s.state
HAVING count(s.winner) < 3
ORDER BY state
''', conn)

CPU times: user 3.73 ms, sys: 0 ns, total: 3.73 ms
Wall time: 8.46 ms


Unnamed: 0,state,num_republican_victories
0,HI,2


In [14]:
%%time 
# prev query slower and loses DC when since Republicans have never won
pd.read_sql_query('''
SELECT s.state,
SUM(CASE WHEN s.winner = 'republican' THEN 1
    ELSE 0 END) as num_republican_victories
FROM (SELECT state, 
        CASE WHEN democrat_votes > republican_votes 
            THEN 'democrat'
            ELSE 'republican'
            END AS winner
        FROM election) s

GROUP BY s.state
HAVING SUM(CASE WHEN s.winner = 'republican' THEN 1
    ELSE 0 END) < 3
ORDER BY state
''', conn)

CPU times: user 0 ns, sys: 4.17 ms, total: 4.17 ms
Wall time: 9.62 ms


Unnamed: 0,state,num_republican_victories
0,DC,0
1,HI,2


In [15]:
%%time 
# simplify into one query 
pd.read_sql_query('''
SELECT state, 
        SUM(CASE WHEN democrat_votes > republican_votes 
            THEN 0
            ELSE 1
            END) AS num_republican_victories
FROM election
GROUP BY state
HAVING SUM(CASE WHEN democrat_votes > republican_votes 
            THEN 0
            ELSE 1
            END) < 3
ORDER BY state
''', conn)

CPU times: user 0 ns, sys: 3.79 ms, total: 3.79 ms
Wall time: 7.64 ms


Unnamed: 0,state,num_republican_victories
0,DC,0
1,HI,2


We are interested in measuring the partisanship of the states. We will define a partisan state as one that is consistently won by a single party (either Democrat or Republican) since 1988. For example, since 1988 California has been won by the republicans once, and won by the democrats 7 times. Under this metric, California would be considered "partisan". (Note that if we include elections back to 1952, the republicans have won CA 9 times, and democrats have only won it 8 times).

In [16]:
# not a great example of the metric if CA counts
# but will take metric to be states where single party has won all elections since 1988

In [17]:
%%time 
pd.read_sql_query('''
SELECT 
    state,
    CASE WHEN s.num_republican_victories = 0
        THEN 'democrat'
        ELSE CASE WHEN s.num_democratic_victories = 0
            THEN 'republican'
            ELSE NULL END
        END AS winner
FROM (SELECT 
        state, 
        SUM(CASE WHEN democrat_votes > republican_votes 
            THEN 0
            ELSE 1
            END) AS num_republican_victories,
        SUM(CASE WHEN democrat_votes > republican_votes 
            THEN 1
            ELSE 0
            END) AS num_democratic_victories
    FROM election
    WHERE year >= 1988
    GROUP BY state) s
WHERE s.num_republican_victories = 0
OR s.num_democratic_victories = 0
ORDER BY winner, state

''', conn)

CPU times: user 3.29 ms, sys: 0 ns, total: 3.29 ms
Wall time: 10.6 ms


Unnamed: 0,state,winner
0,DC,democrat
1,HI,democrat
2,MA,democrat
3,MN,democrat
4,NY,democrat
5,OR,democrat
6,RI,democrat
7,WA,democrat
8,AK,republican
9,AL,republican
