##  We're now going to analyze change in turnout by precinct

In [1]:
# First, set everything up
from __future__ import division # convert integer division to floating point

import ast
import datetime

import graphlab as gl
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pyodbc
from matplotlib.backends.backend_pdf import PdfPages

import local #server information and credentials
from pullODBC import pullODBC
from pullODBCall import pullODBCall
%matplotlib inline
# Global matplotlib styling: apply the 'seaborn-white' style sheet and a 7pt base font.
plt.style.use('seaborn-white')
plt.rcParams['font.size'] = 7
# NOTE(review): this only references plt.tight_layout without calling it, so it
# has no effect on any figure; the trailing ';' merely suppresses cell output.
# Presumably a call (or rcParams['figure.autolayout']) was intended -- confirm.
plt.tight_layout;
# Tick labels on both axes use the same 7pt size as the base font.
plt.rc('xtick', labelsize=7) 
plt.rc('ytick', labelsize=7) 

First, read the CSV files containing turnout data for all three years (2008, 2012, and 2016)

In [2]:
# Load the per-year turnout tables.  The paths are listed in the same order as
# the names they are unpacked into (2008, 2012, 2016).
turnout2008, turnout2012, turnout2016 = map(pd.read_csv, [
    'C:/voter/turnout2008.csv',
    'C:/voter/turnout2012.csv',
    'C:/voter/turnout2016.csv',
])

Next, we need to eliminate all blank spaces before and after all important fields so the joins will work correctly

In [3]:
def trim(x):
    """Return ``x`` with leading and trailing whitespace removed.

    Values that have no ``strip`` method (for example the float NaN that pandas
    uses for a missing cell, or ``None``) are returned unchanged instead of
    raising ``TypeError``, so the function can safely be applied to a whole
    column that contains gaps.
    """
    try:
        return x.strip()
    except AttributeError:
        # Not a string-like value -- there is nothing to trim.
        return x

In [4]:
# Strip padding from both join keys in every year's table.  The key loop is the
# outer one so columns are processed in the same order as the hand-written
# version: county_desc for all three years, then precinct_abbrv.
for key_column in ('county_desc', 'precinct_abbrv'):
    for yearly_turnout in (turnout2008, turnout2012, turnout2016):
        yearly_turnout[key_column] = yearly_turnout[key_column].apply(trim)

Next, do an inner join to combine data for 2016 and 2012.  The key columns will be 'county_desc' and 'precinct_abbrv'.  Note that all joins are inner since we want only the subset of precincts that existed in 2008, 2012, and 2016

In [5]:
# Keep only precincts present in both 2016 and 2012.  Non-key columns that
# exist in both tables receive pandas' default _x (2016) / _y (2012) suffixes.
new_dataframe = turnout2016.merge(
    turnout2012,
    on=['county_desc', 'precinct_abbrv'],
    how='inner',
)

In [6]:
# Sanity-check the join by comparing row counts.  A parenthesised single
# argument prints the same as the Python 2 print statement and is also valid
# Python 3.
# NOTE(review): an inner join can only return more rows than its smaller input
# when the key pair is duplicated in at least one table.  The recorded output
# (3012 merged rows vs. 3007 rows for 2012) therefore indicates that
# (county_desc, precinct_abbrv) is not unique somewhere -- worth confirming.
print(len(new_dataframe))
print(len(turnout2016))
print(len(turnout2012))

3012
3064
3007


We need to clean out some junk columns

In [7]:
# The merge left two suffixed copies of the 'Unnamed: 0' column (one per input
# table); neither carries turnout data, so remove them in place.
for junk_column in ('Unnamed: 0_x', 'Unnamed: 0_y'):
    del new_dataframe[junk_column]

Next, do an inner join to combine new_dataframe with data for 2008.  The key columns again will be 'county_desc' and 'precinct_abbrv'

In [8]:
# Restrict further to precincts that also appear in the 2008 table.
# This rebinds new_dataframe, so re-running this cell on its own would merge
# the 2008 data in a second time.
new_dataframe = new_dataframe.merge(
    turnout2008,
    on=['county_desc', 'precinct_abbrv'],
    how='inner',
)

We need to clean out one more junk column

In [9]:
# The 2008 table's 'Unnamed: 0' column came through the second merge without a
# suffix (the earlier _x/_y copies had already been deleted), so it is removed
# here under its plain name.
del new_dataframe['Unnamed: 0']

Reverse the order of each 2-tuple column name so that party will be at the top

In [10]:
# Every data column is named with the text of a (year, party) tuple.  Build the
# mapping to the swapped (party, year) text programmatically instead of listing
# all twelve pairs by hand; the resulting keys and values are the same strings.
swapped_names = {}
for year in ('2016', '2012', '2008'):
    for party in ('LIB', 'REP', 'DEM', 'UNA'):
        swapped_names["('%s', '%s')" % (year, party)] = "('%s', '%s')" % (party, year)
new_dataframe = new_dataframe.rename(columns=swapped_names)


Create two new fields for each party: the ratio of 2016 turnout to 2008 turnout ('COMP 2008'), and the ratio of 2016 turnout to 2012 turnout ('COMP 2012')

In [11]:
# For each party, divide the 2016 value by the 2008 and 2012 values of the same
# row.  Column names are still tuple-looking strings at this point, so they are
# assembled with string formatting.  Parties and baselines are visited in the
# same order as the original hand-written assignments.
for party in ('DEM', 'REP', 'UNA', 'LIB'):
    turnout_2016 = new_dataframe["('%s', '2016')" % party]
    for baseline in ('2008', '2012'):
        ratio_name = "('%s', 'COMP %s')" % (party, baseline)
        baseline_name = "('%s', '%s')" % (party, baseline)
        new_dataframe[ratio_name] = turnout_2016 / new_dataframe[baseline_name]


Next, let's format the dataframe by grouping by county and precinct abbrv

In [12]:
# Columns to keep, in report order: for each party the two comparison ratios
# followed by the 2016, 2012 and 2008 values.
report_columns = [
    "('%s', '%s')" % (party, field)
    for party in ('REP', 'DEM', 'UNA', 'LIB')
    for field in ('COMP 2012', 'COMP 2008', '2016', '2012', '2008')
]

# Index rows by (county, precinct).  The columns are selected with a *list*:
# passing several keys as a bare tuple (gb["a", "b"]) is deprecated in newer
# pandas releases, while a list behaves the same on old and new versions.
# NOTE(review): .mean() averages every selected column across rows that share
# a key, including the ratios computed earlier -- confirm that averaging is the
# intended treatment for duplicated precinct rows.
new_dataframe = new_dataframe.groupby(['county_desc', 'precinct_abbrv'])[report_columns].mean()

Convert the column names from strings back to 2-tuples

In [13]:
# Each column name is the text of a tuple literal such as "('REP', '2016')".
# ast.literal_eval parses exactly that kind of literal and, unlike eval, cannot
# execute arbitrary expressions that might be hidden in a column name.
new_dataframe.columns = [ast.literal_eval(name) for name in new_dataframe.columns]

To make the report more readable, delete the Libertarian columns

In [14]:
# Remove every ('LIB', ...) column in place, in the same order as the report
# layout: the two ratios first, then the per-year values.
for lib_field in ('COMP 2012', 'COMP 2008', '2016', '2012', '2008'):
    del new_dataframe[('LIB', lib_field)]

Next, turn the column tuples into a two-level column index, with Party as the top level and Year as the bottom level

In [15]:
# Build a two-level column index from the (party, year) tuples and label the
# levels so the report header reads Party over Year.
party_year_columns = pd.MultiIndex.from_tuples(
    new_dataframe.columns, names=['Party', 'Year'])
new_dataframe.columns = party_year_columns

In [16]:
pd.set_option('display.height', 1000)
pd.set_option('display.max_rows', 2000)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)
pd.options.display.float_format = '{:.1%}'.format #display as percentages
by_republican = new_dataframe.sort_values(('REP', 'COMP 2012'), axis=0, ascending=False, inplace=False, kind='quicksort', na_position='last')
%store by_republican > turnout_improvement_sorted_republican.txt
by_democrat = new_dataframe.sort_values(('DEM', 'COMP 2012'), axis=0, ascending=False, inplace=False, kind='quicksort', na_position='last')
%store by_democrat > turnout_improvement_sorted_democrat.txt
by_unaffiliated = new_dataframe.sort_values(('UNA', 'COMP 2012'), axis=0, ascending=False, inplace=False, kind='quicksort', na_position='last')
%store by_unaffiliated > turnout_improvement_sorted_unaffiliated.txt

height has been deprecated.

Writing 'by_republican' (DataFrame) to file 'turnout_improvement_sorted_republican.txt'.
Writing 'by_democrat' (DataFrame) to file 'turnout_improvement_sorted_democrat.txt'.
Writing 'by_unaffiliated' (DataFrame) to file 'turnout_improvement_sorted_unaffiliated.txt'.
