In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pymc3 as pm
from scipy.stats import beta
from scipy.stats import binom
import seaborn

%matplotlib inline

Couldn't import dot_parser, loading of dot files will not be possible.


  from ._conv import register_converters as _register_converters


In [2]:
# Load the voter registration / ballot / vote table from the project's data directory
voter_csv_path = "../data/Voter_Reg_Ballot_Vote.csv"
df = pd.read_csv(voter_csv_path)

In [3]:
# Preview the first 10 rows to sanity-check the columns and values that were loaded
df.head(10)

Unnamed: 0,COUNTY,TOTAL_REG,DEM_REG,REP_REG,OTH_REG,TOTAL_BALLOTS,DEM_BALLOTS,REP_BALLOTS,OTHER_BALLOTS,DEM_VOTES,...,OTHER_VOTE_OTHER,D_vote_D,R_vote_D,O_vote_D,D_vote_R,R_vote_R,O_vote_R,D_vote_O,R_vote_O,O_vote_O
0,Adams,273004,96987,68732,107285,201817,74775,55674,71368,96558,...,25177,74775,0,21783,0,55674,24408,0,0,25177
1,Alamosa,10047,3774,2881,3392,7189,2873,2348,1968,3189,...,954,2873,0,316,0,2348,698,0,0,954
2,Arapahoe,420997,142142,120505,158350,314424,110726,98873,104825,159885,...,37486,110726,0,49159,0,98873,18180,0,0,37486
3,Archuleta,10598,2462,4926,3210,7567,1777,3808,1982,2500,...,803,1777,0,723,0,3808,456,0,0,803
4,Baca,2826,661,1413,752,2223,543,1219,461,283,...,187,283,0,0,260,1219,274,0,0,187
5,Bent,2935,957,1050,928,2027,679,850,498,590,...,-249,590,0,0,89,850,249,0,0,249
6,Boulder,262764,112584,45139,105041,193813,90676,34357,68780,132334,...,20083,90676,0,41658,0,34357,7039,0,0,20083
7,Broomfield,49410,14905,14366,20139,39052,12337,11819,14896,19731,...,4954,12337,0,7394,0,11819,2548,0,0,4954
8,Chaffee,14715,3912,5216,5587,11688,3314,4374,4000,4888,...,1409,3314,0,1574,0,4374,1017,0,0,1409
9,Cheyenne,1417,180,898,339,1132,138,770,224,132,...,75,132,0,0,6,770,149,0,0,75


## Likelihoods 

In [4]:
# Dem "likelihood": DEM_VOTES as a fraction of TOTAL_BALLOTS, computed row by row
dem_votes = df['DEM_VOTES']
df['vote_D_likelihood'] = dem_votes.div(df['TOTAL_BALLOTS'])

In [5]:
# Rep "likelihood": REP_VOTES as a fraction of TOTAL_BALLOTS, computed row by row
rep_votes = df['REP_VOTES']
df['vote_R_likelihood'] = rep_votes.div(df['TOTAL_BALLOTS'])

In [6]:
# Other "likelihood": OTHER_VOTES as a fraction of TOTAL_BALLOTS, computed row by row
other_votes = df['OTHER_VOTES']
df['vote_O_likelihood'] = other_votes.div(df['TOTAL_BALLOTS'])

Set up priors using proportions of votes from the 2012 election

In [7]:
# Prior (2012 election proportion): registered Dem voting Dem.
# The same constant is broadcast to every row.
prior_reg_dem_vote_dem = 0.2784
df['D_vote_D_prior'] = prior_reg_dem_vote_dem

In [8]:
# Prior (2012 election proportion): registered Rep voting Dem.
# The same constant is broadcast to every row.
prior_reg_rep_vote_dem = 0.0192
df['R_vote_D_prior'] = prior_reg_rep_vote_dem

In [9]:
# Prior (2012 election proportion): registered Other voting Dem.
# The same constant is broadcast to every row.
prior_reg_other_vote_dem = 0.1848
df['O_vote_D_prior'] = prior_reg_other_vote_dem

In [10]:
# Prior (2012 election proportion): registered Dem voting Rep.
# The same constant is broadcast to every row.
prior_reg_dem_vote_rep = 0.0288
df['D_vote_R_prior'] = prior_reg_dem_vote_rep

In [11]:
# Prior (2012 election proportion): registered Rep voting Rep.
# The same constant is broadcast to every row.
prior_reg_rep_vote_rep = 0.2112
df['R_vote_R_prior'] = prior_reg_rep_vote_rep

In [12]:
# Prior (2012 election proportion): registered Other voting Rep.
# The same constant is broadcast to every row.
prior_reg_other_vote_rep = 0.1980
df['O_vote_R_prior'] = prior_reg_other_vote_rep

In [13]:
# Prior (2012 election proportion): registered Dem voting Other.
# The same constant is broadcast to every row.
prior_reg_dem_vote_other = 0.0128
df['D_vote_O_prior'] = prior_reg_dem_vote_other

In [14]:
# Prior (2012 election proportion): registered Rep voting Other.
# The same constant is broadcast to every row.
prior_reg_rep_vote_other = 0.0096
df['R_vote_O_prior'] = prior_reg_rep_vote_other

In [15]:
# Prior (2012 election proportion): registered Other voting Other.
# The same constant is broadcast to every row.
prior_reg_other_vote_other = .0572
df['O_vote_O_prior'] = prior_reg_other_vote_other

In [16]:
# Sanity check: the nine prior columns must sum to 1 in every row.
prior_columns = [
    'D_vote_D_prior', 'R_vote_D_prior', 'O_vote_D_prior',
    'D_vote_R_prior', 'R_vote_R_prior', 'O_vote_R_prior',
    'D_vote_O_prior', 'R_vote_O_prior', 'O_vote_O_prior',
]
# Add the columns left to right so the floating-point result matches a
# hand-written chain of `+` in the same order.
prior_total = sum(df[col] for col in prior_columns)
# Fail loudly instead of relying on a visual check of the first few rows;
# a tolerance is used because the priors are floating-point values.
assert np.allclose(prior_total, 1.0), "prior columns do not sum to 1 in every row"
prior_total[:5]

0    1.0
1    1.0
2    1.0
3    1.0
4    1.0
dtype: float64

In [17]:
# List the column labels currently on df (the likelihood and prior columns should now be present)
df.columns

Index(['COUNTY', 'TOTAL_REG', 'DEM_REG', 'REP_REG', 'OTH_REG', 'TOTAL_BALLOTS',
       'DEM_BALLOTS', 'REP_BALLOTS', 'OTHER_BALLOTS', 'DEM_VOTES', 'REP_VOTES',
       'OTHER_VOTES', 'TOTAL_REG_NO_BALLOT', 'DEM_REG_NO_BALLOT',
       'REP_REG_NO_BALLOT', 'OTHER_REG_NO_BALLOT', 'OTHER_VOTE_DEM',
       'OTHER_VOTE_REP', 'OTHER_VOTE_OTHER', 'D_vote_D', 'R_vote_D',
       'O_vote_D', 'D_vote_R', 'R_vote_R', 'O_vote_R', 'D_vote_O', 'R_vote_O',
       'O_vote_O', 'vote_D_likelihood', 'vote_R_likelihood',
       'vote_O_likelihood', 'D_vote_D_prior', 'R_vote_D_prior',
       'O_vote_D_prior', 'D_vote_R_prior', 'R_vote_R_prior', 'O_vote_R_prior',
       'D_vote_O_prior', 'R_vote_O_prior', 'O_vote_O_prior'],
      dtype='object')

In [18]:
# Marginal probability of Dem registration: DEM_REG as a fraction of TOTAL_REG, per row
total_registered = df['TOTAL_REG']
df['Marg_Prob_reg_D'] = df['DEM_REG'].div(total_registered)

In [19]:
# Marginal probability of Rep registration: REP_REG as a fraction of TOTAL_REG, per row
total_registered = df['TOTAL_REG']
df['Marg_Prob_reg_R'] = df['REP_REG'].div(total_registered)

In [20]:
# Marginal probability of Other registration: OTH_REG as a fraction of TOTAL_REG, per row
total_registered = df['TOTAL_REG']
df['Marg_Prob_reg_O'] = df['OTH_REG'].div(total_registered)

In [21]:
# Check that the three registration marginal probabilities sum to 1 in every row
a = df['Marg_Prob_reg_D'] + df['Marg_Prob_reg_R'] + df['Marg_Prob_reg_O']
# Enforce the invariant rather than relying on reading the printed series;
# a tolerance is used because the shares are floating-point quotients.
assert np.allclose(a, 1.0), "registration marginal probabilities do not sum to 1 in every row"
a

0     1.0
1     1.0
2     1.0
3     1.0
4     1.0
5     1.0
6     1.0
7     1.0
8     1.0
9     1.0
10    1.0
11    1.0
12    1.0
13    1.0
14    1.0
15    1.0
16    1.0
17    1.0
18    1.0
19    1.0
20    1.0
21    1.0
22    1.0
23    1.0
24    1.0
25    1.0
26    1.0
27    1.0
28    1.0
29    1.0
     ... 
34    1.0
35    1.0
36    1.0
37    1.0
38    1.0
39    1.0
40    1.0
41    1.0
42    1.0
43    1.0
44    1.0
45    1.0
46    1.0
47    1.0
48    1.0
49    1.0
50    1.0
51    1.0
52    1.0
53    1.0
54    1.0
55    1.0
56    1.0
57    1.0
58    1.0
59    1.0
60    1.0
61    1.0
62    1.0
63    1.0
Length: 64, dtype: float64

In [22]:
# "Posterior" for registered Dem voting Dem:
# (Dem vote likelihood * D_vote_D prior) / marginal probability of Dem registration
dd_numerator = df['vote_D_likelihood'] * df['D_vote_D_prior']
df['D_D_posterior'] = dd_numerator / df['Marg_Prob_reg_D']

In [23]:
# "Posterior" for registered Rep voting Dem:
# (Dem vote likelihood * R_vote_D prior) / marginal probability of Dem registration
# NOTE(review): the prior is for registered Reps but the denominator is the Dem
# registration share - confirm this normalisation is intended.
rd_numerator = df['vote_D_likelihood'] * df['R_vote_D_prior']
df['R_D_posterior'] = rd_numerator / df['Marg_Prob_reg_D']

In [24]:
# "Posterior" for registered Other voting Dem:
# (Dem vote likelihood * O_vote_D prior) / marginal probability of Dem registration
# NOTE(review): the prior is for registered Others but the denominator is the Dem
# registration share - confirm this normalisation is intended.
od_numerator = df['vote_D_likelihood'] * df['O_vote_D_prior']
df['O_D_posterior'] = od_numerator / df['Marg_Prob_reg_D']

In [25]:
# "Posterior" for registered Dem voting Rep:
# (Rep vote likelihood * D_vote_R prior) / marginal probability of Rep registration
# NOTE(review): the prior is for registered Dems but the denominator is the Rep
# registration share - confirm this normalisation is intended.
dr_numerator = df['vote_R_likelihood'] * df['D_vote_R_prior']
df['D_R_posterior'] = dr_numerator / df['Marg_Prob_reg_R']

In [26]:
# "Posterior" for registered Rep voting Rep:
# (Rep vote likelihood * R_vote_R prior) / marginal probability of Rep registration
rr_numerator = df['vote_R_likelihood'] * df['R_vote_R_prior']
df['R_R_posterior'] = rr_numerator / df['Marg_Prob_reg_R']

In [27]:
# "Posterior" for registered Other voting Rep:
# (Rep vote likelihood * O_vote_R prior) / marginal probability of Rep registration
# NOTE(review): the prior is for registered Others but the denominator is the Rep
# registration share - confirm this normalisation is intended.
or_numerator = df['vote_R_likelihood'] * df['O_vote_R_prior']
df['O_R_posterior'] = or_numerator / df['Marg_Prob_reg_R']

In [28]:
# "Posterior" for registered Dem voting Other:
# (Other vote likelihood * D_vote_O prior) / marginal probability of Other registration
# NOTE(review): the prior is for registered Dems but the denominator is the Other
# registration share - confirm this normalisation is intended.
do_numerator = df['vote_O_likelihood'] * df['D_vote_O_prior']
df['D_O_posterior'] = do_numerator / df['Marg_Prob_reg_O']

In [29]:
# "Posterior" for registered Rep voting Other:
# (Other vote likelihood * R_vote_O prior) / marginal probability of Other registration
# NOTE(review): the prior is for registered Reps but the denominator is the Other
# registration share - confirm this normalisation is intended.
ro_numerator = df['vote_O_likelihood'] * df['R_vote_O_prior']
df['R_O_posterior'] = ro_numerator / df['Marg_Prob_reg_O']

In [30]:
# "Posterior" for registered Other voting Other:
# (Other vote likelihood * O_vote_O prior) / marginal probability of Other registration
oo_numerator = df['vote_O_likelihood'] * df['O_vote_O_prior']
df['O_O_posterior'] = oo_numerator / df['Marg_Prob_reg_O']

In [31]:
# List the column labels currently on df (the Marg_Prob_* and *_posterior columns should now be present)
df.columns

Index(['COUNTY', 'TOTAL_REG', 'DEM_REG', 'REP_REG', 'OTH_REG', 'TOTAL_BALLOTS',
       'DEM_BALLOTS', 'REP_BALLOTS', 'OTHER_BALLOTS', 'DEM_VOTES', 'REP_VOTES',
       'OTHER_VOTES', 'TOTAL_REG_NO_BALLOT', 'DEM_REG_NO_BALLOT',
       'REP_REG_NO_BALLOT', 'OTHER_REG_NO_BALLOT', 'OTHER_VOTE_DEM',
       'OTHER_VOTE_REP', 'OTHER_VOTE_OTHER', 'D_vote_D', 'R_vote_D',
       'O_vote_D', 'D_vote_R', 'R_vote_R', 'O_vote_R', 'D_vote_O', 'R_vote_O',
       'O_vote_O', 'vote_D_likelihood', 'vote_R_likelihood',
       'vote_O_likelihood', 'D_vote_D_prior', 'R_vote_D_prior',
       'O_vote_D_prior', 'D_vote_R_prior', 'R_vote_R_prior', 'O_vote_R_prior',
       'D_vote_O_prior', 'R_vote_O_prior', 'O_vote_O_prior', 'Marg_Prob_reg_D',
       'Marg_Prob_reg_R', 'Marg_Prob_reg_O', 'D_D_posterior', 'R_D_posterior',
       'O_D_posterior', 'D_R_posterior', 'R_R_posterior', 'O_R_posterior',
       'D_O_posterior', 'R_O_posterior', 'O_O_posterior'],
      dtype='object')

In [36]:
# Projected additional Dem votes: the three "vote Dem" posteriors summed,
# then scaled by the row's TOTAL_REG_NO_BALLOT count
dem_posterior_sum = df['D_D_posterior'] + df['R_D_posterior'] + df['O_D_posterior']
df['New_Dem_Voters'] = dem_posterior_sum * df['TOTAL_REG_NO_BALLOT']

In [37]:
# Projected additional Rep votes: the three "vote Rep" posteriors summed,
# then scaled by the row's TOTAL_REG_NO_BALLOT count
rep_posterior_sum = df['D_R_posterior'] + df['R_R_posterior'] + df['O_R_posterior']
df['New_Rep_Voters'] = rep_posterior_sum * df['TOTAL_REG_NO_BALLOT']

In [38]:
# Projected additional Other votes: the three "vote Other" posteriors summed,
# then scaled by the row's TOTAL_REG_NO_BALLOT count
other_posterior_sum = df['D_O_posterior'] + df['R_O_posterior'] + df['O_O_posterior']
df['New_Other_Voters'] = other_posterior_sum * df['TOTAL_REG_NO_BALLOT']

In [39]:
# List the column labels currently on df (the New_*_Voters columns should now be present)
df.columns

Index(['COUNTY', 'TOTAL_REG', 'DEM_REG', 'REP_REG', 'OTH_REG', 'TOTAL_BALLOTS',
       'DEM_BALLOTS', 'REP_BALLOTS', 'OTHER_BALLOTS', 'DEM_VOTES', 'REP_VOTES',
       'OTHER_VOTES', 'TOTAL_REG_NO_BALLOT', 'DEM_REG_NO_BALLOT',
       'REP_REG_NO_BALLOT', 'OTHER_REG_NO_BALLOT', 'OTHER_VOTE_DEM',
       'OTHER_VOTE_REP', 'OTHER_VOTE_OTHER', 'D_vote_D', 'R_vote_D',
       'O_vote_D', 'D_vote_R', 'R_vote_R', 'O_vote_R', 'D_vote_O', 'R_vote_O',
       'O_vote_O', 'vote_D_likelihood', 'vote_R_likelihood',
       'vote_O_likelihood', 'D_vote_D_prior', 'R_vote_D_prior',
       'O_vote_D_prior', 'D_vote_R_prior', 'R_vote_R_prior', 'O_vote_R_prior',
       'D_vote_O_prior', 'R_vote_O_prior', 'O_vote_O_prior', 'Marg_Prob_reg_D',
       'Marg_Prob_reg_R', 'Marg_Prob_reg_O', 'D_D_posterior', 'R_D_posterior',
       'O_D_posterior', 'D_R_posterior', 'R_R_posterior', 'O_R_posterior',
       'D_O_posterior', 'R_O_posterior', 'O_O_posterior', 'New_Dem_Voters',
       'New_Rep_Voters', 'New_Other_Voters'

In [40]:
# Per-party projected totals: actual votes plus the projected new voters.
# Each triple is (output column, actual-votes column, projected-new-voters column).
party_total_specs = (
    ('New_Dem_total', 'DEM_VOTES', 'New_Dem_Voters'),
    ('New_Rep_total', 'REP_VOTES', 'New_Rep_Voters'),
    ('New_Other_total', 'OTHER_VOTES', 'New_Other_Voters'),
)
for total_col, votes_col, new_voters_col in party_total_specs:
    df[total_col] = df[votes_col] + df[new_voters_col]

# Grand total across the three parties, summed in Dem, Rep, Other order
df['New_Grand_Total'] = df['New_Dem_total'] + df['New_Rep_total'] + df['New_Other_total']

In [41]:
# Actual Dem share across all rows: total DEM_VOTES over total TOTAL_BALLOTS (a fraction, not a percent)
dem_votes_cast = df['DEM_VOTES'].sum()
ballots_cast = df['TOTAL_BALLOTS'].sum()
Dem_Election_Outcome = dem_votes_cast / ballots_cast
Dem_Election_Outcome

0.46411125331218345

In [42]:
# Projected Dem share: (actual Dem votes + projected new Dem voters) over the projected grand total.
# Despite the name, the value is a fraction in [0, 1], not a percent.
projected_dem_votes = df['DEM_VOTES'].sum() + df['New_Dem_Voters'].sum()
projected_grand_total = df['New_Grand_Total'].sum()
new_dem_percentage = projected_dem_votes / projected_grand_total
new_dem_percentage

0.4860903220063239

In [43]:
# Actual Rep share across all rows: total REP_VOTES over total TOTAL_BALLOTS (a fraction, not a percent)
rep_votes_cast = df['REP_VOTES'].sum()
ballots_cast = df['TOTAL_BALLOTS'].sum()
Rep_Election_Outcome = rep_votes_cast / ballots_cast
Rep_Election_Outcome

0.4168338646230385

In [44]:
# Projected Rep share: (actual Rep votes + projected new Rep voters) over the projected grand total.
# Despite the name, the value is a fraction in [0, 1], not a percent.
projected_rep_votes = df['REP_VOTES'].sum() + df['New_Rep_Voters'].sum()
projected_grand_total = df['New_Grand_Total'].sum()
new_rep_percentage = projected_rep_votes / projected_grand_total
new_rep_percentage

0.42569529713265736

In [45]:
# Persist the augmented table to the current working directory.
# to_csv is called with its defaults, so the row index is written as the first column.
model_output_path = "Model_3_NEW.csv"
df.to_csv(model_output_path)