In [3]:
import pandas as pd
from pathlib import Path
import plotly.graph_objects as go
import plotly.express as px

In [4]:
# Direct Democracy Study
# Read the Stata file into a DataFrame; the path is relative to the
# notebook's working directory.
data_path = Path(
    "../data/anes_specialstudy_2012_directdem_dta/anes_specialstudy_2012_directdem_stata12.dta"
)
data = pd.read_stata(filepath_or_buffer=data_path)

In [5]:

# Preview the first 1000 rows (pandas abbreviates the rendered table).
data.iloc[:1000]

Unnamed: 0,dd_version,dd_caseid,caseid,weight_pre,weight_post,main_anes,sample,dd_pid_x,dd_rvote2012_x,dd_presvote2012_x,...,post_satisf_timing,post_numballot_timing,post_happy_timing,post_govsay_timing,post_outcome_timing,post_ddpref_timing,post_flawed_timing,post_ddfavor_timing,post_disp_close_timing,post_qf1_timing
0,"FINAL RELEASE, ANES 2012 Direct Democracy Stud...",20001.0,3001,1.4708,1.5180,1. Main ANES survey response,1. Main ANES,2. Not very strong Democrat,1. Voted,1. Barack Obama (Democrat),...,20,17,17,20,23,59,21,26,13,30
1,"FINAL RELEASE, ANES 2012 Direct Democracy Stud...",20002.0,3004,0.2901,0.0000,1. Main ANES survey response,1. Main ANES,6. Not very strong Republican,-2. Missing,-6. Unit nonresponse,...,-2. Missing,-2. Missing,-2. Missing,-2. Missing,-2. Missing,-2. Missing,-2. Missing,-2. Missing,-2. Missing,-2. Missing
2,"FINAL RELEASE, ANES 2012 Direct Democracy Stud...",20003.0,3005,1.6573,1.8081,1. Main ANES survey response,1. Main ANES,2. Not very strong Democrat,1. Voted,1. Barack Obama (Democrat),...,9,9,9,11,17,21,8,10,5,7
3,"FINAL RELEASE, ANES 2012 Direct Democracy Stud...",20004.0,3009,1.3252,0.9914,1. Main ANES survey response,1. Main ANES,2. Not very strong Democrat,1. Voted,1. Barack Obama (Democrat),...,7,6,5,6,4,5,7,9,18,2
4,"FINAL RELEASE, ANES 2012 Direct Democracy Stud...",20005.0,3016,1.2193,1.2796,1. Main ANES survey response,1. Main ANES,6. Not very strong Republican,1. Voted,2. Mitt Romney (Republican),...,10,6,7,9,14,14,11,8,14,4
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
995,"FINAL RELEASE, ANES 2012 Direct Democracy Stud...",21001.0,5748,5.1631,4.9866,1. Main ANES survey response,1. Main ANES,"5. Ind, closer to Republican",1. Voted,2. Mitt Romney (Republican),...,21,15,12,16,17,29,12,15,3,363
996,"FINAL RELEASE, ANES 2012 Direct Democracy Stud...",21002.0,5749,4.1436,3.6616,1. Main ANES survey response,1. Main ANES,1. Strong Democrat,1. Voted,1. Barack Obama (Democrat),...,8,8,11,12,8,14,15,7,3,4
997,"FINAL RELEASE, ANES 2012 Direct Democracy Stud...",21003.0,5751,5.1631,4.2684,1. Main ANES survey response,1. Main ANES,2. Not very strong Democrat,1. Voted,1. Barack Obama (Democrat),...,8,4,8,6,8,17,9,9,21,3
998,"FINAL RELEASE, ANES 2012 Direct Democracy Stud...",21005.0,5754,0.3416,0.3896,1. Main ANES survey response,1. Main ANES,6. Not very strong Republican,1. Voted,-2. Missing,...,12,7,7,12,14,25,8,10,6,5


In [6]:
# (number of rows, number of columns) of the loaded frame.
(len(data.index), len(data.columns))

(5415, 1037)

19520001.0

Can we get an idea of where the participants reside? 

In [7]:
# Count non-null entries of every column within each `pre_ppstaten` group;
# the `caseid` column of the result is used as the per-state respondent tally.
location_spread = data.groupby(["pre_ppstaten"]).count()
state_counts = location_spread["caseid"]

print(state_counts)
print(f"Total Count: {state_counts.sum()}")

# Raw group labels, as they appear in the index.
print(location_spread.index.tolist())

pre_ppstaten
14. MA    392
31. OH    472
34. MI    423
43. MO    391
44. ND    317
45. SD    334
59. FL    551
71. AR    275
84. CO    368
86. AZ    390
91. WA    407
92. OR    375
93. CA    720
Name: caseid, dtype: int64
Total Count: 5415
['14. MA', '31. OH', '34. MI', '43. MO', '44. ND', '45. SD', '59. FL', '71. AR', '84. CO', '86. AZ', '91. WA', '92. OR', '93. CA']


In [8]:
# Let's examine a map to get a more intuitive understanding

# The index labels look like "14. MA"; keep only the token after the space
# (the two-letter state abbreviation), which is the form plotly's
# 'USA-states' location mode matches against.
categories = [label.split(" ")[1] for label in location_spread.index]

# Figure code from plotly documentation: https://plotly.com/python/choropleth-maps/
fig = go.Figure(data=go.Choropleth(
    locations=categories,
    z=location_spread["caseid"].astype(float),  # respondents per state
    locationmode='USA-states',
    colorscale='Blues',
    colorbar_title="Participation in Survey",
))

fig.update_layout(
    # The data set loaded above is the ANES 2012 Direct Democracy Study.
    title_text='Participation in 2012 Direct Democracy Survey by State',
    geo_scope='usa',  # limit map scope to USA
)

fig.show()

It appears that we have a decent spread of participants in blue states, red states, and swing states. Can we take a closer look at the balance of party affiliation we have?

In [1]:
# Tally rows per `pre_rptyid` value (party identification, per the narrative
# around this cell) and chart each group's share of respondents.
# Requires `data` from the loading cell to be present in the kernel.
party_spread = data.groupby(["pre_rptyid"]).count()
print(party_spread["caseid"])

fig = px.pie(
    data_frame=party_spread,
    names=party_spread.index,
    values="caseid",
    title="Party Affiliation",
)
fig.show()

NameError: name 'data' is not defined

We have a nearly three-way tie between Democrats, Republicans, and independents. This is surprising, considering that the number of US voters who actually vote for independent candidates is much lower than one third. We can see this by examining how these participants voted.