# Is being married a good predictor of your political views?

insert nice introduction here

First, let's import some data and necessary packages.

In [1]:
from __future__ import print_function, division

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from IPython.display import display

import thinkstats2_mod
import thinkplot

import statsmodels.formula.api as smf

In [2]:
dct = thinkstats2_mod.ReadStataDct('GSS/GSS.dct')
df = dct.ReadFixedWidth('GSS/GSS.dat')
print('Total number of respondents: %i' %(df.shape[0] + 1))

Total number of respondents: 62467


Now, let's select for years where we have data while filtering out respondents with "invalid" responses (e.g. no response, refuse to answer, don't know).

In [3]:
# Filter out years without relevant data
df = df[df.year >= 1974]
print('Number of respondents left: %i' %(df.shape[0] + 1))

# Filter out invalid polviews
df = df[df.polviews <= 7]
print('Number of respondents left: %i' %(df.shape[0] + 1))

# Filter out invalid age respondants
df = df[df.age <= 89]
print('Number of respondents left: %i' %(df.shape[0] + 1))

# Filter out marital status no answers
df = df[df.marital != 9]
print('Number of respondents left: %i' %(df.shape[0] + 1))

df_control = df.copy(deep=True)

Number of respondents left: 59350
Number of respondents left: 56742
Number of respondents left: 56563
Number of respondents left: 56552


To begin investigating whether or not marital status has an effect on political views, let's run an initial multinomial regression with political views as the dependent variable and marital status as the explanatory variable.

In [4]:
formula = 'df_control.polviews ~ df_control.marital'
model = smf.mnlogit(formula, data=df_control)
results = model.fit()
params_marital = results.params
results.summary()

Optimization terminated successfully.
         Current function value: 1.793065
         Iterations 6


0,1,2,3
Dep. Variable:,df_control.polviews,No. Observations:,56551.0
Model:,MNLogit,Df Residuals:,56537.0
Method:,MLE,Df Model:,7.0
Date:,"Thu, 20 Apr 2017",Pseudo R-squ.:,0.005579
Time:,00:43:53,Log-Likelihood:,-101400.0
converged:,True,LL-Null:,-101970.0
,,LLR p-value:,1.939e-241

df_control.polviews=1,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-1.4374,0.058,-24.898,0.000,-1.551,-1.324
df_control.marital,0.2225,0.018,12.489,0.000,0.188,0.257
df_control.polviews=2,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.1627,0.037,4.346,0.000,0.089,0.236
df_control.marital,0.1473,0.013,11.672,0.000,0.123,0.172
df_control.polviews=3,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.4396,0.036,12.113,0.000,0.368,0.511
df_control.marital,0.0724,0.013,5.764,0.000,0.048,0.097
df_control.polviews=4,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,1.7342,0.031,55.252,0.000,1.673,1.796


To control for age's effect on political views with respect to marital status, let's run the same regression, but with age as an added explanatory variable.

In [5]:
formula = 'df_control.polviews ~ df_control.marital + df_control.age'
model = smf.mnlogit(formula, data=df_control)
results = model.fit()
params_age = results.params
results.summary()

Optimization terminated successfully.
         Current function value: 1.788698
         Iterations 6


0,1,2,3
Dep. Variable:,df_control.polviews,No. Observations:,56551.0
Model:,MNLogit,Df Residuals:,56530.0
Method:,MLE,Df Model:,14.0
Date:,"Thu, 20 Apr 2017",Pseudo R-squ.:,0.008001
Time:,00:43:54,Log-Likelihood:,-101150.0
converged:,True,LL-Null:,-101970.0
,,LLR p-value:,0.0

df_control.polviews=1,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-1.5607,0.118,-13.225,0.000,-1.792,-1.329
df_control.marital,0.2305,0.019,12.004,0.000,0.193,0.268
df_control.age,0.0023,0.002,1.218,0.223,-0.001,0.006
df_control.polviews=2,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.3322,0.078,4.257,0.000,0.179,0.485
df_control.marital,0.1350,0.013,10.130,0.000,0.109,0.161
df_control.age,-0.0031,0.001,-2.418,0.016,-0.006,-0.001
df_control.polviews=3,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.8758,0.076,11.529,0.000,0.727,1.025
df_control.marital,0.0435,0.013,3.302,0.001,0.018,0.069


Let's also control for gender.

In [6]:
formula = 'df_control.polviews ~ df_control.marital + df_control.age + df_control.sex'
model = smf.mnlogit(formula, data=df_control)
results = model.fit()
params_sex = results.params
results.summary()

Optimization terminated successfully.
         Current function value: 1.786516
         Iterations 6


0,1,2,3
Dep. Variable:,df_control.polviews,No. Observations:,56551.0
Model:,MNLogit,Df Residuals:,56523.0
Method:,MLE,Df Model:,21.0
Date:,"Thu, 20 Apr 2017",Pseudo R-squ.:,0.009212
Time:,00:43:55,Log-Likelihood:,-101030.0
converged:,True,LL-Null:,-101970.0
,,LLR p-value:,0.0

df_control.polviews=1,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-1.1247,0.151,-7.463,0.000,-1.420,-0.829
df_control.marital,0.2291,0.019,11.919,0.000,0.191,0.267
df_control.age,0.0025,0.002,1.357,0.175,-0.001,0.006
df_control.sex,-0.2823,0.061,-4.604,0.000,-0.403,-0.162
df_control.polviews=2,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,0.6196,0.102,6.048,0.000,0.419,0.820
df_control.marital,0.1345,0.013,10.072,0.000,0.108,0.161
df_control.age,-0.0030,0.001,-2.304,0.021,-0.005,-0.000
df_control.sex,-0.1848,0.043,-4.342,0.000,-0.268,-0.101
df_control.polviews=3,coef,std err,z,P>|z|,[0.025,0.975]


In [7]:
display(params_marital)
display(params_age)
display(params_sex)

Unnamed: 0,0,1,2,3,4,5,6
Intercept,-1.437356,0.162729,0.439606,1.734235,0.971281,1.051896,-0.563444
df_control.marital,0.22246,0.147319,0.072377,-0.004803,-0.061511,-0.133012,-0.084689


Unnamed: 0,0,1,2,3,4,5,6
Intercept,-1.560726,0.332161,0.875836,1.611877,0.978313,0.485752,-1.434335
df_control.marital,0.230549,0.135003,0.04347,0.002747,-0.061623,-0.102402,-0.035685
df_control.age,0.00228,-0.003115,-0.008308,0.002289,-0.000148,0.010527,0.015857


Unnamed: 0,0,1,2,3,4,5,6
Intercept,-1.124699,0.619605,1.195343,1.772563,1.586993,1.052248,-0.764788
df_control.marital,0.229101,0.134468,0.042761,0.002271,-0.062721,-0.102446,-0.03539
df_control.age,0.002542,-0.002964,-0.008132,0.002367,0.000173,0.010863,0.016329
df_control.sex,-0.282324,-0.184803,-0.205783,-0.102193,-0.397903,-0.371956,-0.445073


In [8]:
def effect_change_calc(array1, array2):
    return np.subtract(np.absolute(np.divide(array1, array2)), 1)

effect_change_age = effect_change_calc(params_age.loc["df_control.marital"], params_marital.loc["df_control.marital"])
effect_change_sex = effect_change_calc(params_sex.loc["df_control.marital"], params_age.loc["df_control.marital"])
effect_change_agesex = effect_change_calc(params_sex.loc["df_control.marital"], params_marital.loc["df_control.marital"])

display(effect_change_age)
display(effect_change_sex)
display(effect_change_agesex)

0    0.036360
1   -0.083607
2   -0.399400
3   -0.428118
4    0.001829
5   -0.230129
6   -0.578633
Name: df_control.marital, dtype: float64

0   -0.006281
1   -0.003963
2   -0.016306
3   -0.173345
4    0.017816
5    0.000432
6   -0.008266
Name: df_control.marital, dtype: float64

0    0.029851
1   -0.087238
2   -0.409193
3   -0.527251
4    0.019678
5   -0.229796
6   -0.582116
Name: df_control.marital, dtype: float64