In [1]:
from __future__ import print_function, division

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns

from IPython.display import display

import thinkstats2_mod
import thinkplot

import statsmodels.formula.api as smf

In [2]:
dct = thinkstats2_mod.ReadStataDct('GSS/GSS.dct')
df = dct.ReadFixedWidth('GSS/GSS.dat')
print('Total number of respondents: %i' %(df.shape[0] + 1))

Total number of respondents: 62467


In [3]:
# Filter out years without relevant data
df = df[df.year >= 1974]
print('Number of respondents left: %i' %(df.shape[0] + 1))

# Filter out invalid polviews
df = df[df.polviews <= 7]
df = df[df.polviews > 0]

# Center polviews likert scale around 0
df['polviews'] = np.subtract(df['polviews'], 4)
print('Number of respondents left: %i' %(df.shape[0] + 1))

# Filter out invalid age respondants
df = df[df.age <= 89]
print('Number of respondents left: %i' %(df.shape[0] + 1))

# Filter out marital status no answers
df = df[df.marital != 9]
print('Number of respondents left: %i' %(df.shape[0] + 1))

# Filter out highest education degree invalid
df = df[df.degree <= 4]

# Recode religion to make coding more consistent through GSS years
df['relig_recode'] = df.relig
#recode Christians who are not Catholic as '1' (replacing Protestant code)
df.relig_recode.replace([10, 11, 13], 1, inplace=True)
#recode Buddhism, Hinduism, Other eastern, Moslem/islam, Native american, Inter-nondenominational
df.relig_recode.replace([6, 7, 8, 9, 12], 5, inplace=True)
#remove don't know/don't answer respondants
df = df[df.relig_recode <= 5]

# For more information about SEI scores http://gss.norc.org/Documents/reports/methodological-reports/MR074.pdf

df_control = df.copy(deep=True)

Number of respondents left: 59350
Number of respondents left: 53082
Number of respondents left: 52916
Number of respondents left: 52905


In [4]:
formula = 'df_control.polviews ~ df_control.marital'
model = smf.ols(formula, data=df_control)
results = model.fit()
params_marital = results.params
results.summary()

0,1,2,3
Dep. Variable:,df_control.polviews,R-squared:,0.02
Model:,OLS,Adj. R-squared:,0.02
Method:,Least Squares,F-statistic:,1089.0
Date:,"Mon, 01 May 2017",Prob (F-statistic):,1.89e-236
Time:,23:49:34,Log-Likelihood:,-91036.0
No. Observations:,52664,AIC:,182100.0
Df Residuals:,52662,BIC:,182100.0
Df Model:,1,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.3831,0.010,36.929,0.000,0.363,0.403
df_control.marital,-0.1208,0.004,-33.004,0.000,-0.128,-0.114

0,1,2,3
Omnibus:,380.059,Durbin-Watson:,1.912
Prob(Omnibus):,0.0,Jarque-Bera (JB):,276.011
Skew:,-0.069,Prob(JB):,1.16e-60
Kurtosis:,2.673,Cond. No.,5.38


In [5]:
formula = 'df_control.polviews ~ df_control.marital + df_control.age'
model = smf.ols(formula, data=df_control)
results = model.fit()
params_age = results.params
results.summary()

0,1,2,3
Dep. Variable:,df_control.polviews,R-squared:,0.026
Model:,OLS,Adj. R-squared:,0.026
Method:,Least Squares,F-statistic:,691.4
Date:,"Mon, 01 May 2017",Prob (F-statistic):,4.18e-297
Time:,23:49:34,Log-Likelihood:,-90892.0
No. Observations:,52664,AIC:,181800.0
Df Residuals:,52661,BIC:,181800.0
Df Model:,2,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.0564,0.022,2.577,0.010,0.013,0.099
df_control.marital,-0.1002,0.004,-26.038,0.000,-0.108,-0.093
df_control.age,0.0061,0.000,16.957,0.000,0.005,0.007

0,1,2,3
Omnibus:,367.456,Durbin-Watson:,1.913
Prob(Omnibus):,0.0,Jarque-Bera (JB):,273.972
Skew:,-0.078,Prob(JB):,3.22e-60
Kurtosis:,2.683,Cond. No.,183.0


In [6]:
formula = 'df_control.polviews ~ df_control.marital + df_control.age + df_control.sex'
model = smf.ols(formula, data=df_control)
results = model.fit()
params_sex = results.params
results.summary()

0,1,2,3
Dep. Variable:,df_control.polviews,R-squared:,0.027
Model:,OLS,Adj. R-squared:,0.027
Method:,Least Squares,F-statistic:,482.9
Date:,"Mon, 01 May 2017",Prob (F-statistic):,1.43e-309
Time:,23:49:35,Log-Likelihood:,-90860.0
No. Observations:,52664,AIC:,181700.0
Df Residuals:,52660,BIC:,181800.0
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.2008,0.028,7.090,0.000,0.145,0.256
df_control.marital,-0.1003,0.004,-26.078,0.000,-0.108,-0.093
df_control.age,0.0062,0.000,17.207,0.000,0.005,0.007
df_control.sex,-0.0955,0.012,-8.017,0.000,-0.119,-0.072

0,1,2,3
Omnibus:,360.36,Durbin-Watson:,1.913
Prob(Omnibus):,0.0,Jarque-Bera (JB):,272.322
Skew:,-0.083,Prob(JB):,7.35e-60
Kurtosis:,2.689,Cond. No.,245.0


In [7]:
formula = 'df_control.polviews ~ df_control.marital + df_control.age + df_control.sex + df_control.race'
model = smf.ols(formula, data=df_control)
results = model.fit()
params_race = results.params
results.summary()

0,1,2,3
Dep. Variable:,df_control.polviews,R-squared:,0.029
Model:,OLS,Adj. R-squared:,0.029
Method:,Least Squares,F-statistic:,398.5
Date:,"Mon, 01 May 2017",Prob (F-statistic):,0.0
Time:,23:49:35,Log-Likelihood:,-90789.0
No. Observations:,52664,AIC:,181600.0
Df Residuals:,52659,BIC:,181600.0
Df Model:,4,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.3607,0.031,11.517,0.000,0.299,0.422
df_control.marital,-0.0946,0.004,-24.454,0.000,-0.102,-0.087
df_control.age,0.0058,0.000,16.308,0.000,0.005,0.007
df_control.sex,-0.0917,0.012,-7.709,0.000,-0.115,-0.068
df_control.race,-0.1324,0.011,-11.893,0.000,-0.154,-0.111

0,1,2,3
Omnibus:,336.458,Durbin-Watson:,1.916
Prob(Omnibus):,0.0,Jarque-Bera (JB):,254.752
Skew:,-0.077,Prob(JB):,4.8e-56
Kurtosis:,2.696,Cond. No.,270.0


In [8]:
formula = 'df_control.polviews ~ df_control.marital + df_control.age + df_control.sex + df_control.race + df_control.degree'
model = smf.ols(formula, data=df_control)
results = model.fit()
params_degree = results.params
results.summary()

0,1,2,3
Dep. Variable:,df_control.polviews,R-squared:,0.031
Model:,OLS,Adj. R-squared:,0.031
Method:,Least Squares,F-statistic:,342.2
Date:,"Mon, 01 May 2017",Prob (F-statistic):,0.0
Time:,23:49:35,Log-Likelihood:,-90733.0
No. Observations:,52664,AIC:,181500.0
Df Residuals:,52658,BIC:,181500.0
Df Model:,5,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.4710,0.033,14.292,0.000,0.406,0.536
df_control.marital,-0.0956,0.004,-24.722,0.000,-0.103,-0.088
df_control.age,0.0055,0.000,15.326,0.000,0.005,0.006
df_control.sex,-0.0966,0.012,-8.117,0.000,-0.120,-0.073
df_control.race,-0.1404,0.011,-12.594,0.000,-0.162,-0.119
df_control.degree,-0.0539,0.005,-10.658,0.000,-0.064,-0.044

0,1,2,3
Omnibus:,350.547,Durbin-Watson:,1.919
Prob(Omnibus):,0.0,Jarque-Bera (JB):,257.507
Skew:,-0.066,Prob(JB):,1.21e-56
Kurtosis:,2.684,Cond. No.,284.0


In [9]:
formula = 'df_control.polviews ~ df_control.marital + df_control.age + df_control.sex + df_control.race + df_control.degree + df_control.sei10'
model = smf.ols(formula, data=df_control)
results = model.fit()
params_sei = results.params
results.summary()

0,1,2,3
Dep. Variable:,df_control.polviews,R-squared:,0.032
Model:,OLS,Adj. R-squared:,0.031
Method:,Least Squares,F-statistic:,285.5
Date:,"Mon, 01 May 2017",Prob (F-statistic):,0.0
Time:,23:49:35,Log-Likelihood:,-90732.0
No. Observations:,52664,AIC:,181500.0
Df Residuals:,52657,BIC:,181500.0
Df Model:,6,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.4568,0.034,13.255,0.000,0.389,0.524
df_control.marital,-0.0952,0.004,-24.555,0.000,-0.103,-0.088
df_control.age,0.0055,0.000,15.104,0.000,0.005,0.006
df_control.sex,-0.0944,0.012,-7.874,0.000,-0.118,-0.071
df_control.race,-0.1395,0.011,-12.497,0.000,-0.161,-0.118
df_control.degree,-0.0590,0.006,-9.472,0.000,-0.071,-0.047
df_control.sei10,0.0004,0.000,1.410,0.159,-0.000,0.001

0,1,2,3
Omnibus:,349.388,Durbin-Watson:,1.919
Prob(Omnibus):,0.0,Jarque-Bera (JB):,256.745
Skew:,-0.066,Prob(JB):,1.77e-56
Kurtosis:,2.684,Cond. No.,397.0


In [10]:
formula = 'df_control.polviews ~ df_control.marital + df_control.age + df_control.sex + df_control.race + df_control.degree + df_control.sei10 + df_control.coninc'
model = smf.ols(formula, data=df_control)
results = model.fit()
params_standard = results.params
results.summary()

0,1,2,3
Dep. Variable:,df_control.polviews,R-squared:,0.032
Model:,OLS,Adj. R-squared:,0.032
Method:,Least Squares,F-statistic:,247.0
Date:,"Mon, 01 May 2017",Prob (F-statistic):,0.0
Time:,23:49:35,Log-Likelihood:,-90724.0
No. Observations:,52664,AIC:,181500.0
Df Residuals:,52656,BIC:,181500.0
Df Model:,7,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.4147,0.036,11.494,0.000,0.344,0.485
df_control.marital,-0.0909,0.004,-22.581,0.000,-0.099,-0.083
df_control.age,0.0057,0.000,15.520,0.000,0.005,0.006
df_control.sex,-0.0902,0.012,-7.496,0.000,-0.114,-0.067
df_control.race,-0.1373,0.011,-12.283,0.000,-0.159,-0.115
df_control.degree,-0.0650,0.006,-10.134,0.000,-0.078,-0.052
df_control.sei10,0.0002,0.000,0.779,0.436,-0.000,0.001
df_control.coninc,7.132e-07,1.82e-07,3.929,0.000,3.57e-07,1.07e-06

0,1,2,3
Omnibus:,344.41,Durbin-Watson:,1.918
Prob(Omnibus):,0.0,Jarque-Bera (JB):,253.547
Skew:,-0.065,Prob(JB):,8.77e-56
Kurtosis:,2.686,Cond. No.,351000.0


In [11]:
formula = 'df_control.polviews ~ df_control.marital + df_control.age + df_control.sex + df_control.race + df_control.degree + df_control.sei10 + df_control.coninc + df_control.relig_recode'
model = smf.ols(formula, data=df_control)
results = model.fit()
params_all = results.params
results.summary()

0,1,2,3
Dep. Variable:,df_control.polviews,R-squared:,0.06
Model:,OLS,Adj. R-squared:,0.06
Method:,Least Squares,F-statistic:,417.9
Date:,"Mon, 01 May 2017",Prob (F-statistic):,0.0
Time:,23:49:35,Log-Likelihood:,-89954.0
No. Observations:,52664,AIC:,179900.0
Df Residuals:,52655,BIC:,180000.0
Df Model:,8,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5,6
,coef,std err,t,P>|t|,[0.025,0.975]
Intercept,0.8018,0.037,21.739,0.000,0.730,0.874
df_control.marital,-0.0727,0.004,-18.188,0.000,-0.080,-0.065
df_control.age,0.0045,0.000,12.324,0.000,0.004,0.005
df_control.sex,-0.1296,0.012,-10.889,0.000,-0.153,-0.106
df_control.race,-0.1240,0.011,-11.247,0.000,-0.146,-0.102
df_control.degree,-0.0455,0.006,-7.180,0.000,-0.058,-0.033
df_control.sei10,0.0004,0.000,1.184,0.237,-0.000,0.001
df_control.coninc,9.169e-07,1.79e-07,5.123,0.000,5.66e-07,1.27e-06
df_control.relig_recode,-0.2126,0.005,-39.535,0.000,-0.223,-0.202

0,1,2,3
Omnibus:,254.988,Durbin-Watson:,1.933
Prob(Omnibus):,0.0,Jarque-Bera (JB):,193.498
Skew:,-0.051,Prob(JB):,9.6e-43
Kurtosis:,2.721,Cond. No.,363000.0


In [12]:
display(params_marital)
display(params_standard)
display(params_all)

Intercept             0.383106
df_control.marital   -0.120832
dtype: float64

Intercept             4.147208e-01
df_control.marital   -9.093301e-02
df_control.age        5.669449e-03
df_control.sex       -9.024597e-02
df_control.race      -1.372915e-01
df_control.degree    -6.498396e-02
df_control.sei10      2.442175e-04
df_control.coninc     7.132458e-07
dtype: float64

Intercept                  8.017968e-01
df_control.marital        -7.266656e-02
df_control.age             4.452614e-03
df_control.sex            -1.296448e-01
df_control.race           -1.239546e-01
df_control.degree         -4.551156e-02
df_control.sei10           3.654536e-04
df_control.coninc          9.169204e-07
df_control.relig_recode   -2.125745e-01
dtype: float64

In [13]:
def effect_change_calc(array1, array2):
    effect1 = array1.loc['df_control.marital']
    effect2 = array2.loc['df_control.marital']
    return np.subtract(np.divide(effect1, effect2), 1)

effect_change_standard = effect_change_calc(params_standard, params_marital)
effect_change_all = effect_change_calc(params_all, params_marital)

display(effect_change_standard)
display(effect_change_all)

-0.24744282318620614

-0.3986150956339396