In [1]:
import pandas as pd
import statsmodels.api as sm

In [2]:
# Make dataset implied in the book on page 304 (Table 9.1)
data = pd.DataFrame(data={'reply':[1]*52+[0]*48+[1]*29+[0]*(100-29)+[1]*37+[0]*(100-37)+[1]*34+[0]*(100-34), 
                          'person':["Colin"]*200 + ["Jose"]*200,
                          'grammar': ["Good"]*100 + ["Bad"]*100 + ["Good"]*100 + ["Bad"]*100})

# Check to see if the dataset was created correctly
pd.crosstab(data.person, 
            data.grammar, 
            values=data.reply, 
            aggfunc=lambda x: x.sum(), 
            colnames=["Grammar"], 
            rownames=["Person"])

Grammar,Bad,Good
Person,Unnamed: 1_level_1,Unnamed: 2_level_1
Colin,29,52
Jose,34,37


In [3]:
# Make a dataset for the saturated model example
saturated_model = pd.DataFrame()
saturated_model["CG"] = ((data.person == "Colin") & (data.grammar == "Bad")).astype(int)
saturated_model["CB"] = ((data.person == "Colin") & (data.grammar == "Good")).astype(int)
saturated_model["JG"] = ((data.person == "Jose")  & (data.grammar == "Bad")).astype(int)
saturated_model["JB"] = ((data.person == "Jose")  & (data.grammar == "Good")).astype(int)

# Run regression
est = sm.OLS(data.reply, saturated_model)
est = est.fit()
est.summary()

0,1,2,3
Dep. Variable:,reply,R-squared:,0.031
Model:,OLS,Adj. R-squared:,0.024
Method:,Least Squares,F-statistic:,4.251
Date:,"Fri, 24 Jun 2016",Prob (F-statistic):,0.00567
Time:,19:09:21,Log-Likelihood:,-272.11
No. Observations:,400,AIC:,552.2
Df Residuals:,396,BIC:,568.2
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5
,coef,std err,t,P>|t|,[95.0% Conf. Int.]
CG,0.2900,0.048,6.040,0.000,0.196 0.384
CB,0.5200,0.048,10.830,0.000,0.426 0.614
JG,0.3400,0.048,7.081,0.000,0.246 0.434
JB,0.3700,0.048,7.706,0.000,0.276 0.464

0,1,2,3
Omnibus:,14.374,Durbin-Watson:,0.081
Prob(Omnibus):,0.001,Jarque-Bera (JB):,59.745
Skew:,0.479,Prob(JB):,1.06e-13
Kurtosis:,1.367,Cond. No.,1.0


In [4]:
# Make a dataset for the interaction model example
interaction_model = pd.DataFrame()
interaction_model["J"] = (data.person == "Jose")*1
interaction_model["G"] = (data.grammar == "Good")*1
interaction_model["JG"] = interaction_model.J * interaction_model.G

# Run regression
est = sm.OLS(data.reply, sm.add_constant(interaction_model))
est = est.fit()
est.summary()

0,1,2,3
Dep. Variable:,reply,R-squared:,0.031
Model:,OLS,Adj. R-squared:,0.024
Method:,Least Squares,F-statistic:,4.251
Date:,"Fri, 24 Jun 2016",Prob (F-statistic):,0.00567
Time:,19:09:21,Log-Likelihood:,-272.11
No. Observations:,400,AIC:,552.2
Df Residuals:,396,BIC:,568.2
Df Model:,3,,
Covariance Type:,nonrobust,,

0,1,2,3,4,5
,coef,std err,t,P>|t|,[95.0% Conf. Int.]
const,0.2900,0.048,6.040,0.000,0.196 0.384
J,0.0500,0.068,0.736,0.462,-0.083 0.183
G,0.2300,0.068,3.387,0.001,0.097 0.363
JG,-0.2000,0.096,-2.083,0.038,-0.389 -0.011

0,1,2,3
Omnibus:,14.374,Durbin-Watson:,0.081
Prob(Omnibus):,0.001,Jarque-Bera (JB):,59.745
Skew:,0.479,Prob(JB):,1.06e-13
Kurtosis:,1.367,Cond. No.,6.85
