In [1]:
import pandas as pd
import statsmodels.api as sm

from patsy import dmatrices

In [2]:
# Raw columns used in the regression: the response G1 plus its predictors.
raw_columns = ['G1', 'failures', 'schoolsup', 'famsup', 'studytime', 'goout', 'sex']

# After one-hot encoding, keep a single indicator per categorical column
# ("yes" for the two support flags, "F" for sex) alongside the numeric columns.
encoded_columns = ['G1', 'failures', 'schoolsup_yes', 'famsup_yes', 'studytime', 'goout', 'sex_F']

# Map each retained indicator back to the plain name of the column it came from,
# so that e.g. `sex` is the indicator derived from `sex_F`.
indicator_names = {'schoolsup_yes':'schoolsup', 'famsup_yes':'famsup', 'sex_F':'sex'}

df = (
    pd.read_csv('student-mat.csv')
    .loc[:, raw_columns]
    .pipe(pd.get_dummies)
    .loc[:, encoded_columns]
    .rename(columns=indicator_names)
)

In [3]:
# Preview the first five rows to check the selected columns and the encoded indicators.
df.head(n=5)

Unnamed: 0,G1,failures,schoolsup,famsup,studytime,goout,sex
0,5,0,True,False,2,4,True
1,5,0,False,True,2,3,True
2,7,3,True,False,2,2,True
3,15,0,False,True,3,2,True
4,6,0,False,True,2,2,True


In [4]:
# Patsy formula for the regression, assembled from its parts.
# `I(...)` makes Patsy evaluate `failures * sex` as an arithmetic product
# rather than interpreting `*` as a formula operator.
response = 'G1'
predictors = [
    'failures',
    'schoolsup',
    'famsup',
    'studytime',
    'goout',
    'sex',
    'I(failures * sex)',
]
reg_exp = response + ' ~ ' + ' + '.join(predictors)

In [5]:
# Build the response and design matrices described by the formula.
# `return_type='dataframe'` asks Patsy for pandas DataFrames.
design_matrices = dmatrices(reg_exp, data=df, return_type='dataframe')
y_train, X_train = design_matrices

In [6]:
# Inspect the first five rows of the design matrix Patsy generated from the formula.
X_train.head(n=5)

Unnamed: 0,Intercept,schoolsup[T.True],famsup[T.True],sex[T.True],failures,studytime,goout,I(failures * sex)
0,1.0,1.0,0.0,1.0,0.0,2.0,4.0,0.0
1,1.0,0.0,1.0,1.0,0.0,2.0,3.0,0.0
2,1.0,1.0,0.0,1.0,3.0,2.0,2.0,3.0
3,1.0,0.0,1.0,1.0,0.0,3.0,2.0,0.0
4,1.0,0.0,1.0,1.0,0.0,2.0,2.0,0.0


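Note that Patsy has added a constant column, ``Intercept``, to $X$ for the intercept $\beta_0$, and it has added the column ``I(failures * sex)`` containing the interaction term, i.e. the element-wise product ``failures * sex``. The boolean predictors appear as indicator columns labelled ``[T.True]``.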

In [7]:
# Ordinary least squares with G1 as the response (endog) and the Patsy
# design matrix as the regressors (exog).
model = sm.OLS(endog=y_train, exog=X_train)

# Fit the model; the results object is summarised in a later cell.
model_results = model.fit()

In [8]:
# Show the fit statistics and the coefficient table;
# alpha=0.05 gives the [0.025, 0.975] confidence-interval columns.
model_results.summary2(alpha=0.05)

0,1,2,3
Model:,OLS,Adj. R-squared:,0.21
Dependent Variable:,G1,AIC:,1983.5698
Date:,2023-07-26 13:45,BIC:,2015.4009
No. Observations:,395,Log-Likelihood:,-983.78
Df Model:,7,F-statistic:,15.96
Df Residuals:,387,Prob (F-statistic):,1.94e-18
R-squared:,0.224,Scale:,8.7039

0,1,2,3,4,5,6
,Coef.,Std.Err.,t,P>|t|,[0.025,0.975]
Intercept,12.3244,0.6138,20.0804,0.0000,11.1177,13.5311
schoolsup[T.True],-1.9592,0.4489,-4.3643,0.0000,-2.8418,-1.0766
famsup[T.True],-0.5849,0.3114,-1.8783,0.0611,-1.1971,0.0274
sex[T.True],-1.0348,0.3413,-3.0323,0.0026,-1.7058,-0.3638
failures,-1.7986,0.2840,-6.3327,0.0000,-2.3570,-1.2402
studytime,0.5848,0.1903,3.0738,0.0023,0.2108,0.9589
goout,-0.3105,0.1367,-2.2723,0.0236,-0.5792,-0.0418
I(failures * sex),0.7312,0.4072,1.7955,0.0734,-0.0695,1.5318

0,1,2,3
Omnibus:,10.65,Durbin-Watson:,2.085
Prob(Omnibus):,0.005,Jarque-Bera (JB):,7.54
Skew:,0.216,Prob(JB):,0.023
Kurtosis:,2.478,Condition No.:,17.0
