In [1]:
from IPython.core.interactiveshell import InteractiveShell
InteractiveShell.ast_node_interactivity = "all"

%matplotlib inline

from __future__ import print_function
import numpy as np
import statsmodels.api as sm

np.random.seed(42)

In [2]:
# Load the Spector dataset that ships with statsmodels. The pandas DataFrame
# used by the cells below is exposed as `spector_data.data`.
spector_data = sm.datasets.spector.load_pandas()

In [3]:
# Preview the first five rows; the columns are GPA, TUCE, PSI and GRADE.
spector_data.data.head()

Unnamed: 0,GPA,TUCE,PSI,GRADE
0,2.66,20.0,0.0,0.0
1,2.89,22.0,0.0,0.0
2,3.28,24.0,0.0,0.0
3,2.92,12.0,0.0,0.0
4,4.0,21.0,0.0,1.0


In [4]:
# Build the OLS model through the formula interface: GRADE is the response,
# GPA and TUCE are the regressors. (The array-based alternative would be
# sm.OLS(y, X) with explicitly constructed y and X.)
m = sm.OLS.from_formula(
    formula='GRADE ~ GPA + TUCE',
    data=spector_data.data,
)

In [5]:
# Each bare expression below is echoed separately because the first cell sets
# InteractiveShell.ast_node_interactivity = "all".

# Model degrees of freedom: number of regressors, not counting the intercept.
m.df_model

# Residual degrees of freedom: observations minus estimated parameters.
m.df_resid

# Name of the response variable (left-hand side of the formula).
m.endog_names

# Names of the design-matrix columns, including the intercept term.
m.exog_names

2.0

29.0

'GRADE'

['Intercept', 'GPA', 'TUCE']

In [6]:
# Fit the GRADE ~ GPA + TUCE model; `res` is the results object queried below.
res = m.fit()

In [7]:
# print() emits the plain-text version of the regression summary table.
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                  GRADE   R-squared:                       0.262
Model:                            OLS   Adj. R-squared:                  0.211
Method:                 Least Squares   F-statistic:                     5.136
Date:                Sat, 25 Mar 2017   Prob (F-statistic):             0.0123
Time:                        16:16:26   Log-Likelihood:                -16.730
No. Observations:                  32   AIC:                             39.46
Df Residuals:                      29   BIC:                             43.86
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.4494      0.578     -2.506      0.0

In [8]:
# Evaluate the model log-likelihood at the fitted coefficients; the value
# should agree with the "Log-Likelihood" entry of the summary above.
m.loglike(res.params)


-16.729727107827411

In [9]:
# Predict GRADE for a single observation. The row follows the order of
# m.exog_names: [Intercept, GPA, TUCE], so this is GPA = 4.0 and TUCE = 25.
m.predict(params=res.params, exog=[[1, 4.0, 25]])

array([ 0.8008647])

In [10]:
# Joint hypothesis: the GPA and TUCE coefficients are both zero.
print(res.f_test(r_matrix="GPA = TUCE = 0"))
# Single restriction: the GPA coefficient alone is zero.
print(res.f_test(r_matrix="GPA = 0"))

<F test: F=array([[ 5.13598155]]), p=0.0123189334796, df_denom=29, df_num=2>
<F test: F=array([[ 6.66512585]]), p=0.0151520917707, df_denom=29, df_num=1>


In [11]:
# Larger model that adds PSI as a regressor.
# NOTE: this rebinds `m`. The earlier cells that call m.loglike / m.predict
# with `res.params` refer to the smaller model, so they should not be re-run
# after this cell without first re-creating that model.
m = sm.OLS.from_formula(
    formula='GRADE ~ GPA + TUCE + PSI',
    data=spector_data.data,
)

In [12]:
# Fit the model that includes PSI; `res1` is compared against `res` below.
res1 = m.fit()

In [13]:
# Plain-text regression summary for the model with PSI.
print(res1.summary())

                            OLS Regression Results                            
Dep. Variable:                  GRADE   R-squared:                       0.416
Model:                            OLS   Adj. R-squared:                  0.353
Method:                 Least Squares   F-statistic:                     6.646
Date:                Sat, 25 Mar 2017   Prob (F-statistic):            0.00157
Time:                        16:23:38   Log-Likelihood:                -12.978
No. Observations:                  32   AIC:                             33.96
Df Residuals:                      28   BIC:                             39.82
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.4980      0.524     -2.859      0.0

In [14]:
# F test of the restricted model (`res`, without PSI) against the larger
# model (`res1`). The result is a 3-tuple (F statistic, p-value, difference
# in degrees of freedom) -- hence the printed reminder that the p-value is
# the middle element.
print('p-value middle term')
res1.compare_f_test(res)

p-value middle term


(7.3985928277662021, 0.011087680712560059, 1.0)

In [15]:
# Two further tests of the restricted model `res` against `res1`. Each
# returns (test statistic, p-value, difference in degrees of freedom).

# Lagrange multiplier test.
res1.compare_lm_test(res)

# Likelihood ratio test.
res1.compare_lr_test(res)

(6.6882593791358582, 0.0097049916704974853, 1.0)

(7.5029618734839758, 0.0061597607382998061, 1.0)

In [16]:
# Persist the fitted results to disk. The relative path means the file is
# written to the kernel's current working directory.
res1.save('temp.pkl')

In [17]:
# Read the saved results back in. NOTE(security): this unpickles the file,
# which can execute arbitrary code -- only load files you created or trust.
res2 = sm.load('temp.pkl')

In [18]:
# Round-trip check: the reloaded results should print the same summary as
# `res1` (apart from the timestamp in the header).
print(res2.summary())

                            OLS Regression Results                            
Dep. Variable:                  GRADE   R-squared:                       0.416
Model:                            OLS   Adj. R-squared:                  0.353
Method:                 Least Squares   F-statistic:                     6.646
Date:                Sat, 25 Mar 2017   Prob (F-statistic):            0.00157
Time:                        16:25:01   Log-Likelihood:                -12.978
No. Observations:                  32   AIC:                             33.96
Df Residuals:                      28   BIC:                             39.82
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.4980      0.524     -2.859      0.0

In [None]:
# Penalized fit of the current model `m`.
#   alpha  -- one penalty weight per design-matrix column
#             (Intercept, GPA, TUCE, PSI).
#   L1_wt  -- share of the penalty assigned to the L1 term; 1.0 means the
#             penalty is entirely L1 (lasso).
#   refit  -- False keeps the penalized estimates instead of re-fitting an
#             unpenalized model on the selected variables.
# NOTE(review): the first weight also penalizes the Intercept; set it to 0
# if shrinking the intercept is not intended.
reg_res = m.fit_regularized(
    alpha=[1, 1, 1, 1],
    L1_wt=1.0,
    refit=False,
)