In [1]:
%matplotlib inline


import numpy as np
import statsmodels.api as sm

In [2]:
# Load the Spector dataset that ships with statsmodels; the table itself is
# accessed through `spector_data.data` in the cells below.
spector_data = sm.datasets.spector.load_pandas()

In [3]:
# Preview the first rows; the columns are GPA, TUCE, PSI and the response GRADE.
spector_data.data.head()

Unnamed: 0,GPA,TUCE,PSI,GRADE
0,2.66,20.0,0.0,0.0
1,2.89,22.0,0.0,0.0
2,3.28,24.0,0.0,0.0
3,2.92,12.0,0.0,0.0
4,4.0,21.0,0.0,1.0


In [4]:

# Baseline model: regress GRADE on GPA and TUCE. The formula interface adds the
# intercept term automatically (see `m.exog_names` below).
m = sm.OLS.from_formula('GRADE ~ GPA + TUCE', spector_data.data)

In [5]:
# Model degrees of freedom: one per slope term (GPA, TUCE); the intercept is
# not counted.
m.df_model



2.0

In [6]:
# Residual degrees of freedom: 32 observations minus the 3 estimated
# parameters (Intercept, GPA, TUCE).
m.df_resid



29.0

In [7]:
# Name of the dependent (response) variable taken from the formula.
m.endog_names



'GRADE'

In [8]:
# Names of the design-matrix columns, in order; note the 'Intercept' column
# that the formula added.
m.exog_names

['Intercept', 'GPA', 'TUCE']

In [9]:
# Fit the baseline model. `res` is kept around because it serves as the
# restricted model in the comparison tests at the end of the notebook.
res = m.fit()

In [10]:
# Print the plain-text regression table for the baseline model.
print(res.summary())

                            OLS Regression Results                            
Dep. Variable:                  GRADE   R-squared:                       0.262
Model:                            OLS   Adj. R-squared:                  0.211
Method:                 Least Squares   F-statistic:                     5.136
Date:                Fri, 12 Jun 2020   Prob (F-statistic):             0.0123
Time:                        10:15:13   Log-Likelihood:                -16.730
No. Observations:                  32   AIC:                             39.46
Df Residuals:                      29   BIC:                             43.86
Df Model:                           2                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.4494      0.578     -2.506      0.0

The P>|t| value for GPA is low, so GPA is a statistically significant predictor of GRADE; the value for TUCE is high, so TUCE is not a significant predictor in this model.

In [11]:
# Evaluate the model's log-likelihood at the fitted parameters; this matches
# the "Log-Likelihood" entry of the summary above.
m.loglike(res.params)

-16.72972710782741

In [12]:
# Predict GRADE for one hand-built design row. The values must follow the
# order of `m.exog_names`: Intercept = 1, GPA = 3.0, TUCE = 20.
m.predict(res.params, [[1, 3.0, 20]])

array([0.25842443])

In [13]:
# Joint test that both slopes are zero; its F value equals the F-statistic
# reported in the summary.
print(res.f_test("GPA = TUCE = 0"))
# Test of the single restriction GPA = 0.
print(res.f_test("GPA = 0"))

<F test: F=array([[5.13598155]]), p=0.0123189334795685, df_denom=29, df_num=2>
<F test: F=array([[6.66512585]]), p=0.015152091770702585, df_denom=29, df_num=1>


In [14]:
# Extended model: same as the baseline plus PSI.
# NOTE(review): this rebinds `m`, so re-running any earlier cell that reads `m`
# (df_model, loglike, predict, ...) after this point would use the PSI model
# instead of the baseline; consider a distinct name for this model.
m = sm.OLS.from_formula('GRADE ~ GPA + TUCE + PSI', spector_data.data)

In [15]:
# Fit the extended model (`m` is the GPA + TUCE + PSI model at this point).
res1 = m.fit()

In [16]:
# Print the plain-text regression table for the extended model.
print(res1.summary())

                            OLS Regression Results                            
Dep. Variable:                  GRADE   R-squared:                       0.416
Model:                            OLS   Adj. R-squared:                  0.353
Method:                 Least Squares   F-statistic:                     6.646
Date:                Fri, 12 Jun 2020   Prob (F-statistic):            0.00157
Time:                        10:15:13   Log-Likelihood:                -12.978
No. Observations:                  32   AIC:                             33.96
Df Residuals:                      28   BIC:                             39.82
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept     -1.4980      0.524     -2.859      0.0

The P>|t| values for GPA and PSI are quite low, so they are good (statistically significant) predictors, unlike TUCE, which has a much larger p-value.

Comparing Models 

In [17]:
# F-test of the extended model against the nested baseline `res`.
# The tuple is (F statistic, p-value, difference in degrees of freedom).
res1.compare_f_test(res)

(7.398592827766196, 0.011087680712560107, 1.0)

In [18]:
# Lagrange-multiplier test of the extended model against the nested baseline.
# The tuple is (LM statistic, p-value, difference in degrees of freedom).
res1.compare_lm_test(res)



(6.688259379135858, 0.009704991670497485, 1.0)

In [19]:
# Likelihood-ratio test of the extended model against the nested baseline;
# the statistic is twice the gap between the two summaries' log-likelihoods.
# The tuple is (LR statistic, p-value, difference in degrees of freedom).
res1.compare_lr_test(res)

(7.502961873483976, 0.006159760738299806, 1.0)

**Conclusion**

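In all of the above tests (F, LM and LR), the model `res1` (which adds PSI) performs significantly better than the model `res`: every p-value is below 0.05, so the restricted model is rejected. `res1` also does better on R-squared, adjusted R-squared, and the other fit statistics (log-likelihood, AIC, BIC) when the two summaries are compared.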