In [1]:
import numpy as np

from src.models.logreg import LogReg

import statsmodels.api as sm
import statsmodels.formula.api as smf

In [2]:
# Show the dataset's descriptive note (observation count and variable
# definitions) before loading the Spector data as a pandas DataFrame.
print(sm.datasets.spector.NOTE)
df = sm.datasets.spector.load_pandas().data

::

    Number of Observations - 32

    Number of Variables - 4

    Variable name definitions::

        Grade - binary variable indicating whether or not a student's grade
                improved.  1 indicates an improvement.
        TUCE  - Test score on economics test
        PSI   - participation in program
        GPA   - Student's grade point average


In [3]:
# Preview only the first rows: enough to check the columns (GPA, TUCE, PSI,
# GRADE) without dumping the whole frame into the notebook output.
df.head(10)

Unnamed: 0,GPA,TUCE,PSI,GRADE
0,2.66,20.0,0.0,0.0
1,2.89,22.0,0.0,0.0
2,3.28,24.0,0.0,0.0
3,2.92,12.0,0.0,0.0
4,4.0,21.0,0.0,1.0
5,2.86,17.0,0.0,0.0
6,2.76,17.0,0.0,0.0
7,2.87,21.0,0.0,0.0
8,3.03,25.0,0.0,0.0
9,3.92,29.0,0.0,1.0


In [4]:


# Fit a logit of GRADE on GPA, TUCE and PSI through the statsmodels formula
# interface and print the full regression summary table.
res_logit = smf.logit(
    formula="GRADE ~ GPA + TUCE + PSI",
    data=df,
).fit()
print(res_logit.summary())

Optimization terminated successfully.
         Current function value: 0.402801
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:                  GRADE   No. Observations:                   32
Model:                          Logit   Df Residuals:                       28
Method:                           MLE   Df Model:                            3
Date:                Sat, 06 Jan 2024   Pseudo R-squ.:                  0.3740
Time:                        01:11:12   Log-Likelihood:                -12.890
converged:                       True   LL-Null:                       -20.592
Covariance Type:            nonrobust   LLR p-value:                  0.001502
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
Intercept    -13.0213      4.931     -2.641      0.008     -22.687      -3.356
GPA            2.8261      1.

In [5]:
# Marginal effects (dy/dx) of the statsmodels fit, evaluated at the mean,
# printed as a summary table.
marg_effect = res_logit.get_margeff(method="dydx", at="mean")
print(marg_effect.summary())

        Logit Marginal Effects       
Dep. Variable:                  GRADE
Method:                          dydx
At:                              mean
                dy/dx    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
GPA            0.5339      0.237      2.252      0.024       0.069       0.998
TUCE           0.0180      0.026      0.685      0.493      -0.033       0.069
PSI            0.4493      0.197      2.284      0.022       0.064       0.835


In [6]:
# Build the project's LogReg on the same outcome and regressors used for the
# statsmodels model, then request the same marginal effects (dy/dx at the mean).
# NOTE(review): the displayed result also carries an Intercept column, which
# the statsmodels marginal-effects table does not report — confirm this is intended.
m = LogReg(
    df=df,
    outcome="GRADE",
    independent=["GPA", "TUCE", "PSI"],
)

m.marginal_effects(method="dydx", at="mean")

Converged in 6 iterations.


Unnamed: 0,Intercept,GPA,TUCE,PSI
0,-2.459761,0.533859,0.017975,0.449339


In [7]:
# Coefficient vector held by the LogReg instance. The first two displayed values
# (-13.0213, 2.8261) agree with the Intercept and GPA coefficients in the
# statsmodels summary; presumably ordered Intercept, GPA, TUCE, PSI — TODO confirm.
m.coefficients

array([-13.02134686,   2.82611259,   0.09515766,   2.37868766])

In [8]:
# Hessian matrix held by the LogReg instance (4x4, symmetric in the output).
# NOTE(review): the displayed diagonal is positive, so this looks like the
# negated Hessian of the log-likelihood (information matrix) — confirm the
# sign convention in LogReg.
m.hessian

array([[   4.1054934 ,   13.33167023,   93.81061211,    2.28159465],
       [  13.33167023,   44.03652826,  306.22633145,    7.10687673],
       [  93.81061211,  306.22633145, 2197.39964968,   50.82349704],
       [   2.28159465,    7.10687673,   50.82349704,    2.28159465]])

In [9]:
# Presumably the inverse of m.hessian (verify in LogReg). The square root of the
# first displayed diagonal entry (sqrt(24.318) ≈ 4.931) equals the Intercept
# std err in the statsmodels summary, so this appears to serve as the
# coefficient covariance matrix.
m.hessian_inv

array([[ 2.43179585e+01, -4.57347866e+00, -3.46255709e-01,
        -2.35916089e+00],
       [-4.57347866e+00,  1.59502016e+00, -3.69205768e-02,
         4.27615656e-01],
       [-3.46255709e-01, -3.69205768e-02,  2.00375931e-02,
         1.49126418e-02],
       [-2.35916089e+00,  4.27615656e-01,  1.49126418e-02,
         1.13329705e+00]])