In [1]:
import numpy as np
import pandas as pd
import scipy.stats as stats

from statsmodels.miscmodels.ordinal_model import OrderedModel

# Stata-format (.dta) dataset served from stats.idre.ucla.edu; loading it
# requires network access.
url = "https://stats.idre.ucla.edu/stat/data/ologit.dta"
data_student = pd.read_stata(url)

# Preview the first five rows (five is the default for head()).
data_student.head()

Unnamed: 0,apply,pared,public,gpa
0,very likely,0,0,3.26
1,somewhat likely,1,0,3.21
2,unlikely,1,1,3.94
3,somewhat likely,0,0,2.81
4,somewhat likely,0,0,2.53


In [2]:
# Inspect the dtype of the response column. Bracket lookup is used on purpose:
# attribute access (`data_student.apply`) would resolve to the DataFrame method.
data_student.dtypes['apply']

CategoricalDtype(categories=['unlikely', 'somewhat likely', 'very likely'], ordered=True)

In [6]:
# Ordinal regression with a probit link.
mod_prob = OrderedModel(
    endog=data_student["apply"],
    exog=data_student[["pared", "public", "gpa"]],
    distr="probit",
)

# Fit with the BFGS optimizer; convergence information is printed because
# `disp` is left at its default.
res_prob = mod_prob.fit(method="bfgs")
res_prob.summary()

Optimization terminated successfully.
         Current function value: 0.896869
         Iterations: 17
         Function evaluations: 21
         Gradient evaluations: 21


0,1,2,3
Dep. Variable:,apply,Log-Likelihood:,-358.75
Model:,OrderedModel,AIC:,727.5
Method:,Maximum Likelihood,BIC:,747.5
Date:,"Mon, 17 Jun 2024",,
Time:,18:13:14,,
No. Observations:,400,,
Df Residuals:,395,,
Df Model:,3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
pared,0.5981,0.158,3.789,0.000,0.289,0.908
public,0.0102,0.173,0.059,0.953,-0.329,0.349
gpa,0.3582,0.157,2.285,0.022,0.051,0.665
unlikely/somewhat likely,1.2968,0.468,2.774,0.006,0.381,2.213
somewhat likely/very likely,0.1873,0.074,2.530,0.011,0.042,0.332


In [5]:
# Ordinal regression with a logit link, same response and regressors as above.
mod_log = OrderedModel(
    endog=data_student["apply"],
    exog=data_student[["pared", "public", "gpa"]],
    distr="logit",
)

# Fit with the BFGS optimizer; disp=False suppresses the convergence printout.
res_log = mod_log.fit(method="bfgs", disp=False)
res_log.summary()

0,1,2,3
Dep. Variable:,apply,Log-Likelihood:,-358.51
Model:,OrderedModel,AIC:,727.0
Method:,Maximum Likelihood,BIC:,747.0
Date:,"Mon, 17 Jun 2024",,
Time:,18:13:00,,
No. Observations:,400,,
Df Residuals:,395,,
Df Model:,3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
pared,1.0476,0.266,3.942,0.000,0.527,1.569
public,-0.0586,0.298,-0.197,0.844,-0.642,0.525
gpa,0.6158,0.261,2.363,0.018,0.105,1.127
unlikely/somewhat likely,2.2035,0.780,2.827,0.005,0.676,3.731
somewhat likely/very likely,0.7398,0.080,9.236,0.000,0.583,0.897


In [7]:
# Ordinal regression with a scipy distribution object passed as `distr`
# (here the exponential distribution, scipy.stats.expon).
# NOTE(review): an earlier comment on this cell called this a "cLogLog"
# distribution, but the code passes stats.expon — confirm which one is intended.
res_exp = OrderedModel(
    endog=data_student["apply"],
    exog=data_student[["pared", "public", "gpa"]],
    distr=stats.expon,
).fit(method="bfgs", disp=False)
res_exp.summary()

0,1,2,3
Dep. Variable:,apply,Log-Likelihood:,-360.84
Model:,OrderedModel,AIC:,731.7
Method:,Maximum Likelihood,BIC:,751.6
Date:,"Mon, 17 Jun 2024",,
Time:,18:14:13,,
No. Observations:,400,,
Df Residuals:,395,,
Df Model:,3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
pared,0.4690,0.117,4.021,0.000,0.240,0.698
public,-0.1308,0.149,-0.879,0.379,-0.422,0.161
gpa,0.2198,0.134,1.638,0.101,-0.043,0.483
unlikely/somewhat likely,1.5370,0.405,3.792,0.000,0.742,2.332
somewhat likely/very likely,0.4082,0.093,4.403,0.000,0.226,0.590


In [12]:
# Same logit model, specified through the formula interface.
# The `0 +` term removes the intercept from the formula's design matrix.
modf_logit = OrderedModel.from_formula(
    "apply ~ 0 + pared + public + gpa",
    data=data_student,
    distr="logit",
)

# Fit with the BFGS optimizer; convergence information is printed.
resf_logit = modf_logit.fit(method="bfgs")
resf_logit.summary()

Optimization terminated successfully.
         Current function value: 0.896281
         Iterations: 22
         Function evaluations: 24
         Gradient evaluations: 24


0,1,2,3
Dep. Variable:,apply,Log-Likelihood:,-358.51
Model:,OrderedModel,AIC:,727.0
Method:,Maximum Likelihood,BIC:,747.0
Date:,"Mon, 17 Jun 2024",,
Time:,18:15:11,,
No. Observations:,400,,
Df Residuals:,395,,
Df Model:,3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
pared,1.0476,0.266,3.942,0.000,0.527,1.569
public,-0.0586,0.298,-0.197,0.844,-0.642,0.525
gpa,0.6158,0.261,2.363,0.018,0.105,1.127
unlikely/somewhat likely,2.2035,0.780,2.827,0.005,0.676,3.731
somewhat likely/very likely,0.7398,0.080,9.236,0.000,0.583,0.897
