In [1]:
from IPython.display import HTML, display

# Inject a CSS rule so the notebook's `.container` element spans 85% of the
# browser window; wide tables and model summaries then fit without scrolling.
display(
    HTML("<style>.container {width:85%;}</style>")
)

# Ordered choice models

Ordered choice (ordinal) models are used for modeling ordinal dependent variables, i.e. categorical variables where the ordering of the categories matters even though the categories are not necessarily equidistant.

- In ordered choice models, the dependent variable typically has three or more ordered categories, e.g.
    - education levels (high school, bachelor's degree, master's degree),
    - rating systems (poor, fair, good, excellent),
    - opinion surveys (strongly agree, agree, neutral, disagree, strongly disagree),
    - employment (unemployed, part time, full time),
    - bond ratings (AAA, AA, A, B, etc.)

## Difference between probit and logit model
| Aspect                                  | Probit Model                            | Logit Model                               |
|-----------------------------------------|-----------------------------------------|-------------------------------------------|
| <b>Underlying Distribution</b>          | Standard Normal Distribution (bell-shaped density) | Logistic Distribution (bell-shaped density with heavier tails) |
| <b>Cumulative Distribution Function</b> | Φ(Xβ)                                 | 1 / (1 + exp(-Xβ))                        |
| <b>Interpretability of Coefficients</b> | Changes in z-scores of the latent variable | Changes in log-odds (exp(β) is an odds ratio) |
| <b>Response Curve Shape</b>             | S-shaped; approaches 0 and 1 faster (thinner tails) | S-shaped; approaches 0 and 1 more slowly (heavier tails) |
| <b>Estimation Method</b>                | Maximum likelihood with a probit link   | Maximum likelihood with a logit link      |

In [134]:
import os
import warnings

import numpy as np
import pandas as pd
import scipy.stats as stats
import statsmodels.api as sm
from statsmodels.miscmodels.ordinal_model import OrderedModel

pd.set_option("display.max_columns", 100)
pd.set_option("display.max_rows", 100)
pd.set_option('display.float_format', lambda x:'{:.4f}'.format(x))
warnings.filterwarnings("ignore")

## Data

In [152]:
# Location of the PIAAC Stata extract. Set the PIAAC_DATA_PATH environment
# variable to point at your own copy of the file; when it is not set, the
# original author's local path is used as a fallback.
path = os.environ.get(
    "PIAAC_DATA_PATH",
    "/Users/geofrey.wanyama/Downloads/Microeconometrics1/Data sets/piaac9Feb2017Stata12.dta",
)

# Short codes for the education levels; they become the dummy-column suffixes.
# NOTE(review): the "â\x80\x93" sequences look like a mis-decoded en dash in the
# file's value labels -- presumably the keys must match the labels exactly as
# pandas reads them, so they are left untouched. Confirm against the raw data.
edcat7 = {
    'Tertiaryâ\x80\x93master/research degree': 'TertMA',
     'Upper secondary': 'UpSec',
     'Post-secondary, non-tertiary': 'PostSec',
     'Lower secondary or less': 'LowSec',
     'Tertiary-professional degree': 'TertProf',
     'Tertiary â\x80\x93 bachelor degree': 'TertBA'   
}
# Short codes for the "education level required for the job" answers.
workreq = {
    'This level necessary': 'level_necessary',
    'Lower level sufficient': 'lower_sufficient',
    'Higher level needed': 'Higher_needed' 
}

# Load the survey, keep Finland and Estonia, drop the 16-19 age band, keep only
# the model variables and discard incomplete rows before recoding.
# NOTE(review): a label missing from the dictionaries above maps to NaN *after*
# the dropna() -- confirm every level present in the data is covered.
df = (
    pd.read_stata(path)
    .query("ageg5lfs != '16-19' and country in ['fin', 'est']")
    .filter(["chatatwork", "ageg5lfs", "male", "edcat7", "immig", "indep", "workreq", "geo"])
    .dropna()
    .assign(
        # Drop the now-empty levels and make the labels identifier-friendly
        # ("25-29" -> "25_29") in one step.
        ageg5lfs = lambda X: (
            pd.Categorical(X["ageg5lfs"])
            .remove_unused_categories()
            .rename_categories(lambda label: label.replace("-", "_"))
        ),
        geo = lambda X: pd.Categorical(X["geo"]).remove_unused_categories(),
        edcat7 = lambda X: X["edcat7"].map(edcat7),
        workreq = lambda X: X["workreq"].map(workreq),
        male = lambda X: X["male"].astype(int).astype("category"),
        immig = lambda X: X["immig"].astype(int).astype("category")
    )
)

# Design matrix: the target and the already-numeric regressors, plus dummies for
# the categorical regressors (first level of each dropped as the baseline).
# dtype=int keeps the dummies as 0/1 integers; pandas >= 2.0 would otherwise
# return booleans, which do not combine into a numeric array with the other
# columns when the frame is handed to the model.
data = pd.concat(
    [
        df[["chatatwork", "male", "immig", "indep"]],
        pd.get_dummies(
            df[["ageg5lfs", "edcat7", "workreq", "geo"]],
            drop_first=True,
            dtype=int,
        ),
    ],
    axis=1,
)

data.head(3)

Unnamed: 0,chatatwork,male,immig,indep,ageg5lfs_25_29,ageg5lfs_30_34,ageg5lfs_35_39,ageg5lfs_40_44,ageg5lfs_45_49,ageg5lfs_50_54,ageg5lfs_55_59,ageg5lfs_60_65,edcat7_UpSec,edcat7_PostSec,edcat7_TertProf,edcat7_TertBA,edcat7_TertMA,workreq_lower_sufficient,workreq_Higher_needed,geo_fin
57418,At least once a month,0,0,2.75,0,1,0,0,0,0,0,0,0,0,0,0,1,0,0,0
57422,At least once a week,0,0,3.25,1,0,0,0,0,0,0,0,0,0,0,0,1,0,0,0
57423,Never,1,0,4.0,0,0,0,1,0,0,0,0,1,0,0,0,0,1,0,0


## Probit ordinal model

In [153]:
# Ordered probit: regress the ordinal target on every other column of `data`.
target = "chatatwork"
features = data.drop(target, axis=1).columns.to_list()

# distr='probit' selects the standard normal distribution for the latent error.
mod_prob = OrderedModel(
    data[target],
    data[features],
    distr='probit'
)

# Maximum-likelihood fit using the BFGS optimizer.
res_prob = mod_prob.fit(method='bfgs')
print(res_prob.summary())

Optimization terminated successfully.
         Current function value: 0.948260
         Iterations: 67
         Function evaluations: 70
         Gradient evaluations: 70
                             OrderedModel Results                             
Dep. Variable:             chatatwork   Log-Likelihood:                -4934.7
Model:                   OrderedModel   AIC:                             9915.
Method:            Maximum Likelihood   BIC:                         1.007e+04
Date:                Wed, 06 Sep 2023                                         
Time:                        20:21:45                                         
No. Observations:                5204                                         
Df Residuals:                    5181                                         
Df Model:                          19                                         
                                                   coef    std err          z      P>|z|      [0.025      0.975]
----

## Logit ordinal model

In [138]:
# Ordered logit: same target and regressors, logistic latent error.
target = "chatatwork"
features = data.drop(target, axis=1).columns.to_list()

# NOTE(review): `mod_prob` / `res_prob` are rebound here, so any earlier fit
# stored under these names is overwritten by the logit fit. Consider renaming
# to `mod_logit` / `res_logit` once cells that read these names are checked.
mod_prob = OrderedModel(
    data[target],
    data[features],
    distr='logit'
)

# Maximum-likelihood fit using the BFGS optimizer.
res_prob = mod_prob.fit(method='bfgs')
print(res_prob.summary())

Optimization terminated successfully.
         Current function value: 0.945404
         Iterations: 78
         Function evaluations: 81
         Gradient evaluations: 81
                             OrderedModel Results                             
Dep. Variable:             chatatwork   Log-Likelihood:                -4919.9
Model:                   OrderedModel   AIC:                             9884.
Method:            Maximum Likelihood   BIC:                         1.003e+04
Date:                Tue, 05 Sep 2023                                         
Time:                        23:34:23                                         
No. Observations:                5204                                         
Df Residuals:                    5182                                         
Df Model:                          18                                         
                                                   coef    std err          z      P>|z|      [0.025      0.975]
----