In [1]:
import numpy as np
import pandas as pd
from scipy import stats
from patsy import dmatrices
import sys

sys.path.append('/home/janos/Dropbox/Projects/estimagic/')
from estimagic.optimization.optimize import maximize

In [2]:
def ordered_logit(formula, data, dashboard=False):
    """Estimate an ordered logit model by maximizing its log-likelihood.

    Args:
        formula: model formula string, forwarded unchanged to
            ``ordered_logit_processing``.
        data: data set the formula refers to, forwarded unchanged to
            ``ordered_logit_processing``.
        dashboard: forwarded to ``maximize`` as its ``dashboard`` argument.

    Returns:
        Whatever ``maximize`` returns for this problem.
    """
    start_values, outcome, regressors, constraints = ordered_logit_processing(
        formula, data)

    # The data arrays reach the criterion as extra positional arguments
    # after the parameter object.
    return maximize(
        criterion=ordered_logit_loglike,
        params=start_values,
        algorithm='nlopt_bobyqa',
        constraints=constraints,
        criterion_args=(outcome, regressors),
        dashboard=dashboard,
    )


def ordered_logit_loglike(params, y, x):
    """Return the summed log-likelihood of an ordered logit model.

    Args:
        params: pandas object for which ``params.loc['beta']`` selects the
            slope coefficients and ``params.loc['cutoff']`` selects the
            cutoffs.
        y: integer category codes, used to index the cutoff vectors.
            Assumed to be a 1-d array with one entry per row of ``x``.
        x: regressor matrix exposing ``.dot``.

    Returns:
        Sum over observations of the log of
        ``F(upper cutoff - x @ beta) - F(lower cutoff - x @ beta)``,
        where ``F`` is the logistic CDF.
    """
    slopes = params.loc['beta'].to_numpy()
    thresholds = params.loc['cutoff'].to_numpy()
    linear_index = x.dot(slopes)

    # Category k lies between thresholds k - 1 and k; the outermost
    # categories are bounded by -inf and +inf respectively.
    upper_bounds = np.concatenate((thresholds, [np.inf]))
    lower_bounds = np.concatenate(([-np.inf], thresholds))

    prob_below_upper = stats.logistic.cdf(upper_bounds[y] - linear_index)
    prob_below_lower = stats.logistic.cdf(lower_bounds[y] - linear_index)
    choice_prob = prob_below_upper - prob_below_lower

    return np.log(choice_prob).sum()


def ordered_logit_processing(formula, data):
    """Build start parameters, data arrays and constraints for an ordered logit.

    The outcome is recoded to consecutive integer codes ``0, ..., K - 1``
    following the sorted order of its distinct values. The likelihood uses
    these codes as positions in the cutoff vector, so outcomes labelled e.g.
    ``1, 2, 3``, or samples in which one category is absent, are handled
    correctly. Outcomes already coded ``0, ..., K - 1`` are left unchanged.

    Args:
        formula: patsy formula string; ``' - 1'`` is appended before it is
            passed to ``dmatrices``, which removes the intercept column.
        data: data set passed to ``dmatrices`` together with the formula.

    Returns:
        A 4-tuple ``(start_params, y, x, constr)``:

        * ``start_params``: DataFrame with a ``('type', 'name')`` MultiIndex
          and a ``'value'`` column. ``'beta'`` rows hold one start value per
          regressor column, ``'cutoff'`` rows hold ``0, 2, 4, ...``.
        * ``y``: 1-d integer array of outcome codes.
        * ``x``: regressor matrix as a NumPy array.
        * ``constr``: list with one constraint dict requiring the cutoffs to
          be increasing.
    """
    # extract data arrays
    y, x = dmatrices(formula + ' - 1', data, return_type='dataframe')
    y = y[y.columns[0]]

    # Map the observed outcome values onto 0..K-1 so that they are always
    # valid positions in the padded cutoff vectors of the likelihood.
    levels, y_codes = np.unique(y.to_numpy(), return_inverse=True)
    y_codes = y_codes.reshape(-1).astype(int)

    # extract dimensions
    num_choices = len(levels)
    beta_names = list(x.columns)
    num_betas = len(beta_names)
    num_cutoffs = num_choices - 1

    # set-up index for params_df
    names = beta_names + list(range(num_cutoffs))
    categories = ['beta'] * num_betas + ['cutoff'] * num_cutoffs
    index = pd.MultiIndex.from_tuples(
        list(zip(categories, names)), names=['type', 'name'])

    # make params_df; a local generator with a fixed seed gives the same
    # draws as seeding the global generator, without altering global state.
    rng = np.random.RandomState(5471)
    start_params = pd.DataFrame(index=index)
    start_params['value'] = np.hstack(
        [rng.uniform(low=-0.5, high=0.5, size=num_betas),
         np.arange(num_cutoffs) * 2])

    # make constraints
    constr = [{'loc': 'cutoff', 'type': 'increasing'}]

    return start_params, y_codes, x.to_numpy(), constr

In [3]:
# Load the example data set from a Stata file in the working directory.
df = pd.read_stata('ologit.dta')
# Replace the categorical outcome by its integer category codes; the outcome
# is later used to index the cutoff vector in the likelihood.
df['apply'] = df['apply'].cat.codes
# Formula: outcome on the left-hand side, regressors on the right-hand side.
form = 'apply ~ pared + public + gpa'
# Reference values ordered as (pared, public, gpa, cutoff 0, cutoff 1).
# NOTE(review): presumably estimates from another package - confirm source.
correct = [1.047664, -.0586828, .6157458, 2.203323,  4.298767]

In [4]:
# Fit the model; the cell displays the value returned by ordered_logit.
ordered_logit(form, df, dashboard=False)

({'x': [1.0476636809475925,
   -0.058682798734612227,
   0.6157458197420849,
   2.2033232357653287,
   4.298767391412519],
  'internal_x': [1.0476636809475925,
   -0.058682798734612227,
   0.6157458197420849,
   2.2033232357653287,
   2.0954441556471903],
  'f': -358.5124356620388},
 type    name  
 beta    pared     1.047664
         public   -0.058683
         gpa       0.615746
 cutoff  0         2.203323
         1         4.298767
 Name: value, dtype: float64)