In [1]:
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
#A code to run ordinary least squares with associated statistics
#Jeremy Kedziora
#24 March 2016
#@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

#import libraries
import numpy as np    #import for arrays
from scipy.optimize import minimize    #import for optimization
import scipy.stats    #import for distributions
from scipy.stats import t    #import for hypothesis testing

In [2]:
#define functions
def maker(N,n_vars):
    """A function to generate Monte Carlo linear regression data"""
    x = []    #an empty list to hold the data
    y = np.zeros(N)    #an array to hold the dependent variable
    b = []    #an empty list to hold the true bs
    mu = [0.0]    #an empty list to hold the true cutoffs in the ordered probit
    i = 1
    while i <= n_vars:    #loop over the variables we want to create
        x_i = np.random.normal(loc = 0.0, scale = 1.0, size = N)    #generate the data
        x.append(x_i)    #add it to the list of data
        b_i = np.random.normal(loc = 0.0, scale = 1.0)    #draw a random effect for this variable
        b.append(b_i)    #add it to the list of effects
        y = y + b_i*x_i    #add the variable effect to the dependent variable
        i += 1    #index up i
    
    x.append(np.ones(N))    #and a column of ones for a constant
    b_i = np.random.uniform(0.0,1.0)    #draw a random intercept
    
    V = [-1,1,1.5]#sorted(np.random.uniform(0,1.5,3))    #create the value for each outcome
    y = b_i + y + np.random.normal(loc = 0.0, scale = 1.0, size = N)    #add the normally distributed error term and the intercept
    mu = [-float('inf'),0,np.random.uniform(1,1.5,1)[0],float('inf')]    #append the cutoff for the next category
    y_cat = (y<0)*0    #set the ys less than 0 to fail
    y_cat = y_cat + (0<y)*(y<mu[2])*1    #code all the ys that fall between them to pass
    y_cat = y_cat + (y>mu[2])*2    #and code all the ys larger than the largest to distinction

    p_fail = scipy.stats.norm.cdf(- y)    #compute the probability of failure
    p_pass = scipy.stats.norm.cdf(mu[2] - y)    #compute the probability of passing
    p_distinction = 1 - scipy.stats.norm.cdf(mu[2] - y)    #compute the probability of distinction
    p_NW = scipy.stats.norm.cdf(p_fail*V[0] + p_pass*V[1] + p_distinction*V[2])    #compute the probability of not withdrawing
    p_W = 1 - p_NW    #compute the probability of withdrawing

    y_choice = []    #a list to hold the w/nw choice
    for i in p_NW:    #loop over obserations
        y_choice.append(np.random.binomial(1,i,1)[0])    #choose a w/nw
    y_choice = np.array(y_choice)    #make an array
    y = (y_choice==0)*1 + (y_choice==1)*(y_cat==0)*2 + (y_choice==1)*(y_cat==1)*3 + (y_choice==1)*(y_cat==2)*4    #and code

    b.append(b_i)    #append the constant
    
    return [np.array(x).T,np.array(y),np.array(b),np.array(mu),V]

In [3]:
def choice_mle(b,X,y,V):
    """A function to compute a choice model with an ordered probit outcome.
    Take in:
    b: the vector of parameters, the last of which is the cutoff
    X: a numpy array of features
    y: a numpy array of outcomes where y = 1 is W, y = 2 is NW,F, y = 3 is NW,P, y = 4 is NW,D
    V: a list [Value of failing, Value of passing, Value of Disctin]"""
    
    xb = X.dot(b[0:(X.shape[1])])    #compute xb
    mu = [float('-inf'),0]    #initialize the list of mus
    mu = mu + [np.exp(b[len(b) - 1])]    #add the cutoff point to be estimated
    mu = mu + [float('inf')]    #append infinity on the end
    #print(mu)
    p_fail = scipy.stats.norm.cdf(- xb)    #compute the probability of failure
    p_pass = scipy.stats.norm.cdf(mu[2] - xb)    #compute the probability of passing
    p_distinction = 1 - scipy.stats.norm.cdf(mu[2] - xb)    #compute the probability of distinction
    p_NW = scipy.stats.norm.cdf(p_fail*V[0] + p_pass*V[1] + p_distinction*V[2])    #compute the probability of not withdrawing
    p_W = 1 - p_NW    #compute the probability of withdrawing
    
    #assume y = 1 is W, y = 2 is NW,F, y = 3 is NW,P, y = 4 is NW,D
    log_probs = ((y==1)*np.log(p_W) + (y==2)*(np.log(p_NW) + np.log(p_fail)) 
                 + (y==3)*(np.log(p_NW) + np.log(p_pass)) + (y==4)*(np.log(p_NW) + np.log(p_distinction)))
    
    return -1*sum(log_probs)

In [28]:
N = 100
n_vars = 3
Data = maker(N,n_vars)
X = Data[0]
y = Data[1]

b = np.array(list(np.random.uniform(0,1,X.shape[1])*0.01) + list(np.random.uniform(0,1.0,1)))    #set starting values

choice_mle(b,X,y,V=[-1,1,1.5])

V_F = np.linspace(0,2,3)
V_P = np.linspace(-2,0,3)
V_D = np.linspace(2,3,3)

V = [[x,y,z] for x in V_F for y in V_P for z in V_D]

models = []
for v in V:
    print(v)
    model = minimize(choice_mle, x0 = b, args = (X,y,v), method = 'Nelder-Mead',options={'maxiter':2000})    #maximize the log-likelihood
    models.append(model)
#print(model)


[0.0, -2.0, 2.0]
[0.0, -2.0, 2.5]
[0.0, -2.0, 3.0]
[0.0, -1.0, 2.0]




[0.0, -1.0, 2.5]
[0.0, -1.0, 3.0]
[0.0, 0.0, 2.0]
[0.0, 0.0, 2.5]
[0.0, 0.0, 3.0]
[1.0, -2.0, 2.0]
[1.0, -2.0, 2.5]
[1.0, -2.0, 3.0]
[1.0, -1.0, 2.0]
[1.0, -1.0, 2.5]
[1.0, -1.0, 3.0]
[1.0, 0.0, 2.0]
[1.0, 0.0, 2.5]
[1.0, 0.0, 3.0]
[2.0, -2.0, 2.0]
[2.0, -2.0, 2.5]
[2.0, -2.0, 3.0]
[2.0, -1.0, 2.0]
[2.0, -1.0, 2.5]
[2.0, -1.0, 3.0]
[2.0, 0.0, 2.0]
[2.0, 0.0, 2.5]
[2.0, 0.0, 3.0]


In [29]:
models

[ final_simplex: (array([[  5.64342033e-01,  -3.68896962e-01,   6.01767192e-01,
          4.11936076e-01,  -2.36321909e+03],
       [  5.64342025e-01,  -3.68896957e-01,   6.01767187e-01,
          4.11936061e-01,  -2.36321905e+03],
       [  5.64342028e-01,  -3.68896964e-01,   6.01767189e-01,
          4.11936067e-01,  -2.36321906e+03],
       [  5.64342044e-01,  -3.68896959e-01,   6.01767201e-01,
          4.11936070e-01,  -2.36321914e+03],
       [  5.64342024e-01,  -3.68896966e-01,   6.01767195e-01,
          4.11936075e-01,  -2.36321904e+03],
       [  5.64342033e-01,  -3.68896970e-01,   6.01767193e-01,
          4.11936069e-01,  -2.36321908e+03]]), array([ 81.16797483,  81.16797483,  81.16797483,  81.16797483,
        81.16797483,  81.16797483]))
           fun: 81.167974830904157
       message: 'Optimization terminated successfully.'
          nfev: 765
           nit: 466
        status: 0
       success: True
             x: array([  5.64342033e-01,  -3.68896962e-01,   6.01767