## Imports and versions

In [25]:
import numpy as np
import pandas as pd
import statsmodels.formula.api as smf

## Data

In [27]:
# Remove aggregate rows, replace NaN with 0

puf = pd.read_csv('puf2011.csv')

puf = puf[(puf['RECID'] != 999996) &
          (puf['RECID'] != 999997) &
          (puf['RECID'] != 999998) &
          (puf['RECID'] != 999999)
         ]
           
puf = puf.fillna(0)

# Change MARS to string so mnlogit treats it as categorical

puf['MARS'].astype(str)

# Create categorical sign column, {-1: neg, 0: zero, 1: pos}

puf['sign'] = np.where(puf['P22250'] == 0, 0, np.where(puf['P22250'] > 0, 1, -1))

# Prune puf to include only: RECID, P22250, categorical and predictor columns.
# Removed E01100 and E00650


predictors =  ['DSI', 'EIC', 'MARS', 'XTOT', 'E00200', 'E00300', 'E00400','E00600', 
                         'E00800', 'E00900',           'E01400', 'E01500', 'E01700',
               'E02100', 'E02300', 'E02400', 'E03150', 'E03210', 'E03240', 'E03270',
               'E03300', 'E17500', 'E18400', 'E18500', 'E19200', 'E19800', 'E20100',
               'E20400', 'E32800', 'F2441', 'N24']


keep = ['RECID', 'P22250', 'sign'] + predictors

puf = puf[keep]

## Model

In [28]:
#Formula = sign ~ DSI + EIC + ... + F2441 + N24

formula = 'sign ~ ' + ' + '.join(predictors)
model = smf.mnlogit(formula = formula, data= puf).fit()
model.summary()


Optimization terminated successfully.
         Current function value: 0.581662
         Iterations 9


0,1,2,3
Dep. Variable:,sign,No. Observations:,163786.0
Model:,MNLogit,Df Residuals:,163722.0
Method:,MLE,Df Model:,62.0
Date:,"Mon, 16 Jul 2018",Pseudo R-squ.:,0.2319
Time:,17:06:53,Log-Likelihood:,-95268.0
converged:,True,LL-Null:,-124030.0
,,LLR p-value:,0.0

sign=0,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,3.0879,0.029,105.011,0.000,3.030,3.146
DSI,0.7952,0.096,8.322,0.000,0.608,0.982
EIC,1.4698,0.066,22.166,0.000,1.340,1.600
MARS,0.0663,0.014,4.625,0.000,0.038,0.094
XTOT,-0.3677,0.011,-33.133,0.000,-0.389,-0.346
E00200,-8.412e-07,2.03e-08,-41.360,0.000,-8.81e-07,-8.01e-07
E00300,-2.815e-06,1.58e-07,-17.849,0.000,-3.12e-06,-2.51e-06
E00400,-3.266e-06,2.01e-07,-16.238,0.000,-3.66e-06,-2.87e-06
E00600,-1.375e-06,9.45e-08,-14.550,0.000,-1.56e-06,-1.19e-06
E00800,-1.604e-05,4.91e-06,-3.268,0.001,-2.57e-05,-6.42e-06
