In [45]:
import os
import sys
import random
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
import matplotlib.pyplot as plt

In [80]:
# Put ../compass first on the import path; `parameters` and `model` are
# presumably the modules living there (verify against the repo layout).
sys.path.insert(0, "../compass")
from parameters import FLAGS
from model import CompassModel

# Seed both the stdlib and the NumPy global generators with the same value
# so that repeated runs draw the same random numbers.
seed = 3
random.seed(seed)
np.random.seed(seed)


# Run settings for this analysis, applied to FLAGS in the order listed.
# NOTE(review): the meaning of each setting is defined by the compass code,
# not here — consult parameters.py before changing any of them.
run_settings = {
    'case': 'Noord',
    'alpha': 0.2,
    'window_size': 30,
    'conv_threshold': 0.01,
    'max_school_steps': 300,
    'max_move_fraction': 0.25,
    'filename': 'choice-analysis-test',
}
for flag_name, flag_value in run_settings.items():
    setattr(FLAGS, flag_name, flag_value)

In [47]:
# Build the model from the attributes currently set on FLAGS (handed over as
# a plain dict via vars()) and run the simulation.
# NOTE(review): export=True presumably makes the run write
# FLAGS.filename + '.npz', which is the file loaded afterwards — confirm in
# CompassModel.
model = CompassModel(vars(FLAGS), export=True)
model.simulate()

 Model initialised:
            NR AGENTS:  Households: 6235
            Neighbourhoods: 56
            Schools: 30
            In scheduler: 6321

School process: step 1 from 300

  log_r_jm = np.nan_to_num(np.log(r_jm))


School process: step 47 from 300

In [48]:
# Open the .npz archive named after FLAGS.filename and turn the last entry
# along the first axis of its 'households' array into a DataFrame whose
# columns are labelled by the archive's 'households_headers' array.
# NOTE(review): the first axis presumably indexes recorded steps, making
# this the final state — confirm against the export code.
archive_path = FLAGS.filename + '.npz'
data = np.load(archive_path)
headers = data['households_headers']
last_snapshot = data['households'][-1, :, :]
households = pd.DataFrame(data=last_snapshot, columns=headers)

In [77]:
# Assemble the regression table: one row per household with the unit it is
# assigned to, two log-transformed covariates and an interaction term.

# Lower bound applied before taking logs so that zero (or negative) values
# cannot produce -inf/NaN in the regressors; log(0.01) is about -4.605.
LOG_FLOOR = 0.01

# Dependent variable: the 'unit' value recorded for each household.
schools = households['unit']

# Complement of the stored 'distance' column, floored for the log below.
# NOTE(review): 1 - distance only makes sense if the stored value is
# normalised to [0, 1] — confirm against the export code.
distance = (1 - households['distance']).clip(lower=LOG_FLOOR)

# Mean 'category' over all households that share the same unit, broadcast
# back to every household row and floored like the distance.
compositions = (
    households.groupby('unit')['category']
    .transform('mean')
    .rename('composition')
    .clip(lower=LOG_FLOOR)
)

variables = pd.DataFrame({
    'school': schools,
    'distance': np.log(distance),
    'composition': np.log(compositions),
    'category': households['category'],
})

# Interaction term: log composition multiplied by the household's own category.
variables['group*category'] = variables['composition'] * variables['category']
print(variables)

      school  distance  composition  category  group*category
0       71.0 -2.555761    -0.978423       0.0       -0.000000
1       61.0 -2.264768     0.000000       1.0        0.000000
2       75.0 -2.925447    -0.837728       1.0       -0.837728
3       64.0 -4.605170     0.000000       1.0        0.000000
4       81.0 -2.518990    -1.360009       1.0       -1.360009
...      ...       ...          ...       ...             ...
6230    80.0 -4.605170    -0.386371       1.0       -0.386371
6231    70.0 -2.784585    -1.196629       0.0       -0.000000
6232    74.0 -4.605170    -0.734974       0.0       -0.000000
6233    74.0 -3.240908    -0.734974       1.0       -0.734974
6234    76.0 -2.292748    -4.605170       0.0       -0.000000

[6235 rows x 5 columns]


In [79]:
import statsmodels.api as sm

# Multinomial logit of the chosen school on the two log covariates and their
# interaction with the household category, fitted by BFGS with at most 500
# iterations.
# NOTE(review): only these three columns are passed as regressors; no
# constant column is added, so the model has no per-school intercept —
# confirm that this is intended.
regressor_columns = ['distance', 'composition', 'group*category']
chosen_school = variables['school']
logit_mod = sm.MNLogit(chosen_school, variables[regressor_columns])
logit_res = logit_mod.fit(method='bfgs', maxiter=500)
print(logit_res.summary())

Optimization terminated successfully.
         Current function value: 1.734983
         Iterations: 346
         Function evaluations: 349
         Gradient evaluations: 349
                          MNLogit Regression Results                          
Dep. Variable:                 school   No. Observations:                 6235
Model:                        MNLogit   Df Residuals:                     6157
Method:                           MLE   Df Model:                           52
Date:                Thu, 24 Mar 2022   Pseudo R-squ.:                  0.4393
Time:                        15:20:41   Log-Likelihood:                -10818.
converged:                       True   LL-Null:                       -19294.
Covariance Type:            nonrobust   LLR p-value:                     0.000
     school=58       coef    std err          z      P>|z|      [0.025      0.975]
----------------------------------------------------------------------------------
distance          28.2270  