In [None]:
import os
# Project root, expressed relative to the directory the notebook starts in.
root = "../../Foundation_of_Advanced_Quantitative_Marketing_Python"
# The path is relative, so running this cell a second time would try to move
# two levels up again from the project root. Only change directory when the
# working directory is not already the project root.
if os.path.basename(os.getcwd()) != os.path.basename(root):
    os.chdir(root)

In [None]:
import numpy as np
import pandas as pd
import src.logit_boost as lgt
from scipy.optimize import minimize
import statsmodels.api as sm

# Logit Model Estimation (Boost)

In [3]:
# Load the Yogurt100N workbook; the path is relative to the current working directory.
df = pd.read_excel('./Data/Yogurt100N.xlsx')

In [4]:
p = 4  # number of alternatives
# Covariates are stored in the sheet as (Feature k, Price k) pairs, one pair per alternative.
cols = ['Feature 1', 'Price 1', 'Feature 2', 'Price 2',
        'Feature 3', 'Price 3', 'Feature 4', 'Price 4']
features = df[cols].values
N = features.shape[0]  # number of rows in the sheet
# Brand dummies: identity rows for the first p-1 alternatives and a row of
# zeros for the last alternative, which gets no dummy of its own.
bdummy = np.vstack((np.eye(p - 1), np.zeros((1, p - 1))))  # (p, p-1)
# Repeat the dummy block for every row. Sizes are derived from p instead of
# being hard-coded, so the cell stays correct if p changes.
bdummy = np.tile(bdummy, (N, 1)).reshape(N, p, p - 1)  # (N, p, p-1)
# Split the flat covariate columns into one row per alternative.
features = features.reshape(N, p, -1)  # (N, 4, 2)
features = np.concatenate([bdummy, features], axis=2)  # (N, 4, 5)
X = features
# y holds, per row, the column index of the largest brand indicator (the choice).
# NOTE(review): 'Brand  2' contains two spaces — presumably matching the sheet header; confirm.
brands = df[['Brand 1', 'Brand  2', 'Brand 3', 'Brand 4']].values
y = np.argmax(brands, axis=1)
X.shape, y.shape

((2430, 4, 5), (2430,))

In [7]:
# Fit the logit model on the design array X and the choice indices y built above.
# The call to fit is the last expression, so its return value is displayed by the cell.
model = lgt.LogitModel()
model.fit(X, y)

Optimized parameters:
[  1.38771879   0.64348772  -3.08609284   0.48743071 -37.05722294]
Maximized LL: -2658.5566976077034


<logit_boost.LogitModel at 0x16227ad70>

In [8]:
# Report rho2 first (it needs the data), then each information criterion
# in the order AIC, BIC, HQC, CAIC.
print(lgt.Metrics.rho2(model, X, y))
for criterion in ('AIC', 'BIC', 'HQC', 'CAIC'):
    print(getattr(lgt.Metrics, criterion)(model))

0.21080523383523186
5327.113395215407
5356.09162789708
5337.649049630528
5327.1381476906545


# Nested Logit Model Estimation (Boost)
The last feature in X should indicate which group the product belongs to

In [None]:
# Reload the Yogurt100N workbook so this section starts from the raw sheet.
df = pd.read_excel('./Data/Yogurt100N.xlsx')

In [2]:
p = 4  # number of alternatives
# Covariates are stored in the sheet as (Feature k, Price k) pairs, one pair per alternative.
cols = ['Feature 1', 'Price 1', 'Feature 2', 'Price 2',
        'Feature 3', 'Price 3', 'Feature 4', 'Price 4']
features = df[cols].values
N = features.shape[0]  # number of rows in the sheet
# Brand dummies: identity rows for the first p-1 alternatives and a row of
# zeros for the last alternative, which gets no dummy of its own.
bdummy = np.vstack((np.eye(p - 1), np.zeros((1, p - 1))))  # (p, p-1)
# Sizes are derived from p instead of being hard-coded.
bdummy = np.tile(bdummy, (N, 1)).reshape(N, p, p - 1)  # (N, p, p-1)
# Group id of each alternative: the first three share group 0, the fourth is
# alone in group 1. It is appended as the LAST feature column of X.
group_ids = np.array([0, 0, 0, 1])
group_ids = np.tile(group_ids, (N, 1)).reshape(N, p, 1)  # (N, 4, 1)
features = features.reshape(N, p, -1)  # (N, 4, 2)
features = np.concatenate([bdummy, features, group_ids], axis=2)  # (N, 4, 3+2+1=6)
X = features
# y holds, per row, the column index of the largest brand indicator (the choice).
# NOTE(review): 'Brand  2' contains two spaces — presumably matching the sheet header; confirm.
brands = df[['Brand 1', 'Brand  2', 'Brand 3', 'Brand 4']].values
y = np.argmax(brands, axis=1)

model = lgt.NestedLogitModel()
# NOTE(review): the third positional argument is presumably the number of
# groups — confirm against NestedLogitModel.fit.
model.fit(X, y, 2)

Optimized parameters for features:
[  1.38166457   0.83942797  -1.65844414   0.37446579 -26.58064293]
Optimized parameters for within-group correlations (rho fixed to 1 for single-alternative groups):
[0.6433721 1.       ]
Maximized LL: -2653.7645999950555


<logit_boost.NestedLogitModel at 0x16de12f50>

In [3]:
# Report rho2 first (it needs the data), then each information criterion
# in the order AIC, BIC, HQC, CAIC.
print(lgt.Metrics.rho2(model, X, y))
for criterion in ('AIC', 'BIC', 'HQC', 'CAIC'):
    print(getattr(lgt.Metrics, criterion)(model))

None
5319.529199990111
5354.303079208119
5332.171985288256
5319.5638677573415




# LCM Estimation (Boost)
Global optimization is carried out with basin-hopping.

In [None]:
# Load the workbook used for the latent class section; the path is relative to the working directory.
df = pd.read_excel('./Data/24_Yogurt100_2SegSolution (1).xlsx')

In [7]:
p = 4  # number of alternatives
# Covariates are stored in the sheet as (Feature k, Price k) pairs, one pair per alternative.
cols = ['Feature 1', 'Price 1', 'Feature 2', 'Price 2',
        'Feature 3', 'Price 3', 'Feature 4', 'Price 4']
features = df[cols].values
# Identifier from the 'Pan I.D.' column for each row; passed to the model as indiv_id.
indiv_id = df['Pan I.D.'].values
N = features.shape[0]  # number of rows in the sheet
# Brand dummies: identity rows for the first p-1 alternatives and a row of
# zeros for the last alternative, which gets no dummy of its own.
bdummy = np.vstack((np.eye(p - 1), np.zeros((1, p - 1))))  # (p, p-1)
# Sizes are derived from p instead of being hard-coded.
bdummy = np.tile(bdummy, (N, 1)).reshape(N, p, p - 1)  # (N, p, p-1)
# Split the flat covariate columns into one row per alternative.
features = features.reshape(N, p, -1)  # (N, 4, 2)
features = np.concatenate([bdummy, features], axis=2)  # (N, 4, 5)
X = features
# y holds, per row, the column index of the largest brand indicator (the choice).
# NOTE(review): 'Brand  2' contains two spaces — presumably matching the sheet header; confirm.
brands = df[['Brand 1', 'Brand  2', 'Brand 3', 'Brand 4']].values
y = np.argmax(brands, axis=1)
X.shape, y.shape

((2431, 4, 5), (2431,))

In [8]:
# Latent class model with two segments.
model = lgt.LatentClassModel()
model.fit(X, y, nclasses=2, indiv_id=indiv_id)
# rho2 needs the data; the information criteria only need the fitted model.
print('rho2: ' + str(lgt.Metrics.rho2(model, X, y)))
for criterion in ('AIC', 'BIC', 'HQC', 'CAIC'):
    print(criterion + ': ' + str(getattr(lgt.Metrics, criterion)(model)))

Current NLL: 1915.8010570305285
Current NLL: 1915.801389732527
Current NLL: 1915.801568428267
Current NLL: 1915.80183900002
Current NLL: 1915.8011784194841
Current NLL: 1915.8012668323427
Current NLL: 1915.8013490753626
Current NLL: 1915.8023555551777
Current NLL: 1915.8017807024225
Current NLL: 1915.8015981865944
Current NLL: 1915.8035345491342
Optimized parameters for features:
[  2.69759036   3.851604    -0.67429236   1.42637642 -50.36673876
   1.33362468  -1.29435373  -4.28509319   0.37743458 -36.92107783]
Optimized parameters for class weights:
[0.48 0.52]
Maximized LL: -1915.8010570305285
rho2: None
AIC: 3853.602114061057
BIC: 3917.3587517785345
HQC: 3876.7817148578456
CAIC: 3853.7112500676712




In [9]:
# Latent class model with three segments.
model = lgt.LatentClassModel()
model.fit(X, y, nclasses=3, indiv_id=indiv_id)
# rho2 needs the data; the information criteria only need the fitted model.
print('rho2: ' + str(lgt.Metrics.rho2(model, X, y)))
for criterion in ('AIC', 'BIC', 'HQC', 'CAIC'):
    print(criterion + ': ' + str(getattr(lgt.Metrics, criterion)(model)))

Current NLL: 2116.6379430156594
Current NLL: 1482.970166556051
Current NLL: 1482.970297127773
Current NLL: 1482.9706064401964
Current NLL: 1482.9703962444821
Current NLL: 1482.970385315409
Current NLL: 1482.9706037950482
Current NLL: 1482.97040159858
Current NLL: 1482.970351111512
Current NLL: 1482.9702597760977
Current NLL: 1482.9715240454175
Optimized parameters for features:
[  1.96793072   3.00790978  -1.64951561   1.3138889  -55.5036127
  -1.08695703  -2.64055436  -6.07560053   1.06041544 -20.21212096
   5.14330687   1.40374083  -1.84276375  -0.18441237 -57.47661284]
Optimized parameters for class weights:
[0.51 0.15 0.34]
Maximized LL: -1482.970166556051
rho2: None
AIC: 2999.940333112102
BIC: 3098.473318675476
HQC: 3035.763352525321
CAIC: 3000.193959303565


In [11]:
# Latent class model with four segments.
model = lgt.LatentClassModel()
model.fit(X, y, nclasses=4, indiv_id=indiv_id)
# rho2 needs the data; the information criteria only need the fitted model.
print('rho2: ' + str(lgt.Metrics.rho2(model, X, y)))
for criterion in ('AIC', 'BIC', 'HQC', 'CAIC'):
    print(criterion + ': ' + str(getattr(lgt.Metrics, criterion)(model)))

Current NLL: 1482.9702170716093
Current NLL: 1389.9894195265692
Current NLL: 1389.989653130758
Current NLL: 1389.9894299029115
Current NLL: 1389.9894547444856
Current NLL: 1389.9894151773676
Current NLL: 1389.9895227591794
Current NLL: 1389.9894064778446
Current NLL: 1389.989584149271
Current NLL: 1389.9896538526275
Current NLL: 1389.9898836949592
Optimized parameters for features:
[  1.07062746   0.22291928  -2.43501502   1.00905052 -49.64068872
   2.34977431   4.06566956  -0.89394665   0.92561599 -41.00371026
  -1.45285263  -4.00400037 -21.53700673   1.09529151 -14.75996321
   5.17381596   1.41804757  -1.87235359  -0.18633601 -56.29386205]
Optimized parameters for class weights:
[0.21 0.37 0.09 0.33]
Maximized LL: -1389.9894064778446
rho2: None
AIC: 2825.978812955689
BIC: 2959.28814636496
HQC: 2874.445250985338
CAIC: 2826.4374751908367


In [12]:
# Assign a segment (0-3) to each of the 101 households, using the four-segment
# model fitted in the previous cell.
model.segment_assign()

array([1, 2, 0, 0, 1, 2, 1, 1, 3, 1, 0, 1, 0, 3, 2, 2, 3, 2, 1, 3, 0, 3,
       1, 1, 0, 1, 1, 1, 1, 0, 3, 3, 1, 3, 1, 3, 1, 3, 0, 3, 1, 0, 3, 1,
       3, 3, 3, 3, 0, 1, 1, 1, 0, 1, 3, 0, 0, 1, 3, 1, 3, 1, 0, 2, 3, 1,
       3, 1, 1, 2, 3, 1, 3, 1, 0, 0, 1, 1, 3, 3, 2, 1, 1, 0, 3, 1, 0, 3,
       0, 3, 3, 1, 2, 3, 3, 1, 2, 3, 3, 0, 1])

# Random Coefficients Model (Boost)

In [None]:
# Read the workbook and keep only its first 2430 rows.
df = pd.read_excel('./Data/24_Yogurt100N_MVN.xlsx').iloc[:2430]

In [14]:
p = 4  # number of alternatives
# Covariates are stored in the sheet as (Feature k, Price k) pairs, one pair per alternative.
cols = ['Feature 1', 'Price 1', 'Feature 2', 'Price 2',
        'Feature 3', 'Price 3', 'Feature 4', 'Price 4']
features = df[cols].values
# Identifier from the 'Pan I.D.' column for each row; passed to the model as indiv_id.
indiv_id = df['Pan I.D.'].values
N = features.shape[0]  # number of rows kept from the sheet
# Brand dummies: identity rows for the first p-1 alternatives and a row of
# zeros for the last alternative, which gets no dummy of its own.
bdummy = np.vstack((np.eye(p - 1), np.zeros((1, p - 1))))  # (p, p-1)
# Sizes are derived from p instead of being hard-coded.
bdummy = np.tile(bdummy, (N, 1)).reshape(N, p, p - 1)  # (N, p, p-1)
# Split the flat covariate columns into one row per alternative.
features = features.reshape(N, p, -1)  # (N, 4, 2)
features = np.concatenate([bdummy, features], axis=2)  # (N, 4, 5)
X = features
# y holds, per row, the column index of the largest brand indicator (the choice).
# NOTE(review): 'Brand  2' contains two spaces — presumably matching the sheet header; confirm.
brands = df[['Brand 1', 'Brand  2', 'Brand 3', 'Brand 4']].values
y = np.argmax(brands, axis=1)
X.shape, y.shape

((2430, 4, 5), (2430,))

In [None]:
# Random coefficients model with the default number of draws
# (10 according to the original note — TODO confirm against the library).
model = lgt.RandomCoefficientsModel()
model.fit(X, y, indiv_id=indiv_id)
# The rho2 line is disabled in this cell:
# print('rho2: ' + str(lgt.Metrics.rho2(model, X, y)))
for criterion in ('AIC', 'BIC', 'HQC', 'CAIC'):
    print(criterion + ': ' + str(getattr(lgt.Metrics, criterion)(model)))

Current NLL: 1277.5907832082403
Current NLL: 1277.2231836447797
Current NLL: 1276.8411693064895
Current NLL: 1277.0819305846824
Current NLL: 1277.074483986604
Current NLL: 1276.8880155085885
Maximized LL: -1276.8411693064895
The means of the coefficients are: [  4.84263851   3.38793614  -0.73182192   0.66088996 -48.55973706]
The standard deviations of the coefficients are: [ 3.95411324  5.59486938  3.52532925  0.6074086  96.44920302]
The covariance matrix of the coefficients is: [[ 1.56350116e+01  9.87862695e+00  1.01959209e+01 -9.12836230e-01
  -3.69384188e+01]
 [ 9.87862695e+00  3.13025634e+01  1.58460809e+01 -2.55786627e+00
  -3.74650990e+02]
 [ 1.01959209e+01  1.58460809e+01  1.24279463e+01 -8.54530647e-01
  -1.89462996e+02]
 [-9.12836230e-01 -2.55786627e+00 -8.54530647e-01  3.68945203e-01
   2.67406001e+01]
 [-3.69384188e+01 -3.74650990e+02 -1.89462996e+02  2.67406001e+01
   9.30244876e+03]]
rho2: None
AIC: 2593.682338612979
BIC: 2709.5952693396707
HQC: 2635.824956273463
CAIC: 259



In [None]:
# Random coefficients model estimated with 100 draws.
model = lgt.RandomCoefficientsModel()
model.fit(X, y, indiv_id=indiv_id, draws=100)
# rho2 needs the data; the information criteria only need the fitted model.
print('rho2: ' + str(lgt.Metrics.rho2(model, X, y)))
for criterion in ('AIC', 'BIC', 'HQC', 'CAIC'):
    print(criterion + ': ' + str(getattr(lgt.Metrics, criterion)(model)))

Current NLL: 1218.0075071719389
Current NLL: 1218.0443885910918
Current NLL: 1210.1797772702614
Current NLL: 1209.9179194295864
Current NLL: 1207.9338593499235
Current NLL: 1207.8201541286119
Maximized LL: -1207.8201541286119
The means of the coefficients are: [  2.81347776   1.65963836  -3.64035468   1.04148219 -56.61998503]
The standard deviations of the coefficients are: [ 4.28511325  4.46455369  4.36708524  0.8420437  32.8980775 ]
The covariance matrix of the coefficients is: [[ 1.83621955e+01  5.85715833e+00  7.14378488e+00  1.57810923e-02
  -6.74282006e+01]
 [ 5.85715833e+00  1.99322396e+01  1.62666124e+01 -2.44717146e-02
   1.38030041e+01]
 [ 7.14378488e+00  1.62666124e+01  1.90714335e+01 -2.85609920e-01
  -8.71937172e+00]
 [ 1.57810923e-02 -2.44717146e-02 -2.85609920e-01  7.09037586e-01
  -6.90536775e+00]
 [-6.74282006e+01  1.38030041e+01 -8.71937172e+00 -6.90536775e+00
   1.08228350e+03]]
rho2: None
AIC: 2455.6403082572238
BIC: 2571.5532389839154
HQC: 2497.782925917708
CAIC: 2

## Apply factor analysis with the random coefficients model

In [1]:
import numpy as np
import pandas as pd
import logit_boost as lgt
from scipy.optimize import minimize
# Read the orange-juice workbook and keep only its first 2066 rows.
df = pd.read_excel('./Data/OJ300_OnlyPurchases_Brand Map_Cts.xlsx').iloc[:2066]

In [2]:
p = 8  # number of alternatives
# One price column per alternative.
cols = ['price1', 'price2', 'price3', 'price4', 'price5', 'price6', 'price7', 'price8']
features = df[cols].values
indiv_id = df['panid'].values
N = features.shape[0]
# Brand dummies: identity rows for the first p-1 alternatives, zeros for the last.
bdummy = np.vstack((np.eye(p - 1), np.zeros((1, p - 1))))  # (8, 7)
bdummy = np.tile(bdummy, (N, 1)).reshape(N, p, -1)  # (N, 8, 7)
# One price per alternative, then dummies and price side by side.
features = features.reshape(N, p, -1)  # (N, 8, 1)
features = np.concatenate([bdummy, features], axis=2)  # (N, 8, 8)
X = features
# y holds, per row, the column index of the largest brand indicator (the choice).
brands = df[['br1', 'br2', 'br3', 'br4', 'br5', 'br6', 'br7', 'br8']].values
y = brands.argmax(axis=1)
X.shape, y.shape

((2066, 8, 8), (2066,))

In [25]:
# Factor-analytic random coefficients model: price is treated as a homogeneous
# attribute, while the seven brand intercepts are heterogeneous.
# Flag vector over the 8 covariates; only the last one (price) is set to 1.
homo_covariates = np.zeros(8, dtype=int)
homo_covariates[-1] = 1
model = lgt.RandomCoefficientsModel()
model.fit(
    X,
    y,
    indiv_id=indiv_id,
    method='factor-analytic',
    homo_covariates=homo_covariates,
    niteration=4,
    optimizer='L-BFGS-B',
)
print('rho2: ' + str(lgt.Metrics.rho2(model, X, y)))
for criterion in ('AIC', 'BIC', 'HQC', 'CAIC'):
    print(criterion + ': ' + str(getattr(lgt.Metrics, criterion)(model)))

Current NLL: 2890.349837685409
Current NLL: 2890.3498413978386
Current NLL: 2890.3499068431806
Current NLL: 2890.3498400603216
Current NLL: 2890.349836955289
current BIC:  5895.200218655772
Current NLL: 2258.1496114756706
Current NLL: 2258.1496003726234
Current NLL: 2258.1496688845637
Current NLL: 2258.1495496643647
Current NLL: 2258.1496157708725
current BIC:  4676.599861972
Current NLL: 2115.849916473504
Current NLL: 2112.5439162174384
Current NLL: 2115.8498839563454
Current NLL: 2112.5438047140124
Current NLL: 2112.543965299235
current BIC:  4431.188589969373
Current NLL: 2018.600279490454
Current NLL: 2011.7908143818327
Current NLL: 2011.7907819620125
Current NLL: 2011.790976894939
Current NLL: 2011.7910177148997
current BIC:  4275.482762363451
Current NLL: 1934.487444995362
Current NLL: 1934.143220995153
Current NLL: 1934.141185824583
Current NLL: 1934.1391696032183
Current NLL: 1934.1397072476211
current BIC:  4165.97975554394
Current NLL: 1991.1236030484451
Current NLL: 1989.480

In [4]:
# Factor-analytic random coefficients model with the number of factors fixed
# at 2: price is homogeneous, the seven brand intercepts are heterogeneous.
# Flag vector over the 8 covariates; only the last one (price) is set to 1.
homo_covariates = np.zeros(8, dtype=int)
homo_covariates[-1] = 1
model = lgt.RandomCoefficientsModel()
model.fit(
    X,
    y,
    indiv_id=indiv_id,
    method='factor-analytic',
    homo_covariates=homo_covariates,
    niteration=4,
    optimizer='Powell',
    nfactors=2,
)
print('rho2: ' + str(lgt.Metrics.rho2(model, X, y)))
for criterion in ('AIC', 'BIC', 'HQC', 'CAIC'):
    print(criterion + ': ' + str(getattr(lgt.Metrics, criterion)(model)))

Using factor-analytic method with 2 factors.
Current NLL: 1947.7134843098036
Current NLL: 1947.3485401665957
Current NLL: 1946.734378562523
Current NLL: 1946.8930571245746
Current NLL: 1946.8715683195253
BIC:  4053.7695197683174
Maximized LL: -1946.734378562523
The optimal number of factors is: 2
The means of the coefficients are: [-0.39624581  0.57941132 -3.1183935   1.77456347 -0.4728011  -0.89518217
 -2.27108348 -0.95209345]
The standard deviations of the coefficients are: [11.82068391 11.09779041  5.93910658 11.14551463  3.40421758 12.76247597
  1.10570064  0.        ]
The covariance matrix of the coefficients is: [[139.72856809 124.67071769 -12.41107821 108.12159722  34.07361055
  142.69459093 -12.43661678   0.        ]
 [124.67071769 123.16095207   9.11296263 118.46243539  24.14778183
  141.62117221  -9.92202393   0.        ]
 [-12.41107821   9.11296263  35.27298697  27.62403337 -13.61271551
   11.53853579   3.09254221   0.        ]
 [108.12159722 118.46243539  27.62403337 124.22

# BLP (boost)

In [None]:
df = pd.read_excel('./Data/CoffeeData.xlsx')

# Problem dimensions passed to the estimator.
D = 50  # 9m 1s if use packages
p, t = 4, 114  # alternatives, time periods
nfeatures = 8  # number of product features (including price and intercept)

# Column groups: instruments, the block labelled "endo" (price and brand
# indicators), and the exogenous promotion variables.
col_instruments = ['Spot 1', 'Spot 2', 'Spot 3', 'Spot 4', 'Spot 5', 'Spot 6']
col_endo = ['Price', 'Brand 1', 'Brand 2', 'Brand 3', 'Brand 4']
col_exo = ['Feature', 'Display', 'F&D']
col_x = col_endo + col_exo

# Arrays handed to the model.
shares, outside = df['Share'].values, df['Outside'].values
X = df[col_x].values
# Instrument matrix: instruments plus exogenous columns, with a constant added.
Z = sm.add_constant(df[col_instruments + col_exo].values)

blp_model = lgt.BLP(X, Z, shares, outside, nfeatures, D, t, p)
gamma_hat, beta_hat = blp_model.fit()
blp_model.summary()

 This problem is unconstrained.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =           36     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  8.18192D+00    |proj g|=  1.09789D+02

At iterate    1    f=  7.56787D+00    |proj g|=  6.95488D+01

At iterate    2    f=  7.22202D+00    |proj g|=  6.32109D+01

At iterate    3    f=  5.38299D+00    |proj g|=  5.25692D+01

At iterate    4    f=  4.34839D+00    |proj g|=  4.99217D+01

At iterate    5    f=  3.80387D+00    |proj g|=  4.38012D+01

At iterate    6    f=  3.35262D+00    |proj g|=  2.98633D+01

At iterate    7    f=  2.52300D+00    |proj g|=  1.67628D+01

At iterate    8    f=  2.37227D+00    |proj g|=  4.84201D+01

At iterate    9    f=  1.95169D+00    |proj g|=  2.01080D+01

At iterate   10    f=  1.55874D+00    |proj g|=  4.55512D+01

At iterate   11    f=  1.34552D+00    |proj g|=  3.61234D+01

At iterate   12    f=  1.28845D+00    |proj g|=  2.98637D+01

At iterate   13    f=  1.1

In [None]:
import logit_boost as lgt
import pandas as pd
import numpy as np
import statsmodels.api as sm
df = pd.read_excel('./Data/CoffeeData.xlsx')

# Problem dimensions passed to the estimator.
D = 50  # takes 2m 13.3s if use closed-form solution
p, t = 4, 114  # alternatives, time periods
nfeatures = 8  # number of product features (including price and intercept)

# Column groups: instruments, the block labelled "endo" (price and brand
# indicators), and the exogenous promotion variables.
col_instruments = ['Spot 1', 'Spot 2', 'Spot 3', 'Spot 4', 'Spot 5', 'Spot 6']
col_endo = ['Price', 'Brand 1', 'Brand 2', 'Brand 3', 'Brand 4']
col_exo = ['Feature', 'Display', 'F&D']
col_x = col_endo + col_exo

# Arrays handed to the model.
shares, outside = df['Share'].values, df['Outside'].values
X = df[col_x].values
# Instrument matrix: instruments plus exogenous columns, with a constant added.
Z = sm.add_constant(df[col_instruments + col_exo].values)

blp_model = lgt.BLP(X, Z, shares, outside, nfeatures, D, t, p)
gamma_hat, beta_hat = blp_model.fit()
blp_model.summary()

RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =           36     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  1.23629D+01    |proj g|=  2.49454D+02


 This problem is unconstrained.



At iterate    1    f=  6.97355D+00    |proj g|=  1.37503D+02

At iterate    2    f=  5.75338D+00    |proj g|=  2.21668D+01

At iterate    3    f=  5.65402D+00    |proj g|=  2.04579D+01

At iterate    4    f=  5.23374D+00    |proj g|=  3.49521D+01

At iterate    5    f=  5.00529D+00    |proj g|=  2.51370D+01

At iterate    6    f=  2.93898D+00    |proj g|=  4.08783D+01

At iterate    7    f=  2.91967D+00    |proj g|=  5.02575D+01

At iterate    8    f=  2.72712D+00    |proj g|=  4.17499D+01

At iterate    9    f=  2.21879D+00    |proj g|=  1.03883D+01

At iterate   10    f=  2.10720D+00    |proj g|=  8.67722D+00

At iterate   11    f=  1.98188D+00    |proj g|=  8.21322D+00

At iterate   12    f=  1.84421D+00    |proj g|=  2.08596D+01

At iterate   13    f=  1.65182D+00    |proj g|=  1.75022D+01

At iterate   14    f=  1.43560D+00    |proj g|=  5.83788D+00

At iterate   15    f=  1.33901D+00    |proj g|=  8.64141D+00

At iterate   16    f=  1.25156D+00    |proj g|=  5.98023D+00

At iter

In [None]:
# Show the summary of the BLP model fitted above (estimated gamma, estimated beta, final loss).
blp_model.summary()

Estimated gamma: [ 0.33225212  0.08077807  0.17158019  0.143304    0.08283135  0.14259936
  0.02039275  0.26272177 -0.01254056  0.2466869   0.2122864   0.16963058
  0.11759996 -0.04291711  0.16178155  0.11396875  0.04081778  0.07972788
  0.0242688   0.07352828 -0.08388212  0.11424163 -0.0182946  -0.05132044
 -0.22481324  0.23413167  0.13109459 -0.08216028  0.24961746 -0.06350413
  0.22632767  0.03156396  0.1018018   0.08375035 -0.10240417  0.0996917 ]
Estimated beta: [ -2.14129092  12.58328093  11.86224346   5.69395541 -20.07684508
  -0.44203436  -0.49240553  -0.40204642]
Final loss: 1.7914017257157713e-13


In [None]:
df = pd.read_excel('./Data/CoffeeData.xlsx')

# Problem dimensions passed to the estimator.
D = 100  # takes 45m 40s (using closed-form solution)
p, t = 4, 114  # alternatives, time periods
nfeatures = 8  # number of product features (including price and intercept)

# Column groups: instruments, the block labelled "endo" (price and brand
# indicators), and the exogenous promotion variables.
col_instruments = ['Spot 1', 'Spot 2', 'Spot 3', 'Spot 4', 'Spot 5', 'Spot 6']
col_endo = ['Price', 'Brand 1', 'Brand 2', 'Brand 3', 'Brand 4']
col_exo = ['Feature', 'Display', 'F&D']
col_x = col_endo + col_exo

# Arrays handed to the model.
shares, outside = df['Share'].values, df['Outside'].values
X = df[col_x].values
# Instrument matrix: instruments plus exogenous columns, with a constant added.
Z = sm.add_constant(df[col_instruments + col_exo].values)

blp_model = lgt.BLP(X, Z, shares, outside, nfeatures, D, t, p)
gamma_hat, beta_hat = blp_model.fit()
blp_model.summary()

 This problem is unconstrained.


RUNNING THE L-BFGS-B CODE

           * * *

Machine precision = 2.220D-16
 N =           36     M =           10

At X0         0 variables are exactly at the bounds

At iterate    0    f=  5.37128D+01    |proj g|=  7.75551D+02

At iterate    1    f=  1.36948D+01    |proj g|=  2.46320D+02

At iterate    2    f=  4.14529D+00    |proj g|=  5.50075D+01

At iterate    3    f=  2.93015D+00    |proj g|=  6.48847D+01

At iterate    4    f=  4.77447D-01    |proj g|=  2.24831D+01

At iterate    5    f=  1.82483D-01    |proj g|=  8.23394D+00

At iterate    6    f=  8.48835D-02    |proj g|=  3.84221D+00

At iterate    7    f=  2.19300D-02    |proj g|=  1.75376D+00

At iterate    8    f=  2.47573D-03    |proj g|=  4.81323D-01

At iterate    9    f=  6.16183D-05    |proj g|=  1.92202D-01

At iterate   10    f=  3.14372D-06    |proj g|=  5.95269D-02

At iterate   11    f=  3.57696D-08    |proj g|=  4.82707D-03

At iterate   12    f=  5.68113D-10    |proj g|=  5.18862D-04

At iterate   13    f=  5.8