In [2]:
# Start from a clean namespace; -f skips the interactive confirmation.
%reset -f
# Reload imported modules automatically so edits to local helper modules are picked up without restarting.
%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
from linearmodels.iv import IV2SLS
import functions as fun
import statsmodels.api as sm
import seaborn as sns

In [3]:
# Load the raw data set; `df` keeps every row, including products with zero market share.
df = pd.read_csv('dataset.csv')
# Number of non-missing IDs (kept under its original name).
Nobs=df['ID'].count()
# Intercept column. A scalar is broadcast to every row, so the column length can never
# disagree with the frame (the previous (Nobs, 1) array depended on ID having no missing values).
df['const'] = 1.0
# Estimation sample: only rows with a non-zero market share (their log is taken later).
data = df[df['Market share'] != 0]

data.head(20)

Unnamed: 0,ID,Year,Market share,Manufacturer,Model,Range,Price,HP,Chargetime,Type,Segment,Country,Sales,const
8,1,2021,0.010373,Aiways,U5,400,284621.7,201,34,SUV,C,CN,257,1.0
9,1,2022,0.005976,Aiways,U5,400,313681.829,201,34,SUV,C,CN,183,1.0
10,1,2023,0.00286,Aiways,U5,400,264524.0,201,34,SUV,C,CN,177,1.0
21,2,2023,4.8e-05,Aiways,U6,405,360638.0,214,34,SUV,C,CN,3,1.0
28,3,2019,0.04063,Audi,e-tron,375,979704.475,402,17,SUV,F,DE,222,1.0
29,3,2020,0.03468,Audi,e-tron,375,890101.41,402,17,SUV,F,DE,491,1.0
30,3,2021,0.010494,Audi,e-tron,375,800035.193,402,17,SUV,F,DE,260,1.0
31,3,2022,0.01757,Audi,e-tron,375,789723.656,402,17,SUV,F,DE,538,1.0
32,3,2023,0.001099,Audi,e-tron,375,673037.728,402,17,SUV,F,DE,68,1.0
41,4,2021,0.003391,Audi,e-tron GT,472,1278896.11,522,17,Sedan,F,DE,84,1.0


In [4]:
# Work on an independent (deep) copy so later column edits do not touch `data`.
df2 = data.copy(deep=True)

In [5]:
# Rescale regressors for easier interpretation of the coefficients:
#   Price -> units of 10,000, HP -> units of 10, Range -> units of 10.
# The values are read from the untouched `data` frame (df2 is a copy of it, so the index
# matches); re-running this cell therefore does not divide the columns a second time.
df2['Price'] = data['Price'] / 10_000
df2['HP'] = data['HP'] / 10
df2['Range'] = data['Range'] / 10

In [6]:
# Indicator column: 1 when the Country code is 'CN', 0 otherwise.
df2['China'] = df2['Country'].eq('CN').astype(int)

In [7]:
# Natural log of the observed market share (the dependent variable of the regressions below).
df2['log_market_share'] = df2['Market share'].pipe(np.log)

# OLS

In [8]:
# Dependent variable: log market share.
y = df2['log_market_share']
# Intercept plus continuous characteristics, and the China indicator as a separate frame.
x = df2.loc[:, ['const', 'Range', 'Price', 'HP', 'Chargetime']]
dummies = df2.loc[:, ['China']]
# Full OLS design matrix (columns of x followed by the dummy).
X = pd.concat((x, dummies), axis='columns')
print(X.shape)

(334, 6)


In [9]:
# OLS of log market share on the design matrix, with HC3 heteroskedasticity-robust standard errors.
OLS_model = sm.OLS(endog=y, exog=X)
OLS_result = OLS_model.fit(cov_type='HC3')
# Show the full regression table.
print(OLS_result.summary())

                            OLS Regression Results                            
Dep. Variable:       log_market_share   R-squared:                       0.207
Model:                            OLS   Adj. R-squared:                  0.195
Method:                 Least Squares   F-statistic:                     21.01
Date:                Sat, 04 May 2024   Prob (F-statistic):           3.18e-18
Time:                        16:47:58   Log-Likelihood:                -716.37
No. Observations:                 334   AIC:                             1445.
Df Residuals:                     328   BIC:                             1468.
Df Model:                           5                                         
Covariance Type:                  HC3                                         
                 coef    std err          z      P>|z|      [0.025      0.975]
------------------------------------------------------------------------------
const         -6.4240      0.700     -9.171      0.0

# BLP instrument (sum)

In [10]:
# Build the BLP "sum" instruments for three characteristics.
# Judging by the displayed result, each call adds a `<characteristic>_sum` column to df2
# in place and returns the frame — TODO confirm in the helper module.
for characteristic in ('HP', 'Chargetime'):
    fun.BLP(df2, characteristic)
# The last call stays as the cell's final expression so the resulting frame is displayed.
fun.BLP(df2, 'Range')

Unnamed: 0,ID,Year,Market share,Manufacturer,Model,Range,Price,HP,Chargetime,Type,Segment,Country,Sales,const,China,log_market_share,HP_sum,Chargetime_sum,Range_sum
8,1,2021,0.010373,Aiways,U5,40.0,28.462170,20.1,34,SUV,C,CN,257,1.0,1,-4.568514,1358.3,1861,2151.3
9,1,2022,0.005976,Aiways,U5,40.0,31.368183,20.1,34,SUV,C,CN,183,1.0,1,-5.119955,2033.9,2664,3218.0
10,1,2023,0.002860,Aiways,U5,40.0,26.452400,20.1,34,SUV,C,CN,177,1.0,1,-5.856883,2640.3,3241,4177.4
21,2,2023,0.000048,Aiways,U6,40.5,36.063800,21.4,34,SUV,C,CN,3,1.0,1,-9.934421,2639.0,3241,4176.9
28,3,2019,0.040630,Audi,e-tron,37.5,97.970447,40.2,17,SUV,F,DE,222,1.0,0,-3.203259,390.1,570,580.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1154,189,2023,0.000065,Volvo,EX30,47.5,36.824500,26.8,28,SUV,B,SE,4,1.0,0,-9.646739,2633.6,3247,4169.9
1163,190,2021,0.014652,Volvo,XC40,45.7,46.206060,40.2,28,SUV,C,SE,363,1.0,0,-4.223188,1338.2,1867,2145.6
1164,190,2022,0.033310,Volvo,XC40,45.7,41.626340,40.2,28,SUV,C,SE,1020,1.0,0,-3.401883,2013.8,2670,3212.3
1165,190,2023,0.031752,Volvo,XC40,45.7,43.926660,40.2,28,SUV,C,SE,1965,1.0,0,-3.449786,2620.2,3247,4171.7


In [11]:
# Dependent variable and exogenous regressors; Price is left out because it is the endogenous regressor.
y = df2['log_market_share']
x = df2.loc[:, ['const', 'Range', 'HP', 'Chargetime']]
dummies = df2.loc[:, ['China']]
X = pd.concat((x, dummies), axis='columns')
# Endogenous regressor and the excluded instruments (BLP sums).
k = df2['Price']
z = df2.loc[:, ['Range_sum', 'HP_sum', 'Chargetime_sum']]

In [12]:
# 2SLS with Price instrumented by the BLP sum instruments; robust covariance estimator.
BLP_model = IV2SLS(
    dependent=y,
    exog=X,
    endog=k,
    instruments=z,
).fit(cov_type='robust')

In [13]:
# First-stage diagnostics for Price (partial R-squared and partial F-statistic) to judge instrument strength.
BLP_model.first_stage

0,1
,Price
R-squared,0.5822
Partial R-squared,0.0261
Shea's R-squared,0.0261
Partial F-statistic,17.145
P-value (Partial F-stat),0.0007
Partial F-stat Distn,chi2(3)
==========================,===========
const,-1.3281
,(-0.1735)


In [14]:
# Second-stage (2SLS) estimates using the BLP sum instruments.
BLP_model.summary

0,1,2,3
Dep. Variable:,log_market_share,R-squared:,-5.7877
Estimator:,IV-2SLS,Adj. R-squared:,-5.8912
No. Observations:,334,F-statistic:,28.868
Date:,"Sat, May 04 2024",P-value (F-stat),0.0000
Time:,16:47:58,Distribution:,chi2(5)
Cov. Estimator:,robust,,
,,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-2.8041,2.3134,-1.2121,0.2255,-7.3383,1.7300
Range,0.1995,0.0588,3.3923,0.0007,0.0842,0.3148
HP,0.3270,0.0955,3.4228,0.0006,0.1398,0.5143
Chargetime,-0.0848,0.0305,-2.7760,0.0055,-0.1447,-0.0249
China,-5.4735,1.1202,-4.8862,0.0000,-7.6690,-3.2779
Price,-0.3222,0.0761,-4.2313,0.0000,-0.4714,-0.1729


# Gandhi–Houde instrument (local differentiation)

In [15]:
# Build the local-sum instruments for three characteristics.
# Judging by the displayed result, each call adds a `<characteristic>_instrument_localsum`
# column to df2 in place and returns the frame — TODO confirm in the helper module.
# NOTE(review): 0.5 is presumably the closeness threshold that defines "local" rivals — confirm its meaning and units.
for characteristic in ('HP', 'Chargetime'):
    fun.create_instrument_localsum(df2, characteristic, 0.5)
# The last call stays as the cell's final expression so the resulting frame is displayed.
fun.create_instrument_localsum(df2, 'Range', 0.5)

Unnamed: 0,ID,Year,Market share,Manufacturer,Model,Range,Price,HP,Chargetime,Type,...,Sales,const,China,log_market_share,HP_sum,Chargetime_sum,Range_sum,HP_instrument_localsum,Chargetime_instrument_localsum,Range_instrument_localsum
8,1,2021,0.010373,Aiways,U5,40.0,28.462170,20.1,34,SUV,...,257,1.0,1,-4.568514,1358.3,1861,2151.3,740.5,1317,1499.9
9,1,2022,0.005976,Aiways,U5,40.0,31.368183,20.1,34,SUV,...,183,1.0,1,-5.119955,2033.9,2664,3218.0,1060.2,1796,2153.5
10,1,2023,0.002860,Aiways,U5,40.0,26.452400,20.1,34,SUV,...,177,1.0,1,-5.856883,2640.3,3241,4177.4,1501.0,2099,2792.3
21,2,2023,0.000048,Aiways,U6,40.5,36.063800,21.4,34,SUV,...,3,1.0,1,-9.934421,2639.0,3241,4176.9,1456.2,2099,2746.8
28,3,2019,0.040630,Audi,e-tron,37.5,97.970447,40.2,17,SUV,...,222,1.0,0,-3.203259,390.1,570,580.9,310.5,533,470.9
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1154,189,2023,0.000065,Volvo,EX30,47.5,36.824500,26.8,28,SUV,...,4,1.0,0,-9.646739,2633.6,3247,4169.9,1777.6,1813,2189.4
1163,190,2021,0.014652,Volvo,XC40,45.7,46.206060,40.2,28,SUV,...,363,1.0,0,-4.223188,1338.2,1867,2145.6,1026.5,1138,1169.9
1164,190,2022,0.033310,Volvo,XC40,45.7,41.626340,40.2,28,SUV,...,1020,1.0,0,-3.401883,2013.8,2670,3212.3,1621.7,1483,1702.8
1165,190,2023,0.031752,Volvo,XC40,45.7,43.926660,40.2,28,SUV,...,1965,1.0,0,-3.449786,2620.2,3247,4171.7,1978.6,1813,2206.7


In [16]:
# Replace the excluded instruments with the local-sum versions and re-estimate the same 2SLS specification.
z = df2.loc[:, [
    'Range_instrument_localsum',
    'HP_instrument_localsum',
    'Chargetime_instrument_localsum',
]]
GH_model = IV2SLS(
    dependent=y,
    exog=X,
    endog=k,
    instruments=z,
).fit(cov_type='robust')
# First-stage diagnostics for the new instrument set.
GH_model.first_stage

0,1
,Price
R-squared,0.6105
Partial R-squared,0.0919
Shea's R-squared,0.0919
Partial F-statistic,22.829
P-value (Partial F-stat),4.383e-05
Partial F-stat Distn,chi2(3)
================================,===========
const,1.7422
,(0.2392)


In [17]:
# Second-stage (2SLS) estimates using the local-sum instruments.
GH_model.summary

0,1,2,3
Dep. Variable:,log_market_share,R-squared:,-1.1894
Estimator:,IV-2SLS,Adj. R-squared:,-1.2228
No. Observations:,334,F-statistic:,73.809
Date:,"Sat, May 04 2024",P-value (F-stat),0.0000
Time:,16:47:59,Distribution:,chi2(5)
Cov. Estimator:,robust,,
,,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-4.6767,1.4901,-3.1386,0.0017,-7.5972,-1.7562
Range,0.1395,0.0231,6.0487,0.0000,0.0943,0.1847
HP,0.1587,0.0585,2.7127,0.0067,0.0440,0.2734
Chargetime,-0.0478,0.0201,-2.3749,0.0176,-0.0872,-0.0083
China,-3.7943,0.6345,-5.9798,0.0000,-5.0380,-2.5507
Price,-0.1722,0.0448,-3.8463,0.0001,-0.2599,-0.0844


# Logit

In [18]:
# Reorder the estimated coefficients to the column order of the logit design matrix:
# const, Price, Range, HP, Chargetime, China.
# Selecting by label (rather than by position) raises a KeyError if a regressor is renamed
# or dropped, instead of silently pairing coefficients with the wrong columns.
GH_params = GH_model.params.loc[
    ['const', 'Price', 'Range', 'HP', 'Chargetime', 'China']
].to_numpy()
GH_params

array([-4.67668433, -0.17218025,  0.13951338,  0.15870948, -0.04775668,
       -3.79431677])

In [19]:
# Rebuild a scaled frame from the full data set `df` (zero-share rows included), using the
# same units as the estimation sample: Price per 10,000, HP per 10, Range per 10.
logit_df = df.assign(
    Price=df['Price'] / 10_000,
    HP=df['HP'] / 10,
    Range=df['Range'] / 10,
)
# NOTE(review): the saved output shows a SettingWithCopyWarning raised inside this helper
# (`df['Price'][i] = 10_000_000`); presumably it overwrites Price for zero-market-share
# rows — confirm in the helper module and replace the chained assignment with .loc there.
logit_df = fun.straf_0ms(logit_df)

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df['Price'][i] = 10_000_000


In [20]:
# China indicator on the full frame, built the same way as for the estimation sample.
logit_df['China'] = logit_df['Country'].eq('CN').astype(int)

# Design matrix whose column order matches the reordered coefficient vector GH_params.
x = logit_df[['const', 'Price', 'Range', 'HP', 'Chargetime']]
dummies = logit_df[['China']]
X = pd.concat([x, dummies], axis=1)
# Keep the column labels before X is turned into a bare NumPy array.
columns = X.columns
X = np.array(X)

# Price coefficient (0-d array) and the full coefficient vector (independent copy).
# NOTE(review): beta still contains the price coefficient at index 1 and X still contains
# the Price column — confirm fun.ccp does not add alpha * p_j on top of X @ beta.
alpha = np.array(GH_params[1])
beta = np.array(GH_params)
# Prices for every row of logit_df.
p_j = np.array(logit_df['Price'])

In [21]:
# Predicted choice probabilities returned by fun.ccp, stored next to the observed shares.
logit_df['logit_market_share'] = fun.ccp(alpha, beta, X, p_j)
# Coerce the stored values to a plain float column.
logit_df['logit_market_share'] = pd.Series(logit_df['logit_market_share'], dtype=float)
# Keep only products with a non-zero observed share. The explicit .copy() makes logit_df2 an
# independent frame, so adding columns to it later does not raise SettingWithCopyWarning.
logit_df2 = logit_df[logit_df['Market share'] != 0].copy()
# Predicted probabilities of the retained products as a NumPy array.
ccp = np.array(logit_df2['logit_market_share'])

share_j: 1177
 choice probability sum: 1.0 
 ccp:[array([[0.]]), array([[0.]]), array([[0.]]), array([[0.]]), array([[0.]]), array([[0.]]), array([[0.]]), array([[0.]]), array([[3.85198732e-05]]), array([[2.33551211e-05]]), array([[5.44464141e-05]])]


In [22]:
# Pairwise ratios of predicted choice probabilities, labelled by model name.
# In the saved output, entry (row i, column j) equals ccp[i] / ccp[j].
probability_ratio = fun.probability_ratio(ccp, logit_df2['Model'])
probability_ratio

Model,U5,U5,U5,U6,e-tron,e-tron,e-tron,e-tron,e-tron,e-tron GT,...,up!,up!,C40,C40,C40,EX30,XC40,XC40,XC40,Free
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
U5,1.0,1.649312,0.707482,2.80884,91.881064,19.642728,4.165952,3.488251,0.467806,610.446272,...,0.501248,0.332134,0.007977,0.005089,0.003423,0.008645,0.006664,0.003029,0.004501,0.345941
U5,0.606314,1.0,0.428956,1.703038,55.708734,11.909652,2.525873,2.114974,0.283637,370.121846,...,0.303913,0.201377,0.004837,0.003086,0.002076,0.005242,0.004041,0.001837,0.002729,0.209749
U5,1.413463,2.331241,1.0,3.970191,129.87048,27.764268,5.888419,4.930514,0.661227,862.843196,...,0.708495,0.469459,0.011275,0.007194,0.004839,0.01222,0.00942,0.004281,0.006362,0.488975
U6,0.356019,0.587186,0.251877,1.0,32.711394,6.993182,1.483158,1.241883,0.166548,217.3304,...,0.178454,0.118246,0.00284,0.001812,0.001219,0.003078,0.002373,0.001078,0.001602,0.123162
e-tron,0.010884,0.017951,0.0077,0.03057,1.0,0.213784,0.045341,0.037965,0.005091,6.643875,...,0.005455,0.003615,0.000087,0.000055,0.000037,0.000094,0.000073,0.000033,0.000049,0.003765
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
EX30,115.669003,190.774235,81.83377,324.895698,10627.791067,2272.054721,481.871518,403.482542,54.110705,70609.711935,...,57.978843,38.417601,0.922693,0.588675,0.395978,1.0,0.770848,0.350358,0.520622,40.014648
XC40,150.054171,247.486092,106.160667,421.478124,13787.136814,2947.473195,625.118475,523.426644,70.196307,91600.009141,...,75.214335,49.838082,1.196984,0.763672,0.513691,1.297272,1.0,0.454509,0.675388,51.909887
XC40,330.145573,544.512938,233.572143,927.326017,30334.126376,6484.959538,1375.370614,1151.630696,154.444225,201536.134067,...,165.484769,109.652549,2.633575,1.680213,1.13021,2.854227,2.200176,1.0,1.485973,114.210882
XC40,222.174703,366.435325,157.184665,624.053143,20413.648,4364.117155,925.569151,775.001177,103.934757,135625.719007,...,111.364599,73.791759,1.77229,1.130716,0.760586,1.92078,1.48063,0.67296,1.0,76.859334


In [23]:
# Marginal effects of the regressors on the predicted choice probabilities, one row per product.
# NOTE(review): in the saved output the numbers look shifted one column to the left of their
# labels (the first row's "Chargetime" value matches ccp * beta[China], and "China" is empty).
# Confirm whether fun.marginal_effects mislabels columns or this is only a display artifact.
marginal_effects = fun.marginal_effects(ccp, logit_df2['Model'], columns, beta)
marginal_effects

Unnamed: 0_level_0,const,Price,Range,HP,Chargetime,China
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
U5,-0.000007,0.000005,0.000006,-0.000002,-0.000146,
U5,-0.000004,0.000003,0.000004,-0.000001,-0.000089,
U5,-0.000009,0.000008,0.000009,-0.000003,-0.000207,
U6,-0.000002,0.000002,0.000002,-0.000001,-0.000052,
e-tron,-0.0,0.0,0.0,-0.0,-0.000002,
...,...,...,...,...,...,...
EX30,-0.000767,0.000622,0.000707,-0.000213,-0.016906,
XC40,-0.000995,0.000806,0.000917,-0.000276,-0.021931,
XC40,-0.00219,0.001774,0.002018,-0.000607,-0.048253,
XC40,-0.001473,0.001194,0.001358,-0.000409,-0.032472,


In [24]:
# Elasticities of the predicted choice probabilities with respect to each regressor.
# NOTE(review): X was built from all rows of logit_df, while ccp and the model labels cover
# only the non-zero-share rows (the helper reports shape (334, 6)). The saved output is
# almost identical for every product — confirm that fun.elasticity pairs each ccp entry
# with its own row of X.
elasticity = fun.elasticity(ccp, logit_df2['Model'], columns, beta, X)
elasticity

elasticity shape: 
(334, 6)


Unnamed: 0_level_0,const,Price,Range,HP,Chargetime,China
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
U5,[[-1721736.1429716381]],[[5.580320380211402]],[[3.189937737536149]],[[-1.623664424000624]],[[-3.7941706091348752]],
U5,[[-1721762.2536793128]],[[5.580405007646781]],[[3.189986114014801]],[[-1.6236890474176962]],[[-3.7942281490082452]],
U5,[[-1721708.7206142985]],[[5.580231501587253]],[[3.1898869309053612]],[[-1.6236385636466921]],[[-3.794110178793864]],
U6,[[-1721778.8541258806]],[[5.580458811367335]],[[3.190016870406356]],[[-1.6237047022871285]],[[-3.7942647312259816]],
e-tron,[[-1721801.744742373]],[[5.580533002162626]],[[3.1900592808777177]],[[-1.6237262890324573]],[[-3.7943151750205635]],
...,...,...,...,...,...,...
EX30,[[-1714130.8803965242]],[[5.555670957639882]],[[3.17584712661922]],[[-1.6164923643741802]],[[-3.777410977378846]],
XC40,[[-1711850.331875376]],[[5.548279470017011]],[[3.1716218521370703]],[[-1.6143417180535398]],[[-3.7723853582054505]],
XC40,[[-1699906.0194795495]],[[5.509566749625952]],[[3.14949208909765]],[[-1.6030777649876649]],[[-3.7460638110720135]],
XC40,[[-1707067.0376449118]],[[5.532776331288253]],[[3.1627596284811497]],[[-1.6098308847860638]],[[-3.7618444664095985]],


In [25]:
# Cross elasticities between products; the helper prints one array per product ("Change in : <model>").
# NOTE(review): X holds all rows of logit_df, while ccp and the model labels cover only the
# non-zero-share rows — confirm row alignment inside fun.cross_elasticity.
cross_elasticity = fun.cross_elasticity(ccp, beta, X, np.array(logit_df2['Model']))
cross_elasticity

Change in : U5 
 [[[ 6.63236128e+01 -2.14961514e-04 -1.22880731e-04  6.25457571e-05
    1.46156601e-04]]

 [[ 6.63236128e+01 -2.14961514e-04 -1.22880731e-04  6.25457571e-05
    1.46156601e-04]]

 [[ 6.63236128e+01 -2.14961514e-04 -1.22880731e-04  6.25457571e-05
    1.46156601e-04]]

 ...

 [[ 6.63236128e+01 -2.14961514e-04 -1.22880731e-04  6.25457571e-05
    1.46156601e-04]]

 [[ 6.63236128e+01 -2.14961514e-04 -1.22880731e-04  6.25457571e-05
    1.46156601e-04]]

 [[ 6.63236128e+01 -2.14961514e-04 -1.22880731e-04  6.25457571e-05
    1.46156601e-04]]]
Change in : U5 
 [[[ 4.02129051e+01 -1.30334079e-04 -7.45042520e-05  3.79223400e-05
    8.86167275e-05]]

 [[ 4.02129051e+01 -1.30334079e-04 -7.45042520e-05  3.79223400e-05
    8.86167275e-05]]

 [[ 4.02129051e+01 -1.30334079e-04 -7.45042520e-05  3.79223400e-05
    8.86167275e-05]]

 ...

 [[ 4.02129051e+01 -1.30334079e-04 -7.45042520e-05  3.79223400e-05
    8.86167275e-05]]

 [[ 4.02129051e+01 -1.30334079e-04 -7.45042520e-05  3.79223400e-

# Costs

In [26]:
# Prices of the retained (non-zero share) products as a NumPy array.
p_j = np.array(logit_df2['Price'])
# Attach the values returned by fun.cost as a 'Cost' column. `assign` builds a new frame
# instead of writing into a filtered slice of logit_df, which avoids SettingWithCopyWarning.
logit_df2 = logit_df2.assign(Cost=fun.cost(p_j, ccp, alpha))

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  logit_df2['Cost'] = fun.cost(p_j, ccp, alpha)


In [27]:
# Inspect the retained products together with their predicted share and Cost columns.
logit_df2

Unnamed: 0,ID,Year,Market share,Manufacturer,Model,Range,Price,HP,Chargetime,Type,Segment,Country,Sales,const,China,logit_market_share,Cost
8,1,2021,0.010373,Aiways,U5,40.0,28.462170,20.1,34,SUV,C,CN,257,1.0,1,3.851987e-05,4498.369056
9,1,2022,0.005976,Aiways,U5,40.0,31.368183,20.1,34,SUV,C,CN,183,1.0,1,2.335512e-05,7403.637677
10,1,2023,0.002860,Aiways,U5,40.0,26.452400,20.1,34,SUV,C,CN,177,1.0,1,5.444641e-05,3188.832320
21,2,2023,0.000048,Aiways,U6,40.5,36.063800,21.4,34,SUV,C,CN,3,1.0,1,1.371380e-05,12591.316072
28,3,2019,0.040630,Audi,e-tron,37.5,97.970447,40.2,17,SUV,F,DE,222,1.0,0,4.192363e-07,410797.769781
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1154,189,2023,0.000065,Volvo,EX30,47.5,36.824500,26.8,28,SUV,B,SE,4,1.0,0,4.455555e-03,75.468447
1163,190,2021,0.014652,Volvo,XC40,45.7,46.206060,40.2,28,SUV,C,SE,363,1.0,0,5.780068e-03,75.994681
1164,190,2022,0.033310,Volvo,XC40,45.7,41.626340,40.2,28,SUV,C,SE,1020,1.0,0,1.271717e-02,55.165540
1165,190,2023,0.031752,Volvo,XC40,45.7,43.926660,40.2,28,SUV,C,SE,1965,1.0,0,8.558141e-03,64.045543


In [28]:
# Spot-check a single product: positional row 4 (Audi e-tron, 2019 in the saved output).
logit_df2.iloc[4]

ID                                3
Year                           2019
Market share                0.04063
Manufacturer                   Audi
Model                        e-tron
Range                          37.5
Price                     97.970447
HP                             40.2
Chargetime                       17
Type                            SUV
Segment                           F
Country                          DE
Sales                           222
const                           1.0
China                             0
logit_market_share              0.0
Cost                  410797.769781
Name: 28, dtype: object