In [1]:
%reset -f
%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
from linearmodels.iv import IV2SLS
import functions as fun
import statsmodels.formula.api as sm
import seaborn as sns
from matplotlib import pyplot as plt

In [2]:
# Load the raw panel (the displayed head shows one row per model ID and year).
df = pd.read_csv('dataset.csv')

# Number of rows in the raw file. len() counts every row, whereas
# df['ID'].count() would silently skip rows with a missing ID and make the
# constant column below the wrong length.
Nobs = len(df)

# Constant regressor; a scalar broadcasts to every row as float64.
df['Intercept'] = 1.0

# Give the market-share column a name that is a valid identifier, since it is
# later referenced inside a regression formula.
df = df.rename(columns={'Market share': 'Market_share'})

# Keep only observations with a non-zero market share: the regression below
# uses np.log(Market_share), which is undefined at zero.
df2 = df[df['Market_share'] != 0]

df2.head(20)

Unnamed: 0,ID,Year,Market_share,Manufacturer,Model,Range,Price,HP,Chargetime,Type,Segment,Country,Sales,Intercept
8,1,2021,0.010373,Aiways,U5,400,284621.7,201,34,SUV,C,CN,257,1.0
9,1,2022,0.005976,Aiways,U5,400,313681.829,201,34,SUV,C,CN,183,1.0
10,1,2023,0.00286,Aiways,U5,400,264524.0,201,34,SUV,C,CN,177,1.0
21,2,2023,4.8e-05,Aiways,U6,405,360638.0,214,34,SUV,C,CN,3,1.0
28,3,2019,0.04063,Audi,e-tron,375,979704.475,402,17,SUV,F,DE,222,1.0
29,3,2020,0.03468,Audi,e-tron,375,890101.41,402,17,SUV,F,DE,491,1.0
30,3,2021,0.010494,Audi,e-tron,375,800035.193,402,17,SUV,F,DE,260,1.0
31,3,2022,0.01757,Audi,e-tron,375,789723.656,402,17,SUV,F,DE,538,1.0
32,3,2023,0.001099,Audi,e-tron,375,673037.728,402,17,SUV,F,DE,68,1.0
41,4,2021,0.003391,Audi,e-tron GT,472,1278896.11,522,17,Sedan,F,DE,84,1.0


In [3]:
# Work on an independent copy with a fresh 0..n-1 index, so later column
# assignments never touch the filtered frame `df2`.
data = df2.reset_index(drop=True).copy()
data.head(20)

Unnamed: 0,ID,Year,Market_share,Manufacturer,Model,Range,Price,HP,Chargetime,Type,Segment,Country,Sales,Intercept
0,1,2021,0.010373,Aiways,U5,400,284621.7,201,34,SUV,C,CN,257,1.0
1,1,2022,0.005976,Aiways,U5,400,313681.829,201,34,SUV,C,CN,183,1.0
2,1,2023,0.00286,Aiways,U5,400,264524.0,201,34,SUV,C,CN,177,1.0
3,2,2023,4.8e-05,Aiways,U6,405,360638.0,214,34,SUV,C,CN,3,1.0
4,3,2019,0.04063,Audi,e-tron,375,979704.475,402,17,SUV,F,DE,222,1.0
5,3,2020,0.03468,Audi,e-tron,375,890101.41,402,17,SUV,F,DE,491,1.0
6,3,2021,0.010494,Audi,e-tron,375,800035.193,402,17,SUV,F,DE,260,1.0
7,3,2022,0.01757,Audi,e-tron,375,789723.656,402,17,SUV,F,DE,538,1.0
8,3,2023,0.001099,Audi,e-tron,375,673037.728,402,17,SUV,F,DE,68,1.0
9,4,2021,0.003391,Audi,e-tron GT,472,1278896.11,522,17,Sedan,F,DE,84,1.0


In [4]:
# Rescale for easier interpretation: after dividing, each coefficient is the
# change in market share (%) for a change of
#   Price: 10,000   HP: 10   Range: 10
# in the original units.
# Note: this cell divides in place, so running it twice rescales twice.
rescale_divisors = {'Price': 10_000, 'HP': 10, 'Range': 10}
for column_name, divisor in rescale_divisors.items():
    data[column_name] = data[column_name] / divisor

In [5]:
# Indicator column: 1 when the country code is 'CN', otherwise 0.
is_chinese = data['Country'] == 'CN'
data['China'] = is_chinese.astype(int)

# IV

In [6]:
# Add one instrument column per characteristic. The displayed result contains
# Range_BLP, HP_BLP and Chargetime_BLP, and the regression cell reads them
# from `data`, so fun.BLP evidently writes them onto `data` itself.
for characteristic in ('Range', 'HP', 'Chargetime'):
    instrumented = fun.BLP(data, characteristic)

# Show the value returned by the last call, as the original cell did.
instrumented

Unnamed: 0,ID,Year,Market_share,Manufacturer,Model,Range,Price,HP,Chargetime,Type,Segment,Country,Sales,Intercept,China,Range_BLP,HP_BLP,Chargetime_BLP
0,1,2021,0.010373,Aiways,U5,40.0,28.462170,20.1,34,SUV,C,CN,257,1.0,1,2151.3,1358.3,1861
1,1,2022,0.005976,Aiways,U5,40.0,31.368183,20.1,34,SUV,C,CN,183,1.0,1,3218.0,2033.9,2664
2,1,2023,0.002860,Aiways,U5,40.0,26.452400,20.1,34,SUV,C,CN,177,1.0,1,4177.4,2640.3,3241
3,2,2023,0.000048,Aiways,U6,40.5,36.063800,21.4,34,SUV,C,CN,3,1.0,1,4176.9,2639.0,3241
4,3,2019,0.040630,Audi,e-tron,37.5,97.970447,40.2,17,SUV,F,DE,222,1.0,0,580.9,390.1,570
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
329,189,2023,0.000065,Volvo,EX30,47.5,36.824500,26.8,28,SUV,B,SE,4,1.0,0,4169.9,2633.6,3247
330,190,2021,0.014652,Volvo,XC40,45.7,46.206060,40.2,28,SUV,C,SE,363,1.0,0,2145.6,1338.2,1867
331,190,2022,0.033310,Volvo,XC40,45.7,41.626340,40.2,28,SUV,C,SE,1020,1.0,0,3212.3,2013.8,2670
332,190,2023,0.031752,Volvo,XC40,45.7,43.926660,40.2,28,SUV,C,SE,1965,1.0,0,4171.7,2620.2,3247


In [7]:
# 2SLS specification in linearmodels formula syntax: the bracketed term marks
# Price as endogenous and lists the three *_BLP columns as its instruments;
# Range, HP, Chargetime and China enter as exogenous regressors.
formula = (
    'np.log(Market_share) ~ 1 + '
    '[Price ~ Range_BLP + HP_BLP + Chargetime_BLP] + '
    'Range + HP + Chargetime + China '
)

# Fit with heteroskedasticity-robust standard errors.
iv_model = IV2SLS.from_formula(formula, data)
IV = iv_model.fit(cov_type='robust')
IV.summary

0,1,2,3
Dep. Variable:,np.log(Market_share),R-squared:,-5.7877
Estimator:,IV-2SLS,Adj. R-squared:,-5.8912
No. Observations:,334,F-statistic:,28.868
Date:,"Sun, May 12 2024",P-value (F-stat),0.0000
Time:,22:43:25,Distribution:,chi2(5)
Cov. Estimator:,robust,,
,,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
Intercept,-2.8041,2.3134,-1.2121,0.2255,-7.3383,1.7300
Range,0.1995,0.0588,3.3923,0.0007,0.0842,0.3148
HP,0.3270,0.0955,3.4228,0.0006,0.1398,0.5143
Chargetime,-0.0848,0.0305,-2.7760,0.0055,-0.1447,-0.0249
China,-5.4735,1.1202,-4.8862,0.0000,-7.6690,-3.2779
Price,-0.3222,0.0761,-4.2313,0.0000,-0.4714,-0.1729


# Willingness to pay

In [8]:
# Willingness to pay (WTP) for characteristic k is beta_k / -alpha, where alpha
# is the price coefficient.
#
# Coefficients are selected by label rather than by position: an integer key on
# a string-labelled Series (IV.params[-1]) is deprecated in recent pandas, and
# positional access silently depends on the order of terms in the formula.
alpha = IV.params['Price']        # Price coefficient
beta = IV.params.drop('Price')    # all other coefficients, original order (incl. Intercept)

# The constant has no WTP interpretation, so leave it out.
characteristics = beta.drop('Intercept', errors='ignore')
beta_alpha_ratio = [b / -alpha for b in characteristics]  # Willingness to pay (excluding constant)

for name, wtp in zip(characteristics.index, beta_alpha_ratio):
    print('W2P:', name, wtp)

W2P: Range 0.6192729655546761
W2P: HP 1.015016128898077
W2P: Chargetime -0.26322153555449346
W2P: China -16.989110018549052


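NOTE: Amounts use English thousands separators (commas). Price is measured in units of 10,000 DKK, so each ratio above is multiplied by 10,000.\
6,192 DKK for every 10-unit increase in Range\
10,150 DKK for every 10-unit increase in HP\
-2,632 DKK for every 1-unit increase in Chargetime (Chargetime was not rescaled, so this is per unit, not per 10)\
-169,891 DKK for Chinese cars (-16.989 × 10,000 DKK; China is a 0/1 dummy, so -16,989 DKK would drop a factor of 10)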

# Logit

In [9]:
# IMPORTANT: the CCPs returned by fun.ccp are ordered by Year and then ID, so
#            the frame that receives the CCP column must be sorted the same way.
# NOTE(review): the unsorted `data` frame is passed to fun.ccp while X is built
#               from the sorted `logit_data`; confirm fun.ccp does not rely on
#               the two having the same row order.
regressors = ['Intercept', 'Range', 'HP', 'Chargetime', 'China']
logit_data = data.sort_values(by=['Year', 'ID'], ignore_index=True)
X = logit_data.loc[:, regressors]
p_j = logit_data.loc[:, 'Price']
logit_data['CCP'] = fun.ccp(alpha, beta, data, X)

In [10]:
# Inspect every year of a single model.
is_model_y = logit_data['Model'] == 'Model Y'
logit_data.loc[is_model_y]

Unnamed: 0,ID,Year,Market_share,Manufacturer,Model,Range,Price,HP,Chargetime,Type,Segment,Country,Sales,Intercept,China,Range_BLP,HP_BLP,Chargetime_BLP,CCP
145,175,2021,0.061514,Tesla,Model Y,50.6,50.3239,50.6,27,SUV,D,US,1524,1.0,0,2140.7,1327.8,1868,0.544861
222,175,2022,0.075602,Tesla,Model Y,50.6,52.11506,50.6,27,SUV,D,US,2315,1.0,0,3207.4,2003.4,2671,0.325551
319,175,2023,0.290103,Tesla,Model Y,50.6,40.55544,50.6,27,SUV,D,US,17953,1.0,0,4166.8,2609.8,3248,0.675284


In [11]:
# Pairwise probability ratios for a single market year; the displayed table is
# model-by-model with ones on the diagonal.
RATIO_YEAR = 2013
probability_ratio = fun.probability_ratio(logit_data, RATIO_YEAR)
probability_ratio

Model,I3,Leaf,Zoe,Fortwo,Model S,up!
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
I3,1.0,9.342696,1.535961,4839.801371,0.001193,39.579667
Leaf,0.107035,1.0,0.164402,518.030468,0.000128,4.236429
Zoe,0.651058,6.082638,1.0,3150.991658,0.000777,25.768661
Fortwo,0.000207,0.00193,0.000317,1.0,0.0,0.008178
Model S,838.157302,7830.649183,1287.377214,4056514.860533,1.0,33173.987048
up!,0.025265,0.236048,0.038807,122.279992,3e-05,1.0


In [12]:
# Label for each model-year observation, e.g. 'I3_2013'.
year_label = logit_data['Year'].astype(str)
logit_data['Model_year'] = logit_data['Model'] + '_' + year_label

# Marginal effects per model-year (rows) and estimated term (columns), as shown
# in the displayed table.
marginal_effects = fun.marginal_effects(logit_data, IV)
marginal_effects

Unnamed: 0_level_0,Intercept,Range,HP,Chargetime,China,Price
Model_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
I3_2013,-0.003335,0.000237,0.000389,-0.000101,-0.006509,-0.000383
Leaf_2013,-0.000357,0.000025,0.000042,-0.000011,-0.000697,-0.000041
Zoe_2013,-0.002172,0.000155,0.000253,-0.000066,-0.004239,-0.00025
Fortwo_2013,-0.000001,0.0,0.0,-0.0,-0.000001,-0.0
Model S_2013,-0.005942,0.000423,0.000693,-0.00018,-0.011598,-0.000683
...,...,...,...,...,...,...
up!_2023,-0.000003,0.0,0.0,-0.0,-0.000007,-0.0
C40_2023,-0.01158,0.000824,0.00135,-0.00035,-0.022603,-0.00133
EX30_2023,-0.001299,0.000092,0.000151,-0.000039,-0.002536,-0.000149
XC40_2023,-0.007346,0.000523,0.000857,-0.000222,-0.014338,-0.000844


In [13]:
# Average marginal effect of each estimated term across all model-years.
estimated_terms = IV.params.index
marginal_effects.loc[:, estimated_terms].mean(axis=0)

Intercept    -0.039760
Range         0.002829
HP            0.004637
Chargetime   -0.001202
China        -0.077608
Price        -0.004568
dtype: float64

In [14]:
# Elasticities computed by the project helper from the logit frame and the
# fitted IV results. In the displayed table, rows are Model_year labels and
# columns are the estimated terms (Intercept, Range, HP, Chargetime, China, Price).
elasticity = fun.elasticity(logit_data, IV)
elasticity

Unnamed: 0_level_0,Intercept,Range,HP,Chargetime,China,Price
Model_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
I3_2013,-2.800804,5.878661,5.454609,-1.524643,-0.0,-8.044782
Leaf_2013,-2.803785,6.543231,4.806473,-3.646079,-0.0,-8.172212
Zoe_2013,-2.801969,7.276622,4.378573,-4.745307,-0.0,-5.574133
Fortwo_2013,-2.804142,2.533829,2.616101,-5.0882,-0.0,-6.780409
Model S_2013,-0.005954,0.025801,0.046871,-0.005402,-0.0,-0.049227
...,...,...,...,...,...,...
up!_2023,-2.804139,5.107556,2.648799,-4.070556,-0.0,-5.633317
C40_2023,-2.792514,9.258805,13.091395,-2.364647,-0.0,-13.817402
EX30_2023,-2.802843,9.472531,8.759877,-2.373393,-0.0,-11.858429
XC40_2023,-2.796777,9.093849,13.111381,-2.368257,-0.0,-14.114894


In [15]:
# Average elasticity for each estimated term across all model-years.
elasticity_terms = IV.params.index
elasticity.loc[:, elasticity_terms].mean(axis=0)

Intercept     -2.711790
Range          7.534031
HP             7.744819
Chargetime    -2.808678
China         -0.638151
Price        -14.531440
dtype: float64

# Analysis on subsample

### The subsample consists of the 2023 market with:
The 5 highest market share models: Model Y, Model 3, Enyaq iV, ID.4 and Q4 e-tron\
The 5 highest market share Chinese models: 4, Euniq6, Atto 3, Marvel R and Dolphin\
The highest CCP model: Ocean\
The highest CCP Chinese model: Seal\
The highest CCP Korean model (so that we have models from outside the EU, US and China): Ioniq 6\
The highest priced model: I7\
Polestar 2, included out of general interest


In [16]:
# Hand-picked models for the subsample analysis ('2' is the Polestar 2 and
# '4' is the MG 4).
selected_models = [
    'I7', 'Model 3', 'Model Y', 'ID.4', 'Enyaq iV', 'Ocean', '2', 'Ioniq 6', 'Q4 e-tron',
    '4', 'Euniq6', 'Atto 3', 'Marvel R', 'Dolphin', 'Seal',
]

# Restrict to the 2023 market and to the selected models, then renumber rows.
in_2023 = logit_data['Year'] == 2023
is_selected = logit_data['Model'].isin(selected_models)
analysis_data = logit_data.loc[in_2023 & is_selected].copy().reset_index(drop=True)
analysis_data

Unnamed: 0,ID,Year,Market_share,Manufacturer,Model,Range,Price,HP,Chargetime,Type,Segment,Country,Sales,Intercept,China,Range_BLP,HP_BLP,Chargetime_BLP,CCP,Model_year
0,5,2023,0.037958,Audi,Q4 e-tron,49.6,65.855307,28.1,28,SUV,C,DE,2349,1.0,0,4167.8,2632.3,3247,9.346262e-08,Q4 e-tron_2023
1,13,2023,0.000679,BMW,I7,60.1,186.519444,44.9,28,Sedan,F,DE,42,1.0,0,4157.3,2615.5,3247,2.416563e-21,I7_2023
2,17,2023,0.003587,BYD,Atto 3,42.0,31.0,20.1,37,SUV,C,CN,222,1.0,1,4175.4,2640.3,3238,2.209555e-07,Atto 3_2023
3,18,2023,0.00328,BYD,Dolphin,42.7,23.323517,9.3,40,Hatchback,C,CN,203,1.0,1,4174.7,2651.1,3235,6.83502e-08,Dolphin_2023
4,20,2023,0.000517,BYD,Seal,54.0,32.062,52.2,38,Sedan,D,CN,32,1.0,1,4163.4,2608.2,3237,0.05721258,Seal_2023
5,49,2023,0.004686,Fisker,Ocean,70.0,60.23792,56.3,35,SUV,D,DK,290,1.0,0,4147.4,2604.1,3240,0.1868029,Ocean_2023
6,69,2023,0.003199,Hyundai,Ioniq 6,56.7,43.629166,32.0,16,Sedan,D,KR,198,1.0,0,4160.7,2628.4,3259,0.004915012,Ioniq 6_2023
7,100,2023,0.003991,Maxus,Euniq6,35.4,37.950676,17.4,45,MPV,M,CN,247,1.0,1,4182.0,2643.0,3230,1.323757e-09,Euniq6_2023
8,117,2023,0.01199,MG,4,42.7,26.589191,24.1,41,Hatchback,C,CN,742,1.0,1,4174.7,2636.3,3234,2.772515e-06,4_2023
9,119,2023,0.003361,MG,Marvel R,38.8,30.23565,17.7,38,SUV,C,CN,208,1.0,1,4178.6,2642.7,3237,6.256067e-08,Marvel R_2023


In [17]:
# Cross elasticities for the 2023 subsample, computed by the project helper.
# The displayed result is indexed by (Model_year, Model_year, characteristic)
# and has a single Cross_Elasticity column.
# NOTE(review): the call emits a warning pointing at a row-by-row `.loc`
# assignment inside the helper (see the cell output); worth checking there.
cross_elasticity = fun.cross_elasticity_1(analysis_data, IV)
cross_elasticity

  cross_elasticity_table.loc[(model_labels[i], model_labels[j], X.columns[k]), 'Cross_Elasticity'] = -coefficients[k] * X.iloc[j, k] * ccp.iloc[j]


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Cross_Elasticity
Model_year,Model_year,Unnamed: 2_level_1,Unnamed: 3_level_1
Q4 e-tron_2023,Q4 e-tron_2023,Intercept,0.0
Q4 e-tron_2023,Q4 e-tron_2023,Range,-0.000001
Q4 e-tron_2023,Q4 e-tron_2023,HP,-0.000001
Q4 e-tron_2023,Q4 e-tron_2023,Chargetime,0.0
Q4 e-tron_2023,Q4 e-tron_2023,China,0.0
...,...,...,...
ID.4_2023,ID.4_2023,Range,-0.002285
ID.4_2023,ID.4_2023,HP,-0.002096
ID.4_2023,ID.4_2023,Chargetime,0.000542
ID.4_2023,ID.4_2023,China,0.0


# Cost

In [18]:
# Implied cost per model-year from the project helper, stored on logit_data
# (same scaled units as Price, i.e. 10,000s).
# NOTE(review): the displayed values are consistent with Price + alpha / CCP
# (e.g. Model S_2013: 71.956 - 0.3222 / 0.9979 = 71.63). The usual
# single-product logit first-order condition gives
# cost = Price + 1 / (alpha * (1 - CCP)); verify fun.cost_original, since the
# current form makes cost hugely negative whenever CCP is small (290 rows have
# Cost < 0 in the check below).
logit_data['Cost']=fun.cost_original(logit_data, alpha)
logit_data

Unnamed: 0,ID,Year,Market_share,Manufacturer,Model,Range,Price,HP,Chargetime,Type,...,Country,Sales,Intercept,China,Range_BLP,HP_BLP,Chargetime_BLP,CCP,Model_year,Cost
0,10,2013,0.002237,BMW,I3,29.5,25.00000,16.7,18,Hatchback,...,DE,1,1.0,0,168.5,111.7,237,1.190560e-03,I3_2013,-2.456078e+02
1,132,2013,0.472036,Nissan,Leaf,32.8,25.36900,14.7,43,Hatchback,...,JP,211,1.0,0,165.2,113.7,212,1.274322e-04,Leaf_2013,-2.502838e+03
2,158,2013,0.205817,Renault,Zoe,36.5,17.31500,13.4,56,Hatchback,...,FR,92,1.0,0,161.5,115.0,199,7.751237e-04,Zoe_2013,-3.983281e+02
3,167,2013,0.002237,Smart,Fortwo,12.7,21.04575,8.0,60,Hatchback,...,DE,1,1.0,0,185.3,120.4,195,2.459936e-07,Fortwo_2013,-1.309667e+06
4,173,2013,0.250559,Tesla,Model S,60.9,71.95631,67.5,30,Liftback,...,US,112,1.0,0,137.1,60.9,225,9.978766e-01,Model S_2013,7.163345e+01
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
329,187,2023,0.001277,Volkswagen,up!,25.6,17.48530,8.1,48,Hatchback,...,DE,79,1.0,0,4191.8,2652.3,3227,1.207733e-06,up!_2023,-2.667425e+05
330,188,2023,0.008354,Volvo,C40,46.6,43.06649,40.2,28,SUV,...,SE,517,1.0,0,4170.8,2620.2,3247,4.146832e-03,C40_2023,-3.462530e+01
331,189,2023,0.000065,Volvo,EX30,47.5,36.82450,26.8,28,SUV,...,SE,4,1.0,0,4169.9,2633.6,3247,4.634532e-04,EX30_2023,-6.583371e+02
332,190,2023,0.031752,Volvo,XC40,45.7,43.92666,40.2,28,SUV,...,SE,1965,1.0,0,4171.7,2620.2,3247,2.626512e-03,XC40_2023,-7.873594e+01


In [19]:
# Sanity check: observations whose implied cost exceeds the price.
cost_exceeds_price = logit_data['Cost'] > logit_data['Price']
logit_data.loc[cost_exceeds_price]

Unnamed: 0,ID,Year,Market_share,Manufacturer,Model,Range,Price,HP,Chargetime,Type,...,Country,Sales,Intercept,China,Range_BLP,HP_BLP,Chargetime_BLP,CCP,Model_year,Cost


In [20]:
# Sanity check: number of observations with a negative implied cost.
has_negative_cost = logit_data['Cost'] < 0
int(has_negative_cost.sum())

290