In [96]:
%reset -f
%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
from linearmodels.iv import IV2SLS
import functions as fun
import statsmodels.formula.api as sm
import seaborn as sns
from matplotlib import pyplot as plt

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [97]:
# Load the raw data set and add a constant column for later use as an intercept.
df = pd.read_csv('dataset.csv')
# len(df) counts every row. Series.count() skips NaN, so a single missing ID
# would have produced an intercept array of the wrong length.
Nobs = len(df)
# 1-D vector of ones (a column is one-dimensional; no need for an (Nobs, 1) array).
df['Intercept'] = np.ones(Nobs)
# Replace the space in the column name so it can be used in a model formula.
df = df.rename(columns={'Market share': 'Market_share'})
# Drop rows with zero market share; the demand regression takes np.log(Market_share).
df2 = df[df['Market_share'] != 0]

df2.head(20)

Unnamed: 0,ID,Year,Market_share,Manufacturer,Model,Range,Price,HP,Chargetime,Type,Segment,Country,Sales,Intercept
8,1,2021,0.010373,Aiways,U5,400,284621.7,201,34,SUV,C,CN,257,1.0
9,1,2022,0.005976,Aiways,U5,400,313681.829,201,34,SUV,C,CN,183,1.0
10,1,2023,0.00286,Aiways,U5,400,264524.0,201,34,SUV,C,CN,177,1.0
21,2,2023,4.8e-05,Aiways,U6,405,360638.0,214,34,SUV,C,CN,3,1.0
28,3,2019,0.04063,Audi,e-tron,375,979704.475,402,17,SUV,F,DE,222,1.0
29,3,2020,0.03468,Audi,e-tron,375,890101.41,402,17,SUV,F,DE,491,1.0
30,3,2021,0.010494,Audi,e-tron,375,800035.193,402,17,SUV,F,DE,260,1.0
31,3,2022,0.01757,Audi,e-tron,375,789723.656,402,17,SUV,F,DE,538,1.0
32,3,2023,0.001099,Audi,e-tron,375,673037.728,402,17,SUV,F,DE,68,1.0
41,4,2021,0.003391,Audi,e-tron GT,472,1278896.11,522,17,Sedan,F,DE,84,1.0


In [98]:
# Independent working copy of the filtered frame, renumbered 0..n-1
data = df2.reset_index(drop=True).copy()
data.head(20)

Unnamed: 0,ID,Year,Market_share,Manufacturer,Model,Range,Price,HP,Chargetime,Type,Segment,Country,Sales,Intercept
0,1,2021,0.010373,Aiways,U5,400,284621.7,201,34,SUV,C,CN,257,1.0
1,1,2022,0.005976,Aiways,U5,400,313681.829,201,34,SUV,C,CN,183,1.0
2,1,2023,0.00286,Aiways,U5,400,264524.0,201,34,SUV,C,CN,177,1.0
3,2,2023,4.8e-05,Aiways,U6,405,360638.0,214,34,SUV,C,CN,3,1.0
4,3,2019,0.04063,Audi,e-tron,375,979704.475,402,17,SUV,F,DE,222,1.0
5,3,2020,0.03468,Audi,e-tron,375,890101.41,402,17,SUV,F,DE,491,1.0
6,3,2021,0.010494,Audi,e-tron,375,800035.193,402,17,SUV,F,DE,260,1.0
7,3,2022,0.01757,Audi,e-tron,375,789723.656,402,17,SUV,F,DE,538,1.0
8,3,2023,0.001099,Audi,e-tron,375,673037.728,402,17,SUV,F,DE,68,1.0
9,4,2021,0.003391,Audi,e-tron GT,472,1278896.11,522,17,Sedan,F,DE,84,1.0


In [99]:
# Rescale regressors for easier interpretation of the coefficients.
# Each column is recomputed from the unscaled values in df2, so re-running this
# cell does not divide a second time. `data` holds the rows of df2 in the same
# order with a reset index, hence the positional .to_numpy().
data['Price'] = df2['Price'].to_numpy() / 10_000  # coefficient: effect of a 10,000 increase in price
data['HP'] = df2['HP'].to_numpy() / 10            # coefficient: effect of a 10-unit increase in HP
data['Range'] = df2['Range'].to_numpy() / 10      # coefficient: effect of a 10-unit increase in range

In [100]:
# Indicator column: 1 when the Country code is 'CN', 0 otherwise
data['China'] = data['Country'].eq('CN').astype(int)

# OLS

In [101]:
# Regress log market share on price and car characteristics,
# reporting HC3 robust standard errors.
demand_formula = 'np.log(Market_share) ~ Price + Range + HP + Chargetime + China'
OLS = sm.ols(formula=demand_formula, data=data).fit(cov_type='HC3')
OLS.summary()

0,1,2,3
Dep. Variable:,np.log(Market_share),R-squared:,0.207
Model:,OLS,Adj. R-squared:,0.195
Method:,Least Squares,F-statistic:,21.01
Date:,"Sun, 12 May 2024",Prob (F-statistic):,3.18e-18
Time:,14:35:51,Log-Likelihood:,-716.37
No. Observations:,334,AIC:,1445.0
Df Residuals:,328,BIC:,1468.0
Df Model:,5,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,-6.4240,0.700,-9.171,0.000,-7.797,-5.051
Price,-0.0322,0.005,-6.537,0.000,-0.042,-0.023
Range,0.0835,0.014,5.836,0.000,0.055,0.112
HP,0.0017,0.012,0.136,0.892,-0.022,0.026
Chargetime,-0.0132,0.012,-1.058,0.290,-0.038,0.011
China,-2.2275,0.362,-6.152,0.000,-2.937,-1.518

0,1,2,3
Omnibus:,16.332,Durbin-Watson:,0.928
Prob(Omnibus):,0.0,Jarque-Bera (JB):,17.595
Skew:,-0.56,Prob(JB):,0.000151
Kurtosis:,3.096,Cond. No.,450.0


# Willingness to pay

In [102]:
# Pick coefficients by label instead of by position: integer keys on a
# label-indexed Series are deprecated in pandas, and positional lookups silently
# pick the wrong term if the order of the formula changes.
alpha = OLS.params['Price']  # Price coefficient
beta = list(OLS.params.drop('Price'))  # Intercept followed by the remaining coefficients

# Characteristics for which a willingness to pay is reported (no constant, no price)
characteristics = OLS.params.index.drop(['Intercept', 'Price'])
# Willingness to pay: each coefficient divided by minus the price coefficient
beta_alpha_ratio = [OLS.params[name] / -alpha for name in characteristics]

for name, w2p in zip(characteristics, beta_alpha_ratio):
    print('W2P:', name, w2p)

W2P: Range 2.592680339023965
W2P: HP 0.05154449225980682
W2P: Chargetime -0.40934384664303847
W2P: China -69.14201739405368


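NOTE: Amounts use English thousands separators. Price is measured in units of 10,000 DKK, so each ratio above is multiplied by 10,000.\
25,927 DKK for every 10-unit increase in Range\
515 DKK for every 10-unit increase in HP\
-4,093 DKK for every 1-unit increase in Chargetime (Chargetime was not rescaled by 10)\
-691,420 DKK for Chinese cars (-69.142 × 10,000, i.e. not -69,000 DKK)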

# Logit

In [103]:
# IMPORTANT: The data must be sorted by year and ID before running the function,
#            because the CCPs returned by the function are sorted by year and ID.
logit_data = data.sort_values(['Year', 'ID']).reset_index(drop=True)
X = logit_data[['Intercept', 'Range', 'HP', 'Chargetime', 'China']]
p_j = logit_data['Price']
# Pass the sorted frame rather than the unsorted `data`, so the frame and X given
# to fun.ccp share the row order of the column the result is stored in.
# NOTE(review): assumes fun.ccp only reads from the frame it is given - confirm in functions.py.
logit_data['CCP'] = fun.ccp(alpha, beta, logit_data, X)

In [104]:
# Model-by-model table from the project helper for a single year
# (presumably ratios of choice probabilities - see functions.py)
ratio_year = 2013
probability_ratio = fun.probability_ratio(logit_data, ratio_year)
probability_ratio

Model,I3,Leaf,Zoe,Fortwo,Model S,up!
Model,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
I3,1.0,1.071713,0.722054,6.322656,0.35485,1.632381
Leaf,0.933086,1.0,0.673739,5.899579,0.331105,1.523151
Zoe,1.384937,1.484255,1.0,8.756481,0.491445,2.260745
Fortwo,0.158161,0.169504,0.114201,1.0,0.056124,0.25818
Model S,2.818092,3.020186,2.034816,17.817827,1.0,4.600199
up!,0.612602,0.656534,0.442332,3.873273,0.217382,1.0


In [105]:
# Label combining model name and year, e.g. 'I3_2013'
year_label = logit_data['Year'].astype(str)
logit_data['Model_year'] = logit_data['Model'] + '_' + year_label

# Marginal effects from the project helper, using the fitted demand model
marginal_effects = fun.marginal_effects(logit_data, OLS)
marginal_effects

Unnamed: 0_level_0,Intercept,Price,Range,HP,Chargetime,China
Model_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
I3_2013,-0.795428,-0.003989,0.010342,0.000206,-0.001633,-0.275805
Leaf_2013,-0.75061,-0.003764,0.009759,0.000194,-0.001541,-0.260265
Zoe_2013,-1.029828,-0.005164,0.01339,0.000266,-0.002114,-0.357081
Fortwo_2013,-0.143736,-0.000721,0.001869,0.000037,-0.000295,-0.049839
Model S_2013,-1.551645,-0.007781,0.020174,0.000401,-0.003185,-0.538015
...,...,...,...,...,...,...
up!_2023,-0.031202,-0.000156,0.000406,0.000008,-0.000064,-0.010819
C40_2023,-0.107255,-0.000538,0.001395,0.000028,-0.00022,-0.037189
EX30_2023,-0.13758,-0.00069,0.001789,0.000036,-0.000282,-0.047704
XC40_2023,-0.096932,-0.000486,0.00126,0.000025,-0.000199,-0.03361


In [106]:
# Column means of the marginal effects, in the order of the model coefficients
marginal_effects.loc[:, OLS.params.index].mean()

Intercept    -0.184726
Price        -0.000926
Range         0.002402
HP            0.000048
Chargetime   -0.000379
China        -0.064051
dtype: float64

In [107]:
# Elasticities from the project helper, using the fitted model; displayed below
elasticity = fun.elasticity(logit_data, OLS)
elasticity

Unnamed: 0_level_0,Intercept,Price,Range,HP,Chargetime,China
Model_year,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
I3_2013,-5.493923,-0.688783,2.107238,0.023716,-0.203003,-0.0
Leaf_2013,-5.556159,-0.706868,2.369504,0.021112,-0.490446,-0.0
Zoe_2013,-5.135897,-0.445963,2.437352,0.017789,-0.590409,-0.0
Fortwo_2013,-6.276908,-0.662476,1.036474,0.01298,-0.773117,-0.0
Model S_2013,-3.802935,-1.372296,3.011239,0.066354,-0.234201,-0.0
...,...,...,...,...,...,...
up!_2023,-6.392657,-0.56055,2.127798,0.013385,-0.629899,-0.0
C40_2023,-6.314904,-1.363849,3.826147,0.06562,-0.362972,-0.0
EX30_2023,-6.283352,-1.160348,3.880556,0.043528,-0.361158,-0.0
XC40_2023,-6.325571,-1.393439,3.75859,0.065731,-0.363585,-0.0


In [108]:
# Column means of the elasticities, in the order of the model coefficients
elasticity.loc[:, OLS.params.index].mean()

Intercept    -6.212443
Price        -1.466067
Range         3.181566
HP            0.040390
Chargetime   -0.435161
China        -0.259694
dtype: float64

In [109]:
# Sanity check: coefficients next to the first rows of the regressors and the CCPs
print(OLS.params)

# Regressor columns in the same order as the coefficients (includes Intercept and Price)
X = logit_data.loc[:, OLS.params.index]
print(X.head(5))

CCP = logit_data.loc[:, 'CCP']
print(CCP.head(5))


Intercept    -6.424012
Price        -0.032216
Range         0.083525
HP            0.001661
Chargetime   -0.013187
China        -2.227452
dtype: float64
   Intercept     Price  Range    HP  Chargetime  China
0        1.0  25.00000   29.5  16.7          18      0
1        1.0  25.36900   32.8  14.7          43      0
2        1.0  17.31500   36.5  13.4          56      0
3        1.0  21.04575   12.7   8.0          60      0
4        1.0  71.95631   60.9  67.5          30      0
0    0.144783
1    0.135095
2    0.200516
3    0.022899
4    0.408012
Name: CCP, dtype: float64


In [110]:
#cross_elasticity = fun.cross_elasticity_1(logit_data, OLS)
#cross_elasticity[:50]

# Cost

In [111]:
# Cost per row from the project helper, given the frame and the price coefficient
model_cost = fun.cost_original(logit_data, alpha)
logit_data['Cost'] = model_cost
logit_data

Unnamed: 0,ID,Year,Market_share,Manufacturer,Model,Range,Price,HP,Chargetime,Type,Segment,Country,Sales,Intercept,China,CCP,Model_year,Cost
0,10,2013,0.002237,BMW,I3,29.5,25.00000,16.7,18,Hatchback,B,DE,1,1.0,0,0.144783,I3_2013,24.777491
1,132,2013,0.472036,Nissan,Leaf,32.8,25.36900,14.7,43,Hatchback,C,JP,211,1.0,0,0.135095,Leaf_2013,25.130534
2,158,2013,0.205817,Renault,Zoe,36.5,17.31500,13.4,56,Hatchback,B,FR,92,1.0,0,0.200516,Zoe_2013,17.154336
3,167,2013,0.002237,Smart,Fortwo,12.7,21.04575,8.0,60,Hatchback,A,DE,1,1.0,0,0.022899,Fortwo_2013,19.638900
4,173,2013,0.250559,Tesla,Model S,60.9,71.95631,67.5,30,Liftback,F,US,112,1.0,0,0.408012,Model S_2013,71.877353
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
329,187,2023,0.001277,Volkswagen,up!,25.6,17.48530,8.1,48,Hatchback,A,DE,79,1.0,0,0.004881,up!_2023,10.885023
330,188,2023,0.008354,Volvo,C40,46.6,43.06649,40.2,28,SUV,C,SE,517,1.0,0,0.016984,C40_2023,41.169712
331,189,2023,0.000065,Volvo,EX30,47.5,36.82450,26.8,28,SUV,B,SE,4,1.0,0,0.021896,EX30_2023,35.353201
332,190,2023,0.031752,Volvo,XC40,45.7,43.92666,40.2,28,SUV,C,SE,1965,1.0,0,0.015324,XC40_2023,41.824344


In [112]:
# Rows where the computed cost exceeds the price
cost_exceeds_price = logit_data['Cost'].gt(logit_data['Price'])
logit_data.loc[cost_exceeds_price]

Unnamed: 0,ID,Year,Market_share,Manufacturer,Model,Range,Price,HP,Chargetime,Type,Segment,Country,Sales,Intercept,China,CCP,Model_year,Cost


In [113]:
# Rows where the computed cost is negative
cost_is_negative = logit_data['Cost'].lt(0)
logit_data.loc[cost_is_negative]

Unnamed: 0,ID,Year,Market_share,Manufacturer,Model,Range,Price,HP,Chargetime,Type,Segment,Country,Sales,Intercept,China,CCP,Model_year,Cost
119,98,2021,0.002947,Maxus,Euniq,26.0,35.666533,17.4,45,MPV,M,CN,73,1.0,1,0.000583,Euniq_2021,-19.633416
183,98,2022,0.000294,Maxus,Euniq,26.0,35.666533,17.4,45,MPV,M,CN,9,1.0,1,0.00047,Euniq_2022,-32.88344
184,99,2022,0.001274,Maxus,Euniq5,28.9,37.209879,17.4,45,MPV,M,CN,39,1.0,1,0.00057,Euniq5_2022,-19.336489
251,21,2023,8.1e-05,BYD,Tang,40.0,56.01545,50.9,45,SUV,E,CN,5,1.0,1,0.000566,Tang_2023,-0.950097
276,99,2023,0.000145,Maxus,Euniq5,28.9,37.209879,17.4,45,MPV,M,CN,9,1.0,1,0.000388,Euniq5_2023,-45.833803
277,100,2023,0.003991,Maxus,Euniq6,35.4,37.950676,17.4,45,MPV,M,CN,247,1.0,1,0.000652,Euniq6_2023,-11.467629
289,118,2023,0.000986,MG,5,38.0,21.96918,15.4,42,Stationcar,C,CN,61,1.0,1,0.001405,5_2023,-0.952017
291,120,2023,0.002327,MG,ZS,33.1,27.85648,15.4,37,SUV,B,CN,144,1.0,1,0.000825,ZS_2023,-11.202007
313,167,2023,4.8e-05,Smart,Fortwo,12.7,21.04575,8.0,60,Hatchback,A,DE,3,1.0,0,0.001265,Fortwo_2023,-4.429224


In [116]:
# Regress the computed cost on car characteristics, with HC3 robust standard errors.
# NOTE(review): this rebinds `OLS`, which earlier cells use for the demand model;
# re-running those cells after this one will pick up the cost regression instead.
cost_formula = 'Cost ~  Range + HP + Chargetime + China'
OLS = sm.ols(formula=cost_formula, data=logit_data).fit(cov_type='HC3')
OLS.summary()

0,1,2,3
Dep. Variable:,Cost,R-squared:,0.701
Model:,OLS,Adj. R-squared:,0.698
Method:,Least Squares,F-statistic:,127.6
Date:,"Sun, 12 May 2024",Prob (F-statistic):,1.2400000000000001e-65
Time:,14:36:27,Log-Likelihood:,-1399.9
No. Observations:,334,AIC:,2810.0
Df Residuals:,329,BIC:,2829.0
Df Model:,4,,
Covariance Type:,HC3,,

0,1,2,3,4,5,6
,coef,std err,z,P>|z|,[0.025,0.975]
Intercept,4.0952,5.351,0.765,0.444,-6.392,14.583
Range,0.6368,0.169,3.770,0.000,0.306,0.968
HP,0.9544,0.095,9.994,0.000,0.767,1.142
Chargetime,-0.2687,0.067,-3.995,0.000,-0.400,-0.137
China,-35.2226,2.526,-13.946,0.000,-40.173,-30.272

0,1,2,3
Omnibus:,108.218,Durbin-Watson:,1.493
Prob(Omnibus):,0.0,Jarque-Bera (JB):,375.838
Skew:,1.408,Prob(JB):,2.4399999999999997e-82
Kurtosis:,7.368,Cond. No.,336.0


In [118]:
# Summary statistics of the computed cost column
logit_data.loc[:, 'Cost'].describe()

count    334.000000
mean      40.097286
std       29.320439
min      -45.833803
25%       24.484142
50%       31.815440
75%       46.549984
max      145.663128
Name: Cost, dtype: float64