In [52]:
%reset -f
%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
from tabulate import tabulate
from matplotlib import pyplot as plt
import scipy.stats as st
import statsmodels.api as sm
import seaborn as sns
import functions as fun
import OLS_Estimation as OLS


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [53]:
df = pd.read_csv('Dataset.csv')
#df = df[df['Market share'] != 0]
Nobs=df['ID'].count()
df['const']=np.ones((Nobs,1))
print(f'shape: {df.shape}')
df.head(5)

shape: (1232, 14)


Unnamed: 0,ID,Year,Sales,Market share,Manufacturer,Model,Range,Price,HP,Chargetime,Type,Segment,Country,const
0,1,2013,0,0.0,Aiways,U5,400,330825.789474,201,34,SUV,C,CH,1.0
1,1,2014,0,0.0,Aiways,U5,400,330825.789474,201,34,SUV,C,CH,1.0
2,1,2015,0,0.0,Aiways,U5,400,330825.789474,201,34,SUV,C,CH,1.0
3,1,2016,0,0.0,Aiways,U5,400,330825.789474,201,34,SUV,C,CH,1.0
4,1,2017,0,0.0,Aiways,U5,400,330825.789474,201,34,SUV,C,CH,1.0


# Creating dummies

In [54]:
df2 = df.copy()
df2 = pd.get_dummies(df2, columns=['Segment'], drop_first=True)
df2 = pd.get_dummies(df2, columns=['Year'], drop_first=True)
df2['China'] = (df2['Country'] == 'CH').astype(int)

# Creating log market share

In [55]:
df2['Market share'] = df2['Market share'].replace(0, 1)
df2['log_market_share'] = np.log(df2['Market share'])
df2['log_market_share'].head(5)

0    0.0
1    0.0
2    0.0
3    0.0
4    0.0
Name: log_market_share, dtype: float64

# Independent OLS

In [56]:
data = df2
#data = df2_2014
df = df#[df['Year'] == 2014 & (df['Market share'] != 1)]
print(f'shape: {data.shape}')
#data format
N = data['ID'].unique().size
T = df['Year'].unique().size
print(f'Data has N={N} and T={T} data is = {data.shape[0]}, {N*T}')
assert data.shape[0] == N*T, f'Error: data is not a balanced panel'



variables = ['Price', 'HP', 'Chargetime', 'Range','Segment_B', 'Segment_C', 'Segment_D', 'Segment_E', 'Segment_F', 'Segment_M', 'Segment_J', 'Year_2014', 'Year_2015', 'Year_2016', 'Year_2017', 'Year_2018', 'Year_2019', 'Year_2020', 'Year_2021', 'Year_2022', 'Year_2023', 'China']
#Declare variables
y = data["log_market_share"].values.reshape((N*T,1))

X = np.hstack([np.ones((N*T, 1))] + [data[var].values.reshape((N*T, 1)) for var in variables])
xnames = ['Cons'] + variables

print(f'Variables: {len(variables)}')





# Estimate with OLS
results = fun.est_OLS(y, X, xnames)
# fun.logit(y, X, xnames)


shape: (1232, 31)
Data has N=112 and T=11 data is = 1232, 1232
Variables: 22
                            OLS Regression Results                            
Dep. Variable:           Market share   R-squared:                       0.535
Model:                            OLS   Adj. R-squared:                  0.527
Method:                 Least Squares   F-statistic:                     63.25
Date:                Wed, 13 Mar 2024   Prob (F-statistic):          3.28e-183
Time:                        19:55:07   Log-Likelihood:                -2516.0
No. Observations:                1232   AIC:                             5078.
Df Residuals:                    1209   BIC:                             5196.
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------

In [57]:
#X skal være [i:i+1,2:] 2: for at fjerne consten og pris
#beta skal være [2:] for at fjerne konstant og pris-koefficienten og reshapes for at kunne lave matrixmultiplikaiton. 

#coefficients = OLS.get_coefficients_OLS(pd.read_csv('Dataset.csv'))
coefficients = results.params
print(f'coefficients: {coefficients[:3]}, shape: {coefficients.shape}')

coefficients_labels = xnames[1:] #Fjerne konstanten

model_labels = data['Model'].unique()
j = 0
for i in model_labels:
    j = j
    #print(f'Car {j}: {i}')
    j = j+1  
print(f'models: {model_labels[:3]}')

repeated_models = np.repeat(model_labels, T)

alpha = coefficients[1]
beta = coefficients
p_j = data['Price'].values.reshape((N*T,1)) 

coefficients: [-1.47693704e+00 -3.99841294e-07 -1.99868872e-03], shape: (23,)
models: ['U5' 'U6' 'e-tron']


In [59]:
ccp_list = fun.ccp(alpha, beta, X, p_j)
print(f'ccp_list: {len(ccp_list)}')
#print(f'ccp_list: {ccp_list.shape}')

choice probability sum: 1.0 
 3 highest probability: [[[0.00302612]]

 [[0.00304901]]

 [[0.00317268]]]
ccp_list: 1232


In [67]:
m = 3

In [68]:
fun.probability_ratio(ccp_list[:m], repeated_models[:m], repeated_models[:m])

Unnamed: 0,U5,U5.1,U5.2
U5,[[1.0]],[[0.9610204037923953]],[[1.007564234271441]]
U5,[[1.0405606333161947]],[[1.0]],[[1.0484316777202374]]
U5,[[0.9924925538103179]],[[0.9538055948237374]],[[1.0]]


In [69]:
fun.marginal_effects(ccp_list[:m], repeated_models[:m], coefficients_labels[:m], coefficients[:m])

Unnamed: 0,Price,HP,Chargetime
U5,[[-9.039467443489727e-10]],[[-4.518146991267711e-06]],
U5,[[-9.406113967838793e-10]],[[-4.70140589464919e-06]],
U5,[[-8.971604128074345e-10]],[[-4.4842272458536955e-06]],


In [70]:
fun.cross_marginal_effects(ccp_list[:m], coefficients[:m])

(3, 3, 2)


array([[[2.05290292e-12, 1.02618564e-08],
        [2.13616997e-12, 1.06780838e-08],
        [2.03749087e-12, 1.01848160e-08]],

       [[2.13616997e-12, 1.06780838e-08],
        [2.22281437e-12, 1.11111936e-08],
        [2.12013279e-12, 1.05979186e-08]],

       [[2.03749087e-12, 1.01848160e-08],
        [2.12013279e-12, 1.05979186e-08],
        [2.02219451e-12, 1.01083541e-08]]])

In [71]:
fun.elasticity(ccp_list[:m], repeated_models[:m], coefficients_labels[:m], coefficients[:m], X[:m])

elasticity shape: 
(3, 3)


Unnamed: 0,Price,HP,Chargetime
U5,[[-0.1319780835437069]],[[-0.40082613905363085]],
U5,[[-0.13196592638423418]],[[-0.4007892169589734]],
U5,[[-0.1319803337359031]],[[-0.400832973035652]],


In [74]:
fun.cross_elasticity(ccp_list[:m], coefficients[:m], X[:m], model_labels[:m])

3
Change in : U5 
 [[[0.00029973 0.00091029]]

 [[0.00029973 0.00091029]]

 [[0.00029973 0.00091029]]]
Change in : U6 
 [[[0.00031189 0.00094722]]

 [[0.00031189 0.00094722]]

 [[0.00031189 0.00094722]]]
Change in : e-tron 
 [[[0.00029748 0.00090346]]

 [[0.00029748 0.00090346]]

 [[0.00029748 0.00090346]]]
