In [1]:
%reset -f
%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
from tabulate import tabulate
from matplotlib import pyplot as plt
import scipy.stats as st
import statsmodels.api as sm
import seaborn as sns


In [2]:
# Load the raw dataset.
df = pd.read_csv('dataset.csv')

# Number of rows with a non-missing ID (kept under its original name in case
# later cells rely on it).
Nobs = df['ID'].count()

# Intercept column for the regressions. Assigning a scalar broadcasts it to
# every row, so the column no longer depends on Nobs matching len(df) (which
# would raise a length-mismatch error if any ID were missing) and is stored as
# an ordinary 1-D float column instead of an (Nobs, 1) array.
df['const'] = 1.0

# Keep only observations with a non-zero market share.
data = df[df['Market share'] != 0]
data.head(20)

Unnamed: 0,ID,Year,Sales,Market share,Manufacturer,Model,Range,Price,HP,Chargetime,Type,Segment,Country,const
8,1,2021,257,0.010319,Aiways,U5,400,330825.8,201,34,SUV,C,CH,1.0
9,1,2022,183,0.005938,Aiways,U5,400,330825.8,201,34,SUV,C,CH,1.0
10,1,2023,177,0.002822,Aiways,U5,400,330825.8,201,34,SUV,C,CH,1.0
21,2,2023,3,4.8e-05,Aiways,U6,405,375710.0,214,34,SUV,C,CH,1.0
28,3,2019,222,0.040217,Audi,e-tron,375,714953.0,402,17,SUV,F,DE,1.0
29,3,2020,491,0.034512,Audi,e-tron,375,714953.0,402,17,SUV,F,DE,1.0
30,3,2021,260,0.01044,Audi,e-tron,375,714953.0,402,17,SUV,F,DE,1.0
31,3,2022,538,0.017456,Audi,e-tron,375,714953.0,402,17,SUV,F,DE,1.0
32,3,2023,68,0.001084,Audi,e-tron,375,714953.0,402,17,SUV,F,DE,1.0
41,4,2021,84,0.003373,Audi,e-tron GT,472,1081416.0,522,17,Sedan,F,DE,1.0


# Creating dummies

In [3]:
# Work on an independent (deep) copy so that columns added to df2 later on
# never modify the filtered frame `data`.
df2 = data.copy(deep=True)

In [4]:
# One-hot encode the segment and year columns in a single pass.
#   * drop_first=True omits the first level of each variable, which becomes
#     the reference category.
#   * dtype=int forces 0/1 integer columns. Without it, pandas >= 2.0 returns
#     boolean dummies; mixed with the float regressors these turn the design
#     matrix into an object array that statsmodels refuses to fit. It also
#     keeps these dummies consistent with the integer 'China' dummy below.
# NOTE: this overwrites df2 and consumes the 'Segment' and 'Year' columns, so
# re-create df2 before re-running this cell.
df2 = pd.get_dummies(df2, columns=['Segment', 'Year'], drop_first=True, dtype=int)

# Dummy equal to 1 when Country is 'CH' (the code this notebook uses for China)
df2['China'] = (df2['Country'] == 'CH').astype(int)

In [5]:
# Preview the first 20 rows of the working frame
df2.head(n=20)

Unnamed: 0,ID,Sales,Market share,Manufacturer,Model,Range,Price,HP,Chargetime,Type,...,Year_2015,Year_2016,Year_2017,Year_2018,Year_2019,Year_2020,Year_2021,Year_2022,Year_2023,China
8,1,257,0.010319,Aiways,U5,400,330825.8,201,34,SUV,...,0,0,0,0,0,0,1,0,0,1
9,1,183,0.005938,Aiways,U5,400,330825.8,201,34,SUV,...,0,0,0,0,0,0,0,1,0,1
10,1,177,0.002822,Aiways,U5,400,330825.8,201,34,SUV,...,0,0,0,0,0,0,0,0,1,1
21,2,3,4.8e-05,Aiways,U6,405,375710.0,214,34,SUV,...,0,0,0,0,0,0,0,0,1,1
28,3,222,0.040217,Audi,e-tron,375,714953.0,402,17,SUV,...,0,0,0,0,1,0,0,0,0,0
29,3,491,0.034512,Audi,e-tron,375,714953.0,402,17,SUV,...,0,0,0,0,0,1,0,0,0,0
30,3,260,0.01044,Audi,e-tron,375,714953.0,402,17,SUV,...,0,0,0,0,0,0,1,0,0,0
31,3,538,0.017456,Audi,e-tron,375,714953.0,402,17,SUV,...,0,0,0,0,0,0,0,1,0,0
32,3,68,0.001084,Audi,e-tron,375,714953.0,402,17,SUV,...,0,0,0,0,0,0,0,0,1,0
41,4,84,0.003373,Audi,e-tron GT,472,1081416.0,522,17,Sedan,...,0,0,0,0,0,0,1,0,0,0


# Creating log market share

In [6]:
# Natural log of each observation's market share, stored as a new column
df2['log_market_share'] = np.log(df2.loc[:, 'Market share'])

# Independent OLS

In [7]:
# Dependent variable: log market share.
y = df2['log_market_share']

# Intercept column plus the continuous regressors.
x = df2[['const', 'Range', 'Price', 'HP', 'Chargetime']]

# Segment, year and China indicator columns.
dummies = df2[['Segment_B', 'Segment_C', 'Segment_D', 'Segment_E', 'Segment_F', 'Segment_M', 'Segment_J',
                'Year_2014', 'Year_2015', 'Year_2016', 'Year_2017', 'Year_2018', 'Year_2019', 'Year_2020', 'Year_2021', 'Year_2022', 'Year_2023',
                'China']]

# Full design matrix. Cast everything to float so the matrix handed to
# statsmodels is purely numeric whatever dtype the dummies carry (bool on
# pandas >= 2.0, uint8 on older versions); a bool/float mix would otherwise be
# converted to an object array and rejected by sm.OLS.
X = pd.concat([x, dummies], axis=1).astype(float)

In [8]:
# Specify the linear model of y on the design matrix X, estimate it by
# ordinary least squares, and print the regression table.
OLS_model = sm.OLS(endog=y, exog=X)
OLS_result = OLS_model.fit()
print(OLS_result.summary())

                            OLS Regression Results                            
Dep. Variable:       log_market_share   R-squared:                       0.511
Model:                            OLS   Adj. R-squared:                  0.477
Method:                 Least Squares   F-statistic:                     15.41
Date:                Tue, 12 Mar 2024   Prob (F-statistic):           2.41e-38
Time:                        22:04:58   Log-Likelihood:                -664.02
No. Observations:                 348   AIC:                             1374.
Df Residuals:                     325   BIC:                             1463.
Df Model:                          22                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const         -3.9672      0.855     -4.640      0.0