In [5]:
%reset -f
%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
from linearmodels.iv import IV2SLS


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [6]:
# Load the raw data set from the working directory.
df = pd.read_csv('dataset.csv')
# Number of non-null IDs (Series.count() skips missing values).
Nobs=df['ID'].count()
# Constant regressor. A scalar broadcasts to every row, so the column always
# matches the frame length, even if some IDs are missing (a (Nobs, 1) array
# of ones would raise a length mismatch in that case).
df['const'] = 1.0
# Keep only observations with a non-zero market share.
data = df[df['Market share'] != 0]
data.head(20)

Unnamed: 0,ID,Year,Sales,Market share,Manufacturer,Model,Range,Price,HP,Chargetime,Type,Segment,Country,const
8,1,2021,257,0.010319,Aiways,U5,400,330825.8,201,34,SUV,C,CH,1.0
9,1,2022,183,0.005938,Aiways,U5,400,330825.8,201,34,SUV,C,CH,1.0
10,1,2023,177,0.002822,Aiways,U5,400,330825.8,201,34,SUV,C,CH,1.0
21,2,2023,3,4.8e-05,Aiways,U6,405,375710.0,214,34,SUV,C,CH,1.0
28,3,2019,222,0.040217,Audi,e-tron,375,714953.0,402,17,SUV,F,DE,1.0
29,3,2020,491,0.034512,Audi,e-tron,375,714953.0,402,17,SUV,F,DE,1.0
30,3,2021,260,0.01044,Audi,e-tron,375,714953.0,402,17,SUV,F,DE,1.0
31,3,2022,538,0.017456,Audi,e-tron,375,714953.0,402,17,SUV,F,DE,1.0
32,3,2023,68,0.001084,Audi,e-tron,375,714953.0,402,17,SUV,F,DE,1.0
41,4,2021,84,0.003373,Audi,e-tron GT,472,1081416.0,522,17,Sedan,F,DE,1.0


In [7]:
# Work on an independent (deep) copy so that columns added to df2 never touch `data`.
df2 = data.copy(deep=True)

In [8]:
def BLP(dataframe):
    """Add "sum of other models' characteristics" columns to ``dataframe``.

    For every row, the values of ``Range``, ``HP`` and ``Chargetime`` are
    summed over all rows that share the row's ``Year`` but carry a different
    ``Model``. The sums are stored in the new columns ``BLP_Range``,
    ``BLP_HP`` and ``BLP_Chargetime``.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns ``Year``, ``Model``, ``Range``, ``HP`` and
        ``Chargetime``. ``Year`` and ``Model`` are assumed to be non-null.

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in; the three ``BLP_*`` columns are
        added to it in place.
    """
    attributes = ['Range', 'HP', 'Chargetime']

    # "Other models in the same year" equals the yearly total minus the rows
    # that carry this row's own model name. Two grouped sums replace a
    # per-row filter over the whole frame, which scales quadratically.
    year_totals = dataframe.groupby('Year')[attributes].transform('sum')
    own_model_totals = dataframe.groupby(['Year', 'Model'])[attributes].transform('sum')
    rival_totals = year_totals - own_model_totals

    # Attach the results in the fixed order Range, HP, Chargetime.
    for attribute in attributes:
        dataframe['BLP_' + attribute] = rival_totals[attribute]

    return dataframe

# Attach the BLP_* instrument columns to the working frame
df2 = df2.pipe(BLP)

# Show the frame together with the newly added columns
print(df2)


       ID  Year  Sales  Market share Manufacturer   Model  Range  \
8       1  2021    257      0.010319       Aiways      U5    400   
9       1  2022    183      0.005938       Aiways      U5    400   
10      1  2023    177      0.002822       Aiways      U5    400   
21      2  2023      3      0.000048       Aiways      U6    405   
28      3  2019    222      0.040217         Audi  e-tron    375   
...   ...   ...    ...           ...          ...     ...    ...   
1209  189  2023      4      0.000064        Volvo    EX30    475   
1218  190  2021    363      0.014575        Volvo    XC40    457   
1219  190  2022   1020      0.033094        Volvo    XC40    457   
1220  190  2023   1965      0.031327        Volvo    XC40    457   
1231  192  2023      8      0.000128        Voyah    Free    500   

              Price   HP  Chargetime Type Segment Country  const  BLP_Range  \
8     330825.789474  201          34  SUV       C      CH    1.0      22223   
9     330825.789474  201 

In [9]:
# One-hot encode Segment and Year in a single pass; the first level of each
# variable is dropped and serves as the baseline category.
df2 = pd.get_dummies(df2, columns=['Segment', 'Year'], drop_first=True)

# Indicator that is 1 when Country equals 'CH' and 0 otherwise
df2['China'] = df2['Country'].eq('CH').astype(int)

In [10]:
# Natural log of each observation's market share
market_share = df2['Market share']
df2['log_market_share'] = np.log(market_share)

In [11]:
# Dependent variable
y = df2['log_market_share']

# Exogenous regressors: constant and product characteristics ...
characteristic_cols = ['const', 'Range', 'HP', 'Chargetime']
# ... plus segment dummies and the China indicator.
# The Year_* dummies are left out of this specification.
dummy_cols = [
    'Segment_B', 'Segment_C', 'Segment_D', 'Segment_E',
    'Segment_F', 'Segment_M', 'Segment_J',
    'China',
]
x = df2[characteristic_cols]
dummies = df2[dummy_cols]
X = df2[characteristic_cols + dummy_cols]

# Endogenous regressor
k = df2['Price']

# Instruments built from the other models' characteristics
z = df2[['BLP_Range', 'BLP_HP', 'BLP_Chargetime']]

# Including the constant does not give full rank.
# The constant can only be in one of the stages for the system to be full rank.

In [12]:
# Specify the 2SLS model first, then estimate it with the unadjusted covariance estimator
iv_spec = IV2SLS(dependent=y, exog=X, endog=k, instruments=z)
model = iv_spec.fit(cov_type='unadjusted')

In [13]:
# Display the estimation summary of the fitted model
model.summary

0,1,2,3
Dep. Variable:,log_market_share,R-squared:,-8.4666
Estimator:,IV-2SLS,Adj. R-squared:,-8.8057
No. Observations:,348,F-statistic:,16.544
Date:,"Sat, Mar 09 2024",P-value (F-stat),0.1676
Time:,16:43:00,Distribution:,chi2(12)
Cov. Estimator:,unadjusted,,
,,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-6.2556,2.4806,-2.5218,0.0117,-11.118,-1.3937
Range,0.0400,0.0154,2.5956,0.0094,0.0098,0.0702
HP,-0.0088,0.0065,-1.3443,0.1788,-0.0215,0.0040
Chargetime,0.0521,0.0413,1.2628,0.2066,-0.0288,0.1330
Segment_B,4.5954,2.6559,1.7303,0.0836,-0.6100,9.8009
Segment_C,7.7536,3.6256,2.1386,0.0325,0.6476,14.860
Segment_D,14.533,6.7332,2.1585,0.0309,1.3366,27.730
Segment_E,23.462,11.010,2.1309,0.0331,1.8820,45.042
Segment_F,45.212,19.723,2.2923,0.0219,6.5554,83.868
