In [49]:
%reset -f
%load_ext autoreload
%autoreload 2
import pandas as pd
import numpy as np
from linearmodels.iv import IV2SLS


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [50]:
# Load the raw dataset of model-year observations.
df = pd.read_csv('dataset.csv')

# Count of rows with a non-missing ID (kept under its original name in case
# later cells rely on it).
Nobs=df['ID'].count()

# Intercept column. A scalar broadcasts to every row, so this cannot fail with
# a length mismatch when some IDs are missing (count() skips NaN), and it
# avoids assigning a 2-D (Nobs, 1) array to a 1-D column.
df['const'] = 1.0

# Drop observations with a market share of exactly zero.
data = df[df['Market share'] != 0]
data.head(20)

Unnamed: 0,ID,Year,Market share,Manufacturer,Model,Range,Price,HP,Chargetime,Type,Segment,Country,Sales,const
8,1,2021,0.010373,Aiways,U5,400,284621.7,201,34,SUV,C,CH,257,1.0
9,1,2022,0.005976,Aiways,U5,400,313681.829,201,34,SUV,C,CH,183,1.0
10,1,2023,0.00286,Aiways,U5,400,264524.0,201,34,SUV,C,CH,177,1.0
21,2,2023,4.8e-05,Aiways,U6,405,360638.0,214,34,SUV,C,CH,3,1.0
28,3,2019,0.04063,Audi,e-tron,375,979704.475,402,17,SUV,F,DE,222,1.0
29,3,2020,0.03468,Audi,e-tron,375,890101.41,402,17,SUV,F,DE,491,1.0
30,3,2021,0.010494,Audi,e-tron,375,800035.193,402,17,SUV,F,DE,260,1.0
31,3,2022,0.01757,Audi,e-tron,375,789723.656,402,17,SUV,F,DE,538,1.0
32,3,2023,0.001099,Audi,e-tron,375,673037.728,402,17,SUV,F,DE,68,1.0
41,4,2021,0.003391,Audi,e-tron GT,472,1278896.11,522,17,Sedan,F,DE,84,1.0


In [51]:
# Work on an independent (deep) copy so that columns added later do not touch `data`
df2 = data.copy(deep=True)

In [52]:
def BLP(dataframe):
    """Add sums of other models' attributes within the same year.

    For every row, the attributes ``Range``, ``HP`` and ``Chargetime`` are
    summed over all rows that share the row's ``Year`` but have a different
    ``Model``. The results are stored in ``BLP_Range``, ``BLP_HP`` and
    ``BLP_Chargetime``.

    The sums are computed as "year total minus own-model total" with grouped
    transforms, so the frame is scanned a fixed number of times instead of
    once per row.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        Must contain the columns ``Year``, ``Model``, ``Range``, ``HP`` and
        ``Chargetime``. Assumes ``Year`` and ``Model`` have no missing
        values (rows with missing keys are not given meaningful sums).

    Returns
    -------
    pandas.DataFrame
        The same object that was passed in, with the three ``BLP_*`` columns
        added (the input is modified in place, as before).
    """
    # Source attribute -> name of the derived column.
    attribute_columns = {
        'Range': 'BLP_Range',
        'HP': 'BLP_HP',
        'Chargetime': 'BLP_Chargetime',
    }
    sources = list(attribute_columns)

    # Total of each attribute over every row of the same year ...
    year_totals = dataframe.groupby('Year')[sources].transform('sum')
    # ... minus the part contributed by rows with the same model name in that
    # year (this includes the row itself), leaving only the other models.
    own_model_totals = dataframe.groupby(['Year', 'Model'])[sources].transform('sum')
    other_model_totals = year_totals - own_model_totals

    # Attach the results only after both transforms have been computed.
    for source, target in attribute_columns.items():
        dataframe[target] = other_model_totals[source]

    return dataframe

# Attach the BLP_* columns (sums over the other models of the same year) to the working frame
df2 = BLP(dataframe=df2)

# Plain-text dump of the frame, now including the new columns
print(df2)


       ID  Year  Market share Manufacturer   Model  Range       Price   HP  \
8       1  2021      0.010373       Aiways      U5    400  284621.700  201   
9       1  2022      0.005976       Aiways      U5    400  313681.829  201   
10      1  2023      0.002860       Aiways      U5    400  264524.000  201   
21      2  2023      0.000048       Aiways      U6    405  360638.000  214   
28      3  2019      0.040630         Audi  e-tron    375  979704.475  402   
...   ...   ...           ...          ...     ...    ...         ...  ...   
1154  189  2023      0.000065        Volvo    EX30    475  368245.000  268   
1163  190  2021      0.014652        Volvo    XC40    457  462060.600  402   
1164  190  2022      0.033310        Volvo    XC40    457  416263.400  402   
1165  190  2023      0.031752        Volvo    XC40    457  439266.600  402   
1176  192  2023      0.000129        Voyah    Free    500  504768.500  482   

      Chargetime Type Segment Country  Sales  const  BLP_Range 

In [53]:
# One-hot encode segment and year in a single pass. The first level of each
# variable is dropped, and the Segment_* columns come before the Year_* columns.
df2 = pd.get_dummies(df2, columns=['Segment', 'Year'], drop_first=True)

# Indicator equal to 1 when the country code is 'CH' (the code this dataset uses for China)
df2['China'] = df2['Country'].eq('CH').astype(int)

In [54]:
# Natural logarithm of each observation's market share
df2['log_market_share'] = df2['Market share'].pipe(np.log)

In [55]:
# Outcome: log market share
y = df2.loc[:, 'log_market_share']

# Exogenous product characteristics; the intercept enters here
x = df2.loc[:, ['const', 'Range', 'HP', 'Chargetime']]

# Segment indicators plus the China indicator.
# The year dummies (Year_2014 ... Year_2023) are currently not included.
dummies = df2.loc[:, [
    'Segment_B', 'Segment_C', 'Segment_D', 'Segment_E',
    'Segment_F', 'Segment_M', 'Segment_J',
    'China',
]]

# Full exogenous design matrix
X = pd.concat([x, dummies], axis=1)

# Endogenous regressor
k = df2.loc[:, 'Price']

# Excluded instruments: the BLP_* columns
z = df2.loc[:, ['BLP_Range', 'BLP_HP', 'BLP_Chargetime']]

# Note: the constant may appear in only one of the two stages. It is part of
# X above, so it must not be added to z as well, otherwise the design is not
# of full rank.

In [57]:
# Two-stage least squares: Price is instrumented by z, X enters as exogenous
# controls, and the covariance estimator is the 'robust' one.
model = IV2SLS(
    dependent=y,
    exog=X,
    endog=k,
    instruments=z,
).fit(cov_type='robust')

In [58]:
# Show the first-stage results for the instrumented variable (Price).
# NOTE(review): the recorded output reports a partial R-squared of 0.0450, so
# instrument strength is worth checking before relying on the estimates.
model.first_stage

0,1
,Price
R-squared,0.8025
Partial R-squared,0.0450
Shea's R-squared,0.0450
Partial F-statistic,16.167
P-value (Partial F-stat),0.0010
Partial F-stat Distn,chi2(3)
==========================,============
const,-3.949e+04
,(-0.7174)


In [59]:
# Show the IV-2SLS estimation summary (coefficients, robust standard errors, fit statistics).
# NOTE(review): the recorded R-squared is negative (-2.4229); confirm this is
# expected for this specification before interpreting the fit.
model.summary

0,1,2,3
Dep. Variable:,log_market_share,R-squared:,-2.4229
Estimator:,IV-2SLS,Adj. R-squared:,-2.5509
No. Observations:,334,F-statistic:,163.34
Date:,"Thu, Apr 25 2024",P-value (F-stat),0.0000
Time:,14:24:46,Distribution:,chi2(12)
Cov. Estimator:,robust,,
,,,

0,1,2,3,4,5,6
,Parameter,Std. Err.,T-stat,P-value,Lower CI,Upper CI
const,-3.6417,1.4738,-2.4711,0.0135,-6.5302,-0.7532
Range,0.0207,0.0050,4.1185,0.0000,0.0108,0.0305
HP,-0.0065,0.0049,-1.3372,0.1811,-0.0160,0.0030
Chargetime,-0.0118,0.0181,-0.6500,0.5157,-0.0473,0.0237
Segment_B,2.3744,0.7874,3.0154,0.0026,0.8311,3.9177
Segment_C,4.3521,1.0624,4.0965,0.0000,2.2698,6.4343
Segment_D,7.7019,2.1574,3.5700,0.0004,3.4735,11.930
Segment_E,13.642,3.6052,3.7839,0.0002,6.5756,20.708
Segment_F,24.562,5.8159,4.2232,0.0000,13.163,35.961
