**Installing and Importing the Required Libraries**

In [1]:
# Installing Libraries
%pip install pyblp


Collecting pyblp
  Downloading pyblp-1.1.0-py3-none-any.whl (2.0 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
Collecting pyhdfe>=0.1.0 (from pyblp)
  Downloading pyhdfe-0.2.0-py3-none-any.whl (19 kB)
Installing collected packages: pyhdfe, pyblp
Successfully installed pyblp-1.1.0 pyhdfe-0.2.0


In [2]:
# Importing libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import pyblp
import seaborn as sns

from google.colab import files


**Loading and Preprocessing the Data**

In [3]:
# Upload data
# Uses google.colab's files.upload() helper. The read_csv cell that follows
# expects a file named 'suvBook1.csv'. The return value is kept in `uploaded`
# but is not referenced again in the visible cells.
uploaded = files.upload()


Saving suvBook1.csv to suvBook1.csv


In [6]:
# Read the uploaded CSV and work on a copy so the raw frame stays untouched.
data = pd.read_csv('suvBook1.csv')
subset_data = data.copy()

# Display the first few rows of the working DataFrame.
# (Only the last expression of a cell is rendered, so a single head() suffices.)
subset_data.head()


Unnamed: 0,model,msrp_usd,horsepower,fuel_economy_combined,engine_type,transmission,drivetrain,dimensionsIn_L_W_H_ inch),seating_capacity,sales_q1_2024,market_share,air_suspension,panoramic_sunroof
0,BMW X7,82895,375HP,22,Turbocharged,8-speed automatic,AWD,203.6/78.7/72.2,7,6956,2.48%,yes,yes
1,BMW XM,159995,644HP,14,Twin-Turbocharged (hybrid),8-speed automatic,AWD,201.2/78.9/69.1,5,541,0.19%,no,yes
2,Cadillac Escalade,83890,420HP,16,Supercharged,10-speed automatic,RWD,211.9/81.1/76.7,7,9135,3.25%,yes,yes
3,Infiniti QX80,76145,400HP,16,Turbocarged,7-speed automatic,RWD,210.2/79.9/75.8,7,2472,0.88%,yes,no
4,Jeep Grand Wagoneer,93945,510HP,17,Twin-Turbocharged,8-speed automatic,4WD,214.7/83.6/75.6,7,3550,1.26%,yes,yes


In [24]:
# Binary feature dummies.
# Values are trimmed and lower-cased before comparison so that "Yes", "yes "
# and "yes" are all coded 1; anything else (including missing values) is 0.
# NOTE: no drivetrain (AWD/4WD) dummy is built; the model cells do not use one.

# Creating the Air Suspension dummy variable: 1 if yes, 0 otherwise
subset_data["air_suspension_dummy"] = (
    subset_data["air_suspension"].astype(str).str.strip().str.lower().eq("yes").astype(int)
)

# Creating the Panoramic Sunroof dummy variable: 1 if yes, 0 otherwise
subset_data["panoramic_sunroof_dummy"] = (
    subset_data["panoramic_sunroof"].astype(str).str.strip().str.lower().eq("yes").astype(int)
)



In [25]:
# Sanity check before building product_data_blp: report every required column
# that is absent from subset_data ("awd_dummy" is no longer part of the list).
required_columns = [
    "msrp_usd",
    "horsepower",
    "fuel_economy_combined",
    "air_suspension_dummy",
    "panoramic_sunroof_dummy",
]
missing_columns = [name for name in required_columns if name not in subset_data.columns]
print("Missing columns:", missing_columns)


Missing columns: []


In [26]:
# Prepare Data for BLP Model: keep exactly the columns named in required_columns.
product_data_blp = subset_data.loc[:, required_columns]


In [29]:
# Same selection as above, written out explicitly ("awd_dummy" was removed from the list).
product_data_blp = subset_data.loc[
    :,
    [
        "msrp_usd",
        "horsepower",
        "fuel_economy_combined",
        "air_suspension_dummy",
        "panoramic_sunroof_dummy",
    ],
]

In [30]:
# Variables used in the initial estimation ("awd_dummy" is not included)
variables = [
    "msrp_usd",
    "horsepower",
    "fuel_economy_combined",
    "air_suspension_dummy",
    "panoramic_sunroof_dummy",
]

# Keep only rows that have a value for every model variable and for the
# sales / market-share columns
subset_data = subset_data.dropna(subset=[*variables, 'sales_q1_2024', 'market_share'])


In [31]:
# Rebuild the characteristics table from subset_data (which has had rows with
# missing values dropped) and report the dtype of each column.
# No instruments are defined in this cell; "awd_dummy" is not selected.
product_data_blp = subset_data.loc[
    :,
    [
        "msrp_usd",
        "horsepower",
        "fuel_economy_combined",
        "air_suspension_dummy",
        "panoramic_sunroof_dummy",
    ],
]

print(product_data_blp.dtypes)

msrp_usd                    int64
horsepower                 object
fuel_economy_combined       int64
air_suspension_dummy        int64
panoramic_sunroof_dummy     int64
dtype: object


In [32]:
# Preview the first rows of the characteristics table as plain text
# (head() defaults to five rows).
print(product_data_blp.head())

   msrp_usd horsepower  fuel_economy_combined  air_suspension_dummy  \
0     82895      375HP                     22                     1   
1    159995      644HP                     14                     0   
2     83890      420HP                     16                     1   
3     76145      400HP                     16                     1   
4     93945      510HP                     17                     1   

   panoramic_sunroof_dummy  
0                        1  
1                        1  
2                        1  
3                        0  
4                        1  


In [33]:
# Market data
market_data = {
    "product_ids": data.index,
    "market_ids": np.ones(subset_data.shape[0]),
    "shares": subset_data["market_share"]
}

In [34]:
# Combine the data into the format required by pyblp
# X1: Linear Characteristics
X1_formulation = pyblp.Formulation("1 + msrp_usd + horsepower + fuel_economy_combined + air_suspension_dummy + panoramic_sunroof_dummy") # + awd_dummy removed to solve error

# X2: Nonlinear Characteristics (using the same variables here for simplicity)
X2_formulation = pyblp.Formulation("1 + horsepower + fuel_economy_combined + air_suspension_dummy + panoramic_sunroof_dummy") # + awd_dummy removed to solve error
# X3: Log Cost Characteristics
X3_formulation = pyblp.Formulation("1 +log(horsepower) + log(fuel_economy_combined) + panoramic_sunroof_dummy") # + awd_dummy removed to solve error


In [35]:
product_formulations = (X1_formulation, X2_formulation, X3_formulation)

In [36]:
# Create the problem instance with integration method
mc_integration = pyblp.Integration('monte_carlo', size=200, specification_options={'seed': 0})
problem = pyblp.Problem(product_formulations, market_data, product_data_blp, integration=mc_integration)

# Define initial values
initial_sigma = np.diag([1.0, 1.0, 1.0, 1.0, 1.0, 1.0])
initial_beta = np.array([1.0, -1.0, 1.0, 1.0, 1.0, 1.0])
initial_gamma = np.array([1.0, 1.0, 1.0, 1.0, 1.0, 1.0])

# Set up optimization routine
tnc = pyblp.Optimization('tnc', {'maxfun': 500})

# Solve the problem
result = problem.solve(initial_sigma, initial_beta, initial_gamma, tnc)


Initializing the problem ...


PatsyError: Failed to load data for 'msrp_usd' because of the above exception.
    1 + msrp_usd + horsepower + fuel_economy_combined + air_suspension_dummy + panoramic_sunroof_dummy
    ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^

In [22]:
# Print Column Names: list every column currently held in product_data_blp so it
# can be compared with the variables used in the formulations.
# NOTE(review): the recorded output lists 'awd_dummy', which no visible cell
# creates, and the execution count (22) precedes the cells above; the output
# presumably comes from an earlier run. Re-run after a kernel restart to confirm.
print(product_data_blp.columns)

Index(['msrp_usd', 'horsepower', 'fuel_economy_combined', 'awd_dummy',
       'air_suspension_dummy', 'panoramic_sunroof_dummy'],
      dtype='object')
