<a href="https://colab.research.google.com/github/DaheePark0415/Econ512-Fall2024/blob/main/IO_HW1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Problem 1(a). Multinomial Logit

In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from scipy.optimize import minimize
from sklearn.linear_model import LinearRegression
from statsmodels.sandbox.regression.gmm import IV2SLS
from scipy.optimize import fsolve

# Step 1: Load the product-level dataset.
# The path is relative, so 'product_data.csv' must sit in the kernel's working directory.
data = pd.read_csv('product_data.csv')

In [None]:
# Number of consumers in each market
M_t = 100000

# Total share of the inside goods (soft drinks) in each market t, as a Series indexed by t
total_share = data.groupby('t')['market_share'].sum()

# Attach the market total to every row with map() instead of merge().
# merge() appends a new 'total_share' column on every execution, so running this cell a
# second time produced 'total_share_x'/'total_share_y' and the next line raised KeyError.
# map() overwrites the column, which keeps the cell safe to re-run.
data['total_share'] = data['t'].map(total_share)

# The outside option takes whatever share the inside goods leave over
data['outside_share'] = 1 - data['total_share']

# Dependent variable of the logit inversion: log(s_jt) - log(s_0t)
data['log_share'] = np.log(data['market_share']) - np.log(data['outside_share'])

# Nest dummies built from the 'nest' label.
# No separate constant is added to X below; the two dummies act as nest-specific intercepts.
data['diet_d'] = np.where(data['nest'] == 'Diet', 1, 0)  # 1 if Diet, 0 otherwise
data['regular_d'] = np.where(data['nest'] == 'Regular', 1, 0)  # 1 if Regular, 0 otherwise

# Regressors: price, product characteristics and the nest dummies
X = data[['price', 'sugar', 'caffeine', 'diet_d', 'regular_d']]

# The dependent variable is the log difference between market share and outside share
y = data['log_share']

# Perform OLS regression (price is treated as exogenous here; 1(b) instruments it)
ols_model = sm.OLS(y, X).fit()

# Print the summary of the OLS regression
print(ols_model.summary())


                            OLS Regression Results                            
Dep. Variable:              log_share   R-squared:                       0.885
Model:                            OLS   Adj. R-squared:                  0.884
Method:                 Least Squares   F-statistic:                     1912.
Date:                Sun, 22 Sep 2024   Prob (F-statistic):               0.00
Time:                        22:00:46   Log-Likelihood:                -1305.2
No. Observations:                1000   AIC:                             2620.
Df Residuals:                     995   BIC:                             2645.
Df Model:                           4                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
price         -1.6261      0.021    -77.352      0.0

# 1(b).

In [None]:
!pip install linearmodels


Collecting linearmodels
  Downloading linearmodels-6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (7.9 kB)
Collecting mypy-extensions>=0.4 (from linearmodels)
  Downloading mypy_extensions-1.0.0-py3-none-any.whl.metadata (1.1 kB)
Collecting pyhdfe>=0.1 (from linearmodels)
  Downloading pyhdfe-0.2.0-py3-none-any.whl.metadata (4.0 kB)
Collecting formulaic>=1.0.0 (from linearmodels)
  Downloading formulaic-1.0.2-py3-none-any.whl.metadata (6.8 kB)
Collecting setuptools-scm<9.0.0,>=8.0.0 (from setuptools-scm[toml]<9.0.0,>=8.0.0->linearmodels)
  Downloading setuptools_scm-8.1.0-py3-none-any.whl.metadata (6.6 kB)
Collecting interface-meta>=1.2.0 (from formulaic>=1.0.0->linearmodels)
  Downloading interface_meta-1.3.0-py3-none-any.whl.metadata (6.7 kB)
Downloading linearmodels-6.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (1.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m1.5/1.5 MB[0m [31m8.2 MB/s[0m eta [36m0:00:00[0m
[?25hD

In [None]:
import numpy as np
import pandas as pd
from linearmodels.iv import IV2SLS

# Step 1: Start from a fresh copy of the product data
df = pd.read_csv('product_data.csv')

# Step 2: Inside-good share per market t; the outside option gets the remainder
total_share = df.groupby('t')['market_share'].sum()
df = df.join(total_share.rename('total_share'), on='t')
df['outside_share'] = 1 - df['total_share']

# Step 3: Dependent variable log(s_jt) - log(s_0t)
log_inside = np.log(df['market_share'])
log_outside = np.log(df['outside_share'])
df['log_share'] = log_inside - log_outside

# Step 4: Nest dummies (1 when the row's nest label matches, 0 otherwise)
df['Diet'] = df['nest'].eq('Diet').astype(int)
df['Regular'] = df['nest'].eq('Regular').astype(int)

# Step 5: Cost-shifter instruments: each input price scaled by the product's content of that input
df['instr_caffeine_price'] = df['caffeine_extract_price'].mul(df['caffeine'])
df['instr_sugar_price'] = df['corn_syrup_price'].mul(df['sugar'])

# Steps 6-9: Pieces of the 2SLS specification
dependent = df['log_share']                                       # log share difference
exog = df[['sugar', 'caffeine', 'Diet', 'Regular']]               # exogenous regressors (price excluded)
endog = df['price']                                               # endogenous regressor
instruments = df[['instr_caffeine_price', 'instr_sugar_price']]   # excluded instruments

# Step 10: Estimate by two-stage least squares
iv_model = IV2SLS(dependent=dependent, exog=exog, endog=endog, instruments=instruments)
iv_results = iv_model.fit()

# Step 11: Print the regression summary (`summary` is an attribute on these results, not a method)
print(iv_results.summary)


                          IV-2SLS Estimation Summary                          
Dep. Variable:              log_share   R-squared:                      0.8459
Estimator:                    IV-2SLS   Adj. R-squared:                 0.8453
No. Observations:                1000   F-statistic:                    6241.0
Date:                Sun, Sep 22 2024   P-value (F-stat)                0.0000
Time:                        22:20:54   Distribution:                  chi2(5)
Cov. Estimator:                robust                                         
                                                                              
                             Parameter Estimates                              
            Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------
sugar          1.3486     0.1272     10.599     0.0000      1.0992      1.5979
caffeine       1.3318     0.0846     15.742     0.00

Problem 1(b) involves addressing endogeneity in the price variable. In the given context, the price of soft drinks is correlated with unobserved quality (𝜉𝑗𝑡​). This means that standard OLS estimators would be biased. To solve this issue, we can use instrumental variables (IV) for the price.

In this problem, the suggested instruments are:

Caffeine Extract Price and Corn Syrup Price.

These instruments are used because they affect the price but are not directly related to the unobserved demand shock (𝜉𝑗𝑡).

Approach:
Two-Stage Least Squares (2SLS): First, we will regress the endogenous variable (price) on the instruments to get the predicted prices. Then, we will use these predicted prices in the utility function to estimate the parameters.

Conditions for Valid Instruments:

Relevance: The instruments must be correlated with the endogenous variable (price).
Exogeneity: The instruments must not be correlated with the error term (unobserved quality 𝜉𝑗𝑡).

# 1(c)

Problem 1(c) involves calculating the own-price derivatives and own-price elasticities for the products. These derivatives and elasticities are important for understanding how the market share of each product responds to changes in its own price.

In [None]:
# Assuming you have already run the IV2SLS regression from 1(b) and stored the result in `iv_results`
# NOTE(review): later cells in this notebook assign `iv_results` again (nested logit fit), so
# re-running this cell out of order would pick up that model's price coefficient instead.

# Step 1: Extract alpha from the model results. The coefficient is looked up by its label
# 'price', so its position in the parameter vector does not matter.
alpha_est = iv_results.params['price']  # Extracting the estimated alpha (price coefficient)

# Step 2: Own-price derivative of a multinomial logit market share
def own_price_derivative(alpha, market_share):
    """Return ds_j/dp_j = alpha * s_j * (1 - s_j).

    Parameters:
    - alpha: price coefficient of the utility specification.
    - market_share: share s_j; anything supporting arithmetic (scalar, array, Series).

    Returns:
    - The derivative, with the same shape as `market_share`.
    """
    complement = 1 - market_share
    return alpha * market_share * complement

# Step 3: Own-price elasticity of a multinomial logit market share
def own_price_elasticity(alpha, price, market_share):
    """Return (ds_j/dp_j) * (p_j / s_j), using `own_price_derivative` for the slope.

    Parameters:
    - alpha: price coefficient of the utility specification.
    - price: price p_j of the product.
    - market_share: share s_j of the product.
    """
    slope = own_price_derivative(alpha, market_share)
    price_per_share = price / market_share
    return slope * price_per_share

# Step 4: Calculate own-price derivatives and elasticities for each product in each time period
# (both helpers are plain arithmetic, so they are applied to whole columns at once)
df['own_price_derivative'] = own_price_derivative(alpha_est, df['market_share'])
df['own_price_elasticity'] = own_price_elasticity(alpha_est, df['price'], df['market_share'])

# Step 5: Mean own-price elasticity by nest.
# The Regular mean filters on nest == 'Regular' (as the later cells do) rather than
# "everything that is not Diet", so rows with any other or missing nest label cannot
# leak into the figure reported as "Regular".
mean_elasticity_diet = df.loc[df['nest'] == 'Diet', 'own_price_elasticity'].mean()
mean_elasticity_regular = df.loc[df['nest'] == 'Regular', 'own_price_elasticity'].mean()

print(f"Mean Own-Price Elasticity for Diet Drinks: {mean_elasticity_diet}")
print(f"Mean Own-Price Elasticity for Regular Drinks: {mean_elasticity_regular}")


Mean Own-Price Elasticity for Diet Drinks: -2.6161781263506434
Mean Own-Price Elasticity for Regular Drinks: -3.694776567854777


# 1(d).

In [None]:
import numpy as np
import pandas as pd

# Assuming you have already run the IV2SLS model from 1(b), whose fit is stored in `iv_results`
alpha_est = iv_results.params['price']  # Estimated alpha: the coefficient labelled 'price' in the fitted results

# Cross-price derivative of a multinomial logit market share
def cross_price_derivative(alpha, market_share_j, market_share_1):
    """Return -alpha * s_j * s_1, the logit derivative of s_j with respect to product 1's price.

    Parameters:
    - alpha: price coefficient of the utility specification.
    - market_share_j: share of the product whose demand responds.
    - market_share_1: share of the product whose price changes.
    """
    scaled_share_j = -alpha * market_share_j
    return scaled_share_j * market_share_1

# Cross-price elasticity of a multinomial logit market share
def cross_price_elasticity(alpha, price_1, market_share_j, market_share_1):
    """Return (ds_j/dp_1) * (p_1 / s_j), using `cross_price_derivative` for the slope.

    Parameters:
    - alpha: price coefficient of the utility specification.
    - price_1: price of the product whose price changes.
    - market_share_j: share of the product whose demand responds.
    - market_share_1: share of the product whose price changes.
    """
    slope = cross_price_derivative(alpha, market_share_j, market_share_1)
    price_per_share = price_1 / market_share_j
    return slope * price_per_share

# Step 1: Product 1's price and market share in EACH market t, aligned row by row with df.
# Taking `.values[0]` would reuse the values from a single market for every period, although
# an elasticity must be evaluated at the prices and shares of the market the row belongs to.
# Assumes product 1 appears at most once per t (map() raises if the lookup index has duplicates).
product_1_by_t = df.loc[df['product_ID'] == 1].set_index('t')
product_1_price = df['t'].map(product_1_by_t['price'])
product_1_share = df['t'].map(product_1_by_t['market_share'])

# Step 2: Calculate cross-price elasticities for all products j ≠ 1.
# The helper is plain arithmetic, so it is applied to whole columns; product 1's own rows
# are blanked out with NaN because a cross-price elasticity is undefined there.
is_other_product = df['product_ID'] != 1
df['cross_price_elasticity'] = cross_price_elasticity(
    alpha_est, product_1_price, df['market_share'], product_1_share
).where(is_other_product)

# Step 3: Calculate the mean cross-price elasticity for Diet and Regular drinks (NaN rows are skipped)
mean_cross_elasticity_diet = df[df['nest'] == 'Diet']['cross_price_elasticity'].mean()
mean_cross_elasticity_regular = df[df['nest'] == 'Regular']['cross_price_elasticity'].mean()

# Step 4: Print the results
print(f"Mean Cross-Price Elasticity between Product 1 and Diet Drinks: {mean_cross_elasticity_diet}")
print(f"Mean Cross-Price Elasticity between Product 1 and Regular Drinks: {mean_cross_elasticity_regular}")


Mean Cross-Price Elasticity between Product 1 and Diet Drinks: 0.388009376587693
Mean Cross-Price Elasticity between Product 1 and Regular Drinks: 0.388009376587693


# 1(e).

*Problem* 1(e) involves writing a function to generate the Jacobian matrix of price derivatives for a given time period. This matrix, Δ(p), contains the derivatives of the market shares of all products with respect to the prices of all products in the same time period.

Jacobian Matrix of Price Derivatives
The Jacobian matrix $\Delta(p)$ is a square matrix where each element $\Delta_{ij}$ is the derivative of the market share of product $j$ with respect to the price of product $i$:

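The diagonal elements $\Delta_{jj}$ are the own-price derivatives, where $\alpha$ is the (negative) price coefficient:

$$\Delta_{jj} = \frac{\partial s_j}{\partial p_j} = \alpha \, s_j \, (1 - s_j)$$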

The off-diagonal elements $\Delta_{ij}$ are the cross-price derivatives:

$$\Delta_{ij} = \frac{\partial s_j}{\partial p_i} = -\alpha \, s_j \, s_i \quad \text{for } i \neq j$$


In [None]:
 import numpy as np
import pandas as pd
from linearmodels.iv import IV2SLS

# NOTE(review): this cell sits under the 1(e) heading but estimates the nested logit; its code
# is the same as the cell under "Problem 2(a)". Running it rebinds `df` (dropping columns added
# by earlier cells) and `iv_results` (replacing the plain-logit fit from 1(b)).

# Step 1: Load the dataset
df = pd.read_csv('product_data.csv')

# Step 2: Create 'diet' and 'regular' dummy variables from the 'nest' column
df['diet'] = df['nest'].apply(lambda x: 1 if x == 'Diet' else 0)
df['regular'] = df['nest'].apply(lambda x: 1 if x == 'Regular' else 0)

# Step 3: Outside-good share = 1 minus the sum of all product shares in the same market t
df['outside_share'] = 1 - df.groupby('t')['market_share'].transform('sum')  # Share of the outside good

# Step 4: Compute log(s_j) - log(s_0) as the dependent variable
df['log_share_diff'] = np.log(df['market_share']) - np.log(df['outside_share'])

# Compute log(s_j|g): log of the product's share within its nest, per market t
df['nest_total_share'] = df.groupby(['nest', 't'])['market_share'].transform('sum')
df['log_within_group_share'] = np.log(df['market_share']) - np.log(df['nest_total_share'])

# Step 5: Define the exogenous variables (sugar, caffeine, diet, regular, log_within_group_share)
# NOTE(review): log_within_group_share is built from market shares yet enters as exogenous;
# confirm that leaving it un-instrumented is intended.
exog = df[['sugar', 'caffeine', 'diet', 'regular', 'log_within_group_share']]

# Step 6: Define the endogenous variable (price) and instruments (caffeine extract price, corn syrup price)
# The raw input prices are used here, whereas 1(b) interacts them with caffeine/sugar content.
endog = df['price']
instruments = df[['caffeine_extract_price', 'corn_syrup_price']]

# Step 7: Run the IV2SLS regression
iv_model = IV2SLS(dependent=df['log_share_diff'], exog=exog, endog=endog, instruments=instruments)
iv_results = iv_model.fit()

# Step 8: Print the regression results
print(iv_results.summary)

# Step 9: Compare the printed coefficients with the 1(b) estimates (no code; done by inspection)

                          IV-2SLS Estimation Summary                          
Dep. Variable:         log_share_diff   R-squared:                      0.9937
Estimator:                    IV-2SLS   Adj. R-squared:                 0.9936
No. Observations:                1000   F-statistic:                 2.472e+05
Date:                Mon, Sep 23 2024   P-value (F-stat)                0.0000
Time:                        02:27:34   Distribution:                  chi2(6)
Cov. Estimator:                robust                                         
                                                                              
                                   Parameter Estimates                                    
                        Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------------------
sugar                      0.3856     0.0866     4.4556     0.0000      0.2160      0.5553
caff

# Problem 2(a). Nested Logit

1. Estimating Equation for the Nested Logit Model (2a)
The estimating equation for the nested logit model is:

$$\log s_{jt} - \log s_{0t} = \alpha\, p_{jt} + \beta_1\, \text{sug}_{jt} + \beta_2\, \text{caf}_{jt} + \gamma_d\, \text{Diet}_{j} + \gamma_r\, \text{Regular}_{j} + \sigma \log s_{jt|g} + \xi_{jt}$$

2.  Instruments Needed to Estimate the Model
In nested logit models, price 𝑝𝑗𝑡 is potentially endogenous, meaning it could be correlated with unobserved characteristics like 𝜉𝑗𝑡 (unobserved product quality), leading to biased estimates. To address this, we need instrumental variables (IV) that are:

Correlated with price (relevance condition).
Uncorrelated with unobserved product quality 𝜉𝑗𝑡(exogeneity condition).
Instruments for Price:
Caffeine Extract Price: The price of caffeine extract affects the cost of producing caffeinated products, which in turn influences the price of the final product. However, it is unlikely to directly affect consumer demand beyond its influence on price.
Corn Syrup Price: Similarly, the cost of corn syrup influences the price of products, especially regular sodas, without directly influencing consumer preferences beyond its impact on price.
These two instruments are appropriate because they affect price but are unlikely to be correlated with unobserved demand shocks (𝜉𝑗𝑡).

In [None]:
!pip install linearmodels



In [None]:
 import numpy as np
import pandas as pd
from linearmodels.iv import IV2SLS

# Step 1: Reload the product data so the cell starts from the raw columns
df = pd.read_csv('product_data.csv')

# Step 2: Nest dummies (1 when the 'nest' label matches, 0 otherwise)
df['diet'] = (df['nest'] == 'Diet').astype(int)
df['regular'] = (df['nest'] == 'Regular').astype(int)

# Step 3: Outside-good share: what is left after summing every product's share in market t
inside_share_by_market = df.groupby('t')['market_share'].transform('sum')
df['outside_share'] = 1 - inside_share_by_market

# Step 4: Dependent variable log(s_j) - log(s_0)
log_market_share = np.log(df['market_share'])
df['log_share_diff'] = log_market_share - np.log(df['outside_share'])

# log(s_j|g): the product's share inside its nest, computed separately for each market t
df['nest_total_share'] = df.groupby(['nest', 't'])['market_share'].transform('sum')
df['log_within_group_share'] = np.log(df['market_share']) - np.log(df['nest_total_share'])

# Steps 5-6: Specification pieces.
# NOTE(review): log_within_group_share is derived from market shares but is placed among the
# exogenous regressors; confirm that not instrumenting it is intended.
exog = df[['sugar', 'caffeine', 'diet', 'regular', 'log_within_group_share']]
endog = df['price']                                                  # endogenous regressor
instruments = df[['caffeine_extract_price', 'corn_syrup_price']]     # excluded instruments (input prices)

# Step 7: Two-stage least squares fit; later cells read alpha and sigma from `iv_results`
iv_model = IV2SLS(
    dependent=df['log_share_diff'],
    exog=exog,
    endog=endog,
    instruments=instruments,
)
iv_results = iv_model.fit()

# Step 8: Show the estimates
print(iv_results.summary)

# Step 9: Compare the printed coefficients with the 1(b) estimates (no code; done by inspection)

                          IV-2SLS Estimation Summary                          
Dep. Variable:         log_share_diff   R-squared:                      0.9937
Estimator:                    IV-2SLS   Adj. R-squared:                 0.9936
No. Observations:                1000   F-statistic:                 2.472e+05
Date:                Mon, Sep 23 2024   P-value (F-stat)                0.0000
Time:                        02:28:02   Distribution:                  chi2(6)
Cov. Estimator:                robust                                         
                                                                              
                                   Parameter Estimates                                    
                        Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------------------
sugar                      0.3856     0.0866     4.4556     0.0000      0.2160      0.5553
caff

# 2(b).

$$
\frac{\partial s_{jt}}{\partial p_{jt}} = \frac{\alpha}{1-\sigma}\,\bar{s}_{jt|g}\,(1-\bar{s}_{jt|g})\,s_{g}+\alpha\,\bar{s}_{jt|g}^{2}\,s_{g}\,(1-s_{g})
$$

$$
\frac{\partial s_{jt}}{\partial p_{jt}} \frac{p_{jt}}{s_{jt}} = \frac{\alpha}{1-\sigma}p_{jt}(1-\bar{s}_{jt|g})+\alpha\, p_{jt}\,\bar{s}_{jt|g}(1-s_{g})
$$

In [None]:
import numpy as np
import pandas as pd

# Step 1: Reload the product data (columns added by earlier cells are dropped)
df = pd.read_csv('product_data.csv')

# Step 2: Parameters read from the nested logit fit stored in `iv_results`
alpha_est = iv_results.params['price']  # coefficient on price
sigma_est = iv_results.params['log_within_group_share']  # coefficient on log(s_j|g), i.e. the nesting parameter

# Step 3: Within-group share s_{jt|g}: product share divided by its nest's total share in market t
nest_share_in_market = df.groupby(['nest', 't'])['market_share'].transform('sum')
df['within_group_share'] = df['market_share'] / nest_share_in_market

# Step 4: Own-price elasticity under the nested logit
def own_price_elasticity(alpha, sigma, price, within_group_share, group_share):
    """Return alpha/(1-sigma) * p * (1 - s_{j|g}) + alpha * p * s_{j|g} * (1 - s_g).

    This five-argument version has the same name as the three-argument logit helper
    defined for 1(c) and replaces it once this cell has run.

    Parameters:
    - alpha: price coefficient.
    - sigma: nesting parameter.
    - price: product price p.
    - within_group_share: share of the product inside its nest, s_{j|g}.
    - group_share: share of the nest, s_g.
    """
    alpha_within_nest = alpha / (1 - sigma)
    return (
        alpha_within_nest * price * (1 - within_group_share)
        + alpha * price * within_group_share * (1 - group_share)
    )

# Step 5: Calculate group share (s_g) for each nest (Diet or Regular) within each market t
df['group_share'] = df.groupby(['nest', 't'])['market_share'].transform('sum')

# Step 6: Own-price elasticities (nested logit).
# The helper only uses element-wise arithmetic, so it is called once on whole columns
# instead of once per row through df.apply(axis=1); the resulting values are the same.
df['own_price_elasticity'] = own_price_elasticity(
    alpha_est, sigma_est, df['price'], df['within_group_share'], df['group_share']
)

# Step 7: Calculate the mean own-price elasticity for Regular and Diet drinks
mean_elasticity_diet = df[df['nest'] == 'Diet']['own_price_elasticity'].mean()
mean_elasticity_regular = df[df['nest'] == 'Regular']['own_price_elasticity'].mean()

# Step 8: Print the results
print(f"Mean Own-Price Elasticity for Diet Drinks: {mean_elasticity_diet}")
print(f"Mean Own-Price Elasticity for Regular Drinks: {mean_elasticity_regular}")


Mean Own-Price Elasticity for Diet Drinks: -3.3423677095850364
Mean Own-Price Elasticity for Regular Drinks: -4.990425307611291


# 2(c).

2 (c)


Case (1) the other product is in the same group:
$$
\frac{\partial s_{jt}}{\partial p_{kt}} = -\alpha s_{jt} (s_{kt} + \frac{\sigma}{1-\sigma}\bar{s}_{kt|g})
$$

$$
\frac{\partial s_{jt}}{\partial p_{kt}} \frac{p_{kt}}{s_{jt}} = -\alpha\, p_{kt} \left(s_{kt} + \frac{\sigma}{1-\sigma}\bar{s}_{kt|g}\right)
$$

Case (2) the other product is in a different group (product $j$ in nest $g$, product $k$ in nest $h$, $g \neq h$):

$$
\frac{\partial s_{jt}}{\partial p_{kt}} = -\alpha\, \bar{s}_{jt|g}\, s_{g}\, \bar{s}_{kt|h}\, s_{h} = -\alpha\, s_{jt}\, s_{kt}
$$

$$
\frac{\partial s_{jt}}{\partial p_{kt}} \frac{p_{kt}}{s_{jt}} = -\alpha p_{kt} s_{kt}
$$

In [None]:
import numpy as np
import pandas as pd

# Step 1: Load the dataset
df = pd.read_csv('product_data.csv')

# Step 2: Use the estimated alpha and sigma from 2(a)
alpha_est = iv_results.params['price']  # Price coefficient from the nested logit IV2SLS fit
sigma_est = iv_results.params['log_within_group_share']  # Nesting parameter (coefficient on log within-group share)

# Step 3: Calculate the within-group market share (s_{jt|g}) for each product.
# The nest total must be taken within each market t. Grouping by 'nest' alone pools the
# shares of every period into one denominator, so the "shares" no longer sum to one inside
# a nest-market and disagree with the definition used for estimation and in 2(b).
df['within_group_share'] = df['market_share'] / df.groupby(['nest', 't'])['market_share'].transform('sum')

# Step 4: Define the function for same-group cross-price elasticities
def cross_price_elasticity_same_group(alpha, sigma, price_k, share_j, share_k, within_group_share_j):
    """Elasticity of product j's share with respect to the price of product k (same nest, same market).

        e_jk = (ds_j/dp_k) * (p_k / s_j)
             = -alpha * p_k * (s_k + sigma / (1 - sigma) * s_{k|g})

    The bracket must hold the shares of product k, the product whose price moves. The
    previous body put product j's shares there, which is the derivative ds_k/dp_j scaled
    by p_k/s_k rather than an elasticity.

    s_{k|g} is not an argument, so it is recovered from the inputs: j and k share a nest,
    hence s_g = s_j / s_{j|g} and s_{k|g} = s_k / s_g.

    Parameters:
    - alpha: price coefficient.
    - sigma: nesting parameter.
    - price_k: price of product k.
    - share_j: market share of product j (must be non-zero).
    - share_k: market share of product k.
    - within_group_share_j: share of product j inside the common nest (must be non-zero).

    Returns:
    - The cross-price elasticity; works element-wise on arrays/Series.
    """
    group_share = share_j / within_group_share_j
    within_group_share_k = share_k / group_share
    return -alpha * price_k * (share_k + (sigma / (1 - sigma)) * within_group_share_k)

# Cross-price elasticity when the two products sit in different nests
def cross_price_elasticity_different_group(alpha, price_k, share_j, share_k):
    """Return -alpha * p_k * s_k.

    Parameters:
    - alpha: price coefficient.
    - price_k: price of the product whose price changes.
    - share_j: accepted for a signature parallel to the same-group helper; not used.
    - share_k: market share of the product whose price changes.
    """
    scaled_price = -alpha * price_k
    return scaled_price * share_k

# Step 5: Product 1's price, shares and nest in EACH market t, aligned row by row with df.
# Taking `.values[0]` would reuse one market's values for every period, although an
# elasticity has to be evaluated at the prices and shares of the row's own market.
# Assumes product 1 appears at most once per t (map() raises if the lookup index has duplicates).
product_1_by_t = df.loc[df['product_ID'] == 1].set_index('t')
product_1_price = df['t'].map(product_1_by_t['price'])
product_1_share = df['t'].map(product_1_by_t['market_share'])
product_1_within_group_share = df['t'].map(product_1_by_t['within_group_share'])
product_1_nest = df['t'].map(product_1_by_t['nest'])

# Step 6: Apply the same-group or different-group formula depending on the row's nest.
# Both helpers are plain arithmetic, so they are evaluated on whole columns and combined
# with boolean masks instead of a row-wise df.apply.
is_other_product = df['product_ID'] != 1
in_product_1_nest = df['nest'] == product_1_nest

same_group_elasticity = cross_price_elasticity_same_group(
    alpha_est, sigma_est, product_1_price, df['market_share'], product_1_share, df['within_group_share']
)
different_group_elasticity = cross_price_elasticity_different_group(
    alpha_est, product_1_price, df['market_share'], product_1_share
)

# Same-nest rows take the same-group value, the rest the different-group value;
# product 1's own rows become NaN because a cross-price elasticity is undefined there.
df['cross_price_elasticity'] = (
    same_group_elasticity
    .where(in_product_1_nest, different_group_elasticity)
    .where(is_other_product)
)

# Step 7: Calculate the mean cross-price elasticity for Diet and Regular drinks (NaN rows are skipped)
mean_cross_elasticity_diet = df[df['nest'] == 'Diet']['cross_price_elasticity'].mean()
mean_cross_elasticity_regular = df[df['nest'] == 'Regular']['cross_price_elasticity'].mean()

# Step 8: Print the results
print(f"Mean Cross-Price Elasticity between Product 1 and Diet Drinks: {mean_cross_elasticity_diet}")
print(f"Mean Cross-Price Elasticity between Product 1 and Regular Drinks: {mean_cross_elasticity_regular}")


Mean Cross-Price Elasticity between Product 1 and Diet Drinks: 0.0847446667113193
Mean Cross-Price Elasticity between Product 1 and Regular Drinks: 0.12243513054021175


# 2(d).

1. Multinomial Logit Model (Problem 1)

The own-price elasticities in Model 2 (2(b)) are slightly larger in absolute terms compared to Model 1 (1(c)), indicating that demand is slightly more price-sensitive in Model 2.

In the nested logit model (Model 2), the cross-price elasticities for both Regular and Diet drinks are lower than in Model 1. This suggests that Diet drinks in particular are much less sensitive to price changes.

# 2(e).

In [None]:
import numpy as np

# Function to compute the Jacobian matrix of price derivatives under the nested logit model
def jacobian_matrix_nested(prices, market_shares, within_group_shares, nests, alpha, sigma):
    """
    Generate the Jacobian matrix of price derivatives for the nested logit model.

    Entry [i, j] is:
    - i == j:     alpha * s_j * (1 - s_j) + alpha * sigma/(1-sigma) * s_j * (1 - s_{j|g})
    - same nest:  -alpha * s_j * s_i - alpha * sigma/(1-sigma) * s_j * s_{i|g}
    - otherwise:  -alpha * s_j * s_i

    Inputs are converted to NumPy arrays up front, so lists, arrays and pandas Series
    (whatever their index) are all read by position. The earlier element-by-element
    lookups `market_shares[i]` were label-based on a Series and failed or picked the
    wrong rows unless `.values` had been taken by the caller.

    Parameters:
    - prices: Sequence of prices for the products in the given time period; only its
      length is used (it sets the number of products).
    - market_shares: Market shares of the products in the given time period.
    - within_group_shares: Within-group (within-nest) shares of the products.
    - nests: Nest (group) label of each product.
    - alpha: The price coefficient estimated from the model.
    - sigma: The nesting parameter estimated from the model.

    Returns:
    - jacobian: (num_products x num_products) NumPy array of price derivatives.

    Raises:
    - ValueError: if the share or nest inputs are not one-dimensional with one entry per price.
    """
    num_products = len(prices)  # Number of products in the given time period
    shares = np.asarray(market_shares, dtype=float)
    within = np.asarray(within_group_shares, dtype=float)
    groups = np.asarray(nests)

    for label, values in (("market_shares", shares),
                          ("within_group_shares", within),
                          ("nests", groups)):
        if values.shape != (num_products,):
            raise ValueError(
                f"{label} must be one-dimensional with {num_products} entries, got shape {values.shape}"
            )

    nesting_ratio = sigma / (1 - sigma)

    # Every pair starts from the cross-nest term: [i, j] = -alpha * s_i * s_j
    jacobian = -alpha * np.outer(shares, shares)

    # Pairs in the same nest get the extra substitution term: [i, j] -= alpha * ratio * s_{i|g} * s_j
    same_nest = groups[:, None] == groups[None, :]
    jacobian = jacobian - np.where(same_nest, alpha * nesting_ratio * np.outer(within, shares), 0.0)

    # The diagonal follows its own (own-price) formula and overwrites what was written above
    own_price = alpha * shares * (1 - shares) + alpha * nesting_ratio * shares * (1 - within)
    np.fill_diagonal(jacobian, own_price)

    return jacobian

# Example usage: Jacobian for a single market
time_period = 100  # The last period in the dataset

# Rows of the chosen market, and the four columns the Jacobian function needs as NumPy arrays
in_period = df['t'] == time_period
df_time_100 = df[in_period]
(prices_time_100,
 market_shares_time_100,
 within_group_shares_time_100,
 nests_time_100) = (
    df_time_100[column].values
    for column in ('price', 'market_share', 'within_group_share', 'nest')
)

# Parameters read from the fitted IV2SLS results
alpha_est = iv_results.params['price']  # Alpha (price coefficient)
sigma_est = iv_results.params['log_within_group_share']  # Sigma (nesting parameter)

# Jacobian of price derivatives for the chosen market
jacobian_last_period = jacobian_matrix_nested(
    prices_time_100,
    market_shares_time_100,
    within_group_shares_time_100,
    nests_time_100,
    alpha_est,
    sigma_est,
)

# Show the matrix
print(f"Jacobian matrix for time period {time_period}:")
print(jacobian_last_period)


Jacobian matrix for time period 100:
[[-1.11307803e-01  3.77643208e-05  2.77515859e-04  5.52288365e-04
   6.65525589e-03  9.62987940e-04  1.09535340e-02  2.37040081e-03
   1.00410793e-03  8.90067725e-04]
 [ 3.77643208e-05 -2.49438547e-03  6.12699820e-06  1.21934286e-05
   1.46934813e-04  2.12608584e-05  2.41832245e-04  5.23337353e-05
   2.21687060e-05  1.96509251e-05]
 [ 2.77515859e-04  6.12699820e-06 -1.82914072e-02  8.96049431e-05
   1.07976895e-03  1.56238091e-04  1.77713466e-03  3.84581033e-04
   1.62909523e-04  1.44407293e-04]
 [ 5.52288365e-04  1.21934286e-05  8.96049431e-05 -3.63132775e-02
   2.14886396e-03  3.10931707e-04  3.53670164e-03  7.65360332e-04
   3.24208622e-04  2.87387064e-04]
 [ 6.65525589e-03  1.46934813e-04  1.07976895e-03  2.14886396e-03
  -4.13841250e-01  3.74682902e-03  4.26184145e-02  9.22284297e-03
   3.90682020e-03  3.46310836e-03]
 [ 9.62987940e-04  2.12608584e-05  1.56238091e-04  3.10931707e-04
   3.74682902e-03 -6.95787247e-02  7.18096177e-03  1.55399687e

# 3(a). A simple supply side

In [None]:
# Jacobian of price derivatives for every market, keyed by t (reused in 3(b))
jacobians_by_period = {}

# Marginal cost and Lerner index start as NaN and are filled in market by market
df['marginal_cost'] = np.nan
df['lerner_index'] = np.nan

for time_period in df['t'].unique():
    in_period = df['t'] == time_period
    df_time = df[in_period]
    prices = df_time['price'].values
    market_shares = df_time['market_share'].values
    within_group_shares = df_time['within_group_share'].values
    nests = df_time['nest'].values

    jacobian_matrix = jacobian_matrix_nested(prices, market_shares, within_group_shares, nests, alpha_est, sigma_est)
    jacobians_by_period[time_period] = jacobian_matrix

    # Own-price derivatives ds_j/dp_j sit on the diagonal
    own_price_derivatives = np.diag(jacobian_matrix)

    # s_j / (ds_j/dp_j), left as NaN where the derivative is exactly zero.
    # np.divide(..., where=mask) only writes the entries where the mask is True; without
    # an explicit `out` array the remaining entries are uninitialised memory, so the
    # output is pre-filled with NaN here.
    share_over_derivative = np.full(len(df_time), np.nan)
    np.divide(market_shares, own_price_derivatives, out=share_over_derivative, where=own_price_derivatives != 0)

    with np.errstate(divide='ignore', invalid='ignore'):
        # Single-product first-order condition: s_j + (p_j - c_j) * ds_j/dp_j = 0
        #   =>  c_j = p_j + s_j / (ds_j/dp_j)
        marginal_cost = prices + share_over_derivative
        # Lerner index: markup as a fraction of price
        lerner_index = (prices - marginal_cost) / prices

    # Rows with an invalid derivative keep NaN and are skipped by the mean below
    df.loc[in_period, 'marginal_cost'] = marginal_cost
    df.loc[in_period, 'lerner_index'] = lerner_index

# Step 8: Calculate the mean Lerner Index across all time periods
mean_lerner_index = df['lerner_index'].mean()

# Print the mean Lerner Index
print(f"Mean Lerner Index: {mean_lerner_index}")

Mean Lerner Index: 0.22584494689253443


# 3(b).

In [None]:
import numpy as np
import pandas as pd
from scipy.optimize import fsolve

# Step 1: Filter data for t = 100 (the last time period)
df_time_100 = df[df['t'] == 100]

# Jacobian of price derivatives for t = 100, computed in 3(a)
jacobian_100 = jacobians_by_period[100]

# Rows/columns of the Jacobian follow the row order of df for t = 100, so products 1 and 2
# are located by product_ID instead of assuming they are the first two rows of the market.
product_ids_100 = df_time_100['product_ID'].to_numpy()
position_1 = int(np.flatnonzero(product_ids_100 == 1)[0])
position_2 = int(np.flatnonzero(product_ids_100 == 2)[0])

# Own-price derivatives for products 1 and 2 (diagonal elements of the Jacobian)
own_price_derivative_1 = jacobian_100[position_1, position_1]
own_price_derivative_2 = jacobian_100[position_2, position_2]

# Cross-price derivatives between product 1 and product 2
# NOTE(review): the original comments assumed the two products are in different nests - confirm.
cross_price_derivative_12 = jacobian_100[position_1, position_2]  # entry [product 1, product 2]
cross_price_derivative_21 = jacobian_100[position_2, position_1]  # entry [product 2, product 1]

# Step 2: Market size (M_t = 100,000, given in the problem description)
M_t = 100000

# Step 3: Extract necessary variables for products 1 and 2 at t = 100
market_share_1 = df_time_100.loc[df_time_100['product_ID'] == 1, 'market_share'].values[0]
market_share_2 = df_time_100.loc[df_time_100['product_ID'] == 2, 'market_share'].values[0]

# Extract marginal costs for products 1 and 2 at t = 100 (column filled in 3(a))
c1 = df_time_100.loc[df_time_100['product_ID'] == 1, 'marginal_cost'].values[0]
c2 = df_time_100.loc[df_time_100['product_ID'] == 2, 'marginal_cost'].values[0]

# Extract prices for products 1 and 2 at t = 100
price_1 = df_time_100.loc[df_time_100['product_ID'] == 1, 'price'].values[0]
price_2 = df_time_100.loc[df_time_100['product_ID'] == 2, 'price'].values[0]

# Print the original prices for product 1 and 2
print(f"Original price for Product 1: {price_1}")
print(f"Original price for Product 2: {price_2}")

# Step 4: First-order conditions of the merged firm that prices products 1 and 2 jointly
def FOCs(variables):
    """Return the two first-order conditions [FOC1, FOC2] evaluated at prices (p1, p2).

    Reads the module-level values M_t, market_share_1/2, c1/c2, own_price_derivative_1/2
    and cross_price_derivative_12/21. Shares and derivatives are held at those fixed
    values, so each condition is linear in the two prices.

    Parameters:
    - variables: pair (p1, p2) of candidate prices.
    """
    p1, p2 = variables
    markup_1 = p1 - c1
    markup_2 = p2 - c2

    # Condition for p1: own quantity + own markup effect + effect on the partner product's margin
    foc_1 = (
        market_share_1 * M_t
        + markup_1 * own_price_derivative_1 * M_t
        + markup_2 * cross_price_derivative_12 * M_t
    )

    # Condition for p2, built symmetrically
    foc_2 = (
        market_share_2 * M_t
        + markup_2 * own_price_derivative_2 * M_t
        + markup_1 * cross_price_derivative_21 * M_t
    )

    return [foc_1, foc_2]

# Step 5: Initial guess for prices (using the original, pre-merger prices at t = 100)
initial_guess = [price_1, price_2]

# Step 6: Solve the system of FOCs numerically; fsolve looks for prices where both conditions are zero
solution = fsolve(FOCs, initial_guess)

# Get the new prices for products 1 and 2 after the merger (fsolve returns them in the order of initial_guess)
p1_new, p2_new = solution

# Print the new prices for products 1 and 2 after the merger
print(f"New price for Product 1: {p1_new}")
print(f"New price for Product 2: {p2_new}")

# Step 7: Tabulate the pre- and post-merger prices
def show_price_changes(p1, p2, p1_new, p2_new):
    """Print a four-line table of price levels, absolute changes and percentage changes.

    Parameters:
    - p1, p2: pre-merger prices of products 1 and 2.
    - p1_new, p2_new: post-merger prices of products 1 and 2.
    """
    # Work out every figure first, then print, so nothing is printed if a calculation fails
    rows = []
    for label, before, after in (('Product 1', p1, p1_new), ('Product 2', p2, p2_new)):
        delta = after - before
        delta_pct = (delta / before) * 100
        rows.append((label, before, after, delta, delta_pct))

    # Header and separator
    print(f"{'Product':<15}{'Pre-Merger Price':<20}{'Post-Merger Price':<20}{'Change':<15}{'Change (%)'}")
    print(f"{'-'*75}")

    # One line per product
    for label, before, after, delta, delta_pct in rows:
        print(f"{label:<15}{before:<20.4f}{after:<20.4f}{delta:<15.4f}{delta_pct:.2f}%")

# Display the comparison for the solved post-merger prices
show_price_changes(p1=price_1, p2=price_2, p1_new=p1_new, p2_new=p2_new)


Original price for Product 1: 3.17947074656772
Original price for Product 2: 1.90735024827719
New price for Product 1: 3.179662630117701
New price for Product 2: 1.9159127325737075
Product        Pre-Merger Price    Post-Merger Price   Change         Change (%)
---------------------------------------------------------------------------
Product 1      3.1795              3.1797              0.0002         0.01%
Product 2      1.9074              1.9159              0.0086         0.45%


After the merger, both prices are higher because the merged firm now maximizes the joint profit of Product 1 and Product 2 and no longer treats them as competitors. In the simulation both increases are small: the price of Product 2 rose by about 0.45% and the price of Product 1 by about 0.01%. The larger increase for Product 2 may reflect stronger interdependencies between the two products' demand.

# 3(c).

In [None]:
import numpy as np
import pandas as pd

# Step 1: Re-read the product data and keep only the last period (t = 100).
# NOTE(review): this rebinds `df` to the raw CSV, so columns that earlier cells
# added to `df` are not on it from here on — confirm that this is intended.
df = pd.read_csv('product_data.csv')
df_time_100 = df.loc[df['t'] == 100]

# Step 2: Products 1 and 2 are treated as the merged firm's products
merged_firm_products = [1, 2]

# Observed (pre-merger) prices of those products vs. the prices solved in 3(b)
pre_merger_prices = df_time_100.loc[
    df_time_100['product_ID'].isin(merged_firm_products), 'price'
].values
post_merger_prices = [p1_new, p2_new]

# Mean price of the merged firm's products before and after the merger
average_price_pre_merger = np.mean(pre_merger_prices)
average_price_post_merger = np.mean(post_merger_prices)

# Step 3: Average prices for competitors in the same nest as Product 1
# (the merged firm's own products are excluded from every competitor group).
reference_nest = df_time_100.loc[df_time_100['product_ID'] == 1, 'nest'].values[0]
in_reference_nest = df_time_100['nest'] == reference_nest
is_merged_product = df_time_100['product_ID'].isin(merged_firm_products)

same_nest_competitors = df_time_100[in_reference_nest & ~is_merged_product]

# Average price for same-nest competitors before the merger
average_price_same_nest_pre = same_nest_competitors['price'].mean()

# For simplicity, competitors' prices are assumed not to change after the
# merger, so the "post" figure is the pre-merger mean by construction.
average_price_same_nest_post = average_price_same_nest_pre

# Step 4: Average prices for competitors outside Product 1's nest.
# The merged firm's products must be dropped here as well: without the
# exclusion, a merged product that sits in the other nest would be counted
# as a competitor and pull this average around.
different_nest_competitors = df_time_100[~in_reference_nest & ~is_merged_product]

# Average price for different-nest competitors before the merger
average_price_different_nest_pre = different_nest_competitors['price'].mean()

# Same fixed-price assumption as for the same-nest group
average_price_different_nest_post = average_price_different_nest_pre

# Step 5: Print the results
print(f"Average Price for Merged Firms Before Merger: {average_price_pre_merger}")
print(f"Average Price for Merged Firms After Merger: {average_price_post_merger}")

print(f"Average Price for Competitors in Same Nest Before Merger: {average_price_same_nest_pre}")
print(f"Average Price for Competitors in Same Nest After Merger: {average_price_same_nest_post}")

print(f"Average Price for Competitors in Different Nest Before Merger: {average_price_different_nest_pre}")
print(f"Average Price for Competitors in Different Nest After Merger: {average_price_different_nest_post}")


Average Price for Merged Firms Before Merger: 2.543410497422455
Average Price for Merged Firms After Merger: 2.5477876813457043
Average Price for Competitors in Same Nest Before Merger: 2.2272790472871336
Average Price for Competitors in Same Nest After Merger: 2.2272790472871336
Average Price for Competitors in Different Nest Before Merger: 3.4536960542560644
Average Price for Competitors in Different Nest After Merger: 3.4536960542560644


Average Price for Products 1 and 2 (the merging firms):

Before the merger: 2.543
After the merger: 2.548
The average price of Products 1 and 2 rose only slightly after the merger (from 2.543 to 2.548, roughly 0.2%). The direction of the change indicates that the merged firm faces less competitive pressure between these two products and can raise prices to maximize joint profits, but the simulated magnitude is small.

Average Price for Competing Products in the Same Nest (products in the same category as Products 1 and 2):

Before the merger: 2.227
After the merger: 2.227
No price change.
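There was no change in the prices of competing products in the same nest. Note that this holds by construction: the calculation assumes that same-nest competitors keep their pre-merger prices instead of re-solving their own pricing conditions, so the result should not be read as evidence about how rivals respond. If the assumption is accurate, it would suggest that the merger did not have a direct impact on other products in the same category, for example because these products are sufficiently differentiated or not directly affected by the pricing strategy of the merged firm.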

Average Price for Competing Products in a Different Nest (products in the opposite category):

Before the merger: 3.454
After the merger: 3.454
No price change.
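Similarly, there was no change in the prices of competing products in the different nest, i.e. products outside Product 1's nest (likely Diet products if Product 1 is in the Regular nest). This is also an assumption of the calculation rather than a finding: these prices are held at their pre-merger values, so the numbers do not by themselves show whether the merger of Products 1 and 2 spills over to pricing in the other nest.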


# 3(d).

In [None]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize, Bounds

# Objective: negative total industry profit under constant-elasticity demand
def total_collusive_profit_with_elasticity(prices, costs, market_shares, M_t, elasticity,
                                           base_prices=None):
    """Return minus the total profit summed over all products.

    Demand for product i is its baseline quantity scaled by a constant-
    elasticity term: q_i = share_i * M_t * (price_i / base_price_i) ** (-elasticity).
    Profit is sum_i (price_i - cost_i) * q_i; the sign is flipped because the
    caller minimizes.

    Parameters
    ----------
    prices : array-like
        Candidate prices, one per product.
    costs, market_shares : array-like
        Marginal costs and baseline shares; only the first len(prices)
        entries are used.
    M_t : number
        Market size.
    elasticity : number
        Exponent applied to the price ratio.
    base_prices : array-like, optional
        Baseline prices that `market_shares` were observed at. When omitted,
        the module-level `df['price']` is used, which is only correct if
        `prices` lines up row-for-row with that frame. Callers working on any
        other frame or subset should pass this explicitly.

    Returns
    -------
    float
        Negative total profit.
    """
    prices = np.asarray(prices, dtype=float)
    n_products = prices.size

    if base_prices is None:
        # Backward-compatible fallback to the notebook-level frame
        base_prices = df['price'].values

    # Trim every input to the number of candidate prices, mirroring an
    # index-by-index walk over `prices`.
    base_prices = np.asarray(base_prices, dtype=float)[:n_products]
    costs = np.asarray(costs, dtype=float)[:n_products]
    market_shares = np.asarray(market_shares, dtype=float)[:n_products]

    # Vectorized: one array expression instead of a Python loop with a
    # DataFrame lookup per product on every objective evaluation.
    quantities = market_shares * M_t * (prices / base_prices) ** (-elasticity)
    total_profit = np.sum((prices - costs) * quantities)
    return -total_profit  # Negative because we minimize the function


# Driver: maximize total profit over prices within box bounds
def simulate_collusive_prices_with_elasticity(df, M_t, elasticity,
                                              price_lower=1, price_upper=15):
    """Find the prices that maximize total profit for the products in `df`.

    Parameters
    ----------
    df : pandas.DataFrame
        Must provide 'marginal_cost', 'market_share' and 'price' columns.
        The observed prices serve both as the optimizer's starting point and
        as the baseline prices of the demand curve.
    M_t : number
        Market size.
    elasticity : number
        Demand elasticity passed through to the objective.
    price_lower, price_upper : number, optional
        Box bounds applied to every price (defaults 1 and 15). A solution
        sitting exactly on a bound means the bound, not the first-order
        condition, determined that price.

    Returns
    -------
    numpy.ndarray
        Optimized prices, in the row order of `df`.
    """
    import warnings

    costs = df['marginal_cost'].values  # Marginal costs for all products
    market_shares = df['market_share'].values  # Market shares for all products
    initial_prices = df['price'].values  # Starting point and demand baseline

    n_products = len(initial_prices)
    price_bounds = Bounds([price_lower] * n_products, [price_upper] * n_products)

    # Pass the frame's own prices as the baseline so the objective never
    # depends on a different, notebook-level frame.
    result = minimize(total_collusive_profit_with_elasticity, initial_prices,
                      args=(costs, market_shares, M_t, elasticity, initial_prices),
                      method='L-BFGS-B', bounds=price_bounds)

    if not result.success:
        # Surface optimizer failures instead of silently returning its last iterate
        warnings.warn(f"Collusive price optimization did not converge: {result.message}")

    # Return the optimized (collusive) prices
    collusive_prices = result.x
    return collusive_prices

# Simulation inputs
# NOTE(review): the original comment here reads "Example usage for t=100", but
# the full `df` is passed to the simulator below with no filter on `t` in this
# cell — confirm which sample is intended.
M_t = 100000  # Total market size
elasticity = 1.5  # Assuming moderate elasticity of demand (adjust as necessary)

# Marginal costs are expected from Problem 3(a); if the column is absent,
# rebuild it from price, market share and alpha_est.
if 'marginal_cost' not in df:
    df['own_price_derivative'] = alpha_est * df['market_share'] * (1 - df['market_share'])
    df['marginal_cost'] = df['price'] + df['market_share'] / df['own_price_derivative']

# Run the simulation and attach the resulting prices to the frame
collusive_prices = simulate_collusive_prices_with_elasticity(df, M_t, elasticity)
df['collusive_price'] = collusive_prices

# Side-by-side view of observed vs. simulated prices
summary_table = df.loc[:, ['product_ID', 'price', 'collusive_price']]
print(summary_table)

# Per-product change and its mean across all rows
df['price_change'] = df['collusive_price'].sub(df['price'])
average_price_change = df['price_change'].mean()

print(f"Average price change after collusion: {average_price_change}")


     product_ID     price  collusive_price
0             1  2.814362         1.000000
1             2  2.935735         1.000000
2             3  2.467309         1.000000
3             4  1.543958         1.000000
4             5  1.495961         1.000000
..          ...       ...              ...
995           6  3.183553         1.937948
996           7  4.671217         1.000000
997           8  3.134615         1.000000
998           9  3.196304         1.939622
999          10  3.082792         1.873799

[1000 rows x 3 columns]
Average price change after collusion: -1.3360865629473142
