# Problem Set 3
Author: Stefano Sperti
Date: October 2025

In [2]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import scipy.stats as stats
import random   
import pyblp

#### Parameters for simulation

In [3]:
beta1 = 1
mu2 = 4
sigma2 = 1
mu3 = mu2
sigma3 = sigma2
alpha = -2
gamma0 = 0.5
gamma1 = 0.25
J =4

#### TRUE DATA

In [7]:
T = 600
x = np.abs(np.random.normal(0, 1, (T, J)))

w = np.abs(np.random.normal(0, 1, (T, J)))

In [8]:
xi = np.empty((T, J))
omega = np.empty((T, J))
sigma = [[1, 0.25], [0.25, 1]]
mu = [0, 0]
for t in range(T):
    for j in range(J):
        xi[t, j], omega[t, j] = np.random.multivariate_normal(mu, sigma)

Multinomial Logit Market Shares
$$
s_{ij}(\theta) 
= \Pr(d_{ij} = 1) 
= \int 
\frac{\exp\!\left(\beta_i x_j\right)}
{1 + \sum_{k \in \mathcal{J}} \exp\!\left(\beta_i x_k\right)}
\, f(\beta_i \mid \theta) \, \mathrm{d}\beta_i.
$$

Then, Let's differentiate with respect to the price variable. Then, for any product $m\in\mathcal J$,
$$
\frac{\partial s_{ij}(\theta)}{\partial p_m}
= \int \frac{\partial s_{ij}(\beta_i)}{\partial p_m}\, f(\beta_i\mid\theta)\,
\mathrm d\beta_i,
\qquad\text{with}\qquad
\frac{\partial s_{ij}(\beta_i)}{\partial p_m}
= s_{ij}(\beta_i)\,\big(\mathbf 1\{j=m\}-s_{im}(\beta_i)\big)\,\beta_i^{(p)}.
$$
Hence,
$$
\boxed{\;
\frac{\partial s_{ij}(\theta)}{\partial p_m}
= \int s_{ij}(\beta_i)\,\big(\mathbf 1\{j=m\}-s_{im}(\beta_i)\big)\,
\beta_i^{(p)}\, f(\beta_i\mid\theta)\,\mathrm d\beta_i.
\;}
$$

In particular,
$$
\frac{\partial s_{ij}(\theta)}{\partial p_j}
= \int s_{ij}(\beta_i)\big(1-s_{ij}(\beta_i)\big)\,\beta_i^{(p)}
\, f(\beta_i\mid\theta)\,\mathrm d\beta_i,
\quad
\frac{\partial s_{ij}(\theta)}{\partial p_m}
= -\int s_{ij}(\beta_i)s_{im}(\beta_i)\,\beta_i^{(p)}
\, f(\beta_i\mid\theta)\,\mathrm d\beta_i \;\;(m\neq j).
$$



In [None]:
def derivative_share_matrix(beta1, mu2, sigma2, mu3, sigma3, xi, x, J, T, prices):
    N = 100
    beta2_simulated = np.random.normal(mu2, sigma2, N)
    beta3_simulated = np.random.normal(mu3, sigma3, N)
    share_matrix_derivatives = np.zeros((J, J, T))
    for j in range(J):
        for m in range(J):
            for t in range(T):
                if j == m:
                    derivative = 0
                    for n in range(N):
                            exp_utilities = np.exp(beta1 * x[j, t] + beta2_simulated[n]* ((j ==0)|(j==1)) + beta3_simulated[n]*((j==2)|(j==3)) + xi[j,t] + alpha * prices[t, j])
                            sum_exp_utilities = np.sum(exp_utilities, axis=0, keepdims=True)
                            probabilities = exp_utilities / (1 + sum_exp_utilities)
                            derivative += probabilities * (1 - probabilities)
                    derivative_mean = derivative / N
                else:
                    for n in range(N):
                        exp_utilities_jt  = np.exp(beta1 * x[j, t] + beta2_simulated[n]* ((j ==0)|(j==1)) + beta3_simulated[n]*((j==2)|(j==3)) + xi[j,t] + alpha * prices[t, j])
                        exp_utilities_mt = np.exp(beta1 * x[m, t] + beta2_simulated[n]* ((m ==0)|(m==1)) + beta3_simulated[n]*((m==2)|(m==3)) + xi[m,t] + alpha * prices[t, m])
                        sum_exp_utilities = np.sum(exp_utilities, axis=0, keepdims=True)
                        probabilities_jt = exp_utilities_jt / (1 + sum_exp_utilities)
                        probabilities_mt = exp_utilities_mt / (1 + sum_exp_utilities)
                        derivative += -probabilities_jt * probabilities_mt   
                    derivative_mean = derivative / N
                share_matrix_derivatives[j, m, t] = derivative_mean


In [None]:
share_matrix = np.empty((T, J))
for t in range(T):
    for j in range(J):
        exp_utilities = np.exp(beta1 * x[t, j] + mu2 * ((j ==0)|(j==1)) + mu3 * ((j==2)|(j==3)) + xi[t,j])
        sum_exp_utilities = np.sum(exp_utilities)
        share_matrix[t, j] = exp_utilities / (1 + sum_exp_utilities)
share_matrix_derivatives = derivative_share_matrix(beta1, mu2, sigma2, mu3, sigma3, xi.T, x.T, J, T, prices=np.random.normal(5,1,(T,J)))

### Simulate data with pyblp

In [None]:
import numpy as np
import pandas as pd
import pyblp

pyblp.options.verbose = False         # reduce pyBLP chatter
pyblp.options.digits = 3              # pretty printing digits
rng = np.random.default_rng(2025)     # reproducible RNG

# ============================================================
# E1. DATA GENERATION VIA pyBLP.Simulation
# ------------------------------------------------------------
# Conventions:
#   Utility: δ_j = β_x * x_j  – α * p_j  + ξ_j, with α > 0
#   => price coefficient in utility is -α
#   We target -2.0 as requested, so set α = +2.0 (alpha_mag).
# ============================================================

T = 600                  # markets
J = 4                    # products per market (4 single-product firms)
alpha_mag = 2.0          # price coefficient in utility is -2.0
beta_x = 1.0             # true coefficient on x
rho_true = 0.55          # nested-logit correlation
c0, c1 = 5.0, 1.0        # linear cost: mc = c0 + c1 * w + ω
corr_xi_omega = 0.25     # corr(ξ, ω)

print("============================================================")
print("EXERCISE 1 —  with pyBLP.Simulation")
print("------------------------------------------------------------")
id_data = pyblp.build_id_data(T=T, J=J, F=4)
base = pd.DataFrame(pyblp.data_to_dict(id_data))   
# Ownership and nests: 1..4 single-product firms; nests: 0(wired) for 1&2, 1(sat) for 3&4
base["firm_ids"] = np.tile(np.array([1, 2, 3, 4]), T)
base["nesting_ids"] = np.tile(np.array([0, 0, 1, 1]), T)

# Exogenous shifters: |N(0,1)| for x (demand) and w (cost)
base["x"] = np.abs(rng.normal(size=len(base)))
base["w"] = np.abs(rng.normal(size=len(base)))

# Formulations: X1(demand), X2(None=logit/nested), X3(supply)
form_demand = pyblp.Formulation("1 + prices + x")  # constant, price, x
form_supply = pyblp.Formulation("1 + w")
formulations = (form_demand, None, form_supply)

# Configure the simulation (true parameters)
simulation = pyblp.Simulation(
    product_formulations=formulations,
    product_data=base,
    beta=[0.0, -alpha_mag, beta_x],   # intercept, price, x  (price coef is -alpha_mag = -2.0)
    gamma=[c0, c1],                    # cost side: constant and w
    rho=rho_true,                      # nested logit
    xi_variance=1.0, omega_variance=1.0, correlation=corr_xi_omega,
    costs_type="linear",
    seed=1234
)

# Compute equilibrium-consistent prices & shares
sim_res = simulation.replace_endogenous()
product_data = pd.DataFrame(pyblp.data_to_dict(sim_res.product_data))

# Within-market product index (0..J-1) for summary tables
product_data["position"] = product_data.groupby("market_ids").cumcount()

# Preview: first market (avoid columns that might not exist in older pyBLP)
print("Simulated price/share preview (first market):")
m0 = product_data.query("market_ids == 1").copy()
cols = [c for c in ["market_ids", "position", "firm_ids", "nesting_ids", "x", "w", "prices", "shares"] if c in m0.columns]
print(m0[cols].round(4).to_string(index=False))

EXERCISE 1 —  with pyBLP.Simulation
------------------------------------------------------------
Simulated price/share preview (first market):
market_ids  position firm_ids nesting_ids      x      w  prices  shares
         1         0        1           0 2.4419 0.2274  5.8336  0.0003
         1         1        2           0 0.7654 1.6747  6.4671  0.0000
         1         2        3           1 0.7597 0.1034  4.7754  0.0001
         1         3        4           1 0.2670 3.0356  8.0942  0.0000
