In [1]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
from numpy.linalg import inv

import pandas as pd
# Render floats with five decimal places in pandas output.
pd.set_option('display.float_format', '{:.5f}'.format)

# NOTE(review): looks like a leftover marker used to check that the cell re-ran — confirm it is still needed.
print("changed third")


changed third


# for simulation 

In [None]:
# input: market characteristics (x), and instrument 
def price_gen(char, cost_shifter):
    """Simulate prices from a characteristic and a cost shifter.

    The price is ``2 * char + cost_shifter`` plus a non-negative shock (the
    absolute value of a Normal draw with mean 0 and standard deviation 5),
    floored at 0.1.

    Parameters
    ----------
    char : sequence supporting ``len`` and arithmetic (e.g. ndarray, Series)
        Product/market characteristic; one shock is drawn per element.
    cost_shifter : scalar or array broadcastable against ``char``
        Cost-side term added to the price.

    Returns
    -------
    Prices with every value at least 0.1.
    """
    n_obs = len(char)
    # Folded-normal shock: always pushes the price upward.
    shock = np.abs(np.random.normal(0, 5, n_obs))
    raw_price = 2 * char + cost_shifter + shock
    return np.clip(raw_price, 0.1, None)
# output: endogenous price for each market set

# input: ALL parameters + product characteristics data 
def utility_gen(linear_guess, nonlinear_guess, product_char, price, n_consumer, rng=None):
    """Simulate consumer-by-product utilities with a random taste coefficient.

    For consumer ``i`` and product ``j`` the utility is
    ``beta_i * product_char[j] - alpha * price[j]`` where
    ``beta_i = beta + sigma * v_i`` and ``v_i`` is a standard normal draw.

    Parameters
    ----------
    linear_guess : sequence
        ``[alpha, beta]``: price coefficient and mean taste for the characteristic.
    nonlinear_guess : scalar
        ``sigma``, the scale applied to the standard normal taste draws.
    product_char, price : 1-D array-like
        Characteristic and price of each product.
    n_consumer : int
        Number of simulated consumers.
    rng : optional
        Object with a ``normal(loc, scale, size)`` method (e.g.
        ``np.random.default_rng(seed)``). When omitted, NumPy's global random
        state is used, as before.

    Returns
    -------
    ndarray of shape (n_products, n_consumer)
        Products as rows, consumers as columns.
    """
    alpha, beta = linear_guess[0], linear_guess[1]
    sigma = nonlinear_guess
    # One standard normal taste draw per simulated consumer.
    if rng is None:
        taste_draws = np.random.normal(0, 1, n_consumer)
    else:
        taste_draws = rng.normal(0, 1, n_consumer)
    alpha_i = np.full(n_consumer, alpha)  # price coefficient is common to all consumers
    beta_i = beta + sigma * taste_draws   # consumer-specific taste for the characteristic
    utility = np.outer(beta_i, product_char) - np.outer(alpha_i, price)
    # Transposed for easier indexing: products as rows, consumers as columns.
    return utility.T

# input: utilities in each market 
def choice_prob(m_utilities):
    """Logit choice probabilities for one market, with an outside option.

    Each column is treated as one consumer and each row as one product (the
    sum in the denominator runs over axis 0). The outside option has utility
    0, which contributes the ``+ 1`` term of the usual logit denominator:
    ``exp(u_j) / (1 + sum_k exp(u_k))``.

    The computation subtracts the per-consumer maximum (never less than the
    outside option's 0) before exponentiating. This avoids overflow without
    clipping the utilities, so probabilities stay exact for very large or
    very negative utilities.

    Parameters
    ----------
    m_utilities : array-like
        Utilities with products along axis 0 and consumers along axis 1.

    Returns
    -------
    ndarray with the same shape as ``m_utilities``
        Choice probability of each product for each consumer; each column
        sums to less than 1, the remainder being the outside option.
    """
    utilities = np.asarray(m_utilities, dtype=float)
    if utilities.size == 0:
        # No products or no consumers: nothing to normalise.
        return np.zeros(utilities.shape)
    # Largest utility each consumer faces, counting the outside option (0).
    shift = np.maximum(utilities.max(axis=0), 0.0)
    exp_utilities = np.exp(utilities - shift)
    # exp(-shift) is the outside option's term after the same shift.
    denom = exp_utilities.sum(axis=0) + np.exp(-shift)
    return exp_utilities / denom


# for estimation 

In [3]:
# input: a matrix of utilities for each consumer x product

# using utility_gen to generate utilities (given guess) for each consumer x product
def predicted_share(utilities_matrix, delta_guess):
    """Predicted market share of each product in one market.

    Adds the product-level mean utility ``delta_guess`` to every consumer's
    utility, converts to choice probabilities with ``choice_prob``, and
    averages over consumers.

    Parameters
    ----------
    utilities_matrix : ndarray, shape (n_products, n_consumers)
        Consumer-specific utilities, products as rows.
    delta_guess : array-like, length n_products
        Mean utility added to each product's row.

    Returns
    -------
    ndarray, length n_products
        Predicted shares, floored at 1e-10 so that callers can take logs.
    """
    # Accept lists as well as arrays; [:, None] needs an ndarray.
    delta = np.asarray(delta_guess, dtype=float)
    shifted_utilities = utilities_matrix + delta[:, None]
    choice_prob_matrix = choice_prob(shifted_utilities)
    # Average each product's choice probability across the simulated consumers.
    pred_share = choice_prob_matrix.mean(axis=1)
    # Floor keeps log(pred_share) finite in the contraction mapping.
    return np.maximum(pred_share, 1e-10)
# output: predicted share for each product in each market

# input: a matrix of utilities for each consumer x product + observed share 
def contraction_mapping(given_utility, observed_share, tolerance, max_iter=10000):
    """Solve for the mean utilities that match observed shares in one market.

    Iterates ``delta <- delta + log(observed) - log(predicted(delta))``
    starting from zeros, until the largest absolute change in ``delta`` is at
    most ``tolerance``.

    Parameters
    ----------
    given_utility : ndarray, shape (n_products, n_consumers)
        Consumer-specific utilities passed to ``predicted_share``.
    observed_share : array-like, length n_products
        Observed market shares; values are floored at 1e-30 before the log.
    tolerance : float
        Convergence threshold on the sup-norm of the update.
    max_iter : int, optional
        Upper bound on the number of iterations. A ``RuntimeWarning`` is
        issued and the last iterate returned if it is reached.

    Returns
    -------
    ndarray, length n_products
        The last iterate of ``delta``.

    Notes
    -----
    At least one iteration is always performed.
    """
    import warnings  # local import keeps this block self-contained

    delta_guess = np.zeros(given_utility.shape[0])
    if delta_guess.size == 0:
        # Market without products: nothing to solve for.
        return delta_guess

    # Loop-invariant: the floored log of the observed shares.
    log_observed = np.log(np.clip(observed_share, 1e-30, None))

    for _ in range(max_iter):
        adjusted_share = predicted_share(given_utility, delta_guess)
        delta_new = delta_guess + log_observed - np.log(adjusted_share)
        difference = np.max(np.abs(delta_new - delta_guess))
        delta_guess = delta_new
        if not np.isfinite(difference):
            # NaN/inf would otherwise end the loop silently or never end it.
            warnings.warn(
                "contraction_mapping: non-finite update encountered; "
                "returning the last iterate",
                RuntimeWarning,
            )
            break
        if difference <= tolerance:
            break
    else:
        warnings.warn(
            "contraction_mapping: no convergence within max_iter iterations; "
            "returning the last iterate",
            RuntimeWarning,
        )
    return delta_guess
# output: converged mean utility for each product in each market

# input: market dataset + all guesses
def mean_utility(input_dataset, sigma_guess, parameter_guess, tolerance, no_consumer_sim):
    """Compute the converged mean utility of every product row in the dataset.

    For each distinct ``market_id`` the function simulates utilities with
    ``utility_gen`` under the given guesses and runs ``contraction_mapping``
    against that market's observed shares.

    Parameters
    ----------
    input_dataset : DataFrame
        Must contain the columns ``market_id``, ``share``, ``prod_char`` and
        ``price``.
    sigma_guess : scalar
        Nonlinear parameter passed to ``utility_gen``.
    parameter_guess : sequence
        Linear parameters passed to ``utility_gen``.
    tolerance : float
        Convergence threshold passed to ``contraction_mapping``.
    no_consumer_sim : int
        Number of simulated consumers per market.

    Returns
    -------
    list of float
        One mean utility per row of ``input_dataset``, in row order, so it can
        be attached to the dataset whether or not rows are grouped by market.
    """
    market_ids = input_dataset['market_id'].values
    # Filled by row position; NaN marks rows whose market could not be matched.
    delta_all = np.full(len(input_dataset), np.nan)
    # Iterate over the ids actually present instead of assuming 0..n-1.
    for market in np.unique(market_ids):
        in_market = market_ids == market
        if not in_market.any():
            # e.g. a NaN id, which never compares equal to itself
            continue
        market_i = input_dataset[in_market]
        observed_share = market_i['share'].values
        # Simulate utilities for this market under the current guesses.
        sim_utilities = np.array(
            utility_gen(
                parameter_guess,
                sigma_guess,
                market_i['prod_char'].values,
                market_i['price'].values,
                no_consumer_sim,
            )
        )
        delta_all[in_market] = contraction_mapping(sim_utilities, observed_share, tolerance)
    return list(delta_all)
# output: a list of mean utilities to append to the input dataset

def estimate_parameter(input_dataset, instrument_choice, mean_utility):
    """Estimate the linear demand parameters by two-stage least squares.

    The regression is ``mean_utility = beta * prod_char - alpha * price + error``
    with ``price`` treated as endogenous. The instrument matrix holds the
    exogenous regressor ``prod_char`` and the excluded instrument named by
    ``instrument_choice``. ``price`` itself is deliberately not an instrument:
    including it would reproduce price exactly in the first stage and leave
    the endogeneity uncorrected.

    Parameters
    ----------
    input_dataset : DataFrame
        Must contain ``prod_char``, ``price`` and the instrument column.
    instrument_choice : str
        Name of the column used as the excluded instrument for price.
    mean_utility : array-like
        Dependent variable, one value per row of ``input_dataset``.

    Returns
    -------
    (alpha_hat, beta_hat)
        ``alpha_hat`` is the negated coefficient on price; ``beta_hat`` is the
        coefficient on ``prod_char``.

    Raises
    ------
    numpy.linalg.LinAlgError
        If ``Z'Z`` or the second-stage cross-product matrix is singular.
    """
    X = np.asarray(input_dataset[['prod_char', 'price']].values, dtype=float)
    Z = np.asarray(input_dataset[['prod_char', instrument_choice]].values, dtype=float)
    y = np.asarray(mean_utility, dtype=float)
    # First stage: project the regressors on the instruments.
    Pi = np.linalg.solve(Z.T @ Z, Z.T @ X)
    X_hat = Z @ Pi
    # Second stage: regress the mean utility on the fitted regressors.
    theta = np.linalg.solve(X_hat.T @ X_hat, X_hat.T @ y)
    # Utility enters price as -alpha * price, hence the sign flip.
    alpha_hat, beta_hat = -theta[1], theta[0]
    return alpha_hat, beta_hat



In [4]:
# utility = utility_gen([-1,5],0.4,[2,3,5,2],[1,1,1,1],5)
# # print("utility out:",utility)

# # print(utility + np.array([1, 2, 1, 1])[:,None])
# choice_matrix = choice_prob(utility)
# outer_market_share = choice_matrix.sum(axis=1)/choice_matrix.shape[1]
# # print("Outer market share:", outer_market_share)

# utility = utility_gen([-1,5],0.4,[2,4,1,7],[1,1,1,1],5)

# print("delta converged", contraction_mapping(utility, outer_market_share, 0.1))
# market_data = pd.read_csv('simulated_market_data.csv')
# delta_array = mean_utility(market_data, 0.4, [-1, 5], 0.01, 100)
# print(estimate_parameter(market_data, 'instrument_cost', delta_array))