In [3]:
import numpy as np
import pandas as pd
from scipy.optimize import minimize
from scipy.stats import norm
import matplotlib.pyplot as plt

%matplotlib inline

# Functions

In [491]:
# Trimming function
def trim(X, percent):
    """Flag the rows of X that lie strictly inside the central band of every column.

    For each column the band runs from the ``alpha`` to the ``1 - alpha``
    percentile of that column, with ``alpha = (1 - percent)/2``.

    Parameters
    ----------
    X : 2-D array-like, shape (n, k)
        Data to trim; one column per variable.
    percent : float
        Central share of each column to keep (e.g. 0.98).

    Returns
    -------
    numpy.ndarray, shape (n,)
        1.0 for rows inside the band in all k columns, 0.0 otherwise.
    """
    tail = (1 - percent)/2
    n_rows, n_cols = np.shape(X)
    values = np.asarray(X)

    keep = np.ones(n_rows)

    for col in range(n_cols):
        column = values[:, col]
        lower_bd = np.percentile(column, (tail)*100)
        upper_bd = np.percentile(column, (1 - tail)*100)
        # Strict inequalities: observations equal to a bound are trimmed.
        keep = keep * ((lower_bd < column) & (column < upper_bd))

    return keep

# Conditional Expectation
def CE_1(Y, X, arg, r):
    """Kernel-weighted estimate of E[Y | X = arg[j]] for every row j of `arg`.

    Each observation receives a product-Gaussian kernel weight with per-column
    bandwidth ``h = n**(-r) * std(X, ddof=1)``; the estimate at a point is the
    weighted mean of Y. Inputs are converted to plain float ndarrays, so the
    function gives the same answer for np.matrix and np.ndarray arguments
    (the previous version only worked with np.matrix, because it relied on
    ``*`` being a matrix product).

    Parameters
    ----------
    Y : array-like with n elements
        Observed outcomes, e.g. shape (n, 1) or (n,).
    X : array-like, shape (n, k)
        Conditioning variables; a length-n vector is treated as one column.
    arg : array-like, shape (m, k)
        Evaluation points; a length-m vector is treated as m points of one variable.
    r : float
        Bandwidth exponent.

    Returns
    -------
    numpy.ndarray, shape (m, 1)
        Estimated conditional expectations. An entry is NaN when every kernel
        weight at that point underflows to zero (0/0).
    """
    y = np.asarray(Y, dtype = float).reshape(-1)
    x = np.asarray(X, dtype = float)
    pts = np.asarray(arg, dtype = float)
    if x.ndim == 1:
        x = x[:, np.newaxis]
    if pts.ndim == 1:
        pts = pts[:, np.newaxis]

    n_arg = pts.shape[0]
    n = x.shape[0]
    h = (n**(-r)) * np.std(x, axis = 0, ddof = 1)
    e = np.zeros((n_arg, 1))

    for j in range(0, n_arg):
        # Gaussian kernel per column, multiplied across columns -> one weight per observation
        k = np.prod(norm.pdf((pts[j] - x) / h) / h, axis = 1)
        e[j] = (np.dot(y, k) / n) / np.mean(k)

    return e

# Semiparametric Least Squares objective
def SLS_1(b, Y, X, X_ind):
    """Semiparametric least squares criterion at coefficient vector `b`.

    Forms the index ``X * b``, estimates E[Y | index] at every observation
    with CE_1 (bandwidth exponent 1/5), and returns minus one half of the
    sum of squared residuals weighted by the indicator `X_ind`.

    Parameters
    ----------
    b : coefficient vector (converted with np.matrix and transposed to a column)
    Y : outcomes, one row per observation
    X : regressors, one row per observation
    X_ind : per-observation weights, e.g. the 0/1 vector returned by trim()

    Returns
    -------
    The product ``-0.5 * np.matrix(X_ind) * residual**2``.
    """
    index = X * np.matrix(b).T
    fitted = CE_1(Y, index, index, 1/5)
    squared_error = np.power((Y - fitted), 2)

    weights = np.matrix(X_ind)

    return ((-0.5 * weights) * squared_error)

# Regress Y on X with SLS
def run_semiparametric_regression(Y, X, guess, trim_percent = 0.98):
    """Estimate the index coefficients by minimising the trimmed SLS criterion with BFGS.

    Parameters
    ----------
    Y : outcomes, one row per observation
    X : regressors, one row per observation
    guess : array-like
        Starting values; any shape holding one value per regressor
        (e.g. a 1 x k np.matrix row) is flattened to a 1-D vector.
    trim_percent : float
        Central share of every regressor kept by trim().

    Returns
    -------
    scipy.optimize.OptimizeResult
        The result object returned by ``minimize(..., method='BFGS')``.
    """
    # minimize() requires a 1-D start vector; recent SciPy raises on 2-D input.
    x_start = np.asarray(guess, dtype = float).ravel()

    # The trimming indicator depends only on X, so build it once rather than
    # on every objective evaluation.
    X_ind = trim(X, trim_percent)

    def obj_f(x_0):
        # SLS_1 returns minus the criterion as a 1x1 product; negate it and
        # hand the optimiser a plain Python float.
        return np.asarray(-SLS_1(x_0, Y, X, X_ind)).item()

    result = minimize(obj_f, x_start, method='BFGS')
    return result

# Hessian to Covariance matrix
def convert_hessian_to_cov(Y, X, results):
    """Scale the optimiser's inverse Hessian by the mean squared linear residual.

    Parameters
    ----------
    Y : outcomes, one row per observation
    X : regressors, one row per observation (``X * column`` must be a matrix product)
    results : object with attributes ``x`` (coefficients) and ``hess_inv``

    Returns
    -------
    ``results.hess_inv`` multiplied by ``mean((Y - X*x)**2)``.
    """
    coeff_column = np.matrix(results.x).T
    linear_residual = Y - X*coeff_column
    sigma_2_hat = np.mean(np.power(linear_residual, 2))

    return results.hess_inv * sigma_2_hat

# Marginal effects at a point using CE_1
def compute_marginal_effect(Y, X, ind, point, beta, delta = 0.01):
    """Forward-difference marginal effect of regressor `ind` at `point`.

    Evaluates CE_1 on the fitted index ``X*beta`` at ``point*beta`` and at the
    same point with column `ind` increased by `delta`, and returns the
    difference divided by `delta`.

    Parameters
    ----------
    Y : outcomes, one row per observation
    X : regressors, one row per observation
    ind : int
        Column of `point` to nudge.
    point : 1 x k row (np.matrix or 2-D array) at which to evaluate
    beta : k x 1 coefficient column (``*`` is used as a matrix product)
    delta : float
        Size of the nudge.

    Returns
    -------
    float
    """
    # Float copy: with np.copy an integer-typed point would silently truncate
    # the "+ delta" below and produce a zero nudge.
    point_nudge = np.array(point, dtype = float)
    point_nudge[0, ind] = point_nudge[0, ind] + delta
    point_nudge = np.matrix(point_nudge)

    v_hat = X*beta
    v_hat_avg = point*beta
    v_hat_avg_nudge = point_nudge*beta

    difference = CE_1(Y, v_hat, v_hat_avg_nudge, 1/5) - CE_1(Y, v_hat, v_hat_avg, 1/5)

    # np.asscalar no longer exists in current NumPy; .item() is the replacement.
    return np.asarray(difference).item()/delta

# Range of marginal effects 
def calculate_me_range(Y, X, result, ind, point, delta = .001, delta_range = [0.0001, 0.01], parameter_range = [0, 0.014], n_1 = 200, n_2 = 5):
    """Marginal effect of regressor `ind` over a grid of its values and of step sizes.

    Parameters
    ----------
    Y : outcomes, one row per observation
    X : regressors, one row per observation
    result : object whose ``x`` attribute holds the coefficient vector
    ind : int
        Column of `point` that is varied.
    point : 1 x k row giving the values of the other regressors
    delta : float
        Not used: the step sizes come from `delta_range`. Kept so existing
        calls that pass it keep working.
    delta_range : [low, high]
        End points of the `n_2` step sizes tried.
    parameter_range : [low, high]
        End points of the `n_1` values given to regressor `ind`.
    n_1, n_2 : int
        Grid sizes.

    Returns
    -------
    numpy.ndarray, shape (n_1, 2, n_2)
        ``[i, 0, j]`` is the i-th grid value, ``[i, 1, j]`` the marginal effect
        there computed with the j-th step size.
    """
    beta = np.matrix(result.x).T

    me_results = np.zeros(shape = (n_1, 2, n_2))

    linspace_1 = np.linspace(parameter_range[0], parameter_range[1], num = n_1)
    linspace_2 = np.linspace(delta_range[0], delta_range[1], num = n_2)

    for j, step in enumerate(linspace_2):

        for i, value in enumerate(linspace_1):

            # Float copy so a fractional grid value is not truncated when
            # `point` holds integers.
            point_temp = np.array(point, dtype = float)
            point_temp[0, ind] = value

            # compute_marginal_effect already returns a plain float, so no
            # further scalar conversion is needed (the old np.asscalar call
            # failed on it).
            me = compute_marginal_effect(Y, X, ind, np.matrix(point_temp), beta, delta = step)

            me_results[i, :, j] = (value, me)

    return me_results

# T-stats with first coefficient fixed
def find_tstats(Y, X, results):
    """t-statistics of the coefficients after scaling by the first coefficient.

    The coefficients are divided by ``results.x[0]`` and each one (except the
    first) is divided by the square root of the matching diagonal entry of
    ``convert_hessian_to_cov(Y, X, results)``.

    Returns
    -------
    numpy.ndarray, shape (len(results.x),)
        Entry 0 is NaN (the first coefficient is fixed by the scaling).
    """
    variances = np.asarray(convert_hessian_to_cov(Y, X, results)).diagonal()

    n_coeffs = np.shape(results.x)[0]
    theta = results.x/results.x[0]

    # first t-stat is unknown
    t_stats = np.full(n_coeffs, np.nan)
    t_stats[1:] = theta[1:] / np.sqrt(variances[1:n_coeffs])

    return t_stats

# Load Data

In [839]:
sample_size = 1000 # None => All obs
random_seed = 0    # fixes the subsample so a re-run reproduces the same estimates

# Complete cases only
data_df = pd.read_csv('../data/processed/regression_data_levels.csv').dropna()

# DataFrame.sample(n = None) would return a single row, not the whole frame,
# so only subsample when a size was actually given.
if sample_size is not None:
    data_df = data_df.sample(n = sample_size, random_state = random_seed)

# Interaction of every price-improvement / timing measure with the rebate dummy
for base_col in ['PrImp_Pct', 'PrImp_AvgAmt', 'PrImp_ExpAmt', 'PrImp_AvgT', 'All_AvgT']:
    data_df[base_col + '*Rebate_Dummy'] = data_df[base_col] * data_df['Rebate_Dummy']

# Regressions 

In [840]:
# Fits: regressand on the left of '~', regressors separated by '+'
fit1_formula = 'MktShare ~ PrImp_Pct + PrImp_AvgAmt + PrImp_AvgT'
fit2_formula = 'MktShare ~ PrImp_ExpAmt + PrImp_AvgT'
fit3_formula = 'MktShare ~ PrImp_Pct + PrImp_AvgAmt + All_AvgT'
fit4_formula = 'MktShare ~ PrImp_ExpAmt + All_AvgT'

# Alternative specifications with rebate-dummy interactions (disabled):
# fit1_formula = 'MktShare ~ PrImp_Pct + PrImp_Pct*Rebate_Dummy + PrImp_AvgAmt + PrImp_AvgAmt*Rebate_Dummy + PrImp_AvgT + PrImp_AvgT*Rebate_Dummy'
# fit2_formula = 'MktShare ~ PrImp_ExpAmt + PrImp_ExpAmt*Rebate_Dummy + PrImp_AvgT + PrImp_AvgT*Rebate_Dummy'
# fit3_formula = 'MktShare ~ PrImp_Pct + PrImp_Pct*Rebate_Dummy + PrImp_AvgAmt + PrImp_AvgAmt*Rebate_Dummy + All_AvgT + All_AvgT*Rebate_Dummy'
# fit4_formula = 'MktShare ~ PrImp_ExpAmt + PrImp_ExpAmt*Rebate_Dummy + All_AvgT + All_AvgT*Rebate_Dummy'


def formulaCols(x):
    """Turn 'y ~ a + b' into the column list ['y', 'a', 'b'] (spaces removed)."""
    without_spaces = x.replace(' ', '')
    return without_spaces.replace('~', '+').split('+')


# One column list per fit; element 0 is the regressand
fit_formulae = list(map(formulaCols, (fit1_formula, fit2_formula, fit3_formula, fit4_formula)))

# Store results: one slot per fit, filled in by the regression loop
fit_results = [None for _ in fit_formulae]

## Results

In [841]:
for i, formula_cols in enumerate(fit_formulae):

    ## Get results
    print('Regressing with fit %d...' % i )

    # Column 0 of the selection is the regressand, the rest are the regressors
    data = data_df[formula_cols]
    data_matrix = np.matrix(data)
    Y = data_matrix[:, 0]
    X = data_matrix[:, 1:]

    # Starting values are the row of X at index 1 (a 1 x k matrix).
    # NOTE(review): confirm that a data row is the intended starting point.
    guess = X[1, :]
    results = run_semiparametric_regression(Y, X, guess)

    fit_results[i] = results

    ## Update results
    # Normalize results with first coefficient
    results.x = results.x / results.x[0]

    regressor_names = formula_cols[1:]

    # Add dictionary of coefficients
    results.coeffs = dict(zip(regressor_names, results.x))

    # Add dictionary of standard errors
    # (computed after the normalisation above, so convert_hessian_to_cov sees
    # the normalised coefficients together with the optimiser's hess_inv)
    V = convert_hessian_to_cov(Y, X, results)
    results.stderrs = {name: np.sqrt(V[j,j]) for j, name in enumerate(regressor_names)}

print('Complete')

Regressing with fit 0...
Regressing with fit 1...
Regressing with fit 2...
Regressing with fit 3...
Complete


## Marginal Effects

In [842]:
# marginal effects for each var with others at percentiles

delta = .01

for i in range(0, len(fit_formulae)):

    data = data_df[fit_formulae[i]]
    X = np.matrix(data)[:, 1:]
    Y = np.matrix(data)[:, 0]

    # Neither the coefficient column nor the evaluation points depend on which
    # regressor is nudged, so build them once per fit instead of once per regressor.
    beta = np.matrix(fit_results[i].x).T
    percentile_points = {
        percentile: np.matrix(np.percentile(X, percentile, axis = 0))
        for percentile in range(20, 81, 20)
    }

    # regressor name -> {percentile -> marginal effect with all regressors at that percentile}
    fit_results[i].marginal_effects = {}

    for j in range(0, len(fit_results[i].x)):

        fit_results[i].marginal_effects[fit_formulae[i][j+1]] = {
            percentile: compute_marginal_effect(Y, X, j, point, beta, delta = delta)
            for percentile, point in percentile_points.items()
        }

# Export

In [843]:
from openpyxl import Workbook, load_workbook, utils
from openpyxl.styles import Alignment, Font
import string

In [844]:
def get_sig_stars(coeff, stderr, p_value_labels):
    """Return the significance label for a coefficient, or '' if none applies.

    The p-value is ``1 - norm.cdf(|coeff/stderr|)``; the label returned is the
    one attached to the smallest threshold in `p_value_labels` that the
    p-value is strictly below.

    NOTE(review): this is a one-sided normal p-value; confirm that one-sided
    thresholds are what the table is meant to report.

    Parameters
    ----------
    coeff, stderr : float
    p_value_labels : dict mapping threshold -> label string

    Returns
    -------
    str
    """
    p_val = 1 - norm.cdf(np.abs(coeff/stderr))

    thresholds_met = [threshold for threshold in p_value_labels if p_val < threshold]

    if thresholds_met:
        return p_value_labels[min(thresholds_met)]
    return ''

# Params
# p-value threshold -> marker appended to a coefficient whose p-value is below it
p_value_labels = {
    0.05: '*',
    0.01: '**',
    0.001: '***',
}
# Destination of the exported workbook
workbook_file_loc = '../analysis/results/semiparametric_regressions.xlsx'

In [845]:
## Open workbook
# New in-memory workbook; the result sheets are added to it below and it is
# written to workbook_file_loc in the final cell.
wb = Workbook()

## Coefficient Results

In [846]:
# Sheet for the coefficient table; column B (regressor labels) is widened
ws = wb.create_sheet('Coefficient Results')
ws.column_dimensions['B'].width = 30

In [847]:
## Label regressors

# Every column used by any fit, except the regressand MktShare, in sorted order
fit_regressors = sorted({name for formula in fit_formulae for name in formula} - {'MktShare'})

# Regressor name -> worksheet row holding its label
regressor_cells = {}

# First label's row
regressor_label_row = 4

for regressor in fit_regressors:

    label_cell = ws['B' + str(regressor_label_row)]
    label_cell.value = regressor
    label_cell.alignment = Alignment(horizontal = 'right')
    label_cell.font = Font(bold = True)

    regressor_cells[regressor] = regressor_label_row

    # Labels sit on every second row, leaving the row below each one free
    regressor_label_row += 2

In [848]:
## Label regressand
# Header merged across row 2, from column C to the letter at index 2*len(fit_results)
start_cell = 'C2'
end_cell   = string.ascii_uppercase[2*len(fit_results)] + '2'

ws.merge_cells(':'.join([start_cell, end_cell]))

regressand_cell = ws[start_cell]
regressand_cell.value = 'MktShare'
regressand_cell.alignment = Alignment(horizontal = 'center')
regressand_cell.font = Font(bold = True)

In [849]:
## Label fits
# Fit i gets the column at letter index 2*i + 2 (C, E, G, ...); the column
# after it is a narrow spacer.
for i, _ in enumerate(fit_results):

    cell_row = 3
    cell_col = string.ascii_uppercase[2*i + 2]
    spacer_col = string.ascii_uppercase[2*i + 3]

    fit_header = ws[cell_col + str(cell_row)]
    fit_header.value = 'Fit ' + str(i+1)
    fit_header.alignment = Alignment(horizontal = 'center')
    fit_header.font = Font(underline = 'single')

    # adjust cell widths
    ws.column_dimensions[cell_col].width = 15
    ws.column_dimensions[spacer_col].width = 5

In [850]:
## Enter results
# Coefficient (rounded to 4 decimals, with significance markers) goes on the
# regressor's label row; its standard error goes on the row directly below.
for i, fit in enumerate(fit_results):

    fit_column = string.ascii_uppercase[2*i + 2]

    for regressor, coeff in fit.coeffs.items():

        stderr = fit.stderrs[regressor]
        regressor_label_row = regressor_cells[regressor]

        coeff_text = str(np.round(coeff, decimals = 4))
        # A coefficient exactly equal to 1 is written without markers
        if coeff != 1:
            coeff_text = coeff_text + get_sig_stars(coeff, stderr, p_value_labels)

        coeff_cell = ws[fit_column + str(regressor_label_row)]
        coeff_cell.value = coeff_text
        coeff_cell.alignment = Alignment(horizontal = 'center')

        stderr_cell = ws[fit_column + str(regressor_label_row + 1)]
        stderr_cell.value = stderr
        stderr_cell.alignment = Alignment(horizontal = 'center')

## Marginal Effects

In [851]:
# Set up table: one sheet per fit, regressors down column B, percentiles across
for i, fit in enumerate(fit_results):

    ws = wb.create_sheet(title = 'Fit ' + str(i+1) +' Marginal Effects')
    ws.column_dimensions["B"].width = 30

    marginal_effects = fit.marginal_effects

    fit_i_regressors = list(marginal_effects)
    # The percentile keys are read from the first regressor's dictionary
    percentiles = sorted(next(iter(marginal_effects.values())))

    ## Label regressors
    # Labels start at row 4 and take every second row
    regressor_cells = {}

    for j, regressor in enumerate(fit_i_regressors):

        regressor_label_row = 4 + 2*j

        label_cell = ws['B' + str(regressor_label_row)]
        label_cell.value = regressor
        label_cell.alignment = Alignment(horizontal = 'right')
        label_cell.font = Font(bold = True)

        regressor_cells[regressor] = regressor_label_row

    ## Table title label
    start_cell = 'C2'
    end_cell   = string.ascii_uppercase[2*len(percentiles)] + '2'

    ws.merge_cells(start_cell + ':' + end_cell)

    title_cell = ws[start_cell]
    title_cell.value = 'Marginal Effects (MktShare)'
    title_cell.alignment = Alignment(horizontal = 'center')
    title_cell.font = Font(bold = True)

    ## Label percentiles
    for j, percentile in enumerate(percentiles):

        cell_col = string.ascii_uppercase[2*j + 2]

        percentile_cell = ws[cell_col + str(3)]
        percentile_cell.value = 'Pct ' + str(percentile)
        percentile_cell.alignment = Alignment(horizontal = 'center')
        percentile_cell.font = Font(underline = 'single')

        # adjust cell widths
        ws.column_dimensions[cell_col].width = 15
        ws.column_dimensions[string.ascii_uppercase[2*j + 3]].width = 5

    ## Enter results
    for regressor, me_pct_dict in marginal_effects.items():

        row_label = str(regressor_cells[regressor])

        for k, percentile in enumerate(percentiles):

            value_cell = ws[string.ascii_uppercase[2*k + 2] + row_label]
            value_cell.value = np.round(me_pct_dict[percentile], decimals = 6)
            value_cell.alignment = Alignment(horizontal = 'center')

## Save and Close

In [852]:
# Workbook() starts with a default sheet titled 'Sheet'; drop it before saving.
# The membership test keeps this cell re-runnable: an unconditional delete
# raises KeyError once the sheet is already gone.
if 'Sheet' in wb.sheetnames:
    del wb['Sheet']

wb.save(workbook_file_loc)
wb.close()