In [223]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from scipy import stats
import matplotlib.pyplot as plt

In [224]:
# generate data tables from the full data
def calcEffi(df, x):
    """Efficiency (in percent) of the auction that row ``x`` belongs to.

    An auction is identified by session code, round number and group id.
    Efficiency is the final winner's item value divided by the largest item
    value among the auction's rows, times 100. It is 0 when no row of the
    auction is flagged as the final winner.
    """
    same_auction = (
        (df["session.code"] == x["session.code"])
        & (df["subsession.round_number"] == x["subsession.round_number"])
        & (df["group.id_in_subsession"] == x["group.id_in_subsession"])
    )
    auction = df[same_auction]

    winner_value = 0
    for _, bidder in auction.iterrows():
        # if several rows are flagged, the last flagged row determines the value
        if bidder["player.is_final_winner"]:
            winner_value = bidder["player.item_value"]

    highest_value = np.max(auction["player.item_value"])
    return winner_value / highest_value * 100

def genData(df_d, df_h):
    """Add regression covariates to both raw frames and build the five analysis tables.

    Parameters
    ----------
    df_d : DataFrame of the Dutch-auction data (gets ``is_honolulu = 0``).
    df_h : DataFrame of the Honolulu-auction data (gets ``is_honolulu = 1``).

    Both frames are modified in place: the derived columns are added to them.

    Returns
    -------
    (df_ua, df_ub, df_dura, df_sell, df_effi) : tables for auctioneer utility,
    bidder utility, auction duration, selling price and efficiency. All but
    ``df_ub`` are de-duplicated; ``df_ub`` keeps only final-winner rows.
    """

    def add_covariates(frame, honolulu_flag, elapsed, price_col):
        # columns are created in a fixed order so both frames end up with the same layout
        frame["is_honolulu"] = honolulu_flag
        frame["high_cost"] = (frame["session.config.discount_b"] == 0.019)
        frame["high_cost_honolulu"] = frame["high_cost"] * frame["is_honolulu"]
        frame["is_5_bidder"] = frame["session.config.name"].str.contains("5_")
        frame["5_bidder_honolulu"] = frame["is_5_bidder"] * frame["is_honolulu"]
        frame["round_honolulu"] = frame["subsession.round_number"] * frame["is_honolulu"]
        frame["round_squared"] = frame["subsession.round_number"] ** 2
        frame["time_elapsed"] = elapsed(frame)
        frame["final_price"] = frame[price_col]
        frame["effi"] = frame.apply(lambda row: calcEffi(frame, row), axis=1)

    def stacked(columns):
        # Dutch rows first, then Honolulu rows, with a fresh index
        return pd.concat([df_d[columns], df_h[columns]], ignore_index=True)

    # Dutch: duration is the Dutch clock only, price is the Dutch final price
    add_covariates(
        df_d, 0,
        lambda f: f["group.dutch_time_elapsed"],
        "group.dutch_final_price",
    )
    # Honolulu: duration is the Dutch stage plus the English stage
    add_covariates(
        df_h, 1,
        lambda f: f["group.dutch_time_elapsed"] + f["group.english_time_elapsed"],
        "group.final_price",
    )

    covariates = [
        "is_honolulu", "high_cost", "high_cost_honolulu", "is_5_bidder", "5_bidder_honolulu",
        "subsession.round_number", "round_honolulu", "round_squared",
    ]
    # these identify one auction and are what drop_duplicates() relies on
    auction_keys = ["group.id_in_subsession", "session.code"]

    # auctioneer utility: one row per auction
    df_ua = stacked(["group.auctioneer_utility"] + covariates + auction_keys).drop_duplicates()

    # bidder utility: inputs needed to compute the winner's utility, winners only
    bidder_cols = (
        ["player.item_value", "final_price", "time_elapsed"]
        + covariates
        + ["group.id_in_subsession", "participant.code", "player.is_final_winner", "session.code"]
    )
    df_ub = stacked(bidder_cols)
    df_ub = df_ub[df_ub["player.is_final_winner"] == 1]
    df_ub = df_ub.drop(["player.is_final_winner"], axis=1)

    # auction duration, selling price and efficiency: one row per auction each
    df_dura = stacked(["time_elapsed"] + covariates + auction_keys).drop_duplicates()
    df_sell = stacked(["final_price"] + covariates + auction_keys).drop_duplicates()
    df_effi = stacked(["effi"] + covariates + auction_keys).drop_duplicates()

    return df_ua, df_ub, df_dura, df_sell, df_effi

In [225]:
# Load the raw Dutch and Honolulu exports, then derive the five analysis tables.
# genData() also adds the derived columns to df_d and df_h in place.
df_d, df_h = (
    pd.read_csv(csv_name)
    for csv_name in ("Dutch_paydiff_norm.csv", "Honolulu_paydiff_norm.csv")
)
df_ua, df_ub, df_dura, df_sell, df_effi = genData(df_d, df_h)

## Functions

### Regression Models

In [226]:
# regression for auctioneer utility y ~ const + is_honolulu + high_cost + honolulu_high_cost
def regUa(df):
    """Set up (without fitting) the OLS model of auctioneer utility on the cost design.

    Regressors are an intercept, ``is_honolulu``, ``high_cost`` and the
    interaction ``high_cost_honolulu``; round-number controls are not included.

    Returns the unfitted ``sm.OLS`` model; call ``.fit()`` on it.
    """
    regressors = ["is_honolulu", "high_cost", "high_cost_honolulu"]
    outcome = df["group.auctioneer_utility"].astype(float)
    # the intercept is placed in the FIRST column, which fixes the order of the
    # values later passed to predict()
    design = sm.add_constant(df[regressors].astype(float))
    # missing="raise": fail on missing values instead of dropping rows
    return sm.OLS(outcome, design, missing="raise")

# regression for auctioneer utility y ~ const + is_honolulu + is_5_bidder + 5_bidder_honolulu
def regUa2(df):
    """Set up (without fitting) the OLS model of auctioneer utility on the bidder-count design.

    Regressors are an intercept, ``is_honolulu``, ``is_5_bidder`` and the
    interaction ``5_bidder_honolulu``; round-number controls are not included.

    Returns the unfitted ``sm.OLS`` model; call ``.fit()`` on it.
    """
    regressors = ["is_honolulu", "is_5_bidder", "5_bidder_honolulu"]
    outcome = df["group.auctioneer_utility"].astype(float)
    covariates = df[regressors].astype(float)
    # intercept goes into the first column of the design matrix
    design = sm.add_constant(covariates)
    return sm.OLS(outcome, design, missing="raise")

In [227]:
def discount(value, bid, t, cost):
    """Time-adjusted surplus ``value - bid``.

    The rate is 0.019 when ``cost`` is truthy and 0.009 otherwise. A
    non-negative surplus is scaled by ``1 - rate * t``; a negative surplus
    (a loss) is scaled by ``1 + rate * t``.
    """
    b = 0.019 if cost else 0.009
    surplus = value - bid
    if value >= bid:
        return surplus * (1 - b * t)
    return surplus * (1 + b * t)

# regression for bidder utility y ~ const + is_honolulu + high_cost + honolulu_high_cost
def regUb(df):
    """Set up (without fitting) the OLS model of bidder utility on the cost design.

    The outcome is computed row by row with ``discount`` from the item value,
    final price, elapsed time and the row's ``high_cost`` flag. Regressors are
    an intercept, ``is_honolulu``, ``high_cost`` and ``high_cost_honolulu``.

    Returns the unfitted ``sm.OLS`` model; call ``.fit()`` on it.
    """
    def winner_utility(row):
        return discount(
            row["player.item_value"], row["final_price"],
            row["time_elapsed"], row["high_cost"],
        )

    regressors = ["is_honolulu", "high_cost", "high_cost_honolulu"]
    outcome = df.apply(winner_utility, axis=1).astype(float)
    # intercept goes into the first column of the design matrix
    design = sm.add_constant(df[regressors].astype(float))
    return sm.OLS(outcome, design, missing="raise")

# regression for bidder utility y ~ const + is_honolulu + is_5_bidder + 5_bidder_honolulu
def regUb2(df):
    """Set up (without fitting) the OLS model of bidder utility on the bidder-count design.

    The outcome is computed row by row with ``discount`` from the item value,
    final price, elapsed time and the row's ``high_cost`` flag. Regressors are
    an intercept, ``is_honolulu``, ``is_5_bidder`` and ``5_bidder_honolulu``.

    Returns the unfitted ``sm.OLS`` model; call ``.fit()`` on it.
    """
    regressors = ["is_honolulu", "is_5_bidder", "5_bidder_honolulu"]
    utilities = df.apply(
        lambda row: discount(
            row["player.item_value"], row["final_price"],
            row["time_elapsed"], row["high_cost"],
        ),
        axis=1,
    )
    outcome = utilities.astype(float)
    covariates = df[regressors].astype(float)
    # intercept goes into the first column of the design matrix
    return sm.OLS(outcome, sm.add_constant(covariates), missing="raise")

In [228]:
# regression for auction duration y ~ const + is_honolulu + high_cost + honolulu_high_cost
def regDura(df):
    """Set up (without fitting) the OLS model of auction duration on the cost design.

    Outcome is ``time_elapsed``; regressors are an intercept, ``is_honolulu``,
    ``high_cost`` and ``high_cost_honolulu``.

    Returns the unfitted ``sm.OLS`` model; call ``.fit()`` on it.
    """
    regressors = ["is_honolulu", "high_cost", "high_cost_honolulu"]
    duration = df["time_elapsed"].astype(float)
    # intercept is the first design column; predict() inputs must follow that order
    design = sm.add_constant(df[regressors].astype(float))
    return sm.OLS(duration, design, missing="raise")

# regression for auction duration y ~ const + is_honolulu + is_5_bidder + 5_bidder_honolulu
def regDura2(df):
    """Set up (without fitting) the OLS model of auction duration on the bidder-count design.

    Outcome is ``time_elapsed``; regressors are an intercept, ``is_honolulu``,
    ``is_5_bidder`` and ``5_bidder_honolulu``.

    Returns the unfitted ``sm.OLS`` model; call ``.fit()`` on it.
    """
    regressors = ["is_honolulu", "is_5_bidder", "5_bidder_honolulu"]
    duration = df["time_elapsed"].astype(float)
    covariates = df[regressors].astype(float)
    # intercept is the first design column
    design = sm.add_constant(covariates)
    return sm.OLS(duration, design, missing="raise")

In [229]:
# regression for selling price y ~ const + is_honolulu + high_cost + honolulu_high_cost
def regSell(df):
    """Set up (without fitting) the OLS model of the selling price on the cost design.

    Outcome is ``final_price``; regressors are an intercept, ``is_honolulu``,
    ``high_cost`` and ``high_cost_honolulu``.

    Returns the unfitted ``sm.OLS`` model; call ``.fit()`` on it.
    """
    regressors = ["is_honolulu", "high_cost", "high_cost_honolulu"]
    price = df["final_price"].astype(float)
    # intercept is the first design column; predict() inputs must follow that order
    design = sm.add_constant(df[regressors].astype(float))
    return sm.OLS(price, design, missing="raise")

# regression for selling price y ~ const + is_honolulu + is_5_bidder + 5_bidder_honolulu
def regSell2(df):
    """Set up (without fitting) the OLS model of the selling price on the bidder-count design.

    Outcome is ``final_price``; regressors are an intercept, ``is_honolulu``,
    ``is_5_bidder`` and ``5_bidder_honolulu``.

    Returns the unfitted ``sm.OLS`` model; call ``.fit()`` on it.
    """
    regressors = ["is_honolulu", "is_5_bidder", "5_bidder_honolulu"]
    price = df["final_price"].astype(float)
    covariates = df[regressors].astype(float)
    # intercept is the first design column
    design = sm.add_constant(covariates)
    return sm.OLS(price, design, missing="raise")

In [230]:
# regression for efficiency y ~ const + is_honolulu + high_cost + honolulu_high_cost
def regEffi(df):
    """Set up (without fitting) the OLS model of efficiency on the cost design.

    Outcome is ``effi``; regressors are an intercept, ``is_honolulu``,
    ``high_cost`` and ``high_cost_honolulu``.

    Returns the unfitted ``sm.OLS`` model; call ``.fit()`` on it.
    """
    regressors = ["is_honolulu", "high_cost", "high_cost_honolulu"]
    efficiency = df["effi"].astype(float)
    # intercept is the first design column; predict() inputs must follow that order
    design = sm.add_constant(df[regressors].astype(float))
    return sm.OLS(efficiency, design, missing="raise")

# regression for efficiency y ~ const + is_honolulu + is_5_bidder + 5_bidder_honolulu
def regEffi2(df):
    """Set up (without fitting) the OLS model of efficiency on the bidder-count design.

    Outcome is ``effi``; regressors are an intercept, ``is_honolulu``,
    ``is_5_bidder`` and ``5_bidder_honolulu``.

    Returns the unfitted ``sm.OLS`` model; call ``.fit()`` on it.
    """
    regressors = ["is_honolulu", "is_5_bidder", "5_bidder_honolulu"]
    efficiency = df["effi"].astype(float)
    covariates = df[regressors].astype(float)
    # intercept is the first design column
    design = sm.add_constant(covariates)
    return sm.OLS(efficiency, design, missing="raise")

### Resampling by Treatments and Blocked Bootstrapping

In [231]:
# generate a blocked bootstrap sample regression
def sampleReg(df, is_test_cost, is_test_high, type):
    """Build a regression model on one session-level (blocked) bootstrap sample.

    Parameters
    ----------
    df : analysis table with ``is_5_bidder``, ``high_cost`` and ``session.code``.
    is_test_cost : truthy to compare high vs. low cost (model ``reg<type>``),
        falsy to compare 5 vs. 2 bidders (model ``reg<type>2``).
    is_test_high : when testing cost, truthy selects the 5-bidder auctions and
        falsy the 2-bidder ones; when testing bidders, truthy selects the
        high-cost auctions and falsy the low-cost ones.
    type : suffix of the model builder, e.g. ``"Ua"`` for ``regUa``/``regUa2``.

    Returns
    -------
    The unfitted model returned by the selected builder.

    Raises
    ------
    NameError if no callable builder with the requested name exists.
    """
    # Resolve the builder by name instead of eval()-ing a formatted string, so
    # `type` can only ever select an existing function.
    builder_name = ("reg%s" if is_test_cost else "reg%s2") % type
    builder = globals().get(builder_name)
    if not callable(builder):
        raise NameError("name %r is not defined" % builder_name)

    five_bidder = df["is_5_bidder"]
    high_cost = df["high_cost"]

    # the two treatments being compared; only these two are ever needed
    if is_test_cost:
        bidders = 1 if is_test_high else 0
        df1 = df[(five_bidder == bidders) & (high_cost == 1)]
        df2 = df[(five_bidder == bidders) & (high_cost == 0)]
    else:
        cost = 1 if is_test_high else 0
        df1 = df[(five_bidder == 1) & (high_cost == cost)]
        df2 = df[(five_bidder == 0) & (high_cost == cost)]

    def resample_sessions(treatment):
        # draw as many sessions as the treatment has, with replacement, and
        # pull in every row of each drawn session (the session is the block)
        sessions = treatment["session.code"].unique()
        drawn = pd.DataFrame(
            {"session.code": np.random.choice(sessions, size=sessions.size, replace=True)}
        )
        return drawn.merge(treatment, how="left", on="session.code")

    df_sample = pd.concat([resample_sessions(df1), resample_sessions(df2)])

    return builder(df_sample)

In [232]:
def baselineReg(df, is_test_cost, is_test_high, type):
    """Build the regression model on the full (non-resampled) data of one comparison.

    Parameters
    ----------
    df : analysis table with ``is_5_bidder`` and ``high_cost`` columns.
    is_test_cost : truthy to compare high vs. low cost (model ``reg<type>``),
        falsy to compare 5 vs. 2 bidders (model ``reg<type>2``).
    is_test_high : when testing cost, truthy keeps the 5-bidder auctions and
        falsy the 2-bidder ones; when testing bidders, truthy keeps the
        high-cost auctions and falsy the low-cost ones.
    type : suffix of the model builder, e.g. ``"Ua"`` for ``regUa``/``regUa2``.

    Returns
    -------
    The unfitted model returned by the selected builder.

    Raises
    ------
    NameError if no callable builder with the requested name exists.
    """
    # Resolve the builder by name instead of eval()-ing a formatted string, so
    # `type` can only ever select an existing function.
    if is_test_cost:
        builder_name = "reg%s" % type
        subset_col = "is_5_bidder"
    else:
        builder_name = "reg%s2" % type
        subset_col = "high_cost"

    builder = globals().get(builder_name)
    if not callable(builder):
        raise NameError("name %r is not defined" % builder_name)

    level = 1 if is_test_high else 0
    return builder(df[df[subset_col] == level])


In [233]:
# bootstrap regression for `rep` times
def bootstrap(df, is_test_cost, is_test_high, type, rep):
    """Run ``rep`` blocked-bootstrap regressions and collect the four cell predictions.

    Each replicate builds a model with ``sampleReg``, fits it once and predicts
    the design rows below (intercept first):

    * ``hh`` -- Honolulu, high cost / 5 bidders
    * ``hl`` -- Honolulu, low cost / 2 bidders
    * ``dh`` -- Dutch, high cost / 5 bidders
    * ``dl`` -- Dutch, low cost / 2 bidders

    Returns
    -------
    DataFrame with columns ``hh``, ``hl``, ``dh``, ``dl`` and one row of scalar
    predictions per replicate (empty when ``rep`` is 0).
    """
    # design rows: [const, is_honolulu, high-level dummy, interaction]
    cells = {
        "hh": [1, 1, 1, 1],
        "hl": [1, 1, 0, 0],
        "dh": [1, 0, 1, 0],
        "dl": [1, 0, 0, 0],
    }

    draws = []
    for _ in range(rep):
        # fit once per replicate; the original refitted for every prediction
        fitted = sampleReg(df, is_test_cost, is_test_high, type).fit()
        # predict() returns a length-1 array: keep the scalar so the columns are
        # plain numeric rather than arrays stored in cells
        draws.append({name: np.ravel(fitted.predict(row))[0] for name, row in cells.items()})

    # build the frame in one go instead of growing it row by row
    return pd.DataFrame(draws, columns=list(cells))

In [234]:
# bootstrap t test for complex predictions
def bootstrapT(df, is_test_cost, is_test_high, type, rep):
    """Bootstrap t-test for ratio-based predictions; prints the result.

    The point estimate comes from the baseline regression (``baselineReg``), the
    standard error from the spread of the same statistic over ``rep`` blocked
    bootstrap replicates (``bootstrap``), and the p-value from a t distribution
    with the baseline model's residual degrees of freedom.

    * ``type == "Sell"`` with ``is_test_cost == 0``: the Honolulu/Dutch price
      ratio is tested two-sided against a hard-coded benchmark, separately for
      5-bidder and 2-bidder auctions. Each test prints a label line followed by
      ``[ratio - benchmark, se, p]``.
    * otherwise: a one-sided test of the difference between the two
      Honolulu/Dutch ratios, printed as ``[theta, se, p]``. For ``type == "Ub"``
      with ``is_test_cost == 0`` the difference is low minus high; in every
      other case it is high minus low.

    Returns None.
    """
    # baseline test statistic -- fit once and reuse the result
    fitted = baselineReg(df, is_test_cost, is_test_high, type).fit()
    hh = fitted.predict([1, 1, 1, 1])[0]  # honolulu high cost or honolulu 5-bidder
    hl = fitted.predict([1, 1, 0, 0])[0]  # honolulu low cost or honolulu 2-bidder
    dh = fitted.predict([1, 0, 1, 0])[0]  # dutch high cost or dutch 5-bidder
    dl = fitted.predict([1, 0, 0, 0])[0]  # dutch low cost or dutch 2-bidder
    df_t = fitted.df_resid

    # bootstrap test statistics
    bs_predictions = bootstrap(df, is_test_cost, is_test_high, type, rep)
    ratio_high_bs = bs_predictions["hh"] / bs_predictions["dh"]
    ratio_low_bs = bs_predictions["hl"] / bs_predictions["dl"]

    if (type == "Sell") and (is_test_cost == 0):
        # Benchmarks for the H/D price ratio: first value applies under high
        # cost, second under low cost.
        # NOTE(review): the original comment for 5-bidder / low cost said 0.981
        # while the code tests against 0.991 -- confirm which one is intended.
        ratio_tests = (
            ("5-bidder", hh / dh, ratio_high_bs, 0.988 if is_test_high else 0.991),
            ("2-bidder", hl / dl, ratio_low_bs, 0.917 if is_test_high else 1.011),
        )
        for label, theta_base, theta_bs, benchmark in ratio_tests:
            se_bs = np.std(theta_bs, ddof=1)
            print(label)
            # two-sided p-value for H0: ratio == benchmark
            print([theta_base - benchmark, se_bs, stats.t.sf(abs(theta_base - benchmark) / se_bs, df=df_t) * 2])
    else:
        if (type == "Ub") and (is_test_cost == 0):
            theta_base = hl / dl - hh / dh
            theta_bs = ratio_low_bs - ratio_high_bs
        else:
            theta_base = hh / dh - hl / dl
            theta_bs = ratio_high_bs - ratio_low_bs
        se_bs = np.std(theta_bs, ddof=1)
        # one-sided (upper-tail) p-value
        print([theta_base, se_bs, stats.t.sf(theta_base / se_bs, df=df_t)])

In [262]:
# bootstrap standard error for simple predictions
def bootstrapT2(df, is_test_cost, is_test_high, order, equal, type, rep):
    """Bootstrap t-test for a simple difference of two predictions; prints the result.

    Parameters
    ----------
    df, is_test_cost, is_test_high, type : passed on to ``baselineReg`` and
        ``bootstrap``.
    order : which difference to test --
        ``"HDh"``: hh - dh, ``"HDl"``: hl - dl, ``"DHh"``: dh - hh, ``"DHl"``: dl - hl
        (h/d = Honolulu/Dutch prediction, second letter = high/low level).
    equal : truthy for a two-sided test of equality, falsy for a one-sided
        (upper-tail) test.
    rep : number of bootstrap replicates.

    Prints ``[theta, bootstrap se, p-value]`` and returns None.

    Raises
    ------
    ValueError if ``order`` is not one of the four codes above. This is checked
    before any model is fitted.
    """
    contrasts = {
        "HDh": ("hh", "dh"),
        "HDl": ("hl", "dl"),
        "DHh": ("dh", "hh"),
        "DHl": ("dl", "hl"),
    }
    if order not in contrasts:
        # fail fast; previously an unknown code only surfaced as an
        # UnboundLocalError after the whole bootstrap had run
        raise ValueError("order must be one of %s, got %r" % (sorted(contrasts), order))
    first, second = contrasts[order]

    # baseline test statistic -- fit once and reuse the result
    fitted = baselineReg(df, is_test_cost, is_test_high, type).fit()
    baseline = {
        "hh": fitted.predict([1, 1, 1, 1])[0],  # honolulu high cost or honolulu 5-bidder
        "hl": fitted.predict([1, 1, 0, 0])[0],  # honolulu low cost or honolulu 2-bidder
        "dh": fitted.predict([1, 0, 1, 0])[0],  # dutch high cost or dutch 5-bidder
        "dl": fitted.predict([1, 0, 0, 0])[0],  # dutch low cost or dutch 2-bidder
    }
    df_t = fitted.df_resid

    # bootstrap test statistics
    bs_predictions = bootstrap(df, is_test_cost, is_test_high, type, rep)

    theta_base = baseline[first] - baseline[second]
    theta_bs = bs_predictions[first] - bs_predictions[second]
    se_bs = np.std(theta_bs, ddof=1)
    if equal:
        print([theta_base, se_bs, stats.t.sf(abs(theta_base) / se_bs, df=df_t) * 2])
    else:
        print([theta_base, se_bs, stats.t.sf(theta_base / se_bs, df=df_t)])

### Pooled Resampling and Bootstrapping for Efficiency

In [236]:
def regEffiPool(df):
    """Set up (without fitting) the pooled OLS model of efficiency on the auction format.

    Outcome is ``effi``; regressors are an intercept and ``is_honolulu`` only.

    Returns the unfitted ``sm.OLS`` model; call ``.fit()`` on it.
    """
    regressors = ["is_honolulu"]
    efficiency = df["effi"].astype(float)
    # intercept is the first design column, so predict() takes [1, is_honolulu]
    design = sm.add_constant(df[regressors].astype(float))
    return sm.OLS(efficiency, design, missing="raise")

In [237]:
def sampleRegPool(df, type):
    """Build the pooled regression model on one session-level bootstrap sample.

    As many sessions as ``df`` contains are drawn with replacement from its
    distinct ``session.code`` values, and every row of each drawn session is
    included. The sample is passed to the builder named ``reg<type>Pool``.

    Returns the unfitted model returned by that builder.

    Raises
    ------
    NameError if no callable builder with the requested name exists.
    """
    # Resolve the builder by name instead of eval()-ing a formatted string, so
    # `type` can only ever select an existing function.
    builder_name = "reg%sPool" % type
    builder = globals().get(builder_name)
    if not callable(builder):
        raise NameError("name %r is not defined" % builder_name)

    # resample whole sessions (the block)
    sessions = df["session.code"].unique()
    sessions_sample = pd.DataFrame(
        {"session.code": np.random.choice(sessions, size=sessions.size, replace=True)}
    )
    df_sample = sessions_sample.merge(df, how="left", on="session.code")

    return builder(df_sample)

In [238]:
def baselineRegPool(df, type):
    """Build the pooled regression model ``reg<type>Pool`` on the full data.

    Returns the unfitted model returned by that builder.

    Raises
    ------
    NameError if no callable builder with the requested name exists.
    """
    # Resolve the builder by name instead of eval()-ing a formatted string, so
    # `type` can only ever select an existing function.
    builder_name = "reg%sPool" % type
    builder = globals().get(builder_name)
    if not callable(builder):
        raise NameError("name %r is not defined" % builder_name)

    return builder(df)

In [239]:
def bootstrapPool(df, type, rep):
    """Run ``rep`` pooled bootstrap regressions and collect the two format predictions.

    Each replicate builds a model with ``sampleRegPool``, fits it once and
    predicts the design rows ``[1, 1]`` (Honolulu) and ``[1, 0]`` (Dutch);
    the intercept is the first entry.

    Returns
    -------
    DataFrame with columns ``h`` and ``d`` and one row of scalar predictions
    per replicate (empty when ``rep`` is 0).
    """
    # design rows: [const, is_honolulu]
    cells = {"h": [1, 1], "d": [1, 0]}

    draws = []
    for _ in range(rep):
        # fit once per replicate; the original refitted for every prediction
        fitted = sampleRegPool(df, type).fit()
        # predict() returns a length-1 array: keep the scalar so the columns are
        # plain numeric rather than arrays stored in cells
        draws.append({name: np.ravel(fitted.predict(row))[0] for name, row in cells.items()})

    # build the frame in one go instead of growing it row by row
    return pd.DataFrame(draws, columns=list(cells))

In [240]:
def bootstrapPoolT(df, type, rep):
    """Two-sided bootstrap t-test of the pooled Honolulu-minus-Dutch difference.

    The point estimate comes from the baseline pooled regression
    (``baselineRegPool``), the standard error from the spread of the same
    difference over ``rep`` session-level bootstrap replicates
    (``bootstrapPool``), and the p-value from a t distribution with the
    baseline model's residual degrees of freedom.

    Prints ``[theta, bootstrap se, two-sided p-value]`` and returns None.
    """
    # baseline test statistic -- fit once and reuse the result
    fitted = baselineRegPool(df, type).fit()
    h = fitted.predict([1, 1])[0]  # honolulu
    d = fitted.predict([1, 0])[0]  # dutch
    df_t = fitted.df_resid

    # bootstrap test statistic
    bs_predictions = bootstrapPool(df, type, rep)

    theta_base = h - d
    theta_bs = bs_predictions["h"] - bs_predictions["d"]
    se_bs = np.std(theta_bs, ddof=1)
    # abs() is required for a two-sided test: without it a negative theta
    # doubles the lower-tail complement and yields a "p-value" above 1
    print([theta_base, se_bs, stats.t.sf(abs(theta_base) / se_bs, df=df_t) * 2])

## Baseline Regression and Prediction

### Auctioneer utility

reg cost, 2 bidders

In [84]:
# Baseline cost regression for auctioneer utility, 2-bidder auctions.
model = baselineReg(df_ua, is_test_cost=1, is_test_high=0, type="Ua").fit()
print(model.summary())
# design rows are [const, is_honolulu, high_cost, high_cost_honolulu];
# predict() returns a length-1 array, so take element 0 before %-formatting
print("\nhh\thl\tdh\tdl\n%.2f\t%.2f\t%.2f\t%.2f" % (
    model.predict([1, 1, 1, 1])[0],
    model.predict([1, 1, 0, 0])[0],
    model.predict([1, 0, 1, 0])[0],
    model.predict([1, 0, 0, 0])[0],
))

                               OLS Regression Results                               
Dep. Variable:     group.auctioneer_utility   R-squared:                       0.029
Model:                                  OLS   Adj. R-squared:                  0.027
Method:                       Least Squares   F-statistic:                     13.91
Date:                      Sat, 30 Sep 2023   Prob (F-statistic):           6.10e-09
Time:                              23:05:42   Log-Likelihood:                -4757.6
No. Observations:                      1404   AIC:                             9523.
Df Residuals:                          1400   BIC:                             9544.
Df Model:                                 3                                         
Covariance Type:                  nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------

reg cost, 5 bidders

In [261]:
# Baseline cost regression for auctioneer utility, 5-bidder auctions.
model = baselineReg(df_ua, is_test_cost=1, is_test_high=1, type="Ua").fit()
print(model.summary())
# design rows are [const, is_honolulu, high_cost, high_cost_honolulu];
# predict() returns a length-1 array, so take element 0 before %-formatting
print("\nhh\thl\tdh\tdl\n%.2f\t%.2f\t%.2f\t%.2f" % (
    model.predict([1, 1, 1, 1])[0],
    model.predict([1, 1, 0, 0])[0],
    model.predict([1, 0, 1, 0])[0],
    model.predict([1, 0, 0, 0])[0],
))

                               OLS Regression Results                               
Dep. Variable:     group.auctioneer_utility   R-squared:                       0.141
Model:                                  OLS   Adj. R-squared:                  0.138
Method:                       Least Squares   F-statistic:                     44.14
Date:                      Thu, 05 Oct 2023   Prob (F-statistic):           2.02e-26
Time:                              01:40:04   Log-Likelihood:                -2853.9
No. Observations:                       812   AIC:                             5716.
Df Residuals:                           808   BIC:                             5735.
Df Model:                                 3                                         
Covariance Type:                  nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------

### Bidder utility

reg cost, 2 bidders

In [86]:
# Baseline cost regression for bidder utility, 2-bidder auctions.
model = baselineReg(df_ub, is_test_cost=1, is_test_high=0, type="Ub").fit()
print(model.summary())
# design rows are [const, is_honolulu, high_cost, high_cost_honolulu];
# predict() returns a length-1 array, so take element 0 before %-formatting
print("\nhh\thl\tdh\tdl\n%.2f\t%.2f\t%.2f\t%.2f" % (
    model.predict([1, 1, 1, 1])[0],
    model.predict([1, 1, 0, 0])[0],
    model.predict([1, 0, 1, 0])[0],
    model.predict([1, 0, 0, 0])[0],
))

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.144
Model:                            OLS   Adj. R-squared:                  0.142
Method:                 Least Squares   F-statistic:                     77.64
Date:                Sat, 30 Sep 2023   Prob (F-statistic):           1.99e-46
Time:                        23:09:20   Log-Likelihood:                -4945.6
No. Observations:                1392   AIC:                             9899.
Df Residuals:                    1388   BIC:                             9920.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const                  8.0967      0

reg cost, 5 bidders

In [87]:
# Baseline cost regression for bidder utility, 5-bidder auctions.
model = baselineReg(df_ub, is_test_cost=1, is_test_high=1, type="Ub").fit()
print(model.summary())
# design rows are [const, is_honolulu, high_cost, high_cost_honolulu];
# predict() returns a length-1 array, so take element 0 before %-formatting
print("\nhh\thl\tdh\tdl\n%.2f\t%.2f\t%.2f\t%.2f" % (
    model.predict([1, 1, 1, 1])[0],
    model.predict([1, 1, 0, 0])[0],
    model.predict([1, 0, 1, 0])[0],
    model.predict([1, 0, 0, 0])[0],
))

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.106
Model:                            OLS   Adj. R-squared:                  0.103
Method:                 Least Squares   F-statistic:                     32.06
Date:                Sat, 30 Sep 2023   Prob (F-statistic):           1.37e-19
Time:                        23:11:01   Log-Likelihood:                -2678.4
No. Observations:                 812   AIC:                             5365.
Df Residuals:                     808   BIC:                             5384.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const                  1.5018      0

reg bidders, high cost

In [89]:
# Baseline bidder-count regression for bidder utility, high-cost auctions.
model = baselineReg(df_ub, is_test_cost=0, is_test_high=1, type="Ub").fit()
print(model.summary())
# design rows are [const, is_honolulu, is_5_bidder, 5_bidder_honolulu];
# predict() returns a length-1 array, so take element 0 before %-formatting
print("\nhh\thl\tdh\tdl\n%.2f\t%.2f\t%.2f\t%.2f" % (
    model.predict([1, 1, 1, 1])[0],
    model.predict([1, 1, 0, 0])[0],
    model.predict([1, 0, 1, 0])[0],
    model.predict([1, 0, 0, 0])[0],
))

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.133
Model:                            OLS   Adj. R-squared:                  0.131
Method:                 Least Squares   F-statistic:                     57.79
Date:                Sat, 30 Sep 2023   Prob (F-statistic):           9.32e-35
Time:                        23:11:55   Log-Likelihood:                -3759.1
No. Observations:                1133   AIC:                             7526.
Df Residuals:                    1129   BIC:                             7546.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                 4.5438      0.35

reg bidders, low cost

In [90]:
# Baseline bidder-count regression for bidder utility, low-cost auctions.
model = baselineReg(df_ub, is_test_cost=0, is_test_high=0, type="Ub").fit()
print(model.summary())
# design rows are [const, is_honolulu, is_5_bidder, 5_bidder_honolulu];
# predict() returns a length-1 array, so take element 0 before %-formatting
print("\nhh\thl\tdh\tdl\n%.2f\t%.2f\t%.2f\t%.2f" % (
    model.predict([1, 1, 1, 1])[0],
    model.predict([1, 1, 0, 0])[0],
    model.predict([1, 0, 1, 0])[0],
    model.predict([1, 0, 0, 0])[0],
))

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.199
Model:                            OLS   Adj. R-squared:                  0.196
Method:                 Least Squares   F-statistic:                     88.15
Date:                Sat, 30 Sep 2023   Prob (F-statistic):           5.85e-51
Time:                        23:12:47   Log-Likelihood:                -3853.4
No. Observations:                1071   AIC:                             7715.
Df Residuals:                    1067   BIC:                             7735.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                 8.0967      0.48

### Auction duration

reg bidders, high cost

In [93]:
# Baseline bidder-count regression for auction duration, high-cost auctions.
model = baselineReg(df_dura, is_test_cost=0, is_test_high=1, type="Dura").fit()
print(model.summary())
# design rows are [const, is_honolulu, is_5_bidder, 5_bidder_honolulu];
# predict() returns a length-1 array, so take element 0 before %-formatting
print("\nhh\thl\tdh\tdl\n%.2f\t%.2f\t%.2f\t%.2f" % (
    model.predict([1, 1, 1, 1])[0],
    model.predict([1, 1, 0, 0])[0],
    model.predict([1, 0, 1, 0])[0],
    model.predict([1, 0, 0, 0])[0],
))

                            OLS Regression Results                            
Dep. Variable:           time_elapsed   R-squared:                       0.239
Model:                            OLS   Adj. R-squared:                  0.237
Method:                 Least Squares   F-statistic:                     118.7
Date:                Sat, 30 Sep 2023   Prob (F-statistic):           7.03e-67
Time:                        23:15:51   Log-Likelihood:                -4282.2
No. Observations:                1140   AIC:                             8572.
Df Residuals:                    1136   BIC:                             8593.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                29.8980      0.54

reg bidders, low cost

In [94]:
# Baseline bidder-count regression for auction duration, low-cost auctions.
model = baselineReg(df_dura, is_test_cost=0, is_test_high=0, type="Dura").fit()
print(model.summary())
# design rows are [const, is_honolulu, is_5_bidder, 5_bidder_honolulu];
# predict() returns a length-1 array, so take element 0 before %-formatting
print("\nhh\thl\tdh\tdl\n%.2f\t%.2f\t%.2f\t%.2f" % (
    model.predict([1, 1, 1, 1])[0],
    model.predict([1, 1, 0, 0])[0],
    model.predict([1, 0, 1, 0])[0],
    model.predict([1, 0, 0, 0])[0],
))

                            OLS Regression Results                            
Dep. Variable:           time_elapsed   R-squared:                       0.310
Model:                            OLS   Adj. R-squared:                  0.308
Method:                 Least Squares   F-statistic:                     160.8
Date:                Sat, 30 Sep 2023   Prob (F-statistic):           4.52e-86
Time:                        23:16:28   Log-Likelihood:                -3949.0
No. Observations:                1076   AIC:                             7906.
Df Residuals:                    1072   BIC:                             7926.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                29.4875      0.51

### Selling price

reg bidders, high cost

In [102]:
# Baseline bidder-count regression for the selling price, high-cost auctions.
model = baselineReg(df_sell, is_test_cost=0, is_test_high=1, type="Sell").fit()
print(model.summary())
# design rows are [const, is_honolulu, is_5_bidder, 5_bidder_honolulu];
# predict() returns a length-1 array, so take element 0 before %-formatting
print("\nhh\thl\tdh\tdl\n%.2f\t%.2f\t%.2f\t%.2f" % (
    model.predict([1, 1, 1, 1])[0],
    model.predict([1, 1, 0, 0])[0],
    model.predict([1, 0, 1, 0])[0],
    model.predict([1, 0, 0, 0])[0],
))

                            OLS Regression Results                            
Dep. Variable:            final_price   R-squared:                       0.421
Model:                            OLS   Adj. R-squared:                  0.420
Method:                 Least Squares   F-statistic:                     275.7
Date:                Sun, 01 Oct 2023   Prob (F-statistic):          2.06e-134
Time:                        16:53:53   Log-Likelihood:                -4153.8
No. Observations:                1140   AIC:                             8316.
Df Residuals:                    1136   BIC:                             8336.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                20.6333      0.48

reg bidders, low cost

In [103]:
# Baseline bidder-count regression for the selling price, low-cost auctions.
model = baselineReg(df_sell, is_test_cost=0, is_test_high=0, type="Sell").fit()
print(model.summary())
# design rows are [const, is_honolulu, is_5_bidder, 5_bidder_honolulu];
# predict() returns a length-1 array, so take element 0 before %-formatting
print("\nhh\thl\tdh\tdl\n%.2f\t%.2f\t%.2f\t%.2f" % (
    model.predict([1, 1, 1, 1])[0],
    model.predict([1, 1, 0, 0])[0],
    model.predict([1, 0, 1, 0])[0],
    model.predict([1, 0, 0, 0])[0],
))

                            OLS Regression Results                            
Dep. Variable:            final_price   R-squared:                       0.444
Model:                            OLS   Adj. R-squared:                  0.442
Method:                 Least Squares   F-statistic:                     284.9
Date:                Sun, 01 Oct 2023   Prob (F-statistic):          5.91e-136
Time:                        16:55:48   Log-Likelihood:                -3940.2
No. Observations:                1076   AIC:                             7888.
Df Residuals:                    1072   BIC:                             7908.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                        coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------------
const                21.0731      0.51

### Efficiency

Regression: effect of cost, 2 bidders

In [267]:
# Fit the efficiency specification (type="Effi") with is_test_cost=1 and
# is_test_high=0; the markdown label above this cell describes the subsample.
# `model` holds the fitted results object (the return value of .fit()).
model = baselineReg(df_effi, is_test_cost=1, is_test_high=0, type="Effi").fit()
print(model.summary())

# Predict the four scenarios hh / hl / dh / dl in a single call. Each row is
# one design vector in the regressor order used by baselineReg (presumably
# [const, format dummy, treatment dummy, interaction] — verify against
# baselineReg). Formatting real scalars avoids the implicit 1-element
# ndarray -> float conversion that NumPy has deprecated since 1.25.
scenario_rows = [[1, 1, 1, 1], [1, 1, 0, 0], [1, 0, 1, 0], [1, 0, 0, 0]]
scenario_preds = np.asarray(model.predict(scenario_rows), dtype=float).ravel()
print("\nhh\thl\tdh\tdl\n%.2f\t%.2f\t%.2f\t%.2f" % tuple(scenario_preds))

                            OLS Regression Results                            
Dep. Variable:                   effi   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.001
Method:                 Least Squares   F-statistic:                     1.454
Date:                Thu, 05 Oct 2023   Prob (F-statistic):              0.226
Time:                        01:56:18   Log-Likelihood:                -5802.9
No. Observations:                1404   AIC:                         1.161e+04
Df Residuals:                    1400   BIC:                         1.163e+04
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const                 95.4430      0

Regression: effect of cost, 5 bidders

In [268]:
# Fit the efficiency specification (type="Effi") with is_test_cost=1 and
# is_test_high=1; the markdown label above this cell describes the subsample.
# `model` holds the fitted results object (the return value of .fit()).
model = baselineReg(df_effi, is_test_cost=1, is_test_high=1, type="Effi").fit()
print(model.summary())

# Predict the four scenarios hh / hl / dh / dl in a single call. Each row is
# one design vector in the regressor order used by baselineReg (presumably
# [const, format dummy, treatment dummy, interaction] — verify against
# baselineReg). Formatting real scalars avoids the implicit 1-element
# ndarray -> float conversion that NumPy has deprecated since 1.25.
scenario_rows = [[1, 1, 1, 1], [1, 1, 0, 0], [1, 0, 1, 0], [1, 0, 0, 0]]
scenario_preds = np.asarray(model.predict(scenario_rows), dtype=float).ravel()
print("\nhh\thl\tdh\tdl\n%.2f\t%.2f\t%.2f\t%.2f" % tuple(scenario_preds))

                            OLS Regression Results                            
Dep. Variable:                   effi   R-squared:                       0.020
Model:                            OLS   Adj. R-squared:                  0.016
Method:                 Least Squares   F-statistic:                     5.376
Date:                Thu, 05 Oct 2023   Prob (F-statistic):            0.00115
Time:                        01:56:21   Log-Likelihood:                -2955.2
No. Observations:                 812   AIC:                             5918.
Df Residuals:                     808   BIC:                             5937.
Df Model:                           3                                         
Covariance Type:            nonrobust                                         
                         coef    std err          t      P>|t|      [0.025      0.975]
--------------------------------------------------------------------------------------
const                 95.4755      0

Pooled regression (2-bidder and 5-bidder observations combined)

In [269]:
# Fit the pooled efficiency regression and print its summary table.
# `model` is rebound to the fitted results object returned by .fit();
# the recorded summary reports a single regressor besides the constant (Df Model: 1).
model = baselineRegPool(df_effi, type="Effi").fit()
print(model.summary())

                            OLS Regression Results                            
Dep. Variable:                   effi   R-squared:                       0.003
Model:                            OLS   Adj. R-squared:                  0.003
Method:                 Least Squares   F-statistic:                     7.392
Date:                Thu, 05 Oct 2023   Prob (F-statistic):            0.00660
Time:                        02:05:37   Log-Likelihood:                -8877.5
No. Observations:                2216   AIC:                         1.776e+04
Df Residuals:                    2214   BIC:                         1.777e+04
Df Model:                           1                                         
Covariance Type:            nonrobust                                         
                  coef    std err          t      P>|t|      [0.025      0.975]
-------------------------------------------------------------------------------
const          95.8454      0.399    239.916      

## Blocked Bootstrap

### Auctioneer utility

H/D (high cost) > H/D (low cost) for 5 bidders

In [58]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT samples via np.random — TODO confirm).
np.random.seed(1)
# Auctioneer utility, cost comparison of H/D on the 5-bidder subsample (per the
# label above), 1000 replications. The displayed list has three numbers,
# presumably [estimate, bootstrap std. error, p-value] — verify in bootstrapT.
bootstrapT(df_ua, is_test_cost=1, is_test_high=1, type="Ua", rep=1000)

[0.04359385888230305, 0.07952604359443904, 0.2918630028998678]


H > D (high cost) for 2 bidders, H > D (high cost) for 5 bidders

In [77]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT2 samples via np.random — TODO confirm).
np.random.seed(1)
# Auctioneer utility, H vs D at high cost with 2 bidders (per the label above);
# equal=False presumably selects the one-sided alternative — verify in bootstrapT2.
bootstrapT2(df_ua, is_test_cost=1, is_test_high=0, order="HDh", equal=False, type="Ua", rep=1000)

[-1.0923669174360686, 0.7755580755214634, 0.9203961837243062]


In [76]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT2 samples via np.random — TODO confirm).
# NOTE(review): this cell's execution count (76) precedes the previous cell's (77);
# re-run the section top to bottom to confirm the recorded outputs.
np.random.seed(1)
# Auctioneer utility, H vs D at high cost with 5 bidders (second half of the label above).
bootstrapT2(df_ua, is_test_cost=1, is_test_high=1, order="HDh", equal=False, type="Ua", rep=1000)

[-5.600926518328652, 1.7385471775967, 0.9993371252336474]


H > D (low cost) for 2 bidders, H > D (low cost) for 5 bidders

In [246]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT2 samples via np.random — TODO confirm).
np.random.seed(1)
# Auctioneer utility, H vs D at low cost with 2 bidders (per the label above);
# the trailing "l" in order="HDl" presumably picks the low level — verify in bootstrapT2.
bootstrapT2(df_ua, is_test_cost=1, is_test_high=0, order="HDl", equal=False, type="Ua", rep=1000)

[-3.142741451485314, 0.5357300267322749, 0.9999999972230117]


In [79]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT2 samples via np.random — TODO confirm).
np.random.seed(1)
# Auctioneer utility, H vs D at low cost with 5 bidders (second half of the label above).
bootstrapT2(df_ua, is_test_cost=1, is_test_high=1, order="HDl", equal=False, type="Ua", rep=1000)

[-7.297101809729607, 2.0499169023805477, 0.9998035233045061]


H > D (2 bidders) for high cost, H > D (5 bidders) for high cost

In [263]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT2 samples via np.random — TODO confirm).
np.random.seed(1)
# Auctioneer utility, H vs D with 2 bidders at high cost (per the label above).
# With is_test_cost=0 the "l"/"h" suffix of `order` presumably refers to the
# bidder count (2 vs 5) rather than the cost level — verify in bootstrapT2.
bootstrapT2(df_ua, is_test_cost=0, is_test_high=1, order="HDl", equal=False, type="Ua", rep=1000)

[-1.0923669174359674, 0.7632014950621325, 0.9236897184259946]


In [264]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT2 samples via np.random — TODO confirm).
np.random.seed(1)
# Auctioneer utility, H vs D with 5 bidders at high cost (second half of the label above).
bootstrapT2(df_ua, is_test_cost=0, is_test_high=1, order="HDh", equal=False, type="Ua", rep=1000)

[-5.600926518328446, 1.7421802447543804, 0.9993291623356657]


H > D (2 bidders) for low cost, H > D (5 bidders) for low cost

In [265]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT2 samples via np.random — TODO confirm).
np.random.seed(1)
# Auctioneer utility, H vs D with 2 bidders at low cost (per the label above).
bootstrapT2(df_ua, is_test_cost=0, is_test_high=0, order="HDl", equal=False, type="Ua", rep=1000)

[-3.1427414514853567, 0.5213963138239214, 0.9999999988566189]


In [266]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT2 samples via np.random — TODO confirm).
np.random.seed(1)
# Auctioneer utility, H vs D with 5 bidders at low cost (second half of the label above).
bootstrapT2(df_ua, is_test_cost=0, is_test_high=0, order="HDh", equal=False, type="Ua", rep=1000)

[-7.297101809729593, 2.054021090991965, 0.9998009785690786]


### Bidder utility

H/D (high cost) > H/D (low cost) for 2 bidders

In [59]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT samples via np.random — TODO confirm).
# NOTE(review): execution counts jump back to 59 here; re-run the notebook from a
# fresh kernel to confirm the recorded outputs.
np.random.seed(1)
# Bidder utility, cost comparison of H/D on the 2-bidder subsample (per the label above).
bootstrapT(df_ub, is_test_cost=1, is_test_high=0, type="Ub", rep=1000)

[0.3791630484891684, 0.30273041336887013, 0.10530344309985001]


H/D (5 bidders) < H/D (2 bidders) for high cost 

In [60]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT samples via np.random — TODO confirm).
np.random.seed(1)
# Bidder utility, bidder-count comparison of H/D at high cost (per the label above).
bootstrapT(df_ub, is_test_cost=0, is_test_high=1, type="Ub", rep=1000)

[0.81271008260383, 0.4167056725956958, 0.025692495494407896]


H/D (5 bidders) < H/D (2 bidders) for low cost (mean and variance are not consistent)

In [61]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT samples via np.random — TODO confirm).
np.random.seed(1)
# Bidder utility, bidder-count comparison of H/D at low cost (per the label above).
# NOTE(review): the label flags this result as unreliable; the second recorded
# value is much larger in magnitude than the first.
bootstrapT(df_ub, is_test_cost=0, is_test_high=0, type="Ub", rep=1000)

[-3.441220611194904, 18.520132364515376, 0.5736853509581998]


H > D (2 bidders) for high cost, H > D (5 bidders) for high cost

In [81]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT2 samples via np.random — TODO confirm).
np.random.seed(1)
# Bidder utility, H vs D with 2 bidders at high cost (per the label above).
bootstrapT2(df_ub, is_test_cost=0, is_test_high=1, order="HDl", equal=False, type="Ub", rep=1000)

[5.145851209296156, 0.75636001455435, 8.268368745155891e-12]


In [82]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT2 samples via np.random — TODO confirm).
np.random.seed(1)
# Bidder utility, H vs D with 5 bidders at high cost (second half of the label above).
bootstrapT2(df_ub, is_test_cost=0, is_test_high=1, order="HDh", equal=False, type="Ub", rep=1000)

[1.049930845543658, 0.7974207553217564, 0.09411015742512437]


H > D (2 bidders) for low cost, H > D (5 bidders) for low cost

In [83]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT2 samples via np.random — TODO confirm).
np.random.seed(1)
# Bidder utility, H vs D with 2 bidders at low cost (per the label above).
bootstrapT2(df_ub, is_test_cost=0, is_test_high=0, order="HDl", equal=False, type="Ub", rep=1000)

[6.099483354729518, 0.6615288322368609, 7.688337578048523e-20]


In [84]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT2 samples via np.random — TODO confirm).
np.random.seed(1)
# Bidder utility, H vs D with 5 bidders at low cost (second half of the label above).
bootstrapT2(df_ub, is_test_cost=0, is_test_high=0, order="HDh", equal=False, type="Ub", rep=1000)

[6.299183506026972, 1.800733430489206, 0.00024392815223056924]


### Auction duration

H/D (5 bidders) > H/D (2 bidders) for high cost

In [62]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT samples via np.random — TODO confirm).
np.random.seed(1)
# Auction duration, bidder-count comparison of H/D at high cost (per the label above).
bootstrapT(df_dura, is_test_cost=0, is_test_high=1, type="Dura", rep=1000)

[0.5951095007455524, 0.20982680825445396, 0.002323259888660759]


H/D (5 bidders) > H/D (2 bidders) for low cost

In [63]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT samples via np.random — TODO confirm).
np.random.seed(1)
# Auction duration, bidder-count comparison of H/D at low cost (per the label above).
bootstrapT(df_dura, is_test_cost=0, is_test_high=0, type="Dura", rep=1000)

[0.5263001055146553, 0.11863755491259295, 5.050188806842169e-06]


D > H (2 bidders) for high cost, D > H (5 bidders) for high cost

In [85]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT2 samples via np.random — TODO confirm).
np.random.seed(1)
# Auction duration, D vs H with 2 bidders at high cost (per the label above).
# Note the order string starts with "DH" here, unlike the "HD" used for the utility tests.
bootstrapT2(df_dura, is_test_cost=0, is_test_high=1, order="DHl", equal=False, type="Dura", rep=1000)

[9.25647981630254, 1.6490445691052467, 1.2471714339282987e-08]


In [86]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT2 samples via np.random — TODO confirm).
np.random.seed(1)
# Auction duration, D vs H with 5 bidders at high cost (second half of the label above).
bootstrapT2(df_dura, is_test_cost=0, is_test_high=1, order="DHh", equal=False, type="Dura", rep=1000)

[-4.082802934857378, 2.875434623029792, 0.9220430701621795]


D > H (2 bidders) for low cost, D > H (5 bidders) for low cost

In [87]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT2 samples via np.random — TODO confirm).
np.random.seed(1)
# Auction duration, D vs H with 2 bidders at low cost (per the label above).
bootstrapT2(df_dura, is_test_cost=0, is_test_high=0, order="DHl", equal=False, type="Dura", rep=1000)

[9.853151901160825, 0.9458419002971261, 1.422340383477502e-24]


In [88]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT2 samples via np.random — TODO confirm).
np.random.seed(1)
# Auction duration, D vs H with 5 bidders at low cost (second half of the label above).
bootstrapT2(df_dura, is_test_cost=0, is_test_high=0, order="DHh", equal=False, type="Dura", rep=1000)

[-2.498656942127603, 0.984898870467112, 0.9943387062050213]


### Selling price

H/D (5 bidders) = 0.988 for high cost, H/D (2 bidders) = 0.917 for high cost

In [201]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT samples via np.random — TODO confirm).
np.random.seed(1)
# Selling price at high cost (per the label above). For type="Sell" the recorded
# output shows two labelled result lists, "5-bidder" and "2-bidder", rather than one.
bootstrapT(df_sell, is_test_cost=0, is_test_high=1, type="Sell", rep=1000)

5-bidder
[-0.06759022852639274, 0.007277401811619307, 7.774673457314394e-20]
2-bidder
[-0.09214808831447485, 0.02468091132292367, 0.00019813620649469305]


H/D (5 bidders) = 0.991 for low cost, H/D (2 bidders) = 1.011 for low cost

In [194]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT samples via np.random — TODO confirm).
# NOTE(review): this cell's execution count (194) precedes the previous cell's (201).
np.random.seed(1)
# Selling price at low cost (per the label above). For type="Sell" the recorded
# output shows two labelled result lists, "5-bidder" and "2-bidder", rather than one.
bootstrapT(df_sell, is_test_cost=0, is_test_high=0, type="Sell", rep=1000)

5-bidder
[-0.1338765448865954, 0.04413147246190608, 0.0024748580062545745]
2-bidder
[-0.2704699597613438, 0.04091290279617724, 6.012741561156781e-11]


### Efficiency

H = D (high cost) for 2 bidders, H = D (low cost) for 2 bidders

In [195]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT2 samples via np.random — TODO confirm).
np.random.seed(1)
# Efficiency, H vs D at high cost with 2 bidders (per the label above).
# equal=True, matching the "H = D" label, presumably requests an equality
# (two-sided) test — verify in bootstrapT2.
bootstrapT2(df_effi, is_test_cost=1, is_test_high=0, order="HDh", equal=True, type="Effi", rep=1000)

[1.9178963415900228, 0.45837509424526396, 3.0408783792573194e-05]


In [196]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT2 samples via np.random — TODO confirm).
np.random.seed(1)
# Efficiency, H vs D at low cost with 2 bidders (second half of the label above);
# equal=True matches the "H = D" label.
bootstrapT2(df_effi, is_test_cost=1, is_test_high=0, order="HDl", equal=True, type="Effi", rep=1000)

[1.3198913028827235, 0.9023783156593289, 0.14377922957424488]


H = D (high cost) for 5 bidders, H = D (low cost) for 5 bidders

In [197]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT2 samples via np.random — TODO confirm).
np.random.seed(1)
# Efficiency, H vs D at high cost with 5 bidders (per the label above);
# equal=True matches the "H = D" label.
bootstrapT2(df_effi, is_test_cost=1, is_test_high=1, order="HDh", equal=True, type="Effi", rep=1000)

[-0.14892516960524915, 0.6440122821830686, 0.8171823539025131]


In [198]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapT2 samples via np.random — TODO confirm).
np.random.seed(1)
# Efficiency, H vs D at low cost with 5 bidders (second half of the label above);
# equal=True matches the "H = D" label.
bootstrapT2(df_effi, is_test_cost=1, is_test_high=1, order="HDl", equal=True, type="Effi", rep=1000)

[3.017039457493965, 1.752481613948663, 0.0855281553103055]


### Efficiency pooled

H = D

In [241]:
# Re-seed NumPy's global RNG so this cell gives the same draws on every run
# (assumes bootstrapPoolT samples via np.random — TODO confirm).
np.random.seed(1)
# Pooled efficiency test of H = D (per the label above). Arguments are positional
# here; by analogy with the sibling calls they are presumably type="Effi" and
# rep=1000 — verify against bootstrapPoolT's signature.
bootstrapPoolT(df_effi, "Effi", 1000)

[1.536020718960728, 0.5140132329613755, 0.0028362782323746853]
