In [33]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
import statsmodels.api as sm
from scipy import stats
import matplotlib.pyplot as plt

## Top-bottom Data 

In [34]:
def _addTreatmentColumns(df, is_honolulu):
    """Add the treatment flags and the top/bottom split to ``df`` in place.

    Columns written: ``is_honolulu``, ``is_high_cost``, ``is_5_bidder``,
    ``mid_paydiff`` and ``is_top``.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``session.config.discount_b``, ``session.config.name``
        and ``paydiff_pct``.
    is_honolulu : int
        Value stored in the ``is_honolulu`` column for every row.
    """
    df["is_honolulu"] = is_honolulu
    # Rows whose discount_b equals 0.019 are the ones flagged as high cost.
    df["is_high_cost"] = (df["session.config.discount_b"] == 0.019)
    # Rows whose session config name contains "5_" are flagged as 5-bidder.
    df["is_5_bidder"] = df["session.config.name"].str.contains("5_")
    # One median per (is_high_cost, is_5_bidder) cell, taken over the DISTINCT
    # paydiff_pct values of that cell. Computing it once per group replaces a
    # row-wise apply that re-filtered the whole frame for every single row.
    df["mid_paydiff"] = (
        df.groupby(["is_high_cost", "is_5_bidder"])["paydiff_pct"]
        .transform(lambda values: np.median(values.unique()))
    )
    # "Top" means strictly above the cell median; ties fall in the bottom half.
    df["is_top"] = (df["paydiff_pct"] > df["mid_paydiff"])


def genData(df_d, df_h):
    """Build the winner-only regression samples for the top/bottom analysis.

    Both input frames are modified in place: the treatment flags and the
    ``is_top`` indicator are added as new columns (see
    ``_addTreatmentColumns``).

    Parameters
    ----------
    df_d : pandas.DataFrame
        Data tagged ``is_honolulu = 0``; source of the Dutch auction bids.
    df_h : pandas.DataFrame
        Data tagged ``is_honolulu = 1``; source of the Dutch stage bids.

    Returns
    -------
    (df_dbid, df_dstagebid) : tuple of pandas.DataFrame
        Rows with ``player.is_dutch_winner == 1`` from ``df_d`` and ``df_h``
        respectively, restricted to the columns the regressions use.
    """
    _addTreatmentColumns(df_d, 0)
    _addTreatmentColumns(df_h, 1)

    treatment_cols = ["is_high_cost", "is_5_bidder", "is_top", "session.code"]

    # for dutch auction bids
    cols = ["group.dutch_final_price", "predict.player.bid"] + treatment_cols
    # Participant codes to exclude; currently none.
    # Codes left over from the original notebook: ['4uwxoyym', 'twmqn1o6', '3yrhqskd', 'wgrvj2qu']
    droplist = []
    keep = (df_d["player.is_dutch_winner"] == 1) & (~df_d["participant.code"].isin(droplist))
    df_dbid = df_d.loc[keep, cols]

    # for dutch stage bids
    cols = ["group.dutch_final_price", "predict.player.optimal_dutch_bid"] + treatment_cols
    # Participant codes to exclude; currently none.
    # Codes left over from the original notebook: ['ece1g596', '4uwxoyym', 'co853jdf', 'ovsmhled']
    droplist = []
    keep = (df_h["player.is_dutch_winner"] == 1) & (~df_h["participant.code"].isin(droplist))
    df_dstagebid = df_h.loc[keep, cols]

    return df_dbid, df_dstagebid

In [35]:
# Read both CSV files fresh; genData adds columns to the frames it is given, in place.
df_d = pd.read_csv("Dutch_paydiff_norm_zerocost.csv")
df_h = pd.read_csv("Honolulu_paydiff_norm_zerocost.csv")
# Winner-only subsets (player.is_dutch_winner == 1) used by the regressions below.
df_dbid, df_dstagebid = genData(df_d, df_h)

In [36]:
def regDbid(df):
    """Set up the OLS model for the Dutch auction bids.

    The dependent variable is ``group.dutch_final_price`` minus
    ``predict.player.bid``; the regressors are a constant and ``is_top``.

    Returns the unfitted ``sm.OLS`` model; callers invoke ``.fit()``.
    Missing values make statsmodels raise (``missing="raise"``).
    """
    price_gap = df["group.dutch_final_price"].sub(df["predict.player.bid"]).astype(float)
    regressors = df.loc[:, ["is_top"]].astype(float)

    # add_constant puts the intercept in the first column, so the fitted
    # parameters come out ordered as (const, is_top).
    design = sm.add_constant(regressors)

    return sm.OLS(price_gap, design, missing="raise")

def regDstagebid(df):
    """Set up the OLS model for the Dutch stage bids.

    The dependent variable is ``group.dutch_final_price`` minus
    ``predict.player.optimal_dutch_bid``; the regressors are a constant and
    ``is_top``.

    Returns the unfitted ``sm.OLS`` model; callers invoke ``.fit()``.
    Missing values make statsmodels raise (``missing="raise"``).
    """
    price_gap = df["group.dutch_final_price"].sub(df["predict.player.optimal_dutch_bid"]).astype(float)
    regressors = df.loc[:, ["is_top"]].astype(float)

    # add_constant puts the intercept in the first column, so the fitted
    # parameters come out ordered as (const, is_top).
    design = sm.add_constant(regressors)

    return sm.OLS(price_gap, design, missing="raise")

In [37]:
def sampleReg(df, type):
    """Build a regression model on one session-level bootstrap resample.

    Sessions (``session.code``) are drawn with replacement, as many as there
    are distinct sessions in ``df``, using NumPy's global RNG. Every row of a
    drawn session is included, once per time the session was drawn.

    Parameters
    ----------
    df : pandas.DataFrame
        Data for a single treatment cell; must contain ``session.code``.
    type : str
        Suffix of the model builder to use: ``"Dbid"`` selects ``regDbid``,
        ``"Dstagebid"`` selects ``regDstagebid``. (The name shadows the
        builtin but is kept because callers pass it by keyword.)

    Returns
    -------
    Whatever the selected ``reg<type>`` function returns for the resample.

    Raises
    ------
    NameError
        If no global named ``reg<type>`` exists.
    """
    # Look the builder up by name instead of eval()-ing a formatted string:
    # same dispatch, but text in `type` can never be executed as code.
    builder_name = "reg%s" % type
    if builder_name not in globals():
        raise NameError("sampleReg: no regression builder named %r is defined" % builder_name)
    build_model = globals()[builder_name]

    # resampling data on the session level (the block) for each treatment
    sessions = df["session.code"].unique()
    sessions_sample = pd.DataFrame({"session.code" : np.random.choice(sessions, size=sessions.size, replace=True)})
    # Left merge on the drawn codes repeats a session's rows once per draw.
    df_sample = sessions_sample.merge(df, how="left", on="session.code")

    return build_model(df_sample)

In [38]:
def baselineReg(df, is_5_bidder, is_high_cost, type):
    """Build the regression model on the full (non-resampled) treatment cell.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``is_5_bidder`` and ``is_high_cost``.
    is_5_bidder, is_high_cost
        Values selecting the treatment cell; rows are kept where both columns
        equal the given values.
    type : str
        Suffix of the model builder to use: ``"Dbid"`` selects ``regDbid``,
        ``"Dstagebid"`` selects ``regDstagebid``. (The name shadows the
        builtin but is kept because callers pass it by keyword.)

    Returns
    -------
    Whatever the selected ``reg<type>`` function returns for the cell.

    Raises
    ------
    NameError
        If no global named ``reg<type>`` exists.
    """
    # Look the builder up by name instead of eval()-ing a formatted string:
    # same dispatch, but text in `type` can never be executed as code.
    builder_name = "reg%s" % type
    if builder_name not in globals():
        raise NameError("baselineReg: no regression builder named %r is defined" % builder_name)
    build_model = globals()[builder_name]

    tmp = df[(df["is_5_bidder"] == is_5_bidder) & (df["is_high_cost"] == is_high_cost)]

    return build_model(tmp)

In [39]:
def bootstrap(df, is_5_bidder, is_high_cost, type, rep):
    """Collect the fitted parameters of ``rep`` session-level bootstrap fits.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``is_5_bidder`` and ``is_high_cost`` plus whatever
        ``sampleReg`` needs.
    is_5_bidder, is_high_cost
        Values selecting the treatment cell that is resampled.
    type : str
        Passed through to ``sampleReg`` to select the regression.
    rep : int
        Number of bootstrap replications.

    Returns
    -------
    pandas.DataFrame
        One row per replication with float columns ``const`` and ``coef``
        (the first and second fitted parameter).

    Raises
    ------
    ValueError
        If a replication does not yield exactly two parameters. This happens
        when ``sm.add_constant`` skips the intercept because the resampled
        regressor is itself constant.
    """
    tmp = df[(df["is_5_bidder"] == is_5_bidder) & (df["is_high_cost"] == is_high_cost)]
    cols = ["const", "coef"]

    # Gather the draws in a plain list and build the frame once at the end;
    # enlarging a DataFrame row by row reallocates it on every iteration.
    draws = []
    for i in range(rep):
        # constant is on the first column !!!
        params = sampleReg(tmp, type).fit().params.tolist()
        if len(params) != len(cols):
            raise ValueError(
                "bootstrap replication %d produced %d parameter(s), expected %d (%s)"
                % (i, len(params), len(cols), ", ".join(cols))
            )
        draws.append(params)

    return pd.DataFrame(draws, columns=cols, dtype=float)

In [40]:
def bootstrapT(df, is_5_bidder, is_high_cost, type, rep):
    """Print the ``is_top`` estimate with a session-bootstrap t-test.

    Prints a three-element list of plain floats:
    ``[theta_base, se_bs, p_value]`` where

    * ``theta_base`` is the ``is_top`` coefficient of the regression fitted
      on the full treatment cell,
    * ``se_bs`` is the sample standard deviation (``ddof=1``) of the ``coef``
      column returned by ``bootstrap``,
    * ``p_value`` is the two-sided p-value of ``|theta_base| / se_bs`` under
      a Student t distribution with the baseline fit's residual degrees of
      freedom.

    Parameters
    ----------
    df : pandas.DataFrame
        Regression sample covering all treatment cells.
    is_5_bidder, is_high_cost
        Values selecting the treatment cell.
    type : str
        Regression selector passed to ``baselineReg`` and ``bootstrap``.
    rep : int
        Number of bootstrap replications.

    Returns
    -------
    None
    """
    # baseline test statistic -- theta_base
    # Fit once and reuse the result for both the degrees of freedom and the
    # coefficient (the model was previously fitted twice).
    fit_base = baselineReg(df, is_5_bidder, is_high_cost, type).fit()
    df_t = fit_base.df_resid
    theta_base = fit_base.params["is_top"]

    # bootstrap test statistics -- theta_bs
    bs_predictions = bootstrap(df, is_5_bidder, is_high_cost, type, rep)
    theta_bs = bs_predictions["coef"]

    # bootstrap standard error, p-value
    se_bs = np.std(theta_bs, ddof=1)
    p_value = stats.t.sf(abs(theta_base) / se_bs, df=df_t) * 2

    # Convert only at print time: the arithmetic above stays in NumPy scalars
    # (so a zero standard error still yields inf rather than an exception),
    # while the printed list shows bare numbers on every NumPy version
    # (NumPy 2 prints its scalars as `np.float64(...)`).
    print([float(theta_base), float(se_bs), float(p_value)])

### Dutch auction bids

2H

In [41]:
# Re-seed NumPy's global RNG so this cell's bootstrap draws are reproducible on their own.
np.random.seed(1)
# Prints ["is_top" coefficient, bootstrap SE, two-sided p-value] for the is_5_bidder=0, is_high_cost=1 rows.
bootstrapT(df_dbid, is_5_bidder=0, is_high_cost=1, type="Dbid", rep=1000)

[-3.596100948368835, 1.8535943435019555, 0.05316969546177638]


2L

In [42]:
# Re-seed NumPy's global RNG so this cell's bootstrap draws are reproducible on their own.
np.random.seed(1)
# Prints ["is_top" coefficient, bootstrap SE, two-sided p-value] for the is_5_bidder=0, is_high_cost=0 rows.
bootstrapT(df_dbid, is_5_bidder=0, is_high_cost=0, type="Dbid", rep=1000)

[-3.3261270467108925, 1.4955581465064067, 0.026812440623370428]


5H

In [43]:
# Re-seed NumPy's global RNG so this cell's bootstrap draws are reproducible on their own.
np.random.seed(1)
# Prints ["is_top" coefficient, bootstrap SE, two-sided p-value] for the is_5_bidder=1, is_high_cost=1 rows.
bootstrapT(df_dbid, is_5_bidder=1, is_high_cost=1, type="Dbid", rep=1000)

[-1.83654270016818, 0.5014717469994258, 0.00031707488965900595]


5L

In [44]:
# Re-seed NumPy's global RNG so this cell's bootstrap draws are reproducible on their own.
np.random.seed(1)
# Prints ["is_top" coefficient, bootstrap SE, two-sided p-value] for the is_5_bidder=1, is_high_cost=0 rows.
bootstrapT(df_dbid, is_5_bidder=1, is_high_cost=0, type="Dbid", rep=1000)

[-6.212659411316322, 0.9738994300186226, 1.2767564097253191e-09]


### Dutch stage bids

2H

In [45]:
# Re-seed NumPy's global RNG so this cell's bootstrap draws are reproducible on their own.
np.random.seed(1)
# Prints ["is_top" coefficient, bootstrap SE, two-sided p-value] for the is_5_bidder=0, is_high_cost=1 rows.
bootstrapT(df_dstagebid, is_5_bidder=0, is_high_cost=1, type="Dstagebid", rep=1000)

[2.459872387155345, 0.9363472081663206, 0.009011200581033923]


2L

In [46]:
# Re-seed NumPy's global RNG so this cell's bootstrap draws are reproducible on their own.
np.random.seed(1)
# Prints ["is_top" coefficient, bootstrap SE, two-sided p-value] for the is_5_bidder=0, is_high_cost=0 rows.
bootstrapT(df_dstagebid, is_5_bidder=0, is_high_cost=0, type="Dstagebid", rep=1000)

[0.08203985742943265, 1.4138587693143405, 0.953765583442802]


5H

In [47]:
# Re-seed NumPy's global RNG so this cell's bootstrap draws are reproducible on their own.
np.random.seed(1)
# Prints ["is_top" coefficient, bootstrap SE, two-sided p-value] for the is_5_bidder=1, is_high_cost=1 rows.
bootstrapT(df_dstagebid, is_5_bidder=1, is_high_cost=1, type="Dstagebid", rep=1000)

[0.9169817285057028, 2.531794966539822, 0.7175804432537387]


5L

In [48]:
# Re-seed NumPy's global RNG so this cell's bootstrap draws are reproducible on their own.
np.random.seed(1)
# Prints ["is_top" coefficient, bootstrap SE, two-sided p-value] for the is_5_bidder=1, is_high_cost=0 rows.
bootstrapT(df_dstagebid, is_5_bidder=1, is_high_cost=0, type="Dstagebid", rep=1000)

[-0.9665612998346347, 1.2375343163710908, 0.43573219675710617]


## Predicted / Not Predicted Winners Data

In [49]:
def genData(df_d, df_h):
    """Build the winner-only samples for the predicted-winner analysis.

    Adds ``is_honolulu``, ``is_high_cost`` and ``is_5_bidder`` to both input
    frames in place (``df_d`` is tagged ``is_honolulu = 0``, ``df_h`` is
    tagged ``is_honolulu = 1``), then keeps only the rows with
    ``player.is_dutch_winner == 1``.

    Returns
    -------
    (df_dbid, df_dstagebid) : tuple of pandas.DataFrame
        ``df_dbid`` comes from ``df_d`` and carries ``predict.player.bid``;
        ``df_dstagebid`` comes from ``df_h`` and carries
        ``predict.player.optimal_dutch_bid``. Both also carry the final
        price, the two treatment flags, ``predict.player.is_dutch_winner``
        and ``session.code``.
    """
    for frame, honolulu_flag in ((df_d, 0), (df_h, 1)):
        frame["is_honolulu"] = honolulu_flag
        # discount_b equal to 0.019 is what marks a row as high cost
        frame["is_high_cost"] = frame["session.config.discount_b"].eq(0.019)
        # a session config name containing "5_" marks a row as 5-bidder
        frame["is_5_bidder"] = frame["session.config.name"].str.contains("5_")

    shared_cols = [
        "is_high_cost", "is_5_bidder", "predict.player.is_dutch_winner",
        "session.code",
    ]

    # for dutch auction bids
    dbid_cols = ["group.dutch_final_price", "predict.player.bid"] + shared_cols
    dutch_winners = df_d["player.is_dutch_winner"] == 1
    df_dbid = df_d.loc[dutch_winners, dbid_cols]

    # for dutch stage bids
    dstage_cols = ["group.dutch_final_price", "predict.player.optimal_dutch_bid"] + shared_cols
    honolulu_winners = df_h["player.is_dutch_winner"] == 1
    df_dstagebid = df_h.loc[honolulu_winners, dstage_cols]

    return df_dbid, df_dstagebid

In [50]:
# Re-read both CSV files so this section starts from unmodified frames
# (genData adds columns to the frames it is given, in place).
df_d = pd.read_csv("Dutch_paydiff_norm_zerocost.csv")
df_h = pd.read_csv("Honolulu_paydiff_norm_zerocost.csv")
# Rebinds df_dbid / df_dstagebid to the winner-only subsets built by this section's genData.
df_dbid, df_dstagebid = genData(df_d, df_h)

In [51]:
def regDstagebid(df):
    """Set up the OLS model for the Dutch stage bids (predicted-winner split).

    The dependent variable is ``group.dutch_final_price`` minus
    ``predict.player.optimal_dutch_bid``; the regressors are a constant and
    ``predict.player.is_dutch_winner``.

    Returns the unfitted ``sm.OLS`` model; callers invoke ``.fit()``.
    Missing values make statsmodels raise (``missing="raise"``).
    """
    price_gap = df["group.dutch_final_price"].sub(df["predict.player.optimal_dutch_bid"]).astype(float)
    regressors = df.loc[:, ["predict.player.is_dutch_winner"]].astype(float)

    # add_constant puts the intercept in the first column, so the fitted
    # parameters come out ordered as (const, predict.player.is_dutch_winner).
    design = sm.add_constant(regressors)

    return sm.OLS(price_gap, design, missing="raise")

In [52]:
def sampleReg(df, type):
    """Build a regression model on one session-level bootstrap resample.

    Sessions (``session.code``) are drawn with replacement, as many as there
    are distinct sessions in ``df``, using NumPy's global RNG. Every row of a
    drawn session is included, once per time the session was drawn.

    Parameters
    ----------
    df : pandas.DataFrame
        Data for a single treatment cell; must contain ``session.code``.
    type : str
        Suffix of the model builder to use, e.g. ``"Dstagebid"`` selects the
        global function ``regDstagebid``. (The name shadows the builtin but
        is kept because callers pass it by keyword.)

    Returns
    -------
    Whatever the selected ``reg<type>`` function returns for the resample.

    Raises
    ------
    NameError
        If no global named ``reg<type>`` exists.
    """
    # Look the builder up by name instead of eval()-ing a formatted string:
    # same dispatch, but text in `type` can never be executed as code.
    builder_name = "reg%s" % type
    if builder_name not in globals():
        raise NameError("sampleReg: no regression builder named %r is defined" % builder_name)
    build_model = globals()[builder_name]

    # resampling data on the session level (the block) for each treatment
    sessions = df["session.code"].unique()
    sessions_sample = pd.DataFrame({"session.code" : np.random.choice(sessions, size=sessions.size, replace=True)})
    # Left merge on the drawn codes repeats a session's rows once per draw.
    df_sample = sessions_sample.merge(df, how="left", on="session.code")

    return build_model(df_sample)

In [53]:
def baselineReg(df, is_5_bidder, is_high_cost, type):
    """Build the regression model on the full (non-resampled) treatment cell.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``is_5_bidder`` and ``is_high_cost``.
    is_5_bidder, is_high_cost
        Values selecting the treatment cell; rows are kept where both columns
        equal the given values.
    type : str
        Suffix of the model builder to use, e.g. ``"Dstagebid"`` selects the
        global function ``regDstagebid``. (The name shadows the builtin but
        is kept because callers pass it by keyword.)

    Returns
    -------
    Whatever the selected ``reg<type>`` function returns for the cell.

    Raises
    ------
    NameError
        If no global named ``reg<type>`` exists.
    """
    # Look the builder up by name instead of eval()-ing a formatted string:
    # same dispatch, but text in `type` can never be executed as code.
    builder_name = "reg%s" % type
    if builder_name not in globals():
        raise NameError("baselineReg: no regression builder named %r is defined" % builder_name)
    build_model = globals()[builder_name]

    tmp = df[(df["is_5_bidder"] == is_5_bidder) & (df["is_high_cost"] == is_high_cost)]

    return build_model(tmp)

In [54]:
def bootstrap(df, is_5_bidder, is_high_cost, type, rep):
    """Collect the fitted parameters of ``rep`` session-level bootstrap fits.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain ``is_5_bidder`` and ``is_high_cost`` plus whatever
        ``sampleReg`` needs.
    is_5_bidder, is_high_cost
        Values selecting the treatment cell that is resampled.
    type : str
        Passed through to ``sampleReg`` to select the regression.
    rep : int
        Number of bootstrap replications.

    Returns
    -------
    pandas.DataFrame
        One row per replication with float columns ``const`` and ``coef``
        (the first and second fitted parameter).

    Raises
    ------
    ValueError
        If a replication does not yield exactly two parameters. This happens
        when ``sm.add_constant`` skips the intercept because the resampled
        regressor is itself constant.
    """
    tmp = df[(df["is_5_bidder"] == is_5_bidder) & (df["is_high_cost"] == is_high_cost)]
    cols = ["const", "coef"]

    # Gather the draws in a plain list and build the frame once at the end;
    # enlarging a DataFrame row by row reallocates it on every iteration.
    draws = []
    for i in range(rep):
        # constant is on the first column !!!
        params = sampleReg(tmp, type).fit().params.tolist()
        if len(params) != len(cols):
            raise ValueError(
                "bootstrap replication %d produced %d parameter(s), expected %d (%s)"
                % (i, len(params), len(cols), ", ".join(cols))
            )
        draws.append(params)

    return pd.DataFrame(draws, columns=cols, dtype=float)

In [57]:
def bootstrapT(df, is_5_bidder, is_high_cost, type, rep):
    """Print the ``predict.player.is_dutch_winner`` estimate with a session-bootstrap t-test.

    Prints a three-element list of plain floats:
    ``[theta_base, se_bs, p_value]`` where

    * ``theta_base`` is the ``predict.player.is_dutch_winner`` coefficient of
      the regression fitted on the full treatment cell,
    * ``se_bs`` is the sample standard deviation (``ddof=1``) of the ``coef``
      column returned by ``bootstrap``,
    * ``p_value`` is the two-sided p-value of ``|theta_base| / se_bs`` under
      a Student t distribution with the baseline fit's residual degrees of
      freedom.

    Parameters
    ----------
    df : pandas.DataFrame
        Regression sample covering all treatment cells.
    is_5_bidder, is_high_cost
        Values selecting the treatment cell.
    type : str
        Regression selector passed to ``baselineReg`` and ``bootstrap``.
    rep : int
        Number of bootstrap replications.

    Returns
    -------
    None
    """
    # baseline test statistic -- theta_base
    # Fit once and reuse the result for both the degrees of freedom and the
    # coefficient (the model was previously fitted twice).
    fit_base = baselineReg(df, is_5_bidder, is_high_cost, type).fit()
    df_t = fit_base.df_resid
    theta_base = fit_base.params["predict.player.is_dutch_winner"]

    # bootstrap test statistics -- theta_bs
    bs_predictions = bootstrap(df, is_5_bidder, is_high_cost, type, rep)
    theta_bs = bs_predictions["coef"]

    # bootstrap standard error, p-value
    se_bs = np.std(theta_bs, ddof=1)
    p_value = stats.t.sf(abs(theta_base) / se_bs, df=df_t) * 2

    # Convert only at print time: the arithmetic above stays in NumPy scalars
    # (so a zero standard error still yields inf rather than an exception),
    # while the printed list shows bare numbers on every NumPy version
    # (NumPy 2 prints its scalars as `np.float64(...)`).
    print([float(theta_base), float(se_bs), float(p_value)])

### Dutch stage bids

2H

In [58]:
# Re-seed NumPy's global RNG so this cell's bootstrap draws are reproducible on their own.
np.random.seed(1)
# Prints ["predict.player.is_dutch_winner" coefficient, bootstrap SE, two-sided p-value] for the is_5_bidder=0, is_high_cost=1 rows.
bootstrapT(df_dstagebid, is_5_bidder=0, is_high_cost=1, type="Dstagebid", rep=1000)

[-5.708956389540005, 0.7362698532598597, 1.1019886091295812e-13]


2L

In [61]:
# Re-seed NumPy's global RNG so this cell's bootstrap draws are reproducible on their own.
np.random.seed(1)
# Prints ["predict.player.is_dutch_winner" coefficient, bootstrap SE, two-sided p-value] for the is_5_bidder=0, is_high_cost=0 rows.
bootstrapT(df_dstagebid, is_5_bidder=0, is_high_cost=0, type="Dstagebid", rep=1000)

[-3.390902753238714, 0.399450292129194, 8.672204498564953e-16]


5H

In [62]:
# Re-seed NumPy's global RNG so this cell's bootstrap draws are reproducible on their own.
np.random.seed(1)
# Prints ["predict.player.is_dutch_winner" coefficient, bootstrap SE, two-sided p-value] for the is_5_bidder=1, is_high_cost=1 rows.
bootstrapT(df_dstagebid, is_5_bidder=1, is_high_cost=1, type="Dstagebid", rep=1000)

[-3.384085145296054, 0.9598858042624803, 0.0005198261683893278]


5L

In [63]:
# Re-seed NumPy's global RNG so this cell's bootstrap draws are reproducible on their own.
np.random.seed(1)
# Prints ["predict.player.is_dutch_winner" coefficient, bootstrap SE, two-sided p-value] for the is_5_bidder=1, is_high_cost=0 rows.
bootstrapT(df_dstagebid, is_5_bidder=1, is_high_cost=0, type="Dstagebid", rep=1000)

[-5.443556912537044, 0.6410084325115663, 5.2473418880818155e-15]
