In [333]:
import pandas as pd
import time

### Import raw data

In [334]:
# Timestamp suffix shared by the input export and by every output file name.
str_date = "2022-11-04-19"

# Discount rate used when computing auctioneer utility.
DISCOUNT_C = 0.019
# Alternative rate referenced by the "hypothetical utility" notes further down.
DISCOUNT_C_HYPO = 0.019
# Scale factor: item values are divided by P_MAX before being fed to the
# models, and discount rates are multiplied by it.
P_MAX = 50
# The two bidder discount rates that session.config.discount_b is compared against.
B_LOW = 0.009
B_HIGH = 0.019

df = pd.read_csv(f"all_apps_wide_{str_date}.csv")

In [335]:
def sessionnameToNum(x):
    """Translate a session config name into its numeric treatment code.

    Returns None when the name is not one of the four known treatments.
    """
    treatment_codes = {
        "02_D1H2": 212,
        "02_D2H1": 221,
        "05_D1H2": 512,
        "05_D2H1": 521,
    }
    return treatment_codes.get(x)

def utctimeToNum(x):
    """Encode a "YYYY/MM/DD HH:MM:SS" timestamp as the integer YYYYMMDDHH.

    Minutes and seconds are parsed (the format requires them) but discarded.
    Raises ValueError (from time.strptime) if x does not match the format.
    """
    # Parse once and reuse the struct_time instead of re-parsing per field.
    parsed = time.strptime(x, "%Y/%m/%d %H:%M:%S")
    return (
        parsed.tm_year * 1000000
        + parsed.tm_mon * 10000
        + parsed.tm_mday * 100
        + parsed.tm_hour
    )

def labelToNum(x):
    """Return the integer that follows the first "p" in a participant label.

    If the label contains no "p", the whole label is converted instead.
    """
    # Splitting once on "p" leaves the tail last; with no "p" the only piece
    # is the full label.
    tail = x.split("p", 1)[-1]
    return int(tail)

# Add numeric companions to three raw string columns:
#   session.config.name          -> treatment code (None/NaN for unknown names)
#   participant.time_started_utc -> YYYYMMDDHH integer
#   participant.label            -> integer after the first "p" in the label
df["session.treatment_num"] = df["session.config.name"].map(sessionnameToNum)
df["participant.date_num"] = df["participant.time_started_utc"].map(utctimeToNum)
df["participant.label_num"] = df["participant.label"].map(labelToNum)

### Functions for roundwise data

In [336]:
def findInfo(dfrow):
    """Select the participant- and session-level columns from dfrow.

    The columns are returned in the fixed order listed below; a KeyError is
    raised by pandas if any of them is missing.
    """
    participant_columns = [
        "participant.code",
        "participant.label",
        "participant.label_num",
        "participant.payoff",
        "participant.dutch_payoff",
        "participant.honolulu_payoff",
        "participant.time_started_utc",
        "participant.date_num",
    ]
    session_columns = [
        "session.code",
        "session.treatment_num",
        "session.config.real_world_currency_per_point",
        "session.config.skip_intro_dutch",
        "session.config.discount_a",
        "session.config.start_round_dutch",
        "session.config.name",
        "session.config.num_test_rounds",
        "session.config.start_price",
        "session.config.price_tick",
        "session.config.start_round_honolulu",
        "session.config.skip_intro_honolulu",
        "session.config.num_formal_rounds",
        "session.config.discount_b",
        "session.config.participation_fee",
    ]
    return dfrow[participant_columns + session_columns]

def findAuction(dfrow, appname, round):
    """Pull one round of auction columns out of a wide-format row.

    Source columns are named "<appname>.<round>.<field>"; the returned frame
    keeps dfrow's index and uses the bare "<field>" names.

    dfrow   -- DataFrame holding the wide-format row(s)
    appname -- app name; selects the Honolulu field list if it contains
               "Honolulu", otherwise the Dutch list if it contains "Dutch"
    round   -- round number as a string (note: shadows the builtin `round`
               inside this function)

    Returns an empty DataFrame when appname matches neither auction type.
    """
    # Fields present in both auction formats, in output order.
    shared_fields = [
        "subsession.is_dutch_first",
        "subsession.round_number",
        "group.id_in_subsession",
        "player.id_in_group",
        "subsession.price_start",
        "player.item_value",
        "player.is_dutch_winner",
        "group.have_dutch_winner",
        "group.dutch_time_elapsed",
        "group.dutch_final_price",
    ]
    # Not exported (left disabled): group.num_active, group.dutch_start_time,
    # group.num_contest_no_decision, group.english_start_time.
    honolulu_fields = shared_fields + [
        "player.contest_status",
        "group.have_contest_winner",
        "player.is_english_winner",
        "player.english_dropout_elpased",
        "group.english_time_elapsed",
        "group.english_final_price",
        "player.is_final_winner",
        "group.final_price",
        "player.payoff",
    ]
    dutch_fields = shared_fields + [
        "player.is_final_winner",
        "player.payoff",
    ]

    extracted = pd.DataFrame()
    if "Honolulu" in appname:
        fields = honolulu_fields
    elif "Dutch" in appname:
        fields = dutch_fields
    else:
        return extracted

    column_prefix = appname + "." + round + "."
    for field in fields:
        extracted[field] = dfrow[column_prefix + field]
    return extracted

def findSurvey(dfrow, appname):
    """Extract the seven survey answers auc0..auc6 for one feedback app.

    Source columns are named "<appname>.1.player.auc<k>"; the returned frame
    keeps dfrow's index and uses the bare "auc<k>" names.
    """
    answers = pd.DataFrame()
    for question in range(7):
        answer_name = "auc" + str(question)
        answers[answer_name] = dfrow[appname + ".1.player." + answer_name]
    return answers

In [337]:
def genRoundwiseData(df, num_rounds=40):
    """Reshape the wide export into per-round auction tables and survey tables.

    For every row of df, the participant/session info columns are paired with
    the auction columns of each round 1..num_rounds (once for the Honolulu
    app, once for the Dutch app) and with the answers of the two feedback apps.

    df         -- wide-format DataFrame, one row per participant
    num_rounds -- number of round slots to read per app (default 40)

    Returns (df_h, df_d, df_surveyh, df_surveyd). df_h and df_d only keep
    rows whose "player.item_value" is not null, and gain a
    "group.auctioneer_utility" column computed with the module-level DISCOUNT_C.

    Raises ValueError if a session name contains neither "2_" nor "5_".
    """
    # Collect the pieces and concatenate once at the end; growing a frame
    # with pd.concat inside the loop copies all previous rows every time.
    honolulu_parts = []
    dutch_parts = []
    surveyh_parts = []
    surveyd_parts = []

    for i in range(len(df)):
        dfrow = df.iloc[[i]]
        session_name = dfrow["session.config.name"].iat[0]
        if "2_" in session_name:
            appname_d = "Dutch2"
            appname_h = "Honolulu2"
        elif "5_" in session_name:
            appname_d = "Dutch5"
            appname_h = "Honolulu5"
        else:
            # Without this, the app names of the previous row would be reused.
            raise ValueError(
                "Cannot infer group size from session name: " + repr(session_name)
            )

        dfrow_info = findInfo(dfrow)

        for j in range(1, num_rounds + 1):
            round_label = str(j)
            honolulu_parts.append(
                pd.concat([dfrow_info, findAuction(dfrow, appname_h, round_label)], axis=1)
            )
            dutch_parts.append(
                pd.concat([dfrow_info, findAuction(dfrow, appname_d, round_label)], axis=1)
            )

        surveyh_parts.append(
            pd.concat([dfrow_info, findSurvey(dfrow, "Feedback_Honolulu")], axis=1)
        )
        surveyd_parts.append(
            pd.concat([dfrow_info, findSurvey(dfrow, "Feedback_Dutch")], axis=1)
        )

    def _stack(parts):
        # pd.concat rejects an empty list; mirror the empty starting frame.
        return pd.concat(parts) if parts else pd.DataFrame()

    df_h = _stack(honolulu_parts)
    df_d = _stack(dutch_parts)
    df_surveyh = _stack(surveyh_parts)
    df_surveyd = _stack(surveyd_parts)

    # Drop round slots that were never played. The explicit copy makes the
    # column assignments below act on independent frames rather than slices.
    df_h = df_h.loc[df_h["player.item_value"].notnull()].copy()
    df_d = df_d.loc[df_d["player.item_value"].notnull()].copy()

    final_price = df_h["group.final_price"].values
    time_elapsed = df_h["group.dutch_time_elapsed"].values + df_h["group.english_time_elapsed"].values
    tick = df_h["session.config.price_tick"].values
    df_h["group.auctioneer_utility"] = final_price * (1 - DISCOUNT_C * time_elapsed / tick) # use DISCOUNT_C_HYPO to generate hypothetical utility

    final_price = df_d["group.dutch_final_price"].values
    time_elapsed = df_d["group.dutch_time_elapsed"].values
    tick = df_d["session.config.price_tick"].values
    df_d["group.auctioneer_utility"] = final_price * (1 - DISCOUNT_C * time_elapsed / tick) # use DISCOUNT_C_HYPO to generate hypothetical utility

    return df_h, df_d, df_surveyh, df_surveyd

### Functions for model prediction

In [338]:
import numpy as np
from scipy.optimize import fsolve
from scipy.integrate import odeint, quad
from scipy.optimize import minimize_scalar
from scipy.interpolate import interp1d

In [339]:
class honolulu(object):
    """Numerical model of a Honolulu auction (descending phase, then ascending
    phase) with n bidders whose values are uniform on [0, 1] (F(x) = x).

    a, b -- bidder discounting enters as (a - b * elapsed)
    c    -- discounting in U enters as (a - c * elapsed)
    n    -- number of bidders
    s    -- start price; fixed at 1 when c == 0, otherwise the maximiser of U
            on [0, 1] found in the constructor (this runs a numerical search)

    Every method works on scalar v; p(v, s) is obtained with a root-finder,
    so it is computed once per evaluation and reused inside integrands.
    """

    def __init__(self, a, b, c, n):
        self.n = n
        self.a = a
        self.b = b
        self.c = c
        if c == 0:
            self.s = 1
        else:
            self.s = self.max_U()

    # F(x) = x
    # G(x) = F(x) ^ (n - 1)
    # g(x) = G'(x)
    # H(x) = integral(y[0, x])(G(y))
    # G(v, x) = n * F(v) * F(x) ^ (n - 1)
    # g(v, x) = d^2 G(v, x) / dvdx

    def K(self, pvar, v, s):
        """Residual whose root in pvar defines k(v, s)."""
        # K(p, v, s) = 2 * b * (H(v) - H(p)) - G(p) * (a - b * (s - v))
        return (
            2 * self.b
            * (v ** self.n - pvar ** self.n) / self.n
            - pvar ** (self.n - 1)
            * (self.a - self.b * (s - v))
        )

    def k(self, v, s):
        """Solve K(p, v, s) = 0 for p with fsolve, starting from v / 2.

        fsolve's convergence status is not inspected.
        """
        # p = k(v, s) solves K(p, v, s) = 0
        K_fun = lambda pvar: self.K(pvar, v, s)
        sol = fsolve(K_fun, v / 2)[0]
        return sol

    def p(self, v, s):
        """Price p(v, s) = min{s, k(v, s)}."""
        # p = min{s, k(v, s)}
        return np.minimum(s, self.k(v, s))

    def U(self, s):
        """Objective maximised over the start price s (see max_U)."""
        # integral(x[0, p(v, s)], v[0, 1])(p(v, s) * (a - c * (s - p(v, s))) * g(v, x)) +
        # integral(x[p(v, s), v], v[0, 1])(x * (a - c * (s + x - 2 * p(v, s))) * g(v, x))
        # double integration is too slow, manually compute the integration with respect to x first
        def integrand(v):
            price = self.p(v, s)  # one root-find per node instead of three
            return (
                (self.n - 1) * v ** self.n * (self.a - self.c * (s - 2 * price))
                - self.n * (self.n - 1) / (self.n + 1) * self.c * v ** (self.n + 1)
                + price ** self.n * (self.a - self.c * s + 2 * self.c * price / (self.n + 1))
            )

        sol = quad(integrand, 0, 1)[0]
        return sol

    def max_U(self):
        """Return the s in [0, 1] that maximises U (bounded scalar search)."""
        obj_fun = lambda s: - self.U(s)
        sol = minimize_scalar(obj_fun, bounds=(0, 1), method="bounded")
        return sol["x"]

    def EUa(self):
        """U evaluated at the model's start price self.s."""
        return self.U(self.s)

    def ER(self):
        """Expected final price at start price self.s."""
        # integral(x[0, p(v, s)], v[0, 1])(p(v, s) * g(v, x)) +
        # integral(x[p(v, s), v], v[0, 1])(x * g(v, x))
        # double integration is too slow, manually compute the integration with respect to x first
        integrand = (
            lambda v: self.p(v, self.s) ** self.n
            + (self.n - 1) * v ** self.n
        )
        sol = quad(integrand, 0, 1)[0]
        return sol

    def ED(self):
        """Expected elapsed price distance (descent plus ascent) at self.s."""
        # integral(x[0, p(v, s)], v[0, 1])((s - p(v, s)) * g(v, x)) +
        # integral(x[p(v, s), v], v[0, 1])((s + x - 2 * p(v, s)) * g(v, x))
        # double integration is too slow, manually compute the integration with respect to x first
        def integrand(v):
            price = self.p(v, self.s)  # one root-find per node instead of two
            return (
                price ** self.n
                + (self.n - 1) * v ** self.n
                + self.n * (self.s - 2 * price) * v ** (self.n - 1)
            )

        sol = quad(integrand, 0, 1)[0]
        return sol

    def eUb(self, v):
        """Expected discounted surplus of a bidder with value v."""
        # G(p(v, s)) * (v - p(v, s)) * (a - b * (s - p(v, s))) +
        # integral(x[p(v, s), v])((v - x) * (a -  b * (s + x - 2 * p(v, s))) * g(x))
        # p(v, s) does not depend on x, so solve for it once rather than at
        # every inner quadrature node.
        price = self.p(v, self.s)
        integrand = (
            lambda x: (v - x)
            * (self.a - self.b * (self.s + x - 2 * price))
            * (self.n - 1) * x ** (self.n - 2)
        )
        return (
            price ** (self.n - 1)
            * (v - price)
            * (self.a - self.b * (self.s - price))
            + quad(integrand, price, v)[0]
        )

    def EUb(self):
        """Integral of eUb(v) over v in [0, 1]."""
        integrand = lambda v: self.eUb(v)
        return quad(integrand, 0, 1)[0]

    def ePb(self, v):
        """Expected payment of a bidder with value v."""
        # p(v, s) ^ n + integral(x[p(v, s), v])(x * g(x))
        price = self.p(v, self.s)
        integrand = lambda x: x * (self.n - 1) * x ** (self.n - 2)
        return price ** self.n + quad(integrand, price, v)[0]

In [340]:
class dutch(object):
    """Numerical model of a Dutch (descending) auction starting at price 1
    with n bidders whose values are uniform on [0, 1] (F(x) = x).

    a, b -- bidder discounting enters as (a - b * (1 - bid))
    c    -- discounting in EUa enters as (a - c * (1 - bid))
    n    -- number of bidders

    The bid function beta is obtained by integrating db_dv on the grid
    self.vs and interpolating. The interpolant is built on the first call to
    beta() and cached, so parameters changed after that call are not picked
    up; create a new instance instead.
    """

    def __init__(self, a, b, c, n):
        self.a = a
        self.b = b
        self.c = c
        self.n = n
        self.vs = np.linspace(1e-16, 1, 10000)
        self._beta_interp = None  # lazily built by beta()

    # F(x) = x
    # G(x) = F(x) ^ (n - 1)
    # g(x) = G'(x)
    # H(x) = F(x) ^ n
    # h(x) = H'(x)

    def db_dv(self, beta, v):
        """Right-hand side of the ODE for the bid function."""
        # beta'(v) = g(v)* (v - beta(v)) * (a - b * (1 - beta(v))) / (G(v) * (a + 2 * b * beta(v) - b - b * v))
        return (
            (self.n - 1) * v ** (self.n - 2)
            * (v - beta)
            * (self.a - self.b * (1 - beta))
            / (v ** (self.n - 1) * (self.a + 2 * self.b * beta - self.b - self.b * v))
        )

    def solve_ode(self):
        """Integrate db_dv over self.vs from beta = 0; returns a 1-D array."""
        beta0 = 0
        return odeint(self.db_dv, beta0, self.vs).flatten()

    def beta(self, v):
        """Bid for value(s) v, linearly interpolated (and extrapolated) from
        the ODE solution. Accepts scalars or arrays."""
        # Solving the 10000-point ODE is expensive and its result only depends
        # on the instance, so do it once instead of on every call.
        if getattr(self, "_beta_interp", None) is None:
            self._beta_interp = interp1d(self.vs, self.solve_ode(), fill_value="extrapolate")
        return self._beta_interp(v)

    def EUa(self):
        """Expected discounted winning bid, discounted with c."""
        # integral(v[0, 1])(beta(v) * (a - c * (1 - beta(v))) * h(v))
        def integrand(v):
            bid = self.beta(v)
            return (
                bid
                * (self.a - self.c * (1 - bid))
                * self.n * v ** (self.n - 1)
            )

        return quad(integrand, 0, 1)[0]

    def ER(self):
        """Expected winning bid."""
        # integral(v[0, 1])(beta(v) * h(v))
        integrand = lambda x: self.beta(x) * self.n * x ** (self.n - 1)
        return quad(integrand, 0, 1)[0]

    def ED(self):
        """Expected price distance travelled from the start price 1."""
        # 1- integral(v[0, 1])(beta(v) * h(v))
        return 1 - self.ER()

    def eUb(self, v):
        """Expected discounted surplus of a bidder with value v."""
        # v ^ (n - 1) * (v - beta(v)) * (a - b * (1 - beta(v)))
        # The elapsed descent when the bid is hit is 1 - beta(v); this is the
        # same discount term the ODE in db_dv is built on.
        bid = self.beta(v)
        return (
            v ** (self.n - 1) * (v - bid) * (self.a - self.b * (1 - bid))
        )

    def EUb(self):
        """Integral of eUb(v) over v in [0, 1]."""
        integrand = lambda v: self.eUb(v)
        return quad(integrand, 0, 1)[0]

    def ePb(self, v):
        """Expected payment of a bidder with value v."""
        # v ^ (n - 1) * beta(v)
        return v ** (self.n - 1) * self.beta(v)

### Generating model predictions

In [341]:
# One model per auction format x group size (2 or 5) x bidder discount rate
# (low or high). Rates are multiplied by P_MAX before being passed in; the
# prediction cells divide item values by P_MAX to match.
# Each honolulu(...) constructor runs a numerical search for its start price
# whenever c is non-zero.
d02_l = dutch(a=1, b=B_LOW * P_MAX, c=DISCOUNT_C * P_MAX, n=2)
d02_h = dutch(a=1, b=B_HIGH * P_MAX, c=DISCOUNT_C * P_MAX, n=2)
d05_l = dutch(a=1, b=B_LOW * P_MAX, c=DISCOUNT_C * P_MAX, n=5)
d05_h = dutch(a=1, b=B_HIGH * P_MAX, c=DISCOUNT_C * P_MAX, n=5)
h02_l = honolulu(a=1, b=B_LOW * P_MAX, c=DISCOUNT_C * P_MAX, n=2)
h02_h = honolulu(a=1, b=B_HIGH * P_MAX, c=DISCOUNT_C * P_MAX, n=2)
h05_l = honolulu(a=1, b=B_LOW * P_MAX, c=DISCOUNT_C * P_MAX, n=5)
h05_h = honolulu(a=1, b=B_HIGH * P_MAX, c=DISCOUNT_C * P_MAX, n=5)

In [342]:
# Build the per-round Honolulu and Dutch tables and the two survey tables
# from the wide export loaded into df.
df_h, df_d, df_surveyh, df_surveyd = genRoundwiseData(df)

In [343]:
def genGroupDict(df):
    """Group the rows of a round-wise table by auction group.

    Rows are keyed by "<session.code>_<round_number>_<group id>". Each entry
    is a dict with:
      "index"      -- positional row numbers (usable with .iloc / .iat)
      "item_value" -- the members' item values, in the same order
      "group_size" -- 2 or 5, from the session name ("2_" / "5_"); None if
                      the name contains neither
      "discount_b" -- session.config.discount_b of the last row seen
      "tick"       -- session.config.price_tick of the last row seen
    """
    group_dict = dict()

    for position in range(len(df)):
        row = df.iloc[position]
        tmp_key = "_".join([
            str(row["session.code"]),
            str(row["subsession.round_number"]),
            str(row["group.id_in_subsession"]),
        ])

        entry = group_dict.setdefault(tmp_key, {
            "index": [],
            "item_value": [],
            "group_size": None,
            "discount_b": None,
            "tick": None,
        })

        entry["index"].append(position)
        entry["item_value"].append(row["player.item_value"])

        session_name = row["session.config.name"]
        if "2_" in session_name:
            entry["group_size"] = 2
        elif "5_" in session_name:
            entry["group_size"] = 5

        entry["discount_b"] = row["session.config.discount_b"]
        entry["tick"] = row["session.config.price_tick"]

    return group_dict

In [344]:
def bidderUtility(diff, t, tick, b, is_winner):
    """Discounted payoff of a bidder.

    diff      -- item value minus price paid
    t, tick   -- elapsed time and tick length; only their ratio t / tick is used
    b         -- discount rate per tick
    is_winner -- truthy if the bidder won; non-winners get 0

    A non-negative surplus is scaled by (1 - b * t / tick); a negative one by
    (1 + b * t / tick).
    """
    if not is_winner:
        return 0
    if diff >= 0:
        return diff * (1 - b * t / tick)
    return diff * (1 + b * t / tick)

In [345]:
# Model predictions for every Dutch auction group, written into df_d.
dict_d = genGroupDict(df_d)
predict_cols_d = [
    "predict.player.bid",
    "predict.player.is_dutch_winner",
    "predict.player.payoff",
    "predict.group.dutch_time_elapsed",
    "predict.group.dutch_final_price",
    "predict.group.auctioneer_utility",
]
df_d[predict_cols_d] = np.nan

# Model for each (group size, bidder discount rate). The B_LOW entries come
# first so that B_HIGH takes precedence should the two constants be equal.
dutch_models = {
    (2, B_LOW): d02_l,
    (2, B_HIGH): d02_h,
    (5, B_LOW): d05_l,
    (5, B_HIGH): d05_h,
}
# genGroupDict stores positional row numbers, so writes go through .iloc with
# positional column numbers. Writing via df_d[col].iat[...] is chained
# assignment and does not update df_d when pandas Copy-on-Write is active.
col_pos_d = {name: df_d.columns.get_loc(name) for name in predict_cols_d}

for key, group in dict_d.items():
    model = dutch_models.get((group["group_size"], group["discount_b"]))
    if model is None:
        # Fail loudly instead of reusing the previous group's prediction.
        raise ValueError(
            "No Dutch model for group " + key
            + " (group_size=" + repr(group["group_size"])
            + ", discount_b=" + repr(group["discount_b"]) + ")"
        )

    predict_bid = model.beta(np.array(group["item_value"]) / P_MAX) * P_MAX
    final_price = np.max(predict_bid)
    time_elapsed = (P_MAX - final_price) * group["tick"]
    # Every bidder whose predicted bid equals the highest bid is a winner.
    is_winner = (predict_bid == final_price).astype(int)
    payoff = [
        bidderUtility(value - final_price, time_elapsed, group["tick"], group["discount_b"], won)
        for value, won in zip(group["item_value"], is_winner)
    ]

    rows = group["index"]
    df_d.iloc[rows, col_pos_d["predict.player.bid"]] = predict_bid
    df_d.iloc[rows, col_pos_d["predict.player.is_dutch_winner"]] = is_winner
    df_d.iloc[rows, col_pos_d["predict.player.payoff"]] = payoff
    df_d.iloc[rows, col_pos_d["predict.group.dutch_time_elapsed"]] = time_elapsed
    df_d.iloc[rows, col_pos_d["predict.group.dutch_final_price"]] = final_price
    df_d.iloc[rows, col_pos_d["predict.group.auctioneer_utility"]] = final_price * (1 - DISCOUNT_C * (P_MAX - final_price)) # use DISCOUNT_C_HYPO to generate hypothetical utility

In [346]:
# Model predictions for every Honolulu auction group, written into df_h.
dict_h = genGroupDict(df_h)
predict_cols_h = [
    "predict.player.optimal_dutch_bid",
    "predict.player.is_dutch_winner",
    "predict.player.contest_status",
    "predict.player.english_dropout_elapsed",
    "predict.player.is_english_winner",
    "predict.player.is_final_winner",
    "predict.player.payoff",
    "predict.group.dutch_time_elapsed",
    "predict.group.dutch_final_price",
    "predict.group.english_time_elapsed",
    "predict.group.english_final_price",
    "predict.group.final_price",
    "predict.group.auctioneer_utility",
]
df_h[predict_cols_h] = np.nan

# Model for each (group size, bidder discount rate). A single lookup supplies
# both the start price and the bid function, so the two always come from the
# same treatment. The B_LOW entries come first so that B_HIGH takes
# precedence should the two constants be equal.
honolulu_models = {
    (2, B_LOW): h02_l,
    (2, B_HIGH): h02_h,
    (5, B_LOW): h05_l,
    (5, B_HIGH): h05_h,
}
# genGroupDict stores positional row numbers, so writes go through .iloc with
# positional column numbers. Writing via df_h[col].iat[...] is chained
# assignment and does not update df_h when pandas Copy-on-Write is active.
col_pos_h = {name: df_h.columns.get_loc(name) for name in predict_cols_h}

for key, group in dict_h.items():
    model = honolulu_models.get((group["group_size"], group["discount_b"]))
    if model is None:
        # Fail loudly instead of reusing the previous group's prediction.
        raise ValueError(
            "No Honolulu model for group " + key
            + " (group_size=" + repr(group["group_size"])
            + ", discount_b=" + repr(group["discount_b"]) + ")"
        )

    item_value = group["item_value"]
    values = np.asarray(item_value, dtype=float)
    tick = group["tick"]

    # Descending phase: the clock starts at the rounded model start price and
    # stops at the highest predicted bid. Iterate over the rows actually
    # present in the group rather than the nominal group size.
    start_price = round(model.s * P_MAX)
    predict_bid = [model.p(v / P_MAX, start_price / P_MAX) * P_MAX for v in item_value]
    dutch_final_price = np.minimum(start_price, np.max(predict_bid))
    dutch_time_elapsed = (start_price - dutch_final_price) * tick
    is_dutch_winner = (np.asarray(predict_bid) >= dutch_final_price).astype(int)

    # Ascending phase: everyone whose value is at least the Dutch price contests.
    contest_status = (values >= dutch_final_price).astype(int)
    n_contestant = np.sum(contest_status)
    if n_contestant == 1:
        english_final_price = dutch_final_price
        is_english_winner = contest_status
        english_dropout_elapsed = np.full(len(item_value), np.nan)
    elif n_contestant > 1:
        max_value = np.max(values)
        is_english_winner = (values == max_value).astype(int)
        n_english_winner = np.sum(is_english_winner)
        if n_english_winner > 1:
            # Tied top values: the price rises all the way to that value.
            english_final_price = max_value
        else:
            # Unique top value: the price stops at the second-highest value.
            english_final_price = np.max((values < max_value) * values)
        # A contestant stays in until the price reaches their own value or the
        # auction ends, whichever comes first; non-contestants get NaN.
        english_dropout_elapsed = np.where(
            contest_status == 1,
            (np.minimum(values, english_final_price) - dutch_final_price) * tick,
            np.nan,
        )
    else:
        # Without this, the previous group's ascending-phase results would be written.
        raise ValueError("No contestant at the Dutch price for group " + key)

    english_time_elapsed = (english_final_price - dutch_final_price) * tick
    is_final_winner = is_english_winner
    final_price = english_final_price
    payoff = [
        bidderUtility(value - final_price, dutch_time_elapsed + english_time_elapsed, tick, group["discount_b"], won)
        for value, won in zip(item_value, is_final_winner)
    ]

    rows = group["index"]
    df_h.iloc[rows, col_pos_h["predict.player.optimal_dutch_bid"]] = predict_bid
    df_h.iloc[rows, col_pos_h["predict.player.is_dutch_winner"]] = is_dutch_winner
    df_h.iloc[rows, col_pos_h["predict.player.contest_status"]] = contest_status
    df_h.iloc[rows, col_pos_h["predict.player.english_dropout_elapsed"]] = english_dropout_elapsed
    df_h.iloc[rows, col_pos_h["predict.player.is_english_winner"]] = is_english_winner
    df_h.iloc[rows, col_pos_h["predict.player.is_final_winner"]] = is_final_winner
    df_h.iloc[rows, col_pos_h["predict.player.payoff"]] = payoff
    df_h.iloc[rows, col_pos_h["predict.group.dutch_time_elapsed"]] = dutch_time_elapsed
    df_h.iloc[rows, col_pos_h["predict.group.dutch_final_price"]] = dutch_final_price
    df_h.iloc[rows, col_pos_h["predict.group.english_time_elapsed"]] = english_time_elapsed
    df_h.iloc[rows, col_pos_h["predict.group.english_final_price"]] = english_final_price
    df_h.iloc[rows, col_pos_h["predict.group.final_price"]] = final_price
    df_h.iloc[rows, col_pos_h["predict.group.auctioneer_utility"]] = final_price * (1 - DISCOUNT_C * (start_price + english_final_price - 2 * dutch_final_price)) # use DISCOUNT_C_HYPO to generate hypothetical utility

In [347]:
# Write the four result tables next to the notebook, tagged with str_date.
# Row indices are not written.
df_d.to_csv(f"Dutch_new_{str_date}.csv", header=True, index=False)
df_h.to_csv(f"Honolulu_new_{str_date}.csv", header=True, index=False)
df_surveyd.to_csv(f"Feedback_Dutch_new_{str_date}.csv", header=True, index=False)
df_surveyh.to_csv(f"Feedback_Honolulu_new_{str_date}.csv", header=True, index=False)