This notebook pre-processes the data of Study 7 of the paper.

Click the "Show Code" buttons to see the code associated with each output.

In [1]:
import pandas as pd
import numpy as np
import janitor
from datetime import datetime
import pytz

# Study label: interpolated into the data paths ("../Data/<STUDY_NAME>/...") and
# into the summary sentences returned by the helper functions in this notebook.
STUDY_NAME = "Study 7"

def print_prereg_time_utc(dt):
    """
    Build the sentence stating when the study was preregistered, expressed in UTC.

    Despite the name, nothing is printed: the sentence is returned.

    Parameters
    ----------
    dt : str
        Preregistration time as US/Pacific wall-clock time on a 12-hour clock,
        formatted "MM/DD/YYYY HH:MM AM|PM" (e.g. "03/27/2020 06:28 AM").

    Returns
    -------
    str
        "<STUDY_NAME> was preregistered on <timestamp converted to UTC>."

    Raises
    ------
    ValueError
        If `dt` does not match the expected format (this includes hours
        outside 01-12, since the hour is read on a 12-hour clock).
    """
    local_tz = pytz.timezone('US/Pacific')
    target_tz = pytz.timezone('UTC')
    # %I (12-hour clock) is required for %p to have any effect when parsing.
    # With %H the AM/PM marker is consumed but ignored, so "06:28 PM" would be
    # read as 06:28 and "12:05 AM" as 12:05.
    dt = datetime.strptime(dt, "%m/%d/%Y %I:%M %p")
    # NOTE(review): the output format pairs a 24-hour %H with %p (e.g. "13:28 PM").
    # It is left unchanged so previously recorded outputs stay valid.
    preregstring = target_tz.normalize(local_tz.localize(dt)).strftime("%B %d, %Y at %H:%M %p")
    return f"{STUDY_NAME} was preregistered on {preregstring}."

def cet_to_utc(dt):
    """
    Interpret a "DD/MM/YYYY HH:MM" string as CET wall-clock time and convert it to UTC.

    Parameters
    ----------
    dt : str
        Timestamp formatted as "%d/%m/%Y %H:%M".

    Returns
    -------
    datetime.datetime
        The same instant as a timezone-aware datetime in UTC.
    """
    naive_stamp = datetime.strptime(dt, "%d/%m/%Y %H:%M")
    # Attach the CET zone to the naive value first, then shift it into UTC.
    cet_stamp = pytz.timezone('CET').localize(naive_stamp)
    return pytz.timezone('UTC').normalize(cet_stamp)

def print_collection_information(df):
    """
    Build the sentence stating when data collection started and ended, in UTC.

    Despite the name, nothing is printed: the sentence is returned.

    Parameters
    ----------
    df : pandas.DataFrame
        Raw export with `StartDate` and `EndDate` columns holding
        "DD/MM/YYYY HH:MM" strings (the format `cet_to_utc` parses).

    Returns
    -------
    str
        "<STUDY_NAME> was started on <earliest start> and ended on <latest end>."

    Raises
    ------
    ValueError
        If either column has no non-null value, or a value does not match
        the expected timestamp format.
    """
    # Convert to datetimes *before* comparing. The columns are day-first strings,
    # so ordering them as text is not chronological (e.g. "01/04/2020" sorts
    # before "27/03/2020"). Missing values are skipped rather than parsed.
    first_start = min(cet_to_utc(stamp) for stamp in df.StartDate.dropna())
    last_end = max(cet_to_utc(stamp) for stamp in df.EndDate.dropna())
    # NOTE(review): the output format pairs a 24-hour %H with %p (e.g. "14:37 PM").
    # It is left unchanged so previously recorded outputs stay valid.
    startstring = first_start.strftime("%B %d, %Y at %H:%M %p")
    endstring = last_end.strftime("%B %d, %Y at %H:%M %p")
    return f"{STUDY_NAME} was started on {startstring} and ended on {endstring}."

# Load this study's raw export. The path is relative, so it resolves against
# the notebook's working directory.
df = pd.read_csv(f"../Data/{STUDY_NAME}/RawData.csv")

Pre-registration information:

In [2]:
# The argument is US/Pacific wall-clock time; the helper reports it in UTC.
print_prereg_time_utc("03/27/2020 06:28 AM")

'Study 7 was preregistered on March 27, 2020 at 13:28 PM.'

Data collection information:

In [3]:
# Uses the StartDate / EndDate columns of the raw export, converted from CET to UTC.
print_collection_information(df)

'Study 7 was started on March 27, 2020 at 14:37 PM and ended on March 29, 2020 at 11:09 AM.'

Number of participants in the raw Qualtrics export:

In [4]:
# Row count of the raw export, before any filtering.
df.shape[0]

640

Number of total responses (i.e., respondents who did not fail the attention check and were allowed to take the study):

In [5]:
# Flag rows where both screening items equal 1.
# NOTE(review): Death and HeadCount are presumably the attention-check items
# described in the text above — confirm against the survey.
df["Has_Response"] = df["Death"].eq(1) & df["HeadCount"].eq(1)

# Keep only the flagged rows and renumber them from 0.
df_clean = df.loc[df["Has_Response"]].reset_index(drop=True)
len(df_clean)

502

In [6]:
def get_bid_placed(x):
    """
    Get the final bid submitted by a participant.

    Rounds are scanned from 10 down to 1 and the first non-null
    "<round>_Bid" value found is returned, i.e. the bid from the latest
    round that has a value.

    NOTE(review): the previous round list skipped round 2, so a participant
    whose last bid was in "2_Bid" got their round-1 bid instead. Confirm that
    the omission was not deliberate.

    Parameters
    ----------
    x : pandas.Series or mapping
        One row of the response table, keyed by column name.

    Returns
    -------
    The latest non-null bid, or None when no "<round>_Bid" value is present.
    """
    for i in range(10, 0, -1):
        key = f"{i}_Bid"
        # Tolerate a round whose column is absent from the export.
        if key in x and pd.notnull(x[key]):
            return x[key]
    return None


# Final bid per participant, as resolved row by row by get_bid_placed.
df_clean["Bid_Placed"] = df_clean.apply(get_bid_placed, axis=1)

# Capitalised copy of the raw condition label.
df_clean["Dispersion"] = df_clean["dispersion"].map({"lower": "Lower", "higher": "Higher"})

# The recalled maximum bid is the raw MaxBid answer under a clearer name.
df_clean["Memory_Max_Bid"] = df_clean["MaxBid"]

# A row is excluded when any one of the three checks fails.
# NOTE(review): the thresholds (MaxBid below 32, GetCard not 1, GetCash not 10)
# are presumably the preregistered exclusion criteria — confirm.
df_clean["Excluded"] = (
    df_clean["MaxBid"].lt(32)
    | df_clean["GetCard"].ne(1)
    | df_clean["GetCash"].ne(10)
)

# NOTE(review): 37 is presumably the true maximum bid shown to participants
# (going by the column name) — confirm.
df_clean["Above_Actual_Max"] = df_clean["Memory_Max_Bid"] > 37
df_clean["Condition"] = df_clean["Dispersion"] + " Dispersion"

# Columns written to the cleaned file, in output order.
cols = [
    "workerId",
    "Bid_Placed",
    "Dispersion",
    "Condition",
    "Memory_Max_Bid",
    "Above_Actual_Max",
    "Excluded",
]

# clean_names() is provided by the janitor import at the top of the notebook.
# NOTE(review): index=None appears to behave like index=False here (no index
# column is written) — confirm for the pandas version in use.
df_clean[cols].clean_names().to_csv(
    f"../Data/{STUDY_NAME}/CleanData.csv",
    index=None,
)