This notebook pre-processes the data of Study 6 of the paper.

Click the "Show Code" buttons to see the code associated with each output.

In [1]:
import pandas as pd
import numpy as np
import janitor
from datetime import datetime
import pytz

# Study label: interpolated into the data file paths and into the summary
# sentences returned by the helper functions in this notebook.
STUDY_NAME = "Study 6"


def print_prereg_time_utc(dt):
    """Return a sentence stating when the study was preregistered, in UTC.

    Despite the name, nothing is printed; the sentence is returned so that it
    shows up as the cell output.

    Args:
        dt (str): Preregistration time in US/Pacific local time, written with
            a 12-hour clock as "MM/DD/YYYY HH:MM AM|PM"
            (e.g. "07/16/2019 12:05 PM").

    Returns:
        str: "<STUDY_NAME> was preregistered on <Month DD, YYYY at HH:MM AM|PM>."
        with the timestamp converted to UTC.

    Raises:
        ValueError: If ``dt`` does not match the expected format.
    """
    local_tz = pytz.timezone("US/Pacific")
    target_tz = pytz.timezone("UTC")
    # Parse the hour with %I (12-hour clock): when parsing, %p only affects the
    # hour if %I is used. With %H the AM/PM marker is silently ignored, so an
    # input such as "01:05 PM" would be read as 01:05 instead of 13:05.
    dt = datetime.strptime(dt, "%m/%d/%Y %I:%M %p")
    # localize() attaches the Pacific zone (DST-aware); normalize() converts the
    # aware datetime to UTC.
    # The output format is intentionally left unchanged (24-hour clock followed
    # by AM/PM) so the sentence matches previously recorded notebook output.
    preregstring = target_tz.normalize(local_tz.localize(dt)).strftime(
        "%B %d, %Y at %H:%M %p"
    )
    return f"{STUDY_NAME} was preregistered on {preregstring}."


def cet_to_utc(dt):
    """Convert a "DD/MM/YYYY HH:MM" string in the pytz "CET" zone to UTC.

    Args:
        dt (str): Naive local timestamp formatted as "%d/%m/%Y %H:%M".

    Returns:
        datetime: Timezone-aware datetime expressed in UTC.
    """
    naive_local = datetime.strptime(dt, "%d/%m/%Y %H:%M")
    # Attach the source zone first, then let the UTC zone convert it.
    aware_local = pytz.timezone("CET").localize(naive_local)
    return pytz.timezone("UTC").normalize(aware_local)


def print_collection_information(df):
    """Return a sentence with the first start and last end of data collection.

    Despite the name, nothing is printed; the sentence is returned so that it
    shows up as the cell output.

    Args:
        df (pandas.DataFrame): Frame with ``StartDate`` and ``EndDate`` columns
            holding "DD/MM/YYYY HH:MM" strings, the format ``cet_to_utc``
            parses. Missing values are ignored.

    Returns:
        str: "<STUDY_NAME> was started on <...> and ended on <...>." with both
        timestamps converted to UTC.

    Raises:
        ValueError: If a column has no non-missing values, or a value does not
            match the expected format.
    """
    # Compare parsed datetimes rather than the raw strings: day-first date
    # strings sort lexicographically by day of month, which is not
    # chronological once the data span a month boundary
    # (e.g. "01/08/2019" sorts before "31/07/2019").
    start_utc = min(cet_to_utc(value) for value in df["StartDate"].dropna())
    end_utc = max(cet_to_utc(value) for value in df["EndDate"].dropna())
    # Output format kept unchanged (24-hour clock followed by AM/PM) so the
    # sentence matches previously recorded notebook output.
    startstring = start_utc.strftime("%B %d, %Y at %H:%M %p")
    endstring = end_utc.strftime("%B %d, %Y at %H:%M %p")
    return f"{STUDY_NAME} was started on {startstring} and ended on {endstring}."


# Load the raw export for this study. The path is relative, so it resolves
# against the directory the notebook is run from.
df = pd.read_csv(f"../Data/{STUDY_NAME}/RawData.csv")

Pre-registration information (time converted from US/Pacific to UTC):

In [2]:
print_prereg_time_utc("07/16/2019 12:05 PM")

'Study 6 was preregistered on July 16, 2019 at 19:05 PM.'

Data collection information (times converted from CET to UTC):

In [3]:
print_collection_information(df)

'Study 6 was started on July 16, 2019 at 19:47 PM and ended on July 17, 2019 at 01:29 AM.'

Number of participants in the raw Qualtrics export:

In [4]:
df.shape[0]

502

In [5]:
# Bonus label (as stored in BonusAmount) -> number of the box that was opened.
bonus_to_box = {"30¢": 1, "38¢": 2, "26¢": 3, "20¢": 4, "32¢": 5}

# Recalled maximum bonuses strictly below this value are flagged as excluded.
# NOTE(review): presumably expressed in cents, like the bonus labels - confirm
# against the preregistration.
MEMORY_MAX_EXCLUSION_THRESHOLD = 32

df["Box_Opened"] = df.BonusAmount.map(bonus_to_box)  # Mapping the box opened.

# Drop the trailing "¢" and convert to a number. The .str accessor propagates
# missing values as NaN instead of raising on them.
df["BonusAmountNumeric"] = df.BonusAmount.str[:-1].astype(float)

# Memory for maximum bonus: missing entries count as 0, so the row sum is the
# value of whichever column was filled in.
# NOTE(review): assumes each participant answered only one of the two columns
# - confirm against the survey flow.
df["Memory_Max"] = df[["MaxVal_Blue", "MaxVal_Red"]].fillna(0).sum(axis=1)

# Values below 1 are multiplied by 100, i.e. they are treated as amounts typed
# in dollars (e.g. 0.38) and rescaled to cents (38). Values >= 1 are kept.
df["Memory_Max_Recoded"] = df.Memory_Max.apply(lambda x: x * 100 if x < 1 else x)

df["Condition"] = df.dispersion.map(
    {"same": "Equal dispersion", "higher": "Higher dispersion"}
)
# NOTE(review): the mapping is crossed ("red" -> "Blue", "blue" -> "Red").
# This looks deliberate (label category = the colour that was NOT manipulated?)
# but cannot be verified from this notebook - confirm.
df["Label_Category"] = df.manipulated.map({"red": "Blue", "blue": "Red"})

# Exclusion flags, computed on the raw and on the recoded memory measure.
df["Excluded"] = (df.Memory_Max < MEMORY_MAX_EXCLUSION_THRESHOLD).map(
    {False: "No", True: "Yes"}
)
df["Excluded_If_Recoded"] = (
    df.Memory_Max_Recoded < MEMORY_MAX_EXCLUSION_THRESHOLD
).map({False: "No", True: "Yes"})
# Columns kept in the cleaned data set: raw identifiers/timestamps first,
# followed by the derived variables.
# NOTE(review): workerId is written to the clean file; if these are
# crowdsourcing worker IDs, confirm they may be shared or anonymise them first.
cols = [
    "StartDate",
    "EndDate",
    "BonusAmount",
    "workerId",
    "Box_Opened",
    "BonusAmountNumeric",
    "Memory_Max",
    "Memory_Max_Recoded",
    "Condition",
    "Label_Category",
    "Excluded",
    "Excluded_If_Recoded",
]
# clean_names() is provided by pyjanitor (registered on DataFrame by
# `import janitor`) and normalises the column names.
df_clean = df[cols].clean_names()
# `index` expects a bool; False states explicitly that the row index is not
# written (the previous `index=None` only worked because None is falsy).
df_clean.to_csv(f"../Data/{STUDY_NAME}/CleanData.csv", index=False, encoding="utf-8")