In [162]:
import pandas as pd
import numpy as np

pd.options.plotting.backend = "plotly"
import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots



import plotly.io as pio
import plotly.express as px

pio.templates.default = "simple_white"

In [163]:
control_columns = [
    "Participant id",
    "Number of subordinates",
    "Age",
    "Sex",
]

data_usa_raw = (
    pd.read_csv("../data/raw/usa/all_apps_wide_2023-11-14_USA.csv")
    .merge(
        pd.read_csv("../data/raw/usa/prolific_demographics.csv")[control_columns],
        left_on="participant.label",
        right_on="Participant id",
        how="left",
    )
)
data_deu_1_raw = (
    pd.read_csv("../data/raw/deutsch/all_apps_wide_2023-11-14_DEU.csv")
    .merge(
        pd.read_csv("../data/raw/deutsch/prolific_demographics.csv")[control_columns],
        left_on="participant.label",
        right_on="Participant id",
        how="left",
    )
)
data_deu_2_raw = (
    pd.read_csv("../data/raw/deutsch runde 2/all_apps_wide_2023-11-16_DEU.csv")
    .merge(
        pd.read_csv("../data/raw/deutsch runde 2/prolific_demographics.csv")[
            control_columns
        ],
        left_on="participant.label",
        right_on="Participant id",
        how="left",
    )
)

In [164]:
# page times

page_times_usa_raw = pd.read_csv("../data/raw/usa/PageTimes-2023-11-14_USA.csv")
page_times_deu_1_raw = pd.read_csv("../data/raw/deutsch/PageTimes-2023-11-14_DEU.csv")
page_times_deu_2_raw = pd.read_csv(
    "../data/raw/deutsch runde 2/PageTimes-2023-11-16_DEU.csv"
)

In [165]:
merged_times = pd.concat(
    [page_times_usa_raw, page_times_deu_1_raw, page_times_deu_2_raw], axis=0
)

time_delta = (
    (
        merged_times.groupby("participant_code")["epoch_time_completed"].max()
        - merged_times.groupby("participant_code")["epoch_time_completed"].min()
    )
    .to_frame()
    .reset_index()
)

In [166]:
relevant_columns = [
    "survey.1.player.positive_szenarios",
    "survey.1.player.treatment_pa",
    "survey.1.player.PRS1T1",
    "survey.1.player.PRS1T2",
    "survey.1.player.PRS2T1",
    "survey.1.player.PRS2T2",
    "survey.1.player.PRS3T1",
    "survey.1.player.PRS3T2",
    "survey.1.player.PRS4T1",
    "survey.1.player.PRS4T2",
    "survey.1.player.FRS1T1",
    "survey.1.player.FRS1T2",
    "survey.1.player.FRS2T1",
    "survey.1.player.FRS2T2",
    "survey.1.player.FRS3T1",
    "survey.1.player.FRS3T2",
    "survey.1.player.FRS4T1",
    "survey.1.player.FRS4T2",
    "survey.1.player.ARS1T1",
    "survey.1.player.ARS1T2",
    "survey.1.player.ARS2T1",
    "survey.1.player.ARS2T2",
    "survey.1.player.ARS3T1",
    "survey.1.player.ARS3T2",
    "survey.1.player.ARS4T1",
    "survey.1.player.ARS4T2",
    "survey.1.player.ETHS1T1",
    "survey.1.player.ETHS1T2",
    "survey.1.player.ETHS2T1",
    "survey.1.player.ETHS2T2",
    "survey.1.player.ETHS3T1",
    "survey.1.player.ETHS3T2",
    "survey.1.player.ETHS4T1",
    "survey.1.player.ETHS4T2",
    "survey.1.player.PROS1T1",
    "survey.1.player.PROS1T2",
    "survey.1.player.PROS2T1",
    "survey.1.player.PROS2T2",
    "survey.1.player.PROS3T1",
    "survey.1.player.PROS3T2",
    "survey.1.player.PROS4T1",
    "survey.1.player.PROS4T2",
    "survey.1.player.algo_aversion",
    "origin",
    "overall_time",
    "Number of subordinates",
    "Age",
    "Sex",
]

In [171]:
data_all = (
    pd.concat(
        [
            data_usa_raw.assign(origin="USA"),
            data_deu_1_raw.assign(origin="DEU"),
            data_deu_2_raw.assign(origin="DEU"),
        ],
        axis=0,
    )
    .merge(time_delta, left_on="participant.code", right_on="participant_code")
    .rename(columns={"epoch_time_completed": "overall_time"})
    .loc[lambda df_: df_["participant._current_page_name"] == "End"]
    # .loc[:, relevant_columns]
    .rename(
        columns=dict(
            zip(
                relevant_columns,
                [x.replace("survey.1.player.", "") for x in relevant_columns],
            )
        )
    )
    .rename(
        columns={
            "Number of subordinates": "number_of_subordinates",
            "Age": "age",
            "Sex": "sex",
        }
    )
)
MIN_TIME = data_all.overall_time.quantile(0.25)  # 5 min also worked ok
MAX_TIME = data_all.overall_time.quantile(0.75)  # 15 also min worked ok
data_all = data_all.loc[
    data_all.overall_time.between(left=MIN_TIME, right=MAX_TIME, inclusive="both")
]
data_usa = data_all.loc[data_all.origin == "USA"]
data_deu = data_all.loc[data_all.origin == "DEU"]

Scale explanation:
- FRS1T1 -->  feel responsible senario 1, team lead 1 (familiar solution)
- ARS1T1 -->  act responsible senario 1, team lead 1 (familiar solution)
- FRS1T2 -->  feel responsible senario 1, team lead 2 (new solution)
- ARS1T2 -->  act responsible senario 1, team lead 2 (new solution)

In [172]:
scenario_dict_outcome = {
    "1": {"positive": "1_and_4", "negative": "2_and_3"},
    "2": {"positive": "2_and_3", "negative": "1_and_4"},
    "3": {"positive": "2_and_3", "negative": "1_and_4"},
    "4": {"positive": "1_and_4", "negative": "2_and_3"},
}

scenario_dict_pa = {
    "1": {"yes": "1_and_4", "no": "2_and_3"},
    "2": {"yes": "2_and_3", "no": "1_and_4"},
    "3": {"yes": "2_and_3", "no": "1_and_4"},
    "4": {"yes": "1_and_4", "no": "2_and_3"},
}


def get_mean_scales(data, scale, scenario_nr, solution_type, outcome, pa):
    scale_string = ""
    if scale == "feel":
        scale_string += "FR"
    elif scale == "act":
        scale_string += "AR"
    elif scale == "risk":
        scale_string += "PR"

    scale_string += f"S{scenario_nr}"

    if solution_type == "familiar":
        scale_string += "T1"
    else:
        scale_string += "T2"

    outcome_filter = scenario_dict_outcome[str(scenario_nr)][outcome]
    pa_filter = scenario_dict_pa[str(scenario_nr)][pa]

    return data.loc[
        (data.positive_szenarios == outcome_filter)
        & (data.treatment_pa == pa_filter),
        scale_string,
    ].mean()

## Plot Fig 1 from Nordbye

In [173]:
x_axis_values_list = [
    ["Familiar solution", "New solution"],
    ["Follow advise", "Not follow advise"],
    ["Hold on", "Change decision"],
    ["Wait and see", "Take action"],
]

d3_colors = px.colors.qualitative.D3
colors_dict = {
    "feel responsible + positive outcome + PA yes": d3_colors[0],
    "feel responsible + positive outcome + PA no": d3_colors[1],
    "feel responsible + negative outcome + PA yes": d3_colors[2],
    "feel responsible + negative outcome + PA no": d3_colors[3],
    "act responsible + positive outcome + PA yes": d3_colors[4],
    "act responsible + positive outcome + PA no": d3_colors[5],
    "act responsible + negative outcome + PA yes": d3_colors[6],
    "act responsible + negative outcome + PA no": d3_colors[7],
}


def create_line_subplot(data, output_file_name, show_fig, fig_title):
    fig = make_subplots(rows=2, cols=2)

    for scenario, x_axis_values in zip(range(1, 5), x_axis_values_list):
        for responsibility_scale in ["feel", "act"]:
            for outcome in ["positive", "negative"]:
                for pa_available in ["yes", "no"]:
                    fig.add_trace(
                        go.Scatter(
                            x=x_axis_values,
                            y=[
                                get_mean_scales(
                                    data=data,
                                    scale=responsibility_scale,
                                    scenario_nr=scenario,
                                    solution_type="familiar",
                                    outcome=outcome,
                                    pa=pa_available,
                                ),
                                get_mean_scales(
                                    data=data,
                                    scale=responsibility_scale,
                                    scenario_nr=scenario,
                                    solution_type="new",
                                    outcome=outcome,
                                    pa=pa_available,
                                ),
                            ],
                            mode="lines+markers",
                            name=f"{responsibility_scale} responsible + {outcome} outcome + PA {pa_available}",
                            legendgroup=f"{responsibility_scale} responsible + {outcome} outcome + PA {pa_available}",
                            marker_color=colors_dict[
                                f"{responsibility_scale} responsible + {outcome} outcome + PA {pa_available}"
                            ],
                            showlegend=True if scenario == 1 else False,
                        ),
                        row=1 if scenario in [1, 2] else 2,
                        col=1 if scenario in [1, 3] else 2,
                    )
    fig.update_yaxes(range=[1, 7], dtick=1)
    fig.update_layout(width=900 + 200, height=600 + 200, title=fig_title)
    fig.write_html(f"{output_file_name}.html")
    if show_fig:
        fig.show()

In [174]:
create_line_subplot(data=data_all, output_file_name="scenarios_all", show_fig=True, fig_title="All")

In [175]:
create_line_subplot(
    data=data_deu, output_file_name="scenarios_deu", show_fig=True, fig_title="DEU"
)

In [176]:
create_line_subplot(
    data=data_usa, output_file_name="scenarios_usa", show_fig=True, fig_title="USA"
)

## who is perceied to feel / act more responsible?

In [177]:
# who is perceied to feel more responsible?
t1_columns_feel = [f"FRS{x}T1" for x in range(1, 5)]
t2_columns_feel = [f"FRS{x}T2" for x in range(1, 5)]

# who is perceied to act more responsible?
t1_columns_act = [f"ARS{x}T1" for x in range(1, 5)]
t2_columns_act = [f"ARS{x}T2" for x in range(1, 5)]

pd.DataFrame(
    {
        "t1_feel": data_all[t1_columns_feel].to_numpy().reshape(-1),
        "t2_feel": data_all[t2_columns_feel].to_numpy().reshape(-1),
        "t1_act": data_all[t1_columns_act].to_numpy().reshape(-1),
        "t2_act": data_all[t2_columns_act].to_numpy().reshape(-1),
    }
).describe()

Unnamed: 0,t1_feel,t2_feel,t1_act,t2_act
count,360.0,360.0,360.0,360.0
mean,4.713889,5.988889,5.130556,4.708333
std,1.814159,1.242196,1.587612,1.567759
min,1.0,1.0,1.0,1.0
25%,3.0,5.0,4.0,4.0
50%,5.0,6.0,6.0,5.0
75%,6.0,7.0,6.0,6.0
max,7.0,7.0,7.0,7.0


--> t2 (B) will feel more responsible and acted less responsible

## Regression

In [178]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [179]:
scenario_dict_pa
# scenario_dict_outcome

{'1': {'yes': '1_and_4', 'no': '2_and_3'},
 '2': {'yes': '2_and_3', 'no': '1_and_4'},
 '3': {'yes': '2_and_3', 'no': '1_and_4'},
 '4': {'yes': '1_and_4', 'no': '2_and_3'}}

In [180]:
def get_regression_df(szenario_nr, dependent_variable):
    t1_df = (
        data_all[
            [
                "positive_szenarios",
                "treatment_pa",
                "algo_aversion",
                f"{dependent_variable}S{szenario_nr}T1",
            ]
        ]
        .assign(
            outcome_positive=data_all.positive_szenarios.replace(
                scenario_dict_outcome[str(szenario_nr)]["positive"], 1
            ).replace(scenario_dict_outcome[str(szenario_nr)]["negative"], 0),
            pa_available=data_all.treatment_pa.replace(
                scenario_dict_pa[str(szenario_nr)]["yes"], 1
            ).replace(scenario_dict_pa[str(szenario_nr)]["no"], 0),
            participant=range(data_all.shape[0]),
            active_teamlead=1,
            origin_usa=data_all.origin == "USA",
        )
        .rename(columns={f"{dependent_variable}S{szenario_nr}T1": dependent_variable})
        .astype({"origin_usa": "int"})
    )

    t2_df = (
        data_all[
            [
                "positive_szenarios",
                "treatment_pa",
                "algo_aversion",
                f"{dependent_variable}S{szenario_nr}T2",
            ]
        ]
        .assign(
            outcome_positive=data_all.positive_szenarios.replace(
                scenario_dict_outcome[str(szenario_nr)]["positive"], 1
            ).replace(scenario_dict_outcome[str(szenario_nr)]["negative"], 0),
            pa_available=data_all.treatment_pa.replace(
                scenario_dict_pa[str(szenario_nr)]["yes"], 1
            ).replace(scenario_dict_pa[str(szenario_nr)]["no"], 0),
            participant=range(data_all.shape[0]),
            active_teamlead=0,
            origin_usa=data_all.origin == "USA",
        )
        .rename(columns={f"{dependent_variable}S{szenario_nr}T2": dependent_variable})
        .astype({"origin_usa": "int"})
    )

    return pd.concat([t1_df, t2_df], axis=0).loc[
        :,
        [
            dependent_variable,
            "outcome_positive",
            "pa_available",
            "active_teamlead",
            "participant",
            "algo_aversion",
            "origin_usa",
        ],
    ]

In [181]:
# NEXT: CONTROLL FOR ALGORITHMIC AVERSION!!

for scenario in range(1, 5):
    for dependent_variable in ["AR", "FR"]:
        regression_df = get_regression_df(scenario, dependent_variable)

        mod = smf.ols(
            formula=f"{dependent_variable} ~ outcome_positive * pa_available * active_teamlead + algo_aversion + origin_usa + 1|participant",
            data=regression_df,
        )
        res = mod.fit()
        # reg_table = res.summary().tables[1]
        if (
            min(
                res.pvalues["pa_available"],
                res.pvalues["outcome_positive:pa_available"],
                res.pvalues["outcome_positive:pa_available:active_teamlead"],
            )
            < 0.05
        ):
            print(f"{scenario=}, {dependent_variable=}")
            print(res.summary().tables[1])
            print(20 * "-")

scenario=3, dependent_variable='AR'
                                                    coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------------------
Intercept                                         4.8425      0.699      6.930      0.000       3.463       6.222
outcome_positive                                  0.1162      0.361      0.322      0.748      -0.596       0.829
pa_available                                     -0.8881      0.391     -2.273      0.024      -1.660      -0.117
outcome_positive:pa_available                     1.0654      0.548      1.945      0.053      -0.016       2.147
active_teamlead                                   0.3600      0.362      0.995      0.321      -0.354       1.074
outcome_positive:active_teamlead                  0.2169      0.507      0.428      0.669      -0.783       1.217
pa_available:active_teamlead                      0.

In [182]:
for scenario in range(1, 5):
    for dependent_variable in ["PR"]:
        regression_df = get_regression_df(scenario, dependent_variable)

        mod = smf.ols(
            formula=f"{dependent_variable} ~  pa_available * active_teamlead + algo_aversion + 1|participant",
            data=regression_df,
        )
        res = mod.fit()
        if (
            min(
                res.pvalues["pa_available"],
                res.pvalues["pa_available:active_teamlead"],
            )
            < 0.05
        ):
            print(f"{scenario=}, {dependent_variable=}")
            print(res.summary().tables[1])
            print(20 * "-")

scenario=4, dependent_variable='PR'
                                   coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------------------------
Intercept                        3.9075      0.747      5.229      0.000       2.433       5.382
pa_available                     0.7701      0.395      1.947      0.053      -0.010       1.551
active_teamlead                  0.5385      0.421      1.281      0.202      -0.291       1.368
pa_available:active_teamlead    -1.1659      0.559     -2.087      0.038      -2.268      -0.063
algo_aversion                    0.0495      0.115      0.430      0.668      -0.178       0.277
1 | participant                  0.0020      0.005      0.373      0.709      -0.009       0.013
--------------------


## Vignette 1 -- Choosing a familiar or new solution

In [183]:
## what is perceived more risky?

get_mean_scales(
    data=data_all,
    responsibility_scale="risk",
    scenario_nr=1,
    solution_type="familiar",
    outcome=outcome,
    pa=pa_available,
)

NameError: name 'outcome' is not defined