In [1]:
import pandas as pd
import numpy as np

pd.options.plotting.backend = "plotly"
import plotly
import plotly.graph_objects as go
from plotly.subplots import make_subplots



import plotly.io as pio
import plotly.express as px

pio.templates.default = "simple_white"

In [6]:
data_usa_raw = pd.read_csv("../data/raw/usa/all_apps_wide_2023-11-14_USA.csv")
data_deu_1_raw = pd.read_csv("../data/raw/deutsch/all_apps_wide_2023-11-14_DEU.csv")
data_deu_2_raw = pd.read_csv(
    "../data/raw/deutsch runde 2/all_apps_wide_2023-11-16_DEU.csv"
)

In [7]:
relevant_columns = [
    "survey.1.player.positive_szenarios",
    "survey.1.player.treatment_pa",
    "survey.1.player.PRS1T1",
    "survey.1.player.PRS1T2",
    "survey.1.player.PRS2T1",
    "survey.1.player.PRS2T2",
    "survey.1.player.PRS3T1",
    "survey.1.player.PRS3T2",
    "survey.1.player.PRS4T1",
    "survey.1.player.PRS4T2",
    "survey.1.player.FRS1T1",
    "survey.1.player.FRS1T2",
    "survey.1.player.FRS2T1",
    "survey.1.player.FRS2T2",
    "survey.1.player.FRS3T1",
    "survey.1.player.FRS3T2",
    "survey.1.player.FRS4T1",
    "survey.1.player.FRS4T2",
    "survey.1.player.ARS1T1",
    "survey.1.player.ARS1T2",
    "survey.1.player.ARS2T1",
    "survey.1.player.ARS2T2",
    "survey.1.player.ARS3T1",
    "survey.1.player.ARS3T2",
    "survey.1.player.ARS4T1",
    "survey.1.player.ARS4T2",
    "survey.1.player.ETHS1T1",
    "survey.1.player.ETHS1T2",
    "survey.1.player.ETHS2T1",
    "survey.1.player.ETHS2T2",
    "survey.1.player.ETHS3T1",
    "survey.1.player.ETHS3T2",
    "survey.1.player.ETHS4T1",
    "survey.1.player.ETHS4T2",
    "survey.1.player.PROS1T1",
    "survey.1.player.PROS1T2",
    "survey.1.player.PROS2T1",
    "survey.1.player.PROS2T2",
    "survey.1.player.PROS3T1",
    "survey.1.player.PROS3T2",
    "survey.1.player.PROS4T1",
    "survey.1.player.PROS4T2",
    "origin"
]

In [8]:
data_all = (
    pd.concat(
        [
            data_usa_raw.assign(origin="USA"),
            data_deu_1_raw.assign(origin="DEU"),
            data_deu_2_raw.assign(origin="DEU"),
        ],
        axis=0,
    )
    .loc[lambda df_: df_["participant._current_page_name"] == "End"]
    .loc[:, relevant_columns]
    .rename(
        columns=dict(
            zip(
                relevant_columns,
                [x.replace("survey.1.player.", "") for x in relevant_columns],
            )
        )
    )
)

data_usa = data_all.loc[data_all.origin == "USA"]
data_deu = data_all.loc[data_all.origin == "DEU"]

Scale explanation:
- FRS1T1 -->  feel responsible senario 1, team lead 1 (familiar solution)
- ARS1T1 -->  act responsible senario 1, team lead 1 (familiar solution)
- FRS1T2 -->  feel responsible senario 1, team lead 2 (new solution)
- ARS1T2 -->  act responsible senario 1, team lead 2 (new solution)

In [9]:
scenario_dict_outcome = {
    "1": {"positive": "1_and_4", "negative": "2_and_3"},
    "2": {"positive": "2_and_3", "negative": "1_and_4"},
    "3": {"positive": "2_and_3", "negative": "1_and_4"},
    "4": {"positive": "1_and_4", "negative": "2_and_3"},
}

scenario_dict_pa = {
    "1": {"yes": "1_and_4", "no": "2_and_3"},
    "2": {"yes": "2_and_3", "no": "1_and_4"},
    "3": {"yes": "2_and_3", "no": "1_and_4"},
    "4": {"yes": "1_and_4", "no": "2_and_3"},
}


def get_mean_scales(data, scale, scenario_nr, solution_type, outcome, pa):
    scale_string = ""
    if scale == "feel":
        scale_string += "FR"
    elif scale == "act":
        scale_string += "AR"
    elif scale == "risk":
        scale_string += "PR"

    scale_string += f"S{scenario_nr}"

    if solution_type == "familiar":
        scale_string += "T1"
    else:
        scale_string += "T2"

    outcome_filter = scenario_dict_outcome[str(scenario_nr)][outcome]
    pa_filter = scenario_dict_pa[str(scenario_nr)][pa]

    return data.loc[
        (data.positive_szenarios == outcome_filter)
        & (data.treatment_pa == pa_filter),
        scale_string,
    ].mean()

## Plot Fig 1 from Nordbye

In [10]:
x_axis_values_list = [
    ["Familiar solution", "New solution"],
    ["Follow advise", "Not follow advise"],
    ["Hold on", "Change decision"],
    ["Wait and see", "Take action"],
]

d3_colors = px.colors.qualitative.D3
colors_dict = {
    "feel responsible + positive outcome + PA yes": d3_colors[0],
    "feel responsible + positive outcome + PA no": d3_colors[1],
    "feel responsible + negative outcome + PA yes": d3_colors[2],
    "feel responsible + negative outcome + PA no": d3_colors[3],
    "act responsible + positive outcome + PA yes": d3_colors[4],
    "act responsible + positive outcome + PA no": d3_colors[5],
    "act responsible + negative outcome + PA yes": d3_colors[6],
    "act responsible + negative outcome + PA no": d3_colors[7],
}


def create_line_subplot(data, output_file_name, show_fig, fig_title):
    fig = make_subplots(rows=2, cols=2)

    for scenario, x_axis_values in zip(range(1, 5), x_axis_values_list):
        for responsibility_scale in ["feel", "act"]:
            for outcome in ["positive", "negative"]:
                for pa_available in ["yes", "no"]:
                    fig.add_trace(
                        go.Scatter(
                            x=x_axis_values,
                            y=[
                                get_mean_scales(
                                    data=data,
                                    scale=responsibility_scale,
                                    scenario_nr=scenario,
                                    solution_type="familiar",
                                    outcome=outcome,
                                    pa=pa_available,
                                ),
                                get_mean_scales(
                                    data=data,
                                    scale=responsibility_scale,
                                    scenario_nr=scenario,
                                    solution_type="new",
                                    outcome=outcome,
                                    pa=pa_available,
                                ),
                            ],
                            mode="lines+markers",
                            name=f"{responsibility_scale} responsible + {outcome} outcome + PA {pa_available}",
                            legendgroup=f"{responsibility_scale} responsible + {outcome} outcome + PA {pa_available}",
                            marker_color=colors_dict[
                                f"{responsibility_scale} responsible + {outcome} outcome + PA {pa_available}"
                            ],
                            showlegend=True if scenario == 1 else False,
                        ),
                        row=1 if scenario in [1, 2] else 2,
                        col=1 if scenario in [1, 3] else 2,
                    )
    fig.update_yaxes(range=[1, 7], dtick=1)
    fig.update_layout(width=900 + 200, height=600 + 200, title=fig_title)
    fig.write_html(f"{output_file_name}.html")
    if show_fig:
        fig.show()

In [11]:
create_line_subplot(data=data_all, output_file_name="scenarios_all", show_fig=True, fig_title="All")

In [12]:
create_line_subplot(
    data=data_deu, output_file_name="scenarios_deu", show_fig=True, fig_title="DEU"
)

In [13]:
create_line_subplot(
    data=data_usa, output_file_name="scenarios_usa", show_fig=True, fig_title="USA"
)

## who is perceied to feel / act more responsible?

In [14]:
# who is perceied to feel more responsible?
t1_columns_feel = [f"FRS{x}T1" for x in range(1, 5)]
t2_columns_feel = [f"FRS{x}T2" for x in range(1, 5)]

# who is perceied to act more responsible?
t1_columns_act = [f"ARS{x}T1" for x in range(1, 5)]
t2_columns_act = [f"ARS{x}T2" for x in range(1, 5)]

pd.DataFrame(
    {
        "t1_feel": data_all[t1_columns_feel].to_numpy().reshape(-1),
        "t2_feel": data_all[t2_columns_feel].to_numpy().reshape(-1),
        "t1_act": data_all[t1_columns_act].to_numpy().reshape(-1),
        "t2_act": data_all[t2_columns_act].to_numpy().reshape(-1),
    }
).describe()

Unnamed: 0,t1_feel,t2_feel,t1_act,t2_act
count,680.0,680.0,680.0,680.0
mean,4.786765,5.948529,5.213235,4.685294
std,1.887124,1.290727,1.566438,1.632085
min,1.0,1.0,1.0,1.0
25%,3.0,5.0,4.0,4.0
50%,5.0,6.0,6.0,5.0
75%,6.0,7.0,6.0,6.0
max,7.0,7.0,7.0,7.0


--> t2 (B) will feel more responsible and acted less responsible

## Regression

In [15]:
import statsmodels.api as sm
import statsmodels.formula.api as smf

In [16]:
scenario_dict_pa
# scenario_dict_outcome

{'1': {'yes': '1_and_4', 'no': '2_and_3'},
 '2': {'yes': '2_and_3', 'no': '1_and_4'},
 '3': {'yes': '2_and_3', 'no': '1_and_4'},
 '4': {'yes': '1_and_4', 'no': '2_and_3'}}

In [17]:
def get_regression_df(szenario_nr, dependent_variable):
    t1_df = (
        data_all[
            [
                "positive_szenarios",
                "treatment_pa",
                f"{dependent_variable}S{szenario_nr}T1",
            ]
        ]
        .assign(
            outcome_positive=data_all.positive_szenarios.replace(
                scenario_dict_outcome[str(szenario_nr)]["positive"], 1
            ).replace(scenario_dict_outcome[str(szenario_nr)]["negative"], 0),
            pa_available=data_all.treatment_pa.replace(
                scenario_dict_pa[str(szenario_nr)]["yes"], 1
            ).replace(scenario_dict_pa[str(szenario_nr)]["no"], 0),
            participant=range(data_all.shape[0]),
            active_teamlead=1,
            origin_usa=data_all.origin == "USA",
        )
        .rename(columns={f"{dependent_variable}S{szenario_nr}T1": dependent_variable})
        .astype({"origin_usa": "int"})
    )

    t2_df = (
        data_all[
            [
                "positive_szenarios",
                "treatment_pa",
                f"{dependent_variable}S{szenario_nr}T2",
            ]
        ]
        .assign(
            outcome_positive=data_all.positive_szenarios.replace(
                scenario_dict_outcome[str(szenario_nr)]["positive"], 1
            ).replace(scenario_dict_outcome[str(szenario_nr)]["negative"], 0),
            pa_available=data_all.treatment_pa.replace(
                scenario_dict_pa[str(szenario_nr)]["yes"], 1
            ).replace(scenario_dict_pa[str(szenario_nr)]["no"], 0),
            participant=range(data_all.shape[0]),
            active_teamlead=0,
            origin_usa=data_all.origin == "USA",
        )
        .rename(columns={f"{dependent_variable}S{szenario_nr}T2": dependent_variable})
        .astype({"origin_usa": "int"})
    )

    return pd.concat([t1_df, t2_df], axis=0).loc[
        :,
        [
            dependent_variable,
            "outcome_positive",
            "pa_available",
            "active_teamlead",
            "participant",
            "origin_usa",
        ],
    ]

In [18]:
# NEXT: CONTROLL FOR ALGORITHMIC AVERSION!!

for scenario in range(1, 5):
    for dependent_variable in ["AR", "FR"]:
        regression_df = get_regression_df(scenario, dependent_variable)

        mod = smf.ols(
            formula=f"np.log({dependent_variable}) ~ outcome_positive * pa_available * active_teamlead + 1|participant",
            data=regression_df,
        )
        res = mod.fit()
        # reg_table = res.summary().tables[1]
        if (
            min(
                res.pvalues["pa_available"],
                res.pvalues["outcome_positive:pa_available"],
                res.pvalues["outcome_positive:pa_available:active_teamlead"],
            )
            < 0.05
        ):
            print(f"{scenario=}, {dependent_variable=}")
            print(res.summary().tables[1])
            print(20 * "-")

scenario=3, dependent_variable='FR'
                                                    coef    std err          t      P>|t|      [0.025      0.975]
-----------------------------------------------------------------------------------------------------------------
Intercept                                         1.7598      0.064     27.541      0.000       1.634       1.885
outcome_positive                                  0.0790      0.080      0.988      0.324      -0.078       0.236
pa_available                                      0.0127      0.076      0.167      0.868      -0.138       0.163
outcome_positive:pa_available                    -0.0185      0.113     -0.164      0.870      -0.241       0.204
active_teamlead                                  -0.2302      0.076     -3.044      0.003      -0.379      -0.081
outcome_positive:active_teamlead                  0.0470      0.113      0.416      0.678      -0.175       0.270
pa_available:active_teamlead                     -0.

In [19]:
for scenario in range(1, 5):
    for dependent_variable in ["PR"]:
        regression_df = get_regression_df(scenario, dependent_variable)

        mod = smf.ols(
            formula=f"np.log({dependent_variable}) ~  pa_available * active_teamlead + 1|participant",
            data=regression_df,
        )
        res = mod.fit()
        # reg_table = res.summary().tables[1]
        if (
            min(
                res.pvalues["pa_available"],
                res.pvalues["pa_available:active_teamlead"],
            )
            < 0.05
        ):
            print(f"{scenario=}, {dependent_variable=}")
            print(res.summary().tables[1])
            print(20 * "-")

In [20]:
summary_df = pd.DataFrame(reg_table.data, columns=reg_table.header)

NameError: name 'reg_table' is not defined

## Vignette 1 -- Choosing a familiar or new solution

In [None]:
## what is perceived more risky?

get_mean_scales(
    data=data_all,
    responsibility_scale="risk",
    scenario_nr=1,
    solution_type="familiar",
    outcome=outcome,
    pa=pa_available,
)