In [None]:
import pandas as pd

from cprs.config import BLD

# Load the cleaned dataset (feather/arrow format) into the working frame.
clean_data_path = BLD / "data" / "data_clean.arrow"
df_main = pd.read_feather(clean_data_path)

# Keeping specific variables

In [None]:
# Print the name of every column currently in df_main.
list_of_columns = df_main.columns.to_list()
print(list_of_columns)

In [None]:
# Show the frame as loaded (bare last expression -> rich notebook display).
df_main

In [None]:
# Restrict df_main to the variables used below:
#   * every column whose name starts with cs2_/cs3_/cs4_ followed by
#     compr1 ... compr5,
#   * a fixed list of named columns,
#   * every questionnaire column (prefix "quest_").
#
# The selection is applied exactly once, after all prefixes are known.
# Filtering df_main inside a per-j loop would drop the compr2..compr5 columns
# on the first pass and the compr1 columns on the second, so none of the
# cs*_compr* columns would survive.
compr_prefixes = tuple(
    f"cs{stage}_compr{j}" for j in range(1, 6) for stage in (2, 3, 4)
)

fixed_columns = [
    "PLAYER_NUM",
    "LAB_SESSION",
    "GROUPID_ALL",
    "failed_attem1",
    "failed_attem2",
    "failed_attem3",
    "participant_id_in_session",
    "player_cubicle",
    "groupid1",
    "groupid2",
    "groupid3",
]

columns_to_keep = (
    # str.startswith accepts a tuple of prefixes; frame order is preserved.
    [col for col in df_main.columns if col.startswith(compr_prefixes)]
    + fixed_columns
    + [col for col in df_main.columns if col.startswith("quest_")]
)

# .copy() gives an independent frame, so the column assignments in later
# cells do not operate on a slice of the originally loaded data.
df_main = df_main[columns_to_keep].copy()

In [None]:
# Show the frame after the column selection (rich notebook display).
df_main

In [None]:
# Print the column names that remain after the selection step.
list_of_columns = df_main.columns.to_list()
print(list_of_columns)

### Social Optimum Strategy

In [None]:
# Build one 0/1 indicator per questionnaire item: 1 where the answer equals 3,
# 0 for any other value. Keys are the new columns, values the source columns.
optimum_sources = {
    "social_optimum1": "quest_optimumN",
    "social_optimum2": "quest_optimumA",
    "social_optimum3": "quest_optimumS",
}

for indicator_name, source_name in optimum_sources.items():
    answers = df_main[source_name]
    df_main[indicator_name] = answers.apply(lambda x: 1 if x == 3 else 0)

### Nash and socially acceptable

In [None]:
# Binary (0/1) indicators derived from questionnaire answers.

# 1 where quest_strategy equals 5, otherwise 0.
df_main["nash_strategy"] = df_main["quest_strategy"].apply(lambda x: 1 if x == 5 else 0)

# 1 where quest_slider_guess is strictly greater than 26.3, otherwise 0.
# Computed once; a second identical assignment would be redundant.
df_main["prosocial_perception"] = df_main["quest_slider_guess"].apply(
    lambda x: 1 if x > 26.3 else 0,
)

# `social_appropriate` holds the indicator based on quest_soc_approp_neg
# (1 where the answer is below 2). This is the value the column ends up with
# when both the "pos" and the "neg" indicator are written to the same name,
# so code that reads `social_appropriate` is unaffected.
df_main["social_appropriate"] = df_main["quest_soc_approp_neg"].apply(
    lambda x: 1 if x < 2 else 0,
)

# NOTE(review): the indicator based on quest_soc_approp_pos (1 where the
# answer is at least 2) was previously assigned to `social_appropriate` too
# and immediately overwritten. It is kept under its own name so it is not
# lost -- confirm which definition (or combination) the analysis intends.
df_main["social_appropriate_pos"] = df_main["quest_soc_approp_pos"].apply(
    lambda x: 1 if x >= 2 else 0,
)

### EAI

In [None]:
# Items that are reverse-coded as 8 - x before scoring
# (presumably a 1-7 response scale -- TODO confirm).
reverse_coded_items = [
    "quest_eai_7",
    "quest_eai_8",
    "quest_eai_9",
    "quest_eai_10",
    "quest_eai_13",
    "quest_eai_14",
    "quest_eai_17",
    "quest_eai_18",
    "quest_eai_19",
    "quest_eai_20",
]

# Reverse-coding overwrites the item columns in place, so applying it a second
# time would flip them back to the raw answers. "GEA" is created at the end of
# this cell; if it already exists, the reversal has been applied to this frame
# and must not run again. (Reload the data to redo the step from scratch.)
if "GEA" not in df_main.columns:
    for col in reverse_coded_items:
        df_main[col] = df_main[col].apply(lambda x: 8 - x)

# Items quest_eai_1 ... quest_eai_24; adjust if the range is different.
score_columns = [f"quest_eai_{i}" for i in range(1, 25)]

# NOTE(review): sum(axis=1) skips missing values by default, so a respondent
# with unanswered items gets a lower score and GEA -- confirm the items are
# complete, or switch to a row-wise mean.
df_main["score"] = df_main[score_columns].sum(axis=1)
df_main["GEA"] = df_main["score"] / 24

print(df_main["GEA"].agg(["mean", "median", "min", "max", "count"]))

In [None]:
def _above_four(value):
    """Return 1 when ``value`` is strictly greater than 4, otherwise 0."""
    return 1 if value > 4 else 0


# Individual indicator: own GEA above 4.
df_main["pro_env"] = df_main["GEA"].apply(_above_four)

# Mean GEA within each GROUPID_ALL group, broadcast back to every member row.
group_mean_gea = df_main.groupby("GROUPID_ALL")["GEA"].transform("mean")
df_main["GEA_gav"] = group_mean_gea

# Group indicator: group-average GEA above 4.
df_main["group_pro_env"] = df_main["GEA_gav"].apply(_above_four)

In [None]:
# Show the frame including the score / GEA / pro_env columns added so far.
df_main

### GPS

In [None]:
# Single-item preference measures: expose each quest_gps* item under a short
# upper-case name. All renames are applied in one call.
gps_single_item_names = {
    "quest_gps71": "RISK",  # risk preferences
    "quest_gps72_a": "TIME",  # time preferences
    "quest_gps72_d": "ALTRUISM",  # altruistic preferences
    "quest_gps73_a": "POREC",  # positive reciprocity
    "quest_gps73_c": "TRUST",  # trust
}
df_main = df_main.rename(columns=gps_single_item_names)

# Negative reciprocity: row-wise mean of its three component items.
negrec_components = ["quest_gps72_b", "quest_gps72_c", "quest_gps73_b"]
negrec_items = df_main[negrec_components]
df_main["NEGREC"] = negrec_items.mean(axis=1)

In [None]:
# Numeric code for each answer option of quest_highest_ed.
education_levels = [
    ("Schulabschluss", 1),
    ("Ausbildung", 2),
    ("Bachelor-Abschluss", 3),
    ("Master-Abschluss", 4),
    ("Promotion (PhD.)", 5),
    ("Sonstige", 0),
]
education_map = dict(education_levels)

# 'highest_ed' holds the mapped code; answers not listed above have no entry
# in the mapping.
highest_ed_answers = df_main["quest_highest_ed"]
df_main["highest_ed"] = highest_ed_answers.map(education_map)

In [None]:
# Give the climate-shock questionnaire items descriptive column names.
# NOTE(review): "climate_shock_concer" looks truncated ("concern"?); it is
# left unchanged because other code may reference this exact name.
climate_shock_names = {
    "quest_q30": "climate_shock_concer",
    "quest_q32": "climate_shock_impact",
    "quest_q33": "climate_shock_cause",
    "quest_q59": "climate_shock_main_increased_risk_reason",
}
df_main = df_main.rename(columns=climate_shock_names)

In [None]:
# Recode study-field answer 3 as 2, leaving all other answers untouched.
is_code_three = df_main["quest_study_field"] == 3
df_main.loc[is_code_three, "quest_study_field"] = 2

In [None]:
# Attach a readable label to each 'quest_study_field' code.
field_labels = dict(
    [
        (1, "1: Yes Related"),
        (2, "2: Not Related"),
    ]
)
study_field_codes = df_main["quest_study_field"]
df_main["quest_study_field_label"] = study_field_codes.map(field_labels)