## Test effects of individual emotion regulation differences on behavioral results

In [32]:
import numpy as np
import pandas as pd
from scipy import stats
from statsmodels.stats.multitest import multipletests


In [33]:
base = "C:/Users/juhoffmann/Desktop/SubliminalVideoPriming/data"
accuracy_path = f"{base}/formatted/accuracy_long_format.csv"
rt_path = f"{base}/formatted/reaction_times_long_format.csv"
questionnaire_path = f"{base}/demographics/all.xlsx"

accuracy = pd.read_csv(accuracy_path)
rt = pd.read_csv(rt_path)
questions = pd.read_excel(questionnaire_path)

accuracy.head()
rt.head()

Unnamed: 0,Participant,Group,Condition,ReactionTime
0,Sub-007,1.0,happy_happy,0.573668
1,Sub-007,1.0,happy_sad,0.688039
2,Sub-007,1.0,sad_happy,0.696108
3,Sub-007,1.0,sad_sad,0.594425
4,Sub-008,1.0,happy_happy,0.815579


In [34]:
def normalize_id(x):
    if pd.isna(x):
        return np.nan
    x = str(x).strip()
    x = x.replace("Sub-", "sub-").replace("SUB-", "sub-").replace("sub_", "sub-")
    digits = "".join([c for c in x if c.isdigit()])
    return f"sub-{digits.zfill(3)}" if digits else x

questions["ID"] = questions["ID"].apply(normalize_id)
accuracy["ID"] = accuracy["Participant"].apply(normalize_id)
rt["ID"] = rt["Participant"].apply(normalize_id)

In [35]:
questions = questions.replace({"na": np.nan, "NA": np.nan, "": np.nan})
questions["Antidepressants"] = questions["Antidepressants"].fillna(0)
questions.head()
accuracy.head()
rt.head()

Unnamed: 0,Participant,Group,Condition,ReactionTime,ID
0,Sub-007,1.0,happy_happy,0.573668,sub-007
1,Sub-007,1.0,happy_sad,0.688039,sub-007
2,Sub-007,1.0,sad_happy,0.696108,sub-007
3,Sub-007,1.0,sad_sad,0.594425,sub-007
4,Sub-008,1.0,happy_happy,0.815579,sub-008


In [36]:
merged = rt[rt["ID"].isin(questions["ID"].unique())].merge(questions, left_on="ID", right_on="ID", how="left")
merged = accuracy[accuracy["ID"].isin(merged["ID"].unique())].merge(merged, left_on="ID", right_on="ID", how="left")

merged.columns

Index(['Participant_x', 'Group_x', 'Condition_x', 'Accuracy', 'ID',
       'Participant_y', 'Group_y', 'Condition_y', 'ReactionTime', 'EmoCon_ID',
       'Antidepressants', 'Video Mask done', 'Gender_x', 'Alter',
       'Gender_f1_m2', 'Group_MDD1_HC2', 'BVAQ', 'BDI-II', 'STAI2', 'STAI1',
       'WMS Correct', 'TM A', 'TM B', 'Hamilton Score Norm', 'Hamilton Score',
       'DigitSpan_Forwards_Span', 'DigitSpan_ForwardsTotal Score',
       'DigitSpan_Backwards_Span', 'DigitSpan_Backwards_Total Score',
       'DERS_Gesamtwert', 'DERS_Nicht-Akzeptanz emotionalerReaktionen',
       'DERS_Probleme mit zielorientiertem Verhalten',
       'DERS_Impulskontrollprobleme',
       'DERS_Mangel an emotionaler Aufmerksamkeit',
       'DERS_Eingeschr√§nkter Zugang zu Emotionsregulations-Strategien',
       'DERS_Mangel an emotionaler Klarheit', 'CERQ_Selbstbeschuldigung',
       'CERQ_Akzeptanz', 'CERQ_Rumination', 'CERQ_Positive Refokussierung',
       'CERQ_Refokussierung auf Planung', 'CERQ_Positi

In [37]:
# Select final columns to export
final_cols = [
    "ID",
    "Group_x",
    "Gender_x",
    "Alter",
    "Condition_x",
    "Accuracy",
    "ReactionTime",
    "BDI-II",
    "Hamilton Score",
    "DERS_Gesamtwert",
    "STAI1",
    "STAI2"
]
final_cols = [c for c in final_cols if c in merged.columns]

df = merged[final_cols]
df.head()

Unnamed: 0,ID,Group_x,Gender_x,Alter,Condition_x,Accuracy,ReactionTime,BDI-II,Hamilton Score,DERS_Gesamtwert,STAI1,STAI2
0,sub-007,1.0,male,23,happy_happy,90.909091,0.573668,26,19.0,134,75,69
1,sub-007,1.0,male,23,happy_happy,90.909091,0.688039,26,19.0,134,75,69
2,sub-007,1.0,male,23,happy_happy,90.909091,0.696108,26,19.0,134,75,69
3,sub-007,1.0,male,23,happy_happy,90.909091,0.594425,26,19.0,134,75,69
4,sub-007,1.0,male,23,happy_sad,92.307692,0.573668,26,19.0,134,75,69


In [38]:
df.rename(columns={"Group_x": "Group", "Gender_x": "Gender", "Condition_x": "Condition",
                   "Hamilton Score": "Hamilton", "DERS_Gesamtwert": "DERS", "STAI 1": "STAI_1", "STAI 2": "STAI_2", "Alter": "Age"}, inplace=True)


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  return super().rename(


In [39]:
df.head()

Unnamed: 0,ID,Group,Gender,Age,Condition,Accuracy,ReactionTime,BDI-II,Hamilton,DERS,STAI1,STAI2
0,sub-007,1.0,male,23,happy_happy,90.909091,0.573668,26,19.0,134,75,69
1,sub-007,1.0,male,23,happy_happy,90.909091,0.688039,26,19.0,134,75,69
2,sub-007,1.0,male,23,happy_happy,90.909091,0.696108,26,19.0,134,75,69
3,sub-007,1.0,male,23,happy_happy,90.909091,0.594425,26,19.0,134,75,69
4,sub-007,1.0,male,23,happy_sad,92.307692,0.573668,26,19.0,134,75,69


In [40]:
df.to_csv("C:/Users/juhoffmann/Desktop/SubliminalVideoPriming/data/behav_data/behavior_and_questionnaires.csv", index=False)

In [None]:
import statsmodels.formula.api as smf

cols = ["ID","Accuracy","ReactionTime","BDI-II","DERS","STAI1","STAI2","Group","Condition"]
df_clean = df[cols].dropna().reset_index(drop=True)
df_clean["logRT"] = np.log(df_clean["ReactionTime"])

In [None]:
m_acc = smf.mixedlm(
    "Accuracy ~ Q('BDI-II') + DERS + STAI1 + STAI2",
    data=df_clean,
    groups=df_clean["ID"] # random intercept for each participant
).fit(method="lbfgs", maxiter=2000)

print(m_acc.summary())
print(m_acc.cov_re)

         Mixed Linear Model Regression Results
Model:            MixedLM Dependent Variable: Accuracy  
No. Observations: 1974    Method:             REML      
No. Groups:       112     Scale:              142.5798  
Min. group size:  1       Log-Likelihood:     -7859.3795
Max. group size:  256     Converged:          Yes       
Mean group size:  17.6                                  
--------------------------------------------------------
              Coef.  Std.Err.   z    P>|z| [0.025 0.975]
--------------------------------------------------------
Intercept     81.427    8.080 10.077 0.000 65.590 97.264
Q('BDI-II')   -0.398    0.192 -2.078 0.038 -0.774 -0.023
DERS           0.064    0.100  0.642 0.521 -0.132  0.261
STAI1         -0.014    0.191 -0.075 0.940 -0.389  0.360
STAI2          0.161    0.227  0.709 0.478 -0.284  0.607
Group Var    157.649    1.984                           

           Group
Group  157.64863


In [None]:
m_rt = smf.mixedlm(
    "logRT ~ Q('BDI-II') + DERS + STAI1 + STAI2",
    data=df_clean,
    groups=df_clean["ID"]
).fit(method="lbfgs", maxiter=2000)

print(m_rt.summary())
print(m_rt.cov_re)

        Mixed Linear Model Regression Results
Model:            MixedLM Dependent Variable: logRT   
No. Observations: 1974    Method:             REML    
No. Groups:       112     Scale:              0.0180  
Min. group size:  1       Log-Likelihood:     941.9854
Max. group size:  256     Converged:          Yes     
Mean group size:  17.6                                
------------------------------------------------------
            Coef.  Std.Err.   z    P>|z| [0.025 0.975]
------------------------------------------------------
Intercept   -0.525    0.128 -4.109 0.000 -0.776 -0.275
Q('BDI-II') -0.001    0.003 -0.175 0.861 -0.006  0.005
DERS         0.000    0.002  0.088 0.930 -0.003  0.003
STAI1       -0.003    0.003 -0.832 0.405 -0.008  0.003
STAI2        0.005    0.004  1.515 0.130 -0.002  0.013
Group Var    0.041    0.044                           

          Group
Group  0.040707
