Demographics    

In [12]:
import pandas as pd 
from scipy import stats
from statsmodels.stats.multitest import multipletests
import numpy as np


In [9]:
# Load the demographic questionnaire data (semicolon-separated CSV).
# The path is written as a raw string because it contains Windows backslashes;
# in a normal string literal sequences such as "\F" or "\d" are invalid escape
# sequences and trigger a SyntaxWarning on recent Python versions.
# NOTE(review): this is an absolute, machine-specific path — consider a
# configurable data directory so the notebook runs on other machines.
df = pd.read_csv(r'W:\Fmri_Forschung\Allerlei\JuliaS\GitHub\SubliminalVideoPriming\data\demographics/demographic_questionnaires.csv', sep=";")

In [10]:
# Preview the first five rows to check that the file was parsed as expected.
df.head()


Unnamed: 0,BIDS Number,"Gender (f=1, m=2)","Group (MDD=1, HC=2)",BVAQ New,BDI-II Sum Score,STAI-State (Form 2),STAI-Trait (Form 1),WMS Correct,TM A,TM B,...,DERS Lack of Emotional Clarity,CERQ Self-Blame,CERQ Acceptance,CERQ Rumination,CERQ Positive Refocusing,CERQ Refocusing on Planning,CERQ Positive Reappraisal,CERQ Putting into Perspective,CERQ Catastrophizing,CERQ Blaming Others
0,Sub-058,1,2,41,9,49,44,27,13.003,43.909,...,10,18,9,10,4,7,5,4,13,5
1,Sub-035,1,1,49,41,76,70,28,23.948,57.796,...,8,12,16,17,4,8,8,12,8,5
2,Sub-034,1,2,35,14,46,46,23,24.727,54.862,...,10,9,16,14,12,16,16,18,4,6
3,Sub-108,2,1,58,13,57,51,25,20.724,32.76,...,9,11,11,10,7,14,10,14,5,7
4,Sub-046,1,2,58,9,55,56,25,21.656,40.437,...,16,6,17,14,8,10,9,13,7,16


Descriptive Statistics and Group Differences

In [18]:
# Compare every numeric variable between the two groups (MDD = 1, HC = 2):
# per-group mean and SD, Welch's t-test, Cohen's d (pooled SD), and a
# Bonferroni correction across all tested variables. The combined table is
# written to a semicolon-separated CSV file.

group_col = "Group (MDD=1, HC=2)"

# Ensure group column is numeric; entries that cannot be parsed become NaN
# and therefore match neither group below.
df[group_col] = pd.to_numeric(df[group_col], errors="coerce")

# Keep only numeric columns, excluding the grouping variable itself.
# NOTE(review): every numeric column is tested, which includes coded
# categorical columns (e.g. "Gender (f=1, m=2)") if they are numeric —
# confirm that this is intended, since it also affects the Bonferroni factor.
numeric_cols = df.select_dtypes(include="number").columns
numeric_cols = [c for c in numeric_cols if c != group_col]

# The group membership masks do not depend on the variable being tested,
# so they are computed once instead of on every loop iteration.
is_mdd = df[group_col] == 1
is_hc = df[group_col] == 2

# Store results (one dict per tested variable)
results = []

for col in numeric_cols:
    # Split groups, dropping missing values per variable
    g1 = df.loc[is_mdd, col].dropna()
    g2 = df.loc[is_hc, col].dropna()

    # A t-test and a sample SD need at least two observations per group;
    # variables that do not meet this are skipped.
    if len(g1) > 1 and len(g2) > 1:
        # Descriptive stats (pandas .std() uses ddof=1, i.e. the sample SD)
        mean1, sd1 = g1.mean(), g1.std()
        mean2, sd2 = g2.mean(), g2.std()

        # Welch's t-test (does not assume equal variances)
        t_stat, p_val = stats.ttest_ind(g1, g2, equal_var=False)

        # Cohen's d (pooled SD); s1 and s2 are the sample variances
        n1, n2 = len(g1), len(g2)
        s1, s2 = np.var(g1, ddof=1), np.var(g2, ddof=1)
        pooled_sd = np.sqrt(((n1 - 1) * s1 + (n2 - 1) * s2) / (n1 + n2 - 2))
        cohen_d = (mean1 - mean2) / pooled_sd

        results.append({
            "Variable": col,
            # Both groups use the same ".3f" format. The original MDD entry
            # used "3f" (a width of 3, not a precision), which printed the SD
            # with six decimals.
            "MDD Mean ± SD": f"{mean1:.3f} ± {sd1:.3f}",
            "HC Mean ± SD": f"{mean2:.3f} ± {sd2:.3f}",
            "t-statistic": t_stat,
            "p-value": p_val,
            "Cohen's d": cohen_d
        })

# Without any testable variable the table would have no "p-value" column and
# the correction below would fail with an opaque KeyError.
if not results:
    raise ValueError(
        "No numeric variable has at least two observations in both groups; "
        "nothing to compare."
    )

# Convert to DataFrame
results_df = pd.DataFrame(results)

# Multiple comparison correction (Bonferroni) across all tested variables
reject, p_adj, _, _ = multipletests(results_df["p-value"], alpha=0.05, method="bonferroni")
results_df["p-value (Bonferroni)"] = p_adj
results_df["Significant (Bonferroni<0.05)"] = reject

# Round numeric columns to 4 decimals
for col in ["t-statistic", "p-value", "Cohen's d", "p-value (Bonferroni)"]:
    results_df[col] = results_df[col].round(4)

# Save as semicolon-separated CSV
results_df.to_csv("combined_results_bonferroni.csv", sep=";", index=False)

print("✅ Combined results saved to 'combined_results_bonferroni.csv'")


✅ Combined results saved to 'combined_results_bonferroni.csv'
