In [1]:
import warnings

import pandas as pd
import pingouin as pg
import statsmodels.api as sm
from pingouin import pairwise_tukey
from scipy.stats import shapiro, levene
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Session-wide configuration: switch off pandas' chained-assignment check and
# silence every Python warning.
# NOTE(review): the blanket 'ignore' filter also hides any warning emitted by
# the statistical routines used further down; consider narrowing it.
pd.set_option('mode.chained_assignment', None)
warnings.simplefilter('ignore')

In [2]:
# Read both CSV extracts. `df` is the working frame for everything below and
# starts with an empty (all-None) `Group4` column that is filled in later.
# NOTE(review): `df2` is not referenced in any of the cells visible here.
df2 = pd.read_csv('source/FReDA3.csv')
df = pd.read_csv('source/FReDA4.csv').assign(Group4=None)

In [3]:
# couples_satisfied = df[df["Group3"] == "Couple Satisfaction"].copy()
# couples_deprived = df[df["Group3"] == "Couple Deprivation"].copy()
# couples_saturated = df[df["Group3"] == "Couple Saturation"].copy()
# couples_mixed = df[df["Group3"] == "Couple Mixed"].copy()

In [4]:
# Satisfied_couples = df[df["Group3"] == "Couple Satisfaction"].copy()
# deprived_couples = df[df["Group2"] == "Couple Deprivation"].copy()
# deprived_one = df[df["Group2"] == "One-sided Deprivation"].copy()
# deprived_me = df[df["Group1"] == "SubGroup2"].copy()
# deprived_partner = df[df["Group1"] == "SubGroup7"].copy()
# saturated_couples = df[df["Group2"] == "Couple Saturation"].copy()
# saturated_one = df[df["Group2"] == "One-sided Saturation"].copy()
# saturated_me = df[df["Group1"] == "SubGroup5"].copy()
# saturated_partner = df[df["Group1"] == "SubGroup8"].copy()

In [5]:
# Recode the existing Group1 / Group2 labels into the analysis categories
# stored in `Group4`.
#
# Each rule is (source column, matching codes, Group4 label). Rules are applied
# from top to bottom, so for a row matching more than one rule the later rule
# wins.
#
# A finer split of the one-sided groups (SubGroup2 / SubGroup7 ->
# Deprived_Me / Deprived_Partner, SubGroup5 / SubGroup8 -> Saturated_Me /
# Saturated_Partner) is left disabled; those rows are covered by the Group2
# based "One-sided" rules instead.
group4_rules = [
    # Satisfied
    ('Group1', ['SubGroup1'], 'Satisfied'),
    # Deprived groups
    ('Group1', ['SubGroup3'], 'Deprived_Couples'),
    ('Group2', ['One-sided Deprivation'], 'Deprived_One'),
    # Saturated groups
    ('Group1', ['SubGroup6'], 'Saturated_Couples'),
    ('Group2', ['One-sided Saturation'], 'Saturated_One'),
    # Mixed groups
    ('Group1', ['SubGroup4', 'SubGroup9'], 'Mixed_Couples'),
]

for source_col, codes, label in group4_rules:
    df.loc[df[source_col].isin(codes), 'Group4'] = label

In [6]:
# Sanity check: count the rows whose Group4 is still missing after recoding.
n_unassigned = df['Group4'].isna().sum()
print(n_unassigned)

0


In [7]:
# Give the columns whose names contain a space or hyphen an underscore-only
# name (presumably so they can be written directly into the model formula
# strings used later in the notebook).
columns_to_fix = [
    'Self-esteem',
    'Life Satisfaction',
    'Communication Quality',
    'Relationship Satisfaction',
    'Conflict Management',
]
column_renames = {
    old_name: old_name.replace(' ', '_').replace('-', '_')
    for old_name in columns_to_fix
}
df = df.rename(columns=column_renames)

In [8]:
# Outcome variables analysed in the cells below, organised in three domains.
# `traits` is the concatenation of all three; to analyse a single domain,
# assign one of the sub-lists to `traits` instead.
personality_traits = [
    'Neuroticism',
    'Extraversion',
    'Openness',
    'Agreeableness',
    'Conscientiousness',
]
wellbeing_traits = [
    'Depressiveness',
    'Loneliness',
    'Self_esteem',
    'Life_Satisfaction',
    'Health',
]
relationship_traits = [
    'Relationship_Satisfaction',
    'Communication_Quality',
    'Conflict_Management',
]

traits = personality_traits + wellbeing_traits + relationship_traits

In [9]:
# Listwise deletion: keep only the rows that have a value for every column in
# `traits`, so that all later cells operate on the same set of rows.
has_all_traits = df[traits].notna().all(axis=1)
df = df.loc[has_all_traits].copy()

In [10]:
# -----------------------
# Assumption checks
# -----------------------
# For every trait:
#   * Shapiro-Wilk normality test within each Group4 category; the results are
#     collected in `shapiro_results` (one row per trait x group) instead of
#     being computed and thrown away, so they can be inspected afterwards.
#   * Levene test for homogeneity of variance across the Group4 categories;
#     the p-value is printed.
#
# NOTE(review): scipy documents that the Shapiro-Wilk p-value may be inaccurate
# for N > 5000 - check the `n` column of `shapiro_results` before relying on
# `p` for large groups.
shapiro_rows = []

for trait in traits:
    print(f"\n--- {trait} ---")

    # Split the trait by group once. sort=False keeps the groups in order of
    # first appearance, i.e. the same order Series.unique() would give.
    # (groupby skips rows with a missing Group4; none are expected here.)
    samples = {
        group: values
        for group, values in df.groupby('Group4', sort=False)[trait]
    }

    # Shapiro normality per group
    for group, values in samples.items():
        if len(values) >= 3:  # Shapiro requires >=3
            w_stat, w_p = shapiro(values)
            shapiro_rows.append(
                {'trait': trait, 'group': group, 'n': len(values), 'W': w_stat, 'p': w_p}
            )

    # Levene test for homogeneity of variance
    stat, p = levene(*samples.values())
    print(f"Levene test p={p:.3f} (p>0.05 = equal variances)")

# Not displayed here so the cell's printed output stays as before; evaluate
# `shapiro_results` in a separate cell to look at it.
shapiro_results = pd.DataFrame(shapiro_rows, columns=['trait', 'group', 'n', 'W', 'p'])


--- Neuroticism ---
Levene test p=0.021 (p>0.05 = equal variances)

--- Extraversion ---
Levene test p=0.642 (p>0.05 = equal variances)

--- Openness ---
Levene test p=0.286 (p>0.05 = equal variances)

--- Agreeableness ---
Levene test p=0.002 (p>0.05 = equal variances)

--- Conscientiousness ---
Levene test p=0.506 (p>0.05 = equal variances)

--- Depressiveness ---
Levene test p=0.086 (p>0.05 = equal variances)

--- Loneliness ---
Levene test p=0.000 (p>0.05 = equal variances)

--- Self_esteem ---
Levene test p=0.000 (p>0.05 = equal variances)

--- Life_Satisfaction ---
Levene test p=0.004 (p>0.05 = equal variances)

--- Health ---
Levene test p=0.000 (p>0.05 = equal variances)

--- Relationship_Satisfaction ---
Levene test p=0.000 (p>0.05 = equal variances)

--- Communication_Quality ---
Levene test p=0.000 (p>0.05 = equal variances)

--- Conflict_Management ---
Levene test p=0.000 (p>0.05 = equal variances)


In [11]:
# -----------------------
# ANOVA + Effect size
# -----------------------
# One-way ANOVA of every trait on Group4, followed by eta squared.
#
# Eta squared is taken straight from the sums of squares of the ANOVA table
# (SS_between / SS_total) rather than fitting the same one-way ANOVA a second
# time with another library just to read the effect size.
#
# NOTE(review): the Levene results recorded earlier in this notebook show
# p < 0.05 for most traits, i.e. unequal group variances. A classic ANOVA
# assumes equal variances; consider pingouin's `welch_anova` (and
# `pairwise_gameshowell` for the post-hoc step) as a robustness check.
for trait in traits:
    print(f"\n=== ANOVA for {trait} ===")

    # OLS model with Group4 as a categorical predictor
    model = ols(f'{trait} ~ C(Group4)', data=df).fit()

    # Standard ANOVA table (rows: 'C(Group4)' and 'Residual')
    anova_table = sm.stats.anova_lm(model, typ=2)
    print(anova_table)

    # Eta squared = between-group sum of squares / total sum of squares
    ss_between = anova_table.loc['C(Group4)', 'sum_sq']
    ss_total = anova_table['sum_sq'].sum()
    eta_squared = ss_between / ss_total
    print(f"Eta squared = {eta_squared:.3f}")


=== ANOVA for Neuroticism ===
                 sum_sq       df          F        PR(>F)
C(Group4)    897.438630      5.0  31.741524  3.089367e-32
Residual   67414.931508  11922.0        NaN           NaN
Eta squared = 0.013

=== ANOVA for Extraversion ===
                 sum_sq       df         F    PR(>F)
C(Group4)    131.860270      5.0  6.320137  0.000007
Residual   49746.963338  11922.0       NaN       NaN
Eta squared = 0.003

=== ANOVA for Openness ===
                 sum_sq       df         F        PR(>F)
C(Group4)    207.906483      5.0  8.364543  6.603241e-08
Residual   59265.905388  11922.0       NaN           NaN
Eta squared = 0.003

=== ANOVA for Agreeableness ===
                 sum_sq       df          F        PR(>F)
C(Group4)    216.594559      5.0  11.133573  1.008143e-10
Residual   46386.551987  11922.0        NaN           NaN
Eta squared = 0.005

=== ANOVA for Conscientiousness ===
                 sum_sq       df         F    PR(>F)
C(Group4)     66.805369     

In [15]:
# -----------------------
# Post-hoc tests
# -----------------------
# Tukey HSD on every trait; only the comparisons between the 'Satisfied'
# reference group and the three couple-level groups are printed.

# NOTE(review): this repeats the `traits` list defined earlier in the
# notebook. It is kept so the cell behaves exactly as before; consider
# removing it and relying on the single earlier definition.
traits = [
    'Neuroticism',
    'Extraversion',
    'Openness',
    'Agreeableness',
    'Conscientiousness',

    'Depressiveness',
    'Loneliness',
    'Self_esteem',
    'Life_Satisfaction',
    'Health',

    'Relationship_Satisfaction',
    'Communication_Quality',
    'Conflict_Management'
]

# Comparisons to report, identified by group label instead of by the
# positional row labels of the Tukey table, so the selection stays correct
# if the set of groups (and therefore the row numbering) ever changes.
REFERENCE_GROUP = 'Satisfied'
COUPLE_GROUPS = ['Deprived_Couples', 'Mixed_Couples', 'Saturated_Couples']

for trait in traits:
    print(f"\n--- Post-hoc comparisons for {trait} ---")

    # Tukey HSD
    tukey = pairwise_tukey(
        dv=trait,  # column name as string
        between='Group4',  # grouping variable
        data=df,
        effsize='cohen'
    )
    # print(tukey)

    # A pair can list the reference group in either column A or column B.
    is_reported = (
        ((tukey['A'] == REFERENCE_GROUP) & tukey['B'].isin(COUPLE_GROUPS))
        | ((tukey['B'] == REFERENCE_GROUP) & tukey['A'].isin(COUPLE_GROUPS))
    )
    print(tukey.loc[is_reported])


--- Post-hoc comparisons for Neuroticism ---
                   A                  B   mean(A)   mean(B)      diff  \
2   Deprived_Couples          Satisfied  8.258096  7.566117  0.691980   
9      Mixed_Couples          Satisfied  8.395349  7.566117  0.829232   
12         Satisfied  Saturated_Couples  7.566117  7.635135 -0.069018   

          se          T       p-tukey     cohen  
2   0.059701  11.590694  2.718825e-12  0.295729  
9   0.108680   7.630067  3.097522e-12  0.353678  
12  0.279482  -0.246951  9.998756e-01 -0.029856  

--- Post-hoc comparisons for Extraversion ---
                   A                  B   mean(A)    mean(B)      diff  \
2   Deprived_Couples          Satisfied  9.199934   9.397001 -0.197068   
9      Mixed_Couples          Satisfied  9.248658   9.397001 -0.148343   
12         Satisfied  Saturated_Couples  9.397001  10.135135 -0.738134   

          se         T   p-tukey     cohen  
2   0.051285 -3.842609  0.001705 -0.096324  
9   0.093358 -1.588966  0.6