In [14]:
import warnings

import pandas as pd
import pingouin as pg
import statsmodels.api as sm
from pingouin import pairwise_tukey
from scipy.stats import shapiro, levene
from statsmodels.formula.api import ols
from statsmodels.stats.multicomp import pairwise_tukeyhsd

# Notebook-wide settings: switch off pandas' chained-assignment check and
# silence every Python warning for the rest of the session.
# NOTE(review): the blanket 'ignore' filter also hides warnings raised by
# scipy / statsmodels / pingouin during the tests below — consider narrowing it.
pd.set_option('mode.chained_assignment', None)
warnings.simplefilter('ignore')

In [15]:
# Read both CSV extracts. `df` is the frame analysed in the cells below;
# `df2` is not referenced in the visible cells (presumably used elsewhere — confirm).
df = pd.read_csv('source/FReDA4.csv')
df2 = pd.read_csv('source/FReDA3.csv')

# Start with an empty (all-None) Group4 column; the labels are filled in later.
df = df.assign(Group4=None)

In [16]:
# couples_satisfied = df[df["Group3"] == "Couple Satisfaction"].copy()
# couples_deprived = df[df["Group3"] == "Couple Deprivation"].copy()
# couples_saturated = df[df["Group3"] == "Couple Saturation"].copy()
# couples_mixed = df[df["Group3"] == "Couple Mixed"].copy()

In [17]:
# Satisfied_couples = df[df["Group3"] == "Couple Satisfaction"].copy()
# deprived_couples = df[df["Group2"] == "Couple Deprivation"].copy()
# deprived_one = df[df["Group2"] == "One-sided Deprivation"].copy()
# deprived_me = df[df["Group1"] == "SubGroup2"].copy()
# deprived_partner = df[df["Group1"] == "SubGroup7"].copy()
# saturated_couples = df[df["Group2"] == "Couple Saturation"].copy()
# saturated_one = df[df["Group2"] == "One-sided Saturation"].copy()
# saturated_me = df[df["Group1"] == "SubGroup5"].copy()
# saturated_partner = df[df["Group1"] == "SubGroup8"].copy()

In [18]:
# Labelling rules as (source column, value to match, Group4 label).
# The rules are applied top to bottom and a later match overwrites an earlier
# one, so the sequence below must stay exactly as written.
# NOTE(review): the two Group2 rules look like placeholders that the
# per-subgroup "_Me" / "_Partner" rules replace afterwards — confirm against
# the group definitions.
group4_rules = [
    # Satisfied
    ('Group1', 'SubGroup1', 'Satisfied'),

    # Deprived groups
    ('Group1', 'SubGroup3', 'Deprived_Couples'),
    ('Group2', 'One-sided Deprivation', 'Deprived_One'),
    ('Group1', 'SubGroup2', 'Deprived_Me'),
    ('Group1', 'SubGroup7', 'Deprived_Partner'),

    # Saturated groups
    ('Group1', 'SubGroup6', 'Saturated_Couples'),
    ('Group2', 'One-sided Saturation', 'Saturated_One'),
    ('Group1', 'SubGroup5', 'Saturated_Me'),
    ('Group1', 'SubGroup8', 'Saturated_Partner'),

    # Mixed groups (two subgroups share one label)
    ('Group1', 'SubGroup4', 'Mixed_Couples'),
    ('Group1', 'SubGroup9', 'Mixed_Couples'),
]

for source_col, matched_value, label in group4_rules:
    df.loc[df[source_col] == matched_value, 'Group4'] = label

In [19]:
# Sanity check: number of rows that still have no Group4 label.
print(df['Group4'].isnull().sum())

0


In [20]:
# Replace hyphens and spaces in these column names with underscores
# (presumably so the names can be used inside formula strings — confirm).
columns_to_fix = [
    'Self-esteem',
    'Life Satisfaction',
    'Communication Quality',
    'Relationship Satisfaction',
    'Conflict Management',
]
df = df.rename(
    columns={old: old.replace('-', '_').replace(' ', '_') for old in columns_to_fix}
)

In [21]:
# Dependent variables analysed by the cells below. To analyse another set,
# swap in one of the commented alternatives and re-run from here.
traits = [
    'Extraversion',
    'Agreeableness',
    'Conscientiousness',
    'Neuroticism',
    'Openness',
]
# traits = ['Depressiveness', 'Loneliness', 'Self_esteem', 'Life_Satisfaction', 'Health']
# traits = ["Communication_Quality", "Relationship_Satisfaction", "Conflict_Management"]

In [22]:
# Keep only the rows that have a value for every selected trait
# (listwise deletion), as an independent copy of the filtered frame.
has_all_traits = df[traits].notna().all(axis=1)
df = df.loc[has_all_traits].copy()

In [23]:
# -----------------------
# Assumption checks
# -----------------------
# For every trait: optional Shapiro-Wilk normality test per Group4 label,
# then Levene's test for homogeneity of variance across all labels.
#
# Shapiro-Wilk is only computed when this flag is on; when on, one p-value
# per group is printed. With the flag off the printed output is the Levene
# line only.
SHOW_SHAPIRO = False

for trait in traits:
    print(f"\n--- {trait} ---")

    # One Series of scores per Group4 label, in order of first appearance.
    # groupby leaves out rows whose label is missing, so no empty sample
    # can reach the tests below.
    samples = {label: scores for label, scores in df.groupby('Group4', sort=False)[trait]}

    # Shapiro normality per group
    if SHOW_SHAPIRO:
        for label, scores in samples.items():
            if len(scores) >= 3:  # Shapiro requires >=3
                stat, p = shapiro(scores)
                print(f"{label} Shapiro p={p:.3f}")

    # Levene test for homogeneity of variance
    stat, p = levene(*samples.values())
    print(f"Levene test p={p:.3f} (p>0.05 = equal variances)")


--- Extraversion ---
Levene test p=0.803 (p>0.05 = equal variances)

--- Agreeableness ---
Levene test p=0.014 (p>0.05 = equal variances)

--- Conscientiousness ---
Levene test p=0.461 (p>0.05 = equal variances)

--- Neuroticism ---
Levene test p=0.014 (p>0.05 = equal variances)

--- Openness ---
Levene test p=0.367 (p>0.05 = equal variances)


In [24]:
# -----------------------
# ANOVA + Effect size
# -----------------------
# One-way ANOVA of each trait on the Group4 label, followed by eta squared.
# NOTE(review): the recorded Levene output shows p=0.014 for two traits;
# consider a Welch ANOVA (e.g. pg.welch_anova) as a robustness check.
for trait in traits:
    print(f"\n=== ANOVA for {trait} ===")

    # OLS model with Group4 as a categorical factor
    model = ols(f'{trait} ~ C(Group4)', data=df).fit()

    # Standard ANOVA table
    anova_table = sm.stats.anova_lm(model, typ=2)
    print(anova_table)

    # Eta squared = SS_between / SS_total, taken from the table above rather
    # than from a second ANOVA fit; the table has exactly the factor row and
    # the residual row, so the column sum is the total sum of squares.
    ss_between = anova_table.loc['C(Group4)', 'sum_sq']
    ss_total = anova_table['sum_sq'].sum()
    print(f"Eta squared = {ss_between / ss_total:.3f}")


=== ANOVA for Extraversion ===
                 sum_sq       df         F    PR(>F)
C(Group4)    147.812741      7.0  5.069602  0.000009
Residual   55822.534835  13402.0       NaN       NaN
Eta squared = 0.003

=== ANOVA for Agreeableness ===
                 sum_sq       df         F        PR(>F)
C(Group4)    231.743231      7.0  8.488928  2.080193e-10
Residual   52266.781751  13402.0       NaN           NaN
Eta squared = 0.004

=== ANOVA for Conscientiousness ===
                 sum_sq       df         F    PR(>F)
C(Group4)     82.007242      7.0  2.580847  0.011715
Residual   60836.131163  13402.0       NaN       NaN
Eta squared = 0.001

=== ANOVA for Neuroticism ===
                 sum_sq       df         F        PR(>F)
C(Group4)    884.069715      7.0  22.23332  4.047977e-30
Residual   76129.638264  13402.0       NaN           NaN
Eta squared = 0.011

=== ANOVA for Openness ===
                 sum_sq       df         F        PR(>F)
C(Group4)    260.217455      7.0  7.426703

In [26]:
# -----------------------
# Post-hoc tests
# -----------------------
# Tukey HSD on Group4 for each selected variable, showing only the pairs that
# involve the reference group.
#
# NOTE(review): this cell rebinds `traits`, while `df` was already filtered by
# an earlier dropna on the previous `traits` list; rows missing one of those
# earlier variables are therefore absent here too — confirm this is intended.
# NOTE(review): the execution counter skips a number before this cell, so a
# cell that is no longer present may have been run — verify with Restart & Run All.

traits = [
    # 'Neuroticism',
    #       'Extraversion',
    #       'Openness',
    #       'Agreeableness',
    #       'Conscientiousness',

    'Depressiveness',
    # 'Loneliness',
    # 'Self_esteem',
    # 'Life_Satisfaction',
    # 'Health',
    #
    # 'Relationship_Satisfaction',
    # 'Communication_Quality',
    # 'Conflict_Management'
]

# Group every other group is compared against.
REFERENCE_GROUP = 'Satisfied'

# Display order of the comparison partners (deprived, saturated, mixed).
# Partners not listed here are still shown, after the listed ones.
COMPARISON_ORDER = [
    'Deprived_Couples',
    'Deprived_Me',
    'Deprived_Partner',
    'Saturated_Couples',
    'Saturated_Me',
    'Saturated_Partner',
    'Mixed_Couples',
]
rank_of = {name: position for position, name in enumerate(COMPARISON_ORDER)}

for trait in traits:
    print(f"\n--- Post-hoc comparisons for {trait} ---")

    # Tukey HSD with Cohen's d as the effect size
    tukey = pg.pairwise_tukey(
        dv=trait,  # column name as string
        between='Group4',  # grouping variable
        data=df,
        effsize='cohen'
    )

    # Select rows by group name instead of by row position: positions shift
    # whenever a group is missing or the set of labels changes.
    involves_reference = (tukey['A'] == REFERENCE_GROUP) | (tukey['B'] == REFERENCE_GROUP)
    vs_reference = tukey[involves_reference]

    # The non-reference member of each pair, used only to order the rows.
    other_group = vs_reference['A'].where(vs_reference['A'] != REFERENCE_GROUP, vs_reference['B'])
    display_rank = other_group.map(rank_of).sort_values(kind='stable')
    print(vs_reference.loc[display_rank.index])


--- Post-hoc comparisons for Depressiveness ---
                   A                  B   mean(A)   mean(B)      diff  \
3   Deprived_Couples          Satisfied  5.250447  4.822845  0.427602   
9        Deprived_Me          Satisfied  5.052938  4.822845  0.230093   
14  Deprived_Partner          Satisfied  5.063345  4.822845  0.240500   
22         Satisfied  Saturated_Couples  4.822845  4.842697 -0.019852   
23         Satisfied       Saturated_Me  4.822845  5.177536 -0.354691   
24         Satisfied  Saturated_Partner  4.822845  5.098143 -0.275298   
18     Mixed_Couples          Satisfied  5.449768  4.822845  0.626923   

          se          T       p-tukey     cohen  
3   0.038078  11.229688  1.369349e-12  0.270531  
9   0.043969   5.233137  4.685633e-06  0.147233  
14  0.039984   6.014971  5.157326e-08  0.152901  
22  0.172164  -0.115306  1.000000e+00 -0.012924  
23  0.100099  -3.543420  9.441583e-03 -0.229268  
24  0.086706  -3.175069  3.238239e-02 -0.177386  
18  0.068301   9