In [10]:
import warnings

import numpy as np
import pandas as pd
import pingouin as pg
import scipy.stats as stats
from scipy.stats import chi2_contingency

pd.options.mode.chained_assignment = None
warnings.filterwarnings('ignore')

In [11]:
df = pd.read_csv('source/FReDA3.csv')
df2 = pd.read_csv('source/FReDA4.csv')

In [12]:
couples_satisfied = df[df["Group3"] == "Couple Satisfaction"].copy()
couples_deprived = df[df["Group3"] == "Couple Deprivation"].copy()
couples_saturated = df[df["Group3"] == "Couple Saturation"].copy()
couples_mixed = df[df["Group3"] == "Couple Mixed"].copy()

In [13]:
dfs = [couples_satisfied, couples_deprived, couples_saturated, couples_mixed]

for df in dfs:

    # 1. Identify columns (adjust names to match your exact CSV/DataFrame)
    items = ['Frequency', 'Kiss Frequency', 'Hold Frequency', 'Hug Frequency',
             'Desire', 'Kiss Desire', 'Hold Desire', 'Hug Desire']

    # 2. Create 'Side A' (Anchor is the primary)
    side_a = df[['Id'] + [f'Anchor {i}' for i in items] + [f'Partner {i}' for i in items]].copy()
    side_a.columns = ['Couple_ID'] + [f'Self_{i}' for i in items] + [f'Partner_{i}' for i in items]

    # 3. Create 'Side B' (Partner is the primary)
    side_b = df[['Id'] + [f'Partner {i}' for i in items] + [f'Anchor {i}' for i in items]].copy()
    side_b.columns = ['Couple_ID'] + [f'Self_{i}' for i in items] + [f'Partner_{i}' for i in items]

    # 4. Merge
    df_long = pd.concat([side_a, side_b], ignore_index=True)

    # Select all Self and Partner columns to center
    cols_to_center = [c for c in df_long.columns if 'Self_' in c or 'Partner_' in c]

    # Calculate the mean for each couple ID
    couple_means = df_long.groupby('Couple_ID')[cols_to_center].transform('mean')

    # Center the data: (Actual Value - Couple Mean)
    df_centered = df_long[cols_to_center] - couple_means

    results = []
    # Match Frequency items to their corresponding Desire items
    categories = ['Frequency', 'Kiss Frequency', 'Hold Frequency', 'Hug Frequency']

    for cat in categories:
        freq_col = f'Self_{cat}'
        # Mapping 'Frequency' to 'Desire'
        desire_col = freq_col.replace('Frequency', 'Desire')

        # Calculate Spearman Correlation on the centered data
        r_val, p_val = stats.spearmanr(df_centered[freq_col], df_centered[desire_col])

        results.append({
            'Touch Type': cat.replace(' Frequency', '') or 'Overall',
            'Spearman r (rm)': round(r_val, 3),
            'p-value': '< .001' if p_val < .001 else round(p_val, 4)
        })

    # Final Table
    table_output = pd.DataFrame(results)
    print(table_output)

  Touch Type  Spearman r (rm) p-value
0  Frequency            1.000  < .001
1       Kiss            0.991  < .001
2       Hold            0.997  < .001
3        Hug            0.995  < .001
  Touch Type  Spearman r (rm) p-value
0  Frequency            0.609  < .001
1       Kiss            0.562  < .001
2       Hold            0.672  < .001
3        Hug            0.603  < .001
  Touch Type  Spearman r (rm) p-value
0  Frequency            0.586  < .001
1       Kiss            0.433  < .001
2       Hold            0.632  < .001
3        Hug            0.650  < .001
  Touch Type  Spearman r (rm) p-value
0  Frequency            0.228  < .001
1       Kiss            0.215  < .001
2       Hold            0.330  < .001
3        Hug            0.270  < .001


In [14]:
couples_satisfied = df2[df2["Group3"] == "Couple Satisfaction"].copy()
couples_deprived = df2[df2["Group3"] == "Couple Deprivation"].copy()
couples_saturated = df2[df2["Group3"] == "Couple Saturation"].copy()
couples_mixed = df2[df2["Group3"] == "Couple Mixed"].copy()

groups = ['Couple Deprivation', 'Couple Mixed', 'Couple Satisfaction', 'Couple Saturation']

In [15]:
def generate_interpretations(df):
    baseline = 'Couple Satisfaction'
    others = [g for g in df.index if g != baseline]

    # Mapping for shorthand report
    name_map = {
        'Couple Deprivation': 'D',
        'Couple Mixed': 'Mixed',
        'Couple Saturation': 'Sat'
    }

    print(f"{'Level':<10} | {'Comparison':<15} | {'Significance'}")
    print("-" * 45)

    for level in df.columns:
        s_val = df.loc[baseline, level]

        for other in others:
            o_val = df.loc[other, level]

            # Direction
            symbol = ">" if s_val > o_val else "<"

            # Significance Check (Difference > 1.96)
            diff = abs(s_val - o_val)
            sig_status = "Significant" if diff > 1.96 else "Not Significant"

            print(f"{level:<10} | S {symbol} {name_map[other]:<10} | {sig_status}")
        print("-" * 45)

In [39]:
def report_kruskal_results(df, dv, group_col='Group3', baseline='Couple Satisfaction'):
    # 1. Run Kruskal-Wallis
    k_res = pg.kruskal(data=df, dv=dv, between=group_col)
    h_val = k_res['H'].item()
    p_val = k_res['p-unc'].item()

    # 2. Effect Size: Eta-squared
    n = df[dv].count()
    k = df[group_col].nunique()
    eta_sq = (h_val - k + 1) / (n - k)

    # 3. Get Mean Ranks to determine direction
    # We rank the DV across the whole dataset first
    df_ranked = df.copy()
    df_ranked['temp_rank'] = df[dv].rank()
    mean_ranks = df_ranked.groupby(group_col)['temp_rank'].mean()

    # 4. Post-hoc Dunn's test
    posthoc = sp.posthoc_dunn(df, val_col=dv, group_col=group_col, p_adjust='bonferroni')

    # 5. Build the comparison string (e.g., "S > Sat")
    comparisons = []
    name_map = {
        'Couple Satisfaction': 'S',
        'Couple Deprivation': 'Dep',
        'Couple Saturation': 'Sat',
        'Couple Mixed': 'Mixed'
    }
    baseline_ranks = mean_ranks[baseline]
    short_base = name_map.get(baseline, 'S')
    print(short_base)

    # Loop through all groups in the index
    for other_group in [g for g in mean_ranks.index if g != baseline]:
        print(other_group)
        p_comp = posthoc.loc[baseline, other_group]

        # Only add to the string if the difference is statistically significant
        if p_comp < 0.05:
            symbol = ">" if baseline_ranks > mean_ranks[other_group] else "<"
            short_other = name_map.get(other_group, other_group)
            comparisons.append(f"{short_base} {symbol} {short_other}")

    # Format Significance
    sig = "***" if p_val < .001 else "**" if p_val < .01 else "*" if p_val < .05 else "ns"

    # Output Results
    print(f"Variable: {dv}")
    print(f"Stats: η² = {eta_sq:.3f}, {sig}")
    print(f"Post-hoc: {'; '.join(comparisons) if comparisons else 'No significant differences'}")
    print("-" * 30)

# --- EXECUTION ---
report_kruskal_results(df2, 'Education')

S
Couple Deprivation
Couple Mixed
Couple Saturation
Variable: Education
Stats: η² = 0.003, ***
Post-hoc: S > Sat
------------------------------


In [33]:
import scikit_posthocs as sp # You may need to: pip install scikit_posthocs
import pingouin as pg

dv = 'Education'
group_col = 'Group3'

# 1. Omnibus Test: Kruskal-Wallis
kruskal_anova = pg.kruskal(data=df2, dv=dv, between=group_col)

# 2. Effect Size: Eta-squared (H)
n = df2[dv].count()
k = df2[group_col].nunique()
eta_sq_h = (kruskal_anova["H"].item() - k + 1) / (n - k)

print("--- Kruskal-Wallis Omnibus ---")
print(kruskal_anova)
print(f'ETA-squared: {eta_sq_h:.4f}')

# 3. Post-Hoc: Dunn's Test (The Standard for Kruskal-Wallis)
# This compares groups (Satisfied vs Deprived, etc.) based on Mean Ranks
posthoc = sp.posthoc_dunn(df2, val_col=dv, group_col=group_col, p_adjust='bonferroni')

print("\n--- Dunn's Post-Hoc (P-values with Bonferroni Correction) ---")
print(posthoc.round(4))

--- Kruskal-Wallis Omnibus ---
         Source  ddof1          H         p-unc
Kruskal  Group3      3  45.665449  6.680727e-10
ETA-squared: 0.0034

--- Dunn's Post-Hoc (P-values with Bonferroni Correction) ---
                     Couple Deprivation  Couple Mixed  Couple Satisfaction  \
Couple Deprivation               1.0000        0.2345               1.0000   
Couple Mixed                     0.2345        1.0000               0.7862   
Couple Satisfaction              1.0000        0.7862               1.0000   
Couple Saturation                0.0000        0.0344               0.0000   

                     Couple Saturation  
Couple Deprivation              0.0000  
Couple Mixed                    0.0344  
Couple Satisfaction             0.0000  
Couple Saturation               1.0000  


In [67]:
import pandas as pd
import numpy as np
from scipy.stats import chi2_contingency

def report_binary_chi(df, dv, group_col='Group3', baseline='Couple Satisfaction'):
    # 1. Create the crosstab (counts of 0s and 1s per group)
    ct = pd.crosstab(df[dv], df[group_col])

    # 2. Run Chi-Squared Omnibus
    chi2, p_val, dof, expected = chi2_contingency(ct)

    # 3. Effect Size: Cramer's V
    n = ct.sum().sum()
    r, k = ct.shape
    v = np.sqrt(chi2 / (n * (min(r, k) - 1)))

    # 4. Post-hoc: Adjusted Standardized Residuals
    row_totals = ct.sum(axis=1).values
    col_totals = ct.sum(axis=0).values
    # Expected formula for residuals
    v_adj = np.outer(row_totals, col_totals) * (1 - row_totals[:, None] / n) * (1 - col_totals / n) / n
    adj_residuals = (ct.values - expected) / np.sqrt(v_adj)
    res_df = pd.DataFrame(adj_residuals, index=ct.index, columns=ct.columns)

    # 5. Build the comparison string (Comparing Baseline to others)
    # We look at Category '1' (usually 'Yes') to determine direction
    comparisons = []

    # Mapping for your group abbreviations
    name_map = {
        'Couple Satisfaction': 'S',
        'Couple Deprivation': 'Dep',
        'Couple Saturation': 'Sat',
        'Couple Mixed': 'Mix'
    }

    # Percentages for direction (e.g., % of 'Yes' in each group)
    percentages = (ct / ct.sum()) * 100
    base_pct = percentages.loc[1, baseline]

    for other_group in [g for g in ct.columns if g != baseline]:
        print(other_group)
        # A cell is significant if |Z| > 1.96
        # We check the Z-score for Category '1' (Yes)
        z_score = res_df.loc[1, other_group]
        print(z_score)

        # If the groups are significantly different from each other
        # print(name_map.get(other_group, other_group).replace('Couple ', '')[:3])
        if abs(z_score) > 1.96:
            other_pct = percentages.loc[1, other_group]
            symbol = ">" if base_pct > other_pct else "<"

            short_base = name_map.get(baseline, 'S')
            short_other = name_map.get(other_group)
            comparisons.append(f"{short_base} {symbol} {short_other}")

    # Format output
    sig = "***" if p_val < .001 else "**" if p_val < .01 else "*" if p_val < .05 else "ns"

    print(f"Variable: {dv}")
    print(f"Stats: η² (V) = {v:.3f}, {sig}")
    print(f"Post-hoc: {'; '.join(comparisons) if comparisons else 'ns'}")
    print("-" * 30)

# Run it
report_binary_chi(df2, 'Cohabitation')

Couple Deprivation
7.0145503138225465
Couple Mixed
-0.6589033823530143
Couple Saturation
-2.768726815575294
Variable: Cohabitation
Stats: η² (V) = 0.061, ***
Post-hoc: S < Dep; S > Sat
------------------------------


In [18]:
dv = "Married"


# drop = True
#
# count_base = couples_satisfied[dv].value_counts(dropna=drop)
# count_d = couples_deprived[dv].value_counts(dropna=drop)
# count_s = couples_saturated[dv].value_counts(dropna=drop)
# count_m = couples_mixed[dv].value_counts(dropna=drop)
#
# summ = pd.DataFrame({
#     'Satisfied': count_base,
#     'Deprived': count_d,
#     'Saturated': count_s,
#     'Mixed': count_m,
# })
#
# summ_cond = ["Deprived", "Saturated", "Mixed"]
# for cond in summ_cond:
#     print('Pairwise ---', cond)
#     contingency = summ.filter(items=['Satisfied', cond])
#     N = contingency.to_numpy().sum()
#     res = chi2_contingency(contingency)
#     chi_stat = res[0]
#     print(f"Dof:{res.dof}, Chi-square: {res.statistic}, p-value: {res.pvalue}")
#
#     # Calculate Cramer's V
#     r, k = contingency.shape
#     result = np.sqrt(chi_stat / (N * (min(r-1, k-1))))
#
#     print(f"Cramer's V: {result}")

         Source  ddof1          H         p-unc
Kruskal  Group3      3  45.665449  6.680727e-10
ETA-squared 0.00338802896202501
--- (Standardized Residuals) ---
Education             0.0   1.0   2.0   3.0   4.0   6.0   7.0   8.0
Group3                                                             
Couple Deprivation  -3.97  0.74 -0.82 -0.56 -0.01  0.47  0.65  1.28
Couple Mixed        -1.25 -0.42  2.20  1.31  1.33 -1.79 -0.29 -0.22
Couple Satisfaction  5.22 -1.73 -0.96 -1.53 -0.16  0.94 -0.10 -0.50
Couple Saturation    2.73  1.84  2.90  4.16 -0.87 -2.03 -1.70 -2.99
Level      | Comparison      | Significance
---------------------------------------------
0.0        | S > D          | Significant
0.0        | S > Mixed      | Significant
0.0        | S > Sat        | Significant
---------------------------------------------
1.0        | S < D          | Significant
1.0        | S < Mixed      | Not Significant
1.0        | S < Sat        | Significant
---------------------------------------

In [19]:
dv = 'Work Status'
kruskal_anova = pg.kruskal(data=df2, dv=dv, between='Group3')
n = df2[dv].count()
k = df2["Group3"].nunique()
eta_sq_h = (kruskal_anova["H"].item() - k + 1) / (n - k)

print(kruskal_anova)
print('ETA-squared', eta_sq_h)

# 1. Run the Chi-square independence test
expected, observed, stats = pg.chi2_independence(df2, x='Group3', y=dv)

# 2. Calculate Standardized Residuals
# These identify exactly which levels (0, 1, 2...) are different
residuals = (observed - expected) / (expected**0.5)

# 3. Print the residuals for the "Satisfied" row vs the others
print("--- (Standardized Residuals) ---")
print(residuals.round(2))

df_res = pd.DataFrame(residuals, index=groups)
generate_interpretations(df_res)

         Source  ddof1           H         p-unc
Kruskal  Group3      3  135.454842  3.608807e-29
ETA-squared 0.009853071605688202
--- (Standardized Residuals) ---
Work Status           0.0   1.0   2.0
Group3                               
Couple Deprivation   3.60  4.66 -3.87
Couple Mixed         0.25 -0.43  0.13
Couple Satisfaction -5.21 -5.22  4.80
Couple Saturation   -0.42 -3.29  1.90
Level      | Comparison      | Significance
---------------------------------------------
0.0        | S < D          | Significant
0.0        | S < Mixed      | Significant
0.0        | S < Sat        | Significant
---------------------------------------------
1.0        | S < D          | Significant
1.0        | S < Mixed      | Significant
1.0        | S < Sat        | Not Significant
---------------------------------------------
2.0        | S > D          | Significant
2.0        | S > Mixed      | Significant
2.0        | S > Sat        | Significant
--------------------------------------------

In [20]:
dv = 'Urbanization'
kruskal_anova = pg.kruskal(data=df2, dv=dv, between='Group3')
n = df2[dv].count()
k = df2["Group3"].nunique()
eta_sq_h = (kruskal_anova["H"].item() - k + 1) / (n - k)

print(kruskal_anova)
print('ETA-squared', eta_sq_h)

# 1. Run the Chi-square independence test
expected, observed, stats = pg.chi2_independence(df2, x='Group3', y=dv)

# 2. Calculate Standardized Residuals
# These identify exactly which levels (0, 1, 2...) are different
residuals = (observed - expected) / (expected ** 0.5)

# 3. Print the residuals for the "Satisfied" row vs the others
print("--- ", dv, "(Standardized Residuals) ---")
print(residuals.round(2))

df_res = pd.DataFrame(residuals, index=groups)
print("--- ", dv, "(Interpretation) ---")
generate_interpretations(df_res)

         Source  ddof1         H     p-unc
Kruskal  Group3      3  25.16126  0.000014
ETA-squared 0.001675709651394591
---  Urbanization (Standardized Residuals) ---
Urbanization          0.0   1.0   2.0
Group3                               
Couple Deprivation   1.27  1.22 -2.17
Couple Mixed        -1.75 -1.37  2.66
Couple Satisfaction -0.99 -0.52  1.24
Couple Saturation   -0.36 -1.62  1.93
---  Urbanization (Interpretation) ---
Level      | Comparison      | Significance
---------------------------------------------
0.0        | S < D          | Significant
0.0        | S > Mixed      | Not Significant
0.0        | S < Sat        | Not Significant
---------------------------------------------
1.0        | S < D          | Not Significant
1.0        | S > Mixed      | Not Significant
1.0        | S > Sat        | Not Significant
---------------------------------------------
2.0        | S > D          | Significant
2.0        | S < Mixed      | Not Significant
2.0        | S < Sat     

In [21]:
dv = 'Kids'
kruskal_anova = pg.kruskal(data=df2, dv=dv, between='Group3')
n = df2[dv].count()
k = df2["Group3"].nunique()
eta_sq_h = (kruskal_anova["H"].item() - k + 1) / (n - k)

print(kruskal_anova)
print('ETA-squared', eta_sq_h)

# 1. Run the Chi-square independence test
expected, observed, stats = pg.chi2_independence(df2, x='Group3', y=dv)

# 2. Calculate Standardized Residuals
# These identify exactly which levels (0, 1, 2...) are different
residuals = (observed - expected) / (expected ** 0.5)

# 3. Print the residuals for the "Satisfied" row vs the others
print("--- ", dv, "(Standardized Residuals) ---")
print(residuals.round(2))

df_res = pd.DataFrame(residuals, index=groups)
print("--- ", dv, "(Interpretation) ---")
generate_interpretations(df_res)

         Source  ddof1           H          p-unc
Kruskal  Group3      3  509.248086  4.725396e-110
ETA-squared 0.03726248242713801
---  Kids (Standardized Residuals) ---
Kids                   0.0   1.0   2.0   3.0
Group3                                      
Couple Deprivation  -11.52  5.30  7.25  3.62
Couple Mixed          0.66  1.38 -0.73 -2.23
Couple Satisfaction  13.62 -7.52 -7.90 -3.53
Couple Saturation     6.94 -1.94 -5.60 -1.98
---  Kids (Interpretation) ---
Level      | Comparison      | Significance
---------------------------------------------
0.0        | S > D          | Significant
0.0        | S > Mixed      | Significant
0.0        | S > Sat        | Significant
---------------------------------------------
1.0        | S < D          | Significant
1.0        | S < Mixed      | Significant
1.0        | S < Sat        | Significant
---------------------------------------------
2.0        | S < D          | Significant
2.0        | S < Mixed      | Significant
2.0       

In [22]:
count_base = couples_satisfied['Region'].value_counts(dropna=False)
count_d = couples_deprived['Region'].value_counts(dropna=False)
count_s = couples_saturated['Region'].value_counts(dropna=False)
count_m = couples_mixed['Region'].value_counts(dropna=False)

contingency = pd.DataFrame({
    'Satisfied': count_base,
    'Deprived': count_d,
    # 'Saturated': count_s,
    # 'Mixed': count_m,
})

print(contingency)
res = chi2_contingency(contingency)
chi_stat = res[0]

# Performing Cramer's V calculation
# Size of the sample
N = contingency.to_numpy().sum()
# Minimum dimension
minimum_dimension = (min(contingency.shape)-1)

# Calculate Cramer's V
r, k = contingency.shape
result = np.sqrt(chi_stat / (N * (min(r-1, k-1))))

chi2, p, dof, expected = chi2_contingency(contingency)
(expected < 5).sum()
n_violations = (expected < 5).sum()
total_cells = expected.size

print(f"Cells with expected count < 5: {n_violations}/{total_cells}")
print(f"Percentage: {100 * n_violations / total_cells:.1f}%")
print(f"Cramer's V: {result}")
print(f"Dof:{res.dof}, Chi-square: {res.statistic}, p-value: {res.pvalue}")

        Satisfied  Deprived
Region                     
1.0          3210      6858
0.0           533      1273
NaN            99       211
Cells with expected count < 5: 0/6
Percentage: 0.0%
Cramer's V: 0.01814015824374786
Dof:2, Chi-square: 4.009332116062472, p-value: 0.13470527192907278


In [23]:
dv = 'Married'
kruskal_anova = pg.kruskal(data=df2, dv=dv, between='Group3')
n = df2[dv].count()
k = df2["Group3"].nunique()
eta_sq_h = (kruskal_anova["H"].item() - k + 1) / (n - k)

print(kruskal_anova)
print('ETA-squared', eta_sq_h)

# 1. Run the Chi-square independence test
expected, observed, stats = pg.chi2_independence(df2, x='Group3', y=dv)

# 2. Calculate Standardized Residuals
# These identify exactly which levels (0, 1, 2...) are different
residuals = (observed - expected) / (expected ** 0.5)

# 3. Print the residuals for the "Satisfied" row vs the others
print("--- ", dv, "(Standardized Residuals) ---")
print(residuals.round(2))

df_res = pd.DataFrame(residuals, index=groups)
print("--- ", dv, "(Interpretation) ---")
generate_interpretations(df_res)

         Source  ddof1           H         p-unc
Kruskal  Group3      3  187.920791  1.716877e-40
ETA-squared 0.013603118338376687
---  Married (Standardized Residuals) ---
Married               0.0   1.0
Group3                         
Couple Deprivation  -6.61  4.90
Couple Mixed         0.82 -0.61
Couple Satisfaction  7.12 -5.28
Couple Saturation    5.12 -3.79
---  Married (Interpretation) ---
Level      | Comparison      | Significance
---------------------------------------------
0.0        | S > D          | Significant
0.0        | S > Mixed      | Significant
0.0        | S > Sat        | Significant
---------------------------------------------
1.0        | S < D          | Significant
1.0        | S < Mixed      | Significant
1.0        | S < Sat        | Not Significant
---------------------------------------------


In [24]:
dv = 'Cohabitation'
kruskal_anova = pg.kruskal(data=df2, dv=dv, between='Group3')
n = df2[dv].count()
k = df2["Group3"].nunique()
eta_sq_h = (kruskal_anova["H"].item() - k + 1) / (n - k)

print(kruskal_anova)
print('ETA-squared', eta_sq_h)

# 1. Run the Chi-square independence test
expected, observed, stats = pg.chi2_independence(df2, x='Group3', y=dv)

# 2. Calculate Standardized Residuals
# These identify exactly which levels (0, 1, 2...) are different
residuals = (observed - expected) / (expected ** 0.5)

# 3. Print the residuals for the "Satisfied" row vs the others
print("--- ", dv, "(Standardized Residuals) ---")
print(residuals.round(2))

df_res = pd.DataFrame(residuals, index=groups)
print("--- ", dv, "(Interpretation) ---")
generate_interpretations(df_res)

         Source  ddof1          H         p-unc
Kruskal  Group3      3  51.342131  4.136180e-11
ETA-squared 0.003555614214657242
---  Cohabitation (Standardized Residuals) ---
Cohabitation            0     1
Group3                         
Couple Deprivation  -4.17  1.26
Couple Mixed         0.62 -0.19
Couple Satisfaction  4.75 -1.44
Couple Saturation    2.58 -0.78
---  Cohabitation (Interpretation) ---
Level      | Comparison      | Significance
---------------------------------------------
0          | S > D          | Significant
0          | S > Mixed      | Significant
0          | S > Sat        | Significant
---------------------------------------------
1          | S < D          | Significant
1          | S < Mixed      | Not Significant
1          | S < Sat        | Not Significant
---------------------------------------------


In [25]:
drop = True
count_base = couples_satisfied['Sex'].value_counts(dropna=drop)
count_d = couples_deprived['Sex'].value_counts(dropna=drop)
count_s = couples_saturated['Sex'].value_counts(dropna=drop)
count_m = couples_mixed['Sex'].value_counts(dropna=drop)

contingency = pd.DataFrame({
    'Satisfied': count_base,
    # 'Deprived': count_d,
    # 'Saturated': count_s,
    'Mixed': count_m,
})

print(contingency)
res = chi2_contingency(contingency)
chi_stat = res[0]

# Performing Cramer's V calculation
# Size of the sample
N = contingency.to_numpy().sum()
# Minimum dimension
minimum_dimension = (min(contingency.shape)-1)

# Calculate Cramer's V
r, k = contingency.shape
result = np.sqrt(chi_stat / (N * (min(r-1, k-1))))

chi2, p, dof, expected = chi2_contingency(contingency)
(expected < 5).sum()
n_violations = (expected < 5).sum()
total_cells = expected.size

print(f"Cells with expected count < 5: {n_violations}/{total_cells}")
print(f"Percentage: {100 * n_violations / total_cells:.1f}%")
print(f"Cramer's V: {result}")
print(f"Dof:{res.dof}, Chi-square: {res.statistic}, p-value: {res.pvalue}")

     Satisfied  Mixed
Sex                  
0.0       1916    331
1.0       1918    328
Cells with expected count < 5: 0/4
Percentage: 0.0%
Cramer's V: 0.0011659593118967008
Dof:1, Chi-square: 0.006108058798674834, p-value: 0.9377054963235312


In [26]:

dv = 'Relationship Sex'
kruskal_anova = pg.kruskal(data=df2, dv=dv, between='Group3')
n = df2[dv].count()
k = df2["Group3"].nunique()
eta_sq_h = (kruskal_anova["H"].item() - k + 1) / (n - k)

print(kruskal_anova)
print('ETA-squared', eta_sq_h)

# 1. Run the Chi-square independence test
expected, observed, stats = pg.chi2_independence(df2, x='Group3', y=dv)

# 2. Calculate Standardized Residuals
# These identify exactly which levels (0, 1, 2...) are different
residuals = (observed - expected) / (expected ** 0.5)

# 3. Print the residuals for the "Satisfied" row vs the others
print("--- ", dv, "(Standardized Residuals) ---")
print(residuals.round(2))

df_res = pd.DataFrame(residuals, index=groups)
print("--- ", dv, "(Interpretation) ---")
generate_interpretations(df_res)

         Source  ddof1         H     p-unc
Kruskal  Group3      3  3.461688  0.325765
ETA-squared 3.395766311696073e-05
---  Relationship Sex (Standardized Residuals) ---
Relationship Sex        0     1
Group3                         
Couple Deprivation   0.12 -0.65
Couple Mixed         0.01 -0.05
Couple Satisfaction -0.04  0.23
Couple Saturation   -0.32  1.69
---  Relationship Sex (Interpretation) ---
Level      | Comparison      | Significance
---------------------------------------------
0          | S < D          | Not Significant
0          | S < Mixed      | Not Significant
0          | S > Sat        | Not Significant
---------------------------------------------
1          | S > D          | Not Significant
1          | S > Mixed      | Not Significant
1          | S < Sat        | Not Significant
---------------------------------------------


In [57]:
dv = 'Age'
anova = pg.anova(data=df2, dv=dv, between='Group3')

print("---------- ", dv, "ANOVA ----------")
print(anova)
print("--------------------------------")

summ_cond = ["Couple Deprivation", "Couple Saturation", "Couple Mixed"]
for cond in summ_cond:
    ttest = pg.ttest(
        x=df2[df2['Group3'] == 'Couple Satisfaction']['Age'],
        y=df2[df2['Group3'] == cond]['Age'],
    )
    print("---------- ", cond, "ttest ----------")
    print(ttest)

# Pairwise T-tests with Bonferroni correction to avoid false positives
posthocs = pg.pairwise_tests(data=df2, dv='Age', between='Group3', padjust='bonf')
print(posthocs)

# expected, observed, stats = pg.chi2_independence(df2, x='Group3', y=dv)
#
# # 2. Calculate Standardized Residuals
# # These identify exactly which levels (0, 1, 2...) are different
# residuals = (observed - expected) / (expected ** 0.5)
#
# # 3. Print the residuals for the "Satisfied" row vs the others
# print("--- ", dv, "(Standardized Residuals) ---")
# print(residuals.round(2))
# #
# # df_res = pd.DataFrame(residuals, index=groups)
# # print("--- ", dv, "(Interpretation) ---")
# # generate_interpretations(df_res)

----------  Age ANOVA ----------
   Source  ddof1  ddof2          F         p-unc       np2
0  Group3      3  13596  42.300862  3.359902e-27  0.009248
--------------------------------
----------  Couple Deprivation ttest ----------
               T          dof alternative         p-val           CI95%  \
T-test -8.206534  6812.984045   two-sided  2.699149e-16  [-1.74, -1.07]   

         cohen-d       BF10  power  
T-test  0.166255  8.049e+12    1.0  
----------  Couple Saturation ttest ----------
               T          dof alternative     p-val         CI95%   cohen-d  \
T-test  3.855382  1084.774546   two-sided  0.000122  [0.67, 2.07]  0.151667   

          BF10     power  
T-test  71.511  0.967927  
----------  Couple Mixed ttest ----------
               T         dof alternative     p-val          CI95%   cohen-d  \
T-test -1.435701  936.433938   two-sided  0.151421  [-1.23, 0.19]  0.057744   

         BF10     power  
T-test  0.132  0.278086  
  Contrast                    

In [28]:
from scipy.stats import ttest_ind

count_base = couples_satisfied['Age']
count_d = couples_deprived['Age']
count_s = couples_saturated['Age']
count_m = couples_mixed['Age']

rvs1 = count_base
rvs2 = count_m

t, p = ttest_ind(rvs1, rvs2, equal_var=False)
m1, sd1, n1 = rvs1.mean(), rvs1.std(ddof=1), len(rvs1)
m2, sd2, n2 = rvs2.mean(), rvs2.std(ddof=1), len(rvs2)
s1, s2 = sd1 ** 2, sd2 ** 2

df = (s1 / n1 + s2 / n2) ** 2 / (
        (s1 / n1) ** 2 / (n1 - 1) + (s2 / n2) ** 2 / (n2 - 1)
)
sd_pooled = np.sqrt((s1 + s2) / 2)
d = (m1 - m2) / sd_pooled


report = (
    # f"An independent-samples Welch’s t-test showed that "
    # f"Group A (M = {m1:.2f}, SD = {sd1:.2f}) scored higher than "
    # f"Group B (M = {m2:.2f}, SD = {sd2:.2f}), "
    f"t({df:.2f}) = {t:.2f}, p {p}, d = {d:.2f}."
)
print(report)

t(936.43) = -1.44, p 0.15142143056957713, d = -0.06.


In [29]:
# rename_map = {
#     'SubGroup1': 'Satisfied',
#     'SubGroup2': 'Deprived_Me',
#     'SubGroup3': 'Deprived_Couples',
#     'SubGroup7': 'Deprived_Partner',
#     'SubGroup5': 'Saturated_Me',
#     'SubGroup6': 'Saturated_Couples',
#     'SubGroup8': 'Saturated_Partner',
#     'SubGroup4': 'Mixed_Couples',
#     'SubGroup9': 'Mixed_Couples'
# }

# rename_map = {
#     'SubGroup1': 'Satisfied',
#     'SubGroup2': 'Deprived_One',
#     'SubGroup3': 'Deprived_Couples',
#     'SubGroup7': 'Deprived_One',
#     'SubGroup5': 'Saturated_One',
#     'SubGroup6': 'Saturated_Couples',
#     'SubGroup8': 'Saturated_One',
#     'SubGroup4': 'Mixed_Couples',
#     'SubGroup9': 'Mixed_Couples'
# }

# df2['Group1'] = df2['Group1'].replace(rename_map)
# Satisfied
df2.loc[df2['Group3'] == 'Couple Satisfaction', 'Group4'] = 'Couple Satisfaction'

# Deprived groups
# df2.loc[df2['Group3'] == 'Couple Deprivation', 'Group4'] = 'Couple Deprivation'
df2.loc[df2['Group2'] == 'One-sided Deprivation', 'Group4'] = 'Deprived_One'
df2.loc[df2['Group1'] == 'SubGroup3', 'Group4'] = 'Deprived_Both'

# Saturated groups
# df2.loc[df2['Group3'] == 'Couple Saturation', 'Group4'] = 'Couple Saturation'
df2.loc[df2['Group2'] == 'One-sided Saturation', 'Group4'] = 'Saturated_One'
df2.loc[df2['Group1'] == 'SubGroup6', 'Group4'] = 'Saturated_Both'

df2.loc[df2['Group3'] == 'Couple Mixed', 'Group4'] = 'Couple Mixed'

In [30]:
df2['Group4'].value_counts()

Group4
Deprived_One           4940
Couple Satisfaction    3842
Deprived_Both          3402
Saturated_One           666
Couple Mixed            660
Saturated_Both           90
Name: count, dtype: int64

In [31]:
groups = [
    'Deprived_One',
    'Couple Satisfaction',
    'Deprived_Both',
    'Saturated_One',
    'Couple Mixed',
    'Saturated_Both'
]

traits = ['Neuroticism', 'Extraversion', 'Agreeableness', 'Conscientiousness', 'Openness',
          'Depressiveness', 'Loneliness', 'Self-esteem', 'Life Satisfaction', 'Health',
          "Communication Quality", "Relationship Satisfaction", "Conflict Management"]

stats = (
    df2
    .groupby('Group4')[traits]
    .agg(['mean', 'std'])
)
# print(stats)

for group in groups:
    # 1. Clean up the columns (optional, but makes things easier)
    df_tidy = stats.stack(level=0)  # Moves Neuroticism/Extraversion to the rows

    # 2. Select a specific group to display (e.g., 'Couple Satisfaction')
    satisfied_group = df_tidy.loc[group].copy()

    # 3. Rename columns for the final look
    satisfied_group.columns = ['Score', 'SD']

    # 4. Round to 2 decimal places as in your example
    satisfied_group = satisfied_group.round(2)
    print("--------------", group, "--------------")
    print(satisfied_group)

-------------- Deprived_One --------------
                           Score    SD
Agreeableness              10.97  1.95
Communication Quality      21.87  3.37
Conflict Management        23.40  3.11
Conscientiousness          10.97  2.13
Depressiveness              5.06  1.61
Extraversion                9.30  2.03
Health                      3.77  0.79
Life Satisfaction           7.86  1.57
Loneliness                  1.85  1.02
Neuroticism                 7.99  2.41
Openness                    9.93  2.23
Relationship Satisfaction   8.67  1.54
Self-esteem                11.44  2.43
-------------- Couple Satisfaction --------------
                           Score    SD
Agreeableness              11.10  1.95
Communication Quality      23.44  3.05
Conflict Management        24.35  2.91
Conscientiousness          11.06  2.11
Depressiveness              4.82  1.54
Extraversion                9.42  2.05
Health                      3.91  0.77
Life Satisfaction           8.20  1.51
Loneliness