In [39]:
import pandas as pd
import numpy as np
import pingouin as pg
import statsmodels.api as sm

In [40]:
df = pd.read_csv('source/FReDA4.csv')

In [41]:
print(df["Group3"].value_counts())

Group3
Couple Deprivation     8342
Couple Satisfaction    3842
Couple Saturation       756
Couple Mixed            660
Name: count, dtype: int64


In [42]:
# traits = ['Neuroticism',
#           'Extraversion',
#           'Openness',
#           'Agreeableness',
#           'Conscientiousness',
#           'Depressiveness',
#           'Loneliness',
#           'Self-esteem',
#           'Life Satisfaction',
#           'Health',
#           'Married',
#           'Cohabitation',
#           'Kids',
#           'Relationship Satisfaction',
#           'Communication Quality',
#           'Conflict Management'
#           ]
traits = [
    'Frequency',
        'Desire',
        'Neuroticism',
        'Extraversion',
        'Openness',
        'Agreeableness',
        'Conscientiousness',
        'Depressiveness',
        'Loneliness',
        'Self-esteem',
        'Life Satisfaction',
        'Health',
        'Age',
        'Sex',
        'Married',
        'Cohabitation',
        'Kids',
        'Relationship Satisfaction',
        'Communication Quality',
        'Conflict Management'
]

In [43]:
df_clean = df.dropna(subset=traits).copy()

In [44]:
print(df_clean["Group3"].value_counts())

Group3
Couple Deprivation     7376
Couple Satisfaction    3326
Couple Saturation       641
Couple Mixed            555
Name: count, dtype: int64


In [45]:

target = 'Frequency'


def run_importance_regression(data, label):
    X = data[traits]
    y = data[target]

    # Standardize X to get 'Importance' (Z-scores)
    X_std = (X - X.mean()) / X.std()
    X_std = sm.add_constant(X_std)  # Add intercept

    model = sm.OLS(y, X_std).fit()
    # Return coefficients and p-values
    results = model.summary()
    print(results)
    results = pd.DataFrame({
        'Trait': traits,
        f'{label}_coef': model.params[1:],
        f'{label}_p-value': model.pvalues[1:]
    })
    return results


# 1. Overall Importance
overall_res = run_importance_regression(df_clean, 'Overall')

# 2. Per Group Importance
group_results = []
for group_name in df_clean['Group3'].unique():
    group_data = df_clean[df_clean['Group3'] == group_name]
    group_results.append(run_importance_regression(group_data, group_name))

# 3. Merge everything into one master comparison table
final_importance = overall_res
for res in group_results:
    final_importance = final_importance.merge(res, on='Trait')

print(final_importance)

                            OLS Regression Results                            
Dep. Variable:              Frequency   R-squared:                       1.000
Model:                            OLS   Adj. R-squared:                  1.000
Method:                 Least Squares   F-statistic:                 4.642e+30
Date:                Wed, 18 Feb 2026   Prob (F-statistic):               0.00
Time:                        12:01:28   Log-Likelihood:             3.6221e+05
No. Observations:               11898   AIC:                        -7.244e+05
Df Residuals:                   11877   BIC:                        -7.242e+05
Df Model:                          20                                         
Covariance Type:            nonrobust                                         
                                coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------
const                 

In [50]:
import numpy as np
import statsmodels.api as sm

df_clean['is_satisfied'] = (df_clean['Group3'] == "Couple Satisfaction").astype(int)
df_clean['is_deprived'] = (df_clean['Group3'] == "Couple Deprivation").astype(int)
df_clean['is_saturated'] = (df_clean['Group3'] == "Couple Saturation").astype(int)
df_clean['is_mixed'] = (df_clean['Group3'] == "Couple Mixed").astype(int)

# y = df_clean['is_satisfied']
# y = df_clean['is_deprived']
# y = df_clean['is_saturated']
y = df_clean['is_mixed']

X = df_clean[
    [
        'Frequency',
        'Desire',
        'Neuroticism',
        'Extraversion',
        'Openness',
        'Agreeableness',
        'Conscientiousness',
        'Depressiveness',
        'Loneliness',
        'Self-esteem',
        'Life Satisfaction',
        'Health',
        'Relationship Satisfaction',
        'Communication Quality',
        'Conflict Management',
        'Age',
        'Sex',
        'Married',
        'Cohabitation',
        'Kids',
    ]
]
X = sm.add_constant(X)

# 2. Fit Logistic Regression
model = sm.Logit(y, X).fit()

# # 3. Convert coefficients to Odds Ratios
# or_results = pd.DataFrame({
#     'Odds Ratio': np.exp(model.params),
#     'Lower CI': np.exp(model.conf_int()[0]),
#     'Upper CI': np.exp(model.conf_int()[1]),
#     'p-value': model.pvalues
# })

params = model.params
conf = model.conf_int()
pvalues = model.pvalues

# 2. Convert to Odds Ratios
df_results = pd.DataFrame({
    'AOR': np.exp(params),
    'Lower CI': np.exp(conf[0]),
    'Upper CI': np.exp(conf[1]),
    'p_value': pvalues
})


# 3. Create the "AOR (95% CI)" string column
def format_aor(row):
    return f"{row['AOR']:.2f} ({row['Lower CI']:.2f}–{row['Upper CI']:.2f})"


df_results['AOR (95% CI)'] = df_results.apply(format_aor, axis=1)


# 4. Add Significance Stars
def add_stars(p):
    if p < 0.001: return '***'
    if p < 0.01: return '**'
    if p < 0.05: return '*'
    return 'ns'


df_results['Sig'] = df_results['p_value'].apply(add_stars)

# 5. Clean up for the final table
# Dropping the intercept (const) as it's usually not reported in this table style
final_table = df_results.drop('const', errors='ignore')[['AOR (95% CI)', 'p_value', 'Sig']]

# Optional: Rename the index for a cleaner look
final_table.index.name = 'Predictor'

print("Table 1. Predictors of Couple Deprivation")
print(final_table)

# To export to Excel:
# final_table.to_excel("Tukey_Regression_Results.xlsx")

Optimization terminated successfully.
         Current function value: 0.174544
         Iterations 8
Table 1. Predictors of Couple Deprivation
                               AOR (95% CI)       p_value  Sig
Predictor                                                     
Frequency                  1.62 (1.45–1.81)  6.092142e-18  ***
Desire                     0.48 (0.43–0.53)  5.840005e-49  ***
Neuroticism                0.99 (0.94–1.04)  6.737207e-01   ns
Extraversion               1.04 (0.99–1.09)  1.446422e-01   ns
Openness                   1.01 (0.97–1.05)  7.133889e-01   ns
Agreeableness              1.02 (0.97–1.06)  5.360219e-01   ns
Conscientiousness          0.99 (0.95–1.03)  6.549502e-01   ns
Depressiveness             1.09 (1.01–1.18)  2.054247e-02    *
Loneliness                 1.23 (1.11–1.35)  2.976690e-05  ***
Self-esteem                1.00 (0.95–1.05)  9.744344e-01   ns
Life Satisfaction          1.02 (0.95–1.09)  5.665988e-01   ns
Health                     0.93 (0.82