In [56]:
import pandas as pd
import numpy as np
import pingouin as pg
import statsmodels.api as sm

In [57]:
# Load the dataset into `df`; the path is relative to the notebook's working directory.
df = pd.read_csv('source/FReDA4.csv')

In [58]:
# Row count per Group3 category in the loaded data (before any rows are dropped).
print(df["Group3"].value_counts())

Group3
Couple Deprivation     8342
Couple Satisfaction    3842
Couple Saturation       756
Couple Mixed            660
Name: count, dtype: int64


In [59]:
# Predictor columns shared by the cells below: used as the dropna subset and as
# the regressors of the importance regressions. Order matters for the result tables.
traits = [
    # personality scores
    'Neuroticism', 'Extraversion', 'Openness', 'Agreeableness', 'Conscientiousness',
    # individual well-being
    'Depressiveness', 'Loneliness', 'Self-esteem', 'Life Satisfaction', 'Health',
    # partnership / household
    'Married', 'Cohabitation', 'Kids',
    # relationship quality
    'Relationship Satisfaction', 'Communication Quality', 'Conflict Management',
]

In [60]:
# Keep only rows that have a value for every column listed in `traits`.
# .copy() gives df_clean its own data, so columns added to it later do not touch df.
# NOTE(review): only the `traits` columns are checked here; 'Frequency' and 'Desire',
# which later cells also model, are not part of the subset — confirm they have no
# missing values in the retained rows.
df_clean = df.dropna(subset=traits).copy()

In [61]:
# Row count per Group3 category after rows with missing trait values were dropped.
print(df_clean["Group3"].value_counts())

Group3
Couple Deprivation     7384
Couple Satisfaction    3334
Couple Saturation       645
Couple Mixed            556
Name: count, dtype: int64


In [62]:

# Name of the outcome column that run_importance_regression reads as `y`.
target = 'Frequency'


def run_importance_regression(data, label):
    """Regress the global `target` on the z-scored `traits` and tabulate the fit.

    Every predictor is centred and divided by its own sample standard deviation
    within `data`; the outcome is left in its original units. The statsmodels
    summary of the fitted OLS model is printed as a side effect.

    Parameters
    ----------
    data : pandas.DataFrame
        Must contain every column named in `traits` plus the `target` column.
    label : str
        Prefix for the result columns (`<label>_coef`, `<label>_p-value`).

    Returns
    -------
    pandas.DataFrame
        One row per entry of `traits`, indexed by trait name, with columns
        'Trait', '<label>_coef' and '<label>_p-value'. A trait that has no
        variance in `data` cannot be standardized; it is left out of the fit
        and reported as NaN in both columns.

    Raises
    ------
    ValueError
        If no trait has a positive standard deviation in `data`
        (for example when `data` is empty or has a single row).
    """
    X = data[traits]
    y = data[target]

    # A zero (or undefined) standard deviation would turn the whole column
    # into NaN after scaling and break the fit, so such columns are set aside.
    spread = X.std()
    usable = spread[spread > 0].index
    if len(usable) == 0:
        raise ValueError(f"[{label}] no predictor has positive variance; cannot fit")
    skipped = [name for name in traits if name not in usable]
    if skipped:
        print(f"[{label}] skipped predictors without variance: {skipped}")

    # Standardize X to get 'Importance' (Z-scores)
    X_std = (X[usable] - X[usable].mean()) / spread[usable]
    # has_constant='add' guarantees the intercept column is always present,
    # instead of being silently skipped by the default has_constant='skip'.
    X_std = sm.add_constant(X_std, has_constant='add')

    model = sm.OLS(y, X_std).fit()
    print(model.summary())

    # Pick coefficients by trait name rather than by position so the values
    # can never be shifted against the 'Trait' column; skipped traits become NaN.
    return pd.DataFrame({
        'Trait': traits,
        f'{label}_coef': model.params.reindex(traits),
        f'{label}_p-value': model.pvalues.reindex(traits)
    })


# 1. Overall importance: one regression on the full cleaned sample.
overall_res = run_importance_regression(df_clean, 'Overall')

# 2. Per-group importance.
#    groupby skips rows whose Group3 is missing; filtering with `== label` on a
#    NaN label would select zero rows and make the regression fail.
#    sort=False keeps the groups in order of first appearance, like .unique().
group_results = []
for group_name, group_data in df_clean.groupby('Group3', sort=False):
    group_results.append(run_importance_regression(group_data, group_name))

# 3. Merge everything into one master comparison table (one row per trait,
#    one coefficient / p-value column pair per regression).
final_importance = overall_res
for res in group_results:
    final_importance = final_importance.merge(res, on='Trait')

print(final_importance)

                            OLS Regression Results                            
Dep. Variable:              Frequency   R-squared:                       0.316
Model:                            OLS   Adj. R-squared:                  0.315
Method:                 Least Squares   F-statistic:                     344.1
Date:                Tue, 17 Feb 2026   Prob (F-statistic):               0.00
Time:                        17:06:15   Log-Likelihood:                -17641.
No. Observations:               11919   AIC:                         3.532e+04
Df Residuals:                   11902   BIC:                         3.544e+04
Df Model:                          16                                         
Covariance Type:            nonrobust                                         
                                coef    std err          t      P>|t|      [0.025      0.975]
---------------------------------------------------------------------------------------------
const                 

In [65]:
import numpy as np
import statsmodels.api as sm

# 1. Build the binary outcome and the predictor matrix.
#    is_deprived is 1 for "Couple Deprivation" and 0 for every other Group3 value
#    (a missing Group3 also compares unequal and therefore becomes 0).
df_clean['is_deprived'] = (df_clean['Group3'] == "Couple Deprivation").astype(int)

y = df_clean['is_deprived']
# Reuse the shared `traits` list instead of repeating the 16 names, so this model
# cannot drift from the predictors used by the earlier regressions.
# NOTE(review): if Group3 was derived from Frequency and/or Desire, these two
# predictors leak the outcome — confirm how Group3 was constructed.
X = df_clean[['Frequency', 'Desire', *traits]]
X = sm.add_constant(X)

# 2. Fit Logistic Regression
model = sm.Logit(y, X).fit()

# 3. Convert coefficients to Odds Ratios
#    conf_int() is evaluated once; its columns 0 and 1 are the lower and upper
#    bounds on the log-odds scale, exponentiated here like the coefficients.
ci_bounds = model.conf_int()
or_results = pd.DataFrame({
    'Odds Ratio': np.exp(model.params),
    'Lower CI': np.exp(ci_bounds[0]),
    'Upper CI': np.exp(ci_bounds[1]),
    'p-value': model.pvalues
})

Optimization terminated successfully.
         Current function value: 0.456507
         Iterations 8
