In [34]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler
from statsmodels.stats.outliers_influence import variance_inflation_factor

In [35]:
# Load the analysis dataset; the path is relative to the notebook's working directory.
df = pd.read_csv(filepath_or_buffer='source/FReDA2.csv')

In [36]:
# Reverse the sign of both partners' saturation scores.
# NOTE(review): this rewrites `df` in place, so executing the cell a second time flips
# the sign back -- re-run the load cell first if this cell ever has to be repeated.
_saturation_cols = ["Anchor Saturation of Touch", "Partner Saturation of Touch"]
df[_saturation_cols] = df[_saturation_cols].mul(-1)

In [37]:
def _for_both_partners(measures):
    """Return the column names for `measures`, all 'Anchor ...' first, then all 'Partner ...'."""
    return [f"{role} {measure}" for role in ("Anchor", "Partner") for measure in measures]


# Each list below names the DataFrame columns that make up one predictor group.
# Commented-out entries are variables currently excluded from the group.

touch_factors = _for_both_partners([
    'Frequency of Touch',
    'Wish for Touch',
])

relationship_touch_factors = _for_both_partners([
    'Deprivation of Touch',
    'Saturation of Touch',
])

individual_factors = _for_both_partners([
    'Age',
    'Work Status',
    # 'East',
    # 'Urbanization',
])

personality_factors = _for_both_partners([
    'Extraversion',
    'Agreeableness',
    'Conscientiousness',
    'Openness',
    'Neuroticism',
    'Conservatism',
    # 'Religiosity',
])

wellbeing_factors = _for_both_partners([
    'Depressiveness',
    'Loneliness',
    'Self-esteem',
    'Life Satisfaction',
    'Health',
])

# Couple-level columns come first, followed by the per-partner relationship ratings.
relationship_factors = [
    # 'Relationship Sex',
    'Relationship Length',
    # 'Age difference',
    'Married',
    'Cohabitation',
    'Kids',
] + _for_both_partners([
    'Communication Quality',
    'Relationship Satisfaction',
    'Conflict Management',
])

In [38]:
# Split couples into three mutually exclusive groups based on how each partner
# perceives the amount of touch. The three boolean masks are built once and reused
# so the groups are guaranteed to partition `df`.
_anchor_perception = df['Anchor Perception of Touch']
_partner_perception = df['Partner Perception of Touch']

_both_just_enough = (_anchor_perception == 'Just Enough') & (_partner_perception == 'Just Enough')
_both_saturated = (_anchor_perception == 'Saturated') & (_partner_perception == 'Saturated')
_both_deprived = (_anchor_perception == 'Deprived') & (_partner_perception == 'Deprived')

# Both partners report 'Just Enough'.
_agreement_group = df[_both_just_enough].reset_index(drop=True)

# Both partners report 'Saturated', or both report 'Deprived'.
# NOTE(review): despite the name, the partners in this group give the SAME answer;
# what sets it apart from `_agreement_group` is that the answer is not 'Just Enough'.
_disagreement_group = df[_both_saturated | _both_deprived].reset_index(drop=True)

# Every remaining couple (different answers, or a missing/other perception value).
# Selecting by the negated masks replaces the former outer merge on all columns:
# it yields the same rows, keeps them in their original order, and avoids the cost
# and dtype pitfalls of joining on every column.
_mixed_group = df[~(_both_just_enough | _both_saturated | _both_deprived)].reset_index(drop=True)

In [39]:
def run_blockwise_regression(df, outcome_var, blocks, show_vif=False):
    """
    Fit a sequence of cumulative OLS models, adding one block of predictors at a time.

    Rows with a missing value in the outcome or in ANY predictor of ANY block are
    dropped before the first model is fitted, so all models use the same sample.
    Predictors are z-standardised within that sample; the outcome is left on its
    original scale. The statsmodels summary of every model is printed as it is fitted.

    Parameters:
    - df: DataFrame containing the outcome and all predictor columns
    - outcome_var: string, outcome column name
    - blocks: dict, where keys are block names and values are lists of variable names;
      blocks are entered in dict order and accumulate
    - show_vif: bool, if True also compute and print the variance inflation factor of
      every predictor after each block (default False)

    Returns:
    - List of (block_name, fitted_model) tuples, one per block, in entry order

    Raises:
    - ValueError: if no rows remain after dropping missing values
    """

    # Listwise deletion across every variable that will ever enter a model.
    all_vars = [var for predictors in blocks.values() for var in predictors] + [outcome_var]
    df = df.dropna(subset=all_vars)
    if df.empty:
        raise ValueError(
            f"No complete cases left for outcome '{outcome_var}' after dropping missing values."
        )

    y = df[outcome_var]
    models = []

    current_predictors = []
    for block_name, predictors in blocks.items():
        # Build a new list each time so the caller's block lists are never touched.
        current_predictors = current_predictors + list(predictors)

        # Scale predictors
        scaler = StandardScaler()
        X_block = pd.DataFrame(
            scaler.fit_transform(df[current_predictors]),
            columns=current_predictors,
            index=df.index,
        )
        X_block = sm.add_constant(X_block)

        model = sm.OLS(y, X_block).fit()
        print(f"\n--- {block_name.upper()} BLOCK ---")
        print(model.summary())
        models.append((block_name, model))

        # VIFs are costly (one auxiliary regression per column), so they are only
        # computed on request. The intercept stays in the design matrix but is not
        # reported, since a VIF for the constant is not informative.
        if show_vif:
            design = X_block.values
            vif_rows = [
                (column, variance_inflation_factor(design, position))
                for position, column in enumerate(X_block.columns)
                if column != "const"
            ]
            vif_data = pd.DataFrame(vif_rows, columns=["feature", "VIF"])
            print("\n--- Variance Inflation Factors ---")
            print(vif_data)

    return models

# Predictor blocks in their order of entry; each model contains all blocks up to
# and including its own.
blocks = {
    "Individual": individual_factors,
    "Personality": personality_factors,
    "Wellbeing": wellbeing_factors,
    "Relationship": relationship_factors
}

# The models are fitted on `_disagreement_group`, so the result is stored under a
# name that says so.
models_disagreement = run_blockwise_regression(_disagreement_group, 'Anchor Saturation of Touch', blocks)

# NOTE(review): the original cell stored this result as `models_agreement` although
# the data passed in is `_disagreement_group`. The old name is kept as an alias so
# any later cell that reads it still works -- confirm which group was intended.
models_agreement = models_disagreement


--- INDIVIDUAL BLOCK ---
                                OLS Regression Results                                
Dep. Variable:     Anchor Saturation of Touch   R-squared:                       0.007
Model:                                    OLS   Adj. R-squared:                  0.002
Method:                         Least Squares   F-statistic:                     1.287
Date:                        Wed, 16 Jul 2025   Prob (F-statistic):              0.273
Time:                                15:37:46   Log-Likelihood:                -334.15
No. Observations:                         695   AIC:                             678.3
Df Residuals:                             690   BIC:                             701.0
Df Model:                                   4                                         
Covariance Type:                    nonrobust                                         
                          coef    std err          t      P>|t|      [0.025      0.975]
----------------