In [21]:
import numpy as np
import pandas as pd
import seaborn as sns
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.miscmodels.ordinal_model import OrderedModel


In [22]:
# Load the dataset from a relative path (resolved against the current working directory).
df = pd.read_csv('source/FReDA2.csv')

In [23]:
def val_to_category(val):
    """Bin a numeric score into an integer category from 0 to 5.

    The category is the position of the first upper bound that ``val`` is
    strictly below: ``val < 0.5`` -> 0, ``val < 2`` -> 1, ``val < 3`` -> 2,
    ``val < 4`` -> 3, ``val < 5`` -> 4. Anything else -> 5.

    Values for which every ``<`` comparison is false (for example NaN) fall
    through to the last category, 5.
    """
    upper_bounds = (0.5, 2, 3, 4, 5)
    for category, bound in enumerate(upper_bounds):
        if val < bound:
            return category
    # Not below any bound: top category.
    return len(upper_bounds)


# Add a categorised copy of each touch score: '<who> Frequency of Touch' ->
# '<who> FreqCat' and '<who> Wish for Touch' -> '<who> WishCat'.
for who in ('Anchor', 'Partner', 'Couple'):
    for target, source in (('FreqCat', 'Frequency of Touch'), ('WishCat', 'Wish for Touch')):
        df[f'{who} {target}'] = df[f'{who} {source}'].apply(val_to_category)

In [24]:
# Predictor column names for the anchor-side models: first the columns whose
# names start with 'Anchor ', then the columns without that prefix.
anchor_predictors = [
    f'Anchor {name}'
    for name in (
        'Difference of Touch',

        'Age',
        'Work Status',

        'Extraversion',
        'Agreeableness',
        'Conscientiousness',
        'Openness',
        'Neuroticism',
        'Conservatism',
        'Religiosity',

        'Depressiveness',
        'Loneliness',
        'Self-esteem',
        'Life Satisfaction',
        'Health',

        'Communication Quality',
        'Relationship Satisfaction',
        'Conflict Management',
    )
] + [
    'Relationship Sex',
    'Relationship Length',
    'Age difference',
    'Married',
    'Cohabitation',
    'Kids',
]
# Predictor column names for the partner-side models.
# NOTE: every entry needs its own trailing comma. Python silently joins
# adjacent string literals, so a missing comma merges two column names into
# one name that does not exist in the data.
partner_predictors = [
    'Partner Difference of Touch',

    'Partner Age',
    'Partner Work Status',
    'Partner East',
    'Partner Urbanization',

    'Partner Extraversion',
    'Partner Agreeableness',
    'Partner Conscientiousness',
    'Partner Openness',
    'Partner Neuroticism',
    'Partner Conservatism',
    'Partner Religiosity',

    'Partner Depressiveness',
    'Partner Loneliness',
    'Partner Self-esteem',
    'Partner Life Satisfaction',
    'Partner Health',

    'Partner Communication Quality',
    'Partner Relationship Satisfaction',
    'Partner Conflict Management',

    'Relationship Sex',
    'Relationship Length',
    'Age difference',
    'Married',
    'Cohabitation',
    'Kids',
]

In [25]:
# Outcome columns per perspective: the categorised frequency and wish columns
# ('<who> FreqCat', '<who> WishCat') created by the val_to_category step.
anchor_dependent, partner_dependent, couple_dependent = (
    [f'{who} FreqCat', f'{who} WishCat']
    for who in ('Anchor', 'Partner', 'Couple')
)

In [26]:
# Show how many rows fall into each value of the 'Group' column.
df['Group'].value_counts()

Group
Disagreement    3131
Agreement       1922
Concession      1746
Name: count, dtype: int64

In [27]:
# Show how many rows fall into each value of the 'SubGroup' column.
df['SubGroup'].value_counts()

SubGroup
Disagreement                3131
Deprived Agreement          1701
High Frequency Agreement    1509
Low Frequency Agreement      405
Saturated Agreement           45
Non-Touch Agreement            8
Name: count, dtype: int64

In [28]:
# Keep only complete cases: drop every row with a missing value in ANY column,
# then renumber the index from 0. This rebinds `df`, so cells below see the
# reduced frame.
df = df.dropna().reset_index(drop=True)

In [34]:
# One frame per 'Group' value, each with incomplete rows dropped and a fresh
# 0-based index.
group_agreement, group_disagreement, group_concession = (
    df[df['Group'] == label].dropna().reset_index(drop=True)
    for label in ('Agreement', 'Disagreement', 'Concession')
)

In [35]:
x = df[anchor_predictors]

# Fit one ordered-logit model per anchor outcome on the same predictor matrix
# and print its summary table.
for d in anchor_dependent:
    y = df[d]
    mod = OrderedModel(y, x, distr='logit')
    res = mod.fit(method='bfgs')
    print(res.summary())

# Multicollinearity check. The VIFs depend only on the predictors, not on the
# outcome, so they are computed and printed once instead of once per model.
#
# variance_inflation_factor regresses each column on the remaining columns
# exactly as given. Without an intercept column that auxiliary regression is
# uncentred and the resulting VIFs are inflated, so an intercept is added to a
# copy used only here. OrderedModel above still receives `x` without a constant.
x_vif = sm.add_constant(x)
vif_rows = [
    (variance_inflation_factor(x_vif.values, i), col)
    for i, col in enumerate(x_vif.columns)
    if col != 'const'  # the intercept's own VIF is not of interest
]
vif = pd.DataFrame(vif_rows, columns=["VIF", "Variable"])
print(vif)

Optimization terminated successfully.
         Current function value: 1.167540
         Iterations: 71
         Function evaluations: 74
         Gradient evaluations: 74
                             OrderedModel Results                             
Dep. Variable:         Anchor FreqCat   Log-Likelihood:                -1467.6
Model:                   OrderedModel   AIC:                             2993.
Method:            Maximum Likelihood   BIC:                             3142.
Date:                Tue, 29 Jul 2025                                         
Time:                        15:15:46                                         
No. Observations:                1257                                         
Df Residuals:                    1228                                         
Df Model:                          24                                         
                                       coef    std err          z      P>|z|      [0.025      0.975]
----------------

In [10]:
# Pick the subset of rows to inspect. Alternative selections by 'SubGroup'
# are kept here, commented out, for quick switching:
# test_group = df[df['SubGroup'] == 'High Frequency Agreement']
# test_group = df[df['SubGroup'] == 'Low Frequency Agreement']
test_group = df[df['Group'] == 'Concession']
# Number of rows in the selected subset (length of its 'Id' column).
print(test_group['Id'].size)

1257


In [None]:
# Rule-of-thumb interpretation of the variance inflation factor (VIF), taken from the Minitab documentation:
# https://support.minitab.com/en-us/minitab/help-and-how-to/statistical-modeling/regression/how-to/fit-binary-logistic-model/interpret-the-results/all-statistics-and-graphs/coefficients-and-regression-equation/
#
#   VIF            Multicollinearity
#   VIF = 1        None
#   1 < VIF < 5    Moderate
#   VIF > 5        High

                                   Anchor Difference of Touch  \
Anchor Difference of Touch                           1.000000   
Partner Difference of Touch                          0.200685   
Anchor Age                                           0.053660   
Anchor Work Status                                  -0.060348   
Partner Age                                          0.063646   
Partner Work Status                                 -0.026638   
Anchor Extraversion                                 -0.039591   
Anchor Agreeableness                                -0.027117   
Anchor Conscientiousness                            -0.021775   
Anchor Openness                                     -0.032950   
Anchor Neuroticism                                   0.096741   
Anchor Conservatism                                  0.002718   
Partner Extraversion                                -0.000669   
Partner Agreeableness                               -0.060712   
Partner Conscientiousness

In [18]:


# TODO: Convert the fitted predictor coefficients (log-odds) to odds ratios by exponentiating them.


Optimization terminated successfully.
         Current function value: 1.341527
         Iterations: 61
         Function evaluations: 63
         Gradient evaluations: 63
                             OrderedModel Results                             
Dep. Variable:         Anchor FreqCat   Log-Likelihood:                -6423.2
Model:                   OrderedModel   AIC:                         1.293e+04
Method:            Maximum Likelihood   BIC:                         1.321e+04
Date:                Tue, 29 Jul 2025                                         
Time:                        14:25:00                                         
No. Observations:                4788                                         
Df Residuals:                    4745                                         
Df Model:                          38                                         
                                        coef    std err          z      P>|z|      [0.025      0.975]
---------------

KeyboardInterrupt: 