In [24]:
import pandas as pd
import numpy as np
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scripts.stats import (test_moderation, interpret_moderation,
                           interpret_direct_effect, format_effect_with_stars)
import processing.scales as scales
import warnings
warnings.filterwarnings('ignore')

# define variable names
tia_scales = scales.tia_scales
hcsds_scales = scales.hcsds_scales
ati_scales = scales.ati_scales
manip_check_scales = scales.manip_check_scales

scale_titles = scales.scale_titles

# TODO: select which variables from OLS regression are interesting
res_of_interest = ['b_interaction', 'se_interaction']

# Load data
data = pd.read_csv('../data/data_scales.csv')
print(f"Total sample size: {len(data)}")
print(f"\nGroup distribution:")
print(data['stimulus_group'].value_counts())

Total sample size: 255

Group distribution:
stimulus_group
1    129
0    126
Name: count, dtype: int64


## Data Preparation

### Variable Centering and Coding

We prepare variables for moderation analysis:

1. **Effect code treatment**: stimulus_group as -0.5 (control) and 0.5 (uncertainty)
2. **Standardize continuous variables**: For better comparison of beta values between variables
3. **Effect code categorical variables**: For symmetric interpretation

In [25]:
# 1. Effect code treatment: control = -0.5, uncertainty = 0.5
data['group_effect'] = data['stimulus_group'] - 0.5

# 2. Normalize all continuous variables
continuous_vars = hcsds_scales + ati_scales + tia_scales + ['age', 'page_submit']

for var in continuous_vars:
    data[f'{var}_c'] = (data[var] - data[var].mean())/data[var].std()

# 3. Effect code gender: male (1) = 0.5, female (2) = -0.5, "other/prefer not to say" (3) = 0
data['gender_c'] = data['gender'].map({1: 0.5, 2: -0.5, 3: 0})

# 4. Mean-center ordinal variables (education, AI experience)
data['education_c'] = data['education'] - data['education'].mean()
data['Q19_c'] = data['Q19'] - data['Q19'].mean()

print(f"Prepared {len(data)} observations for analysis")
print(f"Continuous moderators: {len(continuous_vars)}")
print(f"Total moderators to test: {len(continuous_vars) + 3}")  # + gender, education, Q19

Prepared 255 observations for analysis
Continuous moderators: 11
Total moderators to test: 14


## Moderation analysis

In [36]:
# select variables
independent = 'group_effect'
dependent = tia_scales
moderators = hcsds_scales + ati_scales + ['age_c', 'gender_c', 'education_c', 'Q19_c', 'page_submit']

moderation_results = []

for mod in moderators:
    if mod in scale_titles:
        mod_name = scale_titles[mod]
    else:
        mod_name = mod

    for dep in dependent:
        d_temp = data.copy()
        d_temp['interaction'] = d_temp[independent] * d_temp[mod]
        formula = f'{dep} ~ {independent} + {mod} + interaction'

        # fit model
        model = smf.ols(formula, data=d_temp).fit()

        # append interesting results
        # for key in


TypeError: 'RegressionResultsWrapper' object is not iterable