In [3]:
import pandas as pd
from scipy.stats import ttest_ind
import numpy as np
from scipy import stats
from statsmodels.stats.power import FTestAnovaPower

In [5]:
# Read the CSV into the notebook-wide DataFrame `data`; the path is relative to
# the kernel's working directory.
data = pd.read_csv('data/all_weights.csv')
# Display the column index as a quick check of what was loaded.
data.columns

Index(['Effort', 'Helpfulness', 'Trustworthy', 'Anger', 'Online', 'Empathy',
       'Familiarity', 'group', 'EmpathyBot', 'AngerInducement', 'WithAnger',
       'Q1.2', 'Q1.3', 'Q1.4', 'Q2.1', 'Q2.2', 'Q2.3', 'Q1_2', 'Q1_3', 'Q1_4',
       'Q2_1', 'Q2_2', 'Q2_3', 'ComprehensionCount'],
      dtype='object')

In [6]:
# Contrast codes derived from the EmpathyBot label. Later cells compare the rows
# coded +1 against the rows coded -1 and leave out the rows coded 0.
# 'systemizing': NonEmpathy (+1) vs FAQ (-1); Empathy rows are coded 0.
# Labels missing from the mapping would become NaN (pandas Series.map behaviour).
data['systemizing'] = data['EmpathyBot'].map({
    'NonEmpathy': 1,
    'FAQ': -1,
    'Empathy': 0
})
# 'empathizing': Empathy (+1) vs NonEmpathy (-1); FAQ rows are coded 0.
data['empathizing'] = data['EmpathyBot'].map({
    'Empathy': 1,
    'NonEmpathy': -1,
    'FAQ': 0
})

In [7]:
def find_mean_var_total(data, dependent_var, condition):
    """Summarise `dependent_var` over every row that takes part in a contrast.

    A row takes part when `data[condition]` equals 1 or -1; all other rows are
    left out. The mean and variance of the selected values are printed to two
    decimals and returned.

    Parameters
    ----------
    data : DataFrame holding both `dependent_var` and `condition` columns.
    dependent_var : name of the outcome column.
    condition : name of the contrast-code column.

    Returns
    -------
    (mean, var) : computed with `Series.mean()` and `Series.var()` (the pandas
        default for `var` is the sample variance, ddof=1).
    """
    in_contrast = data[condition].isin([1, -1])
    selected = data[dependent_var][in_contrast]
    mean, var = selected.mean(), selected.var()
    print(f"{dependent_var} - {condition} Mean: {mean:.2f}, Variance: {var:.2f}")
    return mean, var

In [8]:
# Pooled (+1 and -1 rows together) mean/variance of each outcome, per contrast.
for dependent_var in ('Effort', 'Helpfulness', 'Trustworthy'):
    for condition in ('systemizing', 'empathizing'):
        find_mean_var_total(data=data, dependent_var=dependent_var, condition=condition)


Effort - systemizing Mean: 9.49, Variance: 19.98
Effort - empathizing Mean: 7.92, Variance: 15.35
Helpfulness - systemizing Mean: 15.70, Variance: 8.74
Helpfulness - empathizing Mean: 16.12, Variance: 7.92
Trustworthy - systemizing Mean: 16.09, Variance: 13.18
Trustworthy - empathizing Mean: 16.03, Variance: 14.34


In [9]:
## Find mean and variance
def find_mean_var(data, dependent_var, condition):
    """Summarise `dependent_var` separately for the +1 and -1 arms of a contrast.

    Rows where `data[condition] == 1` form the "positive" arm and rows where it
    equals -1 form the "negative" arm. The four statistics are printed to two
    decimals on one line and returned.

    Returns
    -------
    (positive_mean, positive_var, negative_mean, negative_var), using
    `Series.mean()` / `Series.var()` (pandas default: sample variance, ddof=1).
    """
    outcome = data[dependent_var]
    codes = data[condition]

    positive_arm = outcome[codes == 1]
    negative_arm = outcome[codes == -1]

    positive_mean, positive_var = positive_arm.mean(), positive_arm.var()
    negative_mean, negative_var = negative_arm.mean(), negative_arm.var()

    print(f"{dependent_var} - {condition} Positive Mean: {positive_mean:.2f}, Positive Variance: {positive_var:.2f}, Negative Mean: {negative_mean:.2f}, Negative Variance: {negative_var:.2f}")
    return positive_mean, positive_var, negative_mean, negative_var


In [10]:
# Per-arm (+1 vs -1) mean/variance of each outcome, per contrast.
for dependent_var in ('Effort', 'Helpfulness', 'Trustworthy'):
    for condition in ('systemizing', 'empathizing'):
        find_mean_var(data=data, dependent_var=dependent_var, condition=condition)



Effort - systemizing Positive Mean: 8.07, Positive Variance: 16.36, Negative Mean: 10.90, Negative Variance: 19.75
Effort - empathizing Positive Mean: 7.78, Positive Variance: 14.50, Negative Mean: 8.07, Negative Variance: 16.36
Helpfulness - systemizing Positive Mean: 15.67, Positive Variance: 11.13, Negative Mean: 15.74, Negative Variance: 6.48
Helpfulness - empathizing Positive Mean: 16.56, Positive Variance: 4.51, Negative Mean: 15.67, Negative Variance: 11.13
Trustworthy - systemizing Positive Mean: 15.63, Positive Variance: 12.74, Negative Mean: 16.53, Negative Variance: 13.35
Trustworthy - empathizing Positive Mean: 16.40, Positive Variance: 15.75, Negative Mean: 15.63, Negative Variance: 12.74


In [11]:
# Show the distinct EmpathyBot labels present in the data.
data['EmpathyBot'].unique()


array(['NonEmpathy', 'FAQ', 'Empathy'], dtype=object)

In [12]:
## Find mean and variance when condition is EmpathyBot
def find_mean_var_empathybot(data, dependent_var):
    """Summarise `dependent_var` within each of the three EmpathyBot levels.

    Levels are reported in a fixed order: level 1 = 'NonEmpathy',
    level 2 = 'FAQ', level 3 = 'Empathy'. One line per level is printed (two
    decimals) and the six statistics are returned.

    Returns
    -------
    (level1_mean, level1_var, level2_mean, level2_var, level3_mean, level3_var),
    using `Series.mean()` / `Series.var()` (pandas default: sample variance).
    """
    outcome = data[dependent_var]
    bot_label = data['EmpathyBot']

    # Collect (mean, var) per level in reporting order.
    per_level = []
    for label in ('NonEmpathy', 'FAQ', 'Empathy'):
        level_values = outcome[bot_label == label]
        per_level.append((level_values.mean(), level_values.var()))
    (level1_mean, level1_var), (level2_mean, level2_var), (level3_mean, level3_var) = per_level

    print(f"{dependent_var} - EmpathyBot Level 1 Mean: {level1_mean:.2f}, Level 1 Variance: {level1_var:.2f}")
    print(f"Level 2 Mean: {level2_mean:.2f}, Level 2 Variance: {level2_var:.2f}")
    print(f"Level 3 Mean: {level3_mean:.2f}, Level 3 Variance: {level3_var:.2f}")
    return level1_mean, level1_var, level2_mean, level2_var, level3_mean, level3_var


In [13]:
# Per-level mean/variance of each outcome across the three EmpathyBot levels.
for dependent_var in ('Effort', 'Helpfulness', 'Trustworthy'):
    find_mean_var_empathybot(data=data, dependent_var=dependent_var)


Effort - EmpathyBot Level 1 Mean: 8.07, Level 1 Variance: 16.36
Level 2 Mean: 10.90, Level 2 Variance: 19.75
Level 3 Mean: 7.78, Level 3 Variance: 14.50
Helpfulness - EmpathyBot Level 1 Mean: 15.67, Level 1 Variance: 11.13
Level 2 Mean: 15.74, Level 2 Variance: 6.48
Level 3 Mean: 16.56, Level 3 Variance: 4.51
Trustworthy - EmpathyBot Level 1 Mean: 15.63, Level 1 Variance: 12.74
Level 2 Mean: 16.53, Level 2 Variance: 13.35
Level 3 Mean: 16.40, Level 3 Variance: 15.75


In [14]:
def find_mean_var_empathybot_total(data, dependent_var):
    """Summarise `dependent_var` over all rows of `data` (no filtering).

    Prints the mean and variance to two decimals and returns them as
    `(mean, var)`, computed with `Series.mean()` / `Series.var()` (pandas
    default: sample variance, ddof=1).
    """
    outcome = data[dependent_var]
    mean, var = outcome.mean(), outcome.var()
    print(f"{dependent_var} - EmpathyBot Mean: {mean:.2f}, Variance: {var:.2f}")
    return mean, var


In [15]:
# Whole-sample mean/variance of each outcome.
for dependent_var in ('Effort', 'Helpfulness', 'Trustworthy'):
    find_mean_var_empathybot_total(data=data, dependent_var=dependent_var)


Effort - EmpathyBot Mean: 8.91, Variance: 18.72
Helpfulness - EmpathyBot Mean: 15.99, Variance: 7.45
Trustworthy - EmpathyBot Mean: 16.19, Variance: 14.02


In [16]:
## Function to test type 1 error
def test_type_1_error(data, dependent_var, condition, alpha=0.05, n_simulations=1000):
    # Get the null distribution (when H0 is true - no difference between groups)
    null_data = data[data[condition].isin([1, -1])][dependent_var].to_numpy()
    type_1_errors = 0
    
    for _ in range(n_simulations):
        # Randomly split the data into two groups
        shuffled_data = null_data.copy()
        np.random.shuffle(shuffled_data)
        group1 = shuffled_data[:len(shuffled_data)//2]
        group2 = shuffled_data[len(shuffled_data)//2:]
        
        # Perform t-test
        t_stat, p_value = ttest_ind(group1, group2, equal_var=False)
        
        # Count type 1 errors (rejecting H0 when it's true)
        if p_value < alpha:
            type_1_errors += 1
    
    type_1_error_rate = type_1_errors / n_simulations
    print(f"{dependent_var} - {condition} Type 1 Error Rate: {type_1_error_rate:.3f}")
    return type_1_error_rate


In [17]:
# Function to test type 2 error
def test_type_2_error_bootstrap(data, dependent_var, condition, alpha=0.05, n_simulations=1000):
    """Bootstrap approach - resamples from actual data distribution"""
    # Get original groups
    group1_data = data[data[condition] == -1][dependent_var].to_numpy()
    group2_data = data[data[condition] == 1][dependent_var].to_numpy()
    
    # Calculate original effect size
    pooled_std = np.sqrt((group1_data.var() + group2_data.var()) / 2)
    observed_effect_size = abs(group1_data.mean() - group2_data.mean()) / pooled_std
    
    type_2_errors = 0
    for _ in range(n_simulations):
        # Resample with replacement from original data
        bootstrap_group1 = np.random.choice(group1_data, size=len(group1_data), replace=True)
        bootstrap_group2 = np.random.choice(group2_data, size=len(group2_data), replace=True)
        
        # Perform t-test
        t_stat, p_value = ttest_ind(bootstrap_group1, bootstrap_group2, equal_var=False)
        
        if p_value >= alpha:
            type_2_errors += 1
    
    type_2_error_rate = type_2_errors / n_simulations
    print(f"{dependent_var} - {condition} Estimated Type 2 Error Rate (Bootstrap): {type_2_error_rate:.3f}")
    return type_2_error_rate

In [18]:
# Seed NumPy's global generator so the simulated error rates printed below are
# identical on every Restart & Run All (the simulations draw from np.random).
np.random.seed(42)
for dependent_var in ['Effort', 'Helpfulness', 'Trustworthy']:
    for condition in ['systemizing', 'empathizing']:
        test_type_1_error(data, dependent_var, condition)
        test_type_2_error_bootstrap(data, dependent_var, condition)



Effort - systemizing Type 1 Error Rate: 0.059
Effort - systemizing Estimated Type 2 Error Rate (Bootstrap): 0.008
Effort - empathizing Type 1 Error Rate: 0.034
Effort - empathizing Estimated Type 2 Error Rate (Bootstrap): 0.915
Helpfulness - systemizing Type 1 Error Rate: 0.047
Helpfulness - systemizing Estimated Type 2 Error Rate (Bootstrap): 0.953
Helpfulness - empathizing Type 1 Error Rate: 0.051
Helpfulness - empathizing Estimated Type 2 Error Rate (Bootstrap): 0.406
Trustworthy - systemizing Type 1 Error Rate: 0.054
Trustworthy - systemizing Estimated Type 2 Error Rate (Bootstrap): 0.584
Trustworthy - empathizing Type 1 Error Rate: 0.049
Trustworthy - empathizing Estimated Type 2 Error Rate (Bootstrap): 0.708


In [19]:
def test_type_1_error_total(data, dependent_var, n_simulations=1000, alpha=0.05):
    null_data = data[dependent_var].to_numpy()
    type_1_errors = 0

    for _ in range(n_simulations):
        shuffled_data = null_data.copy()
        np.random.shuffle(shuffled_data)
        group1 = shuffled_data[:len(shuffled_data)//2]
        group2 = shuffled_data[len(shuffled_data)//2:]
    
        t_stat, p_value = ttest_ind(group1, group2, equal_var=False)

        if p_value < alpha:
            type_1_errors += 1
    
    type_1_error_rate = type_1_errors / n_simulations
    print(f"{dependent_var} - Total Type 1 Error Rate: {type_1_error_rate:.3f}")
    return type_1_error_rate


In [20]:
def test_type_2_error_main_effect(data, dependent_var, factor='EmpathyBot', alpha=0.05, n_simulations=1000):
    """
    Calculate Type 2 error rate for main effect of a factor with 3 levels
    factor: Factor with 3 levels (e.g., 'NonEmpathy', 'FAQ', 'Empathy')
    """
    # Get original groups
    groups = [group[dependent_var].to_numpy() for name, group in data.groupby(factor)]
    
    # Calculate original F-statistic
    f_stat, p_val = stats.f_oneway(*groups)
    
    type_2_errors = 0
    
    for _ in range(n_simulations):
        # Bootstrap sample with replacement
        bootstrap_groups = []
        for group in groups:
            bootstrap_group = np.random.choice(group, size=len(group), replace=True)
            bootstrap_groups.append(bootstrap_group)
        
        # Perform one-way ANOVA
        _, p_value = stats.f_oneway(*bootstrap_groups)
        
        # Count Type 2 errors (failing to reject H0 when there is an effect)
        if p_value >= alpha:
            type_2_errors += 1
    
    type_2_error_rate = type_2_errors / n_simulations
    power = 1 - type_2_error_rate
    
    print(f"\n{dependent_var} - {factor} Main Effect:")
    print(f"Type 2 Error Rate: {type_2_error_rate:.3f}")
    print(f"Statistical Power: {power:.3f}")
    
    return type_2_error_rate, power



In [21]:
# Seed NumPy's global generator so the simulated error rates printed below are
# identical on every Restart & Run All (the simulations draw from np.random).
np.random.seed(42)
for dependent_var in ['Effort', 'Helpfulness', 'Trustworthy']:
    test_type_1_error_total(data, dependent_var)
    test_type_2_error_main_effect(data, dependent_var)


Effort - Total Type 1 Error Rate: 0.049

Effort - EmpathyBot Main Effect:
Type 2 Error Rate: 0.000
Statistical Power: 1.000
Helpfulness - Total Type 1 Error Rate: 0.053

Helpfulness - EmpathyBot Main Effect:
Type 2 Error Rate: 0.368
Statistical Power: 0.632
Trustworthy - Total Type 1 Error Rate: 0.041

Trustworthy - EmpathyBot Main Effect:
Type 2 Error Rate: 0.658
Statistical Power: 0.342


In [22]:
# power analysis
def simple_power_analysis(design_type, alpha=0.05, power=0.8):
    """Print and return required sample sizes for small/medium/large effects.

    For each of three Cohen's f values (0.10, 0.25, 0.40) the sample size is
    solved with `FTestAnovaPower.solve_power`, passing the number of design
    cells as `k_groups`. The solved value is reported as the total N, and the
    per-group N is that value divided by the number of cells; both are rounded
    up independently, so `n_per_group * cells` can exceed `total_n` slightly.

    NOTE(review): the design is analysed as a one-way ANOVA across all cells;
    confirm that this is the intended test for a factorial design.

    Parameters
    ----------
    design_type : factor levels joined by 'x', e.g. '2x2' (4 cells) or
        '3x2' (6 cells). The number of cells is the product of the levels.
    alpha : significance level passed to the power solver.
    power : target power passed to the power solver.

    Returns
    -------
    dict : `{effect_name: {'total_n': int, 'n_per_group': int}}` for the keys
        'small', 'medium' and 'large'.

    Raises
    ------
    ValueError : if `design_type` cannot be parsed or describes fewer than two
        cells. (Previously every value other than '2x2' was silently treated
        as a 6-cell design.)
    """
    # Validate the design before doing any work so a typo fails loudly instead
    # of quietly producing numbers for the wrong number of groups.
    try:
        factor_levels = [int(part) for part in str(design_type).lower().split('x')]
    except ValueError:
        raise ValueError(
            f"design_type must be factor levels joined by 'x' (e.g. '2x2', '3x2'); got {design_type!r}"
        ) from None
    if any(levels < 1 for levels in factor_levels):
        raise ValueError(f"every factor needs at least one level; got {design_type!r}")
    groups = int(np.prod(factor_levels))
    if groups < 2:
        raise ValueError(f"design must contain at least two cells; got {design_type!r}")

    power_analysis = FTestAnovaPower()

    # Conventional Cohen's f benchmarks.
    effect_sizes = {
        'small': 0.10,
        'medium': 0.25,
        'large': 0.40
    }

    results = {}
    for effect_name, f in effect_sizes.items():
        # With the sample size left unspecified, solve_power returns the value
        # needed to reach the requested power.
        n = power_analysis.solve_power(
            effect_size=f,
            power=power,
            alpha=alpha,
            k_groups=groups
        )
        results[effect_name] = {
            'total_n': int(np.ceil(n)),
            'n_per_group': int(np.ceil(n/groups))
        }

    print(f"\nRequired Sample Sizes for {design_type} Design:")
    print(f"{'Effect Size':<12} {'Total N':<10} {'N per Group':<12}")
    print("-" * 34)
    for effect, ns in results.items():
        print(f"{effect:<12} {ns['total_n']:<10} {ns['n_per_group']:<12}")

    return results

In [23]:
# Tabulate required sample sizes for both candidate designs.
for design in ('2x2', '3x2'):
    simple_power_analysis(design_type=design)


Required Sample Sizes for 2x2 Design:
Effect Size  Total N    N per Group 
----------------------------------
small        1095       274         
medium       179        45          
large        73         19          

Required Sample Sizes for 3x2 Design:
Effect Size  Total N    N per Group 
----------------------------------
small        1289       215         
medium       211        36          
large        86         15          
