# In [None]:
import numpy as np
import pandas as pd
import statsmodels.api as sm
from statsmodels.formula.api import logit
from tqdm import tqdm
import matplotlib.pyplot as plt
from scipy import stats
import warnings
warnings.filterwarnings('ignore')

def generate_candidate_pair_data(n_pairs_per_treatment):
    """
    Simulate candidate pairs and the attribute differences between them.

    Every pair consists of two independently drawn candidates. All
    distribution parameters are hard-coded below (the original notes
    describe them as taken from hiring summary statistics and regression
    coefficients).

    Parameters:
    -----------
    n_pairs_per_treatment : int
        Number of candidate pairs to generate

    Returns:
    --------
    list of dict
        One dict per pair with keys 'pair_id', 'delta_gender',
        'delta_internship_exp', 'delta_certificates', 'delta_university',
        'delta_gpa' (all computed as candidate 1 minus candidate 2) plus the
        raw attribute dicts under 'candidate1' and 'candidate2'.
    """
    # Lookup tables do not change between draws, so they are built once.
    certificate_levels = range(5)  # 0-4 certificates
    certificate_probs = [0.1, 0.25, 0.25, 0.25, 0.15]

    # Numeric score per university tier; a higher score means a stronger tier.
    university_values = {
        'etc': 0,  # baseline
        'domestic_top14_22': 0.897,
        'international_middle_low': 1.136,
        'domestic_top10_13': 1.309,
        'domestic_top9_female': 1.400,
        'domestic_top6_8': 1.881,
        'domestic_top4_5': 2.348,
        'international_high': 2.525,
        'domestic_top3': 3.525
    }
    university_categories = list(university_values)
    university_probs = [0.35, 0.05, 0.02, 0.18, 0.02, 0.15, 0.12, 0.03, 0.08]

    def draw_candidate():
        # The order of the random draws is kept fixed so that a seeded run
        # yields the same stream of candidates.
        is_female = np.random.binomial(1, 0.5)
        has_internship = np.random.binomial(1, 0.483)
        n_certificates = np.random.choice(certificate_levels, p=certificate_probs)
        tier = np.random.choice(university_categories, p=university_probs)
        raw_gpa = np.random.normal(80.934, 7.489)
        return {
            'female': is_female,
            'internship_exp': has_internship,
            'certificates': n_certificates,
            'university_score': university_values[tier],
            # GPA is truncated to the range [36.4, 100.0]
            'gpa': np.clip(raw_gpa, 36.4, 100.0)
        }

    pairs = []
    for pair_id in range(n_pairs_per_treatment):
        first = draw_candidate()
        second = draw_candidate()

        # Gender is recoded as a "male" indicator (male=1, female=0) before
        # differencing, so delta_gender is 1 when the first candidate is male
        # and the second female, -1 for the opposite, 0 for same gender.
        male_first = 1 - first['female']
        male_second = 1 - second['female']

        pairs.append({
            'pair_id': pair_id,
            'delta_gender': male_first - male_second,
            'delta_internship_exp': first['internship_exp'] - second['internship_exp'],
            'delta_certificates': first['certificates'] - second['certificates'],
            'delta_university': first['university_score'] - second['university_score'],
            'delta_gpa': first['gpa'] - second['gpa'],
            'candidate1': first,
            'candidate2': second
        })

    return pairs

def calculate_selection_probability(pair_data, treatment, is_individualism, effect_sizes, participant_random_effect):
    """
    Return the probability that the first candidate of a pair is selected.

    The log-odds are a linear function of the attribute differences in
    ``pair_data`` plus a participant-level offset; treatment and culture
    only act by shifting the coefficient on ``delta_gender``.

    Parameters:
    -----------
    pair_data : mapping
        Must provide 'delta_gender', 'delta_internship_exp',
        'delta_certificates', 'delta_university' and 'delta_gpa'.
    treatment : str
        'T0_unfair', 'T1_group_fair' or 'T2_individual_fair'. Any other
        value (e.g. 'control') leaves the baseline gender coefficient as is.
    is_individualism : int or bool
        Truthy enables the culture (3-way) shift for the given treatment.
    effect_sizes : mapping
        Log-odds shifts. Only the keys belonging to the active treatment are
        read: '<unfair|group_fair|individual_fair>_gender_interaction' and,
        when ``is_individualism`` is truthy,
        'culture_<unfair|group|individual>_gender'.
    participant_random_effect : float
        Offset added to the log-odds for this participant.

    Returns:
    --------
    float
        Probability clipped to [1e-10, 1 - 1e-10].
    """

    # Base coefficients adapted for paired comparison.
    coefficients = {
        'intercept': 0,  # No intercept in paired comparison (symmetric)
        # Baseline gender bias of OR = 1.72, i.e. log(1.72) ≈ 0.542. The value
        # previously left here (1.519, OR ≈ 4.57) contradicted the documented
        # baseline and the effect sizes, which are scaled to ±0.542.
        'gender': 0.542,
        'internship_exp': 0.360,
        'certificates': 0.169,
        'university': 1.0,  # Coefficient per unit university score difference
        'gpa': 0.335/10   # 0.335 scaled down by 10 for raw GPA-point differences
    }

    # treatment -> (2-way gender interaction key, 3-way culture key)
    treatment_effect_keys = {
        # H1B: unfair AI amplifies gender bias; no culture hypothesis
        'T0_unfair': ('unfair_gender_interaction', 'culture_unfair_gender'),
        # H1A / H3B: group fairness AI
        'T1_group_fair': ('group_fair_gender_interaction', 'culture_group_gender'),
        # H1A / H3A: individual fairness AI
        'T2_individual_fair': ('individual_fair_gender_interaction', 'culture_individual_gender'),
    }

    delta_gender = pair_data['delta_gender']

    # Base log odds from attribute differences
    log_odds = coefficients['intercept']
    log_odds += coefficients['gender'] * delta_gender
    log_odds += coefficients['internship_exp'] * pair_data['delta_internship_exp']
    log_odds += coefficients['certificates'] * pair_data['delta_certificates']
    log_odds += coefficients['university'] * pair_data['delta_university']
    log_odds += coefficients['gpa'] * pair_data['delta_gpa']

    # Participant-level offset
    log_odds += participant_random_effect

    # Treatment (2-way) and culture (3-way) shifts of the gender coefficient.
    # Keys are looked up lazily so that unused effects need not be supplied.
    effect_keys = treatment_effect_keys.get(treatment)
    if effect_keys is not None:
        two_way_key, three_way_key = effect_keys
        log_odds += effect_sizes[two_way_key] * delta_gender
        if is_individualism:
            log_odds += effect_sizes[three_way_key] * delta_gender

    # Logistic transform; clipping keeps the result strictly inside (0, 1)
    prob = 1 / (1 + np.exp(-log_odds))
    return float(np.clip(prob, 1e-10, 1-1e-10))

def generate_study1_data(n_participants, pairs_per_treatment, effect_sizes):
    """
    Simulate one Study 1 dataset in long format (one row per evaluated pair).

    Every participant receives a single random offset that is reused for all
    of their choices, a culture indicator drawn with probability 0.5, and
    evaluates ``pairs_per_treatment`` freshly generated pairs in each of the
    four conditions.

    Returns a DataFrame with columns 'y' (1 = first candidate chosen), the
    five 'delta_*' attribute differences, 'treatment', 'is_individualism',
    'participant_id' and 'pair_id'.
    """
    conditions = ('control', 'T0_unfair', 'T1_group_fair', 'T2_individual_fair')

    # The standard-logistic residual variance is pi^2 / 3. The participant SD
    # is chosen so that var_participant / (var_participant + pi^2/3) equals
    # the target intraclass correlation.
    residual_variance = np.pi**2 / 3
    icc = 0.25
    intercept_sd = np.sqrt(icc * residual_variance / (1 - icc))

    # All offsets are drawn before any other random number is consumed.
    random_intercepts = {
        pid: np.random.normal(0, intercept_sd) for pid in range(n_participants)
    }

    records = []
    for pid in range(n_participants):
        # Cultural orientation is a fixed trait of the participant
        individualist = np.random.binomial(1, 0.5)

        for condition in conditions:
            for pair in generate_candidate_pair_data(pairs_per_treatment):
                p_first = calculate_selection_probability(
                    pair, condition, individualist, effect_sizes,
                    random_intercepts[pid]
                )

                # 1 = first candidate selected, 0 = second candidate selected
                choice = np.random.binomial(1, p_first)

                records.append({
                    'y': choice,
                    'delta_gender': pair['delta_gender'],
                    'delta_internship_exp': pair['delta_internship_exp'],
                    'delta_certificates': pair['delta_certificates'],
                    'delta_university': pair['delta_university'],
                    'delta_gpa': pair['delta_gpa'],
                    'treatment': condition,
                    'is_individualism': individualist,
                    'participant_id': pid,
                    'pair_id': f"{pid}_{condition}_{pair['pair_id']}"
                })

    return pd.DataFrame(records)

def test_hypothesis_h1a(df, alpha=0.05):
    """
    Test H1A: Fair AI reduces gender bias (group + individual fairness vs control).

    Fits a logit of ``y`` on the attribute differences, the treatment dummies
    (control as reference) and the delta_gender x treatment interactions,
    with standard errors clustered on ``participant_id``.

    Returns True when at least one of the two fair-AI interactions
    (group fairness, individual fairness) is negative with p < ``alpha``.
    The p-values are the model's two-sided ones and no multiplicity
    correction is applied across the two interactions.

    Returns False when the model cannot be built or fitted (any ``Exception``),
    so a failed fit counts as "not supported".
    """
    try:
        # Create treatment dummy variables
        df_test = pd.get_dummies(df, columns=['treatment'], prefix='T', drop_first=False)
        df_test = df_test.drop(columns=['T_control'])  # Control as reference

        # get_dummies may yield bool columns; the formula needs numeric ones
        for col in ['T_T0_unfair', 'T_T1_group_fair', 'T_T2_individual_fair']:
            df_test[col] = df_test[col].astype(float)

        formula = """
            y ~ delta_gender + delta_internship_exp + delta_certificates + 
              delta_university + delta_gpa +
              T_T0_unfair + T_T1_group_fair + T_T2_individual_fair + 
              delta_gender:T_T0_unfair + delta_gender:T_T1_group_fair + delta_gender:T_T2_individual_fair
        """

        model = logit(formula, data=df_test).fit(
            cov_type='cluster', 
            cov_kwds={'groups': df_test['participant_id']},
            disp=0, maxiter=100
        )

        # H1A is supported if a fair-AI interaction is negative & significant
        fair_interactions = ['delta_gender:T_T1_group_fair', 'delta_gender:T_T2_individual_fair']
        return any(
            bool(model.pvalues[term] < alpha and model.params[term] < 0)
            for term in fair_interactions
            if term in model.params
        )

    except Exception:
        # Deliberately best-effort, but KeyboardInterrupt/SystemExit must still
        # propagate so a long simulation run can be interrupted.
        return False

def test_hypothesis_h1b(df, alpha=0.05):
    """
    Test H1B: Unfair AI amplifies gender bias.

    Fits a logit of ``y`` on the attribute differences, the treatment dummies
    (control as reference) and the delta_gender x treatment interactions,
    with standard errors clustered on ``participant_id``.

    Returns True when the delta_gender x unfair-AI interaction is positive
    with a (two-sided) p-value below ``alpha``. Returns False otherwise,
    including when the model cannot be built or fitted (any ``Exception``).
    """
    try:
        # Create treatment dummy variables
        df_test = pd.get_dummies(df, columns=['treatment'], prefix='T', drop_first=False)
        df_test = df_test.drop(columns=['T_control'])  # Control as reference

        # get_dummies may yield bool columns; the formula needs numeric ones
        for col in ['T_T0_unfair', 'T_T1_group_fair', 'T_T2_individual_fair']:
            df_test[col] = df_test[col].astype(float)

        formula = """
            y ~ delta_gender + delta_internship_exp + delta_certificates + 
              delta_university + delta_gpa +
              T_T0_unfair + T_T1_group_fair + T_T2_individual_fair + 
              delta_gender:T_T0_unfair + delta_gender:T_T1_group_fair + delta_gender:T_T2_individual_fair
        """

        model = logit(formula, data=df_test).fit(
            cov_type='cluster', 
            cov_kwds={'groups': df_test['participant_id']},
            disp=0, maxiter=100
        )

        # Positive interaction = unfair AI amplifies the gender coefficient
        interaction_term = 'delta_gender:T_T0_unfair'
        if interaction_term not in model.pvalues:
            return False

        p_val = model.pvalues[interaction_term]
        coef = model.params[interaction_term]
        return bool((p_val < alpha) and (coef > 0))

    except Exception:
        # Deliberately best-effort, but KeyboardInterrupt/SystemExit must still
        # propagate so a long simulation run can be interrupted.
        return False

def test_hypothesis_h2(df, alpha=0.05):
    """
    Test H2: Individual fairness more effective than Group fairness (directional).

    Fits the same clustered logit as the H1 tests and compares the two
    fair-AI interaction coefficients, d1 (delta_gender x group fairness) and
    d2 (delta_gender x individual fairness). "More effective" means a more
    negative coefficient, so the tested contrast is d1 - d2 > 0.

    The contrast is evaluated with a one-tailed z-test whose standard error
    uses the cluster-robust covariance matrix:
    se = sqrt(Var(d1) + Var(d2) - 2 Cov(d1, d2)).

    Returns True when the upper-tail p-value is below ``alpha`` and the
    difference is positive. Returns False otherwise, including when the
    model cannot be built or fitted (any ``Exception``).
    """
    try:
        # Create treatment dummy variables - use full dataset
        df_test = pd.get_dummies(df, columns=['treatment'], prefix='T', drop_first=False)
        df_test = df_test.drop(columns=['T_control'])  # Control as reference

        # get_dummies may yield bool columns; the formula needs numeric ones
        for col in ['T_T0_unfair', 'T_T1_group_fair', 'T_T2_individual_fair']:
            df_test[col] = df_test[col].astype(float)

        formula = """
            y ~ delta_gender + delta_internship_exp + delta_certificates + 
              delta_university + delta_gpa +
              T_T0_unfair + T_T1_group_fair + T_T2_individual_fair + 
              delta_gender:T_T0_unfair + delta_gender:T_T1_group_fair + delta_gender:T_T2_individual_fair
        """

        model = logit(formula, data=df_test).fit(
            cov_type='cluster', 
            cov_kwds={'groups': df_test['participant_id']},
            disp=0, maxiter=100
        )

        group_term = 'delta_gender:T_T1_group_fair'
        individual_term = 'delta_gender:T_T2_individual_fair'
        if group_term not in model.params or individual_term not in model.params:
            return False

        # Contrast d1 - d2 (group minus individual)
        diff = model.params[group_term] - model.params[individual_term]

        # Standard error of the contrast from the robust covariance matrix
        cov_matrix = model.cov_params()
        var_diff = (
            cov_matrix.loc[group_term, group_term]
            + cov_matrix.loc[individual_term, individual_term]
            - 2 * cov_matrix.loc[group_term, individual_term]
        )
        se_diff = np.sqrt(var_diff)

        z_stat = diff / se_diff

        # Upper-tail probability; sf() keeps precision for large z where
        # 1 - cdf() would round to zero.
        p_val = stats.norm.sf(z_stat)

        return bool((p_val < alpha) and (diff > 0))

    except Exception:
        # Deliberately best-effort, but KeyboardInterrupt/SystemExit must still
        # propagate so a long simulation run can be interrupted.
        return False


def test_hypothesis_h3a(df, alpha=0.05):
    """
    Test H3A: Individual fairness is more effective in individualist cultures.

    Fits a logit with all main effects, all 2-way interactions among
    delta_gender, the treatment dummies (control as reference) and
    ``is_individualism``, and the three 3-way interactions, with standard
    errors clustered on ``participant_id``.

    Returns True when the delta_gender x individual-fairness x individualism
    coefficient is negative with a (two-sided) p-value below ``alpha``.
    Returns False otherwise, including when the model cannot be built or
    fitted (any ``Exception``).
    """
    try:
        # Create treatment dummy variables
        df_test = pd.get_dummies(df, columns=['treatment'], prefix='T', drop_first=False)
        df_test = df_test.drop(columns=['T_control'])  # Control as reference

        # get_dummies may yield bool columns; the formula needs numeric ones
        for col in ['T_T0_unfair', 'T_T1_group_fair', 'T_T2_individual_fair']:
            df_test[col] = df_test[col].astype(float)

        # Full model with 3-way interactions
        formula = """
            y ~ delta_gender + delta_internship_exp + delta_certificates + 
              delta_university + delta_gpa +
              T_T0_unfair + T_T1_group_fair + T_T2_individual_fair +
              is_individualism +
              delta_gender:T_T0_unfair + delta_gender:T_T1_group_fair + 
              delta_gender:T_T2_individual_fair +
              delta_gender:is_individualism +
              T_T0_unfair:is_individualism + T_T1_group_fair:is_individualism + 
              T_T2_individual_fair:is_individualism +
              delta_gender:T_T0_unfair:is_individualism + 
              delta_gender:T_T1_group_fair:is_individualism +
              delta_gender:T_T2_individual_fair:is_individualism
        """

        model = logit(formula, data=df_test).fit(
            cov_type='cluster', 
            cov_kwds={'groups': df_test['participant_id']},
            disp=0, maxiter=100
        )

        # A NEGATIVE 3-way interaction means individual fairness reduces the
        # gender coefficient MORE among individualist participants.
        interaction_term = 'delta_gender:T_T2_individual_fair:is_individualism'
        if interaction_term not in model.pvalues:
            return False

        p_val = model.pvalues[interaction_term]
        coef = model.params[interaction_term]
        return bool((p_val < alpha) and (coef < 0))

    except Exception:
        # Deliberately best-effort, but KeyboardInterrupt/SystemExit must still
        # propagate so a long simulation run can be interrupted.
        return False

def test_hypothesis_h3b(df, alpha=0.05):
    """
    Test H3B: Group fairness is less effective in individualist cultures.

    Fits a logit with all main effects, all 2-way interactions among
    delta_gender, the treatment dummies (control as reference) and
    ``is_individualism``, and the three 3-way interactions, with standard
    errors clustered on ``participant_id``.

    Returns True when the delta_gender x group-fairness x individualism
    coefficient is positive with a (two-sided) p-value below ``alpha``.
    Returns False otherwise, including when the model cannot be built or
    fitted (any ``Exception``).
    """
    try:
        # Create treatment dummy variables
        df_test = pd.get_dummies(df, columns=['treatment'], prefix='T', drop_first=False)
        df_test = df_test.drop(columns=['T_control'])  # Control as reference

        # get_dummies may yield bool columns; the formula needs numeric ones
        for col in ['T_T0_unfair', 'T_T1_group_fair', 'T_T2_individual_fair']:
            df_test[col] = df_test[col].astype(float)

        # Full model with 3-way interactions
        formula = """
            y ~ delta_gender + delta_internship_exp + delta_certificates + 
              delta_university + delta_gpa +
              T_T0_unfair + T_T1_group_fair + T_T2_individual_fair +
              is_individualism +
              delta_gender:T_T0_unfair + delta_gender:T_T1_group_fair + 
              delta_gender:T_T2_individual_fair +
              delta_gender:is_individualism +
              T_T0_unfair:is_individualism + T_T1_group_fair:is_individualism + 
              T_T2_individual_fair:is_individualism +
              delta_gender:T_T0_unfair:is_individualism + 
              delta_gender:T_T1_group_fair:is_individualism +
              delta_gender:T_T2_individual_fair:is_individualism
        """

        model = logit(formula, data=df_test).fit(
            cov_type='cluster', 
            cov_kwds={'groups': df_test['participant_id']},
            disp=0, maxiter=100
        )

        # A POSITIVE 3-way interaction means group fairness reduces the
        # gender coefficient LESS among individualist participants.
        interaction_term = 'delta_gender:T_T1_group_fair:is_individualism'
        if interaction_term not in model.pvalues:
            return False

        p_val = model.pvalues[interaction_term]
        coef = model.params[interaction_term]
        return bool((p_val < alpha) and (coef > 0))

    except Exception:
        # Deliberately best-effort, but KeyboardInterrupt/SystemExit must still
        # propagate so a long simulation run can be interrupted.
        return False

def simulate_study1_power_analysis(n_participants_list, pairs_per_treatment, effect_sizes, n_sim=1000, alpha=0.05):
    """
    Estimate power for the Study 1 hypotheses by Monte Carlo simulation.

    For every sample size in ``n_participants_list`` the function generates
    ``n_sim`` datasets, runs the five hypothesis tests on each and records
    the share of datasets in which each hypothesis was supported.

    Parameters:
    -----------
    n_participants_list : list of int
        Sample sizes to evaluate
    pairs_per_treatment : int
        Candidate pairs per participant and treatment
    effect_sizes : dict
        Log-odds effects passed through to the data generator
    n_sim : int
        Simulated datasets per sample size
    alpha : float
        Significance level passed to every hypothesis test

    Returns:
    --------
    dict with key 'n_participants' (the input list) and, aligned with it,
    lists under 'power_h1a', 'power_h1b', 'power_h2', 'power_h3a',
    'power_h3b', 'convergence_failures' and 'exceptions'. The last two are
    fractions of ``n_sim``. Power is divided by ``n_sim`` minus the number of
    runs that raised a non-convergence exception; runs classified as
    convergence failures stay in the denominator as non-rejections.
    """
    hypothesis_keys = ('h1a', 'h1b', 'h2', 'h3a', 'h3b')

    def evaluate_hypotheses(df):
        # Every test is evaluated before any counter is touched, so a run
        # that raises part-way through contributes to no hypothesis at all.
        return {
            'h1a': test_hypothesis_h1a(df, alpha),  # Fair AI reduces bias
            'h1b': test_hypothesis_h1b(df, alpha),  # Unfair AI amplifies bias
            'h2': test_hypothesis_h2(df, alpha),    # Individual vs Group fairness
            'h3a': test_hypothesis_h3a(df, alpha),  # Individual fairness x individualism
            'h3b': test_hypothesis_h3b(df, alpha),  # Group fairness x individualism
        }

    results = {
        'n_participants': n_participants_list,
        'power_h1a': [],
        'power_h1b': [],
        'power_h2': [],
        'power_h3a': [],
        'power_h3b': [],
        'convergence_failures': [],
        'exceptions': []
    }

    print("Study 1 Power Analysis")
    print(f"Pairs per treatment: {pairs_per_treatment}")
    print("Testing hypotheses:")
    print("  H1A: Fair AI reduces gender bias")
    print("  H1B: Unfair AI amplifies gender bias")
    print("  H2: Individual fairness MORE effective than Group fairness (directional)")
    print("  H3A: Individual fairness more effective in individualist cultures")
    print("  H3B: Group fairness less effective in individualist cultures")

    for n in n_participants_list:
        print(f"\nRunning simulation for {n} participants...")

        power_counts = dict.fromkeys(hypothesis_keys, 0)
        convergence_fails = 0
        exceptions = 0

        for _ in tqdm(range(n_sim), desc=f"n={n}", leave=False):
            try:
                df = generate_study1_data(n, pairs_per_treatment, effect_sizes)
                outcomes = evaluate_hypotheses(df)
            except Exception as e:
                message = str(e)
                if "Singular matrix" in message or "convergence" in message.lower():
                    convergence_fails += 1
                else:
                    exceptions += 1
                continue

            for key, supported in outcomes.items():
                if supported:
                    power_counts[key] += 1

        # Calculate power
        valid_sims = n_sim - exceptions
        for key in hypothesis_keys:
            power = power_counts[key] / valid_sims if valid_sims > 0 else 0
            results[f'power_{key}'].append(power)

        # Guard against n_sim == 0, which would otherwise divide by zero
        results['convergence_failures'].append(convergence_fails / n_sim if n_sim > 0 else 0.0)
        results['exceptions'].append(exceptions / n_sim if n_sim > 0 else 0.0)

    return results

def plot_study1_power_curves(results, pairs_per_treatment):
    """
    Draw one power-vs-sample-size panel per hypothesis and show the figure.

    ``results`` must provide 'n_participants' and the five 'power_*' lists;
    ``pairs_per_treatment`` only appears in the panel titles. Each panel also
    carries dashed reference lines at power 0.8 and 0.95.
    """
    panels = (
        ('power_h1a', 'H1A: Fair AI Reduces Gender Bias', 'blue'),
        ('power_h1b', 'H1B: Unfair AI Amplifies Gender Bias', 'red'),
        ('power_h2', 'H2: Individual > Group Fairness', 'green'),
        ('power_h3a', 'H3A: Individual Fairness × Individualism', 'purple'),
        ('power_h3b', 'H3B: Group Fairness × Individualism', 'orange'),
    )

    fig = plt.figure(figsize=(20, 12))
    sample_sizes = results['n_participants']

    # Five panels on a 2 x 3 grid; the sixth slot is intentionally left empty.
    for slot, (power_key, title, color) in enumerate(panels, start=1):
        ax = fig.add_subplot(2, 3, slot)
        ax.plot(sample_sizes, results[power_key], 'o-',
                linewidth=2, markersize=6, color=color)
        ax.axhline(0.8, color='orange', linestyle='--', alpha=0.7, label='Power = 0.8')
        ax.axhline(0.95, color='red', linestyle='--', alpha=0.7, label='Power = 0.95')
        ax.set_title(f'{title}\n({pairs_per_treatment} pairs per treatment)', fontsize=11)
        ax.set_xlabel('Number of Participants')
        ax.set_ylabel('Estimated Power')
        ax.set_ylim(0, 1)
        ax.grid(True, alpha=0.3)
        ax.legend()

    plt.tight_layout()
    plt.show()

def find_study1_sample_requirements(results, pairs_per_treatment):
    """
    Print, per hypothesis, the smallest simulated sample size reaching 80% and 95% power.

    ``results`` must provide 'n_participants' and the five 'power_*' lists,
    aligned by position. For each target the first sample size (in list
    order) whose estimated power is at or above the target is reported,
    together with the total number of pair evaluations
    (participants x ``pairs_per_treatment`` x 4 treatments). If no simulated
    size reaches the target, the largest size and the highest observed power
    are reported instead. Empty result lists are reported as such rather
    than raising.

    Returns None; all output goes to stdout.
    """

    hypotheses = [
        ('power_h1a', 'H1A: Fair AI Reduces Gender Bias'),
        ('power_h1b', 'H1B: Unfair AI Amplifies Gender Bias'),
        ('power_h2', 'H2: Individual vs Group Fairness'),
        ('power_h3a', 'H3A: Individual Fairness × Individualism'),
        ('power_h3b', 'H3B: Group Fairness × Individualism')
    ]

    sample_sizes = results['n_participants']

    for target_power in [0.8, 0.95]:
        print(f"\n=== {target_power*100}% Power Requirements ===")

        for power_key, hypothesis_name in hypotheses:
            powers = results[power_key]

            # Nothing simulated: max() below would fail on an empty sequence
            if not powers or not sample_sizes:
                print(f"{hypothesis_name}: no simulation results available")
                continue

            # First sample size, in list order, that reaches the target
            n_participants = next(
                (n for n, power in zip(sample_sizes, powers) if power >= target_power),
                None,
            )

            if n_participants is None:
                max_n = max(sample_sizes)
                max_power = max(powers)
                print(f"{hypothesis_name}: >{max_n} participants needed "
                      f"(max observed power: {max_power:.3f})")
                continue

            total_pairs = n_participants * pairs_per_treatment * 4  # 4 treatments
            print(f"{hypothesis_name}: {n_participants} participants "
                  f"({total_pairs:,} total pair evaluations)")

# Example execution
if __name__ == "__main__":
    # ---- Experimental design ------------------------------------------
    # Candidate pairs each participant evaluates within every treatment
    pairs_per_treatment = 5
    # Sample sizes swept by the simulation: 25, 75, ..., 375
    participants_range = list(range(25, 376, 50))

    # ---- Assumed effects (log-odds scale) -----------------------------
    # Magnitudes are anchored on OR = 1.72, i.e. log(1.72) ≈ 0.542.
    effect_sizes = dict(
        # H1A / H2: fair AI reduces bias
        group_fair_gender_interaction=-0.542,       # log(1/1.72)
        individual_fair_gender_interaction=-0.907,  # stronger reduction (original note: Cohen's d = 0.5, OR = 2.48)
        # H1B: unfair AI amplifies bias
        unfair_gender_interaction=0.542,            # log(1.72)
        # H3A: negative 3-way interaction (individual fairness works better for individualists)
        culture_individual_gender=-0.542,
        # H3B: positive 3-way interaction (group fairness works worse for individualists)
        culture_group_gender=0.542,
        # No hypothesis about culture under the unfair AI
        culture_unfair_gender=0,
    )

    print("=== STUDY 1 POWER ANALYSIS ===")
    print("Participants range: {}".format(participants_range))
    print("Pairs per treatment per participant: {}".format(pairs_per_treatment))
    print("Total pairs per participant: {}".format(pairs_per_treatment * 4))
    print("Effect sizes (log-odds):")
    for key, value in effect_sizes.items():
        print("  {}: {:.3f}".format(key, value))

    # ---- Simulation ---------------------------------------------------
    results = simulate_study1_power_analysis(
        participants_range,
        pairs_per_treatment,
        effect_sizes,
        n_sim=1000,
        alpha=0.05,
    )

    # ---- Reporting ----------------------------------------------------
    plot_study1_power_curves(results, pairs_per_treatment)
    find_study1_sample_requirements(results, pairs_per_treatment)

    print("\n=== DETAILED RESULTS ===")
    report_columns = (
        ('H1A', 'power_h1a'),
        ('H1B', 'power_h1b'),
        ('H2', 'power_h2'),
        ('H3A', 'power_h3a'),
        ('H3B', 'power_h3b'),
    )
    for i, n in enumerate(results['n_participants']):
        total_pairs = n * pairs_per_treatment * 4
        summary = ", ".join(
            f"{label}={results[column][i]:.3f}" for label, column in report_columns
        )
        print(f"n={n:3d} ({total_pairs:4d} pairs): {summary}")

    print("\n=== STUDY DESIGN SUMMARY ===")
    print("Baseline gender bias: OR = 1.72 (moderate male preference)")
    print(f"Each participant evaluates {pairs_per_treatment * 4} pairs total")
    print(f"Design: 4 treatments × {pairs_per_treatment} pairs within-subjects")
    print("Hypotheses test different aspects of AI fairness interventions")