<a href="https://colab.research.google.com/github/ErickJLA/meta/blob/main/Meta_2_61.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
#@title 📊 IMPORT LIBRARIES & AUTHENTICATE

# =============================================================================
# CELL 1: ENVIRONMENT SETUP
# Purpose: Import required libraries and authenticate Google Sheets access
# Dependencies: None
# Outputs: Authentication status, library versions, system info
# =============================================================================

import numpy as np
import pandas as pd
import gspread
from google.colab import auth
from google.auth import default
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
from scipy.stats import norm, chi2
import matplotlib.pyplot as plt
import datetime
import sys
import warnings

# Hide FutureWarning messages so the cell output stays readable.
warnings.filterwarnings(action='ignore', category=FutureWarning)

# --- Configuration Constants ---
# Worksheet columns the pipeline expects, grouped by role.
REQUIRED_COLUMNS = dict(
    effect_data='xe sde ne xc sdc nc'.split(),
    metadata=['id'],
)

# Effect-size keys mapped to their human-readable names (order is preserved
# and is the order in which they are listed in the configuration summary).
SUPPORTED_EFFECT_SIZES = dict([
    ('lnRR', 'Log Response Ratio'),
    ('hedges_g', "Hedges' g (corrected SMD)"),
    ('cohen_d', "Cohen's d (uncorrected SMD)"),
    ('log_OR', 'Log Odds Ratio'),
])

# --- Authentication ---
# Print the run banner, then authenticate the Colab user and build the gspread
# client (`gc`) from the default Google credentials.
print("=" * 70)
print("META-ANALYSIS PIPELINE - INITIALIZATION")
print("=" * 70)
print(f"Execution Time: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
print("-" * 70)

try:
    auth.authenticate_user()
    creds, _ = default()
    gc = gspread.authorize(creds)
    auth_status = "✓ SUCCESS"
    auth_details = "Google Sheets API access granted"
except Exception as e:
    # Record the failure first so the status variables exist even though the
    # cell aborts below.
    auth_status = "✗ FAILED"
    auth_details = str(e)
    print(f"\n❌ AUTHENTICATION ERROR: {e}")
    print("\nTroubleshooting:")
    print("  1. Ensure you're running in Google Colab")
    print("  2. Check your Google account permissions")
    print("  3. Try re-running the cell")
    # RuntimeError is still an Exception subclass (so existing handlers keep
    # working); `from e` records the original error as the explicit cause.
    raise RuntimeError("Stopping execution due to authentication failure.") from e

# --- Library Version Check ---
print("\n📦 LIBRARY VERSIONS:")
for lib_label, lib_version in (
    ("NumPy:", np.__version__),
    ("Pandas:", pd.__version__),
    ("gspread:", gspread.__version__),
    ("Matplotlib:", plt.matplotlib.__version__),
):
    # Labels are padded to the width of the longest one ("Matplotlib:").
    print(f"  • {lib_label:<11} {lib_version}")

# --- Configuration Summary ---
effect_cols_text = ', '.join(REQUIRED_COLUMNS['effect_data'])
metadata_cols_text = ', '.join(REQUIRED_COLUMNS['metadata'])
print("\n⚙️  CONFIGURATION:")
print(f"  • Required effect data columns: {effect_cols_text}")
print(f"  • Required metadata columns:    {metadata_cols_text}")
print(f"  • Supported effect sizes:       {len(SUPPORTED_EFFECT_SIZES)}")
for key, name in SUPPORTED_EFFECT_SIZES.items():
    print(f"      - {key}: {name}")

# --- Status Summary ---
is_ready = auth_status == '✓ SUCCESS'
print("\n" + "=" * 70, "INITIALIZATION STATUS", "=" * 70, sep="\n")
print(f"Authentication:  {auth_status}")
print(f"Details:         {auth_details}")
print(f"Ready:           {'YES ✓' if is_ready else 'NO ✗'}")
print("=" * 70)

# Snapshot of this initialization run, kept for later reference.
INIT_METADATA = {
    'timestamp': datetime.datetime.now(),
    'auth_status': auth_status,
    'numpy_version': np.__version__,
    'pandas_version': pd.__version__,
    'supported_effects': list(SUPPORTED_EFFECT_SIZES.keys())
}

print("\n✅ Setup complete. Proceed to next cell to load data.\n")

META-ANALYSIS PIPELINE - INITIALIZATION
Execution Time: 2025-11-14 18:00:52
----------------------------------------------------------------------

📦 LIBRARY VERSIONS:
  • NumPy:      2.0.2
  • Pandas:     2.2.2
  • gspread:    6.2.1
  • Matplotlib: 3.10.0

⚙️  CONFIGURATION:
  • Required effect data columns: xe, sde, ne, xc, sdc, nc
  • Required metadata columns:    id
  • Supported effect sizes:       4
      - lnRR: Log Response Ratio
      - hedges_g: Hedges' g (corrected SMD)
      - cohen_d: Cohen's d (uncorrected SMD)
      - log_OR: Log Odds Ratio

INITIALIZATION STATUS
Authentication:  ✓ SUCCESS
Details:         Google Sheets API access granted
Ready:           YES ✓

✅ Setup complete. Proceed to next cell to load data.



In [3]:
#@title 📁 LOAD DATA & CREATE INTERACTIVE WIDGETS

# =============================================================================
# CELL 2: DATA LOADING AND FILTER CONFIGURATION
# Purpose: Load data from Google Sheets and create interactive filtering widgets
# Dependencies: Cell 1 (authentication and constants)
# Outputs: raw_data DataFrame, interactive widget interface
# =============================================================================

# --- STEP 1: GOOGLE SHEETS AUTHENTICATION ---
print("\n" + "="*70, "STEP 1: GOOGLE SHEETS AUTHENTICATION", "="*70, sep="\n")

try:
    auth.authenticate_user()
    creds, _ = default()
    gc = gspread.authorize(creds)
except Exception as e:
    # Report the reason, then let the original exception stop the cell.
    print(f"✗ Authentication failed: {e}")
    raise
else:
    print("✓ Authentication successful")
    print(f"  Timestamp: {datetime.datetime.now():%H:%M:%S}")

# --- STEP 2: DATA LOADING CONFIGURATION ---
print("\n" + "="*70, "STEP 2: DATA LOADING CONFIGURATION", "="*70, sep="\n")

# Spreadsheet and worksheet to read (editable through the Colab form fields)
sheetName = 'tesis' #@param{type:"string"}
worksheetName = 'micro' #@param{type:"string"}

print(f"📊 Target Sheet:     {sheetName}")
print(f"📄 Target Worksheet: {worksheetName}")

# --- STEP 3: LOAD DATA FROM GOOGLE SHEETS ---
print("\n" + "="*70, "STEP 3: LOADING DATA FROM GOOGLE SHEETS", "="*70, sep="\n")

try:
    spreadsheet = gc.open(sheetName)
    worksheet = spreadsheet.worksheet(worksheetName)
    rows = worksheet.get_all_values()

    if not rows:
        raise ValueError("No data found in the worksheet.")

    # First row is the header; everything after it is data.
    column_names, data_records = rows[0], rows[1:]
    raw_data = pd.DataFrame.from_records(data_records, columns=column_names)

    n_rows_loaded, n_cols_loaded = raw_data.shape
    column_preview = ', '.join(column_names[:5])
    preview_suffix = '...' if len(column_names) > 5 else ''

    print("✓ Data loaded successfully")
    print(f"  Dimensions: {n_rows_loaded} rows × {n_cols_loaded} columns")
    print(f"  Columns: {column_preview}{preview_suffix}")

except Exception as e:
    # Show guidance, then re-raise so the cell stops with the real error.
    print(f"✗ ERROR loading data: {e}")
    print(
        "\nTroubleshooting:",
        "  1. Verify sheet name and worksheet name are correct",
        "  2. Ensure you have access permissions to the sheet",
        "  3. Check that the sheet contains data",
        sep="\n",
    )
    raise

# --- STEP 4: DATA TYPE CONVERSION & CLEANING ---
# Converts the effect-size columns to numbers, drops rows that lack an
# essential value, and drops rows whose sample size is below 1.
print("\n" + "="*70)
print("STEP 4: DATA TYPE CONVERSION & CLEANING")
print("="*70)

# Store original dimensions for reporting
original_rows = len(raw_data)
cleaning_log = []

# Convert numeric columns
numeric_columns = REQUIRED_COLUMNS['effect_data']
conversion_results = {}

for col in numeric_columns:
    if col in raw_data.columns:
        raw_data[col] = raw_data[col].astype(str).str.strip()
        # After astype(str) no value is NaN, so count non-blank cells instead;
        # otherwise every empty cell would be reported as "coerced".
        before_conversion = (raw_data[col] != '').sum()
        raw_data[col] = pd.to_numeric(raw_data[col], errors='coerce')
        after_conversion = raw_data[col].notna().sum()
        conversion_results[col] = {
            'before': before_conversion,
            'after': after_conversion,
            'coerced': before_conversion - after_conversion
        }
    else:
        print(f"⚠️  WARNING: Required column '{col}' not found in data!")
        cleaning_log.append(f"Missing required column: {col}")

# Report conversion results
print("📊 Numeric Column Conversion:")
for col, results in conversion_results.items():
    if results['coerced'] > 0:
        print(f"  • {col}: {results['after']} valid values ({results['coerced']} coerced to NaN)")
    else:
        print(f"  • {col}: {results['after']} valid values")

# Ensure ID is string
if 'id' in raw_data.columns:
    raw_data['id'] = raw_data['id'].astype(str).str.strip()
    print(f"✓ ID column converted to string ({raw_data['id'].nunique()} unique IDs)")

# Drop rows with missing essential values
essential_cols = ['xe', 'ne', 'xc', 'nc']

# Fail with an actionable message instead of the KeyError that indexing a
# missing column would raise a few lines below.
missing_essential_cols = [col for col in essential_cols if col not in raw_data.columns]
if missing_essential_cols:
    raise ValueError(
        "Cannot continue: worksheet is missing essential column(s): "
        + ", ".join(missing_essential_cols)
    )

missing_essential = raw_data[essential_cols].isna().any(axis=1).sum()
# .copy() gives an independent frame so the column assignments below do not
# write into a slice of the loaded data.
raw_data = raw_data.dropna(subset=essential_cols).copy()

if missing_essential > 0:
    print(f"🧹 Dropped {missing_essential} rows with missing essential values")
    cleaning_log.append(f"Dropped {missing_essential} rows (missing xe/ne/xc/nc)")

# Ensure N >= 1 for sample sizes
invalid_n_count = 0
for col in ['ne', 'nc']:
    # astype(int) truncates toward zero; make that visible rather than silent.
    fractional_n = int((raw_data[col] % 1 != 0).sum())
    if fractional_n > 0:
        print(f"⚠️  WARNING: {fractional_n} non-integer values in '{col}' will be truncated to integers")
        cleaning_log.append(f"Truncated {fractional_n} non-integer values in {col}")

    raw_data[col] = raw_data[col].fillna(0).astype(int)
    invalid_n = (raw_data[col] < 1).sum()
    if invalid_n > 0:
        raw_data = raw_data[raw_data[col] >= 1].copy()
        invalid_n_count += invalid_n

if invalid_n_count > 0:
    print(f"🧹 Dropped {invalid_n_count} rows with invalid sample sizes (n < 1)")
    cleaning_log.append(f"Dropped {invalid_n_count} rows (n < 1)")

# Final data summary
final_rows = len(raw_data)
print(f"\n✓ Clean dataset: {final_rows} rows remaining ({original_rows - final_rows} total removed)")

# --- STEP 5: DATA QUALITY SUMMARY ---
print("\n" + "="*70, "STEP 5: DATA QUALITY SUMMARY", "="*70, sep="\n")

print("📈 Effect Size Data Availability:")
for col in ('xe', 'sde', 'ne', 'xc', 'sdc', 'nc'):
    if col not in raw_data.columns:
        continue
    valid = raw_data[col].notna().sum()
    pct = (valid / final_rows) * 100
    print(f"  • {col:4s}: {valid:4d}/{final_rows} ({pct:5.1f}% complete)")

# Rows sharing an `id` are reported as multiple observations of one study.
if 'id' in raw_data.columns:
    unique_studies = raw_data['id'].nunique()
    total_obs = len(raw_data)
    if unique_studies > 0:
        avg_obs_per_study = total_obs / unique_studies
    else:
        avg_obs_per_study = 0
    print(
        "\n📚 Study Structure:",
        f"  • Unique studies: {unique_studies}",
        f"  • Total observations: {total_obs}",
        f"  • Avg observations per study: {avg_obs_per_study:.2f}",
        sep="\n",
    )

# --- STEP 6: IDENTIFY MODERATOR COLUMNS ---
# Every remaining text (object-dtype) column is offered as a potential
# moderator, and a per-column summary is printed and stored.
print("\n" + "="*70)
print("STEP 6: IDENTIFYING MODERATOR COLUMNS")
print("="*70)

# Exclude the effect-size and metadata columns (de-duplicated, order kept)
excluded_cols = list(dict.fromkeys(
    REQUIRED_COLUMNS['effect_data'] + REQUIRED_COLUMNS['metadata']
))

# Get potential moderator columns (categorical/text columns)
available_moderators = [col for col in raw_data.columns
                        if col not in excluded_cols
                        and raw_data[col].dtype == 'object']

print(f"✓ Found {len(available_moderators)} potential moderator columns:\n")

# Detailed moderator summary
moderator_summary = []
for col in available_moderators:
    # The sheet is loaded as strings, so an empty cell is '' rather than NaN.
    # Mask blank/whitespace-only cells so they count as missing here; the
    # data in raw_data itself is left unchanged.
    col_values = raw_data[col].where(raw_data[col].astype(str).str.strip() != '')

    n_unique = col_values.nunique()
    n_missing = int(col_values.isna().sum())
    n_present = final_rows - n_missing
    # Guard the empty-dataset case instead of dividing by zero.
    pct_complete = (n_present / final_rows) * 100 if final_rows else 0.0
    print(f"  • {col}")
    print(f"      - Unique values: {n_unique}")
    print(f"      - Completeness: {pct_complete:.1f}% ({n_present}/{final_rows})")

    # Show top 3 most common values (value_counts skips the masked blanks)
    if n_unique > 0:
        top_values = col_values.value_counts().head(3)
        print(f"      - Top values: ", end="")
        print(", ".join([f"{val} (n={count})" for val, count in top_values.items()]))
    print()

    moderator_summary.append({
        'column': col,
        'unique_values': n_unique,
        'completeness': pct_complete
    })

# --- STEP 7: CREATE INTERACTIVE WIDGETS ---
print("="*70, "STEP 7: CREATING INTERACTIVE WIDGETS", "="*70, sep="\n")

# Outer container; its children are assigned once all widgets exist.
widget_box = widgets.VBox()

# --- Widget 1: Pre-filter by Treatment Type (OPTIONAL) ---
prefilter_header = widgets.HTML(
    value="<h3 style='color: #2E86AB; margin-bottom: 5px;'>📌 Pre-Filter (Optional)</h3>"
)
prefilter_desc = widgets.HTML(
    value=(
        "<p style='margin-top: 0; color: #666;'><i>Filter dataset to include only specific "
        "treatment types before analysis. Leave as 'None' to include all data.</i></p>"
    )
)

# Column chooser: 'None' (no pre-filter) followed by every moderator column.
prefilter_col_options = ['None', *available_moderators]
prefilter_col_widget = widgets.Dropdown(
    description='Filter by:',
    options=prefilter_col_options,
    value='None',
    layout=widgets.Layout(width='500px'),
    style={'description_width': '120px'},
)

# Holds the per-value checkboxes; filled in when a column is selected.
prefilter_values_widget = widgets.VBox()

def update_prefilter_checkboxes(change):
    """Rebuild the value checkboxes after the pre-filter column changes.

    `change['new']` is the newly selected column name. Selecting 'None'
    clears the checkbox container; any other column yields one pre-checked
    checkbox per distinct non-null value, labelled "<value> (n=<row count>)".
    """
    column_name = change['new']

    if column_name != 'None':
        column = raw_data[column_name]
        choices = sorted(column.dropna().unique())

        heading = widgets.HTML(
            "<p style='margin: 10px 0; font-weight: bold;'>Select values to KEEP:</p>"
        )

        # One checkbox per value, all ticked by default.
        boxes = []
        for val in choices:
            n_rows_with_val = len(raw_data[column == val])
            boxes.append(
                widgets.Checkbox(
                    value=True,
                    description=f"{val} (n={n_rows_with_val})",
                    style={'description_width': 'initial'},
                    layout=widgets.Layout(width='500px'),
                )
            )

        prefilter_values_widget.children = [heading] + boxes
    else:
        prefilter_values_widget.children = []

# Re-run the builder every time the dropdown's value changes.
prefilter_col_widget.observe(update_prefilter_checkboxes, names='value')

# --- Widget 2: Subgroup Analysis Columns ---
# Factor 1 is mandatory, so stop with an actionable message when the sheet has
# no moderator columns instead of the IndexError from available_moderators[0].
if not available_moderators:
    raise ValueError(
        "No moderator columns available for subgroup analysis. "
        "Add at least one text/categorical column to the worksheet."
    )

subgroup_header = widgets.HTML(
    "<h3 style='color: #2E86AB; margin-bottom: 5px;'>📊 Subgroup Analysis</h3>"
)
subgroup_desc = widgets.HTML(
    "<p style='margin-top: 0; color: #666;'><i>Select moderator columns for subgroup analysis. "
    "Subgroups with insufficient studies will be automatically excluded based on thresholds below.</i></p>"
)

# Primary factor: defaults to 'Treatment_Type' when present, else the first moderator.
filterCol1_widget = widgets.Dropdown(
    options=available_moderators,
    value='Treatment_Type' if 'Treatment_Type' in available_moderators else available_moderators[0],
    description='Factor 1:',
    style={'description_width': '120px'},
    layout=widgets.Layout(width='500px')
)

# Secondary factor is optional: 'None' selects a single-factor analysis.
filterCol2_widget = widgets.Dropdown(
    options=['None'] + available_moderators,
    value='Outcome_Type' if 'Outcome_Type' in available_moderators else 'None',
    description='Factor 2:',
    style={'description_width': '120px'},
    layout=widgets.Layout(width='500px')
)

# --- Widget 3: Minimum Thresholds ---
threshold_header = widgets.HTML(
    value="<h3 style='color: #2E86AB; margin-bottom: 5px;'>⚙️ Quality Filters</h3>"
)
threshold_desc = widgets.HTML(
    value=(
        "<p style='margin-top: 0; color: #666;'><i>Minimum requirements for including subgroups in analysis. "
        "Higher values increase reliability but may exclude smaller subgroups.</i></p>"
    )
)


def _make_threshold_slider(label, upper_bound):
    """Build an integer slider from 1 to `upper_bound`, starting at 2."""
    return widgets.IntSlider(
        description=label,
        value=2,
        min=1,
        max=upper_bound,
        step=1,
        layout=widgets.Layout(width='500px'),
        style={'description_width': '120px'},
    )


minPapers_widget = _make_threshold_slider('Min Papers:', 10)
minObservations_widget = _make_threshold_slider('Min Observations:', 20)

# --- Widget 4: Apply Button ---
apply_button = widgets.Button(
    description='▶ Apply Configuration & Continue',
    button_style='success',
    style={'font_weight': 'bold', 'font_size': '14px'},
    layout=widgets.Layout(width='500px', height='50px'),
)

# Everything printed by the Apply handler is rendered inside this widget.
output_area = widgets.Output()

def on_apply_button_clicked(b):
    """Print a summary of the current widget selections into `output_area`.

    Parameters
    ----------
    b : the button instance passed by the click callback (unused).

    The handler only reports the selections; the filters themselves are
    applied by the next cell, which reads the same widgets.
    """
    with output_area:
        clear_output()
        print("\n" + "="*70)
        print("APPLYING CONFIGURATION")
        print("="*70)

        # Show selected configuration
        print("\n📋 Analysis Configuration Summary:")
        print("-" * 70)

        # Pre-filter settings
        print("\n1️⃣  PRE-FILTER:")
        prefilter_col = prefilter_col_widget.value
        if prefilter_col != 'None':
            # Checkbox labels are "<value> (n=<count>)". Split on the LAST
            # " (n=" only, so a value that itself contains that text is not
            # truncated. children[0] is the heading, hence the [1:] slice.
            selected_values = [
                cb.description.rsplit(' (n=', 1)[0]
                for cb in prefilter_values_widget.children[1:]
                if hasattr(cb, 'value') and cb.value
            ]
            # Single pass over the column instead of one filter per value.
            total_kept = int(raw_data[prefilter_col].isin(selected_values).sum())
            print(f"    Column: {prefilter_col}")
            print(f"    Values to keep: {', '.join(selected_values)}")
            print(f"    Observations after filter: {total_kept}/{len(raw_data)}")
        else:
            print("    No pre-filter applied (all data included)")

        # Subgroup settings
        print("\n2️⃣  SUBGROUP ANALYSIS:")
        print(f"    Primary factor:   {filterCol1_widget.value}")
        print(f"    Secondary factor: {filterCol2_widget.value}")

        # Quality thresholds
        print("\n3️⃣  QUALITY THRESHOLDS:")
        print(f"    Minimum papers per subgroup:       {minPapers_widget.value}")
        print(f"    Minimum observations per subgroup: {minObservations_widget.value}")

        print("\n" + "="*70)
        print("✓ Configuration saved successfully!")
        print("="*70)
        print("\n⚠️  Next Steps:")
        print("    1. Review the configuration above")
        print("    2. Run the next cell to apply filters and continue analysis")
        print("    3. If you need to change settings, modify widgets and click Apply again")

apply_button.on_click(on_apply_button_clicked)

# --- Assemble Widget Layout ---
def _section_divider():
    """Return a new horizontal-rule widget used between form sections."""
    return widgets.HTML("<hr style='margin: 20px 0; border: none; border-top: 2px solid #ddd;'>")


prefilter_section = [prefilter_header, prefilter_desc, prefilter_col_widget, prefilter_values_widget]
subgroup_section = [subgroup_header, subgroup_desc, filterCol1_widget, filterCol2_widget]
threshold_section = [threshold_header, threshold_desc, minPapers_widget, minObservations_widget]

widget_box.children = (
    prefilter_section
    + [_section_divider()]
    + subgroup_section
    + [_section_divider()]
    + threshold_section
    + [_section_divider()]
    + [apply_button, output_area]
)

# Render the assembled form
display(widget_box)

# --- FINAL STATUS ---
print("\n" + "="*70, "✓ Interactive widgets created successfully", "="*70, sep="\n")
print(
    "\n👆 CONFIGURE YOUR ANALYSIS:",
    "    1. Optionally select a pre-filter column and values",
    "    2. Choose subgroup analysis factors",
    "    3. Set quality thresholds",
    "    4. Click 'Apply Configuration & Continue'",
    sep="\n",
)
print("\n" + "="*70)

# Record of what was loaded and how it was cleaned, for downstream use
LOAD_METADATA = {
    'timestamp': datetime.datetime.now(),
    'sheet_name': sheetName,
    'worksheet_name': worksheetName,
    'original_rows': original_rows,
    'final_rows': final_rows,
    'rows_removed': original_rows - final_rows,
    'cleaning_log': cleaning_log,
    'available_moderators': available_moderators,
    'moderator_summary': moderator_summary,
    'conversion_results': conversion_results
}

if 'id' in raw_data.columns:
    study_count_text = raw_data['id'].nunique()
else:
    study_count_text = 'unknown'
print(f"\n📊 Dataset ready: {final_rows} observations from {study_count_text} studies")


STEP 1: GOOGLE SHEETS AUTHENTICATION
✓ Authentication successful
  Timestamp: 18:01:08

STEP 2: DATA LOADING CONFIGURATION
📊 Target Sheet:     tesis
📄 Target Worksheet: micro

STEP 3: LOADING DATA FROM GOOGLE SHEETS
✓ Data loaded successfully
  Dimensions: 69 rows × 18 columns
  Columns: id, xe, sde, ne, xc...

STEP 4: DATA TYPE CONVERSION & CLEANING
📊 Numeric Column Conversion:
  • xe: 69 valid values
  • sde: 69 valid values
  • ne: 69 valid values
  • xc: 69 valid values
  • sdc: 69 valid values
  • nc: 69 valid values
✓ ID column converted to string (23 unique IDs)

✓ Clean dataset: 69 rows remaining (0 total removed)

STEP 5: DATA QUALITY SUMMARY
📈 Effect Size Data Availability:
  • xe  :   69/69 (100.0% complete)
  • sde :   69/69 (100.0% complete)
  • ne  :   69/69 (100.0% complete)
  • xc  :   69/69 (100.0% complete)
  • sdc :   69/69 (100.0% complete)
  • nc  :   69/69 (100.0% complete)

📚 Study Structure:
  • Unique studies: 23
  • Total observations: 69
  • Avg observations

VBox(children=(HTML(value="<h3 style='color: #2E86AB; margin-bottom: 5px;'>📌 Pre-Filter (Optional)</h3>"), HTM…


✓ Interactive widgets created successfully

👆 CONFIGURE YOUR ANALYSIS:
    1. Optionally select a pre-filter column and values
    2. Choose subgroup analysis factors
    3. Set quality thresholds
    4. Click 'Apply Configuration & Continue'


📊 Dataset ready: 69 observations from 23 studies


In [4]:
#@title 🔧 APPLY FILTERS & PREPARE DATA FOR ANALYSIS

# =============================================================================
# CELL 3: FILTER APPLICATION AND DATA PREPARATION
# Purpose: Apply user-selected filters and prepare dataset for effect size calculation
# Dependencies: Cell 2 (raw_data, widget values)
# Outputs: data_filtered DataFrame, ANALYSIS_CONFIG dictionary, FILTER_METADATA
# =============================================================================

print("\n" + "="*70, "APPLYING SELECTED FILTERS", "="*70, sep="\n")
print(f"Timestamp: {datetime.datetime.now():%Y-%m-%d %H:%M:%S}")

# Work on a copy so raw_data stays available for re-running this cell
data_filtered = raw_data.copy()
filter_log = []

# Baseline counts used later for the retention summary
initial_rows = len(data_filtered)
if 'id' in data_filtered.columns:
    initial_papers = data_filtered['id'].nunique()
else:
    initial_papers = None

# --- STEP 1: APPLY PRE-FILTER (OPTIONAL) ---
# Keeps only the rows whose value in the chosen column is ticked in the
# checkbox list; does nothing when the dropdown is left at 'None'.
print("\n" + "="*70)
print("STEP 1: PRE-FILTER APPLICATION")
print("="*70)


def _percent(part, whole):
    """Return part/whole as a percentage, or 0.0 when `whole` is zero."""
    return (part / whole) * 100 if whole else 0.0


prefilter_col = prefilter_col_widget.value

if prefilter_col != 'None':
    print(f"\n📌 Applying pre-filter on column: '{prefilter_col}'")

    # Checkbox labels are "<value> (n=<count>)". Split on the LAST " (n="
    # only, so a value that itself contains that text is recovered intact
    # (a truncated value would match no rows in the isin() below).
    selected_values = [
        cb.description.rsplit(' (n=', 1)[0]
        for cb in prefilter_values_widget.children[1:]
        if hasattr(cb, 'value') and cb.value
    ]

    print(f"\n  Values to KEEP: {len(selected_values)}")
    for i, val in enumerate(selected_values, 1):
        count = len(data_filtered[data_filtered[prefilter_col] == val])
        print(f"    {i}. {val}: n={count}")

    # Apply filter
    has_id = 'id' in data_filtered.columns
    before_filter = len(data_filtered)
    before_papers = data_filtered['id'].nunique() if has_id else None

    data_filtered = data_filtered[data_filtered[prefilter_col].isin(selected_values)].copy()

    after_filter = len(data_filtered)
    after_papers = data_filtered['id'].nunique() if has_id else None

    removed_obs = before_filter - after_filter
    # Compare against None explicitly: a legitimate count of 0 is falsy and
    # must not be mistaken for "no id column".
    if before_papers is not None and after_papers is not None:
        removed_papers = before_papers - after_papers
    else:
        removed_papers = None

    print(f"\n  📊 Pre-filter Results:")
    print(f"    Observations: {before_filter} → {after_filter} ({removed_obs} removed, {_percent(removed_obs, before_filter):.1f}%)")
    if removed_papers is not None:
        print(f"    Papers: {before_papers} → {after_papers} ({removed_papers} removed, {_percent(removed_papers, before_papers):.1f}%)")

    filter_log.append({
        'step': 'pre-filter',
        'column': prefilter_col,
        'values_kept': selected_values,
        'obs_before': before_filter,
        'obs_after': after_filter,
        'obs_removed': removed_obs
    })

    if after_filter == 0:
        print("\n  ⚠️  WARNING: Pre-filter removed all observations!")
        raise ValueError("No data remaining after pre-filter. Please adjust your selection.")

else:
    print("\n📌 No pre-filter applied")
    print(f"  Using all {len(data_filtered)} observations from dataset")
    selected_values = None

# --- STEP 2: EXTRACT SUBGROUP CONFIGURATION ---
print("\n" + "="*70, "STEP 2: SUBGROUP ANALYSIS CONFIGURATION", "="*70, sep="\n")

# Read the current widget selections
filterCol1 = filterCol1_widget.value
filterCol2 = filterCol2_widget.value
if filterCol2 == 'None':
    # The dropdown's 'None' entry means "no secondary factor"
    filterCol2 = None
minPapers = minPapers_widget.value
minObservations = minObservations_widget.value

secondary_label = filterCol2 if filterCol2 else 'None (single-factor analysis)'
print("\n📊 Analysis Structure:")
print(f"  Primary moderator:   {filterCol1}")
print(f"  Secondary moderator: {secondary_label}")
print("\n⚙️  Quality Thresholds:")
print(f"  Minimum papers/study:      {minPapers}")
print(f"  Minimum observations:      {minObservations}")

# One factor -> one-way analysis, two factors -> two-way analysis
if filterCol2:
    analysis_type = "two-way"
else:
    analysis_type = "one-way"
print(f"\n  Analysis type: {analysis_type.upper()} subgroup analysis")

# --- STEP 3: VERIFY REQUIRED COLUMNS ---
print("\n" + "="*70, "STEP 3: COLUMN VALIDATION", "="*70, sep="\n")

# The selected factor columns must exist in the filtered data
required_for_analysis = [filterCol1]
if filterCol2:
    required_for_analysis.append(filterCol2)

missing_cols = []
for col in required_for_analysis:
    if col not in data_filtered.columns:
        missing_cols.append(col)

if missing_cols:
    print(f"✗ ERROR: Missing required columns: {missing_cols}")
    raise ValueError(f"Selected columns not found in data: {missing_cols}")

print("✓ All required columns present:")
for col in required_for_analysis:
    factor_values = data_filtered[col]
    n_unique = factor_values.nunique()
    n_missing = factor_values.isna().sum()
    print(f"  • {col}: {n_unique} unique values, {n_missing} missing")

# --- STEP 4: PREPARE GROUPING STRUCTURE ---
print("\n" + "="*70, "STEP 4: GROUP IDENTIFICATION", "="*70, sep="\n")

grouping_cols = [filterCol1, filterCol2] if filterCol2 else [filterCol1]

# Normalise the grouping columns to stripped strings before grouping
for col in grouping_cols:
    data_filtered[col] = data_filtered[col].astype(str).str.strip()

print("\nCalculating statistics for all groups...")

# One row per group: distinct paper ids and number of rows
group_stats = (
    data_filtered
    .groupby(grouping_cols)
    .agg(
        unique_papers=('id', 'nunique'),
        total_observations=('id', 'size'),
    )
    .reset_index()
)

total_groups_found = len(group_stats)
print(f"✓ Found {total_groups_found} unique groups in data")

# Preview of the first 10 groups; label columns are 30 wide for two factors
# and 40 wide for one.
print("\n📋 Group Structure Preview:")
label_width = 30 if filterCol2 else 40
factor_titles = ['Factor 1', 'Factor 2'] if filterCol2 else ['Factor 1']

header_cells = [f"{title:<{label_width}}" for title in factor_titles]
rule_cells = ['-' * label_width for _ in factor_titles]
print("  " + " ".join(header_cells + [f"{'Papers':>8}", f"{'Obs':>8}"]))
print("  " + " ".join(rule_cells + ['-' * 8, '-' * 8]))

for _, row in group_stats.head(10).iterrows():
    label_cells = [f"{str(row[col]):<{label_width}}" for col in grouping_cols]
    count_cells = [f"{row['unique_papers']:>8}", f"{row['total_observations']:>8}"]
    print("  " + " ".join(label_cells + count_cells))

n_groups_not_shown = len(group_stats) - 10
if n_groups_not_shown > 0:
    print(f"  ... ({n_groups_not_shown} more groups)")

# --- STEP 5: APPLY GROUP FILTERS ---
# Classifies every group against the minimum-paper and minimum-observation
# thresholds, then keeps only the rows that belong to a passing group.
print("\n" + "="*70)
print("STEP 5: APPLYING QUALITY THRESHOLDS")
print("="*70)

groups_to_keep = set()
groups_excluded_papers = []
groups_excluded_obs = []
groups_kept_details = []

print(f"\nEvaluating {len(group_stats)} groups against thresholds...")

for _, row in group_stats.iterrows():
    # Group key is a tuple of the factor values (a 1-tuple for one factor).
    group_tuple = tuple(row[col] for col in grouping_cols)
    if filterCol2:
        group_name = f"{row[filterCol1]} × {row[filterCol2]}"
    else:
        group_name = row[filterCol1]

    papers = row['unique_papers']
    obs = row['total_observations']

    # The paper threshold is checked first, so a group failing both
    # thresholds is reported under "papers".
    if papers < minPapers:
        groups_excluded_papers.append({
            'name': group_name,
            'papers': papers,
            'obs': obs,
            'reason': f'{papers} papers < {minPapers} minimum'
        })
    elif obs < minObservations:
        groups_excluded_obs.append({
            'name': group_name,
            'papers': papers,
            'obs': obs,
            'reason': f'{obs} observations < {minObservations} minimum'
        })
    else:
        groups_to_keep.add(group_tuple)
        groups_kept_details.append({
            'name': group_name,
            'papers': papers,
            'obs': obs
        })

# Apply filtering
before_group_filter = len(data_filtered)
before_papers_filter = data_filtered['id'].nunique()

# Build the row mask in one pass over the grouping columns. This replaces the
# row-wise DataFrame.apply (slow, and not a boolean Series on an empty frame)
# and handles one or two factors with the same code.
row_keys = zip(*(data_filtered[col] for col in grouping_cols))
keep_mask = np.fromiter(
    (key in groups_to_keep for key in row_keys),
    dtype=bool,
    count=len(data_filtered),
)
data_filtered = data_filtered.loc[keep_mask].copy()

after_group_filter = len(data_filtered)
after_papers_filter = data_filtered['id'].nunique()

# --- STEP 6: REPORT FILTERING RESULTS ---
print("\n" + "="*70, "FILTERING RESULTS", "="*70, sep="\n")

print(f"\n✓ Groups Meeting Criteria: {len(groups_to_keep)}/{total_groups_found}")
print(f"  Total observations: {after_group_filter}")
print(f"  Unique papers: {after_papers_filter}")

if groups_kept_details:
    print(f"\n📊 Included Groups ({len(groups_kept_details)}):")
    # The table header is the same for one- and two-factor analyses.
    print(f"  {'Group':<50} {'Papers':>8} {'Obs':>8}")
    print(f"  {'-'*50} {'-'*8} {'-'*8}")

    # Largest groups first
    kept_by_size = list(groups_kept_details)
    kept_by_size.sort(key=lambda entry: entry['obs'], reverse=True)
    for group in kept_by_size:
        print(f"  {group['name']:<50} {group['papers']:>8} {group['obs']:>8}")

# Report excluded groups
total_excluded = len(groups_excluded_papers) + len(groups_excluded_obs)

if total_excluded == 0:
    print("\n✓ All groups met minimum thresholds")
else:
    print(f"\n⚠️  Excluded Groups: {total_excluded}/{total_groups_found}")

    if groups_excluded_papers:
        print(f"\n  ❌ Insufficient Papers ({len(groups_excluded_papers)} groups):")
        too_few_papers = list(groups_excluded_papers)
        too_few_papers.sort(key=lambda entry: entry['papers'], reverse=True)
        for group in too_few_papers:
            print(f"    • {group['name']}: {group['papers']} papers < {minPapers} required")

    if groups_excluded_obs:
        print(f"\n  ❌ Insufficient Observations ({len(groups_excluded_obs)} groups):")
        too_few_obs = list(groups_excluded_obs)
        too_few_obs.sort(key=lambda entry: entry['obs'], reverse=True)
        for group in too_few_obs:
            print(f"    • {group['name']}: {group['obs']} obs < {minObservations} required")

    # How much data the excluded groups accounted for
    excluded_obs = before_group_filter - after_group_filter
    excluded_papers = before_papers_filter - after_papers_filter
    print("\n  Data Excluded:")
    print(f"    Observations: {excluded_obs} ({(excluded_obs/before_group_filter)*100:.1f}% of dataset)")
    print(f"    Papers: {excluded_papers} ({(excluded_papers/before_papers_filter)*100:.1f}% of papers)")

# Record this step in the filter log
filter_log.append(dict(
    step='group_filtering',
    groups_total=total_groups_found,
    groups_kept=len(groups_to_keep),
    groups_excluded=total_excluded,
    obs_before=before_group_filter,
    obs_after=after_group_filter,
    obs_removed=before_group_filter - after_group_filter,
))

# --- STEP 7: FINAL SUMMARY ---
# Prints the observation/paper counts after each filtering step and the
# overall retention rates, with warnings when little data is left.
print("\n" + "="*70)
print("FILTER APPLICATION SUMMARY")
print("="*70)

final_obs = len(data_filtered)
final_papers = data_filtered['id'].nunique()

# initial_papers is None when the data has no 'id' column; formatting None
# with a width spec raises TypeError, so render a placeholder instead.
initial_papers_text = str(initial_papers) if initial_papers is not None else 'n/a'

print(f"\n📊 Data Flow:")
print(f"  Initial dataset:        {initial_rows:>6} observations, {initial_papers_text:>4} papers")

if prefilter_col != 'None':
    prefilter_entry = next(x for x in filter_log if x['step'] == 'pre-filter')
    print(f"  After pre-filter:       {prefilter_entry['obs_after']:>6} observations ({-prefilter_entry['obs_removed']:+d})")

group_entry = next(x for x in filter_log if x['step'] == 'group_filtering')
print(f"  After group filtering:  {group_entry['obs_after']:>6} observations ({-group_entry['obs_removed']:+d})")
print(f"  Final dataset:          {final_obs:>6} observations, {final_papers:>4} papers")

# Calculate retention rates (guarding against an empty starting dataset)
retention_rate = (final_obs / initial_rows) * 100 if initial_rows else 0.0
paper_retention = (final_papers / initial_papers) * 100 if initial_papers else None

print(f"\n📈 Retention Rates:")
print(f"  Observations: {retention_rate:.1f}% retained")
# Compare with None explicitly: 0.0% retained is a real result that a plain
# truthiness test would silently hide.
if paper_retention is not None:
    print(f"  Papers: {paper_retention:.1f}% retained")

# Warnings
if retention_rate < 50:
    print(f"\n  ⚠️  WARNING: Less than 50% of data retained. Consider relaxing filters.")
if len(groups_to_keep) < 3:
    print(f"\n  ⚠️  WARNING: Only {len(groups_to_keep)} groups available. Limited subgroup analysis possible.")

# --- STEP 8: EXPORT CONFIGURATION ---
print("\n" + "="*70, "CONFIGURATION EXPORT", "="*70, sep="\n")

# Selections and outcome of this filtering run
ANALYSIS_CONFIG = dict(
    timestamp=datetime.datetime.now(),
    prefilter_col=prefilter_col,
    prefilter_values=selected_values if prefilter_col != 'None' else None,
    filterCol1=filterCol1,
    filterCol2=filterCol2,
    analysis_type=analysis_type,
    minPapers=minPapers,
    minObservations=minObservations,
    n_groups_total=total_groups_found,
    n_groups_kept=len(groups_to_keep),
    n_groups_excluded=total_excluded,
    n_observations=len(data_filtered),
    n_papers=data_filtered['id'].nunique(),
    retention_rate=retention_rate,
    groups_kept=groups_kept_details,
    groups_excluded_papers=groups_excluded_papers,
    groups_excluded_obs=groups_excluded_obs,
)

# Before/after counts plus the step-by-step filter log
FILTER_METADATA = dict(
    initial_data=dict(rows=initial_rows, papers=initial_papers),
    filter_log=filter_log,
    final_data=dict(rows=len(data_filtered), papers=data_filtered['id'].nunique()),
    groups=dict(
        total=total_groups_found,
        kept=len(groups_to_keep),
        excluded=total_excluded,
    ),
)

print("✓ Configuration saved to ANALYSIS_CONFIG")
print("✓ Filter metadata saved to FILTER_METADATA")

# --- STEP 9: DATA VALIDATION ---
print("\n" + "="*70, "DATA VALIDATION", "="*70, sep="\n")

# Collected human-readable warnings about the filtered dataset
validation_warnings = []

# Kept groups that only barely carry data (at most 2 papers or 3 observations)
small_groups = []
for g in groups_kept_details:
    if g['papers'] <= 2 or g['obs'] <= 3:
        small_groups.append(g)
if small_groups:
    validation_warnings.append(f"{len(small_groups)} groups have minimal data (≤2 papers or ≤3 obs)")

# Rows where either standard deviation is missing
if {'sde', 'sdc'}.issubset(data_filtered.columns):
    missing_sd = data_filtered[['sde', 'sdc']].isna().any(axis=1).sum()
    if missing_sd > 0:
        validation_warnings.append(f"{missing_sd} observations missing standard deviations")

if not validation_warnings:
    print("\n✓ No data quality issues detected")
else:
    print("\n⚠️  Data Quality Warnings:")
    for i, warning in enumerate(validation_warnings, 1):
        print(f"  {i}. {warning}")

# --- FINAL STATUS ---
print("\n" + "="*70, "✅ FILTERING COMPLETE", "="*70, sep="\n")
print("\n📦 Dataset Ready:")
print(f"  • {len(data_filtered)} observations")
print(f"  • {data_filtered['id'].nunique()} unique papers")
print(f"  • {len(groups_to_keep)} subgroups")
print(f"  • {analysis_type} analysis")

print(
    "\n▶️  Next Steps:",
    "  1. Review the filtering results above",
    "  2. Run the next cell to calculate effect sizes",
    "  3. If results are unsatisfactory, adjust filters in Cell 2 and re-run",
    sep="\n",
)

print("\n" + "="*70)


APPLYING SELECTED FILTERS
Timestamp: 2025-11-14 18:01:16

STEP 1: PRE-FILTER APPLICATION

📌 No pre-filter applied
  Using all 69 observations from dataset

STEP 2: SUBGROUP ANALYSIS CONFIGURATION

📊 Analysis Structure:
  Primary moderator:   Inoculation
  Secondary moderator: None (single-factor analysis)

⚙️  Quality Thresholds:
  Minimum papers/study:      2
  Minimum observations:      2

  Analysis type: ONE-WAY subgroup analysis

STEP 3: COLUMN VALIDATION
✓ All required columns present:
  • Inoculation: 2 unique values, 0 missing

STEP 4: GROUP IDENTIFICATION

Calculating statistics for all groups...
✓ Found 2 unique groups in data

📋 Group Structure Preview:
  Factor 1                                   Papers      Obs
  ---------------------------------------- -------- --------
  seed                                           20       52
  soil                                            5       17

STEP 5: APPLYING QUALITY THRESHOLDS

Evaluating 2 groups against thresholds...

F

In [5]:
#@title 🔧 ADVANCED HETEROGENEITY ESTIMATORS

# =============================================================================
# CELL 4.5: ADVANCED TAU-SQUARED ESTIMATORS
# Purpose: Provides multiple methods for estimating between-study variance
# Dependencies: None (standalone functions)
# Used by: Cell 6 (Overall Analysis), Cell 8 (Subgroup Analysis)
# =============================================================================

import numpy as np
import pandas as pd
from scipy.optimize import minimize_scalar, minimize
from scipy.stats import chi2
import warnings

# Announce the module: title framed by a 70-column rule above and below.
_banner_rule = "=" * 70
for _banner_line in (_banner_rule, "HETEROGENEITY ESTIMATORS MODULE", _banner_rule):
    print(_banner_line)

# --- 1. DERSIMONIAN-LAIRD (Your current method) ---

def calculate_tau_squared_DL(df, effect_col, var_col):
    """
    DerSimonian-Laird estimator for tau-squared

    Advantages:
    - Simple, fast
    - Non-iterative
    - Always converges

    Disadvantages:
    - Can underestimate tau² in small samples
    - Negative values truncated to 0
    - Less efficient than ML methods

    Rows whose effect size is not finite, or whose variance is not finite
    or not strictly positive, are ignored (with a warning), matching the
    validity filter used by the iterative estimators in this module.

    Parameters:
    -----------
    df : DataFrame
        Data with effect sizes and variances
    effect_col : str
        Name of effect size column
    var_col : str
        Name of variance column

    Returns:
    --------
    float : tau-squared estimate (0.0 when fewer than two usable
        observations remain or when the computation fails)
    """
    k = len(df)
    if k < 2:
        return 0.0

    try:
        yi = np.asarray(df[effect_col], dtype=float)
        vi = np.asarray(df[var_col], dtype=float)

        # A non-positive variance yields a negative/infinite weight and a NaN
        # effect would otherwise be skipped by the sums while still counting
        # towards the degrees of freedom, so drop such rows up front.
        valid_mask = np.isfinite(yi) & np.isfinite(vi) & (vi > 0)
        if not valid_mask.all():
            warnings.warn(
                f"DL estimator ignored {(~valid_mask).sum()} observations with "
                "invalid effect sizes or variances"
            )
            yi = yi[valid_mask]
            vi = vi[valid_mask]
            k = len(yi)

        if k < 2:
            return 0.0

        # Fixed-effects (inverse-variance) weights
        w = 1 / vi
        sum_w = w.sum()

        # Fixed-effects pooled estimate
        pooled_effect = (w * yi).sum() / sum_w

        # Cochran's Q and its degrees of freedom
        Q = (w * (yi - pooled_effect) ** 2).sum()
        df_Q = k - 1

        # Scaling constant C of the method-of-moments estimator
        C = sum_w - ((w ** 2).sum() / sum_w)

        # Method-of-moments estimate, truncated at zero
        if C > 0 and Q > df_Q:
            tau_sq = (Q - df_Q) / C
        else:
            tau_sq = 0.0

        return max(0.0, tau_sq)

    except Exception as e:
        warnings.warn(f"Error in DL estimator: {e}")
        return 0.0


# --- 2. RESTRICTED MAXIMUM LIKELIHOOD (REML) ---

def calculate_tau_squared_REML(df, effect_col, var_col, max_iter=100, tol=1e-8):
    """
    REML estimator for tau-squared (RECOMMENDED - Gold Standard)

    Advantages:
    - Approximately unbiased for tau²
    - Accounts for uncertainty in estimating mu
    - Better performance in small samples
    - Generally preferred in literature

    Disadvantages:
    - Iterative (slightly slower)
    - Can fail to converge in extreme cases

    Reference:
    Viechtbauer, W. (2005). Bias and efficiency of meta-analytic variance
    estimators in the random-effects model. Journal of Educational and
    Behavioral Statistics, 30(3), 261-293.

    Rows with a non-finite effect size or a non-finite / non-positive
    variance are dropped (with a warning) before estimation.

    Parameters:
    -----------
    df : DataFrame
        Data with effect sizes and variances
    effect_col : str
        Name of effect size column
    var_col : str
        Name of variance column
    max_iter : int
        Maximum iterations for optimization
    tol : float
        Convergence tolerance (absolute tolerance on tau²)

    Returns:
    --------
    float : tau-squared estimate (exactly 0.0 when the restricted likelihood
        is maximised at the lower boundary; the DL estimate when the
        optimisation fails)
    """
    k = len(df)
    if k < 2:
        return 0.0

    try:
        # Extract data
        yi = np.asarray(df[effect_col].values, dtype=float)
        vi = np.asarray(df[var_col].values, dtype=float)

        # Remove observations the likelihood cannot use; a NaN effect size
        # would otherwise turn the whole objective into NaN.
        valid_mask = np.isfinite(yi) & np.isfinite(vi) & (vi > 0)
        if not valid_mask.all():
            warnings.warn(
                f"Removed {(~valid_mask).sum()} observations with invalid "
                "effect sizes or variances"
            )
            yi = yi[valid_mask]
            vi = vi[valid_mask]
            k = len(yi)

        if k < 2:
            return 0.0

        # REML objective function (negative restricted log-likelihood,
        # additive constants dropped)
        def reml_objective(tau2):
            # Ensure tau2 is non-negative
            tau2 = max(0, tau2)

            # Random-effects weights
            wi = 1 / (vi + tau2)
            sum_wi = wi.sum()

            if sum_wi <= 0:
                return 1e10

            # Pooled estimate and weighted residual sum of squares
            mu = (wi * yi).sum() / sum_wi
            Q = (wi * (yi - mu)**2).sum()

            # -L = 0.5 * [sum(log(vi + tau2)) + log(sum(wi)) + Q]
            return 0.5 * (
                np.sum(np.log(vi + tau2)) +
                np.log(sum_wi) +
                Q
            )

        # Search interval for tau2: [0, max(10 * Var(yi), 100)].
        # k >= 2 is guaranteed here, so the sample variance is always defined;
        # using it for k == 2 as well keeps the upper bound from truncating
        # the estimate when the two effects are far apart.
        var_yi = np.var(yi, ddof=1)
        upper_bound = max(10 * var_yi, 100)

        # Optimize
        result = minimize_scalar(
            reml_objective,
            bounds=(0, upper_bound),
            method='bounded',
            options={'maxiter': max_iter, 'xatol': tol}
        )

        if result.success:
            tau_sq = result.x
            # The bounded search only evaluates interior points, so a boundary
            # optimum comes back as a tiny positive number; report it as 0.
            if reml_objective(0.0) <= result.fun:
                tau_sq = 0.0
        else:
            warnings.warn("REML optimization did not converge, using DL fallback")
            tau_sq = calculate_tau_squared_DL(df, effect_col, var_col)

        return max(0.0, tau_sq)

    except Exception as e:
        warnings.warn(f"Error in REML estimator: {e}, using DL fallback")
        return calculate_tau_squared_DL(df, effect_col, var_col)


# --- 3. MAXIMUM LIKELIHOOD (ML) ---

def calculate_tau_squared_ML(df, effect_col, var_col, max_iter=100, tol=1e-8):
    """
    Maximum Likelihood estimator for tau-squared

    Advantages:
    - Efficient asymptotically
    - Produces valid estimates

    Disadvantages:
    - Biased downward (underestimates tau²)
    - Less preferred than REML
    - REML is generally recommended instead

    Rows with a non-finite effect size or a non-finite / non-positive
    variance are dropped (with a warning) before estimation.

    Parameters:
    -----------
    df : DataFrame
        Data with effect sizes and variances
    effect_col : str
        Name of effect size column
    var_col : str
        Name of variance column
    max_iter : int
        Maximum iterations
    tol : float
        Convergence tolerance (absolute tolerance on tau²)

    Returns:
    --------
    float : tau-squared estimate (exactly 0.0 when the likelihood is
        maximised at the lower boundary; the DL estimate when the
        optimisation fails)
    """
    k = len(df)
    if k < 2:
        return 0.0

    try:
        yi = np.asarray(df[effect_col].values, dtype=float)
        vi = np.asarray(df[var_col].values, dtype=float)

        # Drop observations the likelihood cannot use; a NaN effect size
        # would otherwise turn the whole objective into NaN.
        valid_mask = np.isfinite(yi) & np.isfinite(vi) & (vi > 0)
        if not valid_mask.all():
            warnings.warn(
                f"Removed {(~valid_mask).sum()} observations with invalid "
                "effect sizes or variances"
            )
            yi = yi[valid_mask]
            vi = vi[valid_mask]
            k = len(yi)

        if k < 2:
            return 0.0

        # ML objective function (negative log-likelihood, constants dropped)
        def ml_objective(tau2):
            tau2 = max(0, tau2)
            wi = 1 / (vi + tau2)
            sum_wi = wi.sum()

            if sum_wi <= 0:
                return 1e10

            mu = (wi * yi).sum() / sum_wi
            Q = (wi * (yi - mu)**2).sum()

            return 0.5 * (np.sum(np.log(vi + tau2)) + Q)

        # k >= 2 is guaranteed here, so the sample variance is always defined;
        # using it for k == 2 as well keeps the upper bound from truncating
        # the estimate when the two effects are far apart.
        var_yi = np.var(yi, ddof=1)
        upper_bound = max(10 * var_yi, 100)

        result = minimize_scalar(
            ml_objective,
            bounds=(0, upper_bound),
            method='bounded',
            options={'maxiter': max_iter, 'xatol': tol}
        )

        if result.success:
            tau_sq = result.x
            # The bounded search only evaluates interior points, so a boundary
            # optimum comes back as a tiny positive number; report it as 0.
            if ml_objective(0.0) <= result.fun:
                tau_sq = 0.0
        else:
            warnings.warn("ML optimization did not converge, using DL fallback")
            tau_sq = calculate_tau_squared_DL(df, effect_col, var_col)

        return max(0.0, tau_sq)

    except Exception as e:
        warnings.warn(f"Error in ML estimator: {e}, using DL fallback")
        return calculate_tau_squared_DL(df, effect_col, var_col)


# --- 4. PAULE-MANDEL (PM) ---

def calculate_tau_squared_PM(df, effect_col, var_col, max_iter=100, tol=1e-8):
    """
    Paule-Mandel estimator for tau-squared

    Chooses tau² so that the generalized Q statistic, computed with weights
    1 / (vi + tau²), equals its expected value k - 1. When Q is already at
    or below k - 1 for tau² = 0 the estimate is 0.

    Advantages:
    - Solves the Q(tau²) = k - 1 equation directly
    - Good performance

    Disadvantages:
    - Can be unstable with few studies
    - Requires an iterative (numerical) solution

    Reference:
    Paule, R. C., & Mandel, J. (1982). Consensus values and weighting factors.
    Journal of Research of the National Bureau of Standards, 87(5), 377-385.

    Rows with a non-finite effect size or a non-finite / non-positive
    variance are dropped before estimation.

    Parameters:
    -----------
    df : DataFrame
        Data with effect sizes and variances
    effect_col : str
        Name of effect size column
    var_col : str
        Name of variance column
    max_iter : int
        Maximum iterations
    tol : float
        Convergence tolerance (absolute tolerance on tau²)

    Returns:
    --------
    float : tau-squared estimate (the DL estimate when the numerical search
        does not land close enough to Q = k - 1)
    """
    k = len(df)
    if k < 2:
        return 0.0

    try:
        yi = np.asarray(df[effect_col].values, dtype=float)
        vi = np.asarray(df[var_col].values, dtype=float)

        # A NaN effect size would make every Q evaluation NaN, so filter on
        # the effects as well as on the variances.
        valid_mask = np.isfinite(yi) & np.isfinite(vi) & (vi > 0)
        if not valid_mask.all():
            yi = yi[valid_mask]
            vi = vi[valid_mask]
            k = len(yi)

        if k < 2:
            return 0.0

        df_Q = k - 1

        def generalized_q(tau2):
            """Weighted residual sum of squares for a given tau²."""
            wi = 1 / (vi + tau2)
            mu = (wi * yi).sum() / wi.sum()
            return (wi * (yi - mu)**2).sum()

        # No excess heterogeneity: the PM estimate is exactly zero. Deciding
        # this up front avoids returning the tiny positive value a bounded
        # search would stop at.
        if generalized_q(0.0) <= df_Q:
            return 0.0

        # PM objective: squared distance of Q(tau2) from its target k - 1
        def pm_objective(tau2):
            tau2 = max(0, tau2)
            return (generalized_q(tau2) - df_Q)**2

        # k >= 2 is guaranteed here, so the sample variance is always defined;
        # using it for k == 2 as well keeps the root inside the search interval
        # when the two effects are far apart.
        var_yi = np.var(yi, ddof=1)
        upper_bound = max(10 * var_yi, 100)

        result = minimize_scalar(
            pm_objective,
            bounds=(0, upper_bound),
            method='bounded',
            options={'maxiter': max_iter, 'xatol': tol}
        )

        if result.success and result.fun < 1:  # Good convergence
            tau_sq = result.x
        else:
            # If PM fails, use DL
            tau_sq = calculate_tau_squared_DL(df, effect_col, var_col)

        return max(0.0, tau_sq)

    except Exception as e:
        warnings.warn(f"Error in PM estimator: {e}, using DL fallback")
        return calculate_tau_squared_DL(df, effect_col, var_col)


# --- 5. SIDIK-JONKMAN (SJ) ---

def calculate_tau_squared_SJ(df, effect_col, var_col):
    """
    Sidik-Jonkman estimator for tau-squared

    Two-step, non-iterative estimator:
      1. crude initial value tau0² = (1/k) * sum((yi - mean(yi))²)
      2. tau² = tau0² * sum(wi * (yi - mu)²) / (k - 1), with
         wi = 1 / (vi + tau0²) and mu the wi-weighted mean of yi

    Advantages:
    - Simple, non-iterative
    - Good performance with few studies
    - Conservative (tends to produce larger estimates)

    Disadvantages:
    - Can be overly conservative
    - Less commonly used

    Reference:
    Sidik, K., & Jonkman, J. N. (2005). Simple heterogeneity variance
    estimation for meta-analysis. Journal of the Royal Statistical Society,
    Series C, 54(2), 367-384.

    Rows with a non-finite effect size or a non-finite / non-positive
    variance are dropped before estimation. With fewer than three usable
    observations the DL estimate is returned instead.

    Parameters:
    -----------
    df : DataFrame
        Data with effect sizes and variances
    effect_col : str
        Name of effect size column
    var_col : str
        Name of variance column

    Returns:
    --------
    float : tau-squared estimate
    """
    k = len(df)
    if k < 3:  # Need at least 3 studies for SJ
        return calculate_tau_squared_DL(df, effect_col, var_col)

    try:
        yi = np.asarray(df[effect_col].values, dtype=float)
        vi = np.asarray(df[var_col].values, dtype=float)

        valid_mask = np.isfinite(yi) & np.isfinite(vi) & (vi > 0)
        if not valid_mask.all():
            yi = yi[valid_mask]
            vi = vi[valid_mask]
            k = len(yi)

        if k < 3:
            return calculate_tau_squared_DL(df, effect_col, var_col)

        # Step 1: crude heterogeneity estimate around the unweighted mean
        tau0_sq = np.mean((yi - yi.mean())**2)

        # Identical effects: nothing to rescale (and the ratio vi / tau0²
        # used by the estimator would be undefined).
        if tau0_sq <= 0:
            return 0.0

        # Step 2: re-weight with the crude estimate and rescale it by the
        # resulting weighted residual sum of squares. Both factors are in
        # effect-size-squared / dimensionless units respectively, so the
        # result is a variance.
        wi = 1 / (vi + tau0_sq)
        mu = (wi * yi).sum() / wi.sum()
        rss = (wi * (yi - mu)**2).sum()

        tau_sq = tau0_sq * rss / (k - 1)

        return max(0.0, tau_sq)

    except Exception as e:
        warnings.warn(f"Error in SJ estimator: {e}, using DL fallback")
        return calculate_tau_squared_DL(df, effect_col, var_col)


# --- 6. UNIFIED ESTIMATOR FUNCTION ---

def calculate_tau_squared(df, effect_col, var_col, method='REML', **kwargs):
    """
    Unified function to calculate tau-squared using specified method

    Parameters:
    -----------
    df : DataFrame
        Data with effect sizes and variances
    effect_col : str
        Name of effect size column
    var_col : str
        Name of variance column
    method : str
        Estimation method: 'DL', 'REML', 'ML', 'PM', 'SJ' (case-insensitive)
        Default: 'REML' (recommended). Unknown values fall back to REML
        with a warning.
    **kwargs : dict
        Additional arguments passed to estimator. Options the selected
        estimator does not accept (e.g. max_iter for the non-iterative DL
        and SJ estimators) are ignored with a warning instead of making
        the call fail.

    Returns:
    --------
    float : tau-squared estimate
    dict : additional information with keys 'method', 'tau_squared', 'tau'
        and 'success'; when the estimator raised and DL was used instead,
        'method' is 'DL', 'success' is False and 'fallback' / 'error' are
        added.
    """
    # Local import: only needed here, keeps the cell's import block untouched.
    import inspect

    # str() lets a non-string value (e.g. None) reach the "unknown method"
    # branch instead of failing on .upper().
    method = str(method).upper()

    estimators = {
        'DL': calculate_tau_squared_DL,
        'REML': calculate_tau_squared_REML,
        'ML': calculate_tau_squared_ML,
        'PM': calculate_tau_squared_PM,
        'SJ': calculate_tau_squared_SJ
    }

    if method not in estimators:
        warnings.warn(f"Unknown method '{method}', using REML")
        method = 'REML'

    try:
        estimator = estimators[method]

        # Forward only the options this estimator declares, so one set of
        # options can be reused across all methods.
        parameters = inspect.signature(estimator).parameters
        takes_any_option = any(
            p.kind is inspect.Parameter.VAR_KEYWORD for p in parameters.values()
        )
        if takes_any_option:
            accepted = dict(kwargs)
        else:
            accepted = {name: value for name, value in kwargs.items()
                        if name in parameters}

        ignored = sorted(set(kwargs) - set(accepted))
        if ignored:
            warnings.warn(
                f"{method} estimator does not accept option(s) "
                f"{', '.join(ignored)}; ignoring them"
            )

        tau_sq = estimator(df, effect_col, var_col, **accepted)

        info = {
            'method': method,
            'tau_squared': tau_sq,
            'tau': np.sqrt(tau_sq),
            'success': True
        }

        return tau_sq, info

    except Exception as e:
        warnings.warn(f"Error with {method}, falling back to DL: {e}")
        tau_sq = calculate_tau_squared_DL(df, effect_col, var_col)

        info = {
            'method': 'DL',
            'tau_squared': tau_sq,
            'tau': np.sqrt(tau_sq),
            'success': False,
            'fallback': True,
            'error': str(e)
        }

        return tau_sq, info


# --- 7. COMPARISON FUNCTION ---

def compare_tau_estimators(df, effect_col, var_col):
    """
    Run every tau-squared estimator on one dataset and tabulate the results.

    Intended for sensitivity analysis: the table shows how much the
    between-study variance estimate depends on the chosen method.

    Parameters:
    -----------
    df : DataFrame
        Data with effect sizes and variances
    effect_col : str
        Name of effect size column
    var_col : str
        Name of variance column

    Returns:
    --------
    DataFrame : one row per method (DL, REML, ML, PM, SJ) with columns
        'Method', 'τ²', 'τ' and 'Success'; a method that raises gets NaN
        estimates and Success = False
    """

    def summarise(name):
        # Everything that can raise stays inside the try so that any failure
        # degrades to a NaN row rather than aborting the comparison.
        try:
            estimate, details = calculate_tau_squared(
                df, effect_col, var_col, method=name
            )
            return {
                'Method': name,
                'τ²': estimate,
                'τ': np.sqrt(estimate),
                'Success': details['success']
            }
        except Exception:
            return {
                'Method': name,
                'τ²': np.nan,
                'τ': np.nan,
                'Success': False
            }

    rows = [summarise(name) for name in ('DL', 'REML', 'ML', 'PM', 'SJ')]
    return pd.DataFrame(rows)


# --- 8. DISPLAY MODULE INFO ---
# Load confirmation, the list of available estimators, usage examples and a
# closing rule, emitted one line per print call.
_module_info_lines = (
    "\n✅ Heterogeneity estimators loaded successfully",
    "\n📊 Available methods:",
    "  • DL (DerSimonian-Laird) - Simple, fast",
    "  • REML (Restricted ML) - ⭐ RECOMMENDED (Gold standard)",
    "  • ML (Maximum Likelihood) - Asymptotically efficient",
    "  • PM (Paule-Mandel) - Exact Q solution",
    "  • SJ (Sidik-Jonkman) - Conservative, good for small k",
    "\n💡 Usage:",
    "  tau_sq, info = calculate_tau_squared(df, 'effect_size', 'variance', method='REML')",
    "  comparison = compare_tau_estimators(df, 'effect_size', 'variance')",
    "\n" + "=" * 70,
)
for _module_info_line in _module_info_lines:
    print(_module_info_line)

HETEROGENEITY ESTIMATORS MODULE

✅ Heterogeneity estimators loaded successfully

📊 Available methods:
  • DL (DerSimonian-Laird) - Simple, fast
  • REML (Restricted ML) - ⭐ RECOMMENDED (Gold standard)
  • ML (Maximum Likelihood) - Asymptotically efficient
  • PM (Paule-Mandel) - Exact Q solution
  • SJ (Sidik-Jonkman) - Conservative, good for small k

💡 Usage:
  tau_sq, info = calculate_tau_squared(df, 'effect_size', 'variance', method='REML')
  comparison = compare_tau_estimators(df, 'effect_size', 'variance')



In [6]:
#@title 🔬 DETECT & SELECT EFFECT SIZE TYPE

# =============================================================================
# CELL 4: EFFECT SIZE TYPE DETECTION AND SELECTION
# Purpose: Analyze data characteristics and recommend appropriate effect size
# Dependencies: Cell 3 (data_filtered)
# Outputs: ANALYSIS_CONFIG with effect_size_type and es_config
# =============================================================================

# Cell banner with the run timestamp.
_es_rule = "=" * 70
print("\n" + _es_rule)
print("EFFECT SIZE TYPE DETECTION & SELECTION")
print(_es_rule)
print(f"Timestamp: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

# --- STEP 1: DATA CHARACTERISTICS ANALYSIS ---
print("\n" + _es_rule)
print("STEP 1: ANALYZING DATA CHARACTERISTICS")
print(_es_rule)

_n_rows = len(data_filtered)
print(f"\n🔍 Examining {_n_rows} observations across {data_filtered['id'].nunique()} studies...")

# Summary statistics of the treatment (xe) and control (xc) means
xe_stats = data_filtered['xe'].describe()
xc_stats = data_filtered['xc'].describe()

# A column counts as "available" when it exists and holds at least one value
has_sde = 'sde' in data_filtered.columns and data_filtered['sde'].notna().any()
has_sdc = 'sdc' in data_filtered.columns and data_filtered['sdc'].notna().any()

# Number (and share) of rows where both standard deviations are present
if has_sde and has_sdc:
    sd_availability = data_filtered[['sde', 'sdc']].notna().all(axis=1).sum()
else:
    sd_availability = 0

if _n_rows > 0:
    sd_pct = (sd_availability / _n_rows) * 100
else:
    sd_pct = 0

print("\n📊 Basic Statistics:")
for _lead, _group_title, _group_stats in (
    ("", "Treatment (xe)", xe_stats),
    ("\n", "Control (xc)", xc_stats),
):
    print(f"{_lead}  {_group_title}:")
    print(f"    Mean:   {_group_stats['mean']:>10.4f}")
    print(f"    Median: {_group_stats['50%']:>10.4f}")
    print(f"    Std:    {_group_stats['std']:>10.4f}")
    print(f"    Range:  [{_group_stats['min']:.4f}, {_group_stats['max']:.4f}]")

print("\n  Standard Deviations:")
print(f"    Available: {sd_availability}/{_n_rows} ({sd_pct:.1f}%)")

# --- STEP 2: CHARACTERISTIC DETECTION ---
# Inspects the filtered data for six characteristics (control normalisation,
# negative values, zeros, scale heterogeneity, order of magnitude and the
# treatment/control ratio), prints each finding and records the raw numbers in
# `detection_results` for the recommendation step.
print("\n" + "="*70)
print("STEP 2: DETECTING DATA PATTERNS")
print("="*70)

# Initialize detection results
detection_results = {}

# Every percentage in this step is relative to the number of filtered
# observations. Guard the division the same way the SD-coverage percentage is
# guarded, so an empty filtered dataset reports 0% instead of raising
# ZeroDivisionError.
_n_obs_step2 = len(data_filtered)


def _pct_of_observations(count):
    """Return `count` as a percentage of the filtered observations (0 if there are none)."""
    return (count / _n_obs_step2) * 100 if _n_obs_step2 > 0 else 0


# Characteristic 1: Control values near 1.0 (fold-change normalization)
control_near_one = ((data_filtered['xc'] >= 0.95) & (data_filtered['xc'] <= 1.05)).sum()
control_exactly_one = (data_filtered['xc'] == 1.0).sum()
pct_control_near_one = _pct_of_observations(control_near_one)
pct_control_exactly_one = _pct_of_observations(control_exactly_one)

detection_results['control_normalization'] = {
    'near_one': control_near_one,
    'pct_near_one': pct_control_near_one,
    'exactly_one': control_exactly_one,
    'pct_exactly_one': pct_control_exactly_one
}

print(f"\n1️⃣  Control Group Normalization:")
print(f"    Exactly 1.0:      {control_exactly_one:>5} ({pct_control_exactly_one:>5.1f}%)")
print(f"    Near 1.0 (±0.05): {control_near_one:>5} ({pct_control_near_one:>5.1f}%)")
if pct_control_exactly_one > 50:
    print(f"    → Strong evidence of fold-change normalization ✓")
elif pct_control_near_one > 30:
    print(f"    → Moderate evidence of fold-change normalization ⚠")
else:
    print(f"    → No evidence of fold-change normalization")

# Characteristic 2: Negative values (incompatible with ratios)
has_negative_xe = (data_filtered['xe'] < 0).any()
has_negative_xc = (data_filtered['xc'] < 0).any()
n_negative_xe = (data_filtered['xe'] < 0).sum()
n_negative_xc = (data_filtered['xc'] < 0).sum()

detection_results['negative_values'] = {
    'has_negative_xe': has_negative_xe,
    'has_negative_xc': has_negative_xc,
    'n_negative_xe': n_negative_xe,
    'n_negative_xc': n_negative_xc
}

print(f"\n2️⃣  Negative Values (invalid for ratios):")
print(f"    Treatment: {n_negative_xe} negative values ({_pct_of_observations(n_negative_xe):.1f}%)")
print(f"    Control:   {n_negative_xc} negative values ({_pct_of_observations(n_negative_xc):.1f}%)")
if has_negative_xe or has_negative_xc:
    print(f"    → Ratio measures NOT applicable ❌")
    print(f"    → Standardized mean differences required ✓")
else:
    print(f"    → All values positive (ratio measures possible) ✓")

# Characteristic 3: Zero values (problematic for log ratios)
has_zero_xe = (data_filtered['xe'] == 0).any()
has_zero_xc = (data_filtered['xc'] == 0).any()
n_zero_xe = (data_filtered['xe'] == 0).sum()
n_zero_xc = (data_filtered['xc'] == 0).sum()

detection_results['zero_values'] = {
    'has_zero_xe': has_zero_xe,
    'has_zero_xc': has_zero_xc,
    'n_zero_xe': n_zero_xe,
    'n_zero_xc': n_zero_xc
}

print(f"\n3️⃣  Zero Values (problematic for log ratios):")
print(f"    Treatment: {n_zero_xe} zeros ({_pct_of_observations(n_zero_xe):.1f}%)")
print(f"    Control:   {n_zero_xc} zeros ({_pct_of_observations(n_zero_xc):.1f}%)")
if has_zero_xe or has_zero_xc:
    print(f"    → Warning: Zero values will need special handling for lnRR ⚠")
else:
    print(f"    → No zeros detected ✓")

# Characteristic 4: Scale heterogeneity
xe_range = xe_stats['max'] - xe_stats['min']
xc_range = xc_stats['max'] - xc_stats['min']
# The 0.0001 offset keeps the ratio finite when the smaller range is zero
scale_ratio = max(xe_range, xc_range) / (min(xe_range, xc_range) + 0.0001)

# Calculate coefficient of variation
xe_cv = (xe_stats['std'] / xe_stats['mean']) * 100 if xe_stats['mean'] != 0 else np.inf
xc_cv = (xc_stats['std'] / xc_stats['mean']) * 100 if xc_stats['mean'] != 0 else np.inf

detection_results['scale_heterogeneity'] = {
    'xe_range': xe_range,
    'xc_range': xc_range,
    'scale_ratio': scale_ratio,
    'xe_cv': xe_cv,
    'xc_cv': xc_cv
}

print(f"\n4️⃣  Scale Heterogeneity:")
print(f"    Treatment range: {xe_range:.4f}")
print(f"    Control range:   {xc_range:.4f}")
print(f"    Range ratio:     {scale_ratio:.2f}×")
print(f"    Treatment CV:    {xe_cv:.1f}%")
print(f"    Control CV:      {xc_cv:.1f}%")
if scale_ratio > 100:
    print(f"    → Very high heterogeneity - ratio measures recommended ✓")
elif scale_ratio > 10:
    print(f"    → Moderate heterogeneity - ratio measures beneficial ⚠")
else:
    print(f"    → Low heterogeneity - standardized differences work well ✓")

# Characteristic 5: Order of magnitude
# (None when the mean is zero, negative or NaN, since log10 is undefined there)
xe_magnitude = np.log10(xe_stats['mean']) if xe_stats['mean'] > 0 else None
xc_magnitude = np.log10(xc_stats['mean']) if xc_stats['mean'] > 0 else None

detection_results['order_of_magnitude'] = {
    'xe_magnitude': xe_magnitude,
    'xc_magnitude': xc_magnitude
}

print(f"\n5️⃣  Order of Magnitude:")
if xe_magnitude is not None and xc_magnitude is not None:
    print(f"    Treatment: 10^{xe_magnitude:.2f} (mean = {xe_stats['mean']:.4f})")
    print(f"    Control:   10^{xc_magnitude:.2f} (mean = {xc_stats['mean']:.4f})")
    if abs(xe_magnitude) > 2 or abs(xc_magnitude) > 2:
        print(f"    → Large values suggest ratio-scale data ✓")
else:
    print(f"    → Cannot calculate (zero or negative values present)")

# Characteristic 6: Ratio of means
if xc_stats['mean'] > 0 and xe_stats['mean'] > 0:
    mean_ratio = xe_stats['mean'] / xc_stats['mean']
    detection_results['mean_ratio'] = mean_ratio
    print(f"\n6️⃣  Treatment/Control Ratio:")
    print(f"    Ratio of means: {mean_ratio:.4f}")
    if 0.8 < xc_stats['mean'] < 1.2:
        print(f"    Control near 1.0 suggests fold-change data ✓")
else:
    detection_results['mean_ratio'] = None
    print(f"\n6️⃣  Treatment/Control Ratio:")
    print(f"    → Cannot calculate (zero or negative means)")

# --- STEP 3: RECOMMENDATION ENGINE ---
# Point-based vote between lnRR and Hedges' g. Each rule adds points to one
# side and logs a table row explaining why; the higher total wins.
print("\n" + "="*70)
print("STEP 3: EFFECT SIZE RECOMMENDATION")
print("="*70)

recommendation_reasons = []
confidence_factors = []
score_lnRR = 0
score_hedges_g = 0

# Decision Rule 1: Negative values
if not (has_negative_xe or has_negative_xc):
    score_lnRR += 2
    recommendation_reasons.append(dict(
        factor='All positive values',
        weight='+',
        favors='lnRR',
        explanation='Compatible with ratio measures',
    ))
else:
    score_hedges_g += 10  # Strong preference
    recommendation_reasons.append(dict(
        factor='Negative values present',
        weight='+++',
        favors='Hedges g',
        explanation='Ratio measures cannot handle negative values',
    ))
    confidence_factors.append('negative_values')

# Decision Rule 2: Control normalization (first matching tier only)
if pct_control_exactly_one > 50:
    score_lnRR += 5
    recommendation_reasons.append(dict(
        factor=f'{pct_control_exactly_one:.1f}% controls = 1.0',
        weight='+++',
        favors='lnRR',
        explanation='Strong evidence of fold-change normalization',
    ))
    confidence_factors.append('fold_change_normalization')
elif pct_control_near_one > 30:
    score_lnRR += 3
    recommendation_reasons.append(dict(
        factor=f'{pct_control_near_one:.1f}% controls ≈ 1.0',
        weight='++',
        favors='lnRR',
        explanation='Evidence of fold-change normalization',
    ))
elif 0.8 < xc_stats['mean'] < 1.2:
    score_lnRR += 1
    recommendation_reasons.append(dict(
        factor='Mean control ≈ 1.0',
        weight='+',
        favors='lnRR',
        explanation='Control centered near unity',
    ))

# Decision Rule 3: Scale heterogeneity
if scale_ratio > 100:
    score_lnRR += 3
    recommendation_reasons.append(dict(
        factor=f'Scale ratio {scale_ratio:.0f}×',
        weight='+++',
        favors='lnRR',
        explanation='Very high heterogeneity across studies',
    ))
    confidence_factors.append('scale_heterogeneity')
elif scale_ratio > 10:
    score_lnRR += 2
    recommendation_reasons.append(dict(
        factor=f'Scale ratio {scale_ratio:.1f}×',
        weight='++',
        favors='lnRR',
        explanation='Moderate scale heterogeneity',
    ))
else:
    score_hedges_g += 1
    recommendation_reasons.append(dict(
        factor=f'Scale ratio {scale_ratio:.1f}×',
        weight='+',
        favors='Hedges g',
        explanation='Low scale heterogeneity',
    ))

# Decision Rule 4: Zero values
if has_zero_xe or has_zero_xc:
    score_hedges_g += 2
    recommendation_reasons.append(dict(
        factor='Zero values present',
        weight='++',
        favors='Hedges g',
        explanation='Zero values problematic for log ratios',
    ))
    confidence_factors.append('zero_values')

# Decision Rule 5: Standard deviations (the low-coverage case is a caveat
# only and moves neither score)
if sd_pct > 80:
    score_hedges_g += 1
    recommendation_reasons.append(dict(
        factor=f'{sd_pct:.1f}% have SD data',
        weight='+',
        favors='Hedges g',
        explanation='Excellent SD coverage for standardized differences',
    ))
elif sd_pct < 20:
    recommendation_reasons.append(dict(
        factor=f'Only {sd_pct:.1f}% have SD data',
        weight='⚠',
        favors='Neither',
        explanation='Limited SD data may require mean-only methods',
    ))

# --- STEP 4: DISPLAY RECOMMENDATION ANALYSIS ---
print("\n📋 Decision Factors:")
print(f"  {'Factor':<40} {'Weight':<8} {'Favors':<12} Explanation")
print(f"  {'-'*40} {'-'*8} {'-'*12} {'-'*40}")
for reason in recommendation_reasons:
    print(f"  {reason['factor']:<40} {reason['weight']:<8} {reason['favors']:<12} {reason['explanation']}")

print("\n📊 Recommendation Scores:")
print(f"  log Response Ratio (lnRR): {score_lnRR:>3} points")
print(f"  Hedges' g (SMD):           {score_hedges_g:>3} points")

# Determine recommendation: lnRR only on a strict win; a tie defaults to
# Hedges' g. Confidence is graded by the score gap alone (a tie has a gap of
# zero and therefore grades as "Low").
score_diff = abs(score_lnRR - score_hedges_g)
recommended_type = 'lnRR' if score_lnRR > score_hedges_g else 'hedges_g'
if score_diff >= 5:
    confidence = "High"
elif score_diff >= 3:
    confidence = "Moderate"
else:
    confidence = "Low"

# Store detection metadata
DETECTION_METADATA = dict(
    timestamp=datetime.datetime.now(),
    detection_results=detection_results,
    recommendation_reasons=recommendation_reasons,
    scores=dict(lnRR=score_lnRR, hedges_g=score_hedges_g),
    recommended_type=recommended_type,
    confidence=confidence,
    confidence_factors=confidence_factors,
)

# --- STEP 5: DISPLAY RECOMMENDATION ---
# Renders the recommendation as a coloured HTML card. The console banner
# printed for this part is unnumbered ("RECOMMENDATION"); the step number
# exists only in this comment.
print("\n" + "="*70)
print("RECOMMENDATION")
print("="*70)

# Create recommendation HTML based on result
# Pick the colour scheme, title and explanatory text for the recommended
# measure. Any value other than 'lnRR' is presented as Hedges' g.
if recommended_type == 'lnRR':
    recommendation_color = '#d4edda'
    recommendation_border = '#28a745'
    recommendation_text_color = '#155724'
    recommendation_title = "✓ RECOMMENDED: log Response Ratio (lnRR)"
    # The last line of this template adds a zero-value note only when zeros
    # were detected in xe or xc; otherwise it contributes an empty string.
    recommendation_body = f"""
        <p><b>Confidence: {confidence}</b> (Score: {score_lnRR} vs {score_hedges_g})</p>
        <p>Your data shows characteristics of <b>ratio-based measurements</b> (e.g., gene expression
        fold-changes, relative abundances, growth rates, or other multiplicative scales).</p>

        <p><b>Why lnRR is appropriate:</b></p>
        <ul>
            <li>Works with ratio/multiplicative scales</li>
            <li>Natural for fold-change data (control = 1.0)</li>
            <li>Handles scale heterogeneity well</li>
            <li>Direct biological interpretation as fold-changes</li>
            <li>Symmetric around no effect (lnRR = 0)</li>
        </ul>

        <p><b>Interpretation guide:</b></p>
        <ul>
            <li>lnRR = 0 → No change (RR = 1)</li>
            <li>lnRR = 0.69 → 2-fold increase (RR = 2)</li>
            <li>lnRR = -0.69 → 2-fold decrease (RR = 0.5)</li>
        </ul>

        {"<p><b>⚠ Note:</b> Zero values detected will be handled with small constant addition.</p>" if (has_zero_xe or has_zero_xc) else ""}
    """
else:
    recommendation_color = '#d1ecf1'
    recommendation_border = '#17a2b8'
    recommendation_text_color = '#0c5460'
    recommendation_title = "✓ RECOMMENDED: Hedges' g (Standardized Mean Difference)"
    # Scores are shown winner-first, so the order is swapped relative to the
    # lnRR branch.
    recommendation_body = f"""
        <p><b>Confidence: {confidence}</b> (Score: {score_hedges_g} vs {score_lnRR})</p>
        <p>Your data shows characteristics of <b>absolute measurements</b> with potentially
        different scales or units across studies.</p>

        <p><b>Why Hedges' g is appropriate:</b></p>
        <ul>
            <li>Standardizes effects across different measurement scales</li>
            <li>Handles negative values naturally</li>
            <li>Includes small-sample bias correction</li>
            <li>Widely used and interpretable</li>
            <li>Comparable across different metrics</li>
        </ul>

        <p><b>Interpretation guide (Cohen's benchmarks):</b></p>
        <ul>
            <li>|g| < 0.2 → Negligible effect</li>
            <li>|g| ≈ 0.2-0.5 → Small effect</li>
            <li>|g| ≈ 0.5-0.8 → Medium effect</li>
            <li>|g| > 0.8 → Large effect</li>
        </ul>

        <p><b>Note:</b> Standard deviations available for {sd_pct:.1f}% of observations.</p>
    """

# Wrap the chosen title and body in a bordered card using the colours above.
recommendation_html = f"""
<div style='background-color: {recommendation_color}; border: 2px solid {recommendation_border};
            padding: 20px; border-radius: 8px; margin: 15px 0;'>
    <h3 style='color: {recommendation_text_color}; margin-top: 0;'>{recommendation_title}</h3>
    <div style='color: {recommendation_text_color};'>
        {recommendation_body}
    </div>
</div>
"""

display(HTML(recommendation_html))

# --- STEP 6: CREATE SELECTION WIDGET ---
# Note: the banner printed below labels this part "STEP 4", so the numbering
# in these comments does not match the numbering shown to the user.
print("\n" + "="*70)
print("STEP 4: EFFECT SIZE SELECTION")
print("="*70)

# Radio-button selector for the effect size measure, pre-selected to the
# recommendation computed above. Each option pairs a display label with the
# short value string that identifies the measure.
# NOTE(review): the log-odds-ratio option uses the value 'log_or', whereas
# SUPPORTED_EFFECT_SIZES in Cell 1 spells the key 'log_OR' — confirm which
# spelling the downstream lookups expect.
effect_size_widget = widgets.RadioButtons(
    options=[
        ('log Response Ratio (lnRR) - for ratio/fold-change data', 'lnRR'),
        ("Hedges' g - for standardized mean differences (small-sample corrected)", 'hedges_g'),
        ("Cohen's d - for standardized mean differences (no correction)", 'cohen_d'),
        ('log Odds Ratio (logOR) - for binary outcomes', 'log_or')
    ],
    value=recommended_type,
    description='Effect Size:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='650px')
)

# Information panels for each effect size type.
# Maps each value offered by effect_size_widget ('lnRR', 'hedges_g', 'cohen_d',
# 'log_or') to a static HTML snippet with the formula, variance, interpretation
# table and requirements of that measure. The snippets are looked up by key and
# rendered with display(HTML(...)); they contain no placeholders.
info_panels = {
    # Log response ratio: ratio-type measure, requires positive means.
    'lnRR': """
    <div style='background-color: #f8f9fa; padding: 15px; border-radius: 5px; border-left: 4px solid #28a745;'>
        <h4 style='margin-top: 0; color: #28a745;'>📊 log Response Ratio (lnRR)</h4>

        <p><b>Formula:</b> lnRR = ln(x̄ₑ / x̄ₜ)</p>
        <p><b>Variance:</b> Var(lnRR) = SD²ₑ/(nₑ·x̄²ₑ) + SD²ₜ/(nₜ·x̄²ₜ)</p>

        <p><b>Interpretation:</b></p>
        <table style='width: 100%; border-collapse: collapse;'>
            <tr style='background: #e9ecef;'>
                <th style='padding: 8px; text-align: left;'>lnRR</th>
                <th style='padding: 8px; text-align: left;'>Response Ratio</th>
                <th style='padding: 8px; text-align: left;'>Meaning</th>
            </tr>
            <tr><td style='padding: 8px;'>0</td><td style='padding: 8px;'>1.0</td><td style='padding: 8px;'>No change</td></tr>
            <tr><td style='padding: 8px;'>+0.69</td><td style='padding: 8px;'>2.0</td><td style='padding: 8px;'>2× increase (doubled)</td></tr>
            <tr><td style='padding: 8px;'>-0.69</td><td style='padding: 8px;'>0.5</td><td style='padding: 8px;'>2× decrease (halved)</td></tr>
            <tr><td style='padding: 8px;'>+1.10</td><td style='padding: 8px;'>3.0</td><td style='padding: 8px;'>3× increase (tripled)</td></tr>
        </table>

        <p><b>Best for:</b> Gene expression, abundances, concentrations, rates, any multiplicative data</p>
        <p><b>Conversion:</b> Response Ratio (RR) = exp(lnRR), % Change = (RR - 1) × 100%</p>
        <p><b>Requirements:</b> All values must be positive (xe, xc > 0)</p>
    </div>
    """,
    # Hedges' g: standardized mean difference with the J small-sample correction.
    'hedges_g': """
    <div style='background-color: #f8f9fa; padding: 15px; border-radius: 5px; border-left: 4px solid #17a2b8;'>
        <h4 style='margin-top: 0; color: #17a2b8;'>📊 Hedges' g (Standardized Mean Difference)</h4>

        <p><b>Formula:</b> g = [(x̄ₑ - x̄ₜ) / SDₚₒₒₗₑ𝒹] × J</p>
        <p>Where J = 1 - 3/(4df - 1) is the small-sample correction factor</p>
        <p><b>Variance:</b> Vg = [(nₑ+nₜ)/(nₑ·nₜ) + g²/(2(nₑ+nₜ))] × J²</p>

        <p><b>Interpretation (Cohen's benchmarks):</b></p>
        <table style='width: 100%; border-collapse: collapse;'>
            <tr style='background: #e9ecef;'>
                <th style='padding: 8px; text-align: left;'>|g|</th>
                <th style='padding: 8px; text-align: left;'>Effect Size</th>
                <th style='padding: 8px; text-align: left;'>Description</th>
            </tr>
            <tr><td style='padding: 8px;'>< 0.2</td><td style='padding: 8px;'>Negligible</td><td style='padding: 8px;'>Trivial difference</td></tr>
            <tr><td style='padding: 8px;'>0.2 - 0.5</td><td style='padding: 8px;'>Small</td><td style='padding: 8px;'>Noticeable but small</td></tr>
            <tr><td style='padding: 8px;'>0.5 - 0.8</td><td style='padding: 8px;'>Medium</td><td style='padding: 8px;'>Moderate difference</td></tr>
            <tr><td style='padding: 8px;'>> 0.8</td><td style='padding: 8px;'>Large</td><td style='padding: 8px;'>Substantial difference</td></tr>
        </table>

        <p><b>Best for:</b> Standardizing effects across different measurement scales</p>
        <p><b>Note:</b> Preferred over Cohen's d for small samples (reduces bias)</p>
        <p><b>Requirements:</b> Need standard deviations (SDs) for accurate calculation</p>
    </div>
    """,
    # Cohen's d: standardized mean difference without the J correction.
    'cohen_d': """
    <div style='background-color: #f8f9fa; padding: 15px; border-radius: 5px; border-left: 4px solid #6c757d;'>
        <h4 style='margin-top: 0; color: #6c757d;'>📊 Cohen's d (Standardized Mean Difference)</h4>

        <p><b>Formula:</b> d = (x̄ₑ - x̄ₜ) / SDₚₒₒₗₑ𝒹</p>
        <p><b>Variance:</b> Vd = (nₑ+nₜ)/(nₑ·nₜ) + d²/(2(nₑ+nₜ))</p>

        <p><b>Interpretation:</b> Same as Hedges' g (Cohen's benchmarks apply)</p>

        <p><b>Difference from Hedges' g:</b></p>
        <ul>
            <li>No small-sample correction (J factor = 1)</li>
            <li>Slightly biased upward for small samples</li>
            <li>Bias negligible when n > 20 per group</li>
        </ul>

        <p><b>Best for:</b> Large samples where bias correction is unnecessary</p>
        <p><b>When to use:</b> Historical comparisons, large meta-analyses (n > 20/group)</p>
        <p><b>Note:</b> Hedges' g is generally preferred in modern meta-analysis</p>
    </div>
    """,
    # Log odds ratio: for binary outcomes summarized as 2×2 tables.
    'log_or': """
    <div style='background-color: #f8f9fa; padding: 15px; border-radius: 5px; border-left: 4px solid #ffc107;'>
        <h4 style='margin-top: 0; color: #856404;'>📊 log Odds Ratio (logOR)</h4>

        <p><b>Formula:</b> logOR = ln[(aₑ·dₜ) / (bₑ·cₜ)]</p>
        <p>For 2×2 table: [aₑ, bₑ] = [successes, failures] in treatment</p>
        <p>                [cₜ, dₜ] = [successes, failures] in control</p>
        <p><b>Variance:</b> Var(logOR) = 1/aₑ + 1/bₑ + 1/cₜ + 1/dₜ</p>

        <p><b>Interpretation:</b></p>
        <table style='width: 100%; border-collapse: collapse;'>
            <tr style='background: #e9ecef;'>
                <th style='padding: 8px; text-align: left;'>logOR</th>
                <th style='padding: 8px; text-align: left;'>Odds Ratio</th>
                <th style='padding: 8px; text-align: left;'>Meaning</th>
            </tr>
            <tr><td style='padding: 8px;'>0</td><td style='padding: 8px;'>1.0</td><td style='padding: 8px;'>No association</td></tr>
            <tr><td style='padding: 8px;'>> 0</td><td style='padding: 8px;'>> 1.0</td><td style='padding: 8px;'>Positive association</td></tr>
            <tr><td style='padding: 8px;'>< 0</td><td style='padding: 8px;'>< 1.0</td><td style='padding: 8px;'>Negative association</td></tr>
            <tr><td style='padding: 8px;'>+0.69</td><td style='padding: 8px;'>2.0</td><td style='padding: 8px;'>2× higher odds</td></tr>
        </table>

        <p><b>Best for:</b> Binary outcomes (success/failure, disease/healthy, present/absent)</p>
        <p><b>Conversion:</b> Odds Ratio (OR) = exp(logOR)</p>
        <p><b>Requirements:</b> Count data for binary outcomes in 2×2 contingency tables</p>
        <p><b>Note:</b> Zero cells typically handled with continuity correction (+0.5)</p>
    </div>
    """
}

# Output area hosting the description of the currently selected effect size.
info_output = widgets.Output()

def update_info_panel(change):
    """Replace the shown panel with the one for the newly selected value.

    Args:
        change: Change notification from the widget observer; its 'new'
            entry is used as the key into info_panels.
    """
    with info_output:
        clear_output()
        panel_markup = info_panels[change['new']]
        display(HTML(panel_markup))

# Refresh the panel whenever the radio-button value changes.
effect_size_widget.observe(update_info_panel, names='value')

# Show the panel of the pre-selected (recommended) type immediately, without
# waiting for the first change event.
with info_output:
    display(HTML(info_panels[recommended_type]))

# Confirmation button; its click handler is attached further below.
proceed_button = widgets.Button(
    description='✓ Confirm Selection & Calculate Effect Sizes',
    button_style='success',
    layout=widgets.Layout(width='450px', height='50px'),
    style=dict(font_weight='bold'),
)

# Output area that receives everything printed by the confirmation handler.
proceed_output = widgets.Output()

def on_proceed_clicked(b):
    """Validate the selected effect size type and store it in ANALYSIS_CONFIG.

    Click handler for ``proceed_button``. Everything is printed into
    ``proceed_output``, which is cleared first.

    The configuration is written to ``ANALYSIS_CONFIG`` only after the data
    compatibility check has passed. If lnRR is selected while negative means
    were detected, nothing is saved and any previously stored selection is
    removed, so that later code cannot run on a rejected or stale choice.

    Args:
        b: Object passed in by the button click callback (unused).

    Side effects:
        On success sets ``ANALYSIS_CONFIG['effect_size_type']``,
        ``ANALYSIS_CONFIG['es_config']`` and
        ``ANALYSIS_CONFIG['detection_metadata']``.
    """
    with proceed_output:
        clear_output()
        selected_type = effect_size_widget.value

        print("\n" + "="*70)
        print("EFFECT SIZE CONFIGURATION CONFIRMED")
        print("="*70)

        # Map selection to display name
        type_names = {
            'lnRR': 'log Response Ratio (lnRR)',
            'hedges_g': "Hedges' g",
            'cohen_d': "Cohen's d",
            'log_or': 'log Odds Ratio (logOR)'
        }

        print(f"\n✓ Selected: {type_names[selected_type]}")

        # Show if different from recommendation
        if selected_type != recommended_type:
            print(f"\n⚠️  Note: You selected {type_names[selected_type]}")
            print(f"    Recommendation was: {type_names[recommended_type]} ({confidence} confidence)")
            print(f"    Your selection will be used for the analysis.")
        else:
            print(f"\n✓ Selection matches recommendation ({confidence} confidence)")

        # Configuration for each effect size type: output column names, labels
        # and flags describing the measure.
        es_configs = {
            'lnRR': {
                'effect_col': 'lnRR',
                'var_col': 'var_lnRR',
                'se_col': 'SE_lnRR',
                'ci_lower_col': 'CI_lower_lnRR',
                'ci_upper_col': 'CI_upper_lnRR',
                'effect_label': 'log Response Ratio',
                'effect_label_short': 'lnRR',
                'has_fold_change': True,
                'fold_change_col': 'Response_Ratio',
                'percent_change_col': 'Percent_Change',
                'null_value': 0,
                'scale': 'log',
                'allows_negative': False,
                'allows_zero': False
            },
            'hedges_g': {
                'effect_col': 'hedges_g',
                'var_col': 'Vg',
                'se_col': 'SE_g',
                'ci_lower_col': 'CI_lower_g',
                'ci_upper_col': 'CI_upper_g',
                'effect_label': "Hedges' g",
                'effect_label_short': 'g',
                'has_fold_change': False,
                'null_value': 0,
                'scale': 'standardized',
                'allows_negative': True,
                'allows_zero': True,
                'correction_factor': 'J'
            },
            'cohen_d': {
                'effect_col': 'cohen_d',
                'var_col': 'Vd',
                'se_col': 'SE_d',
                'ci_lower_col': 'CI_lower_d',
                'ci_upper_col': 'CI_upper_d',
                'effect_label': "Cohen's d",
                'effect_label_short': 'd',
                'has_fold_change': False,
                'null_value': 0,
                'scale': 'standardized',
                'allows_negative': True,
                'allows_zero': True,
                'correction_factor': None
            },
            'log_or': {
                'effect_col': 'log_OR',
                'var_col': 'var_log_OR',
                'se_col': 'SE_log_OR',
                'ci_lower_col': 'CI_lower_log_OR',
                'ci_upper_col': 'CI_upper_log_OR',
                'effect_label': 'log Odds Ratio',
                'effect_label_short': 'logOR',
                'has_fold_change': True,
                'fold_change_col': 'Odds_Ratio',
                'null_value': 0,
                'scale': 'log',
                'allows_negative': False,
                'allows_zero': False,
                'requires_binary': True
            }
        }

        # Work on a local reference until the compatibility check has passed;
        # ANALYSIS_CONFIG is only touched afterwards.
        selected_config = es_configs[selected_type]

        print(f"\n📋 Configuration Details:")
        print(f"  Effect size column:      {selected_config['effect_col']}")
        print(f"  Variance column:         {selected_config['var_col']}")
        print(f"  Standard error column:   {selected_config['se_col']}")
        print(f"  Effect label:            {selected_config['effect_label']}")
        print(f"  Null hypothesis value:   {selected_config['null_value']}")
        print(f"  Scale type:              {selected_config['scale']}")
        print(f"  Allows negative values:  {selected_config['allows_negative']}")

        if selected_config['has_fold_change']:
            print(f"  Fold-change available:   Yes")
            print(f"    - Column: {selected_config['fold_change_col']}")
            if 'percent_change_col' in selected_config:
                print(f"    - % Change: {selected_config['percent_change_col']}")

        # Data compatibility check
        print(f"\n🔍 Data Compatibility Check:")

        if selected_type == 'lnRR':
            if has_negative_xe or has_negative_xc:
                print(f"  ❌ ERROR: lnRR requires all positive values")
                print(f"     Found {n_negative_xe + n_negative_xc} negative values")
                print(f"     Please select Hedges' g or Cohen's d instead")
                # Remove any earlier selection so the rejected choice (or a
                # stale one) cannot be used by the calculation cell.
                ANALYSIS_CONFIG.pop('effect_size_type', None)
                ANALYSIS_CONFIG.pop('es_config', None)
                print(f"     Configuration was NOT saved")
                return
            if has_zero_xe or has_zero_xc:
                print(f"  ⚠️  Warning: {n_zero_xe + n_zero_xc} zero values found")
                print(f"     Small constant (0.001) will be added to avoid log(0)")
            else:
                print(f"  ✓ All values positive and non-zero")

        elif selected_type in ['hedges_g', 'cohen_d']:
            if sd_pct < 50:
                print(f"  ⚠️  Warning: Only {sd_pct:.1f}% of observations have SD data")
                print(f"     Effect size calculation may be limited")
            else:
                print(f"  ✓ {sd_pct:.1f}% of observations have complete SD data")

        elif selected_type == 'log_or':
            print(f"  ⚠️  Note: Assumes binary outcome data")
            print(f"     Ensure xe/xc represent event counts")

        # Save to ANALYSIS_CONFIG (only reached when the check above passed)
        ANALYSIS_CONFIG['effect_size_type'] = selected_type
        ANALYSIS_CONFIG['es_config'] = selected_config
        ANALYSIS_CONFIG['detection_metadata'] = DETECTION_METADATA

        print(f"\n" + "="*70)
        print("✅ CONFIGURATION COMPLETE")
        print("="*70)

        print(f"\n▶️  Next Steps:")
        print(f"  1. Review the configuration above")
        print(f"  2. Run the next cell to calculate effect sizes")
        print(f"  3. Effect sizes will be calculated for {len(data_filtered)} observations")

        print(f"\n💡 Tip: If you need to change the effect size type, modify the")
        print(f"    selection above and click Confirm again before proceeding.")

        print("\n" + "="*70)

# Run the confirmation handler on every button click.
proceed_button.on_click(on_proceed_clicked)

# --- ASSEMBLE WIDGET DISPLAY ---
# Vertical stack: rule, heading, hint, radio buttons, info panel, rule,
# confirm button and the handler's output area.
display(widgets.VBox(children=[
    widgets.HTML("<hr style='margin: 20px 0; border: none; border-top: 2px solid #ddd;'>"),
    widgets.HTML("<h3 style='color: #2E86AB;'>📊 Select Effect Size Type</h3>"),
    widgets.HTML(
        "<p style='color: #666;'><i>Choose the effect size metric for your meta-analysis. The recommendation is pre-selected but you can override it if needed.</i></p>"
    ),
    effect_size_widget,
    info_output,
    widgets.HTML("<hr style='margin: 20px 0; border: none; border-top: 2px solid #ddd;'>"),
    proceed_button,
    proceed_output,
]))

# --- FINAL STATUS ---
# One print call; the "\n" separator reproduces one line per argument.
print(
    "\n" + "="*70,
    "✓ Effect size detection and selection interface ready",
    "="*70,
    "\n👆 INSTRUCTIONS:",
    "  1. Review the recommendation above (based on data characteristics)",
    "  2. Select your preferred effect size type (or keep recommendation)",
    "  3. Review the detailed information for your selected type",
    "  4. Click 'Confirm Selection & Calculate Effect Sizes' to proceed",
    "\n" + "="*70,
    sep="\n",
)

# Store summary for downstream use: the detected data characteristics and the
# recommendation, stamped with the time this cell was run.
EFFECT_SIZE_SELECTION_SUMMARY = dict(
    timestamp=datetime.datetime.now(),
    data_characteristics=dict(
        n_observations=len(data_filtered),
        n_studies=data_filtered['id'].nunique(),
        control_normalization_pct=pct_control_exactly_one,
        has_negative_values=has_negative_xe or has_negative_xc,
        has_zero_values=has_zero_xe or has_zero_xc,
        scale_ratio=scale_ratio,
        sd_availability_pct=sd_pct,
    ),
    recommendation=dict(
        type=recommended_type,
        confidence=confidence,
        score_lnRR=score_lnRR,
        score_hedges_g=score_hedges_g,
        key_factors=confidence_factors,
    ),
)

print("\n📊 Summary stored in EFFECT_SIZE_SELECTION_SUMMARY and DETECTION_METADATA")


EFFECT SIZE TYPE DETECTION & SELECTION
Timestamp: 2025-11-14 18:01:24

STEP 1: ANALYZING DATA CHARACTERISTICS

🔍 Examining 69 observations across 23 studies...

📊 Basic Statistics:
  Treatment (xe):
    Mean:     492.8312
    Median:    45.0000
    Std:      881.5370
    Range:  [1.1500, 3276.0000]

  Control (xc):
    Mean:     415.3678
    Median:    41.6000
    Std:      789.2837
    Range:  [1.0400, 2705.0000]

  Standard Deviations:
    Available: 69/69 (100.0%)

STEP 2: DETECTING DATA PATTERNS

1️⃣  Control Group Normalization:
    Exactly 1.0:          0 (  0.0%)
    Near 1.0 (±0.05):     1 (  1.4%)
    → No evidence of fold-change normalization

2️⃣  Negative Values (invalid for ratios):
    Treatment: 0 negative values (0.0%)
    Control:   0 negative values (0.0%)
    → All values positive (ratio measures possible) ✓

3️⃣  Zero Values (problematic for log ratios):
    Treatment: 0 zeros (0.0%)
    Control:   0 zeros (0.0%)
    → No zeros detected ✓

4️⃣  Scale Heterogeneity:


STEP 4: EFFECT SIZE SELECTION


VBox(children=(HTML(value="<hr style='margin: 20px 0; border: none; border-top: 2px solid #ddd;'>"), HTML(valu…


✓ Effect size detection and selection interface ready

👆 INSTRUCTIONS:
  1. Review the recommendation above (based on data characteristics)
  2. Select your preferred effect size type (or keep recommendation)
  3. Review the detailed information for your selected type
  4. Click 'Confirm Selection & Calculate Effect Sizes' to proceed


📊 Summary stored in EFFECT_SIZE_SELECTION_SUMMARY and DETECTION_METADATA


In [7]:
#@title 🧮 CALCULATE EFFECT SIZES

# =============================================================================
# CELL 5: EFFECT SIZE CALCULATION
# Purpose: Calculate effect sizes, variances, and weights for meta-analysis
# Dependencies: Cell 4 (ANALYSIS_CONFIG, data_filtered)
# Outputs: data_filtered with effect sizes, EFFECT_SIZE_METADATA
# =============================================================================

print("\n" + "="*70)
print("EFFECT SIZE CALCULATION")
print("="*70)
print(f"Timestamp: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

# --- STEP 1: LOAD CONFIGURATION ---
print("\n" + "="*70)
print("STEP 1: LOADING CONFIGURATION")
print("="*70)

# Read the selection stored by the confirmation handler of the selection cell.
# KeyError covers a missing 'effect_size_type' / 'es_config' entry (or a
# missing key inside es_config); NameError covers ANALYSIS_CONFIG not existing
# at all, e.g. on a fresh kernel. Both get the same troubleshooting hints and
# the original exception is re-raised so execution still stops here.
try:
    effect_size_type = ANALYSIS_CONFIG['effect_size_type']
    es_config = ANALYSIS_CONFIG['es_config']
    print(f"✓ Configuration loaded successfully")
    print(f"  Effect size type: {es_config['effect_label']} ({es_config['effect_label_short']})")
    print(f"  Scale: {es_config['scale']}")
    print(f"  Allows negatives: {es_config['allows_negative']}")
    print(f"  Null value: {es_config['null_value']}")
except (KeyError, NameError) as e:
    print(f"❌ ERROR: Configuration not found - {e}")
    print("\nTroubleshooting:")
    print("  1. Ensure Cell 4 (effect size selection) was run successfully")
    print("  2. Check that you clicked 'Confirm Selection' button")
    print("  3. Verify ANALYSIS_CONFIG exists with 'effect_size_type' key")
    raise

# Store initial dataset size (before any row is removed further down)
initial_obs = len(data_filtered)
initial_papers = data_filtered['id'].nunique()

print(f"\n📊 Input Dataset:")
print(f"  Observations: {initial_obs}")
print(f"  Papers: {initial_papers}")

# --- STEP 2: VERIFY REQUIRED DATA COLUMNS ---
print("\n" + "="*70, "STEP 2: DATA VALIDATION", "="*70, sep="\n")

# Means, SDs and sample sizes of the experimental (e) and control (c) groups.
required_for_calculation = ['xe', 'sde', 'ne', 'xc', 'sdc', 'nc']

# Collect absent columns in the order they are required.
missing_cols = []
for col in required_for_calculation:
    if col not in data_filtered.columns:
        missing_cols.append(col)

if len(missing_cols) > 0:
    print(f"❌ ERROR: Missing required columns: {missing_cols}")
    raise ValueError(f"Missing required columns: {missing_cols}")

print("✓ All required columns present")

# Check data availability: non-missing count and percentage per column.
data_availability = {}
for col in required_for_calculation:
    n_valid = data_filtered[col].notna().sum()
    pct_valid = (n_valid / len(data_filtered)) * 100
    data_availability[col] = dict(valid=n_valid, pct=pct_valid)
    print(f"  • {col}: {n_valid}/{len(data_filtered)} valid ({pct_valid:.1f}%)")

# --- STEP 3: HANDLE ZERO/MISSING STANDARD DEVIATIONS ---
# Zero or missing SDs are replaced by median(CV) × mean, where CV = SD / mean is
# taken over the rows of the same group that have a positive SD and a positive
# mean. Rows that still lack a valid SD afterwards are dropped.
print("\n" + "="*70)
print("STEP 3: STANDARD DEVIATION IMPUTATION")
print("="*70)

print("🔧 Processing standard deviations...")

# Track imputation statistics. The '*_imputed' counters stay 0 unless values
# are actually written further down.
imputation_log = {
    'method': 'median_cv',
    'sde_zeros': 0,
    'sdc_zeros': 0,
    'sde_missing': 0,
    'sdc_missing': 0,
    'sde_imputed': 0,
    'sdc_imputed': 0
}

# Count initial issues
imputation_log['sde_zeros'] = (data_filtered['sde'] == 0).sum()
imputation_log['sdc_zeros'] = (data_filtered['sdc'] == 0).sum()
imputation_log['sde_missing'] = data_filtered['sde'].isna().sum()
imputation_log['sdc_missing'] = data_filtered['sdc'].isna().sum()

print(f"\n📋 Initial SD Status:")
print(f"  Experimental (sde):")
print(f"    • Zero values:    {imputation_log['sde_zeros']}")
print(f"    • Missing values: {imputation_log['sde_missing']}")
print(f"    • Total issues:   {imputation_log['sde_zeros'] + imputation_log['sde_missing']}")
print(f"  Control (sdc):")
print(f"    • Zero values:    {imputation_log['sdc_zeros']}")
print(f"    • Missing values: {imputation_log['sdc_missing']}")
print(f"    • Total issues:   {imputation_log['sdc_zeros'] + imputation_log['sdc_missing']}")

# Replace zeros with NaN for proper imputation.
# NOTE: this overwrites the raw 'sde'/'sdc' columns, so re-running this cell
# reports former zeros as missing values.
data_filtered['sde'] = data_filtered['sde'].replace(0, np.nan)
data_filtered['sdc'] = data_filtered['sdc'].replace(0, np.nan)

# Calculate Coefficient of Variation (CV = SD/Mean) for imputation
print(f"\n🔬 Calculating Coefficient of Variation (CV)...")

data_filtered['cv_e'] = np.nan
data_filtered['cv_c'] = np.nan

# Calculate CV only for valid entries (non-missing positive SD, positive mean)
valid_cv_e = (data_filtered['sde'] > 0) & (data_filtered['xe'] > 0)
valid_cv_c = (data_filtered['sdc'] > 0) & (data_filtered['xc'] > 0)

data_filtered.loc[valid_cv_e, 'cv_e'] = data_filtered.loc[valid_cv_e, 'sde'] / data_filtered.loc[valid_cv_e, 'xe']
data_filtered.loc[valid_cv_c, 'cv_c'] = data_filtered.loc[valid_cv_c, 'sdc'] / data_filtered.loc[valid_cv_c, 'xc']

# Use MEDIAN CV for robustness (less sensitive to outliers than mean).
# Both are NaN when a group has no valid CV at all.
median_cv_e = data_filtered['cv_e'].median()
median_cv_c = data_filtered['cv_c'].median()
mean_cv_e = data_filtered['cv_e'].mean()
mean_cv_c = data_filtered['cv_c'].mean()

print(f"\n  CV Statistics (Experimental):")
print(f"    • Valid CVs:   {valid_cv_e.sum()}/{len(data_filtered)} ({(valid_cv_e.sum()/len(data_filtered))*100:.1f}%)")
print(f"    • Median CV:   {median_cv_e:.4f}")
print(f"    • Mean CV:     {mean_cv_e:.4f}")
print(f"    • Min CV:      {data_filtered['cv_e'].min():.4f}")
print(f"    • Max CV:      {data_filtered['cv_e'].max():.4f}")

print(f"\n  CV Statistics (Control):")
print(f"    • Valid CVs:   {valid_cv_c.sum()}/{len(data_filtered)} ({(valid_cv_c.sum()/len(data_filtered))*100:.1f}%)")
print(f"    • Median CV:   {median_cv_c:.4f}")
print(f"    • Mean CV:     {mean_cv_c:.4f}")
print(f"    • Min CV:      {data_filtered['cv_c'].min():.4f}")
print(f"    • Max CV:      {data_filtered['cv_c'].max():.4f}")

# Store CV statistics
imputation_log['median_cv_e'] = median_cv_e
imputation_log['median_cv_c'] = median_cv_c
imputation_log['mean_cv_e'] = mean_cv_e
imputation_log['mean_cv_c'] = mean_cv_c
imputation_log['n_valid_cv_e'] = valid_cv_e.sum()
imputation_log['n_valid_cv_c'] = valid_cv_c.sum()

# Create imputed SD columns
print(f"\n🔧 Applying imputation...")

data_filtered['sde_imputed'] = data_filtered['sde'].copy()
data_filtered['sdc_imputed'] = data_filtered['sdc'].copy()

# Track which rows were imputed
data_filtered['sde_was_imputed'] = False
data_filtered['sdc_was_imputed'] = False

# Impute experimental group: candidates are rows without an SD but with a
# positive mean (the imputed SD is proportional to the mean).
impute_e = (data_filtered['sde_imputed'].isna()) & (data_filtered['xe'] > 0)
n_imputed_e = impute_e.sum()

if n_imputed_e > 0 and pd.notna(median_cv_e):
    data_filtered.loc[impute_e, 'sde_imputed'] = median_cv_e * data_filtered.loc[impute_e, 'xe']
    data_filtered.loc[impute_e, 'sde_was_imputed'] = True
    imputation_log['sde_imputed'] = n_imputed_e
    print(f"  ✓ Imputed {n_imputed_e} experimental SDs using median CV method")
    print(f"    Formula: SD_imputed = {median_cv_e:.4f} × mean")
elif n_imputed_e > 0:
    print(f"  ⚠️  Warning: {n_imputed_e} experimental SDs need imputation but CV unavailable")

# Impute control group (same rule as above)
impute_c = (data_filtered['sdc_imputed'].isna()) & (data_filtered['xc'] > 0)
n_imputed_c = impute_c.sum()

if n_imputed_c > 0 and pd.notna(median_cv_c):
    data_filtered.loc[impute_c, 'sdc_imputed'] = median_cv_c * data_filtered.loc[impute_c, 'xc']
    data_filtered.loc[impute_c, 'sdc_was_imputed'] = True
    imputation_log['sdc_imputed'] = n_imputed_c
    print(f"  ✓ Imputed {n_imputed_c} control SDs using median CV method")
    print(f"    Formula: SD_imputed = {median_cv_c:.4f} × mean")
elif n_imputed_c > 0:
    print(f"  ⚠️  Warning: {n_imputed_c} control SDs need imputation but CV unavailable")

# Final check for remaining issues (still missing, zero or negative SD)
remaining_issues_e = (data_filtered['sde_imputed'].isna()) | (data_filtered['sde_imputed'] <= 0)
remaining_issues_c = (data_filtered['sdc_imputed'].isna()) | (data_filtered['sdc_imputed'] <= 0)
remaining_issues = remaining_issues_e | remaining_issues_c

if remaining_issues.any():
    n_issues = remaining_issues.sum()
    print(f"\n  ⚠️  WARNING: {n_issues} observations still have invalid SDs after imputation")
    print(f"    These observations will be removed from analysis")

    # Show details
    print(f"\n    Breakdown:")
    print(f"      • Experimental SD issues: {remaining_issues_e.sum()}")
    print(f"      • Control SD issues:      {remaining_issues_c.sum()}")

    # Remove problematic rows
    data_filtered = data_filtered[~remaining_issues].copy()
    imputation_log['removed_after_imputation'] = n_issues
else:
    print(f"\n  ✓ All observations have valid SDs (original or imputed)")
    imputation_log['removed_after_imputation'] = 0

# Summary of imputation.
# Count only SDs that were really filled in: n_imputed_e / n_imputed_c are the
# numbers of candidate rows and stay > 0 even when no median CV was available
# and nothing was written.
total_imputed = imputation_log['sde_imputed'] + imputation_log['sdc_imputed']
total_original_issues = (imputation_log['sde_zeros'] + imputation_log['sde_missing'] +
                         imputation_log['sdc_zeros'] + imputation_log['sdc_missing'])

print(f"\n📊 Imputation Summary:")
print(f"  Total SD issues found:     {total_original_issues}")
print(f"  Total SDs imputed:         {total_imputed}")
print(f"  Observations removed:      {imputation_log['removed_after_imputation']}")
print(f"  Observations remaining:    {len(data_filtered)}")
# A rate is only meaningful when there was something to impute.
if total_original_issues > 0:
    print(f"  Imputation success rate:   {(total_imputed / total_original_issues) * 100:.1f}%")
else:
    print(f"  Imputation success rate:   N/A (no SD issues found)")

# --- STEP 4: HANDLE ZERO/NEGATIVE VALUES (FOR RATIO MEASURES) ---
# Only runs for the log-scale measures. Rows with a negative mean are dropped;
# zero means are replaced by a small constant so the logarithm is defined.
# NOTE(review): the same 0.001 replacement is applied for 'log_or', while the
# selection panel mentions a +0.5 continuity correction - confirm the intent.
if effect_size_type in ['lnRR', 'log_or']:
    print("\n" + "="*70)
    print("STEP 4: ZERO/NEGATIVE VALUE HANDLING (RATIO MEASURES)")
    print("="*70)

    print(f"\n🔍 Checking for incompatible values...")

    # Check for zero values (per column, before any row is removed)
    zero_xe = (data_filtered['xe'] == 0).sum()
    zero_xc = (data_filtered['xc'] == 0).sum()

    # Check for negative values (per column)
    neg_xe = (data_filtered['xe'] < 0).sum()
    neg_xc = (data_filtered['xc'] < 0).sum()

    print(f"\n  Zero values:")
    print(f"    • Treatment (xe): {zero_xe}")
    print(f"    • Control (xc):   {zero_xc}")
    print(f"    • Total:          {zero_xe + zero_xc}")

    print(f"\n  Negative values:")
    print(f"    • Treatment (xe): {neg_xe}")
    print(f"    • Control (xc):   {neg_xc}")
    print(f"    • Total:          {neg_xe + neg_xc}")

    # Handle negative values (must be removed). The number of removed
    # observations is the number of affected rows, which is smaller than
    # neg_xe + neg_xc when a row is negative in both columns.
    negative_mask = (data_filtered['xe'] < 0) | (data_filtered['xc'] < 0)
    n_negative_rows = negative_mask.sum()
    if n_negative_rows > 0:
        print(f"\n  ❌ Removing {n_negative_rows} observations with negative values")
        print(f"     (log ratio requires all positive values)")
        data_filtered = data_filtered[~negative_mask].copy()

    # Handle zero values (replace by a small constant). The masks are built
    # after the removal above so only rows still present are counted.
    zero_xe_mask = data_filtered['xe'] == 0
    zero_xc_mask = data_filtered['xc'] == 0
    n_zero_remaining = zero_xe_mask.sum() + zero_xc_mask.sum()
    if n_zero_remaining > 0:
        ZERO_CONSTANT = 0.001
        print(f"\n  🔧 Handling {n_zero_remaining} zero values:")
        print(f"     Adding small constant: {ZERO_CONSTANT}")

        data_filtered.loc[zero_xe_mask, 'xe'] = ZERO_CONSTANT
        data_filtered.loc[zero_xc_mask, 'xc'] = ZERO_CONSTANT

        print(f"     ✓ Zero values adjusted to avoid log(0)")

    if neg_xe + neg_xc + zero_xe + zero_xc == 0:
        print(f"\n  ✓ All values positive and non-zero")

    print(f"\n  Observations remaining: {len(data_filtered)}")

# --- STEP 5: CALCULATE EFFECT SIZE BASED ON TYPE ---
print("\n" + "="*70, "STEP 5: EFFECT SIZE CALCULATION", "="*70, sep="\n")

# Metadata about this calculation run; the branch for the selected effect size
# type adds further entries to it.
calculation_log = dict(
    type=effect_size_type,
    timestamp=datetime.datetime.now(),
    n_observations=len(data_filtered),
)

print(
    f"\n🧮 Calculating {es_config['effect_label']}...",
    f"   Method: {effect_size_type}",
    f"   Observations: {len(data_filtered)}",
    sep="\n",
)

if effect_size_type == 'lnRR':
    # ========================================
    # LOG RESPONSE RATIO (lnRR)
    # ========================================
    # Adds the effect size, its variance/SE/CI and back-transformed ratio
    # columns to data_filtered, using the (possibly imputed) SDs.

    print(f"\n📐 Formula: lnRR = ln(x̄ₑ / x̄ₜ)")
    print(f"   Variance: Var(lnRR) = SD²ₑ/(nₑ·x̄²ₑ) + SD²ₜ/(nₜ·x̄²ₜ)")

    # Calculate lnRR
    data_filtered['lnRR'] = np.log(data_filtered['xe'] / data_filtered['xc'])

    # Calculate variance using delta method
    data_filtered['var_lnRR'] = (
        (data_filtered['sde_imputed']**2 / (data_filtered['ne'] * data_filtered['xe']**2)) +
        (data_filtered['sdc_imputed']**2 / (data_filtered['nc'] * data_filtered['xc']**2))
    )

    # Calculate standard error
    data_filtered['SE_lnRR'] = np.sqrt(data_filtered['var_lnRR'])

    # Calculate 95% confidence intervals (normal approximation, z = 1.96)
    data_filtered['CI_lower_lnRR'] = data_filtered['lnRR'] - 1.96 * data_filtered['SE_lnRR']
    data_filtered['CI_upper_lnRR'] = data_filtered['lnRR'] + 1.96 * data_filtered['SE_lnRR']

    # Convert to Response Ratio (RR) for interpretation
    data_filtered['Response_Ratio'] = np.exp(data_filtered['lnRR'])
    data_filtered['RR_CI_lower'] = np.exp(data_filtered['CI_lower_lnRR'])
    data_filtered['RR_CI_upper'] = np.exp(data_filtered['CI_upper_lnRR'])

    # Calculate fold-change (with sign for direction)
    # Positive lnRR = upregulation (e.g., 2-fold increase = 2×)
    # Negative lnRR = downregulation (e.g., 2-fold decrease = -2×)
    # Vectorized instead of a row-wise apply: same values, and it also works
    # when no observations are left (a row-wise apply on an empty frame
    # returns a DataFrame, which cannot be assigned to a single column).
    data_filtered['fold_change'] = np.where(
        data_filtered['lnRR'] >= 0,
        data_filtered['Response_Ratio'],
        -1 / data_filtered['Response_Ratio']
    )

    # Calculate percent change
    data_filtered['Percent_Change'] = (data_filtered['Response_Ratio'] - 1) * 100

    # Set primary effect size column names
    effect_col = 'lnRR'
    var_col = 'var_lnRR'
    se_col = 'SE_lnRR'

    calculation_log['columns_created'] = [
        'lnRR', 'var_lnRR', 'SE_lnRR', 'CI_lower_lnRR', 'CI_upper_lnRR',
        'Response_Ratio', 'RR_CI_lower', 'RR_CI_upper', 'fold_change', 'Percent_Change'
    ]

    print(f"\n  ✓ lnRR calculated for {len(data_filtered)} observations")
    print(f"\n  📊 Columns created:")
    print(f"     • lnRR: Log response ratio (effect size)")
    print(f"     • var_lnRR: Variance of lnRR")
    print(f"     • SE_lnRR: Standard error of lnRR")
    print(f"     • CI_lower/upper_lnRR: 95% confidence intervals")
    print(f"     • Response_Ratio: RR = exp(lnRR)")
    print(f"     • fold_change: Directional fold-change")
    print(f"     • Percent_Change: % change from control")

elif effect_size_type == 'hedges_g':
    # ========================================
    # HEDGES' G (STANDARDIZED MEAN DIFFERENCE)
    # ========================================
    # Adds pooled SD, Cohen's d, the J correction, Hedges' g and its
    # variance/SE/CI to data_filtered, using the (possibly imputed) SDs.

    print(f"\n📐 Formula: g = [(x̄ₑ - x̄ₜ) / SDₚₒₒₗₑ𝒹] × J")
    print(f"   J = 1 - 3/(4·df - 1)  [small-sample correction]")
    print(f"   Variance: Vg = [(nₑ+nₜ)/(nₑ·nₜ) + g²/(2(nₑ+nₜ))] × J²")

    # Degrees of freedom
    # NOTE(review): df <= 0 (ne + nc <= 2) is not guarded here and leads to
    # inf/NaN in the pooled SD - confirm such rows are filtered upstream.
    data_filtered['df'] = data_filtered['ne'] + data_filtered['nc'] - 2

    print(f"\n  🔢 Calculating pooled standard deviation...")

    # Pooled Standard Deviation
    data_filtered['sp_squared'] = (
        ((data_filtered['ne'] - 1) * data_filtered['sde_imputed']**2 +
         (data_filtered['nc'] - 1) * data_filtered['sdc_imputed']**2) /
        data_filtered['df']
    )
    data_filtered['sp'] = np.sqrt(data_filtered['sp_squared'])

    print(f"     • Mean pooled SD: {data_filtered['sp'].mean():.4f}")
    print(f"     • Median pooled SD: {data_filtered['sp'].median():.4f}")

    # Cohen's d (uncorrected)
    data_filtered['cohen_d'] = (data_filtered['xe'] - data_filtered['xc']) / data_filtered['sp']

    print(f"\n  🔢 Applying Hedges' correction for small samples...")

    # Hedges' g correction factor (J)
    # Using approximation: J ≈ 1 - 3/(4*df - 1)
    data_filtered['hedges_j'] = 1 - (3 / (4 * data_filtered['df'] - 1))

    print(f"     • Mean J factor: {data_filtered['hedges_j'].mean():.6f}")
    print(f"     • Min J factor: {data_filtered['hedges_j'].min():.6f}")
    print(f"     • Max J factor: {data_filtered['hedges_j'].max():.6f}")

    # Hedges' g
    data_filtered['hedges_g'] = data_filtered['cohen_d'] * data_filtered['hedges_j']

    # Variance of Hedges' g
    # NOTE(review): the bracket uses g² and is then scaled by J². Borenstein
    # et al. (2009) give Vg = J² × Vd with d² inside the bracket, so this form
    # shrinks the second term slightly more. Left unchanged because the
    # formula printed above matches this code - confirm the intended form.
    data_filtered['Vg'] = (
        ((data_filtered['ne'] + data_filtered['nc']) / (data_filtered['ne'] * data_filtered['nc']) +
         (data_filtered['hedges_g']**2) / (2 * (data_filtered['ne'] + data_filtered['nc']))) *
        (data_filtered['hedges_j']**2)
    )

    # Standard error
    data_filtered['SE_g'] = np.sqrt(data_filtered['Vg'])

    # Calculate 95% confidence intervals (normal approximation, z = 1.96)
    data_filtered['CI_lower_g'] = data_filtered['hedges_g'] - 1.96 * data_filtered['SE_g']
    data_filtered['CI_upper_g'] = data_filtered['hedges_g'] + 1.96 * data_filtered['SE_g']

    # Set primary effect size column names
    effect_col = 'hedges_g'
    var_col = 'Vg'
    se_col = 'SE_g'

    calculation_log['columns_created'] = [
        'hedges_g', 'Vg', 'SE_g', 'CI_lower_g', 'CI_upper_g',
        'cohen_d', 'hedges_j', 'sp', 'sp_squared', 'df'
    ]

    print(f"\n  ✓ Hedges' g calculated for {len(data_filtered)} observations")
    print(f"\n  📊 Columns created:")
    print(f"     • hedges_g: Hedges' g (effect size with correction)")
    print(f"     • cohen_d: Cohen's d (uncorrected)")
    print(f"     • Vg: Variance of Hedges' g")
    print(f"     • SE_g: Standard error of Hedges' g")
    print(f"     • CI_lower/upper_g: 95% confidence intervals")
    print(f"     • sp: Pooled standard deviation")
    print(f"     • hedges_j: Small-sample correction factor")

    # Effect size magnitude classification (by |g|; rows with NaN g fall
    # into none of the four classes)
    small = ((data_filtered['hedges_g'].abs() >= 0.2) & (data_filtered['hedges_g'].abs() < 0.5)).sum()
    medium = ((data_filtered['hedges_g'].abs() >= 0.5) & (data_filtered['hedges_g'].abs() < 0.8)).sum()
    large = (data_filtered['hedges_g'].abs() >= 0.8).sum()
    negligible = (data_filtered['hedges_g'].abs() < 0.2).sum()

    print(f"\n  📏 Effect Size Magnitude (Cohen's benchmarks):")
    print(f"     • Negligible (|g| < 0.2):   {negligible} ({negligible/len(data_filtered)*100:.1f}%)")
    print(f"     • Small (0.2 ≤ |g| < 0.5):  {small} ({small/len(data_filtered)*100:.1f}%)")
    print(f"     • Medium (0.5 ≤ |g| < 0.8): {medium} ({medium/len(data_filtered)*100:.1f}%)")
    print(f"     • Large (|g| ≥ 0.8):        {large} ({large/len(data_filtered)*100:.1f}%)")
elif effect_size_type == 'cohen_d':
    # ========================================
    # COHEN'S D (NO SMALL-SAMPLE CORRECTION)
    # ========================================

    print(f"\n📐 Formula: d = (x̄ₑ - x̄ₜ) / SDₚₒₒₗₑ𝒹")
    print(f"   Variance: Vd = (nₑ+nₜ)/(nₑ·nₜ) + d²/(2(nₑ+nₜ))")
    print(f"   Note: No small-sample correction applied")

    # Degrees of freedom
    data_filtered['df'] = data_filtered['ne'] + data_filtered['nc'] - 2

    print(f"\n  🔢 Calculating pooled standard deviation...")

    # Pooled Standard Deviation
    data_filtered['sp_squared'] = (
        ((data_filtered['ne'] - 1) * data_filtered['sde_imputed']**2 +
         (data_filtered['nc'] - 1) * data_filtered['sdc_imputed']**2) /
        data_filtered['df']
    )
    data_filtered['sp'] = np.sqrt(data_filtered['sp_squared'])

    print(f"     • Mean pooled SD: {data_filtered['sp'].mean():.4f}")
    print(f"     • Median pooled SD: {data_filtered['sp'].median():.4f}")

    # Cohen's d
    data_filtered['cohen_d'] = (data_filtered['xe'] - data_filtered['xc']) / data_filtered['sp']

    # Variance of Cohen's d
    data_filtered['Vd'] = (
        (data_filtered['ne'] + data_filtered['nc']) / (data_filtered['ne'] * data_filtered['nc']) +
        (data_filtered['cohen_d']**2) / (2 * (data_filtered['ne'] + data_filtered['nc']))
    )

    # Standard error
    data_filtered['SE_d'] = np.sqrt(data_filtered['Vd'])

    # Calculate 95% confidence intervals
    data_filtered['CI_lower_d'] = data_filtered['cohen_d'] - 1.96 * data_filtered['SE_d']
    data_filtered['CI_upper_d'] = data_filtered['cohen_d'] + 1.96 * data_filtered['SE_d']

    # Set primary effect size column names
    effect_col = 'cohen_d'
    var_col = 'Vd'
    se_col = 'SE_d'

    calculation_log['columns_created'] = [
        'cohen_d', 'Vd', 'SE_d', 'CI_lower_d', 'CI_upper_d',
        'sp', 'sp_squared', 'df'
    ]

    print(f"\n  ✓ Cohen's d calculated for {len(data_filtered)} observations")
    print(f"\n  📊 Columns created:")
    print(f"     • cohen_d: Cohen's d (effect size)")
    print(f"     • Vd: Variance of Cohen's d")
    print(f"     • SE_d: Standard error of Cohen's d")
    print(f"     • CI_lower/upper_d: 95% confidence intervals")
    print(f"     • sp: Pooled standard deviation")

    # Effect size magnitude classification
    small = ((data_filtered['cohen_d'].abs() >= 0.2) & (data_filtered['cohen_d'].abs() < 0.5)).sum()
    medium = ((data_filtered['cohen_d'].abs() >= 0.5) & (data_filtered['cohen_d'].abs() < 0.8)).sum()
    large = (data_filtered['cohen_d'].abs() >= 0.8).sum()
    negligible = (data_filtered['cohen_d'].abs() < 0.2).sum()

    print(f"\n  📏 Effect Size Magnitude (Cohen's benchmarks):")
    print(f"     • Negligible (|d| < 0.2):   {negligible} ({negligible/len(data_filtered)*100:.1f}%)")
    print(f"     • Small (0.2 ≤ |d| < 0.5):  {small} ({small/len(data_filtered)*100:.1f}%)")
    print(f"     • Medium (0.5 ≤ |d| < 0.8): {medium} ({medium/len(data_filtered)*100:.1f}%)")
    print(f"     • Large (|d| ≥ 0.8):        {large} ({large/len(data_filtered)*100:.1f}%)")

    # Sample size warning
    small_samples = (data_filtered['df'] < 20).sum()
    if small_samples > 0:
        print(f"\n  ⚠️  Warning: {small_samples} observations have small samples (df < 20)")
        print(f"     Consider using Hedges' g instead for small-sample correction")

elif effect_size_type == 'log_or':
    # ========================================
    # LOG ODDS RATIO
    # ========================================

    print(f"\n⚠️  Note: log Odds Ratio implementation")
    print(f"   Current implementation treats xe/xc as odds or proportions")
    print(f"   For 2×2 contingency tables, ensure proper data format")

    print(f"\n📐 Formula: logOR = ln(xe / xc)")
    print(f"   Variance: Var(logOR) ≈ SD²ₑ/(nₑ·xe²) + SD²ₜ/(nₜ·xc²)")

    # Check for values in valid range
    invalid_values = ((data_filtered['xe'] < 0) | (data_filtered['xc'] < 0) |
                      (data_filtered['xe'] == 0) | (data_filtered['xc'] == 0))

    if invalid_values.any():
        print(f"\n  ⚠️  WARNING: {invalid_values.sum()} observations have invalid values")
        print(f"     Removing observations with xe ≤ 0 or xc ≤ 0")
        data_filtered = data_filtered[~invalid_values].copy()

    # Calculate log OR
    data_filtered['log_OR'] = np.log(data_filtered['xe'] / data_filtered['xc'])

    # Calculate variance (simplified - assumes xe, xc are odds/proportions)
    data_filtered['var_log_OR'] = (
        (data_filtered['sde_imputed']**2 / (data_filtered['ne'] * data_filtered['xe']**2)) +
        (data_filtered['sdc_imputed']**2 / (data_filtered['nc'] * data_filtered['xc']**2))
    )

    # Standard error
    data_filtered['SE_log_OR'] = np.sqrt(data_filtered['var_log_OR'])

    # Calculate 95% confidence intervals
    data_filtered['CI_lower_log_OR'] = data_filtered['log_OR'] - 1.96 * data_filtered['SE_log_OR']
    data_filtered['CI_upper_log_OR'] = data_filtered['log_OR'] + 1.96 * data_filtered['SE_log_OR']

    # Convert to Odds Ratio
    data_filtered['Odds_Ratio'] = np.exp(data_filtered['log_OR'])
    data_filtered['OR_CI_lower'] = np.exp(data_filtered['CI_lower_log_OR'])
    data_filtered['OR_CI_upper'] = np.exp(data_filtered['CI_upper_log_OR'])

    # Set primary effect size column names
    effect_col = 'log_OR'
    var_col = 'var_log_OR'
    se_col = 'SE_log_OR'

    calculation_log['columns_created'] = [
        'log_OR', 'var_log_OR', 'SE_log_OR', 'CI_lower_log_OR', 'CI_upper_log_OR',
        'Odds_Ratio', 'OR_CI_lower', 'OR_CI_upper'
    ]

    print(f"\n  ✓ log Odds Ratio calculated for {len(data_filtered)} observations")
    print(f"\n  📊 Columns created:")
    print(f"     • log_OR: Log odds ratio (effect size)")
    print(f"     • var_log_OR: Variance of log OR")
    print(f"     • SE_log_OR: Standard error of log OR")
    print(f"     • CI_lower/upper_log_OR: 95% confidence intervals")
    print(f"     • Odds_Ratio: OR = exp(logOR)")
    print(f"\n  ⚠️  Please verify results are appropriate for your data structure")

else:
    raise ValueError(f"Unknown effect size type: {effect_size_type}")

calculation_log['effect_col'] = effect_col
calculation_log['var_col'] = var_col
calculation_log['se_col'] = se_col

# --- STEP 6: CALCULATE FIXED-EFFECTS WEIGHTS ---
# Adds the inverse-variance weight column 'w_fixed' (w = 1 / variance),
# blanks out infinite weights so the cleaning step can drop those rows,
# and reports how unevenly the weight is spread across observations.
print("\n" + "="*70)
print("STEP 6: CALCULATING WEIGHTS")
print("="*70)

print(f"\n⚖️  Calculating inverse-variance weights...")
print(f"   Formula: w = 1 / Var({es_config['effect_label_short']})")

data_filtered['w_fixed'] = 1 / data_filtered[var_col]

# Handle infinite weights (a variance of exactly zero divides to inf)
inf_weights = np.isinf(data_filtered['w_fixed']).sum()
if inf_weights > 0:
    print(f"\n  ⚠️  Warning: {inf_weights} infinite weights detected (variance = 0)")
    print(f"     Replacing with NaN for removal")
    data_filtered['w_fixed'] = data_filtered['w_fixed'].replace([np.inf, -np.inf], np.nan)

# Weight statistics
print(f"\n  📊 Weight Statistics:")
print(f"     • Mean weight:   {data_filtered['w_fixed'].mean():.2f}")
print(f"     • Median weight: {data_filtered['w_fixed'].median():.2f}")
print(f"     • Min weight:    {data_filtered['w_fixed'].min():.2f}")
print(f"     • Max weight:    {data_filtered['w_fixed'].max():.2f}")
print(f"     • Std weight:    {data_filtered['w_fixed'].std():.2f}")

# Check weight distribution.
# The ratio is computed exactly rather than as max / (min + 0.0001): the
# additive epsilon noticeably shrinks the ratio whenever weights are small
# (i.e. variances are large). Degenerate cases are handled explicitly.
_w_min = data_filtered['w_fixed'].min()
_w_max = data_filtered['w_fixed'].max()
if _w_min > 0:
    weight_ratio = _w_max / _w_min
elif _w_max > 0:
    # Smallest weight is zero (or negative): the spread is unbounded
    weight_ratio = np.inf
else:
    # No usable weights at all (e.g. every weight is NaN)
    weight_ratio = np.nan
print(f"\n  📏 Weight ratio (max/min): {weight_ratio:.2f}")

if np.isnan(weight_ratio):
    print(f"     ⚠️  Weight ratio undefined - no usable weights")
elif weight_ratio > 1000:
    print(f"     ⚠️  Very large weight range - one study may dominate")
elif weight_ratio > 100:
    print(f"     ⚠️  Large weight range - check for influential studies")
else:
    print(f"     ✓ Reasonable weight range")

print(f"\n  ✓ Fixed-effects weights calculated")

# --- STEP 7: CLEAN DATA ---
# Drops every row that lacks an effect size, variance, standard error or
# weight, reports how many rows were lost, and stops with a clear error
# when nothing is left to analyse.
print("\n" + "="*70)
print("STEP 7: FINAL DATA CLEANING")
print("="*70)

print(f"\n🧹 Removing observations with missing critical values...")

# Define critical columns
critical_cols = [effect_col, var_col, se_col, 'w_fixed']
initial_n = len(data_filtered)

# Check for missing values (per-column counts; rows may be missing several)
missing_summary = {}
for col in critical_cols:
    n_missing = data_filtered[col].isna().sum()
    missing_summary[col] = n_missing
    if n_missing > 0:
        print(f"  • {col}: {n_missing} missing")

# Remove rows with NaN in critical columns
data_filtered = data_filtered.dropna(subset=critical_cols).copy()
final_n = len(data_filtered)
removed = initial_n - final_n

if removed > 0:
    print(f"\n  ⚠️  Removed {removed} observations with missing critical values")
    print(f"     ({(removed/initial_n)*100:.1f}% of dataset)")
else:
    print(f"\n  ✓ No missing values in critical columns")

print(f"\n  📊 Final dataset: {final_n} observations")

calculation_log['final_n'] = final_n
calculation_log['removed_in_cleaning'] = removed

# An empty dataset would otherwise surface further down as a bare
# ZeroDivisionError in the percentage calculations; fail here with a
# message that says what actually went wrong.
if final_n == 0:
    raise ValueError(
        "No observations remain after removing rows with missing values in "
        f"{', '.join(critical_cols)}. Check the input means, standard "
        "deviations and sample sizes before calculating effect sizes."
    )

# --- STEP 8: EFFECT SIZE SUMMARY STATISTICS ---
# Builds three summary dictionaries (effect size, variance, standard error),
# prints each as a two-column table, and stores them in calculation_log.
print("\n" + "="*70)
print("STEP 8: EFFECT SIZE SUMMARY STATISTICS")
print("="*70)

# Effect size: basic moments first, then quartiles and the IQR
effect_stats = {
    _stat: getattr(data_filtered[effect_col], _stat)()
    for _stat in ('count', 'mean', 'median', 'std', 'min', 'max')
}
effect_stats['q25'] = data_filtered[effect_col].quantile(0.25)
effect_stats['q75'] = data_filtered[effect_col].quantile(0.75)
effect_stats['iqr'] = data_filtered[effect_col].quantile(0.75) - data_filtered[effect_col].quantile(0.25)

# Variance and standard error share the same five statistics
var_stats = {
    _stat: getattr(data_filtered[var_col], _stat)()
    for _stat in ('mean', 'median', 'std', 'min', 'max')
}

se_stats = {
    _stat: getattr(data_filtered[se_col], _stat)()
    for _stat in ('mean', 'median', 'std', 'min', 'max')
}

# (row label, dictionary key) pairs in display order
_effect_rows = (
    ('Mean', 'mean'), ('Median', 'median'), ('Std Dev', 'std'), ('Min', 'min'),
    ('Q1 (25%)', 'q25'), ('Q3 (75%)', 'q75'), ('Max', 'max'), ('IQR', 'iqr'),
)
_spread_rows = (
    ('Mean', 'mean'), ('Median', 'median'), ('Std Dev', 'std'),
    ('Min', 'min'), ('Max', 'max'),
)

print(f"\n📊 {es_config['effect_label']} ({es_config['effect_label_short']}):")
print(f"  {'Statistic':<15} {'Value':>12}")
print(f"  {'-'*15} {'-'*12}")
print(f"  {'Count':<15} {effect_stats['count']:>12}")
for _label, _key in _effect_rows:
    print(f"  {_label:<15} {effect_stats[_key]:>12.4f}")

# Variances are shown with six decimals because they are typically small
print(f"\n📊 Variance ({var_col}):")
print(f"  {'Statistic':<15} {'Value':>12}")
print(f"  {'-'*15} {'-'*12}")
for _label, _key in _spread_rows:
    print(f"  {_label:<15} {var_stats[_key]:>12.6f}")

print(f"\n📊 Standard Error ({se_col}):")
print(f"  {'Statistic':<15} {'Value':>12}")
print(f"  {'-'*15} {'-'*12}")
for _label, _key in _spread_rows:
    print(f"  {_label:<15} {se_stats[_key]:>12.4f}")

# Keep all three summaries with the rest of the run log
calculation_log.update(effect_stats=effect_stats, var_stats=var_stats, se_stats=se_stats)

# --- STEP 9: DIRECTION AND MAGNITUDE ANALYSIS ---
# Counts how many observations fall into each direction / magnitude category
# for the selected effect size type, prints the tables, and stores the counts
# under calculation_log['direction_analysis'].
print("\n" + "="*70)
print("STEP 9: EFFECT DIRECTION & MAGNITUDE ANALYSIS")
print("="*70)

# Analysis depends on effect size type
if effect_size_type == 'lnRR':
    # Direction analysis for log response ratio
    print(f"\n📈 Effect Direction Analysis:")

    # Define thresholds
    upregulation_threshold = 0.05  # ~5% increase
    downregulation_threshold = -0.05  # ~5% decrease

    n_upregulation = (data_filtered[effect_col] > upregulation_threshold).sum()
    n_downregulation = (data_filtered[effect_col] < downregulation_threshold).sum()
    n_no_change = len(data_filtered) - n_upregulation - n_downregulation

    # Label column is 30 wide: the longest label below is 28 characters, so a
    # 25-wide column would push that row's numbers out of alignment.
    print(f"\n  Based on lnRR threshold = ±{abs(upregulation_threshold):.2f}:")
    print(f"  {'Direction':<30} {'Count':>8} {'Percentage':>12}")
    print(f"  {'-'*30} {'-'*8} {'-'*12}")
    print(f"  {'Upregulated (lnRR > 0.05)':<30} {n_upregulation:>8} {(n_upregulation/len(data_filtered)*100):>11.1f}%")
    print(f"  {'No change (|lnRR| ≤ 0.05)':<30} {n_no_change:>8} {(n_no_change/len(data_filtered)*100):>11.1f}%")
    print(f"  {'Downregulated (lnRR < -0.05)':<30} {n_downregulation:>8} {(n_downregulation/len(data_filtered)*100):>11.1f}%")

    # Fold-change magnitude categories
    print(f"\n📏 Fold-Change Magnitude:")

    fc_2x_up = (data_filtered['Response_Ratio'] >= 2.0).sum()
    fc_2x_down = (data_filtered['Response_Ratio'] <= 0.5).sum()
    fc_3x_up = (data_filtered['Response_Ratio'] >= 3.0).sum()
    # Use the exact reciprocal: a cut-off of 0.33 would exclude a true 3×
    # decrease (ratio 0.3333…) even though a 3× increase is counted.
    fc_3x_down = (data_filtered['Response_Ratio'] <= 1 / 3).sum()
    fc_5x_up = (data_filtered['Response_Ratio'] >= 5.0).sum()
    fc_5x_down = (data_filtered['Response_Ratio'] <= 0.2).sum()

    print(f"  {'Category':<30} {'Count':>8} {'Percentage':>12}")
    print(f"  {'-'*30} {'-'*8} {'-'*12}")
    print(f"  {'≥5× increase (RR ≥ 5.0)':<30} {fc_5x_up:>8} {(fc_5x_up/len(data_filtered)*100):>11.1f}%")
    print(f"  {'≥3× increase (RR ≥ 3.0)':<30} {fc_3x_up:>8} {(fc_3x_up/len(data_filtered)*100):>11.1f}%")
    print(f"  {'≥2× increase (RR ≥ 2.0)':<30} {fc_2x_up:>8} {(fc_2x_up/len(data_filtered)*100):>11.1f}%")
    print(f"  {'≥2× decrease (RR ≤ 0.5)':<30} {fc_2x_down:>8} {(fc_2x_down/len(data_filtered)*100):>11.1f}%")
    print(f"  {'≥3× decrease (RR ≤ 0.33)':<30} {fc_3x_down:>8} {(fc_3x_down/len(data_filtered)*100):>11.1f}%")
    print(f"  {'≥5× decrease (RR ≤ 0.2)':<30} {fc_5x_down:>8} {(fc_5x_down/len(data_filtered)*100):>11.1f}%")

    # Percent change summary
    print(f"\n📊 Percent Change from Control:")
    print(f"  Mean: {data_filtered['Percent_Change'].mean():+.1f}%")
    print(f"  Median: {data_filtered['Percent_Change'].median():+.1f}%")
    print(f"  Range: [{data_filtered['Percent_Change'].min():+.1f}%, {data_filtered['Percent_Change'].max():+.1f}%]")

    calculation_log['direction_analysis'] = {
        'upregulated': n_upregulation,
        'downregulated': n_downregulation,
        'no_change': n_no_change,
        'fc_2x_up': fc_2x_up,
        'fc_2x_down': fc_2x_down,
        'fc_3x_up': fc_3x_up,
        'fc_3x_down': fc_3x_down,
        'fc_5x_up': fc_5x_up,
        'fc_5x_down': fc_5x_down
    }

elif effect_size_type in ['hedges_g', 'cohen_d']:
    # Direction and magnitude for standardized mean differences
    print(f"\n📈 Effect Direction:")

    # Symbol shown in the tables: 'g' for Hedges' g, 'd' for Cohen's d
    _smd_symbol = 'g' if effect_size_type == 'hedges_g' else 'd'

    n_positive = (data_filtered[effect_col] > 0).sum()
    n_negative = (data_filtered[effect_col] < 0).sum()
    n_zero = (data_filtered[effect_col] == 0).sum()

    print(f"  {'Direction':<25} {'Count':>8} {'Percentage':>12}")
    print(f"  {'-'*25} {'-'*8} {'-'*12}")
    for _label, _count in (
        (f"Positive effect ({_smd_symbol} > 0)", n_positive),
        (f"No effect ({_smd_symbol} = 0)", n_zero),
        (f"Negative effect ({_smd_symbol} < 0)", n_negative),
    ):
        print(f"  {_label:<25} {_count:>8} {(_count/len(data_filtered)*100):>11.1f}%")

    # Recomputed here because rows may have been dropped in Step 7 since the
    # counts printed during the effect size calculation.
    negligible = (data_filtered[effect_col].abs() < 0.2).sum()
    small = ((data_filtered[effect_col].abs() >= 0.2) & (data_filtered[effect_col].abs() < 0.5)).sum()
    medium = ((data_filtered[effect_col].abs() >= 0.5) & (data_filtered[effect_col].abs() < 0.8)).sum()
    large = (data_filtered[effect_col].abs() >= 0.8).sum()

    print(f"\n📏 Effect Magnitude (Cohen's benchmarks):")
    print(f"  {'Category':<30} {'Count':>8} {'Percentage':>12}")
    print(f"  {'-'*30} {'-'*8} {'-'*12}")
    for _label, _count in (
        (f"Negligible (|{_smd_symbol}| < 0.2)", negligible),
        (f"Small (0.2 ≤ |{_smd_symbol}| < 0.5)", small),
        (f"Medium (0.5 ≤ |{_smd_symbol}| < 0.8)", medium),
        (f"Large (|{_smd_symbol}| ≥ 0.8)", large),
    ):
        print(f"  {_label:<30} {_count:>8} {(_count/len(data_filtered)*100):>11.1f}%")

    calculation_log['direction_analysis'] = {
        'positive': n_positive,
        'negative': n_negative,
        'negligible': negligible,
        'small': small,
        'medium': medium,
        'large': large
    }

elif effect_size_type == 'log_or':
    # Direction for odds ratios
    # NOTE(review): the notebook's SUPPORTED_EFFECT_SIZES table spells this
    # key 'log_OR'; confirm the configured value is lower-case.
    print(f"\n📈 Effect Direction:")

    n_positive = (data_filtered[effect_col] > 0).sum()
    n_negative = (data_filtered[effect_col] < 0).sum()
    n_null = (data_filtered[effect_col] == 0).sum()

    print(f"  {'Direction':<30} {'Count':>8} {'Percentage':>12}")
    print(f"  {'-'*30} {'-'*8} {'-'*12}")
    print(f"  {'Positive association (OR > 1)':<30} {n_positive:>8} {(n_positive/len(data_filtered)*100):>11.1f}%")
    print(f"  {'No association (OR = 1)':<30} {n_null:>8} {(n_null/len(data_filtered)*100):>11.1f}%")
    print(f"  {'Negative association (OR < 1)':<30} {n_negative:>8} {(n_negative/len(data_filtered)*100):>11.1f}%")

    print(f"\n📊 Odds Ratio Summary:")
    print(f"  Mean OR: {data_filtered['Odds_Ratio'].mean():.3f}")
    print(f"  Median OR: {data_filtered['Odds_Ratio'].median():.3f}")
    print(f"  Range: [{data_filtered['Odds_Ratio'].min():.3f}, {data_filtered['Odds_Ratio'].max():.3f}]")

    # Record the counts like the other branches do
    calculation_log['direction_analysis'] = {
        'positive': n_positive,
        'negative': n_negative,
        'null': n_null
    }

# --- STEP 10: IDENTIFY EXTREME VALUES ---
# Two independent screens on the effect size column:
#   1. a fixed absolute cut-off chosen per effect size type, and
#   2. Tukey-style fences at 1.5 × IQR beyond the quartiles.
# Results are printed and stored in calculation_log.
print("\n" + "="*70)
print("STEP 10: EXTREME VALUE DETECTION")
print("="*70)

print(f"\n🔍 Identifying outliers and extreme effect sizes...")

# Absolute cut-off, its display label and its plain-language meaning
if effect_size_type in ['hedges_g', 'cohen_d']:
    # Very large standardized effect
    threshold, extreme_label, interpretation = (
        2.0, "|g| > 2.0", "Very large effect (exceeds typical benchmarks)"
    )
elif effect_size_type == 'lnRR':
    # exp(3) is roughly a 20-fold change
    threshold, extreme_label, interpretation = (
        3.0, "RR > 20× or RR < 0.05×", "More than 20-fold change"
    )
elif effect_size_type == 'log_or':
    # exp(3) is roughly OR = 20
    threshold, extreme_label, interpretation = (
        3.0, "OR > 20 or OR < 0.05", "Odds ratio > 20× or < 0.05×"
    )

_is_extreme = data_filtered[effect_col].abs() > threshold
extreme_effects = data_filtered.loc[_is_extreme].copy()

print(f"\n  Threshold: {extreme_label}")
print(f"  Interpretation: {interpretation}")

_has_extremes = len(extreme_effects) > 0
if not _has_extremes:
    print(f"\n  ✓ No extreme values detected")
    print(f"    All effect sizes within expected range")
else:
    print(f"\n  ⚠️  Found {len(extreme_effects)} extreme effects ({len(extreme_effects)/len(data_filtered)*100:.1f}% of dataset):")
    print(f"\n  {'Paper ID':<15} {es_config['effect_label_short']:>10} {'SE':>10} {'Treatment':>12} {'Control':>12}")
    print(f"  {'-'*15} {'-'*10} {'-'*10} {'-'*12} {'-'*12}")

    # List at most the first 20 flagged rows
    for idx, row in extreme_effects.head(20).iterrows():
        paper_id, effect, se, xe, xc = (
            str(row['id'])[:15], row[effect_col], row[se_col], row['xe'], row['xc']
        )
        print(f"  {paper_id:<15} {effect:>10.4f} {se:>10.4f} {xe:>12.4f} {xc:>12.4f}")

    if len(extreme_effects) > 20:
        print(f"  ... and {len(extreme_effects) - 20} more")

    print(f"\n  💡 Recommendations:")
    print(f"     1. Review these observations for data entry errors")
    print(f"     2. Check original papers for these effect sizes")
    print(f"     3. Consider sensitivity analysis excluding these values")
    print(f"     4. Examine if they represent true biological phenomena")

# Log the screen; paper ids are only included when something was flagged
_extreme_summary = {'count': len(extreme_effects), 'threshold': threshold}
if _has_extremes:
    _extreme_summary['paper_ids'] = extreme_effects['id'].tolist()
calculation_log['extreme_effects'] = _extreme_summary

# Second screen: 1.5 × IQR fences
print(f"\n📊 Outlier Detection (IQR Method):")
q1, q3 = (data_filtered[effect_col].quantile(_p) for _p in (0.25, 0.75))
iqr = q3 - q1
lower_fence = q1 - 1.5 * iqr
upper_fence = q3 + 1.5 * iqr

_below_fence = data_filtered[effect_col] < lower_fence
_above_fence = data_filtered[effect_col] > upper_fence
outliers_iqr = data_filtered[_below_fence | _above_fence]

print(f"  Q1 (25th percentile): {q1:.4f}")
print(f"  Q3 (75th percentile): {q3:.4f}")
print(f"  IQR: {iqr:.4f}")
print(f"  Lower fence: {lower_fence:.4f}")
print(f"  Upper fence: {upper_fence:.4f}")
print(f"\n  Outliers detected: {len(outliers_iqr)} ({len(outliers_iqr)/len(data_filtered)*100:.1f}%)")

if len(outliers_iqr) > 0:
    print(f"    • Below lower fence: {_below_fence.sum()}")
    print(f"    • Above upper fence: {_above_fence.sum()}")

calculation_log['outliers_iqr'] = dict(
    count=len(outliers_iqr),
    lower_fence=lower_fence,
    upper_fence=upper_fence,
    paper_ids=outliers_iqr['id'].tolist(),
)

# --- STEP 11: CONFIDENCE INTERVAL COVERAGE ---
# Counts how many per-observation 95% intervals contain the null value,
# adds a 'ci_width' column, and bins observations by interval width
# relative to the median width.
print("\n" + "="*70)
print("STEP 11: CONFIDENCE INTERVAL ANALYSIS")
print("="*70)

# Interval column names and the null value come from the effect size config
ci_lower_col, ci_upper_col = es_config['ci_lower_col'], es_config['ci_upper_col']
null_value = es_config['null_value']

# An interval "includes" the null when lower ≤ null ≤ upper
_covers_null = data_filtered[ci_lower_col].le(null_value) & data_filtered[ci_upper_col].ge(null_value)
ci_includes_null = _covers_null.sum()
ci_excludes_null = len(data_filtered) - ci_includes_null

print(f"\n📊 95% Confidence Interval Coverage:")
print(f"  Null hypothesis value: {null_value}")
print(f"\n  {'Category':<35} {'Count':>8} {'Percentage':>12}")
print(f"  {'-'*35} {'-'*8} {'-'*12}")
for _label, _count in (
    ('CI includes null (not significant)', ci_includes_null),
    ('CI excludes null (significant)', ci_excludes_null),
):
    print(f"  {_label:<35} {_count:>8} {(_count/len(data_filtered)*100):>11.1f}%")

# Width of each interval, kept as a column for later use
data_filtered['ci_width'] = data_filtered[ci_upper_col] - data_filtered[ci_lower_col]
_widths = data_filtered['ci_width']
mean_ci_width = _widths.mean()
median_ci_width = _widths.median()

print(f"\n📏 Confidence Interval Width:")
print(f"  Mean CI width:   {mean_ci_width:.4f}")
print(f"  Median CI width: {median_ci_width:.4f}")
print(f"  Min CI width:    {_widths.min():.4f}")
print(f"  Max CI width:    {_widths.max():.4f}")

# Precision bins: below half the median width, above twice it, or in between
narrow_ci = _widths.lt(median_ci_width * 0.5).sum()
moderate_ci = (_widths.ge(median_ci_width * 0.5) & _widths.le(median_ci_width * 2)).sum()
wide_ci = _widths.gt(median_ci_width * 2).sum()

print(f"\n📊 Precision Distribution:")
print(f"  {'Category':<30} {'Count':>8} {'Percentage':>12}")
print(f"  {'-'*30} {'-'*8} {'-'*12}")
for _label, _count in (
    ('High precision (narrow CI)', narrow_ci),
    ('Moderate precision', moderate_ci),
    ('Low precision (wide CI)', wide_ci),
):
    print(f"  {_label:<30} {_count:>8} {(_count/len(data_filtered)*100):>11.1f}%")

calculation_log['ci_analysis'] = dict(
    ci_includes_null=ci_includes_null,
    ci_excludes_null=ci_excludes_null,
    mean_ci_width=mean_ci_width,
    median_ci_width=median_ci_width,
)

# --- STEP 12: UPDATE CONFIGURATION ---
# Publishes the chosen column names and final row count to ANALYSIS_CONFIG
# and gathers everything recorded during this cell into EFFECT_SIZE_METADATA.
print("\n" + "="*70)
print("STEP 12: UPDATING CONFIGURATION")
print("="*70)

ANALYSIS_CONFIG.update({
    'effect_col': effect_col,
    'var_col': var_col,
    'se_col': se_col,
    'ci_lower_col': ci_lower_col,
    'ci_upper_col': ci_upper_col,
    'final_n': len(data_filtered),
    'calculation_timestamp': datetime.datetime.now(),
})

print(f"\n✓ Configuration updated with effect size information:")
print(f"  • Effect column:    {effect_col}")
print(f"  • Variance column:  {var_col}")
print(f"  • SE column:        {se_col}")
print(f"  • CI columns:       {ci_lower_col}, {ci_upper_col}")
print(f"  • Final n:          {len(data_filtered)}")

# One record describing the whole effect size calculation
EFFECT_SIZE_METADATA = dict(
    timestamp=datetime.datetime.now(),
    effect_size_type=effect_size_type,
    n_initial=initial_obs,
    n_final=len(data_filtered),
    n_removed=initial_obs - len(data_filtered),
    papers_initial=initial_papers,
    papers_final=data_filtered['id'].nunique(),
    imputation_log=imputation_log,
    calculation_log=calculation_log,
    effect_stats=effect_stats,
    var_stats=var_stats,
    se_stats=se_stats,
    columns_created=calculation_log['columns_created'],
)

print(f"\n✓ Metadata saved to EFFECT_SIZE_METADATA")

# --- STEP 13: DATA PREVIEW ---
# Prints the first ten rows of the key input and result columns as a
# fixed-width text table, with numbers rounded for readability.
print("\n" + "="*70)
print("STEP 13: DATA PREVIEW")
print("="*70)

print(f"\n📋 Preview of Calculated Data (first 10 observations):\n")

# Select columns for preview
preview_cols = ['id', 'xe', 'xc', 'ne', 'nc', effect_col, se_col]

# Add CI columns
preview_cols.extend([ci_lower_col, ci_upper_col])

# Add fold-change if available
if es_config['has_fold_change']:
    if 'fold_change' in data_filtered.columns:
        preview_cols.append('fold_change')
    if 'Response_Ratio' in data_filtered.columns:
        preview_cols.append('Response_Ratio')
    elif 'Odds_Ratio' in data_filtered.columns:
        preview_cols.append('Odds_Ratio')

# Add weight
preview_cols.append('w_fixed')

# Display preview (a copy, so formatting never touches data_filtered)
preview_df = data_filtered[preview_cols].head(10).copy()

# Format numeric columns
for col in preview_df.select_dtypes(include=[np.number]).columns:
    if col == 'id':
        # 'id' is an identifier even when stored as a number; leaving it out
        # of the float formatter keeps paper 12 from printing as "12.0000".
        continue
    if col in ['ne', 'nc']:
        preview_df[col] = preview_df[col].astype(int)
    elif col == 'w_fixed':
        preview_df[col] = preview_df[col].apply(lambda x: f'{x:.2f}')
    else:
        preview_df[col] = preview_df[col].apply(lambda x: f'{x:.4f}')

print(preview_df.to_string(index=False))

if len(data_filtered) > 10:
    print(f"\n... and {len(data_filtered) - 10} more observations")

# --- FINAL STATUS ---
# Closing report for the effect size cell: dataset size, headline effect
# size numbers, the columns now available, and any data-quality flags
# raised by the earlier steps.
print("\n" + "="*70)
print("✅ EFFECT SIZE CALCULATION COMPLETE")
print("="*70)

print(f"\n📊 Final Dataset Summary:")
print(f"  • Observations:           {len(data_filtered)}")
print(f"  • Unique papers:          {data_filtered['id'].nunique()}")
print(f"  • Effect size type:       {es_config['effect_label']} ({es_config['effect_label_short']})")
print(f"  • Mean effect size:       {effect_stats['mean']:.4f}")
print(f"  • Median effect size:     {effect_stats['median']:.4f}")
print(f"  • Effect size range:      [{effect_stats['min']:.4f}, {effect_stats['max']:.4f}]")

if es_config['has_fold_change']:
    # Each column is checked on its own so a missing 'fold_change' cannot
    # raise a KeyError just because 'Response_Ratio' happens to exist.
    if 'Response_Ratio' in data_filtered.columns:
        print(f"  • Mean response ratio:    {data_filtered['Response_Ratio'].mean():.3f}")
    if 'fold_change' in data_filtered.columns:
        print(f"  • Median fold-change:     {data_filtered['fold_change'].median():.2f}×")

print(f"\n📁 Columns Available:")
print(f"  Primary: {effect_col}, {var_col}, {se_col}")
print(f"  CI: {ci_lower_col}, {ci_upper_col}")
print(f"  Weight: w_fixed")
if es_config['has_fold_change']:
    print(f"  Interpretation: {', '.join([c for c in data_filtered.columns if 'fold' in c.lower() or 'ratio' in c.lower() or 'percent' in c.lower()])}")

# Collect the notes first so the section never prints as an empty heading
_quality_notes = []
_n_sd_imputed = imputation_log['sde_imputed'] + imputation_log['sdc_imputed']
if _n_sd_imputed > 0:
    _quality_notes.append(f"  • {_n_sd_imputed} SDs were imputed using median CV")
if calculation_log.get('extreme_effects', {}).get('count', 0) > 0:
    _quality_notes.append(f"  • {calculation_log['extreme_effects']['count']} extreme effect sizes detected")
if len(outliers_iqr) > 0:
    _quality_notes.append(f"  • {len(outliers_iqr)} outliers detected using IQR method")

print(f"\n⚠️  Quality Notes:")
if _quality_notes:
    for _note in _quality_notes:
        print(_note)
else:
    print(f"  • None - no imputation, extreme values or IQR outliers flagged")

print(f"\n▶️  Next Steps:")
print(f"  1. Review the summary statistics and data quality notes")
print(f"  2. Run the next cell to perform meta-analysis and calculate pooled estimates")
print(f"  3. Consider the extreme values and outliers flagged above")

print("\n" + "="*70)


EFFECT SIZE CALCULATION
Timestamp: 2025-11-14 18:01:29

STEP 1: LOADING CONFIGURATION
✓ Configuration loaded successfully
  Effect size type: Hedges' g (g)
  Scale: standardized
  Allows negatives: True
  Null value: 0

📊 Input Dataset:
  Observations: 69
  Papers: 23

STEP 2: DATA VALIDATION
✓ All required columns present
  • xe: 69/69 valid (100.0%)
  • sde: 69/69 valid (100.0%)
  • ne: 69/69 valid (100.0%)
  • xc: 69/69 valid (100.0%)
  • sdc: 69/69 valid (100.0%)
  • nc: 69/69 valid (100.0%)

STEP 3: STANDARD DEVIATION IMPUTATION
🔧 Processing standard deviations...

📋 Initial SD Status:
  Experimental (sde):
    • Zero values:    56
    • Missing values: 0
    • Total issues:   56
  Control (sdc):
    • Zero values:    55
    • Missing values: 0
    • Total issues:   55

🔬 Calculating Coefficient of Variation (CV)...

  CV Statistics (Experimental):
    • Valid CVs:   13/69 (18.8%)
    • Median CV:   0.0560
    • Mean CV:     0.0578
    • Min CV:      0.0039
    • Max CV:      0.0

In [15]:
#@title 📊 OVERALL POOLED EFFECT SIZE & HETEROGENEITY

# =============================================================================
# CELL 6: OVERALL META-ANALYSIS
# Purpose: Calculate pooled effect sizes and assess heterogeneity
# Dependencies: Cell 5 (data_filtered with effect sizes, ANALYSIS_CONFIG)
# Outputs: Overall pooled estimates (fixed & random effects), heterogeneity stats
# =============================================================================

from scipy.stats import norm, chi2, t

# --- TAU-SQUARED ESTIMATOR SELECTION ---
# Builds `method_options` (dropdown entries as (label, code) pairs) and
# `method_help` (an HTML banner). Which set is built depends on whether the
# name `calculate_tau_squared` is defined in this kernel's global namespace.

print("\n" + "=" * 70, "TAU-SQUARED ESTIMATOR SELECTION", "=" * 70, sep="\n")

if 'calculate_tau_squared' not in globals():
    # Advanced estimators are not defined: offer DerSimonian-Laird only.
    print("⚠️  Using DerSimonian-Laird method only")
    print("   Run Cell 4.5 to enable REML and other methods")

    method_options = [('DerSimonian-Laird', 'DL')]

    method_help = widgets.HTML(value="".join((
        "<div style='background-color: #fff3cd; padding: 10px; margin: 10px 0; border-radius: 5px;'>",
        "⚠️ Run <b>Cell 4.5 (Heterogeneity Estimators)</b> to access REML and other methods.",
        "</div>",
    )))
else:
    # Advanced estimators are defined: offer the full list.
    print("✅ Advanced estimators available")

    method_options = [
        ('REML (Recommended)', 'REML'),
        ('DerSimonian-Laird (Classic)', 'DL'),
        ('Maximum Likelihood', 'ML'),
        ('Paule-Mandel', 'PM'),
        ('Sidik-Jonkman', 'SJ'),
    ]

    method_help = widgets.HTML(value="".join((
        "<div style='background-color: #e8f4f8; padding: 10px; margin: 10px 0; border-radius: 5px;'>",
        "<b>💡 Method Guide:</b><br>",
        "• <b>REML:</b> ⭐ Best choice for most analyses. Unbiased and accurate.<br>",
        "• <b>DL:</b> Fast but can underestimate τ² with few studies.<br>",
        "• <b>ML:</b> Efficient but biased downward.<br>",
        "• <b>PM:</b> Exact Q = k-1 solution.<br>",
        "• <b>SJ:</b> Conservative, good for k < 10.",
        "</div>",
    )))

# Dropdown for choosing the τ² (between-study variance) estimator.
#
# A new selection only takes effect when this cell is re-run, and re-running
# re-creates the widget. The initial value is therefore restored from the
# method stored in ANALYSIS_CONFIG by a previous run (when that method is
# still among the offered options); starting from a hard-coded default every
# time would silently overwrite the user's choice before the analysis runs.
default_tau_method = 'REML' if 'calculate_tau_squared' in globals() else 'DL'
available_tau_methods = [method_code for _, method_code in method_options]
previous_tau_method = ANALYSIS_CONFIG.get('tau_method')

if previous_tau_method in available_tau_methods:
    initial_tau_method = previous_tau_method
else:
    # First run, or the stored method is no longer offered.
    initial_tau_method = default_tau_method

tau_method_widget = widgets.Dropdown(
    options=method_options,
    value=initial_tau_method,
    description='τ² Method:',
    style={'description_width': '100px'},
    layout=widgets.Layout(width='400px')
)

# Save selection to config (read back further down via ANALYSIS_CONFIG.get)
ANALYSIS_CONFIG['tau_method'] = tau_method_widget.value

def on_method_change(change):
    """Store the newly selected estimator code in ANALYSIS_CONFIG.

    Args:
        change: change notification passed by the widget's `observe`
            mechanism; only its 'new' entry is read.
    """
    ANALYSIS_CONFIG['tau_method'] = change['new']

tau_method_widget.observe(on_method_change, names='value')

# Show the help banner stacked above the estimator dropdown.
display(widgets.VBox(children=[method_help, tau_method_widget]))

# Reminder that a changed selection is only applied on the next run of the cell.
rerun_message = widgets.HTML(value=(
    "<div style='background-color: #fffbf0; padding: 8px; margin: 10px 0; border-left: 3px solid #ff9800; border-radius: 3px;'>"
    "⚠️ <b>Important:</b> After changing the method, you must re-run this cell to apply the new estimator."
    "</div>"
))
display(rerun_message)

# Closing rule of the estimator-selection section.
print("", "=" * 70, sep="\n")

# --- ANALYSIS BANNER ---
print("\n" + "=" * 70, "OVERALL META-ANALYSIS", "=" * 70, sep="\n")
print(f"Timestamp: {datetime.datetime.now():%Y-%m-%d %H:%M:%S}")

# --- STEP 1: LOAD CONFIGURATION ---
# Reads the column names and effect-size settings from ANALYSIS_CONFIG.
# A missing key is reported with troubleshooting hints and then re-raised,
# which stops the cell.
print("\n" + "=" * 70, "STEP 1: LOADING CONFIGURATION", "=" * 70, sep="\n")

try:
    effect_col = ANALYSIS_CONFIG['effect_col']
    var_col = ANALYSIS_CONFIG['var_col']
    se_col = ANALYSIS_CONFIG['se_col']
    es_config = ANALYSIS_CONFIG['es_config']
    effect_type = ANALYSIS_CONFIG['effect_size_type']

    print("✓ Configuration loaded successfully")
    print("  Effect size: {} ({})".format(es_config['effect_label'], es_config['effect_label_short']))
    print(
        f"  Effect column: {effect_col}",
        f"  Variance column: {var_col}",
        f"  SE column: {se_col}",
        sep="\n",
    )
except KeyError as e:
    print(
        f"❌ ERROR: Configuration not found - {e}",
        "\nTroubleshooting:",
        "  1. Ensure Cell 5 (effect size calculation) was run successfully",
        "  2. Check that ANALYSIS_CONFIG dictionary exists",
        "  3. Verify effect sizes were calculated properly",
        sep="\n",
    )
    raise

# --- STEP 2: PREPARE ANALYSIS DATA ---
# Builds `analysis_data` from `data_filtered`: rows with a missing effect size,
# variance or fixed-effect weight are dropped, then rows whose variance is not
# strictly positive. Raises ValueError when nothing is left.
print("\n" + "=" * 70, "STEP 2: DATA PREPARATION", "=" * 70, sep="\n")

print("\n🔍 Preparing data for meta-analysis...")

# Dataset size before filtering (used below for the "Removed" line)
initial_count = len(data_filtered)
initial_papers = data_filtered['id'].nunique()

print(
    "\n  Initial dataset:",
    f"    • Observations: {initial_count}",
    f"    • Unique papers: {initial_papers}",
    sep="\n",
)

# Keep only rows where effect size, variance and weight are all present
analysis_data = data_filtered.dropna(subset=[effect_col, var_col, 'w_fixed']).copy()

# Flag rows with a strictly positive variance; the rest are discarded
positive_var = analysis_data[var_col].gt(0)
n_non_positive = (~positive_var).sum()

if n_non_positive > 0:
    print(f"\n  ⚠️  Removing {n_non_positive} observations with non-positive variance")
    analysis_data = analysis_data.loc[positive_var].copy()

# Number of observations and distinct papers that enter the analysis
k = len(analysis_data)
k_papers = analysis_data['id'].nunique()

if k < 1:
    print(
        "\n❌ ERROR: No valid studies available for meta-analysis",
        "   Possible causes:",
        "     • All variances are zero or negative",
        "     • Missing effect size data",
        "     • All weights are invalid",
        sep="\n",
    )
    raise ValueError("No valid studies available for meta-analysis after filtering.")

print(
    "\n  ✓ Final analysis dataset:",
    f"    • Observations (k): {k}",
    f"    • Unique papers: {k_papers}",
    f"    • Removed: {initial_count - k} observations",
    sep="\n",
)

# Average number of observations contributed by each paper
if k_papers > 0:
    avg_obs_per_paper = k / k_papers
else:
    avg_obs_per_paper = 0
print(f"    • Avg obs per paper: {avg_obs_per_paper:.2f}")

# --- STEP 3: HANDLE SINGLE STUDY CASE ---
if k == 1:
    print("\n" + "="*70)
    print("⚠️  SINGLE STUDY ANALYSIS")
    print("="*70)

    print(f"\n⚠️  WARNING: Only one observation available (k=1)")
    print(f"   Meta-analysis requires multiple studies")
    print(f"   Reporting single study results:")

    single_study = analysis_data.iloc[0]

    print(f"\n📋 Single Study Details:")
    print(f"  Study ID: {single_study.get('id', 'N/A')}")
    print(f"  {es_config['effect_label_short']}: {single_study[effect_col]:.4f}")
    print(f"  Variance: {single_study[var_col]:.6f}")
    print(f"  SE: {single_study[se_col]:.4f}")
    print(f"  Treatment mean: {single_study['xe']:.4f}")
    print(f"  Control mean: {single_study['xc']:.4f}")
    print(f"  Sample size (treatment): {int(single_study['ne'])}")
    print(f"  Sample size (control): {int(single_study['nc'])}")

    if es_config['has_fold_change']:
        if 'fold_change' in single_study:
            print(f"  Fold-change: {single_study['fold_change']:.2f}×")
        if 'Response_Ratio' in single_study:
            print(f"  Response Ratio: {single_study['Response_Ratio']:.3f}")

    # 95% confidence interval of the single study (normal approximation)
    z_crit = norm.ppf(0.975)  # ~1.96
    ci_lower = single_study[effect_col] - z_crit * single_study[se_col]
    ci_upper = single_study[effect_col] + z_crit * single_study[se_col]

    print(f"\n  95% CI: [{ci_lower:.4f}, {ci_upper:.4f}]")

    # Define every result variable that the multi-study branch defines, so
    # that code running after this branch never hits an undefined name or a
    # stale value left over from an earlier run of the cell.
    # The "pooled" estimates simply mirror the single study; statistics that
    # need more than one study are NaN.
    pooled_effect_fixed = single_study[effect_col]
    pooled_var_fixed = single_study[var_col]
    pooled_SE_fixed = single_study[se_col]
    ci_lower_fixed = ci_lower
    ci_upper_fixed = ci_upper
    z_stat_fixed = np.nan
    p_value_fixed = np.nan

    # Heterogeneity cannot be assessed with one study
    Qt = np.nan
    p_heterogeneity = np.nan
    I_squared = np.nan
    tau_squared_DL = np.nan
    tau_DL = np.nan

    # Random-effects results are identical to the fixed-effects ones here
    pooled_effect_random = pooled_effect_fixed
    pooled_var_random = pooled_var_fixed
    pooled_SE_random = pooled_SE_fixed
    ci_lower_random = ci_lower
    ci_upper_random = ci_upper
    z_stat_random = np.nan
    p_value_random = np.nan

    # No prediction interval without an estimate of between-study variance
    pi_lower_random = np.nan
    pi_upper_random = np.nan

    print(f"\n" + "="*70)
    print(f"⚠️  META-ANALYSIS NOT POSSIBLE WITH ONE STUDY")
    print(f"="*70)
    print(f"\nRecommendations:")
    print(f"  1. Report single study results with appropriate caution")
    print(f"  2. Cannot assess heterogeneity or publication bias")
    print(f"  3. Consider collecting more studies before drawing conclusions")

else:
    # --- STEP 4: FIXED-EFFECTS MODEL ---
    # Pools the effect sizes as a weighted mean using the precomputed
    # `w_fixed` column; the pooled variance is 1 / sum(weights).
    print("\n" + "="*70)
    print("STEP 3: FIXED-EFFECTS MODEL")
    print("="*70)

    print(f"\n📐 Model Assumption:")
    print(f"   All studies share a common true effect size")
    print(f"   Differences between studies are due to sampling error only")

    print(f"\n🔢 Calculating inverse-variance weighted mean...")

    # Significance level and the matching two-sided normal critical value
    alpha = 0.05
    z_crit = norm.ppf(1 - alpha / 2)  # ~1.96 for 95% CI

    # Calculate sum of weights
    sum_w_fixed = analysis_data['w_fixed'].sum()

    if sum_w_fixed <= 0:
        print(f"❌ ERROR: Sum of fixed-effects weights is non-positive")
        raise ValueError("Sum of fixed-effects weights is non-positive. Check variance values.")

    print(f"   Sum of weights: {sum_w_fixed:.2f}")

    # Pooled effect size (weighted mean)
    pooled_effect_fixed = (analysis_data['w_fixed'] * analysis_data[effect_col]).sum() / sum_w_fixed

    # Variance of pooled effect
    pooled_var_fixed = 1 / sum_w_fixed
    pooled_SE_fixed = np.sqrt(pooled_var_fixed)

    # 95% Confidence Interval
    ci_lower_fixed = pooled_effect_fixed - z_crit * pooled_SE_fixed
    ci_upper_fixed = pooled_effect_fixed + z_crit * pooled_SE_fixed

    # Test significance (H0: effect = 0), two-sided.
    # The survival function is used instead of 1 - cdf: for large |z| the cdf
    # rounds to 1.0 and the subtraction would return a p-value of exactly 0.
    z_stat_fixed = pooled_effect_fixed / pooled_SE_fixed
    p_value_fixed = 2 * norm.sf(abs(z_stat_fixed))

    # Display results
    print(f"\n📊 Fixed-Effects Results:")
    print(f"  {'Statistic':<25} {'Value':>15}")
    print(f"  {'-'*25} {'-'*15}")
    print(f"  {'Pooled ' + es_config['effect_label_short']:<25} {pooled_effect_fixed:>15.4f}")
    print(f"  {'Standard Error':<25} {pooled_SE_fixed:>15.4f}")
    print(f"  {'Variance':<25} {pooled_var_fixed:>15.6f}")
    print(f"  {'95% CI Lower':<25} {ci_lower_fixed:>15.4f}")
    print(f"  {'95% CI Upper':<25} {ci_upper_fixed:>15.4f}")
    print(f"  {'Z-statistic':<25} {z_stat_fixed:>15.4f}")
    print(f"  {'P-value':<25} {p_value_fixed:>15.4g}")

    # Interpretation for ratio-based measures: back-transform the pooled
    # log-scale estimate and its CI with exp() and report them on the
    # ratio scale.
    if es_config['has_fold_change']:
        print(f"\n📈 Biological Interpretation:")

        if effect_type == 'lnRR':
            pooled_RR_fixed = np.exp(pooled_effect_fixed)
            # Signed fold-change: ratios below 1 are reported as -1/RR
            pooled_fold_fixed = pooled_RR_fixed if pooled_effect_fixed >= 0 else -1/pooled_RR_fixed
            pooled_pct_fixed = (pooled_RR_fixed - 1) * 100
            ci_lower_RR = np.exp(ci_lower_fixed)
            ci_upper_RR = np.exp(ci_upper_fixed)

            print(f"  {'Metric':<30} {'Value':>15}")
            print(f"  {'-'*30} {'-'*15}")
            print(f"  {'Response Ratio (RR)':<30} {pooled_RR_fixed:>15.3f}")
            print(f"  {'Fold-change':<30} {pooled_fold_fixed:>+14.2f}×")
            print(f"  {'Percent change':<30} {pooled_pct_fixed:>+14.1f}%")
            print(f"  {'95% CI (RR scale)':<30} [{ci_lower_RR:.3f}, {ci_upper_RR:.3f}]")

            # Direction interpretation (|lnRR| <= 0.05 is treated as no change)
            if pooled_effect_fixed > 0.05:
                direction = "INCREASE (upregulation)"
            elif pooled_effect_fixed < -0.05:
                direction = "DECREASE (downregulation)"
            else:
                direction = "NO CHANGE"
            print(f"\n  Overall direction: {direction}")

        # Compared case-insensitively: the notebook declares this effect type
        # as 'log_OR', which an exact match against 'log_or' never equals.
        elif str(effect_type).lower() == 'log_or':
            pooled_OR_fixed = np.exp(pooled_effect_fixed)
            ci_lower_OR = np.exp(ci_lower_fixed)
            ci_upper_OR = np.exp(ci_upper_fixed)

            print(f"  {'Metric':<30} {'Value':>15}")
            print(f"  {'-'*30} {'-'*15}")
            print(f"  {'Odds Ratio (OR)':<30} {pooled_OR_fixed:>15.3f}")
            print(f"  {'95% CI (OR scale)':<30} [{ci_lower_OR:.3f}, {ci_upper_OR:.3f}]")

            if pooled_OR_fixed > 1:
                direction = "Positive association"
            elif pooled_OR_fixed < 1:
                direction = "Negative association"
            else:
                direction = "No association"
            print(f"\n  Interpretation: {direction}")

    # Significance interpretation
    # The first cutoff the fixed-effects p-value falls under decides the
    # label; when none matches (this includes NaN) the effect is reported as
    # not significant.
    print(f"\n📌 Statistical Significance:")
    sig_text, sig_symbol = next(
        (
            (verdict, stars)
            for cutoff, verdict, stars in (
                (0.001, "HIGHLY SIGNIFICANT (p < 0.001)", "***"),
                (0.01, "VERY SIGNIFICANT (p < 0.01)", "**"),
                (0.05, "SIGNIFICANT (p < 0.05)", "*"),
            )
            if p_value_fixed < cutoff
        ),
        ("NOT SIGNIFICANT (p ≥ 0.05)", "ns"),
    )

    print(f"  The overall effect is {sig_text} {sig_symbol}")

    # --- STEP 5: HETEROGENEITY ASSESSMENT ---
    print("\n" + "="*70)
    print("STEP 4: HETEROGENEITY ASSESSMENT")
    print("="*70)

    print(f"\n📊 Testing for variability across studies...")

    # Cochran's Q statistic: weighted sum of squared deviations of each
    # effect size from the fixed-effects pooled estimate
    Qt = (analysis_data['w_fixed'] * (analysis_data[effect_col] - pooled_effect_fixed)**2).sum()
    df_Q = k - 1

    print(f"\n🔬 Cochran's Q Test:")
    print(f"  Q statistic: {Qt:.4f}")
    print(f"  Degrees of freedom: {df_Q}")
    print(f"  Expected value under H₀: {df_Q}")

    # P-value for Q test (H0: homogeneous effects)
    if df_Q > 0:
        # Upper-tail probability via the survival function; 1 - cdf would
        # round to exactly 0 for very large Q.
        p_heterogeneity = chi2.sf(Qt, df_Q)
        print(f"  P-value (χ² test): {p_heterogeneity:.4g}")

        if p_heterogeneity < 0.001:
            q_interp = "Highly significant heterogeneity (p < 0.001)"
        elif p_heterogeneity < 0.01:
            q_interp = "Very significant heterogeneity (p < 0.01)"
        elif p_heterogeneity < 0.10:
            q_interp = "Significant heterogeneity (p < 0.10)"
        else:
            q_interp = "No significant heterogeneity (p ≥ 0.10)"

        print(f"  Interpretation: {q_interp}")
    else:
        p_heterogeneity = np.nan
        # Keep q_interp defined: the heterogeneity summary table reads it.
        q_interp = "Not applicable (only one study)"
        print(f"  P-value: N/A (only one study)")

    # I-squared (proportion of variance due to heterogeneity)
    print(f"\n📏 I² (I-squared) Statistic:")

    # I² = (Q - df) / Q as a percentage, floored at 0 when Q does not exceed df
    I_squared = ((Qt - df_Q) / Qt) * 100 if Qt > df_Q else 0

    print(f"  I² = {I_squared:.2f}%")
    print(f"  Interpretation: {I_squared:.2f}% of total variation is due to heterogeneity")

    # Band lookup: the first upper bound that I² stays below selects the
    # (interpretation, colour marker, recommendation) triple; values of 75
    # and above fall through to the last band.
    i2_interp, i2_color, i2_recommendation = next(
        (
            band
            for upper_bound, band in (
                (25, ("Low heterogeneity (might not be important)", "🟢",
                      "Fixed or random effects both acceptable")),
                (50, ("Moderate heterogeneity", "🟡",
                      "Consider random effects model")),
                (75, ("Substantial heterogeneity", "🟠",
                      "Use random effects model; explore sources")),
            )
            if I_squared < upper_bound
        ),
        ("Considerable heterogeneity", "🔴",
         "Use random effects model; investigate thoroughly"),
    )

    print(f"  {i2_color} {i2_interp}")
    print(f"  → {i2_recommendation}")

    # Tau-squared (between-study variance) - using selected method
    print(f"\n🔬 Between-Study Variance (Tau²):")

    # Estimator code chosen through the dropdown ('DL' when none is stored)
    selected_method = ANALYSIS_CONFIG.get('tau_method', 'DL')

    if selected_method == 'DL' or 'calculate_tau_squared' not in globals():
        # Inline DerSimonian-Laird: tau² = (Q - df) / C, floored at 0
        sum_w_fixed_sq = (analysis_data['w_fixed'] ** 2).sum()
        C = sum_w_fixed - (sum_w_fixed_sq / sum_w_fixed)

        print(f"  C constant: {C:.4f}")

        tau_squared_DL = (Qt - df_Q) / C if (C > 0 and Qt > df_Q) else 0

        tau_DL = np.sqrt(tau_squared_DL)
        method_used = 'DL'
        tau_squared_DL_comparison = None
    else:
        # Delegate to the externally defined estimator for the chosen method
        print(f"  Using {selected_method} estimator...")
        tau_squared_DL = float(
            calculate_tau_squared(analysis_data, effect_col, var_col, method=selected_method)
        )
        tau_DL = np.sqrt(tau_squared_DL)
        method_used = selected_method

        # DerSimonian-Laird value computed alongside, for comparison only
        sum_w_fixed_sq = (analysis_data['w_fixed'] ** 2).sum()
        C = sum_w_fixed - (sum_w_fixed_sq / sum_w_fixed)
        tau_squared_DL_comparison = (Qt - df_Q) / C if (C > 0 and Qt > df_Q) else 0

    print(f"  Tau² (variance): {tau_squared_DL:.6f}")
    print(f"  Tau (SD): {tau_DL:.4f}")
    print(f"  Method: {method_used}")

    print(
        f"  Interpretation: Average between-study variation = {tau_DL:.4f} {es_config['effect_label_short']} units"
        if tau_squared_DL > 0
        else f"  Interpretation: No detectable between-study variation"
    )

    # Display method status and comparison.
    # The full table relies on `compare_tau_estimators`, a helper that is not
    # defined in this cell. Only `calculate_tau_squared` has been checked for
    # so far, so confirm the helper exists instead of aborting the analysis
    # with a NameError; without it the simple two-method comparison is shown.
    if method_used != 'DL' and k >= 5 and 'compare_tau_estimators' in globals():
        # Enhanced comparison using all tau estimators
        print(f"\n" + "="*70)
        print("📊 TAU-SQUARED ESTIMATOR COMPARISON")
        print("="*70)
        print(f"\nComparing all available tau-squared estimation methods:")
        print(f"(Sample size: k = {k} studies)\n")

        # Get all estimator results.
        # NOTE(review): assumed to be a mapping of method code -> tau² that
        # contains at least 'REML' and 'DL' - confirm against the helper.
        comparison_results = compare_tau_estimators(analysis_data, effect_col, var_col)

        # Display formatted table
        print(f"{'Method':<15} {'τ²':>12} {'τ':>12} {'% Diff from REML':>18}   ")
        print(f"{'-'*15} {'-'*12} {'-'*12} {'-'*18}   {'-'*10}")

        # Get REML value for comparison
        reml_tau_sq = float(comparison_results['REML'])

        # Display each method
        for method_name, tau_sq in comparison_results.items():
            # Coerce once so arithmetic and formatting both see a plain float
            tau_sq = float(tau_sq)
            tau = np.sqrt(tau_sq)

            # Calculate % difference from REML (0 when REML itself is 0)
            if reml_tau_sq > 0:
                pct_diff = ((tau_sq - reml_tau_sq) / reml_tau_sq) * 100
            else:
                pct_diff = 0

            # Add indicator for the method that was actually used
            indicator = " ←" if method_name == method_used else ""

            print(f"{method_name:<15} {tau_sq:>12.6f} {tau:>12.4f} {pct_diff:>17.1f}% {indicator:>2}")

        print()

        # Calculate REML vs DL difference for interpretation
        dl_tau_sq = float(comparison_results['DL'])
        if reml_tau_sq > 0:
            reml_dl_diff = abs((reml_tau_sq - dl_tau_sq) / reml_tau_sq) * 100
        else:
            reml_dl_diff = 0

        # Provide interpretation
        print(f"📋 Interpretation:")
        print(f"   REML vs DL difference: {reml_dl_diff:.1f}%")

        if reml_dl_diff > 20:
            print(f"   ⚠️  Large difference - method choice is important")
            print(f"   → REML provides more accurate estimate for this dataset")
        elif reml_dl_diff > 10:
            print(f"   ℹ️  Moderate difference - REML recommended")
            print(f"   → Consider using REML for more reliable heterogeneity estimates")
        else:
            print(f"   ✓ Small difference - methods agree")
            print(f"   → All methods provide similar tau-squared estimates")

        print(f"\n💡 Note: The method marked with ← was used in this analysis")

    elif tau_squared_DL_comparison is not None and method_used != 'DL':
        # Simple comparison of the selected estimator against DL. Reached
        # when k < 5, or when the full-comparison helper is not defined.
        diff_abs = abs(tau_squared_DL - tau_squared_DL_comparison)
        if tau_squared_DL_comparison > 0:
            diff_pct = (diff_abs / tau_squared_DL_comparison) * 100
        else:
            diff_pct = 0

        # Display comparison
        print(f"\n📊 Method Comparison:")
        print(f"  {method_used} τ²: {tau_squared_DL:.6f}")
        print(f"  DL τ²:   {tau_squared_DL_comparison:.6f}")
        print(f"  Difference: {diff_abs:.6f} ({diff_pct:.1f}%)")

        if diff_pct > 10:
            print(f"  ⚠️  WARNING: Difference >10% - method choice may substantially affect results")
        elif diff_pct > 5:
            print(f"  ⚡ Moderate difference - {method_used} provides more accurate estimate")
        else:
            print(f"  ✓ Methods agree closely")

        if k >= 5:
            # Enough studies, but the helper for the full table is missing
            print(f"\n💡 Note: Full comparison skipped - compare_tau_estimators is not defined in this session")
        else:
            print(f"\n💡 Note: Full comparison available with k ≥ 5 studies (current: k = {k})")



    # Overall heterogeneity summary: one table row per statistic. For the
    # P-value and I² rows only the text before the first "(" of the
    # interpretation string is shown.
    print(
        f"\n📋 Heterogeneity Summary:",
        f"  {'Statistic':<20} {'Value':>15} {'Interpretation':<30}",
        f"  {'-'*20} {'-'*15} {'-'*30}",
        f"  {'Q':<20} {Qt:>15.2f} {'Test statistic':<30}",
        f"  {'P-value':<20} {p_heterogeneity:>15.4g} {q_interp.partition('(')[0].strip():<30}",
        f"  {'I²':<20} {I_squared:>14.1f}% {i2_interp.partition('(')[0].strip():<30}",
        f"  {'Tau²':<20} {tau_squared_DL:>15.4f} {'Between-study variance':<30}",
        f"  {'Tau':<20} {tau_DL:>15.4f} {'Between-study SD':<30}",
        sep="\n",
    )

    # Continue to Part 2...
    # --- STEP 6: RANDOM-EFFECTS MODEL ---
    print("\n" + "="*70)
    print("STEP 5: RANDOM-EFFECTS MODEL")
    print("="*70)

    print(f"\n📐 Model Assumption:")
    print(f"   Studies estimate different but related true effects")
    print(f"   Accounts for both within-study and between-study variation")
    print(f"   More conservative when heterogeneity is present")

    print(f"\n🔢 Calculating random-effects weights...")
    print(f"   Formula: w_random = 1 / (variance + τ²)")

    # Calculate random-effects weights
    analysis_data['w_random'] = 1 / (analysis_data[var_col] + tau_squared_DL)
    sum_w_random = analysis_data['w_random'].sum()

    if sum_w_random <= 0:
        print(f"\n❌ WARNING: Sum of random-effects weights is non-positive")
        print(f"   This should not occur with valid data")

        pooled_effect_random = np.nan
        pooled_var_random = np.nan
        pooled_SE_random = np.nan
        ci_lower_random = np.nan
        ci_upper_random = np.nan
        z_stat_random = np.nan
        p_value_random = np.nan
        pi_lower_random = np.nan
        pi_upper_random = np.nan
    else:
        print(f"   Sum of random-effects weights: {sum_w_random:.2f}")
        print(f"   Sum of fixed-effects weights:  {sum_w_fixed:.2f}")

        # Ratio comparison
        weight_ratio = sum_w_random / sum_w_fixed
        print(f"   Weight ratio (RE/FE): {weight_ratio:.3f}")

        if weight_ratio < 0.5:
            print(f"   → Random effects gives much less weight to studies (high heterogeneity)")
        elif weight_ratio < 0.8:
            print(f"   → Random effects moderately reduces weights")
        else:
            print(f"   → Random effects similar to fixed effects (low heterogeneity)")

        # Pooled effect size: mean of the effect sizes weighted by w_random
        pooled_effect_random = (analysis_data['w_random'] * analysis_data[effect_col]).sum() / sum_w_random

        # Variance of pooled effect
        pooled_var_random = 1 / sum_w_random
        pooled_SE_random = np.sqrt(pooled_var_random)

        # 95% CI
        ci_lower_random = pooled_effect_random - z_crit * pooled_SE_random
        ci_upper_random = pooled_effect_random + z_crit * pooled_SE_random

        # Test significance (H0: effect = 0), two-sided.
        # The survival function is used instead of 1 - cdf: for large |z| the
        # cdf rounds to 1.0 and the subtraction would return exactly 0.
        z_stat_random = pooled_effect_random / pooled_SE_random
        p_value_random = 2 * norm.sf(abs(z_stat_random))

        # Display results
        print(f"\n📊 Random-Effects Results:")
        print(f"  {'Statistic':<25} {'Value':>15}")
        print(f"  {'-'*25} {'-'*15}")
        print(f"  {'Pooled ' + es_config['effect_label_short']:<25} {pooled_effect_random:>15.4f}")
        print(f"  {'Standard Error':<25} {pooled_SE_random:>15.4f}")
        print(f"  {'Variance':<25} {pooled_var_random:>15.6f}")
        print(f"  {'95% CI Lower':<25} {ci_lower_random:>15.4f}")
        print(f"  {'95% CI Upper':<25} {ci_upper_random:>15.4f}")
        print(f"  {'Z-statistic':<25} {z_stat_random:>15.4f}")
        print(f"  {'P-value':<25} {p_value_random:>15.4g}")

        # Interpretation for ratio-based measures: back-transform the pooled
        # log-scale estimate and its CI with exp() and report them on the
        # ratio scale.
        if es_config['has_fold_change']:
            print(f"\n📈 Biological Interpretation:")

            if effect_type == 'lnRR':
                pooled_RR_random = np.exp(pooled_effect_random)
                # Signed fold-change: ratios below 1 are reported as -1/RR
                pooled_fold_random = pooled_RR_random if pooled_effect_random >= 0 else -1/pooled_RR_random
                pooled_pct_random = (pooled_RR_random - 1) * 100
                ci_lower_RR_random = np.exp(ci_lower_random)
                ci_upper_RR_random = np.exp(ci_upper_random)

                print(f"  {'Metric':<30} {'Value':>15}")
                print(f"  {'-'*30} {'-'*15}")
                print(f"  {'Response Ratio (RR)':<30} {pooled_RR_random:>15.3f}")
                print(f"  {'Fold-change':<30} {pooled_fold_random:>+14.2f}×")
                print(f"  {'Percent change':<30} {pooled_pct_random:>+14.1f}%")
                print(f"  {'95% CI (RR scale)':<30} [{ci_lower_RR_random:.3f}, {ci_upper_RR_random:.3f}]")

                # Direction interpretation (|lnRR| <= 0.05 is treated as no change)
                if pooled_effect_random > 0.05:
                    direction = "INCREASE (upregulation)"
                elif pooled_effect_random < -0.05:
                    direction = "DECREASE (downregulation)"
                else:
                    direction = "NO CHANGE"
                print(f"\n  Overall direction: {direction}")

            # Compared case-insensitively: the notebook declares this effect
            # type as 'log_OR', which an exact match against 'log_or' never
            # equals.
            elif str(effect_type).lower() == 'log_or':
                pooled_OR_random = np.exp(pooled_effect_random)
                ci_lower_OR_random = np.exp(ci_lower_random)
                ci_upper_OR_random = np.exp(ci_upper_random)

                print(f"  {'Metric':<30} {'Value':>15}")
                print(f"  {'-'*30} {'-'*15}")
                print(f"  {'Odds Ratio (OR)':<30} {pooled_OR_random:>15.3f}")
                print(f"  {'95% CI (OR scale)':<30} [{ci_lower_OR_random:.3f}, {ci_upper_OR_random:.3f}]")

                if pooled_OR_random > 1:
                    direction = "Positive association"
                elif pooled_OR_random < 1:
                    direction = "Negative association"
                else:
                    direction = "No association"
                print(f"\n  Interpretation: {direction}")

        # Significance interpretation
        # The first cutoff the random-effects p-value falls under decides the
        # label; when none matches (this includes NaN) the effect is reported
        # as not significant.
        print(f"\n📌 Statistical Significance:")
        sig_text_re, sig_symbol_re = next(
            (
                (verdict, stars)
                for cutoff, verdict, stars in (
                    (0.001, "HIGHLY SIGNIFICANT (p < 0.001)", "***"),
                    (0.01, "VERY SIGNIFICANT (p < 0.01)", "**"),
                    (0.05, "SIGNIFICANT (p < 0.05)", "*"),
                )
                if p_value_random < cutoff
            ),
            ("NOT SIGNIFICANT (p ≥ 0.05)", "ns"),
        )

        print(f"  The overall effect is {sig_text_re} {sig_symbol_re}")

        # --- STEP 7: 95% PREDICTION INTERVAL ---
        print("\n" + "="*70)
        print("STEP 6: 95% PREDICTION INTERVAL")
        print("="*70)

        print(f"\n📊 Prediction Interval (PI):")
        print(f"   Estimates where the true effect in a NEW study is expected to fall")
        print(f"   Wider than CI because it accounts for between-study heterogeneity")
        print(f"   More clinically relevant than CI for assessing effect consistency")

        if k > 2:
            # Degrees of freedom for t-distribution
            df_pi = k - 2
            t_crit = t.ppf(1 - alpha / 2, df=df_pi)

            # Standard error for prediction
            # SE_prediction = sqrt(τ² + SE²_pooled)
            se_prediction = np.sqrt(tau_squared_DL + pooled_var_random)

            # Calculate prediction interval
            pi_lower_random = pooled_effect_random - t_crit * se_prediction
            pi_upper_random = pooled_effect_random + t_crit * se_prediction

            print(f"\n  📏 Calculation Details:")
            print(f"     Pooled effect: {pooled_effect_random:.4f}")
            print(f"     Tau² (between-study var): {tau_squared_DL:.6f}")
            print(f"     SE² (pooled estimate): {pooled_var_random:.6f}")
            print(f"     SE (prediction): {se_prediction:.4f}")
            print(f"     t-critical value (df={df_pi}): {t_crit:.3f}")
            print(f"     Margin of error: ±{t_crit * se_prediction:.4f}")

            print(f"\n  📊 Results:")
            print(f"     95% Prediction Interval: [{pi_lower_random:.4f}, {pi_upper_random:.4f}]")

            # Compare PI width to CI width
            ci_width = ci_upper_random - ci_lower_random
            pi_width = pi_upper_random - pi_lower_random
            width_ratio = pi_width / ci_width if ci_width > 0 else np.inf

            print(f"\n  📐 Interval Comparison:")
            print(f"     CI width: {ci_width:.4f}")
            print(f"     PI width: {pi_width:.4f}")
            print(f"     Ratio (PI/CI): {width_ratio:.2f}×")

            if width_ratio > 3:
                print(f"     → PI much wider than CI (substantial heterogeneity)")
            elif width_ratio > 1.5:
                print(f"     → PI moderately wider than CI (moderate heterogeneity)")
            else:
                print(f"     → PI similar to CI (low heterogeneity)")

            # Interpretation for ratio measures
            if es_config['has_fold_change'] and effect_type == 'lnRR':
                pi_lower_RR = np.exp(pi_lower_random)
                pi_upper_RR = np.exp(pi_upper_random)

                print(f"\n  📈 Prediction Interval (RR scale):")
                print(f"     95% PI: [{pi_lower_RR:.3f}, {pi_upper_RR:.3f}]")

            # Check if PI includes null
            null_value = es_config['null_value']
            pi_includes_null = (pi_lower_random <= null_value <= pi_upper_random)

            print(f"\n  💡 Interpretation:")
            if pi_includes_null:
                print(f"     ⚠️  PI includes null effect ({null_value})")
                print(f"     → A future study could plausibly find no effect")
                print(f"     → Effect direction may not be consistent across all contexts")
            else:
                print(f"     ✓ PI excludes null effect ({null_value})")
                print(f"     → Future studies expected to show consistent effect direction")
                print(f"     → High confidence in effect direction")

            print(f"\n  📝 Note: In 95% of similar future studies, the true effect")
            print(f"     is predicted to lie between {pi_lower_random:.4f} and {pi_upper_random:.4f}")

        else:
            print(f"\n  ⚠️  Skipped: Not enough studies for prediction interval")
            print(f"     Requires at least 3 studies (k ≥ 3)")
            print(f"     Current k = {k}")

            pi_lower_random = np.nan
            pi_upper_random = np.nan

# --- MODEL COMPARISON (announced to the user as "STEP 7") ---
# Prints the fixed- and random-effects estimates side by side, then quantifies how far
# apart they are and whether they agree on significance at the 0.05 level.
print("\n" + "="*70)
print("STEP 7: MODEL COMPARISON")
print("="*70)

if k > 1:
    print("\n📊 Side-by-Side Comparison:")
    print(f"\n  {'Model':<20} {'Effect':>12} {'SE':>10} {'95% CI':>28} {'P-value':>10}")
    print(f"  {'-'*82}")

    # Fixed-effects row
    fe_ci_str = f"[{ci_lower_fixed:>7.4f}, {ci_upper_fixed:>7.4f}]"
    print(f"  {'Fixed-Effects':<20} {pooled_effect_fixed:>12.4f} {pooled_SE_fixed:>10.4f} {fe_ci_str:>28} {p_value_fixed:>10.4g}")

    # Everything from here on needs a usable random-effects estimate
    if pd.notna(pooled_effect_random):
        # Random-effects row
        re_ci_str = f"[{ci_lower_random:>7.4f}, {ci_upper_random:>7.4f}]"
        print(f"  {'Random-Effects':<20} {pooled_effect_random:>12.4f} {pooled_SE_random:>10.4f} {re_ci_str:>28} {p_value_random:>10.4g}")

        # Prediction interval row (only when one was computed)
        if pd.notna(pi_lower_random):
            pi_str = f"[{pi_lower_random:>7.4f}, {pi_upper_random:>7.4f}]"
            print(f"  {'95% Pred. Interval':<20} {'':<12} {'':<10} {pi_str:>28} {'':<10}")

        # Differences between the two models (RE minus FE)
        effect_diff = pooled_effect_random - pooled_effect_fixed
        if pooled_effect_fixed != 0:
            effect_diff_pct = (effect_diff / abs(pooled_effect_fixed)) * 100
        else:
            effect_diff_pct = np.inf  # relative change is undefined against a zero baseline
        se_diff = pooled_SE_random - pooled_SE_fixed
        if pooled_SE_fixed > 0:
            se_ratio = pooled_SE_random / pooled_SE_fixed
        else:
            se_ratio = np.inf

        print("\n  📏 Model Differences:")
        print(f"     Effect difference (RE - FE): {effect_diff:+.4f} ({effect_diff_pct:+.1f}%)")
        print(f"     SE difference (RE - FE): {se_diff:+.4f}")
        print(f"     SE ratio (RE / FE): {se_ratio:.2f}×")

        # Interpretation: first tier whose ceiling exceeds |difference| wins;
        # anything at or above the last ceiling (or not comparable) falls through
        print("\n  💡 Interpretation:")
        agreement_tiers = (
            (0.05, ("     ✓ Models agree very closely",
                    "       → Low heterogeneity, either model acceptable")),
            (0.15, ("     ⚠️  Models show small differences",
                    "       → Some heterogeneity present, random-effects preferred")),
            (0.3, ("     ⚠️  Models show moderate differences",
                   "       → Moderate heterogeneity, use random-effects")),
        )
        for diff_ceiling, tier_lines in agreement_tiers:
            if abs(effect_diff) < diff_ceiling:
                break
        else:
            tier_lines = ("     🔴 Models show substantial differences",
                          "       → High heterogeneity, must use random-effects",
                          "       → Investigate sources of heterogeneity")
        for tier_line in tier_lines:
            print(tier_line)

        if se_ratio > 1.5:
            print(f"\n     ⚠️  Random-effects SE is {se_ratio:.1f}× larger than fixed-effects")
            print("       → Random-effects provides more conservative estimates")

        # Do the two models reach the same verdict at alpha = 0.05?
        fe_sig = p_value_fixed < 0.05
        re_sig = p_value_random < 0.05

        if fe_sig != re_sig:
            print("\n     ⚠️  Models disagree on statistical significance!")
            if fe_sig:
                print("       → Fixed-effects significant, random-effects not")
                print("       → Use random-effects (more conservative)")
            else:
                print("       → Random-effects significant, fixed-effects not")
                print("       → Unlikely scenario, verify data")
        else:
            print("\n     ✓ Both models agree on statistical significance")

# --- RECOMMENDATIONS (announced to the user as "STEP 8") ---
# Prints guidance for exactly one of three situations:
#   * a single study (k == 1),
#   * high heterogeneity (I² > 50, or heterogeneity-test p-value < 0.10),
#   * everything else (reported as low-to-moderate heterogeneity).
print("\n" + "="*70)
print("STEP 8: INTERPRETATION & RECOMMENDATIONS")
print("="*70)

if k == 1:
    print(f"\n🔴 SINGLE STUDY LIMITATION")
    print(f"\n   Current Status:")
    print(f"   • Only one observation available")
    print(f"   • Cannot perform meta-analysis")
    print(f"   • Cannot assess heterogeneity")
    print(f"   • Cannot evaluate publication bias")

    print(f"\n   Recommendations:")
    print(f"   1. Report single study results with appropriate caution")
    print(f"   2. Acknowledge inability to generalize findings")
    print(f"   3. Collect additional studies before drawing conclusions")
    print(f"   4. Consider this a preliminary finding only")

elif I_squared > 50 or (pd.notna(p_heterogeneity) and p_heterogeneity < 0.10):
    print(f"\n🔴 HIGH HETEROGENEITY DETECTED")
    print(f"\n   Heterogeneity Metrics:")
    print(f"   • I² = {I_squared:.1f}% ({i2_interp})")
    print(f"   • Q test p-value = {p_heterogeneity:.4g}")
    print(f"   • Tau² = {tau_squared_DL:.4f}")
    print(f"   • Tau = {tau_DL:.4f}")

    print(f"\n   📋 Required Actions:")
    print(f"   1. ✓ Use RANDOM-EFFECTS model (more conservative)")
    print(f"   2. ✓ Report prediction interval in addition to confidence interval")
    print(f"   3. ⚠️  Interpret pooled effect with caution")
    print(f"   4. 🔍 Investigate sources of heterogeneity:")

    print(f"\n   🔍 Heterogeneity Investigation Plan:")
    print(f"      a) Run subgroup analyses (available in next cells)")
    print(f"         • Compare effects across study characteristics")
    print(f"         • Test if moderators explain heterogeneity")

    print(f"      b) Consider meta-regression (if sufficient studies)")
    print(f"         • Continuous moderators (year, dose, duration)")
    print(f"         • Categorical moderators (treatment type, outcome)")

    print(f"      c) Conduct sensitivity analyses")
    print(f"         • Remove outliers and reassess")
    print(f"         • Exclude small studies")
    print(f"         • Leave-one-out analysis")

    print(f"      d) Check for influential studies")
    print(f"         • Studies with very large/small effects")
    print(f"         • Studies with large weights")

    print(f"\n   💡 Reporting Guidelines:")
    print(f"      • State: 'Substantial heterogeneity was present (I²={I_squared:.1f}%)'")
    print(f"      • Report both CI and PI for random-effects model")
    # Test the prediction interval against the configured null value rather than a
    # hard-coded 0, so this note agrees with the null-effect check in the final summary
    if pd.notna(pi_lower_random) and (pi_lower_random <= es_config['null_value'] <= pi_upper_random):
        print(f"      • Note: PI includes null, indicating effect may vary by context")
    print(f"      • Discuss clinical/biological sources of heterogeneity")
    print(f"      • Consider whether pooled estimate is meaningful")

else:
    print(f"\n🟢 LOW-TO-MODERATE HETEROGENEITY")
    print(f"\n   Heterogeneity Metrics:")
    print(f"   • I² = {I_squared:.1f}% ({i2_interp})")
    print(f"   • Q test p-value = {p_heterogeneity:.4g}")
    print(f"   • Tau² = {tau_squared_DL:.4f}")

    print(f"\n   📋 Recommendations:")
    print(f"   1. ✓ RANDOM-EFFECTS model preferred (conservative approach)")
    print(f"   2. ✓ Pooled effect is reliable and interpretable")
    # NOTE(review): the next line is printed unconditionally; the interval widths are not checked here
    print(f"   3. ✓ Both CI and PI relatively narrow and consistent")
    print(f"   4. ⚠️  Subgroup analyses still valuable for exploration")

    print(f"\n   💡 Reporting Guidelines:")
    print(f"      • State: 'Low heterogeneity was observed (I²={I_squared:.1f}%)'")
    print(f"      • Report random-effects pooled estimate as primary result")
    print(f"      • Can mention fixed-effects agrees with random-effects")
    print(f"      • Pooled estimate likely generalizable")

# Effect size type specific recommendations
# Exactly one branch runs, chosen by `effect_type`; unknown types print only the heading.
print(f"\n📊 Recommendations for {es_config['effect_label']}:")

if effect_type == 'lnRR':
    print(f"\n   📈 Log Response Ratio Reporting:")
    print(f"   • Always report both lnRR and fold-change for clarity")
    print(f"   • State whether effect represents increase or decrease")
    print(f"   • Consider reporting percent change for accessibility")
    print(f"   • Compare magnitude to biologically relevant thresholds")
    if k > 1:
        print(f"   • Current pooled fold-change: {pooled_fold_random:+.2f}×")

    print(f"\n   🔍 Further Analyses to Consider:")
    print(f"   • Separate meta-analyses for upregulation vs downregulation")
    print(f"   • Check if effect varies by magnitude (small vs large changes)")
    print(f"   • Assess if treatment duration affects effect size")

elif effect_type in ['hedges_g', 'cohen_d']:
    print(f"\n   📊 Standardized Mean Difference Reporting:")
    print(f"   • Report effect size with Cohen's benchmark interpretation")
    print(f"   • Provide context-specific interpretation when possible")
    print(f"   • Note: Benchmarks are guidelines, not absolute rules")
    print(f"   • Be cautious with effects |g| > 2 (often outliers)")
    if k > 1:
        # Classify pooled effect against the 0.2 / 0.5 / 0.8 benchmarks
        abs_effect = abs(pooled_effect_random) if pd.notna(pooled_effect_random) else 0
        if abs_effect >= 0.8:
            magnitude = "LARGE"
        elif abs_effect >= 0.5:
            magnitude = "MEDIUM"
        elif abs_effect >= 0.2:
            magnitude = "SMALL"
        else:
            magnitude = "NEGLIGIBLE"
        print(f"   • Current pooled effect magnitude: {magnitude} (|g|={abs_effect:.2f})")

# Case-insensitive match: the supported-effect-size table spells this key 'log_OR',
# so an exact comparison with 'log_or' would never select this branch.
elif str(effect_type).lower() == 'log_or':
    print(f"\n   📊 Log Odds Ratio Reporting:")
    print(f"   • Always convert to OR for interpretation")
    print(f"   • Clarify what OR > 1 vs OR < 1 means in your context")
    print(f"   • Consider reporting as risk ratio if appropriate")
    if k > 1:
        # Prefer the odds ratio computed earlier in the cell; if that name was never
        # created, back-transform the pooled log odds ratio instead of raising NameError.
        if 'pooled_OR_random' in globals():
            pooled_or_shown = globals()['pooled_OR_random']
        else:
            pooled_or_shown = np.exp(pooled_effect_random)
        if pd.notna(pooled_or_shown):
            print(f"   • Current pooled OR: {pooled_or_shown:.3f}")

# Statistical power consideration
# Only shown when more than one study was pooled; the message depends on how many.
if k > 1:
    print("\n⚡ Statistical Power Consideration:")
    if k >= 10:
        power_notes = (
            f"   ✓ Good number of studies (k={k})",
            "      • Good power for all analyses",
            "      • Reliable heterogeneity estimates",
            "      • Subgroup analyses well-powered",
        )
    elif k >= 5:
        power_notes = (
            f"   📊 Moderate number of studies (k={k})",
            "      • Adequate power for overall effect",
            "      • Moderate power for heterogeneity tests",
            "      • Some subgroup analyses may be feasible",
        )
    else:
        power_notes = (
            f"   ⚠️  Small number of studies (k={k})",
            "      • Limited power to detect heterogeneity",
            "      • Subgroup analyses may be underpowered",
            "      • I² estimate may be imprecise",
        )
    for power_note in power_notes:
        print(power_note)

# --- SAVE RESULTS (announced to the user as "STEP 9") ---
# Stores every pooled statistic in ANALYSIS_CONFIG['overall_results'] for later cells and
# builds a compact OVERALL_META_METADATA summary. Quantities that only exist when k > 1
# (or when the random-effects estimate could be computed) are stored as NaN otherwise.
print("\n" + "="*70)
print("STEP 9: SAVING RESULTS")
print("="*70)

ANALYSIS_CONFIG['overall_results'] = {
    'timestamp': datetime.datetime.now(),
    'k': k,
    'k_papers': k_papers,

    # Fixed-effects
    'pooled_effect_fixed': pooled_effect_fixed,
    'pooled_var_fixed': pooled_var_fixed,
    'pooled_SE_fixed': pooled_SE_fixed if k > 1 else np.nan,
    'ci_lower_fixed': ci_lower_fixed if k > 1 else np.nan,
    'ci_upper_fixed': ci_upper_fixed if k > 1 else np.nan,
    'z_stat_fixed': z_stat_fixed if k > 1 else np.nan,
    'p_value_fixed': p_value_fixed if k > 1 else np.nan,

    # Heterogeneity
    'Qt': Qt,
    'df_Q': df_Q if k > 1 else np.nan,
    'p_heterogeneity': p_heterogeneity,
    'I_squared': I_squared,
    'I_squared_interpretation': i2_interp if k > 1 else 'N/A',
    'tau_squared': tau_squared_DL,
    'tau': tau_DL if k > 1 else np.nan,

    # Random-effects
    'pooled_effect_random': pooled_effect_random,
    'pooled_var_random': pooled_var_random,
    'pooled_SE_random': pooled_SE_random if k > 1 and pd.notna(pooled_effect_random) else np.nan,
    'ci_lower_random': ci_lower_random if k > 1 and pd.notna(pooled_effect_random) else np.nan,
    'ci_upper_random': ci_upper_random if k > 1 and pd.notna(pooled_effect_random) else np.nan,
    'z_stat_random': z_stat_random if k > 1 and pd.notna(pooled_effect_random) else np.nan,
    'p_value_random': p_value_random if k > 1 and pd.notna(pooled_effect_random) else np.nan,

    # Prediction interval
    'pi_lower_random': pi_lower_random,
    'pi_upper_random': pi_upper_random,
    'pi_df': df_pi if k > 2 and pd.notna(pi_lower_random) else np.nan,

    # Model comparison
    'effect_difference': effect_diff if k > 1 and pd.notna(pooled_effect_random) else np.nan,
    'se_ratio': se_ratio if k > 1 and pd.notna(pooled_effect_random) else np.nan,

    # Interpretation
    'recommended_model': 'random-effects' if k > 1 and (I_squared > 25 or p_heterogeneity < 0.10) else 'either',
    'heterogeneity_level': i2_color if k > 1 else 'N/A'
}

# Short alias for the dict that was just stored
saved_results = ANALYSIS_CONFIG['overall_results']

# Add fold-changes if applicable
if es_config['has_fold_change'] and k > 1:
    if effect_type == 'lnRR':
        saved_results['pooled_fold_fixed'] = pooled_fold_fixed
        saved_results['pooled_fold_random'] = pooled_fold_random
        saved_results['pooled_RR_fixed'] = pooled_RR_fixed
        saved_results['pooled_RR_random'] = pooled_RR_random
        saved_results['pooled_pct_change_random'] = pooled_pct_random
    # Case-insensitive match: the supported-effect-size table spells this key 'log_OR',
    # so an exact comparison with 'log_or' would never store the odds ratios.
    elif str(effect_type).lower() == 'log_or':
        # Prefer odds ratios computed earlier in the cell; if a name was never created,
        # back-transform the pooled log odds ratio instead of raising NameError.
        saved_results['pooled_OR_fixed'] = (
            globals()['pooled_OR_fixed'] if 'pooled_OR_fixed' in globals()
            else np.exp(pooled_effect_fixed)
        )
        saved_results['pooled_OR_random'] = (
            globals()['pooled_OR_random'] if 'pooled_OR_random' in globals()
            else np.exp(pooled_effect_random)
        )

print(f"\n✓ Results saved to ANALYSIS_CONFIG['overall_results']")
print(f"\n📊 Saved metrics include:")
print(f"  • Pooled effects (fixed & random)")
print(f"  • Confidence intervals")
print(f"  • Prediction interval")
print(f"  • Heterogeneity statistics (Q, I², Tau²)")
print(f"  • P-values and significance tests")
if es_config['has_fold_change']:
    print(f"  • Fold-changes and interpretations")

# Create summary metadata
# The random-effects CI and p-value are taken from the dict saved above, where they are
# already NaN-guarded, so a missing random-effects estimate yields NaN / not significant
# here rather than a reference to a variable that may not exist.
OVERALL_META_METADATA = {
    'timestamp': datetime.datetime.now(),
    'n_studies': k,
    'n_papers': k_papers,
    'model_recommended': saved_results['recommended_model'],
    'heterogeneity': {
        'I_squared': I_squared,
        'level': i2_interp if k > 1 else 'N/A',
        'p_value': p_heterogeneity,
        'tau_squared': tau_squared_DL
    },
    'primary_result': {
        'effect': pooled_effect_random if k > 1 else pooled_effect_fixed,
        'ci_lower': saved_results['ci_lower_random'] if k > 1 else ci_lower_fixed,
        'ci_upper': saved_results['ci_upper_random'] if k > 1 else ci_upper_fixed,
        'p_value': saved_results['p_value_random'] if k > 1 else p_value_fixed,
        'significant': (saved_results['p_value_random'] < 0.05) if k > 1 else False
    }
}

print(f"\n✓ Metadata saved to OVERALL_META_METADATA")

# --- FINAL STATUS ---
# Closing summary of the overall meta-analysis: key numbers, a one-sentence conclusion,
# and pointers to the follow-up analyses.
print("\n" + "="*70)
print("✅ OVERALL META-ANALYSIS COMPLETE")
print("="*70)

# A single cut-off drives every "high heterogeneity" message below. It is the same
# I² > 50 rule the recommendations section uses, so a value of exactly 50 is no longer
# called "substantial" in the conclusion while being treated as not-high everywhere else.
high_heterogeneity = I_squared > 50

if k > 1:
    print(f"\n📊 Key Findings Summary:")
    print(f"  • Studies analyzed: {k} observations from {k_papers} papers")
    # NOTE(review): the label shows the recommended model, but the value printed is always
    # the random-effects estimate — confirm this is the intended wording when it says 'either'
    print(f"  • Pooled effect ({ANALYSIS_CONFIG['overall_results']['recommended_model']}): {pooled_effect_random:.4f}")
    print(f"  • 95% CI: [{ci_lower_random:.4f}, {ci_upper_random:.4f}]")
    if pd.notna(pi_lower_random):
        print(f"  • 95% PI: [{pi_lower_random:.4f}, {pi_upper_random:.4f}]")
    print(f"  • Statistical significance: {sig_text_re}")
    print(f"  • Heterogeneity (I²): {I_squared:.1f}% - {i2_interp}")

    if es_config['has_fold_change'] and effect_type == 'lnRR':
        print(f"\n📈 Biological Interpretation:")
        print(f"  • Pooled fold-change: {pooled_fold_random:+.2f}×")
        print(f"  • Response ratio: {pooled_RR_random:.3f}")
        print(f"  • Percent change: {pooled_pct_random:+.1f}%")

    print(f"\n🎯 Conclusion:")
    if p_value_random < 0.05:
        conclusion_sig = "statistically significant"
    else:
        conclusion_sig = "not statistically significant"

    if high_heterogeneity:
        conclusion_het = "with substantial heterogeneity"
    else:
        conclusion_het = "with low-to-moderate heterogeneity"

    print(f"  The overall effect is {conclusion_sig} {conclusion_het}.")

    if high_heterogeneity:
        print(f"  Further investigation of heterogeneity sources is recommended.")

    # A prediction interval spanning the null value means a new study could plausibly show no effect
    if pd.notna(pi_lower_random) and (pi_lower_random <= es_config['null_value'] <= pi_upper_random):
        print(f"  ⚠️  Note: Prediction interval includes null effect,")
        print(f"      suggesting effect may vary substantially by context.")
else:
    print(f"\n📊 Single Study Summary:")
    print(f"  • Effect size: {pooled_effect_fixed:.4f}")
    print(f"  • 95% CI: [{ci_lower_fixed:.4f}, {ci_upper_fixed:.4f}]")
    print(f"  • Meta-analysis not performed (k=1)")

print(f"\n▶️  Next Steps:")
print(f"  1. Review the overall pooled estimates above")
print(f"  2. Run SUBGROUP ANALYSIS to explore heterogeneity (next cell)")
print(f"  3. Create FOREST PLOTS for visualization")
print(f"  4. Assess PUBLICATION BIAS with funnel plots")
print(f"  5. Conduct SENSITIVITY ANALYSES (leave-one-out)")

if high_heterogeneity:
    print(f"\n💡 Priority Recommendations:")
    print(f"  • High heterogeneity detected - subgroup analysis is essential")
    print(f"  • Consider meta-regression if moderators are available")
    print(f"  • Check for outliers and influential studies")

print("\n" + "="*70)



TAU-SQUARED ESTIMATOR SELECTION
✅ Advanced estimators available


VBox(children=(HTML(value="<div style='background-color: #e8f4f8; padding: 10px; margin: 10px 0; border-radius…



OVERALL META-ANALYSIS
Timestamp: 2025-11-14 18:04:36

STEP 1: LOADING CONFIGURATION
✓ Configuration loaded successfully
  Effect size: Hedges' g (g)
  Effect column: hedges_g
  Variance column: Vg
  SE column: SE_g

STEP 2: DATA PREPARATION

🔍 Preparing data for meta-analysis...

  Initial dataset:
    • Observations: 69
    • Unique papers: 23

  ✓ Final analysis dataset:
    • Observations (k): 69
    • Unique papers: 23
    • Removed: 0 observations
    • Avg obs per paper: 3.00

STEP 3: FIXED-EFFECTS MODEL

📐 Model Assumption:
   All studies share a common true effect size
   Differences between studies are due to sampling error only

🔢 Calculating inverse-variance weighted mean...
   Sum of weights: 101.12

📊 Fixed-Effects Results:
  Statistic                           Value
  ------------------------- ---------------
  Pooled g                           1.1835
  Standard Error                     0.0994
  Variance                         0.009889
  95% CI Lower                 

In [9]:
#@title ⚙️ SUBGROUP ANALYSIS CONFIGURATION

# =============================================================================
# CELL 7: SUBGROUP ANALYSIS CONFIGURATION
# Purpose: Configure moderator variables and settings for subgroup analysis
# Dependencies: Cell 6 (overall_results, analysis_data)
# Outputs: ANALYSIS_CONFIG['subgroup_config'], interactive widgets
# =============================================================================

print("\n" + "="*70)
print("SUBGROUP ANALYSIS CONFIGURATION")
print("="*70)
print(f"Timestamp: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

# --- STEP 1: CHECK PREREQUISITES ---
print("\n" + "="*70)
print("STEP 1: VERIFYING PREREQUISITES")
print("="*70)

# Pull everything this cell needs out of the shared configuration. Any missing key
# (including one inside es_config / overall_results) is reported and then re-raised.
try:
    effect_col = ANALYSIS_CONFIG['effect_col']
    var_col = ANALYSIS_CONFIG['var_col']
    se_col = ANALYSIS_CONFIG['se_col']
    es_config = ANALYSIS_CONFIG['es_config']
    overall_results = ANALYSIS_CONFIG['overall_results']
    Qt_overall = overall_results['Qt']
    I_squared_overall = overall_results['I_squared']

    print("✓ Overall analysis results loaded successfully")
    print(f"  Effect size: {es_config['effect_label']} ({es_config['effect_label_short']})")
    print(f"  Effect column: {effect_col}")
    print(f"  Overall Q statistic: {Qt_overall:.4f}")
    print(f"  Overall I²: {I_squared_overall:.2f}%")

except KeyError as e:
    print(f"❌ ERROR: Overall analysis results not found - {e}")
    print("\nTroubleshooting:")
    for hint in (
        "  1. Ensure Cell 6 (overall meta-analysis) was run successfully",
        "  2. Check that ANALYSIS_CONFIG['overall_results'] exists",
        "  3. Verify that analysis_data DataFrame is available",
    ):
        print(hint)
    raise

# The analysis DataFrame must have been left in the notebook namespace by Cell 6
if 'analysis_data' not in globals():
    print("\n❌ ERROR: analysis_data not found")
    print("   Please ensure Cell 6 was executed successfully")
    raise NameError("analysis_data not defined")

# Dataset information
k_total = len(analysis_data)
k_papers = analysis_data['id'].nunique()

print("\n📊 Dataset Summary:")
print(f"  • Total observations: {k_total}")
print(f"  • Unique papers: {k_papers}")
print(f"  • Avg obs per paper: {k_total/k_papers:.2f}")

# Grade the dataset size: 20+ observations adequate, 10-19 moderate, fewer than 10 limited
if k_total >= 20:
    print(f"\n✓ Adequate data for subgroup analysis ({k_total} observations)")
elif k_total >= 10:
    print("\n⚠️  CAUTION: Moderate data for subgroup analysis")
    print(f"   With {k_total} observations, some subgroup combinations may have few studies")
else:
    print("\n⚠️  WARNING: Limited data for subgroup analysis")
    print(f"   With only {k_total} observations, subgroup analyses may be underpowered")
    print("   Results should be interpreted with caution")

# --- STEP 2: IDENTIFY AVAILABLE MODERATOR COLUMNS ---
# A moderator candidate is any object-dtype column that is not a known technical column
# and has at least one non-missing value. The cell stops with ValueError if none exist.
print("\n" + "="*70)
print("STEP 2: IDENTIFYING MODERATOR VARIABLES")
print("="*70)

print(f"\n🔍 Scanning dataset for potential moderator variables...")

# Exclude technical columns
excluded_cols = [
    'xe', 'sde', 'ne', 'xc', 'sdc', 'nc', 'id',
    'sde_imputed', 'sdc_imputed', 'cv_e', 'cv_c',
    'sde_was_imputed', 'sdc_was_imputed',
    effect_col, var_col, se_col, 'w_fixed', 'w_random',
    'ci_width'
]

# Add effect-size-specific columns to exclude
if es_config['has_fold_change']:
    if 'Response_Ratio' in analysis_data.columns:
        excluded_cols.extend(['Response_Ratio', 'RR_CI_lower', 'RR_CI_upper',
                             'fold_change', 'Percent_Change'])
    if 'Odds_Ratio' in analysis_data.columns:
        excluded_cols.extend(['Odds_Ratio', 'OR_CI_lower', 'OR_CI_upper'])

if 'hedges_g' in effect_col or 'cohen_d' in effect_col:
    excluded_cols.extend(['df', 'sp', 'sp_squared', 'cohen_d', 'hedges_j'])

# Add CI columns
ci_cols = [c for c in analysis_data.columns if 'CI_' in c or 'ci_' in c]
excluded_cols.extend(ci_cols)

# Get categorical columns (potential moderators)
available_moderators = [
    col for col in analysis_data.columns
    if col not in excluded_cols
    and analysis_data[col].dtype == 'object'
    and analysis_data[col].notna().sum() > 0  # Has some non-missing values
]

print(f"\n📋 Available Moderator Variables: {len(available_moderators)}")

if not available_moderators:
    print(f"\n❌ ERROR: No categorical moderator columns found in dataset")
    print(f"\nAvailable columns in dataset:")
    for col in analysis_data.columns:
        dtype = analysis_data[col].dtype
        n_unique = analysis_data[col].nunique()
        print(f"  • {col}: {dtype} ({n_unique} unique values)")

    print(f"\n💡 Troubleshooting:")
    print(f"  1. Ensure your dataset contains categorical variables for grouping")
    print(f"  2. Check that moderator columns are not all numeric")
    print(f"  3. Verify column names match expected moderator variables")
    raise ValueError("No moderators available for subgroup analysis")

# Analyze moderator characteristics
moderator_info = []
for col in available_moderators:
    n_categories = analysis_data[col].nunique()
    n_missing = analysis_data[col].isna().sum()
    pct_missing = (n_missing / len(analysis_data)) * 100

    # An object column can mix text and numbers, and such values cannot be compared
    # with each other; fall back to ordering by their text form instead of crashing.
    observed_values = analysis_data[col].dropna().unique()
    try:
        categories = sorted(observed_values)
    except TypeError:
        categories = sorted(observed_values, key=str)

    # Calculate distribution statistics
    value_counts = analysis_data[col].value_counts()
    min_count = value_counts.min()
    max_count = value_counts.max()

    moderator_info.append({
        'variable': col,
        'n_categories': n_categories,
        'n_missing': n_missing,
        'pct_missing': pct_missing,
        'categories': categories,
        'min_count': min_count,
        'max_count': max_count,
        'value_counts': value_counts
    })

# Display moderator information (one summary row per moderator)
print(f"\n{'Variable':<25} {'Categories':>12} {'Missing':>10} {'Range':>15}")
print(f"{'-'*25} {'-'*12} {'-'*10} {'-'*15}")

for info in moderator_info:
    print(f"{info['variable']:<25} {info['n_categories']:>12} "
          f"{info['n_missing']:>10} {info['min_count']:>6}-{info['max_count']:<6}")

print(f"\n📊 Detailed Moderator Information:")
for info in moderator_info:
    print(f"\n  🔹 {info['variable']}")
    print(f"     Categories: {info['n_categories']}")
    print(f"     Missing: {info['n_missing']} ({info['pct_missing']:.1f}%)")
    # Only the first five category labels are listed to keep the line short
    print(f"     Values: {', '.join(str(c) for c in info['categories'][:5])}"
          f"{' ...' if len(info['categories']) > 5 else ''}")

    # Show distribution
    print(f"     Distribution:")
    for category, count in info['value_counts'].items():
        papers = analysis_data[analysis_data[info['variable']] == category]['id'].nunique()
        pct = (count / len(analysis_data)) * 100
        print(f"       • {category}: {count} obs ({pct:.1f}%), {papers} papers")

    # Warning for imbalanced categories
    if info['min_count'] < 3:
        print(f"     ⚠️  Warning: Some categories have very few observations")

# --- STEP 3: CREATE ANALYSIS TYPE SELECTION ---
print("\n" + "="*70)
print("STEP 3: CREATING INTERACTIVE CONFIGURATION")
print("="*70)

print(f"\n🎨 Building interactive widgets...")

# Analysis type selection
# Each option is a (label shown to the user, internal value) pair; the internal values
# 'single' / 'two_way' are also the keys of the analysis_type_info dict below.
analysis_type_widget = widgets.RadioButtons(
    options=[
        ('Single-Factor Subgroup Analysis', 'single'),
        ('Two-Factor Subgroup Analysis (Interaction)', 'two_way')
    ],
    value='single',
    description='Analysis Type:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='600px')
)

# Info panel for analysis types
# HTML help text keyed by analysis type. These are f-strings evaluated once, when this
# cell runs, so the {k_total} shown in each panel is the value at that moment.
analysis_type_info = {
    'single': f"""
    <div style='background-color: #e7f3ff; padding: 15px; border-radius: 8px; margin-top: 10px; border-left: 4px solid #0066cc;'>
        <h4 style='margin-top: 0; color: #0066cc;'>📊 Single-Factor Subgroup Analysis</h4>

        <p><b>Purpose:</b> Test if effect size varies across levels of <b>ONE</b> moderator variable</p>

        <p><b>Example Question:</b></p>
        <ul>
            <li>Does the treatment effect differ between Blood Feeders vs. Herbivores?</li>
            <li>Is the effect larger for JH Addition vs. JH Inhibition?</li>
        </ul>

        <p><b>Statistical Output:</b></p>
        <ul>
            <li>Pooled effect size for each subgroup (with 95% CI)</li>
            <li>Test for differences between subgroups (Q<sub>between</sub> test)</li>
            <li>Heterogeneity within each subgroup (Q<sub>within</sub>, I²)</li>
            <li>Proportion of heterogeneity explained by moderator (R²)</li>
        </ul>

        <p><b>Best For:</b></p>
        <ul>
            <li>Exploring one main source of variation</li>
            <li>When you have a primary moderator hypothesis</li>
            <li>Datasets with 10+ observations per subgroup</li>
        </ul>

        <p><b>Current Dataset:</b> {k_total} total observations</p>
    </div>
    """,
    'two_way': f"""
    <div style='background-color: #fff3cd; padding: 15px; border-radius: 8px; margin-top: 10px; border-left: 4px solid #ff9800;'>
        <h4 style='margin-top: 0; color: #856404;'>📊 Two-Factor Subgroup Analysis (Interaction)</h4>

        <p><b>Purpose:</b> Test if effect size varies across combinations of <b>TWO</b> moderator variables</p>

        <p><b>Example Question:</b></p>
        <ul>
            <li>Is the effect of treatment type (JH Addition vs. Inhibition) different for Blood Feeders vs. Herbivores?</li>
            <li>Does the combination of diet type and treatment method influence effect size?</li>
        </ul>

        <p><b>Statistical Output:</b></p>
        <ul>
            <li>Pooled effect for each combination (e.g., Blood Feeders × JH Addition)</li>
            <li>Test for overall differences across all combinations</li>
            <li>Main effect of each factor</li>
            <li>Interaction test (does Factor 1 effect depend on Factor 2?)</li>
        </ul>

        <p><b>Best For:</b></p>
        <ul>
            <li>Testing interaction effects between two variables</li>
            <li>When effect of one moderator may depend on another</li>
            <li>Datasets with sufficient observations per combination</li>
        </ul>

        <p><b>⚠️ Requirements:</b></p>
        <ul>
            <li>Minimum 3-5 studies per combination cell</li>
            <li>Ideally 20+ total observations</li>
            <li>Balanced or near-balanced design preferred</li>
        </ul>

        <p><b>Current Dataset:</b> {k_total} total observations → check distribution carefully!</p>
    </div>
    """
}

# Output area that hosts whichever info panel matches the selected analysis type
analysis_type_output = widgets.Output()

def update_analysis_type_info(change):
    """Refresh the help panel and toggle the second moderator selector.

    Args:
        change: widget change notification; only its 'new' entry (the newly
            selected analysis type) is read.
    """
    selected_type = change['new']
    with analysis_type_output:
        clear_output()
        display(HTML(analysis_type_info[selected_type]))

        # The second moderator is only relevant outside single-factor mode
        show_second = selected_type != 'single'
        moderator2_container.layout.visibility = 'visible' if show_second else 'hidden'
        moderator2_container.layout.display = 'block' if show_second else 'none'

# React to analysis-type changes, then render the panel for the initial selection
analysis_type_widget.observe(update_analysis_type_info, names='value')

with analysis_type_output:
    display(HTML(analysis_type_info['single']))

# --- STEP 4: CREATE MODERATOR SELECTION WIDGETS ---
print("  ✓ Analysis type selector created")

moderator1_label = widgets.HTML(
    value=(
        "<h4 style='color: #2E86AB; margin-bottom: 5px;'>🔍 Select Moderator Variable(s)</h4>"
        "<p style='margin-top: 0; color: #666;'><i>Choose categorical variables to explore sources of heterogeneity</i></p>"
    )
)

# Primary moderator: defaults to the first available column
moderator1_widget = widgets.Dropdown(
    description='Moderator 1:',
    options=available_moderators,
    value=available_moderators[0],
    layout=widgets.Layout(width='600px'),
    style={'description_width': '120px'}
)

# Secondary moderator (two-way analysis only): 'None' means "not selected"
moderator2_widget = widgets.Dropdown(
    description='Moderator 2:',
    options=['None', *available_moderators],
    value='None',
    layout=widgets.Layout(width='600px'),
    style={'description_width': '120px'}
)

# Wrapper that starts out hidden; it is revealed when two-way analysis is chosen
moderator2_container = widgets.VBox(
    children=[moderator2_widget],
    layout=widgets.Layout(visibility='hidden', display='none')
)

print("  ✓ Moderator selectors created")

# Preview of selected moderator(s)
preview_output = widgets.Output()

def update_moderator_preview(change=None):
    """Print a sample-size preview for the currently selected moderator(s).

    Always shows the distribution of moderator 1. In two-way mode with a second
    moderator chosen, also shows its distribution, the cross-tabulation of both,
    and a warning graded by the smallest cell count.

    Args:
        change: ignored; accepted so the function can be used as a widget observer.
    """
    table_header = f"  {'Category':<30} {'Observations':>15} {'Papers':>10} {'Percent':>10}"
    table_rule = f"  {'-'*30} {'-'*15} {'-'*10} {'-'*10}"

    def _print_distribution(column):
        """Print one row per category of `column` and return the category counts."""
        counts = analysis_data[column].value_counts().sort_index()

        print("\n  Distribution:")
        print(table_header)
        print(table_rule)

        total_rows = len(analysis_data)
        for label, n_rows in counts.items():
            n_sources = analysis_data[analysis_data[column] == label]['id'].nunique()
            share = (n_rows / total_rows) * 100
            print(f"  {str(label):<30} {n_rows:>15} {n_sources:>10} {share:>9.1f}%")
        return counts

    with preview_output:
        clear_output()

        first_mod = moderator1_widget.value
        if analysis_type_widget.value == 'two_way':
            second_mod = moderator2_widget.value
        else:
            second_mod = None

        print("\n" + "="*70)
        print("MODERATOR SELECTION PREVIEW")
        print("="*70)

        # Moderator 1 info (its table also gets a closing rule and a TOTAL row)
        print(f"\n📊 Moderator 1: {first_mod}")
        first_counts = _print_distribution(first_mod)
        print(table_rule)
        print(f"  {'TOTAL':<30} {len(analysis_data):>15} {analysis_data['id'].nunique():>10} {'100.0':>9}%")

        # Check for adequate sample sizes
        smallest_group = first_counts.min()
        if smallest_group >= 5:
            print("\n  ✓ All groups have ≥ 5 observations")
        else:
            print(f"\n  ⚠️  WARNING: Smallest group has only {smallest_group} observations")
            print("     Consider raising minimum thresholds or combining categories")

        # Nothing more to show unless a real second moderator is selected
        if not second_mod or second_mod == 'None':
            return

        # Moderator 2 info
        print(f"\n{'─'*70}")
        print(f"📊 Moderator 2: {second_mod}")
        second_counts = _print_distribution(second_mod)

        # Show combination matrix
        print(f"\n{'─'*70}")
        print(f"📊 Combination Matrix: {first_mod} × {second_mod}")
        print("\n  Number of observations in each combination:\n")

        crosstab = pd.crosstab(
            analysis_data[first_mod],
            analysis_data[second_mod],
            margins=True,
            margins_name='Total'
        )
        print(crosstab.to_string())

        # Detailed cell analysis (empty combinations are not listed)
        print("\n  📋 Cell-by-Cell Analysis:")
        for level_a in first_counts.index:
            in_level_a = analysis_data[first_mod] == level_a
            for level_b in second_counts.index:
                cell_rows = analysis_data[in_level_a & (analysis_data[second_mod] == level_b)]
                cell_obs = len(cell_rows)
                cell_papers = cell_rows['id'].nunique()

                if cell_obs > 0:
                    marker = "✓" if cell_obs >= 5 else "⚠️"
                    print(f"    {marker} {level_a} × {level_b}: {cell_obs} obs, {cell_papers} papers")

        # Warnings for small cells (margins row/column excluded from the minimum)
        smallest_cell = crosstab.iloc[:-1, :-1].min().min()
        if smallest_cell == 0:
            print("\n  🔴 ERROR: Some combinations have ZERO observations!")
            print("     Two-way analysis not possible with empty cells")
            print("     Recommendation: Use single-factor analysis")
        elif smallest_cell < 3:
            print(f"\n  ⚠️  WARNING: Some combinations have very few observations (min = {smallest_cell})")
            print("     Recommendations:")
            print("       1. Increase minimum thresholds")
            print("       2. Consider combining categories")
            print("       3. Use single-factor analysis instead")
        elif smallest_cell < 5:
            print(f"\n  ⚠️  CAUTION: Some combinations have limited observations (min = {smallest_cell})")
            print("     Results may be unstable for small groups")
        else:
            print("\n  ✓ All combinations have ≥ 5 observations")

# Attach observers: any change to either moderator or to the analysis type redraws the preview
for selector_widget in (moderator1_widget, moderator2_widget):
    selector_widget.observe(update_moderator_preview, names='value')
analysis_type_widget.observe(lambda change: update_moderator_preview(), names='value')

print("  ✓ Preview function configured")

# Initialize preview
update_moderator_preview()

# --- MINIMUM THRESHOLDS (announced to the user as "STEP 4") ---
print("\n" + "="*70)
print("STEP 4: QUALITY THRESHOLD CONFIGURATION")
print("="*70)

thresholds_label = widgets.HTML(
    value=(
        "<h4 style='color: #2E86AB; margin-bottom: 5px;'>⚙️ Quality Thresholds</h4>"
        "<p style='margin-top: 0; color: #666;'><i>Subgroups not meeting these criteria will be excluded from analysis</i></p>"
    )
)

thresholds_desc = widgets.HTML(value="""
    <div style='background-color: #f8f9fa; padding: 12px; border-radius: 5px; margin-bottom: 10px;'>
        <p style='margin: 0;'><b>Purpose:</b> Ensure each subgroup has sufficient data for reliable estimation</p>
        <ul style='margin: 5px 0;'>
            <li><b>Min Papers:</b> Accounts for multiple observations from same study</li>
            <li><b>Min Observations:</b> Total data points needed for stable estimates</li>
        </ul>
        <p style='margin: 0;'><b>Recommendation:</b> Higher thresholds = more reliable but fewer subgroups</p>
    </div>
""")

# Minimum number of distinct papers a subgroup needs (1-10, default 3)
min_papers_subgroup = widgets.IntSlider(
    description='Min Papers/Group:',
    min=1,
    max=10,
    step=1,
    value=3,
    layout=widgets.Layout(width='550px'),
    style={'description_width': '150px'}
)

# Minimum number of observations a subgroup needs (2-20, default 5)
min_obs_subgroup = widgets.IntSlider(
    description='Min Observations/Group:',
    min=2,
    max=20,
    step=1,
    value=5,
    layout=widgets.Layout(width='550px'),
    style={'description_width': '150px'}
)

# Dynamic threshold feedback
threshold_feedback = widgets.Output()

def update_threshold_feedback(change=None):
    """Show which subgroups survive the current quality thresholds.

    Reads the two threshold sliders and the primary moderator dropdown,
    classifies every non-missing category of that moderator as kept or
    excluded, and prints the group counts plus the share of observations
    retained into the `threshold_feedback` output widget.

    Only the primary moderator is evaluated here, even when a two-way
    analysis is selected.

    Args:
        change: Change payload supplied by ``observe``; unused. Optional so the
            function can also be called directly to render the initial state.
    """
    with threshold_feedback:
        clear_output()

        min_papers = min_papers_subgroup.value
        min_obs = min_obs_subgroup.value
        mod1 = moderator1_widget.value

        print("\n📊 Impact Analysis:")
        print(f"  Current thresholds: ≥{min_papers} papers AND ≥{min_obs} observations")
        print(f"\n  Checking subgroups in '{mod1}'...")

        # One grouped pass instead of re-filtering the frame per category.
        # sort=False keeps categories in order of first appearance (the order
        # `.unique()` would give); rows with a missing moderator are dropped by
        # groupby's default dropna=True; observed=True ignores unused levels of
        # a categorical column.
        group_counts = analysis_data.groupby(mod1, sort=False, observed=True).agg(
            n_obs=('id', 'size'),
            n_papers=('id', 'nunique')
        )

        # Check which subgroups meet criteria
        groups_meeting_criteria = []
        groups_failing_criteria = []

        for category, counts in group_counts.iterrows():
            n_obs = int(counts['n_obs'])
            n_papers = int(counts['n_papers'])

            if n_papers >= min_papers and n_obs >= min_obs:
                groups_meeting_criteria.append((category, n_obs, n_papers))
            else:
                # Report every threshold the group misses, not just the first.
                reason = []
                if n_papers < min_papers:
                    reason.append(f"papers: {n_papers}<{min_papers}")
                if n_obs < min_obs:
                    reason.append(f"obs: {n_obs}<{min_obs}")
                groups_failing_criteria.append((category, n_obs, n_papers, ", ".join(reason)))

        print(f"\n  ✓ Groups meeting criteria: {len(groups_meeting_criteria)}")
        for cat, obs, papers in groups_meeting_criteria:
            print(f"    • {cat}: {obs} obs, {papers} papers")

        if groups_failing_criteria:
            print(f"\n  ✗ Groups excluded: {len(groups_failing_criteria)}")
            for cat, obs, papers, reason in groups_failing_criteria:
                print(f"    • {cat}: {obs} obs, {papers} papers (excluded: {reason})")

        # Overall assessment
        if len(groups_meeting_criteria) < 2:
            print(f"\n  🔴 ERROR: Need at least 2 groups for subgroup analysis!")
            print(f"     Current thresholds too strict - please lower them")
        elif len(groups_meeting_criteria) == 2:
            print(f"\n  ⚠️  WARNING: Only 2 groups available")
            print(f"     Analysis will be limited to comparing these two groups")
        else:
            print(f"\n  ✓ {len(groups_meeting_criteria)} groups available for analysis")

        # Calculate total retained data. The denominator is the full dataset,
        # so rows with a missing moderator count as "not retained".
        total_obs = len(analysis_data)
        total_retained_obs = sum(obs for _, obs, _ in groups_meeting_criteria)
        # Guard against an empty dataset instead of raising ZeroDivisionError.
        retention_rate = (total_retained_obs / total_obs) * 100 if total_obs else 0.0

        print(f"\n  📈 Data Retention:")
        print(f"     Observations retained: {total_retained_obs}/{total_obs} ({retention_rate:.1f}%)")

        if retention_rate < 50:
            print(f"     ⚠️  Less than 50% of data retained - consider lowering thresholds")
        elif retention_rate < 75:
            print(f"     ⚠️  Moderate data loss - verify this is acceptable")
        else:
            print(f"     ✓ Good data retention")

# Attach observers to thresholds: refresh the feedback panel whenever either
# slider or the primary moderator dropdown changes its `value` trait.
min_papers_subgroup.observe(update_threshold_feedback, names='value')
min_obs_subgroup.observe(update_threshold_feedback, names='value')
moderator1_widget.observe(update_threshold_feedback, names='value')

print(f"  ✓ Threshold widgets created")

# Initialize threshold feedback for the widgets' current values.
update_threshold_feedback()

# --- STEP 5: RUN ANALYSIS BUTTON ---
# (Numbered to match the "STEP 5" banner printed below.)
print("\n" + "="*70)
print("STEP 5: CREATING RUN BUTTON")
print("="*70)

# Button that triggers validation and saving of the configuration; its click
# handler is registered further down via `run_button.on_click(...)`.
run_button = widgets.Button(
    description='▶ Run Subgroup Analysis',
    button_style='success',
    layout=widgets.Layout(width='450px', height='50px'),
    style={'font_weight': 'bold', 'font_size': '14px'}
)

# Output area that `on_run_button_clicked` clears and prints into.
run_output = widgets.Output()

def on_run_button_clicked(b):
    """Validate the widget selections and save them for the analysis cell.

    Reads the analysis type, moderator(s) and quality thresholds from the
    widgets, runs the validation checks, prints the outcome into `run_output`
    and, when no blocking error was found, stores the configuration in
    ``ANALYSIS_CONFIG['subgroup_config']``.

    Args:
        b: The clicked button instance passed by ``on_click``; unused.

    Returns:
        None. Returns early, without saving anything, when validation fails.
    """
    with run_output:
        clear_output()

        print("\n" + "="*70)
        print("VALIDATING CONFIGURATION")
        print("="*70)

        # Get selections
        analysis_type = analysis_type_widget.value
        moderator1 = moderator1_widget.value
        # The secondary moderator only applies to two-way analyses; the
        # dropdown's 'None' placeholder string counts as "not selected".
        moderator2 = (
            moderator2_widget.value
            if analysis_type == 'two_way' and moderator2_widget.value != 'None'
            else None
        )
        min_papers = min_papers_subgroup.value
        min_obs = min_obs_subgroup.value

        def _meets_thresholds(frame):
            """Return True when `frame` has enough distinct papers and rows."""
            return frame['id'].nunique() >= min_papers and len(frame) >= min_obs

        # --- Validation Checks ---
        validation_errors = []
        validation_warnings = []
        # Bound up front so every later reference is safe in single-factor mode.
        n_empty_cells = 0

        # Check 1: Two-way analysis requires moderator 2
        if analysis_type == 'two_way' and not moderator2:
            validation_errors.append("Two-way analysis requires selecting Moderator 2")

        # Check 2: Moderators cannot be the same
        if moderator1 == moderator2:
            validation_errors.append("Moderator 1 and Moderator 2 cannot be the same variable")

        # Check 3: At least 2 groups must meet criteria.
        # `retained_mask` accumulates the rows of every valid group so the
        # retained subset can be taken later without a row-wise apply.
        valid_groups_list = []
        retained_mask = np.zeros(len(analysis_data), dtype=bool)
        groups_evaluated = False

        if analysis_type == 'single':
            groups_evaluated = True
            mod1_values = analysis_data[moderator1]
            for category in mod1_values.dropna().unique():
                group_mask = (mod1_values == category).to_numpy(dtype=bool)
                if _meets_thresholds(analysis_data[group_mask]):
                    valid_groups_list.append(category)
                    retained_mask |= group_mask
        elif moderator2:
            # Two-way analysis - check each combination. Skipped when the
            # second moderator is missing: indexing the frame with None would
            # raise KeyError and hide the validation error collected above.
            groups_evaluated = True
            mod1_values = analysis_data[moderator1]
            mod2_values = analysis_data[moderator2]
            mod2_levels = mod2_values.dropna().unique()
            for cat1 in mod1_values.dropna().unique():
                in_cat1 = (mod1_values == cat1).to_numpy(dtype=bool)
                for cat2 in mod2_levels:
                    cell_mask = in_cat1 & (mod2_values == cat2).to_numpy(dtype=bool)
                    if _meets_thresholds(analysis_data[cell_mask]):
                        valid_groups_list.append((cat1, cat2))
                        retained_mask |= cell_mask

        groups_meeting_criteria = len(valid_groups_list)

        if groups_evaluated and groups_meeting_criteria < 2:
            validation_errors.append(f"Only {groups_meeting_criteria} group(s) meet criteria. Need at least 2 groups for subgroup analysis. Lower thresholds or choose different moderator.")

        # Check 4: For two-way, check for empty cells (WARNING, not ERROR)
        if analysis_type == 'two_way' and moderator2:
            crosstab = pd.crosstab(analysis_data[moderator1], analysis_data[moderator2])
            n_empty_cells = int((crosstab == 0).sum().sum())
            total_cells = crosstab.shape[0] * crosstab.shape[1]

            if n_empty_cells > 0:
                validation_warnings.append(
                    f"{n_empty_cells}/{total_cells} combinations have zero observations. "
                    f"These empty cells will be automatically excluded from analysis. "
                    f"Proceeding with {groups_meeting_criteria} valid combinations."
                )

            # Check for very small cells. Masking with `crosstab > 0` yields
            # floats (NaN-filled), so cast back to int for a clean message.
            min_cell = int(crosstab[crosstab > 0].min().min()) if (crosstab > 0).any().any() else 0
            if min_cell > 0 and min_cell < 3:
                validation_warnings.append(
                    f"Some combinations have very few observations (minimum = {min_cell}). "
                    f"Results for these groups may be unstable."
                )

        # Check 5: Sufficient overall sample size
        if len(analysis_data) < 10:
            validation_warnings.append(
                f"Limited total sample size ({len(analysis_data)} observations). "
                f"Subgroup analysis may be underpowered."
            )

        # Display validation results
        if validation_errors:
            print("\n❌ VALIDATION FAILED")
            print("\nErrors that must be fixed:")
            for i, error in enumerate(validation_errors, 1):
                print(f"  {i}. {error}")
            print("\n⚠️  Please fix the errors above and try again")
            return

        if validation_warnings:
            print("\n⚠️  VALIDATION WARNINGS")
            print("\nWarnings (analysis will proceed, but be cautious):")
            for i, warning in enumerate(validation_warnings, 1):
                print(f"  {i}. {warning}")
            # Only mention empty cells when that is what was warned about.
            if n_empty_cells > 0:
                print("\n✓ Analysis can proceed - empty cells will be automatically excluded")
            else:
                print("\n✓ Analysis can proceed")

        # --- Configuration Summary ---
        print("\n" + "="*70)
        print("✓ VALIDATION PASSED - CONFIGURATION SAVED")
        print("="*70)

        print(f"\n📋 Subgroup Analysis Configuration:")
        print(f"  {'Parameter':<30} {'Value':<40}")
        print(f"  {'-'*30} {'-'*40}")
        print(f"  {'Analysis Type':<30} {analysis_type:<40}")
        print(f"  {'Primary Moderator':<30} {moderator1:<40}")

        if moderator2:
            print(f"  {'Secondary Moderator':<30} {moderator2:<40}")

        print(f"  {'Min Papers per Group':<30} {min_papers:<40}")
        print(f"  {'Min Observations per Group':<30} {min_obs:<40}")
        print(f"  {'Valid Groups/Combinations':<30} {groups_meeting_criteria:<40}")

        # Calculate expected data retention from the rows of all valid groups.
        retained_data = analysis_data[retained_mask].copy()

        retention_pct = (len(retained_data) / len(analysis_data)) * 100
        print(f"  {'Data Retained':<30} {len(retained_data)}/{len(analysis_data)} ({retention_pct:.1f}%)")

        # Show which groups will be included
        if analysis_type == 'two_way' and n_empty_cells > 0:
            print(f"\n📊 Valid Combinations to be Analyzed:")
            for i, (cat1, cat2) in enumerate(valid_groups_list, 1):
                cell_data = analysis_data[(analysis_data[moderator1] == cat1) &
                                         (analysis_data[moderator2] == cat2)]
                print(f"  {i}. {cat1} × {cat2}: k={len(cell_data)}, papers={cell_data['id'].nunique()}")

        # Save to config
        ANALYSIS_CONFIG['subgroup_config'] = {
            'timestamp': datetime.datetime.now(),
            'analysis_type': analysis_type,
            'moderator1': moderator1,
            'moderator2': moderator2,
            'min_papers': min_papers,
            'min_obs': min_obs,
            'expected_groups': groups_meeting_criteria,
            'valid_groups_list': valid_groups_list,  # categories (single) or (cat1, cat2) pairs (two-way)
            'data_retained': len(retained_data),
            'retention_pct': retention_pct,
            'has_empty_cells': n_empty_cells > 0,
            'n_empty_cells': n_empty_cells
        }

        # Save moderator information
        ANALYSIS_CONFIG['subgroup_config']['moderator1_info'] = {
            'name': moderator1,
            'n_categories': analysis_data[moderator1].nunique(),
            'categories': sorted(analysis_data[moderator1].dropna().unique().tolist())
        }

        if moderator2:
            ANALYSIS_CONFIG['subgroup_config']['moderator2_info'] = {
                'name': moderator2,
                'n_categories': analysis_data[moderator2].nunique(),
                'categories': sorted(analysis_data[moderator2].dropna().unique().tolist())
            }

        print(f"\n" + "="*70)
        print("✓ CONFIGURATION SAVED SUCCESSFULLY")
        print("="*70)

        print(f"\n📊 Configuration saved to: ANALYSIS_CONFIG['subgroup_config']")

        # Build the step list first so the numbering is always consecutive,
        # whether or not a "note the warnings" step is inserted.
        next_steps = ["Review the configuration summary above"]
        if validation_warnings:
            if n_empty_cells > 0:
                next_steps.append("Note the warnings - empty combinations will be excluded automatically")
            else:
                next_steps.append("Note the warnings above")
        next_steps.append("Run the next cell to perform subgroup analysis")
        next_steps.append("Results will include:")

        print(f"\n▶️  Next Steps:")
        for step_number, step_text in enumerate(next_steps, 1):
            print(f"  {step_number}. {step_text}")

        if analysis_type == 'single':
            print(f"     • Pooled effects for each subgroup")
            print(f"     • Test for between-group differences (Q-test)")
            print(f"     • Within-group heterogeneity (I²)")
            print(f"     • Proportion of heterogeneity explained (R²)")
        else:
            print(f"     • Pooled effects for {groups_meeting_criteria} valid combinations")
            print(f"     • Main effects and interaction tests")
            print(f"     • Heterogeneity decomposition")
            if n_empty_cells > 0:
                print(f"     • Note: {n_empty_cells} empty combinations automatically excluded")

        print("\n" + "="*70)

run_button.on_click(on_run_button_clicked)

print(f"  ✓ Run button configured with validation")

# --- STEP 6: ASSEMBLE WIDGET LAYOUT ---
# (Numbered to match the "STEP 6" banner printed below.)
# Stacks the previously created widgets vertically into four numbered
# sections separated by horizontal rules, then displays the result.
print("\n" + "="*70)
print("STEP 6: ASSEMBLING WIDGET INTERFACE")
print("="*70)

widget_layout = widgets.VBox([
    widgets.HTML("<hr style='margin: 20px 0; border: none; border-top: 2px solid #ddd;'>"),

    # Analysis Type Section
    widgets.HTML("<h3 style='color: #2E86AB;'>1️⃣ Select Analysis Type</h3>"),
    analysis_type_widget,
    analysis_type_output,

    widgets.HTML("<hr style='margin: 20px 0; border: none; border-top: 2px solid #ddd;'>"),

    # Moderator Selection Section
    widgets.HTML("<h3 style='color: #2E86AB;'>2️⃣ Select Moderator Variable(s)</h3>"),
    moderator1_label,
    moderator1_widget,
    moderator2_container,
    preview_output,

    widgets.HTML("<hr style='margin: 20px 0; border: none; border-top: 2px solid #ddd;'>"),

    # Threshold Section
    widgets.HTML("<h3 style='color: #2E86AB;'>3️⃣ Set Quality Thresholds</h3>"),
    thresholds_label,
    thresholds_desc,
    min_papers_subgroup,
    min_obs_subgroup,
    threshold_feedback,

    widgets.HTML("<hr style='margin: 20px 0; border: none; border-top: 2px solid #ddd;'>"),

    # Run Button Section
    widgets.HTML("<h3 style='color: #2E86AB;'>4️⃣ Run Analysis</h3>"),
    widgets.HTML("<p style='color: #666;'><i>Review your configuration above, then click the button to proceed</i></p>"),
    run_button,
    run_output
])

print(f"  ✓ Widget layout assembled")

# Display widgets
display(widget_layout)

print(f"\n✓ Interactive interface displayed")

# --- FINAL STATUS ---
# Closing banner for the configuration cell.
print("\n".join([
    "\n" + "="*70,
    "✅ SUBGROUP ANALYSIS CONFIGURATION READY",
    "="*70,
]))

# Dataset-level context for choosing moderators.
print("\n".join([
    f"\n📊 Configuration Summary:",
    f"  • Available moderators: {len(available_moderators)}",
    f"  • Total observations: {k_total}",
    f"  • Unique papers: {k_papers}",
    f"  • Overall heterogeneity (I²): {I_squared_overall:.2f}%",
]))

# Traffic-light reading of the overall I²: above 50 is high, above 25 is
# moderate, anything else is low.
print(
    "\n  🔴 High heterogeneity detected - subgroup analysis highly recommended\n"
    "     Explore which moderators explain the variation between studies"
    if I_squared_overall > 50
    else "\n  🟡 Moderate heterogeneity - subgroup analysis may be informative"
    if I_squared_overall > 25
    else "\n  🟢 Low heterogeneity - subgroup analysis exploratory"
)

# Step-by-step usage instructions for the interface displayed above.
print("\n".join([
    "\n👆 INSTRUCTIONS:",
    "  1. Select analysis type (single-factor or two-factor)",
    "  2. Choose moderator variable(s) from the dropdown(s)",
    "  3. Review the distribution preview",
    "  4. Adjust quality thresholds if needed",
    "  5. Click '▶ Run Subgroup Analysis' button",
    "  6. After validation, proceed to next cell for results",
]))

print("\n".join([
    "\n💡 Tips:",
    "  • Start with single-factor analysis to identify main moderators",
    "  • Use two-factor analysis to test interactions",
    "  • Higher thresholds = more reliable but fewer groups",
    "  • Check distribution preview for balance and sample sizes",
]))

print("\n" + "="*70)

# Store configuration metadata: a snapshot of what the interface was built from.
SUBGROUP_CONFIG_METADATA = dict(
    timestamp=datetime.datetime.now(),
    available_moderators=available_moderators,
    moderator_info=moderator_info,
    total_observations=k_total,
    total_papers=k_papers,
    overall_heterogeneity_I2=I_squared_overall,
    interface_created=True,
)

print(f"\n📊 Metadata saved to SUBGROUP_CONFIG_METADATA")


SUBGROUP ANALYSIS CONFIGURATION
Timestamp: 2025-11-14 18:01:37

STEP 1: VERIFYING PREREQUISITES
✓ Overall analysis results loaded successfully
  Effect size: Hedges' g (g)
  Effect column: hedges_g
  Overall Q statistic: 475.6386
  Overall I²: 85.70%

📊 Dataset Summary:
  • Total observations: 69
  • Unique papers: 23
  • Avg obs per paper: 3.00

✓ Adequate data for subgroup analysis (69 observations)

STEP 2: IDENTIFYING MODERATOR VARIABLES

🔍 Scanning dataset for potential moderator variables...

📋 Available Moderator Variables: 11

Variable                    Categories    Missing           Range
------------------------- ------------ ---------- ---------------
Inoculation                          2          0     17-52    
Conditions                           1          0     69-69    
Crop                                 6          0      1-34    
Bacteria                             3          0     12-33    
kgPot                               11          0      1-21    
plants

VBox(children=(HTML(value="<hr style='margin: 20px 0; border: none; border-top: 2px solid #ddd;'>"), HTML(valu…


✓ Interactive interface displayed

✅ SUBGROUP ANALYSIS CONFIGURATION READY

📊 Configuration Summary:
  • Available moderators: 11
  • Total observations: 69
  • Unique papers: 23
  • Overall heterogeneity (I²): 85.70%

  🔴 High heterogeneity detected - subgroup analysis highly recommended
     Explore which moderators explain the variation between studies

👆 INSTRUCTIONS:
  1. Select analysis type (single-factor or two-factor)
  2. Choose moderator variable(s) from the dropdown(s)
  3. Review the distribution preview
  4. Adjust quality thresholds if needed
  5. Click '▶ Run Subgroup Analysis' button
  6. After validation, proceed to next cell for results

💡 Tips:
  • Start with single-factor analysis to identify main moderators
  • Use two-factor analysis to test interactions
  • Higher thresholds = more reliable but fewer groups
  • Check distribution preview for balance and sample sizes


📊 Metadata saved to SUBGROUP_CONFIG_METADATA


In [None]:
#@title 🔬 PERFORM SUBGROUP ANALYSIS

# =============================================================================
# CELL 8: SUBGROUP ANALYSIS EXECUTION
# Purpose: Calculate pooled effects for each subgroup and test for differences
# Dependencies: Cell 7 (subgroup_config, analysis_data)
# Outputs: Subgroup results, heterogeneity partitioning, ANALYSIS_CONFIG['subgroup_results']
# =============================================================================

print("\n" + "="*70)
print("SUBGROUP ANALYSIS EXECUTION")
print("="*70)
print(f"Timestamp: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")

# --- STEP 1: LOAD CONFIGURATION ---
# Pulls everything this cell needs out of ANALYSIS_CONFIG into module-level
# names. Any missing key raises KeyError, which is reported with
# troubleshooting hints and then re-raised so the cell stops.
print("\n" + "="*70)
print("STEP 1: LOADING CONFIGURATION")
print("="*70)

try:
    # Settings saved by the configuration cell's run button, plus column
    # names and overall results stored under ANALYSIS_CONFIG.
    subgroup_config = ANALYSIS_CONFIG['subgroup_config']
    effect_col = ANALYSIS_CONFIG['effect_col']
    var_col = ANALYSIS_CONFIG['var_col']
    se_col = ANALYSIS_CONFIG['se_col']
    es_config = ANALYSIS_CONFIG['es_config']
    overall_results = ANALYSIS_CONFIG['overall_results']
    Qt_overall = overall_results['Qt']
    k_overall = overall_results['k']
    I_squared_overall = overall_results['I_squared']

    # Unpack the subgroup settings; `moderator2` is stored as None when no
    # secondary moderator was selected.
    analysis_type = subgroup_config['analysis_type']
    moderator1 = subgroup_config['moderator1']
    moderator2 = subgroup_config['moderator2']
    min_papers = subgroup_config['min_papers']
    min_obs = subgroup_config['min_obs']

    print(f"✓ Configuration loaded successfully")
    print(f"\n  📊 Analysis Parameters:")
    print(f"     Type: {analysis_type.upper()}")
    print(f"     Primary moderator: {moderator1}")
    if moderator2:
        print(f"     Secondary moderator: {moderator2}")
    print(f"     Min papers/group: {min_papers}")
    print(f"     Min observations/group: {min_obs}")

    print(f"\n  📊 Overall Meta-Analysis Context:")
    print(f"     Total observations: {k_overall}")
    print(f"     Overall Q: {Qt_overall:.4f}")
    print(f"     Overall I²: {I_squared_overall:.2f}%")

except KeyError as e:
    # A missing key most likely means the configuration cell was not run or
    # its run button was never clicked; explain, then propagate the error.
    print(f"❌ ERROR: Configuration not found - {e}")
    print("\nTroubleshooting:")
    print("  1. Ensure Cell 7 (subgroup configuration) was run successfully")
    print("  2. Check that you clicked 'Run Subgroup Analysis' button")
    print("  3. Verify ANALYSIS_CONFIG['subgroup_config'] exists")
    raise

# --- STEP 2: PREPARE GROUPING VARIABLES ---
# Chooses the grouping column(s) for the selected analysis type and normalises
# them to whitespace-stripped strings so equal labels group together.
print("\n" + "="*70)
print("STEP 2: PREPARING DATA FOR SUBGROUP ANALYSIS")
print("="*70)

if analysis_type == 'single':
    grouping_cols = [moderator1]
    analysis_label = f"by {moderator1}"
    print(f"\n📊 Single-Factor Analysis: {analysis_label}")
else:  # two_way
    grouping_cols = [moderator1, moderator2]
    analysis_label = f"by {moderator1} × {moderator2}"
    print(f"\n📊 Two-Factor Analysis: {analysis_label}")

# Ensure grouping columns are strings and clean.
# Missing values are kept missing: a plain `astype(str)` would turn NaN into
# the literal text 'nan', which would then be analysed as a real subgroup even
# though the configuration step excluded missing values with `dropna()`.
for col in grouping_cols:
    value_present = analysis_data[col].notna()
    analysis_data[col] = analysis_data[col].astype(str).str.strip().where(value_present)
    print(f"  ✓ {col} prepared ({analysis_data[col].nunique()} unique values)")

    n_missing = int((~value_present).sum())
    if n_missing > 0:
        print(f"    ⚠️  {n_missing} observation(s) with missing {col} will be excluded from subgroups")

# --- STEP 3: IDENTIFY AND FILTER SUBGROUPS ---
# Counts observations and distinct papers per subgroup, drops subgroups below
# the configured thresholds, and builds `analysis_data_subgroups` containing
# only the rows of retained subgroups. Raises ValueError when fewer than two
# subgroups remain.
print("\n" + "="*70)
print("STEP 3: IDENTIFYING VALID SUBGROUPS")
print("="*70)

print(f"\n🔍 Calculating group statistics...")

# Group data and calculate statistics: one row per observed combination of the
# grouping column(s), with row count and number of distinct 'id' values.
group_stats = analysis_data.groupby(grouping_cols).agg(
    n_obs=('id', 'size'),
    n_papers=('id', 'nunique')
).reset_index()

print(f"  ✓ Found {len(group_stats)} potential subgroups")

# Apply quality filters
groups_to_keep = []      # tuples of moderator values: (m1,) or (m1, m2)
groups_discarded = []    # dicts describing each rejected group and why

print(f"\n📋 Applying quality thresholds:")
print(f"  • Minimum papers: {min_papers}")
print(f"  • Minimum observations: {min_obs}")

for _, row in group_stats.iterrows():
    # `group_name` is the display label; `group_tuple` is the key used for
    # filtering the dataset below.
    if analysis_type == 'single':
        group_name = row[moderator1]
        group_tuple = (row[moderator1],)
    else:
        group_name = f"{row[moderator1]} × {row[moderator2]}"
        group_tuple = (row[moderator1], row[moderator2])

    n_papers = row['n_papers']
    n_obs = row['n_obs']

    # Check against thresholds. The paper threshold is tested first, so a
    # group failing both is reported with the paper-count reason only.
    if n_papers < min_papers:
        groups_discarded.append({
            'group': group_name,
            'reason': f'Insufficient papers ({n_papers} < {min_papers})',
            'n_papers': n_papers,
            'n_obs': n_obs
        })
    elif n_obs < min_obs:
        groups_discarded.append({
            'group': group_name,
            'reason': f'Insufficient observations ({n_obs} < {min_obs})',
            'n_papers': n_papers,
            'n_obs': n_obs
        })
    else:
        groups_to_keep.append(group_tuple)

# Filter dataset to include only valid groups
if analysis_type == 'single':
    # Single factor: match on the first (only) element of each kept tuple.
    analysis_data_subgroups = analysis_data[
        analysis_data[moderator1].isin([g[0] for g in groups_to_keep])
    ].copy()
else:
    # Two factors: keep rows whose (moderator1, moderator2) pair was retained.
    analysis_data_subgroups = analysis_data[
        analysis_data.apply(lambda row: (row[moderator1], row[moderator2]) in groups_to_keep, axis=1)
    ].copy()

# Report filtering results (retention is relative to all rows of analysis_data)
print(f"\n📊 Filtering Results:")
print(f"  ✓ Groups retained: {len(groups_to_keep)}")
print(f"  ✓ Total observations: {len(analysis_data_subgroups)}")
print(f"  ✓ Retention rate: {(len(analysis_data_subgroups)/len(analysis_data)*100):.1f}%")

if groups_discarded:
    print(f"\n  ⚠️  Groups excluded: {len(groups_discarded)}")
    print(f"\n  {'Group':<40} {'Observations':>15} {'Papers':>10} {'Reason':<40}")
    print(f"  {'-'*40} {'-'*15} {'-'*10} {'-'*40}")
    for g in groups_discarded:
        print(f"  {g['group']:<40} {g['n_obs']:>15} {g['n_papers']:>10} {g['reason']:<40}")
else:
    print(f"\n  ✓ No groups excluded (all meet quality criteria)")

# Validation check: a subgroup comparison needs at least two groups.
if len(groups_to_keep) < 2:
    print(f"\n❌ ERROR: Need at least 2 groups for subgroup analysis")
    print(f"   Currently have {len(groups_to_keep)} valid group(s)")
    print(f"   Consider lowering quality thresholds or choosing different moderator")
    raise ValueError("Insufficient groups for subgroup analysis")

# --- STEP 4: ANALYZE EACH SUBGROUP ---
print("\n" + "="*70)
print("STEP 4: CALCULATING SUBGROUP-SPECIFIC EFFECTS")
print("="*70)

print(f"\n🔬 Performing meta-analysis within each subgroup...")

# Statistical parameters
alpha = 0.05
z_crit = norm.ppf(1 - alpha / 2)  # 1.96 for 95% CI

# Storage for results
subgroup_results = []

# Group data
grouped = analysis_data_subgroups.groupby(grouping_cols)

# Analyze each subgroup
for group_idx, (group_values, group_data) in enumerate(grouped, 1):
    # --- Group Identification ---
    if analysis_type == 'single':
        group_name = group_values  # For single factor, group_values is just the value
        print(f"\n{'─'*70}")
        print(f"SUBGROUP {group_idx}/{len(groups_to_keep)}: {group_name}")
        print(f"{'─'*70}")
    else:
        group_name = f"{group_values[0]} × {group_values[1]}"
        print(f"\n{'─'*70}")
        print(f"SUBGROUP {group_idx}/{len(groups_to_keep)}: {group_name}")
        print(f"  • {moderator1}: {group_values[0]}")
        print(f"  • {moderator2}: {group_values[1]}")
        print(f"{'─'*70}")

    # Sample size information
    k_group = len(group_data)
    n_papers_group = group_data['id'].nunique()

    print(f"\n📊 Sample Size:")
    print(f"  • Observations (k): {k_group}")
    print(f"  • Unique papers: {n_papers_group}")
    print(f"  • Avg obs per paper: {k_group/n_papers_group:.2f}")

    # Effect size distribution in this group
    mean_effect = group_data[effect_col].mean()
    median_effect = group_data[effect_col].median()
    min_effect = group_data[effect_col].min()
    max_effect = group_data[effect_col].max()

    print(f"\n📈 Effect Size Distribution:")
    print(f"  • Mean: {mean_effect:.4f}")
    print(f"  • Median: {median_effect:.4f}")
    print(f"  • Range: [{min_effect:.4f}, {max_effect:.4f}]")

    # --- FIXED-EFFECTS MODEL ---
    print(f"\n🔢 Fixed-Effects Model:")

    sum_w_fixed_group = group_data['w_fixed'].sum()

    if sum_w_fixed_group <= 0:
        print(f"  ⚠️  WARNING: Non-positive weights, skipping this group")
        continue

    # Pooled effect
    pooled_effect_fe = (group_data['w_fixed'] * group_data[effect_col]).sum() / sum_w_fixed_group
    pooled_var_fe = 1 / sum_w_fixed_group
    pooled_se_fe = np.sqrt(pooled_var_fe)
    ci_lower_fe = pooled_effect_fe - z_crit * pooled_se_fe
    ci_upper_fe = pooled_effect_fe + z_crit * pooled_se_fe
    z_stat_fe = pooled_effect_fe / pooled_se_fe
    p_value_fe = 2 * (1 - norm.cdf(abs(z_stat_fe)))

    print(f"  • Pooled effect: {pooled_effect_fe:.4f}")
    print(f"  • 95% CI: [{ci_lower_fe:.4f}, {ci_upper_fe:.4f}]")
    print(f"  • SE: {pooled_se_fe:.4f}")
    print(f"  • Z: {z_stat_fe:.4f}")
    print(f"  • P-value: {p_value_fe:.4g} {'***' if p_value_fe < 0.001 else '**' if p_value_fe < 0.01 else '*' if p_value_fe < 0.05 else 'ns'}")

    # --- HETEROGENEITY WITHIN GROUP ---
    print(f"\n📊 Within-Group Heterogeneity:")

    Q_within = (group_data['w_fixed'] * (group_data[effect_col] - pooled_effect_fe)**2).sum()
    df_Q_group = k_group - 1

    print(f"  • Q_within: {Q_within:.4f}")
    print(f"  • df: {df_Q_group}")

    # P-value for Q
    if df_Q_group > 0:
        p_Q_within = 1 - chi2.cdf(Q_within, df_Q_group)
        print(f"  • P-value: {p_Q_within:.4g}")
    else:
        p_Q_within = np.nan
        print(f"  • P-value: N/A (k=1)")

    # I² within group
    if df_Q_group > 0 and Q_within > df_Q_group:
        I_squared_group = ((Q_within - df_Q_group) / Q_within) * 100
    else:
        I_squared_group = 0

    print(f"  • I²: {I_squared_group:.2f}%", end='')

    if I_squared_group < 25:
        print(f" (Low heterogeneity 🟢)")
    elif I_squared_group < 50:
        print(f" (Moderate heterogeneity 🟡)")
    elif I_squared_group < 75:
        print(f" (Substantial heterogeneity 🟠)")
    else:
        print(f" (Considerable heterogeneity 🔴)")

    # Tau² within group (DerSimonian-Laird method)
    if df_Q_group > 0:
        sum_w_sq = (group_data['w_fixed']**2).sum()
        C_group = sum_w_fixed_group - (sum_w_sq / sum_w_fixed_group)

        if C_group > 0 and Q_within > df_Q_group:
            tau_squared_group = (Q_within - df_Q_group) / C_group
        else:
            tau_squared_group = 0
    else:
        tau_squared_group = 0

    tau_group = np.sqrt(tau_squared_group)
    print(f"  • τ²: {tau_squared_group:.6f}")
    print(f"  • τ: {tau_group:.4f}")

    # --- RANDOM-EFFECTS MODEL ---
    print(f"\n🔢 Random-Effects Model:")

    if tau_squared_group >= 0:
        w_random_group = 1 / (group_data[var_col] + tau_squared_group)
        sum_w_random_group = w_random_group.sum()

        if sum_w_random_group > 0:
            pooled_effect_re = (w_random_group * group_data[effect_col]).sum() / sum_w_random_group
            pooled_var_re = 1 / sum_w_random_group
            pooled_se_re = np.sqrt(pooled_var_re)
            ci_lower_re = pooled_effect_re - z_crit * pooled_se_re
            ci_upper_re = pooled_effect_re + z_crit * pooled_se_re
            z_stat_re = pooled_effect_re / pooled_se_re
            p_value_re = 2 * (1 - norm.cdf(abs(z_stat_re)))

            print(f"  • Pooled effect: {pooled_effect_re:.4f}")
            print(f"  • 95% CI: [{ci_lower_re:.4f}, {ci_upper_re:.4f}]")
            print(f"  • SE: {pooled_se_re:.4f}")
            print(f"  • Z: {z_stat_re:.4f}")
            print(f"  • P-value: {p_value_re:.4g} {'***' if p_value_re < 0.001 else '**' if p_value_re < 0.01 else '*' if p_value_re < 0.05 else 'ns'}")
        else:
            pooled_effect_re = np.nan
            pooled_var_re = np.nan
            pooled_se_re = np.nan
            ci_lower_re = np.nan
            ci_upper_re = np.nan
            z_stat_re = np.nan
            p_value_re = np.nan
            print(f"  ⚠️  Could not calculate random-effects (non-positive weights)")
    else:
        pooled_effect_re = np.nan
        pooled_var_re = np.nan
        pooled_se_re = np.nan
        ci_lower_re = np.nan
        ci_upper_re = np.nan
        z_stat_re = np.nan
        p_value_re = np.nan
        print(f"  ⚠️  Could not calculate random-effects (negative τ²)")

    # --- BIOLOGICAL INTERPRETATION ---
    # Back-transform the pooled log-scale estimates into ratios for display.
    # The fold-change values default to NaN before any branching. Previously
    # they were bound only inside the lnRR / log-odds branches (or the outer
    # else), so any other effect-size type with has_fold_change set either
    # raised NameError when the results dict was built or reused whatever
    # values were left over from an earlier pass.
    fold_fe = np.nan
    fold_re = np.nan

    if es_config['has_fold_change'] and pd.notna(pooled_effect_re):
        print(f"\n📈 Biological Interpretation:")

        if ANALYSIS_CONFIG['effect_size_type'] == 'lnRR':
            # Signed fold-change convention: a ratio below 1 is reported as
            # -1/RR (e.g. RR = 0.5 is shown as -2.00×).
            RR_fe = np.exp(pooled_effect_fe)
            fold_fe = RR_fe if pooled_effect_fe >= 0 else -1/RR_fe
            pct_change_fe = (RR_fe - 1) * 100

            RR_re = np.exp(pooled_effect_re)
            fold_re = RR_re if pooled_effect_re >= 0 else -1/RR_re
            pct_change_re = (RR_re - 1) * 100

            print(f"  Fixed-Effects:")
            print(f"    • Response Ratio: {RR_fe:.3f}")
            print(f"    • Fold-change: {fold_fe:+.2f}×")
            print(f"    • Percent change: {pct_change_fe:+.1f}%")

            print(f"  Random-Effects:")
            print(f"    • Response Ratio: {RR_re:.3f}")
            print(f"    • Fold-change: {fold_re:+.2f}×")
            print(f"    • Percent change: {pct_change_re:+.1f}%")

            # Direction is judged on the log scale with a ±0.05 dead band
            if pooled_effect_re > 0.05:
                direction = "UPREGULATION ↑"
            elif pooled_effect_re < -0.05:
                direction = "DOWNREGULATION ↓"
            else:
                direction = "NO CHANGE ─"
            print(f"\n  Direction: {direction}")

        # Both spellings are accepted: the setup cell registers this type as
        # 'log_OR' in SUPPORTED_EFFECT_SIZES, whereas this branch used to
        # match only the lower-case 'log_or'.
        elif ANALYSIS_CONFIG['effect_size_type'] in ('log_or', 'log_OR'):
            OR_fe = np.exp(pooled_effect_fe)
            OR_re = np.exp(pooled_effect_re)
            # For odds ratios the stored "fold change" is the OR itself
            fold_fe = OR_fe
            fold_re = OR_re

            print(f"  Fixed-Effects:")
            print(f"    • Odds Ratio: {OR_fe:.3f}")

            print(f"  Random-Effects:")
            print(f"    • Odds Ratio: {OR_re:.3f}")

            if OR_re > 1:
                direction = "Positive association"
            elif OR_re < 1:
                direction = "Negative association"
            else:
                direction = "No association"
            print(f"\n  Interpretation: {direction}")

    # --- STORE RESULTS ---
    # One flat record per subgroup. Keyword order is kept as-is because it
    # becomes the column order of the DataFrame built from these records.
    result_dict = dict(
        k=k_group,
        n_papers=n_papers_group,
        # Fixed-effects
        pooled_effect_fe=pooled_effect_fe,
        pooled_se_fe=pooled_se_fe,
        pooled_var_fe=pooled_var_fe,
        ci_lower_fe=ci_lower_fe,
        ci_upper_fe=ci_upper_fe,
        z_stat_fe=z_stat_fe,
        p_value_fe=p_value_fe,
        # Random-effects
        pooled_effect_re=pooled_effect_re,
        pooled_se_re=pooled_se_re,
        pooled_var_re=pooled_var_re,
        ci_lower_re=ci_lower_re,
        ci_upper_re=ci_upper_re,
        z_stat_re=z_stat_re,
        p_value_re=p_value_re,
        # Heterogeneity
        Q_within=Q_within,
        df_Q=df_Q_group,
        p_Q_within=p_Q_within,
        I_squared=I_squared_group,
        tau_squared=tau_squared_group,
        tau=tau_group,
        # Fold-changes
        fold_change_fe=fold_fe,
        fold_change_re=fold_re,
        # Raw data stats
        mean_effect_raw=mean_effect,
        median_effect_raw=median_effect,
        min_effect_raw=min_effect,
        max_effect_raw=max_effect,
    )

    # Group identifiers: the display label is always stored under 'group';
    # the moderator column(s) carry the level(s) that define this subgroup.
    result_dict['group'] = group_name
    if analysis_type == 'single':
        result_dict[moderator1] = group_name
    else:
        result_dict[moderator1] = group_values[0]
        result_dict[moderator2] = group_values[1]

    subgroup_results.append(result_dict)

# Continue to Part 2...
# --- STEP 5: CREATE RESULTS DATAFRAME ---
print("\n" + "=" * 70, "STEP 5: COMPILING RESULTS", "=" * 70, sep="\n")

# One row per subgroup record collected above
results_df = pd.DataFrame(subgroup_results)

# Order rows for presentation: by the single moderator, or by both
# moderators (first, then second) in a two-way analysis.
sort_columns = [moderator1] if analysis_type == 'single' else [moderator1, moderator2]
results_df = results_df.sort_values(sort_columns).reset_index(drop=True)

print(f"\n✓ Results compiled for {len(results_df)} subgroups")
if analysis_type == 'single':
    print(f"  Sorted by: {moderator1}")
else:
    print(f"  Sorted by: {moderator1}, then {moderator2}")

# --- STEP 6: HETEROGENEITY PARTITIONING ---
print("\n" + "=" * 70, "STEP 6: HETEROGENEITY PARTITIONING", "=" * 70, sep="\n")

print(f"\n📊 Decomposing overall heterogeneity into components...")

# Within-group Q is the sum over subgroups; the between-group share is
# whatever remains of the total, floored at zero.
Qe = results_df['Q_within'].sum()
QM = max(0, Qt_overall - Qe)

# Degrees of freedom (M = number of subgroups)
M = len(results_df)
df_QM, df_Qe = M - 1, k_overall - M

# Shares of the total Q, and R² as the explained share clipped to [0, 100]
if Qt_overall > 0:
    prop_explained = (QM / Qt_overall) * 100
    prop_residual = (Qe / Qt_overall) * 100
    R_squared = max(0, min(100, prop_explained))
else:
    prop_explained = prop_residual = R_squared = 0

decomposition_lines = [
    f"\n📋 Heterogeneity Decomposition:",
    f"  {'Component':<30} {'Q':>12} {'df':>8} {'% of Total':>12}",
    f"  {'-'*30} {'-'*12} {'-'*8} {'-'*12}",
    f"  {'Total (Q_T)':<30} {Qt_overall:>12.4f} {k_overall-1:>8} {'100.0':>11}%",
    f"  {'Between-groups (Q_M)':<30} {QM:>12.4f} {df_QM:>8} {prop_explained:>11.1f}%",
    f"  {'Within-groups (Q_E)':<30} {Qe:>12.4f} {df_Qe:>8} {prop_residual:>11.1f}%",
]
print("\n".join(decomposition_lines))

print(f"\n📊 Variance Explained:")
print(f"  • R² = {R_squared:.1f}%")
print(f"  • Interpretation: The moderator explains {R_squared:.1f}% of between-study heterogeneity")

# Verbal rating of R²: the first band whose floor is exceeded wins,
# otherwise the "Low" rating applies.
for r2_floor, r2_color, r2_interp in (
    (75, "🟢", "Excellent - moderator accounts for most heterogeneity"),
    (50, "🟢", "Good - moderator is a major source of heterogeneity"),
    (25, "🟡", "Moderate - moderator partially explains heterogeneity"),
):
    if R_squared > r2_floor:
        break
else:
    r2_color = "🟠"
    r2_interp = "Low - other factors may be more important"

print(f"  {r2_color} {r2_interp}")

# --- STEP 7: TEST FOR SUBGROUP DIFFERENCES ---
# Chi-square test of the between-group statistic Q_M on df_QM degrees of
# freedom. Binds p_value_QM, sig_level, sig_color and sig_interpretation.
print("\n" + "="*70)
print("STEP 7: TEST FOR SUBGROUP DIFFERENCES")
print("="*70)

print(f"\n🔬 Testing if effect sizes differ significantly across subgroups...")

if df_QM > 0:
    # Upper-tail probability of Q_M. The survival function is used instead
    # of 1 - cdf so that very large Q_M values keep a small non-zero
    # p-value rather than rounding to exactly 0.
    p_value_QM = chi2.sf(QM, df_QM)

    print(f"\n📊 Q-Test for Moderator Effect:")
    print(f"  • Q_between (Q_M): {QM:.4f}")
    print(f"  • Degrees of freedom: {df_QM}")
    print(f"  • P-value: {p_value_QM:.4g}")

    # Significance interpretation
    if p_value_QM < 0.001:
        sig_level = "HIGHLY SIGNIFICANT (p < 0.001) ***"
        sig_color = "🔴"
        sig_interpretation = "Very strong evidence that subgroups differ"
    elif p_value_QM < 0.01:
        sig_level = "VERY SIGNIFICANT (p < 0.01) **"
        sig_color = "🟠"
        sig_interpretation = "Strong evidence that subgroups differ"
    elif p_value_QM < 0.05:
        sig_level = "SIGNIFICANT (p < 0.05) *"
        sig_color = "🟡"
        sig_interpretation = "Moderate evidence that subgroups differ"
    elif p_value_QM < 0.10:
        sig_level = "MARGINALLY SIGNIFICANT (p < 0.10) +"
        sig_color = "🟢"
        sig_interpretation = "Weak evidence that subgroups differ"
    else:
        sig_level = "NOT SIGNIFICANT (p ≥ 0.10) ns"
        sig_color = "⚪"
        sig_interpretation = "No significant differences between subgroups"

    print(f"\n  {sig_color} Result: {sig_level}")
    print(f"  {sig_interpretation}")

else:
    # With a single subgroup there is no between-group variation to test
    p_value_QM = np.nan
    print(f"\n  ⚠️  Cannot test for differences (only one subgroup)")
    sig_level = "N/A"
    sig_color = "⚪"  # bound here too so the name exists on both paths
    sig_interpretation = "Test not applicable"

# --- STEP 8: SUBGROUP COMPARISON SUMMARY ---
# Prints one random-effects summary row per subgroup, followed (when the
# effect size supports it) by a fold-change table.
print("\n" + "="*70)
print("STEP 8: SUBGROUP COMPARISON SUMMARY")
print("="*70)

print(f"\n📊 Random-Effects Model Results:\n")

# Create formatted summary table. The trailing 'Sig' column carries the
# per-row marker explained by the legend printed under the table.
print(f"  {'Group':<35} {'k':>5} {'Effect':>10} {'95% CI':>22} {'P-value':>10} {'I²':>8} {'Sig':<4}")
print(f"  {'-'*35} {'-'*5} {'-'*10} {'-'*22} {'-'*10} {'-'*8} {'-'*4}")

for _, row in results_df.iterrows():
    group_name = str(row['group'])[:35]  # Truncate if too long
    k_val = int(row['k'])
    effect = row['pooled_effect_re']
    ci_lower = row['ci_lower_re']
    ci_upper = row['ci_upper_re']
    p_val = row['p_value_re']
    i2_val = row['I_squared']

    # Significance marker (missing p-values are reported as N/A)
    if pd.notna(p_val):
        if p_val < 0.001:
            sig_marker = "***"
        elif p_val < 0.01:
            sig_marker = "**"
        elif p_val < 0.05:
            sig_marker = "*"
        elif p_val < 0.10:
            sig_marker = "+"
        else:
            sig_marker = "ns"
    else:
        sig_marker = "N/A"

    # Format CI
    ci_str = f"[{ci_lower:>6.3f}, {ci_upper:>6.3f}]"

    # The marker was previously computed but never shown; the p-value field
    # is also widened to 10 so the row lines up with its header.
    print(f"  {group_name:<35} {k_val:>5} {effect:>10.4f} {ci_str:>22} {p_val:>10.4g} {i2_val:>7.1f}% {sig_marker:<4}")

print(f"\n  Significance: *** p<0.001, ** p<0.01, * p<0.05, + p<0.10, ns = not significant")

# Add fold-change summary if applicable
if es_config['has_fold_change']:
    print(f"\n📈 Fold-Change Summary (Random-Effects):\n")
    print(f"  {'Group':<35} {'Fold-Change':>15} {'Direction':<20}")
    print(f"  {'-'*35} {'-'*15} {'-'*20}")

    for _, row in results_df.iterrows():
        group_name = str(row['group'])[:35]
        fold = row['fold_change_re']

        if pd.notna(fold):
            if ANALYSIS_CONFIG['effect_size_type'] == 'lnRR':
                # Thresholds apply to the signed fold-change stored for lnRR
                if fold > 1.5:
                    direction = "Upregulation ↑↑"
                elif fold > 1.05:
                    direction = "Upregulation ↑"
                elif fold < -1.5:
                    direction = "Downregulation ↓↓"
                elif fold < -1.05:
                    direction = "Downregulation ↓"
                else:
                    direction = "No change ─"
            else:
                direction = "See OR values"

            print(f"  {group_name:<35} {fold:>+14.2f}× {direction:<20}")
        else:
            print(f"  {group_name:<35} {'N/A':>15} {'N/A':<20}")

# --- STEP 9: DETAILED COMPARISON BY MODERATOR ---
# Single-moderator runs get a ranking plus a largest-vs-smallest comparison;
# two-way runs get the subgroup estimates listed under each level of each
# moderator in turn.
print("\n" + "="*70)
print("STEP 9: DETAILED MODERATOR ANALYSIS")
print("="*70)


def _significance_stars(p_value):
    """Return the star label for a p-value, or "N/A" when it is missing.

    Missing p-values used to fall through every comparison and were
    reported as "ns", which reads as a tested, non-significant result.
    """
    if pd.isna(p_value):
        return "N/A"
    if p_value < 0.001:
        return "***"
    if p_value < 0.01:
        return "**"
    if p_value < 0.05:
        return "*"
    return "ns"


def _format_re_estimate(row):
    """Return (effect text, CI text) for one row of results_df.

    The effect text carries the signed random-effects estimate and, when
    the effect size supports it and the value is present, the fold-change.
    """
    effect_text = f"{row['pooled_effect_re']:+.4f}"
    if es_config['has_fold_change'] and pd.notna(row['fold_change_re']):
        effect_text += f" ({row['fold_change_re']:+.2f}×)"
    ci_text = f"[{row['ci_lower_re']:.3f}, {row['ci_upper_re']:.3f}]"
    return effect_text, ci_text


if analysis_type == 'single':
    # Single moderator - show ranking
    print(f"\n📊 Subgroups Ranked by Effect Size (Random-Effects):\n")

    results_sorted = results_df.sort_values('pooled_effect_re', ascending=False).reset_index(drop=True)

    for rank, (idx, row) in enumerate(results_sorted.iterrows(), 1):
        sig_marker = _significance_stars(row['p_value_re'])
        effect_str, ci_str = _format_re_estimate(row)

        print(f"  {rank}. {str(row['group']):30s} → {effect_str:25s} {ci_str:20s} {sig_marker:4s} (k={row['k']:2.0f}, I²={row['I_squared']:5.1f}%)")

    # Compare the extremes. Rows without a random-effects estimate sort to
    # the end, so they are left out here; otherwise a subgroup with a NaN
    # effect would be reported as the "smallest effect".
    print(f"\n📋 Pairwise Comparisons:")
    ranked_valid = results_sorted[results_sorted['pooled_effect_re'].notna()]
    if ranked_valid.empty:
        # Nothing valid to compare: fall back to the full ranking so the
        # names below stay bound and the output shows nan.
        ranked_valid = results_sorted
    largest_effect = ranked_valid.iloc[0]
    smallest_effect = ranked_valid.iloc[-1]

    effect_diff = largest_effect['pooled_effect_re'] - smallest_effect['pooled_effect_re']

    print(f"  • Largest effect: {str(largest_effect['group'])}")
    print(f"    Effect = {largest_effect['pooled_effect_re']:.4f} [{largest_effect['ci_lower_re']:.3f}, {largest_effect['ci_upper_re']:.3f}]")
    print(f"  • Smallest effect: {str(smallest_effect['group'])}")
    print(f"    Effect = {smallest_effect['pooled_effect_re']:.4f} [{smallest_effect['ci_lower_re']:.3f}, {smallest_effect['ci_upper_re']:.3f}]")
    print(f"  • Difference: {effect_diff:.4f}")

    if es_config['has_fold_change'] and pd.notna(largest_effect['fold_change_re']):
        print(f"    ({largest_effect['fold_change_re']:+.2f}× vs {smallest_effect['fold_change_re']:+.2f}×)")

else:
    # Two-way analysis - show by each moderator. The same listing is
    # produced twice with the roles of the two moderators swapped.
    for pass_number, (outer_mod, inner_mod) in enumerate(
        ((moderator1, moderator2), (moderator2, moderator1))
    ):
        if pass_number > 0:
            print(f"\n{'─'*70}")
        print(f"\n📊 Results by {outer_mod}:\n")

        for outer_level in sorted(results_df[outer_mod].unique()):
            subset = results_df[results_df[outer_mod] == outer_level]
            print(f"\n  {outer_level}:")

            for _, row in subset.iterrows():
                sig_marker = _significance_stars(row['p_value_re'])
                effect_str, ci_str = _format_re_estimate(row)

                # str() is required: the 's' format code raises ValueError
                # for non-string moderator levels (e.g. numeric codes).
                print(f"    • {str(row[inner_mod]):25s} → {effect_str:25s} {ci_str:20s} {sig_marker:4s} (k={row['k']:2.0f})")

# --- STEP 10: WITHIN-SUBGROUP HETEROGENEITY ANALYSIS ---
print("\n" + "=" * 70, "STEP 10: WITHIN-SUBGROUP HETEROGENEITY ANALYSIS", "=" * 70, sep="\n")

print(f"\n📊 Analyzing heterogeneity within each subgroup...")

# Summary statistics of I² across subgroups
i2_values = results_df['I_squared']
mean_I2 = i2_values.mean()
median_I2 = i2_values.median()
min_I2 = i2_values.min()
max_I2 = i2_values.max()

print(
    f"\n  Heterogeneity Summary (I²):",
    f"    • Mean:   {mean_I2:.2f}%",
    f"    • Median: {median_I2:.2f}%",
    f"    • Range:  {min_I2:.2f}% to {max_I2:.2f}%",
    sep="\n",
)

# Split subgroups into low / moderate / high bands by I²
high_het_threshold = 50
moderate_het_threshold = 25

is_high_het = i2_values > high_het_threshold
high_het_groups = results_df[is_high_het]
moderate_het_groups = results_df[(i2_values > moderate_het_threshold) & ~is_high_het]
low_het_groups = results_df[i2_values <= moderate_het_threshold]

print(
    f"\n  Heterogeneity Classification:",
    f"    • Low (I² ≤ {moderate_het_threshold}%):       {len(low_het_groups)} subgroups",
    f"    • Moderate ({moderate_het_threshold}% < I² ≤ {high_het_threshold}%): {len(moderate_het_groups)} subgroups",
    f"    • High (I² > {high_het_threshold}%):          {len(high_het_groups)} subgroups",
    sep="\n",
)

if not high_het_groups.empty:
    print(f"\n  🔴 Subgroups with High Heterogeneity (I² > {high_het_threshold}%):")
    print(f"\n  {'Group':<35} {'I²':>10} {'τ²':>12} {'k':>8} {'Recommendation':<30}")
    print(f"  {'-'*35} {'-'*10} {'-'*12} {'-'*8} {'-'*30}")

    for _, row in high_het_groups.iterrows():
        group_name = str(row['group'])[:35]
        i2 = row['I_squared']
        tau2 = row['tau_squared']
        k = int(row['k'])

        # Fewer than 10 effect sizes: flag data scarcity instead of
        # suggesting that the subgroup be split further.
        recommendation = "Limited data, caution needed" if k < 10 else "Consider further subdivision"

        print(f"  {group_name:<35} {i2:>9.1f}% {tau2:>12.6f} {k:>8} {recommendation:<30}")

    print(
        f"\n  💡 Recommendations for high-heterogeneity subgroups:",
        f"     1. Test additional moderators within these subgroups",
        f"     2. Check for outliers or influential studies",
        f"     3. Consider if effect size varies by other study characteristics",
        f"     4. Report prediction intervals for these subgroups",
        sep="\n",
    )

else:
    print(f"\n  ✓ No subgroups show high heterogeneity")
    print(f"    The moderator successfully reduces within-group variation")

# Overall I² versus the mean within-subgroup I²
print(
    f"\n📊 Heterogeneity Reduction:",
    f"  • Overall I² (before subgrouping): {I_squared_overall:.2f}%",
    f"  • Mean I² within subgroups: {mean_I2:.2f}%",
    f"  • Reduction: {I_squared_overall - mean_I2:.2f} percentage points",
    sep="\n",
)

if mean_I2 < I_squared_overall * 0.5:
    reduction_verdict = f"  🟢 Excellent reduction - moderator accounts for major source of heterogeneity"
elif mean_I2 < I_squared_overall * 0.75:
    reduction_verdict = f"  🟡 Good reduction - moderator partially explains heterogeneity"
else:
    reduction_verdict = f"  🟠 Limited reduction - other factors may be more important"
print(reduction_verdict)

# --- STEP 11: EFFECT DIRECTION ANALYSIS ---
# Runs only for log response ratios with fold-change support. Adds a
# 'direction' column to results_df and reports how subgroups split by sign.
if es_config['has_fold_change'] and ANALYSIS_CONFIG['effect_size_type'] == 'lnRR':
    print("\n" + "=" * 70, "STEP 11: EFFECT DIRECTION ANALYSIS", "=" * 70, sep="\n")

    print(f"\n📊 Analyzing effect directions across subgroups...")

    # Label each subgroup by the sign of its pooled effect, using a ±0.05
    # dead band on the log scale.
    results_df['direction'] = [
        'Upregulation' if x > 0.05 else 'Downregulation' if x < -0.05 else 'No change'
        for x in results_df['pooled_effect_re']
    ]

    direction_counts = results_df['direction'].value_counts()
    direction_symbols = {'Upregulation': "↑", 'Downregulation': "↓"}

    print(f"\n  Direction Distribution:")
    for direction, count in direction_counts.items():
        pct = (count / len(results_df)) * 100
        groups = results_df.loc[results_df['direction'] == direction, 'group'].tolist()
        symbol = direction_symbols.get(direction, "─")

        print(f"    {symbol} {direction}: {count} subgroups ({pct:.1f}%)")
        for group in groups:
            print(f"       • {group}")

    # Opposite signs across subgroups are called out explicitly
    upregulation_groups = results_df.loc[results_df['direction'] == 'Upregulation', 'group'].tolist()
    downregulation_groups = results_df.loc[results_df['direction'] == 'Downregulation', 'group'].tolist()

    if upregulation_groups and downregulation_groups:
        print(f"\n  🔴 HETEROGENEOUS EFFECT DIRECTIONS DETECTED")
        print(f"     Some subgroups show upregulation while others show downregulation")

        for heading, members in (
            ("Upregulation", upregulation_groups),
            ("Downregulation", downregulation_groups),
        ):
            print(f"\n     {heading}:")
            for group in members:
                # First row whose label matches this group
                matching = results_df[results_df['group'] == group]
                effect = matching['pooled_effect_re'].values[0]
                fold = matching['fold_change_re'].values[0]
                print(f"       • {group}: {effect:+.3f} ({fold:+.2f}×)")

        print(f"\n     💡 This suggests the moderator fundamentally changes the effect direction")
        print(f"        Consider this when interpreting the overall pooled effect")
    elif upregulation_groups:
        print(f"\n  ✓ All subgroups show consistent upregulation")
    elif downregulation_groups:
        print(f"\n  ✓ All subgroups show consistent downregulation")
    else:
        print(f"\n  ✓ All subgroups show no significant change")

# Continue to Part 3...
# --- STEP 12: STATISTICAL POWER ASSESSMENT ---
print("\n" + "=" * 70, "STEP 12: STATISTICAL POWER ASSESSMENT", "=" * 70, sep="\n")

print(f"\n⚡ Assessing statistical power for each subgroup...")

# Sample-size cut-offs used as a rough proxy for power
low_power_k = 10
very_low_power_k = 5
low_power_papers = 5
very_low_power_papers = 3

# Each subgroup label lands in exactly one of these three lists
adequate_power = []
limited_power = []
low_power = []

for _, row in results_df.iterrows():
    k = row['k']
    n_papers = row['n_papers']
    group = row['group']

    if k >= low_power_k and n_papers >= low_power_papers:
        power_bucket = adequate_power
    elif k >= very_low_power_k and n_papers >= very_low_power_papers:
        power_bucket = limited_power
    else:
        power_bucket = low_power
    power_bucket.append(group)

print(
    f"\n  Power Classification:",
    f"    ✓ Adequate power (k≥{low_power_k}, papers≥{low_power_papers}):     {len(adequate_power)} subgroups",
    f"    ⚠️  Limited power ({very_low_power_k}≤k<{low_power_k}):               {len(limited_power)} subgroups",
    f"    🔴 Low power (k<{very_low_power_k} or papers<{very_low_power_papers}): {len(low_power)} subgroups",
    sep="\n",
)

if low_power:
    print(f"\n  🔴 Subgroups with Low Statistical Power:")
    print(f"\n  {'Group':<40} {'k':>8} {'Papers':>10} {'Issue':<30}")
    print(f"  {'-'*40} {'-'*8} {'-'*10} {'-'*30}")

    for group in low_power:
        row = results_df[results_df['group'] == group].iloc[0]
        k = int(row['k'])
        n_papers = int(row['n_papers'])

        # Name whichever cut-off(s) this subgroup falls below
        issues = [
            message
            for triggered, message in (
                (k < very_low_power_k, f"Very few observations (k={k})"),
                (n_papers < very_low_power_papers, f"Few papers (n={n_papers})"),
            )
            if triggered
        ]
        issue_str = ", ".join(issues)
        print(f"  {group:<40} {k:>8} {n_papers:>10} {issue_str:<30}")

    print(
        f"\n  ⚠️  Interpretation Cautions for Low-Power Subgroups:",
        f"     • Non-significant results may reflect insufficient power, not absence of effect",
        f"     • Confidence intervals will be wide",
        f"     • Effect size estimates may be imprecise",
        f"     • Consider reporting these results as preliminary",
        sep="\n",
    )

elif limited_power:
    print(f"\n  ⚠️  Subgroups with Limited Power:")
    for group in limited_power:
        row = results_df[results_df['group'] == group].iloc[0]
        k = int(row['k'])
        n_papers = int(row['n_papers'])
        print(f"     • {group}: k={k}, papers={n_papers}")

    print(f"\n     → Interpret these results with moderate caution")

else:
    print(f"\n  ✓ All subgroups have adequate sample sizes for reliable estimates")

# Precision: width of the random-effects 95% CI per subgroup
print(f"\n📏 Precision Analysis:")
results_df['ci_width'] = results_df['ci_upper_re'] - results_df['ci_lower_re']

ci_widths = results_df['ci_width']
mean_ci_width = ci_widths.mean()
median_ci_width = ci_widths.median()
min_ci_width = ci_widths.min()
max_ci_width = ci_widths.max()

print(
    f"  95% CI Width Statistics:",
    f"    • Mean:   {mean_ci_width:.4f}",
    f"    • Median: {median_ci_width:.4f}",
    f"    • Range:  [{min_ci_width:.4f}, {max_ci_width:.4f}]",
    sep="\n",
)

# An interval more than twice the median width is flagged as imprecise
wide_ci_threshold = median_ci_width * 2
wide_ci_groups = results_df[results_df['ci_width'] > wide_ci_threshold]

if not wide_ci_groups.empty:
    print(f"\n  ⚠️  Subgroups with Wide CIs (> {wide_ci_threshold:.3f}):")
    for _, row in wide_ci_groups.iterrows():
        print(f"     • {row['group']}: CI width = {row['ci_width']:.4f} (k={int(row['k'])})")

# --- STEP 13: INTERPRETATION & RECOMMENDATIONS ---
# Narrative interpretation of the Q_M test: key findings when the moderator
# is significant, otherwise a note on why the pooled effect is adequate.
print("\n" + "="*70)
print("STEP 13: INTERPRETATION & RECOMMENDATIONS")
print("="*70)

# Locate the extreme subgroups before branching. These names used to be
# bound only inside the significant branch, so the non-significant branch
# raised NameError on effect_range, and later code reading the extreme rows
# failed whenever the moderator was not significant or the test was not
# applicable.
re_effects = results_df['pooled_effect_re']
if re_effects.notna().any():
    max_effect_row = results_df.loc[re_effects.idxmax()]
    min_effect_row = results_df.loc[re_effects.idxmin()]
else:
    # No subgroup has a random-effects estimate: keep the names bound to a
    # real row so the rest of the cell still runs and reports nan.
    max_effect_row = min_effect_row = results_df.iloc[0]
effect_range = max_effect_row['pooled_effect_re'] - min_effect_row['pooled_effect_re']

# Overall interpretation based on Q_M test
if pd.notna(p_value_QM):
    print(f"\n{'='*70}")
    if p_value_QM < 0.05:
        print(f"🔴 SIGNIFICANT MODERATOR EFFECT DETECTED")
        print(f"{'='*70}")

        print(f"\n📊 Statistical Evidence:")
        print(f"  • Q_between (Q_M) = {QM:.4f} (df={df_QM}, p={p_value_QM:.4g})")
        print(f"  • Effect sizes SIGNIFICANTLY differ across {analysis_label}")
        print(f"  • Moderator explains {R_squared:.1f}% of between-study heterogeneity")

        print(f"\n📈 Key Findings:")

        print(f"  1. Largest effect: {max_effect_row['group']}")
        print(f"     • Effect: {max_effect_row['pooled_effect_re']:.4f} [{max_effect_row['ci_lower_re']:.3f}, {max_effect_row['ci_upper_re']:.3f}]")
        if es_config['has_fold_change'] and pd.notna(max_effect_row['fold_change_re']):
            print(f"     • Fold-change: {max_effect_row['fold_change_re']:+.2f}×")
        print(f"     • Sample: k={int(max_effect_row['k'])}, {int(max_effect_row['n_papers'])} papers")
        print(f"     • Heterogeneity: I²={max_effect_row['I_squared']:.1f}%")

        print(f"\n  2. Smallest effect: {min_effect_row['group']}")
        print(f"     • Effect: {min_effect_row['pooled_effect_re']:.4f} [{min_effect_row['ci_lower_re']:.3f}, {min_effect_row['ci_upper_re']:.3f}]")
        if es_config['has_fold_change'] and pd.notna(min_effect_row['fold_change_re']):
            print(f"     • Fold-change: {min_effect_row['fold_change_re']:+.2f}×")
        print(f"     • Sample: k={int(min_effect_row['k'])}, {int(min_effect_row['n_papers'])} papers")
        print(f"     • Heterogeneity: I²={min_effect_row['I_squared']:.1f}%")

        print(f"\n  3. Effect Range:")
        print(f"     • Difference: {effect_range:.4f}")
        # The relative range is undefined when the smallest effect is zero
        # (or missing); report that instead of dividing by zero.
        smallest_abs_effect = abs(min_effect_row['pooled_effect_re'])
        if smallest_abs_effect > 0:
            print(f"     • Relative range: {(effect_range/smallest_abs_effect)*100:.1f}% of smallest effect")
        else:
            print(f"     • Relative range: not defined (smallest effect is zero or unavailable)")

        print(f"\n💡 Implications:")
        print(f"  ✓ The moderator ({moderator1}{f' × {moderator2}' if moderator2 else ''}) is an important source of heterogeneity")
        print(f"  ✓ Report subgroup-specific effects rather than overall pooled effect")
        print(f"  ✓ Different subgroups may have genuinely different effect sizes")
        print(f"  ✓ Consider biological/methodological explanations for differences")

        print(f"\n📝 Reporting Recommendations:")
        print(f"  1. State: 'Subgroup analysis revealed significant differences (Q_M={QM:.2f}, p={p_value_QM:.3g})'")
        print(f"  2. Report effect for each subgroup separately")
        print(f"  3. Discuss why different subgroups show different effects")
        print(f"  4. Consider subgroup as primary analysis unit")

    else:
        print(f"⚪ NO SIGNIFICANT MODERATOR EFFECT")
        print(f"{'='*70}")

        print(f"\n📊 Statistical Evidence:")
        print(f"  • Q_between (Q_M) = {QM:.4f} (df={df_QM}, p={p_value_QM:.4g})")
        print(f"  • Effect sizes DO NOT significantly differ across {analysis_label}")
        print(f"  • Moderator explains only {R_squared:.1f}% of between-study heterogeneity")

        print(f"\n💡 Interpretation:")
        print(f"  • Observed differences between subgroups likely due to sampling variation")
        print(f"  • Overall pooled effect is appropriate to report")
        print(f"  • This moderator is not a major source of heterogeneity")

        # However, check if some subgroups still differ substantially
        if effect_range > 0.5:  # Substantial practical difference
            print(f"\n  ⚠️  Note: While not statistically significant, effect sizes show")
            print(f"     substantial variation (range = {effect_range:.3f})")
            print(f"     This may reflect:")
            print(f"       • Insufficient statistical power")
            print(f"       • Large within-group heterogeneity")
            print(f"       • Real but modest differences")

        print(f"\n📝 Reporting Recommendations:")
        print(f"  1. State: 'Subgroup analysis found no significant differences (Q_M={QM:.2f}, p={p_value_QM:.3g})'")
        print(f"  2. Report overall pooled effect as primary result")
        print(f"  3. May mention subgroup effects as exploratory")
        if I_squared_overall > 50:
            print(f"  4. Note: Explore other potential moderators (overall I²={I_squared_overall:.1f}% remains high)")
# Heterogeneity interpretation
# Rates the mean within-subgroup I² (high > 50, moderate > 25, else low)
# and prints the matching guidance.
print("\n" + "─" * 70)
print(f"\n📊 Residual Heterogeneity Assessment:")

if mean_I2 > 50:
    print(
        f"\n  🔴 HIGH RESIDUAL HETEROGENEITY",
        f"     Mean within-subgroup I² = {mean_I2:.1f}%",
        f"\n     Interpretation:",
        f"     • Substantial unexplained variation remains within subgroups",
        f"     • Additional moderators are likely present",
        f"     • The chosen moderator only partially explains heterogeneity",
        f"\n     Recommended Actions:",
        f"     1. Test additional moderators:",
        sep="\n",
    )

    # Suggest up to five moderators not already used in this analysis
    moderators_in_use = [moderator1, moderator2]
    other_mods = [m for m in available_moderators if m not in moderators_in_use][:5]
    if other_mods:
        print(f"        Candidates: {', '.join(other_mods)}")

    if analysis_type == 'single':
        print(f"     2. Consider two-way analysis to test interactions")
    else:
        print(f"     2. Consider three-way analysis if sufficient data")

    print(
        f"     3. Check for outliers within subgroups",
        f"     4. Assess study quality as potential moderator",
        f"     5. Consider continuous moderators (meta-regression)",
        sep="\n",
    )

elif mean_I2 > 25:
    print(
        f"\n  🟡 MODERATE RESIDUAL HETEROGENEITY",
        f"     Mean within-subgroup I² = {mean_I2:.1f}%",
        f"\n     Interpretation:",
        f"     • Some unexplained variation remains",
        f"     • Results are interpretable but not completely homogeneous",
        f"     • The moderator explains most, but not all, heterogeneity",
        f"\n     Optional Actions:",
        f"     • Test additional moderators if interested",
        f"     • Current analysis is adequate for most purposes",
        f"     • Report random-effects model to account for residual variation",
        sep="\n",
    )

else:
    print(
        f"\n  🟢 LOW RESIDUAL HETEROGENEITY",
        f"     Mean within-subgroup I² = {mean_I2:.1f}%",
        f"\n     Interpretation:",
        f"     • The moderator successfully explains most heterogeneity",
        f"     • Within-group effects are relatively homogeneous",
        f"     • This is an effective subgroup classification",
        f"\n     ✓ Excellent result - moderator is highly explanatory",
        sep="\n",
    )

# Overall heterogeneity reduction: percentage-point drop from the overall
# I² to the mean within-subgroup I².
het_reduction = I_squared_overall - mean_I2

print("\n" + "─" * 70)
print(
    f"\n📉 Heterogeneity Reduction Summary:",
    f"  • Before subgrouping: I² = {I_squared_overall:.2f}%",
    f"  • After subgrouping:  I² = {mean_I2:.2f}% (mean within-group)",
    f"  • Reduction: {het_reduction:.2f} percentage points",
    f"  • Variance explained (R²): {R_squared:.1f}%",
    sep="\n",
)

# The rating compares the drop against half and a quarter of the overall I²
if het_reduction > I_squared_overall * 0.5:
    print(f"\n  🟢 Excellent heterogeneity reduction (>{I_squared_overall*0.5:.0f} points)")
    print(f"     The moderator accounts for the major source of variation")
elif het_reduction > I_squared_overall * 0.25:
    print(f"\n  🟡 Good heterogeneity reduction (>{I_squared_overall*0.25:.0f} points)")
    print(f"     The moderator partially explains variation")
else:
    print(f"\n  🟠 Limited heterogeneity reduction (<{I_squared_overall*0.25:.0f} points)")
    print(f"     Other factors may be more important")

# Next steps based on analysis type
# A fixed checklist, followed by items that depend on the Q_M result, the
# analysis type and the residual heterogeneity.
print("\n" + "─" * 70)
print(f"\n📋 Recommended Next Steps:")

print(
    f"\n  Immediate:",
    f"  1. ✓ Create forest plot to visualize subgroup effects (next cell)",
    f"  2. ✓ Include subgroup analysis table in manuscript",
    f"  3. ✓ Report Q_M test results and variance explained",
    sep="\n",
)

if p_value_QM < 0.05:
    moderator_steps = [
        f"\n  For Significant Moderator:",
        f"  4. Discuss biological/methodological reasons for subgroup differences",
        f"  5. Consider whether findings generalize across subgroups",
        f"  6. Identify which subgroup(s) most relevant to your research question",
    ]
else:
    moderator_steps = [
        f"\n  For Non-Significant Moderator:",
        f"  4. Test alternative moderators if overall heterogeneity remains high",
        f"  5. Consider meta-regression for continuous moderators",
    ]
print("\n".join(moderator_steps))

# Suggest a two-way run only when another moderator is available
if analysis_type == 'single' and len(available_moderators) > 1:
    print(f"\n  Future Analyses:")
    print(f"  7. Consider two-way analysis to test interactions between moderators")

if mean_I2 > 50:
    print(
        f"\n  To Address Residual Heterogeneity:",
        f"  8. Perform sensitivity analyses (leave-one-out)",
        f"  9. Check for outliers within high-heterogeneity subgroups",
        f"  10. Consider study quality as additional moderator",
        sep="\n",
    )

# --- STEP 14: SAVE RESULTS ---
print("\n" + "="*70)
print("STEP 14: SAVING RESULTS")
print("="*70)

# Save comprehensive results
ANALYSIS_CONFIG['subgroup_results'] = {
    'timestamp': datetime.datetime.now(),
    'results_df': results_df.copy(),
    'analysis_type': analysis_type,
    'moderator1': moderator1,
    'moderator2': moderator2,
    'n_subgroups': len(results_df),
    'n_subgroups_excluded': len(groups_discarded),

    # Heterogeneity partitioning
    'Qt_overall': Qt_overall,
    'QM': QM,
    'Qe': Qe,
    'df_QM': df_QM,
    'df_Qe': df_Qe,
    'p_value_QM': p_value_QM,
    'prop_explained': prop_explained,
    'R_squared': R_squared,

    # Summary statistics
    'mean_I2_within': mean_I2,
    'median_I2_within': median_I2,
    'I2_reduction': het_reduction,

    # Effect size range
    'max_effect': max_effect_row['pooled_effect_re'],
    'min_effect': min_effect_row['pooled_effect_re'],
    'effect_range': effect_range,
    'max_effect_group': max_effect_row['group'],
    'min_effect_group': min_effect_row['group'],

    # Power assessment
    'n_adequate_power': len(adequate_power),
    'n_limited_power': len(limited_power),
    'n_low_power': len(low_power),

    # Groups excluded
    'groups_excluded': groups_discarded
}

print(f"\n✓ Results saved to ANALYSIS_CONFIG['subgroup_results']")

print(f"\n📊 Saved Components:")
print(f"  • Complete results DataFrame ({len(results_df)} subgroups)")
print(f"  • Heterogeneity decomposition (Q_M, Q_E, R²)")
print(f"  • Statistical tests (p-values, CIs)")
print(f"  • Power assessment")
print(f"  • Effect size summaries")

# Create metadata for AI/downstream use
SUBGROUP_ANALYSIS_METADATA = {
    'timestamp': datetime.datetime.now(),
    'analysis_completed': True,
    'moderator_significant': p_value_QM < 0.05 if pd.notna(p_value_QM) else False,
    'variance_explained_category': 'high' if R_squared > 50 else 'moderate' if R_squared > 25 else 'low',
    'residual_heterogeneity_category': 'high' if mean_I2 > 50 else 'moderate' if mean_I2 > 25 else 'low',
    'n_groups_analyzed': len(results_df),
    'recommended_model': 'subgroup-specific' if p_value_QM < 0.05 else 'overall-pooled'
}

print(f"\n✓ Metadata saved to SUBGROUP_ANALYSIS_METADATA")

# --- STEP 15: CREATE PUBLICATION-READY TABLE ---
print("\n" + "="*70)
print("STEP 15: PUBLICATION-READY SUMMARY TABLE")
print("="*70)

print(f"\n📊 Generating exportable summary table...")

# Work on a copy so the raw results table is left untouched.
export_df = results_df.copy()

# Merge the random-effects estimate and its interval into one display string.
# A comprehension is used instead of DataFrame.apply(axis=1) so that an empty
# results table simply produces an empty column.
export_df['Effect [95% CI]'] = [
    f"{effect:.3f} [{lower:.3f}, {upper:.3f}]" if pd.notna(effect) else "N/A"
    for effect, lower, upper in zip(
        export_df['pooled_effect_re'],
        export_df['ci_lower_re'],
        export_df['ci_upper_re']
    )
]

# Only the leading identifier columns (and τ² for single-factor runs) differ
# between the two analysis types; the remaining headers are shared.
if analysis_type == 'single':
    export_cols = ['group', 'k', 'n_papers', 'Effect [95% CI]', 'p_value_re', 'I_squared', 'tau_squared']
    col_rename = {'group': 'Subgroup', 'tau_squared': 'τ²'}
else:
    export_cols = [moderator1, moderator2, 'k', 'n_papers', 'Effect [95% CI]', 'p_value_re', 'I_squared']
    col_rename = {}

col_rename.update({
    'n_papers': 'Studies',
    'fold_change_re': 'Fold-Change',
    'p_value_re': 'P-value',
    'I_squared': 'I² (%)'
})

# The fold-change column is optional. Use .get() (as the forest-plot cell does)
# so a configuration without the key does not raise, and require the column to
# exist before selecting it. It is placed directly after the effect column.
if es_config.get('has_fold_change', False) and 'fold_change_re' in export_df.columns:
    export_cols.insert(export_cols.index('Effect [95% CI]') + 1, 'fold_change_re')

export_table = export_df[export_cols].rename(columns=col_rename)

print(f"\n📋 Table for Manuscript (Random-Effects Model):\n")

# Display with proper formatting. P-values get their own formatter so that
# very small values are shown as '<0.001' rather than a misleading '0.000';
# this affects the printed text only, export_table keeps the numeric values.
print(export_table.to_string(
    index=False,
    float_format=lambda x: f'{x:.3f}' if abs(x) < 100 else f'{x:.1f}',
    formatters={'P-value': lambda p: '<0.001' if p < 0.001 else f'{p:.3f}'}
))

# Add table footer with statistics
print(f"\n{'─'*70}")
print(f"Test for subgroup differences: Q_M = {QM:.3f}, df = {df_QM}, P = {p_value_QM:.4g}")
print(f"Variance explained (R²): {R_squared:.1f}%")
print(f"Residual heterogeneity: Mean I² = {mean_I2:.1f}%")
print(f"{'─'*70}")

# Save instructions
print(f"\n💾 Export Options:")
print(f"  1. Copy table above directly into manuscript")
print(f"  2. Export to CSV:")
print(f"     ANALYSIS_CONFIG['subgroup_results']['results_df'].to_csv('subgroup_results.csv', index=False)")
print(f"  3. Export formatted table:")
print(f"     export_table.to_csv('subgroup_table_formatted.csv', index=False)")

# --- FINAL STATUS ---
# Closing banner followed by a recap of the headline numbers of this cell.
print(f"\n{'=' * 70}")
print("✅ SUBGROUP ANALYSIS COMPLETE")
print("=" * 70)

print(
    "\n📊 Analysis Summary:",
    f"  • Subgroups analyzed: {len(results_df)}",
    f"  • Total observations: {len(analysis_data_subgroups)}",
    f"  • Moderator: {analysis_label}",
    f"  • Significant difference: {'YES' if p_value_QM < 0.05 else 'NO'} (p={p_value_QM:.4g})",
    f"  • Variance explained: {R_squared:.1f}%",
    f"  • Mean residual I²: {mean_I2:.1f}%",
    sep="\n",
)

# The heading is common to both outcomes; only the two detail lines differ.
print("\n🎯 Key Finding:")
if p_value_QM < 0.05:
    print(f"  Effect sizes SIGNIFICANTLY differ across {analysis_label}")
    print("  Report subgroup-specific effects in your manuscript")
else:
    print("  No significant differences between subgroups")
    print("  Overall pooled effect is appropriate to report")

print(
    "\n▶️  Next Steps:",
    "  1. Create forest plot for visualization (next cell)",
    "  2. Review and interpret the results above",
    "  3. Consider additional analyses if needed",
    sep="\n",
)

print(f"\n{'=' * 70}")


SUBGROUP ANALYSIS EXECUTION
Timestamp: 2025-11-14 16:24:58

STEP 1: LOADING CONFIGURATION
✓ Configuration loaded successfully

  📊 Analysis Parameters:
     Type: SINGLE
     Primary moderator: Inoculation
     Min papers/group: 3
     Min observations/group: 5

  📊 Overall Meta-Analysis Context:
     Total observations: 69
     Overall Q: 475.6386
     Overall I²: 85.70%

STEP 2: PREPARING DATA FOR SUBGROUP ANALYSIS

📊 Single-Factor Analysis: by Inoculation
  ✓ Inoculation prepared (2 unique values)

STEP 3: IDENTIFYING VALID SUBGROUPS

🔍 Calculating group statistics...
  ✓ Found 2 potential subgroups

📋 Applying quality thresholds:
  • Minimum papers: 3
  • Minimum observations: 5

📊 Filtering Results:
  ✓ Groups retained: 2
  ✓ Total observations: 69
  ✓ Retention rate: 100.0%

  ✓ No groups excluded (all meet quality criteria)

STEP 4: CALCULATING SUBGROUP-SPECIFIC EFFECTS

🔬 Performing meta-analysis within each subgroup...

────────────────────────────────────────────────────────

In [None]:
#@title 📊 DYNAMIC FOREST PLOT (Publication-Ready)

# =============================================================================
# CELL 9: PUBLICATION-READY FOREST PLOT
# Purpose: Create customizable forest plots for meta-analysis results
# Dependencies: Cell 6 (overall_results), Cell 8 (subgroup_results)
# Outputs: PDF and PNG forest plots with full customization
# =============================================================================

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.stats import norm
import datetime
from matplotlib.patches import Patch, Rectangle
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output

# --- 1. LOAD CONFIGURATION ---
print("=" * 70, "FOREST PLOT CONFIGURATION", "=" * 70, sep="\n")

try:
    # ANALYSIS_CONFIG is expected to have been created by an earlier cell.
    if not ('ANALYSIS_CONFIG' in locals() or 'ANALYSIS_CONFIG' in globals()):
        raise NameError("ANALYSIS_CONFIG not found.")

    # Subgroup results are optional; overall results and the effect-size
    # configuration are required (a missing key raises KeyError).
    subgroup_results = ANALYSIS_CONFIG.get('subgroup_results', {})
    overall_results = ANALYSIS_CONFIG['overall_results']
    es_config = ANALYSIS_CONFIG['es_config']

    # Subgroups count as present only if the entry is non-empty and carries
    # a results table.
    has_subgroups = ('results_df' in subgroup_results) if subgroup_results else False

    if not has_subgroups:
        # Only the overall pooled effect will be plotted.
        analysis_type = 'overall_only'
        default_title = 'Forest Plot: Overall Effect'
        default_y_label = 'Study'
        moderator1 = None
        moderator2 = None
    else:
        analysis_type = subgroup_results['analysis_type']
        moderator1 = subgroup_results['moderator1']
        moderator2 = subgroup_results.get('moderator2', None)
        results_df = subgroup_results['results_df']

        # Default title and y-label are derived from the moderator names.
        if analysis_type == 'two_way':
            default_title, default_y_label = f'Forest Plot: {moderator1} × {moderator2}', moderator2
        else:
            default_title, default_y_label = f'Forest Plot: {moderator1}', moderator1

    default_x_label = es_config.get('effect_label', "Effect Size")

    print(
        f"✓ Analysis type: {analysis_type}",
        f"✓ Has subgroups: {has_subgroups}",
        "✓ Configuration loaded successfully",
        sep="\n",
    )

except (KeyError, NameError) as config_error:
    # Report the problem, then re-raise so the cell stops here.
    print(f"❌ ERROR: Failed to load configuration: {config_error}")
    print("   Please run Cell 6 (overall analysis) first")
    raise

# --- 2. DEFINE CUSTOMIZATION WIDGETS ---

# ========== TAB 1: PLOT STYLE ==========
def _style_tab_kwargs(slider=False):
    """Build the keyword arguments shared by every control on the Style tab.

    A new ``Layout`` is created on each call so that no two widgets share a
    layout object. When ``slider`` is true, ``continuous_update=False`` is
    added as well.
    """
    shared = {
        'style': {'description_width': '130px'},
        'layout': widgets.Layout(width='450px'),
    }
    if slider:
        shared['continuous_update'] = False
    return shared


style_header = widgets.HTML("<h3 style='color: #2E86AB;'>Plot Style & Layout</h3>")

# Which pooled model to draw.
model_widget = widgets.Dropdown(
    description='Model:',
    options=[('Random-Effects', 'RE'), ('Fixed-Effects', 'FE')],
    value='RE',
    **_style_tab_kwargs()
)

# Figure dimensions.
width_widget = widgets.FloatSlider(
    description='Plot Width (in):',
    value=8.0, min=6.0, max=14.0, step=0.5,
    **_style_tab_kwargs(slider=True)
)
height_widget = widgets.FloatSlider(
    description='Height per Row (in):',
    value=0.4, min=0.2, max=1.0, step=0.05,
    **_style_tab_kwargs(slider=True)
)

# Font sizes.
title_fontsize_widget = widgets.IntSlider(
    description='Title Font Size:',
    value=12, min=8, max=18, step=1,
    **_style_tab_kwargs(slider=True)
)
label_fontsize_widget = widgets.IntSlider(
    description='Axis Label Size:',
    value=11, min=8, max=16, step=1,
    **_style_tab_kwargs(slider=True)
)
tick_fontsize_widget = widgets.IntSlider(
    description='Tick Label Size:',
    value=9, min=6, max=14, step=1,
    **_style_tab_kwargs(slider=True)
)
annot_fontsize_widget = widgets.IntSlider(
    description='Annotation Size:',
    value=8, min=6, max=12, step=1,
    **_style_tab_kwargs(slider=True)
)

# Colours, markers and confidence-interval line style.
color_scheme_widget = widgets.Dropdown(
    description='Color Scheme:',
    options=[
        ('Grayscale (Publication)', 'gray'),
        ('Color (Presentation)', 'color'),
        ('Black & White Only', 'bw'),
    ],
    value='gray',
    **_style_tab_kwargs()
)
marker_style_widget = widgets.Dropdown(
    description='Marker Style:',
    options=[
        ('Circle/Diamond (●/◆)', 'circle_diamond'),
        ('Square/Diamond (■/◆)', 'square_diamond'),
        ('Circle/Star (●/★)', 'circle_star'),
    ],
    value='circle_diamond',
    **_style_tab_kwargs()
)
ci_style_widget = widgets.Dropdown(
    description='CI Line Style:',
    options=[
        ('Solid Line', 'solid'),
        ('Dashed Line', 'dashed'),
        ('Solid with Caps', 'caps'),
    ],
    value='solid',
    **_style_tab_kwargs()
)

# Assemble the tab: header, model choice, then three titled sections.
style_tab = widgets.VBox([
    style_header,
    model_widget,
    widgets.HTML("<hr style='margin: 10px 0;'>"),
    widgets.HTML("<b>Dimensions:</b>"),
    width_widget,
    height_widget,
    widgets.HTML("<hr style='margin: 10px 0;'>"),
    widgets.HTML("<b>Typography:</b>"),
    title_fontsize_widget,
    label_fontsize_widget,
    tick_fontsize_widget,
    annot_fontsize_widget,
    widgets.HTML("<hr style='margin: 10px 0;'>"),
    widgets.HTML("<b>Visual Style:</b>"),
    color_scheme_widget,
    marker_style_widget,
    ci_style_widget,
])

# ========== TAB 2: TEXT & LABELS ==========
text_header = widgets.HTML("<h3 style='color: #2E86AB;'>Text & Labels</h3>")

# Title: visibility toggle plus editable text (pre-filled with default_title).
show_title_widget = widgets.Checkbox(
    description='Show Plot Title', value=True, indent=False,
    layout=widgets.Layout(width='450px'))

title_widget = widgets.Text(
    description='Plot Title:', value=default_title,
    style={'description_width': '130px'},
    layout=widgets.Layout(width='450px'))

# Axis labels, pre-filled with default_x_label / default_y_label.
xlabel_widget = widgets.Text(
    description='X-Axis Label:', value=default_x_label,
    style={'description_width': '130px'},
    layout=widgets.Layout(width='450px'))

ylabel_widget = widgets.Text(
    description='Y-Axis Label:', value=default_y_label,
    style={'description_width': '130px'},
    layout=widgets.Layout(width='450px'))

show_ylabel_widget = widgets.Checkbox(
    description='Show Y-Axis Label', value=True, indent=False,
    layout=widgets.Layout(width='450px'))

# Tab layout: title controls, a separator, then the axis-label controls.
text_tab = widgets.VBox([
    text_header,
    show_title_widget,
    title_widget,
    widgets.HTML("<hr style='margin: 10px 0;'>"),
    xlabel_widget,
    show_ylabel_widget,
    ylabel_widget,
])

# ========== TAB 3: ANNOTATIONS ==========
def _annot_checkbox(description, value=True):
    """Create one un-indented, 450px-wide checkbox for the Annotations tab."""
    return widgets.Checkbox(
        value=value,
        description=description,
        indent=False,
        layout=widgets.Layout(width='450px'),
    )


def _annot_slider_kwargs():
    """Fresh keyword arguments shared by the sliders on the Annotations tab."""
    return {
        'continuous_update': False,
        'style': {'description_width': '130px'},
        'layout': widgets.Layout(width='450px'),
    }


annot_header = widgets.HTML("<h3 style='color: #2E86AB;'>Annotations</h3>")

# What to include in the per-row annotation text.
show_k_widget = _annot_checkbox('Show k (observations)')
show_papers_widget = _annot_checkbox('Show paper count')
# Initial state follows the effect-size configuration's 'has_fold_change' flag.
show_fold_change_widget = _annot_checkbox(
    'Show Fold-Change', value=es_config.get('has_fold_change', False))

# Where to place the annotation relative to the marker / interval.
annot_pos_widget = widgets.Dropdown(
    description='Position:',
    options=[
        ('Right of CI', 'right'),
        ('Above Marker', 'above'),
        ('Below Marker', 'below'),
    ],
    value='right',
    style={'description_width': '130px'},
    layout=widgets.Layout(width='450px'),
)

annot_offset_widget = widgets.FloatSlider(
    description='H-Offset:',
    value=0.0, min=-1.0, max=1.0, step=0.05,
    readout_format='.2f',
    **_annot_slider_kwargs()
)

# Group-label controls exist only for two-way subgroup analyses; otherwise an
# empty container keeps the tab layout uniform.
if has_subgroups and analysis_type == 'two_way':
    group_label_h_offset_widget = widgets.FloatSlider(
        description='Group H-Offset:',
        value=0.0, min=-2.0, max=2.0, step=0.1,
        **_annot_slider_kwargs()
    )
    group_label_v_offset_widget = widgets.FloatSlider(
        description='Group V-Offset:',
        value=0.0, min=-1.0, max=1.0, step=0.1,
        **_annot_slider_kwargs()
    )
    group_label_fontsize_widget = widgets.IntSlider(
        description='Group Font Size:',
        value=10, min=7, max=14, step=1,
        **_annot_slider_kwargs()
    )
    group_label_box = widgets.VBox([
        widgets.HTML("<hr style='margin: 10px 0;'>"),
        widgets.HTML("<b>Group Labels (Two-Way):</b>"),
        group_label_h_offset_widget,
        group_label_v_offset_widget,
        group_label_fontsize_widget,
    ])
else:
    group_label_box = widgets.VBox()

annot_tab = widgets.VBox([
    annot_header,
    widgets.HTML("<b>Show in Annotations:</b>"),
    show_k_widget,
    show_papers_widget,
    show_fold_change_widget,
    widgets.HTML("<hr style='margin: 10px 0;'>"),
    widgets.HTML("<b>Position:</b>"),
    annot_pos_widget,
    annot_offset_widget,
    group_label_box,
])

# ========== TAB 4: AXES & SCALE ==========
axes_header = widgets.HTML("<h3 style='color: #2E86AB;'>Axes & Scaling</h3>")

auto_scale_widget = widgets.Checkbox(
    description='Auto-Scale X-Axis', value=True, indent=False,
    layout=widgets.Layout(width='450px'))

# Manual X limits start hidden because auto-scaling is on by default.
x_min_widget, x_max_widget = [
    widgets.FloatText(
        value=limit_value,
        description=limit_label,
        style={'description_width': '80px'},
        layout=widgets.Layout(width='220px', visibility='hidden'),
    )
    for limit_value, limit_label in [(-2.0, 'X-Min:'), (2.0, 'X-Max:')]
]

manual_scale_box = widgets.HBox([x_min_widget, x_max_widget])


def toggle_manual_scale(change):
    """Hide the manual X-limit inputs when auto-scale is ticked, show them otherwise.

    ``change`` is the observer payload; only its ``'new'`` entry (the new
    checkbox value) is read.
    """
    visibility = 'hidden' if change['new'] else 'visible'
    for limit_input in (x_min_widget, x_max_widget):
        limit_input.layout.visibility = visibility


auto_scale_widget.observe(toggle_manual_scale, names='value')

# Grid and reference-line options.
show_grid_widget = widgets.Checkbox(
    description='Show Grid', value=True, indent=False,
    layout=widgets.Layout(width='450px'))

grid_style_widget = widgets.Dropdown(
    description='Grid Style:',
    options=[
        ('Dashed (Light)', 'dashed_light'),
        ('Dotted (Light)', 'dotted_light'),
        ('Solid (Light)', 'solid_light'),
    ],
    value='dashed_light',
    style={'description_width': '130px'},
    layout=widgets.Layout(width='450px'))

show_null_line_widget = widgets.Checkbox(
    description='Show Null Effect Line', value=True, indent=False,
    layout=widgets.Layout(width='450px'))

# Ticked initially only when the configuration has fold-change enabled and the
# fold-change annotation checkbox is currently ticked.
show_fold_axis_widget = widgets.Checkbox(
    description='Show Fold-Change Axis (Top)',
    value=es_config.get('has_fold_change', False) and show_fold_change_widget.value,
    indent=False,
    layout=widgets.Layout(width='450px'))

axes_tab = widgets.VBox([
    axes_header,
    auto_scale_widget,
    manual_scale_box,
    widgets.HTML("<hr style='margin: 10px 0;'>"),
    widgets.HTML("<b>Grid & Reference Lines:</b>"),
    show_grid_widget,
    grid_style_widget,
    show_null_line_widget,
    widgets.HTML("<hr style='margin: 10px 0;'>"),
    show_fold_axis_widget,
])

# ========== TAB 5: EXPORT OPTIONS ==========
export_header = widgets.HTML("<h3 style='color: #2E86AB;'>Export Options</h3>")

# Output formats; both are ticked by default.
save_pdf_widget = widgets.Checkbox(
    description='Save as PDF', value=True, indent=False,
    layout=widgets.Layout(width='450px'))

save_png_widget = widgets.Checkbox(
    description='Save as PNG', value=True, indent=False,
    layout=widgets.Layout(width='450px'))

# PNG resolution, adjustable from 150 to 600 in steps of 50.
png_dpi_widget = widgets.IntSlider(
    description='PNG DPI:',
    value=300, min=150, max=600, step=50,
    continuous_update=False,
    style={'description_width': '130px'},
    layout=widgets.Layout(width='450px'))

# File naming and background.
filename_prefix_widget = widgets.Text(
    description='Filename Prefix:', value='ForestPlot',
    style={'description_width': '130px'},
    layout=widgets.Layout(width='450px'))

transparent_bg_widget = widgets.Checkbox(
    description='Transparent Background', value=False, indent=False,
    layout=widgets.Layout(width='450px'))

export_tab = widgets.VBox([
    export_header,
    save_pdf_widget,
    save_png_widget,
    png_dpi_widget,
    widgets.HTML("<hr style='margin: 10px 0;'>"),
    filename_prefix_widget,
    transparent_bg_widget,
])

# ========== TAB 6: LABEL EDITOR ==========
label_editor_header = widgets.HTML("<h3 style='color: #2E86AB;'>Label Editor</h3>")
label_editor_desc = widgets.HTML(
    "<p style='color: #666;'><i>Customize display names for all groups and subgroups in the plot</i></p>"
)

print("\n🔍 Identifying labels for editor...")

# Original label text -> Text widget holding its (editable) display name.
unique_labels = set()
label_widgets_dict = {}

try:
    if has_subgroups:
        # Single-factor results keep labels in 'group'; two-way results keep
        # them in the two moderator columns.
        label_columns = ['group'] if analysis_type == 'single' else [moderator1, moderator2]
        for label_column in label_columns:
            unique_labels.update(results_df[label_column].astype(str).unique())

    # The overall effect row always gets an editable label as well.
    unique_labels.add('Overall')
    sorted_labels = sorted(unique_labels)

    print(f"  ✓ Found {len(sorted_labels)} unique labels")

    # One text box per label, initialised with the original name.
    for label in sorted_labels:
        label_key = str(label)
        label_widgets_dict[label_key] = widgets.Text(
            value=label_key,
            description="Overall Effect:" if label == 'Overall' else f"{label}:",
            layout=widgets.Layout(width='500px'),
            style={'description_width': '200px'},
        )
    label_editor_widgets = list(label_widgets_dict.values())

    label_editor_tab = widgets.VBox(
        [
            label_editor_header,
            label_editor_desc,
            widgets.HTML("<hr style='margin: 10px 0;'>"),
            widgets.HTML(
                "<p><b>Instructions:</b> Edit the text on the right to change how labels appear in the plot. "
                "The original coded names are shown on the left.</p>"
            ),
            widgets.HTML("<hr style='margin: 10px 0;'>"),
        ]
        + label_editor_widgets
    )

    print("  ✓ Label editor created")

except Exception as e:
    # Best effort: if the editor cannot be built, show an error tab and fall
    # back to an empty mapping so the rest of the cell can still run.
    print(f"  ⚠️  Error creating label editor: {e}")
    label_editor_tab = widgets.VBox([
        label_editor_header,
        widgets.HTML("<p style='color: red;'>Error creating label editor.</p>")
    ])
    label_widgets_dict = {}

# ========== CREATE TAB WIDGET ==========
# Tabs appear in this order; each title below is paired with the child at the
# same position.
tab_children = [style_tab, text_tab, annot_tab, axes_tab, export_tab, label_editor_tab]
tab = widgets.Tab(children=tab_children)
for tab_index, tab_title in enumerate([
    '🎨 Style',
    '📝 Text',
    '🏷️ Annotations',
    '📏 Axes',
    '💾 Export',
    '✏️ Labels',
]):
    tab.set_title(tab_index, tab_title)

# --- 3. DEFINE PLOT GENERATION FUNCTION ---
# Output area used by the plot generation function defined next.
plot_output = widgets.Output()

def generate_plot(b):
    with plot_output:
        clear_output(wait=True)

        print("\n" + "="*70)
        print("GENERATING FOREST PLOT")
        print("="*70)
        print(f"Timestamp: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

        try:
            # --- GET WIDGET VALUES ---
            plot_model = model_widget.value
            plot_width = width_widget.value
            height_per_row = height_widget.value
            title_fontsize = title_fontsize_widget.value
            label_fontsize = label_fontsize_widget.value
            tick_fontsize = tick_fontsize_widget.value
            annot_fontsize = annot_fontsize_widget.value
            color_scheme = color_scheme_widget.value
            marker_style = marker_style_widget.value
            ci_style = ci_style_widget.value

            show_title = show_title_widget.value
            graph_title = title_widget.value
            x_label = xlabel_widget.value
            show_ylabel = show_ylabel_widget.value
            y_label = ylabel_widget.value

            show_k = show_k_widget.value
            show_papers = show_papers_widget.value
            show_fold_change = show_fold_change_widget.value
            annot_pos = annot_pos_widget.value
            annot_offset = annot_offset_widget.value

            auto_scale = auto_scale_widget.value
            x_min_manual = x_min_widget.value
            x_max_manual = x_max_widget.value
            show_grid = show_grid_widget.value
            grid_style = grid_style_widget.value
            show_null_line = show_null_line_widget.value
            show_fold_axis = show_fold_axis_widget.value

            save_pdf = save_pdf_widget.value
            save_png = save_png_widget.value
            png_dpi = png_dpi_widget.value
            filename_prefix = filename_prefix_widget.value
            transparent_bg = transparent_bg_widget.value

            # Group label offsets (two-way only)
            if has_subgroups and analysis_type == 'two_way':
                group_label_h_offset = group_label_h_offset_widget.value
                group_label_v_offset = group_label_v_offset_widget.value
                group_label_fontsize = group_label_fontsize_widget.value
            else:
                group_label_h_offset = 0
                group_label_v_offset = 0
                group_label_fontsize = 10

            # --- BUILD LABEL MAPPING FROM EDITOR ---
            label_mapping = {}
            for original_label, widget in label_widgets_dict.items():
                custom_label = widget.value
                label_mapping[original_label] = custom_label
                label_mapping[str(original_label)] = custom_label

            print(f"📊 Configuration:")
            print(f"  Model: {plot_model}")
            print(f"  Dimensions: {plot_width}\" × auto")
            print(f"  Color scheme: {color_scheme}")
            print(f"  Has subgroups: {has_subgroups}")

            # Show custom labels if any were changed
            changed_labels = {k: v for k, v in label_mapping.items() if k != v}
            if changed_labels:
                print(f"\n📝 Custom labels ({len(changed_labels)} changed):")
                for orig, custom in list(changed_labels.items())[:5]:
                    print(f"  '{orig}' → '{custom}'")
                if len(changed_labels) > 5:
                    print(f"  ... and {len(changed_labels)-5} more")

            overall_label_text = label_mapping.get('Overall', 'Overall Effect')

            # --- DETERMINE COLUMN NAMES BASED ON MODEL ---
            if plot_model == 'FE':
                effect_col = 'pooled_effect_fe'
                se_col = 'pooled_se_fe'
                ci_lower_col = 'ci_lower_fe'
                ci_upper_col = 'ci_upper_fe'
                fold_col = 'fold_change_fe'

                overall_effect_key = 'pooled_effect_fixed'
                overall_se_key = 'pooled_SE_fixed'
                overall_ci_lower_key = 'ci_lower_fixed'
                overall_ci_upper_key = 'ci_upper_fixed'
                overall_fold_key = 'pooled_fold_fixed'
            else:  # RE
                effect_col = 'pooled_effect_re'
                se_col = 'pooled_se_re'
                ci_lower_col = 'ci_lower_re'
                ci_upper_col = 'ci_upper_re'
                fold_col = 'fold_change_re'

                overall_effect_key = 'pooled_effect_random'
                overall_se_key = 'pooled_SE_random'
                overall_ci_lower_key = 'ci_lower_random'
                overall_ci_upper_key = 'ci_upper_random'
                overall_fold_key = 'pooled_fold_random'

            # --- PREPARE DATA ---
            if has_subgroups:
                plot_df_subgroups = results_df.copy()

                plot_df_subgroups = plot_df_subgroups.rename(columns={
                    effect_col: 'EffectSize',
                    se_col: 'SE',
                    ci_lower_col: 'CI_Lower',
                    ci_upper_col: 'CI_Upper',
                    fold_col: 'FoldChange',
                    'k': 'k',
                    'n_papers': 'nPapers'
                })

                if analysis_type == 'two_way':
                    plot_df_subgroups['GroupVar'] = plot_df_subgroups[moderator1].astype(str)
                    plot_df_subgroups['LabelVar'] = plot_df_subgroups[moderator2].astype(str)
                else:  # single
                    plot_df_subgroups['GroupVar'] = 'Subgroup'
                    plot_df_subgroups['LabelVar'] = plot_df_subgroups['group'].astype(str)

                required_cols = ['GroupVar', 'LabelVar', 'k', 'nPapers',
                               'EffectSize', 'SE', 'CI_Lower', 'CI_Upper', 'FoldChange']
                plot_df_subgroups = plot_df_subgroups[required_cols]
                plot_df_subgroups.dropna(subset=['EffectSize', 'SE'], inplace=True)

                print(f"  Subgroups: {len(plot_df_subgroups)}")
            else:
                plot_df_subgroups = pd.DataFrame(columns=[
                    'GroupVar', 'LabelVar', 'k', 'nPapers',
                    'EffectSize', 'SE', 'CI_Lower', 'CI_Upper', 'FoldChange'
                ])

            # --- ADD OVERALL EFFECT ---
            overall_effect_val = overall_results[overall_effect_key]
            overall_se_val = overall_results[overall_se_key]
            overall_ci_lower_val = overall_results[overall_ci_lower_key]
            overall_ci_upper_val = overall_results[overall_ci_upper_key]
            overall_k_val = overall_results['k']
            overall_papers_val = overall_results['k_papers']
            overall_fold_val = overall_results.get(overall_fold_key, np.nan)

            overall_row = pd.DataFrame([{
                'GroupVar': 'Overall',
                'LabelVar': 'Overall',
                'k': overall_k_val,
                'nPapers': overall_papers_val,
                'EffectSize': overall_effect_val,
                'SE': overall_se_val,
                'CI_Lower': overall_ci_lower_val,
                'CI_Upper': overall_ci_upper_val,
                'FoldChange': overall_fold_val
            }])

            print(f"  Overall: k={overall_k_val}, papers={overall_papers_val}")

            # --- COMBINE DATA (OVERALL ON TOP) ---
            plot_df = pd.concat([overall_row, plot_df_subgroups], ignore_index=True)

            plot_df['SortKey_Group'] = plot_df['GroupVar'].apply(
                lambda x: 'AAAAA' if x == 'Overall' else str(x)
            )
            plot_df['SortKey_Label'] = plot_df['LabelVar'].apply(
                lambda x: 'AAAAA' if x == 'Overall' else str(x)
            )
            plot_df.sort_values(by=['SortKey_Group', 'SortKey_Label'], inplace=True)
            plot_df.reset_index(drop=True, inplace=True)

            if plot_df.empty:
                print("❌ ERROR: No data to plot")
                return

            print(f"  Total rows: {len(plot_df)}")

            # --- CALCULATE PLOT DIMENSIONS ---
            num_rows = len(plot_df)
            y_positions = np.arange(num_rows)

            base_height = 2.5
            plot_height = max(base_height, num_rows * height_per_row + 1.5)

            y_margin_top = 0.75
            y_margin_bottom = 0.75
            y_lim_bottom = y_positions[0] - y_margin_bottom
            y_lim_top = y_positions[-1] + y_margin_top

            # --- Y-TICK LABELS (USE CUSTOM MAPPING) ---
            y_tick_labels = []
            for i, row in plot_df.iterrows():
                if row['GroupVar'] == 'Overall':
                    y_tick_labels.append(overall_label_text)
                else:
                    original_label = str(row['LabelVar'])
                    display_label = label_mapping.get(original_label, original_label)
                    y_tick_labels.append(display_label)

            # --- CALCULATE X-AXIS LIMITS (FIXED - USE ALL DATA) ---
            min_ci = plot_df['CI_Lower'].min()
            max_ci = plot_df['CI_Upper'].max()
            min_effect = plot_df['EffectSize'].min()
            max_effect = plot_df['EffectSize'].max()

            plot_min = min(min_ci, 0)
            plot_max = max(max_ci, 0)
            x_range = plot_max - plot_min

            if x_range == 0:
                x_range = 1

            print(f"\n📏 Data range:")
            print(f"  Effect sizes: [{min_effect:.3f}, {max_effect:.3f}]")
            print(f"  CI range: [{min_ci:.3f}, {max_ci:.3f}]")
            print(f"  Plot range: [{plot_min:.3f}, {plot_max:.3f}]")

            # --- ESTIMATE ANNOTATION SPACE NEEDED ---
            max_k = int(plot_df['k'].max())
            max_np = int(plot_df['nPapers'].max()) if 'nPapers' in plot_df.columns else 0

            annot_parts = []
            if show_k:
                annot_parts.append(f"k={max_k}")
            if show_papers:
                annot_parts.append(f"({max_np})")
            if show_fold_change and es_config.get('has_fold_change', False):
                max_fold = plot_df['FoldChange'].abs().max() if 'FoldChange' in plot_df.columns else 10
                annot_parts.append(f"[-{max_fold:.2f}×]")

            example_annot = " ".join(annot_parts) if annot_parts else "k=100 (10)"

            char_width_fraction = (annot_fontsize / 8.0) * 0.006
            annot_space_fraction = len(example_annot) * char_width_fraction

            print(f"  Annotation example: '{example_annot}' ({len(example_annot)} chars)")

            # --- CALCULATE SPACE FOR GROUP LABELS (TWO-WAY) ---
            group_label_space = 0
            if has_subgroups and analysis_type == 'two_way':
                max_group_len = 0
                for group_val in plot_df[plot_df['GroupVar'] != 'Overall']['GroupVar'].unique():
                    custom_label = label_mapping.get(str(group_val), str(group_val))
                    max_group_len = max(max_group_len, len(custom_label))

                char_width_group = (group_label_fontsize / 8.0) * 0.006
                group_label_space = max_group_len * char_width_group

                print(f"  Group label max: {max_group_len} chars")

            # --- AUTO-SCALE CALCULATION ---
            if auto_scale:
                left_padding = 0.05
                annot_distance = 0.015
                right_padding = 0.03

                total_right_fraction = (annot_distance +
                                       annot_space_fraction +
                                       group_label_space +
                                       right_padding)

                x_min_auto = plot_min - x_range * left_padding
                x_max_auto = plot_max + x_range * (total_right_fraction / (1 - total_right_fraction))

                x_limits = (x_min_auto, x_max_auto)
                print(f"  X-axis (auto): [{x_min_auto:.3f}, {x_max_auto:.3f}]")
            else:
                x_limits = (x_min_manual, x_max_manual)
                print(f"  X-axis (manual): [{x_min_manual:.3f}, {x_max_manual:.3f}]")

            # --- DETERMINE COLORS AND MARKERS ---
            if color_scheme == 'gray':
                subgroup_color = 'dimgray'
                overall_color = 'black'
                ci_color_subgroup = 'gray'
                ci_color_overall = 'black'
            elif color_scheme == 'color':
                subgroup_color = '#4A90E2'
                overall_color = '#E74C3C'
                ci_color_subgroup = '#4A90E2'
                ci_color_overall = '#E74C3C'
            else:  # bw
                subgroup_color = 'black'
                overall_color = 'black'
                ci_color_subgroup = 'black'
                ci_color_overall = 'black'

            if marker_style == 'circle_diamond':
                subgroup_marker = 'o'
                overall_marker = 'D'
            elif marker_style == 'square_diamond':
                subgroup_marker = 's'
                overall_marker = 'D'
            else:  # circle_star
                subgroup_marker = 'o'
                overall_marker = '*'

            subgroup_marker_size = 6
            overall_marker_size = 8
            subgroup_ci_width = 1.5
            overall_ci_width = 2.0

            if ci_style == 'solid':
                capsize = 0
            elif ci_style == 'dashed':
                capsize = 0
            else:  # caps
                capsize = 4

            # --- CREATE FIGURE ---
            fig, ax = plt.subplots(figsize=(plot_width, plot_height))

            if transparent_bg:
                fig.patch.set_alpha(0)
                ax.patch.set_alpha(0)

            print(f"\n🎨 Plotting {num_rows} rows...")

            # --- PLOT DATA POINTS AND ERROR BARS ---
            for i, row in plot_df.iterrows():
                is_overall = (row['GroupVar'] == 'Overall')

                marker = overall_marker if is_overall else subgroup_marker
                msize = overall_marker_size if is_overall else subgroup_marker_size
                color = overall_color if is_overall else subgroup_color
                ci_color = ci_color_overall if is_overall else ci_color_subgroup
                ci_width = overall_ci_width if is_overall else subgroup_ci_width
                zorder = 5 if is_overall else 3

                linestyle = '-' if ci_style != 'dashed' else '--'

                ax.errorbar(
                    x=row['EffectSize'],
                    y=y_positions[i],
                    xerr=[[row['EffectSize'] - row['CI_Lower']],
                          [row['CI_Upper'] - row['EffectSize']]],
                    fmt='none',
                    capsize=capsize,
                    color=ci_color,
                    linewidth=ci_width,
                    linestyle=linestyle,
                    alpha=0.9,
                    zorder=zorder-1
                )

                ax.plot(
                    row['EffectSize'],
                    y_positions[i],
                    marker=marker,
                    markersize=msize,
                    markerfacecolor=color,
                    markeredgecolor='black' if color_scheme != 'bw' else 'black',
                    markeredgewidth=1.0,
                    linestyle='none',
                    zorder=zorder
                )

            # --- SET AXIS LIMITS FIRST ---
            ax.set_xlim(x_limits[0], x_limits[1])
            ax.set_ylim(y_lim_top, y_lim_bottom)  # Inverted

            final_xlims = ax.get_xlim()
            final_xrange = final_xlims[1] - final_xlims[0]

            print(f"  Final X-axis: [{final_xlims[0]:.3f}, {final_xlims[1]:.3f}]")

            # --- ADD ANNOTATIONS ---
            print(f"  Adding annotations...")

            annot_x_offset = annot_distance * final_xrange

            for i, row in plot_df.iterrows():
                is_overall = (row['GroupVar'] == 'Overall')
                font_weight = 'bold' if is_overall else 'normal'

                annot_parts = []
                if show_k:
                    annot_parts.append(f"k={int(row['k'])}")
                if show_papers and pd.notna(row['nPapers']):
                    annot_parts.append(f"({int(row['nPapers'])})")
                if show_fold_change and pd.notna(row['FoldChange']) and es_config.get('has_fold_change', False):
                    fold_sign = "+" if row['FoldChange'] > 0 else ""
                    annot_parts.append(f"[{fold_sign}{row['FoldChange']:.2f}×]")

                annotation_text = " ".join(annot_parts) if annot_parts else ""

                if annotation_text:
                    if annot_pos == 'right':
                        x_pos = row['CI_Upper'] + annot_x_offset + (annot_offset * final_xrange * 0.1)
                        y_pos = y_positions[i]
                        va = 'center'
                        ha = 'left'
                    elif annot_pos == 'above':
                        x_pos = row['EffectSize'] + (annot_offset * final_xrange * 0.1)
                        y_pos = y_positions[i] - 0.2
                        va = 'bottom'
                        ha = 'center'
                    else:  # below
                        x_pos = row['EffectSize'] + (annot_offset * final_xrange * 0.1)
                        y_pos = y_positions[i] + 0.2
                        va = 'top'
                        ha = 'center'

                    ax.text(
                        x_pos, y_pos,
                        annotation_text,
                        va=va, ha=ha,
                        fontsize=annot_fontsize,
                        fontweight=font_weight,
                        clip_on=False
                    )

            # --- ADD GROUP LABELS (TWO-WAY) ---
            if has_subgroups and analysis_type == 'two_way':
                print(f"  Adding group labels...")

                current_group = None
                first_subgroup_idx = 1 if 'Overall' in plot_df['GroupVar'].values else 0
                group_label_x_base = final_xlims[1] - (right_padding * final_xrange)

                for i, row in plot_df.iterrows():
                    group_val = str(row['GroupVar'])

                    if group_val != 'Overall' and group_val != current_group:
                        if i > first_subgroup_idx:
                            ax.axhline(
                                y=y_positions[i] - 0.5,
                                color='darkgray',
                                linewidth=0.8,
                                linestyle='-',
                                xmin=0.01,
                                xmax=0.99,
                                zorder=1
                            )

                        group_indices = plot_df[plot_df['GroupVar'] == group_val].index
                        label_y = (y_positions[group_indices[0]] + y_positions[group_indices[-1]]) / 2.0

                        label_x = group_label_x_base + (group_label_h_offset * final_xrange * 0.05)
                        label_y = label_y + group_label_v_offset

                        display_group_label = label_mapping.get(group_val, group_val)

                        ax.text(
                            label_x, label_y,
                            display_group_label,
                            va='center',
                            ha='right',
                            fontweight='bold',
                            fontsize=group_label_fontsize,
                            color='black',
                            clip_on=False
                        )

                        current_group = group_val

            # --- ADD SEPARATOR LINE BELOW OVERALL ---
            if len(plot_df) > 1:
                separator_y = y_positions[0] + 0.5
                ax.axhline(
                    y=separator_y,
                    color='black',
                    linewidth=1.5,
                    linestyle='-'
                )

            # --- CUSTOMIZE AXES ---
            print(f"  Customizing axes...")

            if show_null_line:
                ax.axvline(
                    x=0,
                    color='black',
                    linestyle='-',
                    linewidth=1.5,
                    alpha=0.8,
                    zorder=1
                )

            ax.set_xlabel(x_label, fontsize=label_fontsize, fontweight='bold')
            if show_ylabel:
                ax.set_ylabel(y_label, fontsize=label_fontsize, fontweight='bold')

            if show_title:
                ax.set_title(graph_title, fontweight='bold', fontsize=title_fontsize, pad=15)

            ax.set_yticks(y_positions)
            ax.set_yticklabels(y_tick_labels, fontsize=tick_fontsize)
            ax.tick_params(axis='x', labelsize=tick_fontsize)

            if show_grid:
                if grid_style == 'dashed_light':
                    ax.grid(axis='x', alpha=0.3, linestyle='--', linewidth=0.5)
                elif grid_style == 'dotted_light':
                    ax.grid(axis='x', alpha=0.3, linestyle=':', linewidth=0.5)
                else:  # solid_light
                    ax.grid(axis='x', alpha=0.2, linestyle='-', linewidth=0.5)

            # --- ADD FOLD-CHANGE AXIS (TOP) ---
            if show_fold_axis and es_config.get('has_fold_change', False):
                print(f"  Adding fold-change axis...")

                ax2 = ax.twiny()

                fold_ticks_lnRR = np.array([-2, -1.5, -1, -0.5, 0, 0.5, 1, 1.5, 2])
                fold_ticks_RR = np.exp(fold_ticks_lnRR)

                valid_mask = ((fold_ticks_lnRR >= final_xlims[0]) &
                             (fold_ticks_lnRR <= final_xlims[1]))
                fold_ticks_lnRR = fold_ticks_lnRR[valid_mask]
                fold_ticks_RR = fold_ticks_RR[valid_mask]

                ax2.set_xlim(final_xlims[0], final_xlims[1])
                ax2.set_xticks(fold_ticks_lnRR)

                fold_labels = []
                for rr in fold_ticks_RR:
                    if rr < 1:
                        fold_labels.append(f"{1/rr:.1f}× ↓")
                    elif rr > 1:
                        fold_labels.append(f"{rr:.1f}× ↑")
                    else:
                        fold_labels.append("1×")

                ax2.set_xticklabels(fold_labels, fontsize=tick_fontsize)
                ax2.set_xlabel("Fold-Change", fontsize=label_fontsize, fontweight='bold')

            # --- FINALIZE PLOT ---
            fig.tight_layout()

            # --- SAVE FILES ---
            print(f"\n💾 Saving files...")

            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            base_filename = f"{filename_prefix}_{plot_model}_{timestamp}"

            saved_files = []

            if save_pdf:
                pdf_filename = f"{base_filename}.pdf"
                fig.savefig(pdf_filename, bbox_inches='tight', transparent=transparent_bg)
                saved_files.append(pdf_filename)
                print(f"  ✓ {pdf_filename}")

            if save_png:
                png_filename = f"{base_filename}.png"
                fig.savefig(png_filename, dpi=png_dpi, bbox_inches='tight', transparent=transparent_bg)
                saved_files.append(png_filename)
                print(f"  ✓ {png_filename} (DPI: {png_dpi})")

            plt.show()

            print(f"\n" + "="*70)
            print("✅ FOREST PLOT COMPLETE")
            print("="*70)
            print(f"Files: {', '.join(saved_files)}")

        except Exception as e:
            print(f"\n❌ ERROR: {e}")
            import traceback
            traceback.print_exc()

# --- 4. CREATE BUTTON AND DISPLAY ---
# Trigger button for the forest plot; clicking it invokes generate_plot.
plot_button = widgets.Button(
    description='📊 Generate Forest Plot',
    button_style='success',
    layout=widgets.Layout(width='450px', height='50px'),
    style={'font_weight': 'bold', 'font_size': '14px'},
)
plot_button.on_click(generate_plot)

# Readiness banner and usage tips, emitted as one newline-separated print.
print(
    "\n" + "=" * 70,
    "✅ FOREST PLOT INTERFACE READY",
    "=" * 70,
    "👆 Customize your plot using the tabs above, then click Generate",
    "\n📝 Tips:",
    "  • Use the 'Labels' tab to rename coded variables",
    "  • Auto-scale considers ALL data points for proper spacing",
    "  • Annotations and group labels will fit within the plot",
    "=" * 70 + "\n",
    sep="\n",
)

# Stack the panel top to bottom: heading, settings tabs, button, output area.
forest_panel_children = [
    widgets.HTML("<h3 style='color: #2E86AB;'>📊 Forest Plot Generator</h3>"),
    widgets.HTML("<p style='color: #666;'>Create publication-ready forest plots with full customization</p>"),
    widgets.HTML("<hr style='margin: 15px 0;'>"),
    tab,
    widgets.HTML("<hr style='margin: 15px 0;'>"),
    plot_button,
    plot_output,
]
display(widgets.VBox(children=forest_panel_children))

FOREST PLOT CONFIGURATION
✓ Analysis type: single
✓ Has subgroups: True
✓ Configuration loaded successfully

🔍 Identifying labels for editor...
  ✓ Found 3 unique labels
  ✓ Label editor created

✅ FOREST PLOT INTERFACE READY
👆 Customize your plot using the tabs above, then click Generate

📝 Tips:
  • Use the 'Labels' tab to rename coded variables
  • Auto-scale considers ALL data points for proper spacing
  • Annotations and group labels will fit within the plot



VBox(children=(HTML(value="<h3 style='color: #2E86AB;'>📊 Forest Plot Generator</h3>"), HTML(value="<p style='c…

In [None]:
#@title 📈 META-REGRESSION ANALYSIS

# =============================================================================
# CELL 10: META-REGRESSION ANALYSIS (Fixed & Mixed Effects)
# Purpose: Test continuous moderators and their relationship with effect sizes
# Dependencies: Cell 6 (overall_results), Cell 5 (data_filtered)
# Outputs: Regression coefficients, R², heterogeneity partitioning
# =============================================================================

import statsmodels.api as sm
from scipy.stats import chi2
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
import datetime
import numpy as np
import pandas as pd

print("="*70)
print("META-REGRESSION CONFIGURATION")
print("="*70)

# --- 1. LOAD DATA AND IDENTIFY MODERATORS ---
# Resolves the working DataFrame (data_filtered, then analysis_data_subgroups,
# then raw_data), stores a copy in ANALYSIS_CONFIG['analysis_data'], and scans
# its columns for numeric moderator candidates. On any configuration error the
# candidate list is left empty so the widgets built below start out disabled.
try:
    if 'ANALYSIS_CONFIG' not in locals() and 'ANALYSIS_CONFIG' not in globals():
        raise NameError("ANALYSIS_CONFIG not found.")

    # The data is in global scope as 'data_filtered', not in ANALYSIS_CONFIG
    if 'data_filtered' in globals():
        analysis_data = data_filtered.copy()
        print(f"✓ Found data: 'data_filtered' with {len(analysis_data)} observations")
    elif 'analysis_data_subgroups' in globals():
        analysis_data = analysis_data_subgroups.copy()
        print(f"✓ Found data: 'analysis_data_subgroups' with {len(analysis_data)} observations")
    elif 'raw_data' in globals():
        analysis_data = raw_data.copy()
        print(f"✓ Found data: 'raw_data' with {len(analysis_data)} observations")
    else:
        raise ValueError("Cannot find analysis data (data_filtered, analysis_data_subgroups, or raw_data)")

    # Store it in ANALYSIS_CONFIG for future use
    ANALYSIS_CONFIG['analysis_data'] = analysis_data

    # Check for required configuration
    overall_results = ANALYSIS_CONFIG.get('overall_results', {})
    if not overall_results:
        raise ValueError("'overall_results' not found. Run Cell 6 (Overall Analysis) first.")

    # Get effect size configuration
    effect_col = ANALYSIS_CONFIG['effect_col']
    var_col = ANALYSIS_CONFIG['var_col']
    se_col = ANALYSIS_CONFIG['se_col']

    print(f"✓ Effect column: {effect_col}")
    print(f"✓ Variance column: {var_col}")

    # Columns that are outcomes, weights, identifiers or raw inputs are never
    # offered as moderators. The CI column names are optional, hence .get().
    excluded_cols = [
        effect_col, var_col, se_col,
        'w_fixed', 'w_random', 'id',
        'xe', 'sde', 'ne', 'xc', 'sdc', 'nc',
        ANALYSIS_CONFIG.get('ci_lower_col'),
        ANALYSIS_CONFIG.get('ci_upper_col')
    ]

    # Remove None values from excluded list
    excluded_cols = [col for col in excluded_cols if col is not None]

    # Find numeric columns that could be moderators
    all_cols = analysis_data.columns.tolist()
    potential_moderators = []

    print(f"\n🔍 Scanning for numeric moderators...")
    for col in all_cols:
        if col in excluded_cols:
            continue

        # Check if column is numeric or can be converted to numeric
        try:
            temp_numeric = pd.to_numeric(analysis_data[col], errors='coerce')
            n_valid = temp_numeric.notna().sum()
            n_unique = temp_numeric.nunique()

            # Only include if has valid values and some variability
            if n_valid >= 3 and n_unique >= 3:  # Need at least 3 unique values
                potential_moderators.append(col)
                print(f"  ✓ {col}: {n_valid} valid values, {n_unique} unique")
        except Exception as scan_err:
            # Best-effort scan: a column that cannot be inspected is skipped,
            # but the reason is reported instead of being silently swallowed
            # (a bare `except:` would also trap KeyboardInterrupt/SystemExit).
            print(f"  ⚠️  Skipping {col}: could not evaluate as numeric ({scan_err})")

    if not potential_moderators:
        print("\n⚠️  No suitable numeric moderators found")
        print("   Moderators need:")
        print("   • At least 3 valid numeric values")
        print("   • At least 3 unique values")
        print("\n   Available columns:", all_cols[:20])
        raise ValueError("No numeric moderator columns found")

    print(f"\n✓ Found {len(potential_moderators)} potential moderators")

    # Show moderators
    if len(potential_moderators) <= 15:
        print(f"  Available: {', '.join(potential_moderators)}")
    else:
        print(f"  Available: {', '.join(potential_moderators[:15])}... and {len(potential_moderators)-15} more")

except (NameError, KeyError, ValueError) as e:
    print(f"❌ ERROR: {e}")
    print("\n💡 Please ensure:")
    print("   1. Cell 4 (Configuration) has been run")
    print("   2. Cell 5 (Data Preparation) has been run")
    print("   3. Cell 6 (Overall Analysis) has been run")
    # An empty list makes the widgets below render in their disabled state.
    potential_moderators = []

# --- 2. CREATE WIDGETS ---

# Heading and one-line description shown at the top of the control panel.
header = widgets.HTML(
    value=(
        "<h3 style='color: #2E86AB;'>Meta-Regression Setup</h3>"
        "<p style='color: #666;'><i>Test how continuous moderators relate to effect sizes</i></p>"
    )
)

# Model choice: 'FE' or 'RE' is the value read back by the regression callback.
model_type_widget = widgets.RadioButtons(
    options=[('Fixed-Effects (FE)', 'FE'), ('Mixed-Effects (RE)', 'RE')],
    value='RE',
    description='Model Type:',
    style={'description_width': '120px'},
    layout=widgets.Layout(width='450px'),
)

# Static guidance box explaining when each model is appropriate.
model_help = widgets.HTML(
    value=(
        "<div style='background-color: #f0f0f0; padding: 10px; margin: 10px 0; border-radius: 5px;'>"
        "<b>Model Selection Guide:</b><br>"
        "• <b>Fixed-Effects (FE):</b> Assumes all studies share a true effect size. "
        "Use when heterogeneity is low (I² < 25%).<br>"
        "• <b>Mixed-Effects (RE):</b> Accounts for between-study variability. "
        "Recommended for most meta-analyses (I² ≥ 25%)."
        "</div>"
    )
)

# When the scan found nothing, the dropdown shows a single placeholder entry
# and every interactive control is created disabled.
_has_moderators = bool(potential_moderators)
_moderator_choices = potential_moderators if _has_moderators else ['No moderators available']

moderator_widget = widgets.Dropdown(
    options=_moderator_choices,
    value=_moderator_choices[0],
    description='Moderator:',
    style={'description_width': '120px'},
    layout=widgets.Layout(width='450px'),
    disabled=not _has_moderators,
)

show_info_button = widgets.Button(
    description='📊 Show Moderator Info',
    button_style='info',
    layout=widgets.Layout(width='200px'),
    disabled=not _has_moderators,
)

# Output area that receives the moderator summary statistics.
info_output = widgets.Output()

def show_moderator_info(b):
    """Print descriptive statistics for the moderator selected in the dropdown.

    Click handler for ``show_info_button``; ``b`` (the clicked button) is not
    used. Everything is written into ``info_output``, whose previous content
    is cleared first. The column is coerced to numeric and non-numeric or
    missing entries are dropped before summarising.
    """
    with info_output:
        clear_output()

        selected = moderator_widget.value
        # Nothing to report for an empty selection or the placeholder entry.
        if not selected or selected == 'No moderators available':
            return

        try:
            numeric_values = pd.to_numeric(analysis_data[selected], errors='coerce').dropna()
            n_valid = len(numeric_values)

            if n_valid == 0:
                print(f"⚠️  No valid numeric data found for {selected}")
                return

            print(f"\n📊 Moderator: {selected}")
            print(f"  Valid observations: {n_valid}")
            print(f"  Missing: {len(analysis_data) - n_valid}")
            print(f"  Range: [{numeric_values.min():.3f}, {numeric_values.max():.3f}]")
            print(f"  Mean: {numeric_values.mean():.3f}")
            print(f"  Median: {numeric_values.median():.3f}")
            print(f"  SD: {numeric_values.std():.3f}")

            print(f"\n  Distribution:")
            quartiles = numeric_values.quantile([0.25, 0.5, 0.75])
            for pct_label, q in (("25th", 0.25), ("50th", 0.50), ("75th", 0.75)):
                print(f"    {pct_label} percentile: {quartiles[q]:.3f}")
        except Exception as err:
            print(f"❌ Error: {err}")

# Output area that receives the regression report.
regression_output = widgets.Output()

# Main action button; created disabled when no moderator candidates exist.
run_button = widgets.Button(
    description='▶ Run Meta-Regression',
    button_style='success',
    layout=widgets.Layout(width='450px', height='50px'),
    style={'font_weight': 'bold'},
    disabled=not potential_moderators,
)

# Clicking the info button prints summary statistics for the selected moderator.
show_info_button.on_click(show_moderator_info)

# --- 3. DEFINE META-REGRESSION FUNCTION ---

def run_meta_regression(b):
    """Fit a weighted meta-regression of the effect size on one moderator.

    Intended as a button click callback; ``b`` is not used. All output is
    printed into ``regression_output``.

    Steps:
      1. Read the selected moderator and model type ('FE' or 'RE') from the
         widgets and the column names / overall results from ANALYSIS_CONFIG.
      2. Build a local regression frame, coerce the moderator to numeric and
         drop rows with missing values or non-positive / non-finite weights.
      3. Fit ``sm.WLS`` with weights 1/v (FE) or 1/(v + tau²) (RE), where tau²
         is taken from the overall model.
      4. Print coefficients, heterogeneity statistics and model fit, then store
         the results in ``ANALYSIS_CONFIG['meta_regression_results']``.

    Returns:
        None. On invalid input the function prints an error and returns early;
        unexpected exceptions are caught and printed with a traceback.
    """
    with regression_output:
        clear_output(wait=True)

        print("\n" + "="*70)
        print("META-REGRESSION ANALYSIS")
        print("="*70)
        print(f"Timestamp: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

        try:
            # --- LOAD CONFIGURATION ---
            moderator_col_name = moderator_widget.value
            model_type = model_type_widget.value

            if not moderator_col_name or moderator_col_name == 'No moderators available':
                print("❌ ERROR: No valid moderator selected")
                return

            # Get data
            if 'analysis_data' in ANALYSIS_CONFIG:
                analysis_data_local = ANALYSIS_CONFIG['analysis_data']
            elif 'data_filtered' in globals():
                analysis_data_local = data_filtered
            else:
                print("❌ ERROR: Cannot find analysis data")
                return

            effect_col = ANALYSIS_CONFIG['effect_col']
            var_col = ANALYSIS_CONFIG['var_col']
            es_config = ANALYSIS_CONFIG['es_config']
            overall_results = ANALYSIS_CONFIG['overall_results']

            print(f"📋 Configuration:")
            print(f"  Model: {model_type}")
            print(f"  Effect size: {es_config['effect_label']} ({effect_col})")
            print(f"  Moderator: {moderator_col_name}")

            Qt_overall = overall_results['Qt']
            I_sq_overall = overall_results['I_squared']
            tau_sq_overall = overall_results['tau_squared']
            k_overall = overall_results['k']

            print(f"  Overall: k={k_overall}, I²={I_sq_overall:.1f}%, τ²={tau_sq_overall:.4f}")

            if model_type == 'FE' and I_sq_overall > 25:
                print(f"\n⚠️  WARNING: High heterogeneity (I²={I_sq_overall:.1f}%)")
                print(f"   Consider Mixed-Effects (RE) model")

            # --- PREPARE DATA ---
            print(f"\n📊 Preparing regression data...")

            # Check if required columns exist
            required_cols = [effect_col, var_col, moderator_col_name, 'id']
            missing_cols = [col for col in required_cols if col not in analysis_data_local.columns]

            if missing_cols:
                print(f"❌ ERROR: Missing columns: {missing_cols}")
                print(f"   Available columns: {list(analysis_data_local.columns)[:20]}")
                return

            # Use existing fixed-effects weights when present; otherwise derive
            # them as 1/variance. The weights are attached to the local copy
            # only, so the shared source DataFrame is never modified here.
            if 'w_fixed' in analysis_data_local.columns:
                fixed_weights = analysis_data_local['w_fixed']
            else:
                print("  ℹ️  Calculating fixed-effects weights...")
                fixed_weights = 1 / analysis_data_local[var_col]

            reg_df = analysis_data_local[[effect_col, var_col, moderator_col_name, 'id']].copy()
            # Position 2 keeps the column order effect, variance, w_fixed, moderator, id.
            reg_df.insert(2, 'w_fixed', fixed_weights)

            # Convert moderator to numeric
            reg_df[moderator_col_name] = pd.to_numeric(reg_df[moderator_col_name], errors='coerce')

            initial_n = len(reg_df)

            # Remove missing data
            reg_df.dropna(subset=[effect_col, var_col, 'w_fixed', moderator_col_name], inplace=True)
            reg_df = reg_df[reg_df['w_fixed'] > 0]
            reg_df = reg_df[np.isfinite(reg_df['w_fixed'])]

            k_reg = len(reg_df)
            n_dropped = initial_n - k_reg

            if n_dropped > 0:
                print(f"  ⚠️  Dropped {n_dropped} observations with missing/invalid data")
                retention_pct = (k_reg / k_overall) * 100
                print(f"  Retained: {k_reg}/{k_overall} ({retention_pct:.1f}%)")
            else:
                print(f"  ✓ Using all {k_reg} observations")

            # Check if sufficient data
            p_reg = 2  # intercept + 1 slope
            if k_reg < p_reg + 1:
                print(f"\n❌ ERROR: Insufficient data for regression")
                print(f"   Need at least {p_reg + 1} observations, have {k_reg}")
                return

            # Show moderator distribution
            mod_values = reg_df[moderator_col_name]
            print(f"\n  Moderator statistics:")
            print(f"    Range: [{mod_values.min():.3f}, {mod_values.max():.3f}]")
            print(f"    Mean: {mod_values.mean():.3f}, SD: {mod_values.std():.3f}")

            # A moderator with a single distinct value cannot identify a slope.
            # It would also make sm.add_constant (default has_constant='skip')
            # omit the 'const' column, so params['const'] below would raise.
            if mod_values.nunique() < 2:
                print(f"\n❌ ERROR: Moderator '{moderator_col_name}' has no variability in the retained data")
                print(f"   At least 2 distinct moderator values are needed to estimate a slope")
                return

            # --- PREPARE REGRESSION VARIABLES ---
            y = reg_df[effect_col]
            X = reg_df[[moderator_col_name]]
            X = sm.add_constant(X, prepend=True)

            # --- RUN MODEL ---
            if model_type == 'FE':
                print(f"\n🔧 Running Fixed-Effects meta-regression...")
                weights = reg_df['w_fixed']
                meta_reg_model = sm.WLS(y, X, weights=weights).fit()

            else:  # RE
                print(f"\n🔧 Running Mixed-Effects meta-regression...")
                print(f"  Using τ²={tau_sq_overall:.4f} from overall model")

                # Calculate mixed-effects weights
                reg_df['w_mixed'] = 1 / (reg_df[var_col] + tau_sq_overall)
                reg_df = reg_df[reg_df['w_mixed'] > 0]
                reg_df = reg_df[np.isfinite(reg_df['w_mixed'])]

                k_reg = len(reg_df)
                if k_reg < p_reg + 1:
                    print(f"\n❌ ERROR: Insufficient data after calculating mixed weights")
                    return

                # The weight filter may have removed rows; re-check variability.
                if reg_df[moderator_col_name].nunique() < 2:
                    print(f"\n❌ ERROR: Moderator '{moderator_col_name}' has no variability in the retained data")
                    print(f"   At least 2 distinct moderator values are needed to estimate a slope")
                    return

                # Update y, X with filtered data
                y = reg_df[effect_col]
                X = reg_df[[moderator_col_name]]
                X = sm.add_constant(X, prepend=True)
                weights = reg_df['w_mixed']

                meta_reg_model = sm.WLS(y, X, weights=weights).fit()

            # --- EXTRACT RESULTS ---
            # NOTE(review): these SEs / p-values / CIs come straight from the
            # WLS fit, which presumably rescales by an estimated residual
            # variance and uses t-based inference, whereas Q_E below is tested
            # against chi-square as if the sampling variances were known.
            # Confirm which convention is intended for the coefficient tests.
            b0 = meta_reg_model.params['const']
            b1 = meta_reg_model.params[moderator_col_name]

            se0 = meta_reg_model.bse['const']
            se1 = meta_reg_model.bse[moderator_col_name]

            p0 = meta_reg_model.pvalues['const']
            p1 = meta_reg_model.pvalues[moderator_col_name]

            conf_int_table = meta_reg_model.conf_int()
            ci0 = conf_int_table.loc['const'].values
            ci1 = conf_int_table.loc[moderator_col_name].values

            # --- DISPLAY RESULTS ---
            print("\n" + "="*70)
            print(f"{model_type} META-REGRESSION RESULTS")
            print("="*70)

            print(f"\n📐 Regression Model:")
            sign = "+" if b1 >= 0 else ""
            print(f"   {effect_col} = {b0:.4f} {sign} {b1:.4f} × {moderator_col_name}")

            print(f"\n📊 Coefficients:")
            print(f"  {'Parameter':<20} {'Estimate':<12} {'SE':<10} {'95% CI':<25} {'P-value':<10} {'Sig':<5}")
            print(f"  {'-'*20} {'-'*12} {'-'*10} {'-'*25} {'-'*10} {'-'*5}")

            sig0 = "***" if p0 < 0.001 else "**" if p0 < 0.01 else "*" if p0 < 0.05 else "ns"
            print(f"  {'Intercept':<20} {b0:>11.4f} {se0:>10.4f} [{ci0[0]:>7.4f}, {ci0[1]:>7.4f}] {p0:>10.4g} {sig0:<5}")

            sig1 = "***" if p1 < 0.001 else "**" if p1 < 0.01 else "*" if p1 < 0.05 else "ns"
            print(f"  {moderator_col_name:<20} {b1:>11.4f} {se1:>10.4f} [{ci1[0]:>7.4f}, {ci1[1]:>7.4f}] {p1:>10.4g} {sig1:<5}")

            print(f"\n  Significance: *** p<0.001, ** p<0.01, * p<0.05, ns = not significant")

            print(f"\n💡 Interpretation:")
            print(f"  • Intercept (b₀): Predicted effect when {moderator_col_name} = 0")
            if p1 < 0.05:
                direction = "increases" if b1 > 0 else "decreases"
                print(f"  • Slope (b₁): Effect size {direction} by {abs(b1):.4f} per unit")
                print(f"    increase in {moderator_col_name} (p={p1:.4g})")
                print(f"  ✓ SIGNIFICANT MODERATOR EFFECT")
            else:
                print(f"  • Slope (b₁): No significant relationship (p={p1:.4g})")

            # --- HETEROGENEITY PARTITIONING ---
            print(f"\n" + "="*70)
            print("HETEROGENEITY ANALYSIS")
            print("="*70)

            # Total heterogeneity of the regression subset, always computed
            # with the fixed-effects weights (used by both model branches).
            pooled_effect_subset = (reg_df['w_fixed'] * reg_df[effect_col]).sum() / reg_df['w_fixed'].sum()
            QT_subset = (reg_df['w_fixed'] * (reg_df[effect_col] - pooled_effect_subset)**2).sum()

            # Residual heterogeneity: weighted residual sum of squares of the fit.
            QE = meta_reg_model.ssr
            df_QE = k_reg - p_reg
            p_QE = 1 - chi2.cdf(QE, df_QE) if df_QE > 0 else np.nan

            if model_type == 'FE':
                df_QT = k_reg - 1

                # Q_T = Q_M + Q_E; clamp at 0 to absorb rounding error.
                QM = max(0, QT_subset - QE)
                df_QM = p_reg - 1

                p_QM = 1 - chi2.cdf(QM, df_QM) if df_QM > 0 else np.nan

                R_sq = (QM / QT_subset) * 100 if QT_subset > 0 else 0

                print(f"\n📊 Decomposition:")
                print(f"  {'Component':<30} {'Q':<12} {'df':<8} {'P-value':<10}")
                print(f"  {'-'*30} {'-'*12} {'-'*8} {'-'*10}")
                print(f"  {'Total (Q_T)':<30} {QT_subset:>11.4f} {df_QT:>8} {'-':<10}")
                print(f"  {'Model (Q_M)':<30} {QM:>11.4f} {df_QM:>8} {p_QM:>10.4g}")
                print(f"  {'Residual (Q_E)':<30} {QE:>11.4f} {df_QE:>8} {p_QE:>10.4g}")

                print(f"\n📈 Variance Explained: R² = {R_sq:.2f}%")

                if R_sq > 75:
                    print(f"  🟢 Excellent - moderator explains most heterogeneity")
                elif R_sq > 50:
                    print(f"  🟢 Good - major source of heterogeneity")
                elif R_sq > 25:
                    print(f"  🟡 Moderate - partially explains heterogeneity")
                else:
                    print(f"  🟠 Low - other factors may be important")

                if p_QE < 0.05:
                    print(f"\n  ⚠️  Significant residual heterogeneity (p={p_QE:.4g})")
                else:
                    print(f"\n  ✓ No significant residual heterogeneity (p={p_QE:.4g})")

            else:  # RE
                print(f"\n📊 Residual Heterogeneity:")
                print(f"  • Q_E = {QE:.4f} (df={df_QE})")
                print(f"  • P-value = {p_QE:.4g}")

                # The RE branch flags residual heterogeneity at 0.10 (FE uses 0.05).
                if p_QE < 0.10:
                    print(f"  ⚠️  Significant residual heterogeneity remains")
                else:
                    print(f"  ✓ No significant residual heterogeneity")

                # NOTE(review): Q_T uses fixed-effects weights while Q_E comes
                # from the mixed-weights fit, so this R² mixes two weighting
                # schemes and is only a rough indication.
                if QT_subset > 0:
                    QM_approx = max(0, QT_subset - QE)
                    R_sq = (QM_approx / QT_subset) * 100
                    print(f"\n📈 Approximate R² ≈ {R_sq:.2f}%")
                else:
                    R_sq = np.nan
                    print(f"\n  Cannot calculate R²")

            # --- MODEL FIT ---
            print(f"\n" + "="*70)
            print("MODEL FIT")
            print("="*70)

            print(f"\n📊 Statistics:")
            print(f"  • Observations (k): {k_reg}")
            print(f"  • Parameters (p): {p_reg}")
            print(f"  • R²: {R_sq:.2f}%" if not np.isnan(R_sq) else "  • R²: N/A")
            print(f"  • AIC: {meta_reg_model.aic:.2f}")
            print(f"  • BIC: {meta_reg_model.bic:.2f}")

            # --- SAVE RESULTS ---
            ANALYSIS_CONFIG['meta_regression_results'] = {
                'timestamp': datetime.datetime.now(),
                'model_type': model_type,
                'reg_df': reg_df,
                'meta_reg_model': meta_reg_model,
                'moderator_col_name': moderator_col_name,
                'effect_col': effect_col,
                'b0_intercept': b0,
                'b1_slope': b1,
                'se_slope': se1,
                'p_slope': p1,
                'ci_slope': ci1,
                'R_squared': R_sq,
                'p_value_QE': p_QE,
                'k_reg': k_reg
            }

            print(f"\n" + "="*70)
            print("✅ META-REGRESSION COMPLETE")
            print("="*70)
            print(f"✓ Results saved to ANALYSIS_CONFIG['meta_regression_results']")
            print(f"▶️  Ready for plotting! Run the next cell to visualize")

        except Exception as e:
            print(f"\n❌ ERROR: {e}")
            import traceback
            traceback.print_exc()

# --- 4. LINK BUTTON AND DISPLAY ---
# Clicking the Run button invokes the meta-regression callback defined above.
run_button.on_click(run_meta_regression)

# Status banner: report whether any moderator is available to choose from.
if not potential_moderators:
    print("\n⚠️  No moderators available - check data requirements\n")
else:
    print(
        "\n" + "="*70,
        "✅ META-REGRESSION INTERFACE READY",
        "="*70,
        "👆 Select model type and moderator, then click Run\n",
        sep="\n"
    )

# Control panel, stacked top to bottom: help text, selectors, info area,
# run button and finally the area that receives the regression output.
display(widgets.VBox(children=[
    header,
    model_help,
    widgets.HTML("<hr style='margin: 15px 0;'>"),
    model_type_widget,
    moderator_widget,
    widgets.HBox([show_info_button]),
    info_output,
    widgets.HTML("<hr style='margin: 15px 0;'>"),
    run_button,
    regression_output
]))

META-REGRESSION CONFIGURATION
✓ Found data: 'data_filtered' with 69 observations
✓ Effect column: hedges_g
✓ Variance column: Vg

🔍 Scanning for numeric moderators...
  ✓ kgPot: 68 valid values, 10 unique
  ✓ plantsPot: 66 valid values, 6 unique
  ✓ plantsKg: 66 valid values, 15 unique
  ✓ plantsKg2: 66 valid values, 16 unique
  ✓ cv_e: 13 valid values, 11 unique
  ✓ cv_c: 14 valid values, 4 unique
  ✓ sde_imputed: 69 valid values, 64 unique
  ✓ sdc_imputed: 69 valid values, 31 unique
  ✓ df: 69 valid values, 5 unique
  ✓ sp_squared: 69 valid values, 66 unique
  ✓ sp: 69 valid values, 66 unique
  ✓ cohen_d: 69 valid values, 68 unique
  ✓ hedges_j: 69 valid values, 5 unique
  ✓ ci_width: 69 valid values, 69 unique

✓ Found 14 potential moderators
  Available: kgPot, plantsPot, plantsKg, plantsKg2, cv_e, cv_c, sde_imputed, sdc_imputed, df, sp_squared, sp, cohen_d, hedges_j, ci_width

✅ META-REGRESSION INTERFACE READY
👆 Select model type and moderator, then click Run



VBox(children=(HTML(value="<h3 style='color: #2E86AB;'>Meta-Regression Setup</h3><p style='color: #666;'><i>Te…

In [None]:
#@title 📈 META-REGRESSION PLOT

# =============================================================================
# CELL 11: META-REGRESSION PLOT (Publication-Ready)
# Purpose: Visualize relationship between moderator and effect sizes
# Dependencies: Cell 10 (meta_regression_results)
# Outputs: PDF and PNG scatter plot with regression line
# =============================================================================

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import statsmodels.api as sm
import datetime
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
from matplotlib.patches import Patch

print("="*70, "META-REGRESSION PLOT CONFIGURATION", "="*70, sep="\n")

# --- 1. LOAD CONFIGURATION ---
# Collect everything the plot needs from ANALYSIS_CONFIG. Any missing piece is
# reported and re-raised so the cell stops instead of building a broken UI.
try:
    if not ('ANALYSIS_CONFIG' in locals() or 'ANALYSIS_CONFIG' in globals()):
        raise NameError("ANALYSIS_CONFIG not found.")

    # The regression results must have been stored by the meta-regression cell.
    if 'meta_regression_results' not in ANALYSIS_CONFIG:
        raise KeyError("meta_regression_results not found. Run Cell 10 first.")

    reg_results = ANALYSIS_CONFIG['meta_regression_results']
    es_config = ANALYSIS_CONFIG['es_config']

    # Data source: the frame stored in the config wins; otherwise fall back to
    # the global `data_filtered`.
    if 'analysis_data' in ANALYSIS_CONFIG:
        analysis_data = ANALYSIS_CONFIG['analysis_data']
    else:
        if 'data_filtered' not in globals():
            raise ValueError("Cannot find analysis data")
        analysis_data = data_filtered

    # Column names and the human-readable effect-size label
    moderator_col_name = reg_results['moderator_col_name']
    effect_col = reg_results['effect_col']
    effect_label = es_config['effect_label']

    # Initial text for the title / axis-label boxes
    default_title = f"Meta-Regression: {effect_label} vs. {moderator_col_name}"
    default_x_label = moderator_col_name
    default_y_label = effect_label

    # Columns that must never be offered for colour coding (effect/variance
    # columns, weights, raw inputs, optional CI columns when configured).
    excluded_cols = [
        name for name in (
            ANALYSIS_CONFIG['effect_col'],
            ANALYSIS_CONFIG['var_col'],
            ANALYSIS_CONFIG['se_col'],
            'w_fixed', 'w_random', 'id',
            'xe', 'sde', 'ne', 'xc', 'sdc', 'nc',
            ANALYSIS_CONFIG.get('ci_lower_col'),
            ANALYSIS_CONFIG.get('ci_upper_col'),
        )
        if name is not None
    ]

    # Colour candidates: object/category columns with at most 10 distinct values.
    categorical_cols = analysis_data.select_dtypes(include=['object', 'category']).columns
    available_color_moderators = [
        "None",
        *(
            col for col in categorical_cols
            if col not in excluded_cols and analysis_data[col].nunique() <= 10
        )
    ]

    print(
        f"✓ Moderator: {moderator_col_name}",
        f"✓ Effect size: {effect_label}",
        f"✓ Found {len(available_color_moderators)-1} categorical moderators for color",
        f"✓ Configuration loaded successfully",
        sep="\n"
    )

except (KeyError, NameError, ValueError) as e:
    print(f"❌ ERROR: {e}")
    print("   Please run Cell 10 (Meta-Regression) first")
    raise

# --- 2. CREATE WIDGETS ---

# Banner shown at the top of the plot-configuration panel.
header = widgets.HTML(
    value=(
        "<h3 style='color: #2E86AB;'>Meta-Regression Plot Setup</h3>"
        "<p style='color: #666;'><i>Visualize the relationship between moderator and effect size</i></p>"
    )
)

# ========== TAB 1: PLOT STYLE ==========
# Title, axis labels and figure dimensions.
style_header = widgets.HTML(value="<h3 style='color: #2E86AB;'>Plot Style</h3>")

show_title_widget = widgets.Checkbox(
    description='Show Plot Title', value=True, indent=False,
    layout=widgets.Layout(width='450px'),
)

title_widget = widgets.Text(
    description='Plot Title:', value=default_title,
    layout=widgets.Layout(width='450px'),
    style=dict(description_width='120px'),
)

xlabel_widget = widgets.Text(
    description='X-Axis Label:', value=default_x_label,
    layout=widgets.Layout(width='450px'),
    style=dict(description_width='120px'),
)

ylabel_widget = widgets.Text(
    description='Y-Axis Label:', value=default_y_label,
    layout=widgets.Layout(width='450px'),
    style=dict(description_width='120px'),
)

# Figure size in inches; sliders only fire once the handle is released.
width_widget = widgets.FloatSlider(
    description='Plot Width (in):',
    value=8.0, min=5.0, max=14.0, step=0.5,
    continuous_update=False,
    style=dict(description_width='120px'),
    layout=widgets.Layout(width='450px'),
)

height_widget = widgets.FloatSlider(
    description='Plot Height (in):',
    value=6.0, min=4.0, max=12.0, step=0.5,
    continuous_update=False,
    style=dict(description_width='120px'),
    layout=widgets.Layout(width='450px'),
)

style_tab = widgets.VBox(children=[
    style_header,
    show_title_widget,
    title_widget,
    widgets.HTML("<hr style='margin: 10px 0;'>"),
    xlabel_widget,
    ylabel_widget,
    widgets.HTML("<hr style='margin: 10px 0;'>"),
    width_widget,
    height_widget,
])

# ========== TAB 2: DATA POINTS ==========
# Controls for how individual studies are drawn: colour, bubble size, transparency.
points_header = widgets.HTML("<h3 style='color: #2E86AB;'>Data Points</h3>")

# Categorical column used to colour the points; 'None' keeps a single colour.
color_mod_widget = widgets.Dropdown(
    options=available_color_moderators,
    value='None',
    description='Color By:',
    style={'description_width': '120px'},
    layout=widgets.Layout(width='450px')
)

# Bubble area given to the least precise study.
bubble_base_widget = widgets.IntSlider(
    value=20, min=0, max=200, step=10,
    description='Min Bubble Size:',
    continuous_update=False,
    style={'description_width': '120px'},
    layout=widgets.Layout(width='450px')
)

# Extra area added on top of the minimum for the most precise study: the plot
# callback computes size = minimum + fraction * range, so the largest bubble is
# minimum + range. The label therefore says "range", not "max" (the old
# 'Max Bubble Size:' label understated the real maximum by the minimum size).
bubble_range_widget = widgets.IntSlider(
    value=800, min=100, max=2000, step=100,
    description='Bubble Range:',
    continuous_update=False,
    style={'description_width': '120px'},
    layout=widgets.Layout(width='450px')
)

bubble_alpha_widget = widgets.FloatSlider(
    value=0.6, min=0.1, max=1.0, step=0.1,
    description='Transparency:',
    continuous_update=False,
    style={'description_width': '120px'},
    layout=widgets.Layout(width='450px')
)

# Single colour applied to every point when no colour moderator is chosen.
point_color_widget = widgets.Dropdown(
    options=['gray', 'blue', 'red', 'green', 'purple', 'orange'],
    value='gray',
    description='Point Color:',
    style={'description_width': '120px'},
    layout=widgets.Layout(width='450px')
)

def update_point_color_visibility(change):
    """Show the single-colour picker only while 'Color By' is set to 'None'.

    Args:
        change: Trait-change dict from ``observe``; ``change['new']`` is the
            newly selected 'Color By' value.
    """
    if change['new'] == 'None':
        point_color_widget.layout.visibility = 'visible'
    else:
        # A colour moderator overrides the single colour, so hide the picker.
        point_color_widget.layout.visibility = 'hidden'

color_mod_widget.observe(update_point_color_visibility, names='value')

points_tab = widgets.VBox([
    points_header,
    color_mod_widget,
    point_color_widget,
    widgets.HTML("<hr style='margin: 10px 0;'>"),
    widgets.HTML("<b>Bubble Size (by precision):</b>"),
    bubble_base_widget,
    bubble_range_widget,
    widgets.HTML("<hr style='margin: 10px 0;'>"),
    bubble_alpha_widget
])

# ========== TAB 3: REGRESSION LINE ==========
# Appearance of the fitted line, its confidence band and the text annotation.
regline_header = widgets.HTML(value="<h3 style='color: #2E86AB;'>Regression Line</h3>")

show_ci_widget = widgets.Checkbox(
    description='Show 95% Confidence Band', value=True, indent=False,
    layout=widgets.Layout(width='450px'),
)

line_color_widget = widgets.Dropdown(
    description='Line Color:',
    options=['red', 'blue', 'black', 'green', 'purple'],
    value='red',
    style=dict(description_width='120px'),
    layout=widgets.Layout(width='450px'),
)

line_width_widget = widgets.FloatSlider(
    description='Line Width:',
    value=2.0, min=0.5, max=5.0, step=0.5,
    continuous_update=False,
    style=dict(description_width='120px'),
    layout=widgets.Layout(width='450px'),
)

ci_alpha_widget = widgets.FloatSlider(
    description='CI Transparency:',
    value=0.3, min=0.1, max=0.8, step=0.1,
    continuous_update=False,
    style=dict(description_width='120px'),
    layout=widgets.Layout(width='450px'),
)

show_equation_widget = widgets.Checkbox(
    description='Show Regression Equation', value=True, indent=False,
    layout=widgets.Layout(width='450px'),
)

show_r2_widget = widgets.Checkbox(
    description='Show R² Value', value=True, indent=False,
    layout=widgets.Layout(width='450px'),
)

regline_tab = widgets.VBox(children=[
    regline_header,
    line_color_widget,
    line_width_widget,
    widgets.HTML("<hr style='margin: 10px 0;'>"),
    show_ci_widget,
    ci_alpha_widget,
    widgets.HTML("<hr style='margin: 10px 0;'>"),
    show_equation_widget,
    show_r2_widget,
])

# ========== TAB 4: LAYOUT ==========
# Grid, null-effect reference line and legend placement.
layout_header = widgets.HTML(value="<h3 style='color: #2E86AB;'>Layout & Legend</h3>")

show_grid_widget = widgets.Checkbox(
    description='Show Grid', value=True, indent=False,
    layout=widgets.Layout(width='450px'),
)

show_null_line_widget = widgets.Checkbox(
    description='Show Null Effect Line (y=0)', value=True, indent=False,
    layout=widgets.Layout(width='450px'),
)

# The option strings are passed straight to the legend's `loc` argument.
legend_loc_widget = widgets.Dropdown(
    description='Legend Position:',
    options=[
        'best', 'upper right', 'upper left', 'lower left', 'lower right',
        'right', 'center left', 'lower center', 'upper center', 'center',
    ],
    value='best',
    style=dict(description_width='120px'),
    layout=widgets.Layout(width='450px'),
)

legend_fontsize_widget = widgets.IntSlider(
    description='Legend Font:',
    value=10, min=6, max=14, step=1,
    continuous_update=False,
    style=dict(description_width='120px'),
    layout=widgets.Layout(width='450px'),
)

layout_tab = widgets.VBox(children=[
    layout_header,
    show_grid_widget,
    show_null_line_widget,
    widgets.HTML("<hr style='margin: 10px 0;'>"),
    legend_loc_widget,
    legend_fontsize_widget,
])

# ========== TAB 5: EXPORT ==========
# Which files to write, their resolution and naming.
export_header = widgets.HTML(value="<h3 style='color: #2E86AB;'>Export Options</h3>")

save_pdf_widget = widgets.Checkbox(
    description='Save as PDF', value=True, indent=False,
    layout=widgets.Layout(width='450px'),
)

save_png_widget = widgets.Checkbox(
    description='Save as PNG', value=True, indent=False,
    layout=widgets.Layout(width='450px'),
)

png_dpi_widget = widgets.IntSlider(
    description='PNG DPI:',
    value=300, min=150, max=600, step=50,
    continuous_update=False,
    style=dict(description_width='120px'),
    layout=widgets.Layout(width='450px'),
)

# Leading part of the output file name.
filename_prefix_widget = widgets.Text(
    description='Filename Prefix:', value='MetaRegression',
    layout=widgets.Layout(width='450px'),
    style=dict(description_width='120px'),
)

transparent_bg_widget = widgets.Checkbox(
    description='Transparent Background', value=False, indent=False,
    layout=widgets.Layout(width='450px'),
)

export_tab = widgets.VBox(children=[
    export_header,
    save_pdf_widget,
    save_png_widget,
    png_dpi_widget,
    widgets.HTML("<hr style='margin: 10px 0;'>"),
    filename_prefix_widget,
    transparent_bg_widget,
])

# Assemble the five option pages into one tab container and caption each page.
tab_children = [style_tab, points_tab, regline_tab, layout_tab, export_tab]
tab = widgets.Tab(children=tab_children)
for tab_position, tab_caption in enumerate(
    ['🎨 Style', '⚫ Points', '📈 Regression', '📐 Layout', '💾 Export']
):
    tab.set_title(tab_position, tab_caption)

# End of widget setup; the plot-generation callback is defined next.
# --- 3. DEFINE PLOT GENERATION FUNCTION ---
# Everything the callback prints or draws is captured in this output area.
plot_output = widgets.Output()

def generate_regression_plot(b):
    """Draw, display and export the meta-regression bubble plot.

    Click handler for the plot button. It reads the current values of the
    configuration widgets and the results stored in
    ``ANALYSIS_CONFIG['meta_regression_results']``, then:

    1. sizes each point by precision (1 / variance), rescaled linearly between
       the minimum bubble size and minimum + range;
    2. optionally colours the points by a categorical column;
    3. overlays the fitted regression line and, if enabled, its 95% confidence
       band obtained from the stored model's ``get_prediction``;
    4. annotates the equation / R², builds the legend and writes PDF/PNG files
       into the current working directory when requested;
    5. stores the plotted frame in ``ANALYSIS_CONFIG['meta_regression_plot_data']``.

    Args:
        b: The button instance supplied by ``on_click``; not used.

    Returns:
        None. All output goes to ``plot_output``. Any exception is printed
        there with a traceback instead of being raised.
    """
    with plot_output:
        clear_output(wait=True)

        print("\n" + "="*70)
        print("GENERATING META-REGRESSION PLOT")
        print("="*70)
        print(f"Timestamp: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

        fig = None  # tracked so the `finally` clause can release it on every path
        try:
            # --- GET WIDGET VALUES ---
            show_title = show_title_widget.value
            graph_title = title_widget.value
            x_label = xlabel_widget.value
            y_label = ylabel_widget.value
            plot_width = width_widget.value
            plot_height = height_widget.value

            color_mod = color_mod_widget.value
            point_color = point_color_widget.value
            bubble_base = bubble_base_widget.value
            bubble_range = bubble_range_widget.value
            bubble_alpha = bubble_alpha_widget.value

            show_ci = show_ci_widget.value
            line_color = line_color_widget.value
            line_width = line_width_widget.value
            ci_alpha = ci_alpha_widget.value
            show_equation = show_equation_widget.value
            show_r2 = show_r2_widget.value

            show_grid = show_grid_widget.value
            show_null_line = show_null_line_widget.value
            legend_loc = legend_loc_widget.value
            legend_fontsize = legend_fontsize_widget.value

            save_pdf = save_pdf_widget.value
            save_png = save_png_widget.value
            png_dpi = png_dpi_widget.value
            filename_prefix = filename_prefix_widget.value
            transparent_bg = transparent_bg_widget.value

            print(f"📊 Configuration:")
            print(f"  Plot size: {plot_width}\" × {plot_height}\"")
            print(f"  Color by: {color_mod}")

            # --- LOAD DATA ---
            reg_results = ANALYSIS_CONFIG['meta_regression_results']
            moderator_col_name = reg_results['moderator_col_name']
            effect_col = reg_results['effect_col']
            meta_reg_model = reg_results['meta_reg_model']
            plot_data = reg_results['reg_df'].copy()  # copy: columns are added below
            var_col = ANALYSIS_CONFIG['var_col']

            b0 = reg_results['b0_intercept']
            b1 = reg_results['b1_slope']
            R_sq = reg_results['R_squared']
            p_slope = reg_results['p_slope']

            # Render the slope with an explicit sign so a negative slope reads
            # "a - b x" rather than "a + -b x".
            slope_sign = '+' if b1 >= 0 else '-'

            print(f"  Regression: y = {b0:.4f} {slope_sign} {abs(b1):.4f}x")
            print(f"  R² = {R_sq:.2f}%, p = {p_slope:.4g}")

            if plot_data.empty:
                print("❌ ERROR: No data to plot")
                return

            print(f"  Data points: {len(plot_data)}")

            # --- PREPARE DATA ---
            # Bubble size grows linearly with precision (1/variance), from
            # bubble_base (least precise) to bubble_base + bubble_range.
            plot_data['Precision'] = 1 / plot_data[var_col]
            min_prec = plot_data['Precision'].min()
            max_prec = plot_data['Precision'].max()

            if max_prec > min_prec:
                plot_data['BubbleSize'] = bubble_base + (
                    ((plot_data['Precision'] - min_prec) / (max_prec - min_prec)) * bubble_range
                )
            else:
                # All studies equally precise: use one mid-range size.
                plot_data['BubbleSize'] = bubble_base + bubble_range / 2

            print(f"  Bubble sizes: {plot_data['BubbleSize'].min():.0f} to {plot_data['BubbleSize'].max():.0f}")

            # --- HANDLE COLOR CODING ---
            c_values = point_color  # Default single color
            cmap = None
            color_norm = None
            unique_cats = []

            if color_mod != 'None':
                try:
                    # Only merge when the regression frame does not already
                    # carry the column: merging a column that exists on both
                    # sides yields '<col>_x'/'<col>_y' and breaks the lookup.
                    if color_mod not in plot_data.columns:
                        if 'analysis_data' in ANALYSIS_CONFIG:
                            full_data = ANALYSIS_CONFIG['analysis_data']
                        elif 'data_filtered' in globals():
                            full_data = data_filtered
                        else:
                            raise ValueError("Cannot find analysis data")

                        if color_mod in full_data.columns:
                            # One row per id, otherwise the left merge would
                            # duplicate plot rows for repeated ids.
                            color_data = full_data[['id', color_mod]].drop_duplicates()
                            if color_data['id'].duplicated().any():
                                print(f"  ⚠️  Multiple '{color_mod}' values per id; using the first for each id")
                                color_data = color_data.drop_duplicates(subset='id', keep='first')
                            plot_data = pd.merge(plot_data, color_data, on='id', how='left')

                    if color_mod in plot_data.columns:
                        # Cast to object first: filling a category-dtype column
                        # with a label outside its categories raises.
                        raw_labels = plot_data[color_mod].astype(object)
                        plot_data[color_mod] = raw_labels.where(raw_labels.notna(), 'N/A').astype(str)
                        plot_data['color_codes'], unique_cats = pd.factorize(plot_data[color_mod])

                        c_values = plot_data['color_codes']
                        # tab10 has 10 colours; 10 levels plus 'N/A' need tab20
                        # to keep every group distinguishable.
                        cmap = 'tab10' if len(unique_cats) <= 10 else 'tab20'
                        color_norm = plt.Normalize(vmin=0, vmax=len(unique_cats)-1)

                        print(f"  Color categories: {len(unique_cats)} ({', '.join(unique_cats[:5])}{'...' if len(unique_cats) > 5 else ''})")
                    else:
                        print(f"  ⚠️  Color moderator '{color_mod}' not found, using default")
                        color_mod = 'None'

                except Exception as e:
                    print(f"  ⚠️  Error applying color: {e}")
                    print(f"     Using default color")
                    # Reset every colour-related setting so a partial failure
                    # cannot leak a colormap/normaliser into the scatter call.
                    c_values = point_color
                    cmap = None
                    color_norm = None
                    unique_cats = []
                    color_mod = 'None'

            # --- CREATE FIGURE ---
            fig, ax = plt.subplots(figsize=(plot_width, plot_height))

            if transparent_bg:
                fig.patch.set_alpha(0)
                ax.patch.set_alpha(0)

            print(f"\n🎨 Creating plot...")

            # --- PLOT DATA POINTS ---
            ax.scatter(
                x=plot_data[moderator_col_name],
                y=plot_data[effect_col],
                s=plot_data['BubbleSize'],
                c=c_values,
                cmap=cmap,
                norm=color_norm,
                alpha=bubble_alpha,
                edgecolors='black',
                linewidths=0.5,
                zorder=3
            )

            # --- PLOT REGRESSION LINE ---
            # Best effort: a failure here is reported but the scatter is kept.
            try:
                # Evaluate the model on a grid extending 5% beyond the data range.
                x_min = plot_data[moderator_col_name].min()
                x_max = plot_data[moderator_col_name].max()
                x_range_val = x_max - x_min
                x_padding = x_range_val * 0.05

                x_line = np.linspace(x_min - x_padding, x_max + x_padding, 200)
                x_line_df = pd.DataFrame({moderator_col_name: x_line})
                X_line_pred = sm.add_constant(x_line_df, has_constant='add')

                # Predicted mean and its 95% interval at each grid point
                pred = meta_reg_model.get_prediction(X_line_pred)
                pred_summary = pred.summary_frame(alpha=0.05)

                ax.plot(
                    x_line,
                    pred_summary['mean'],
                    color=line_color,
                    linewidth=line_width,
                    linestyle='-',
                    label='Regression Line',
                    zorder=2
                )

                if show_ci:
                    ax.fill_between(
                        x_line,
                        pred_summary['mean_ci_lower'],
                        pred_summary['mean_ci_upper'],
                        color=line_color,
                        alpha=ci_alpha,
                        label='95% CI',
                        zorder=1
                    )

                print(f"  ✓ Regression line plotted")

            except Exception as e:
                print(f"  ⚠️  Error plotting regression line: {e}")

            # --- CUSTOMIZE AXES ---
            if show_null_line:
                ax.axhline(
                    0,
                    color='gray',
                    linestyle='--',
                    linewidth=1.0,
                    alpha=0.7,
                    zorder=0
                )

            ax.set_xlabel(x_label, fontsize=12, fontweight='bold')
            ax.set_ylabel(y_label, fontsize=12, fontweight='bold')

            if show_title:
                ax.set_title(graph_title, fontsize=14, fontweight='bold', pad=15)

            ax.tick_params(axis='both', which='major', labelsize=10)

            if show_grid:
                ax.grid(True, linestyle=':', alpha=0.4, zorder=0)

            # --- ADD EQUATION AND R² ---
            if show_equation or show_r2:
                text_lines = []

                if show_equation:
                    text_lines.append(f'y = {b0:.3f} {slope_sign} {abs(b1):.3f}x')

                if show_r2:
                    # Significance stars refer to the slope's p-value.
                    sig_marker = '***' if p_slope < 0.001 else '**' if p_slope < 0.01 else '*' if p_slope < 0.05 else 'ns'
                    # R² may be NaN when it could not be computed upstream.
                    r2_value = 'N/A' if pd.isna(R_sq) else f'{R_sq:.1f}%'
                    text_lines.append(f'R² = {r2_value} ({sig_marker})')

                if text_lines:
                    # Boxed annotation in the upper-left corner (axes coordinates)
                    ax.text(
                        0.05, 0.95,
                        '\n'.join(text_lines),
                        transform=ax.transAxes,
                        fontsize=11,
                        verticalalignment='top',
                        bbox=dict(boxstyle='round', facecolor='white', alpha=0.8, edgecolor='gray'),
                        zorder=10
                    )

            # --- CREATE LEGEND ---
            handles = []
            labels_list = []

            # Regression line and CI band (the only labelled artists so far)
            line_handles, line_labels = ax.get_legend_handles_labels()
            handles.extend(line_handles)
            labels_list.extend(line_labels)

            # One patch per colour category, using the same colormap and
            # normalisation as the scatter so the colours match.
            if color_mod != 'None' and len(unique_cats) > 0:
                for i, cat in enumerate(unique_cats):
                    color_val = plt.get_cmap(cmap)(color_norm(i))
                    patch = Patch(
                        facecolor=color_val,
                        edgecolor='black',
                        label=cat,
                        alpha=0.7
                    )
                    handles.append(patch)
                    labels_list.append(cat)

            # Empty scatter that only serves as the bubble-size legend entry
            bubble_handle = ax.scatter(
                [], [],
                s=150,
                c='gray' if color_mod == 'None' else 'lightgray',
                alpha=0.6,
                edgecolors='black',
                linewidths=0.5,
                label='Study (size ∝ precision)'
            )
            handles.append(bubble_handle)
            labels_list.append('Study (size ∝ precision)')

            if handles:
                ax.legend(
                    handles=handles,
                    labels=labels_list,
                    loc=legend_loc,
                    fontsize=legend_fontsize,
                    framealpha=0.9,
                    edgecolor='gray'
                )

            # --- FINALIZE ---
            fig.tight_layout()

            # --- SAVE FILES ---
            print(f"\n💾 Saving files...")

            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            # Strip characters from the moderator name that are awkward in file names
            clean_mod = moderator_col_name.replace(' ', '_').replace('/', '-').replace('(', '').replace(')', '')
            base_filename = f"{filename_prefix}_{clean_mod}_{timestamp}"

            saved_files = []

            if save_pdf:
                pdf_filename = f"{base_filename}.pdf"
                fig.savefig(pdf_filename, bbox_inches='tight', transparent=transparent_bg)
                saved_files.append(pdf_filename)
                print(f"  ✓ {pdf_filename}")

            if save_png:
                png_filename = f"{base_filename}.png"
                fig.savefig(png_filename, dpi=png_dpi, bbox_inches='tight', transparent=transparent_bg)
                saved_files.append(png_filename)
                print(f"  ✓ {png_filename} (DPI: {png_dpi})")

            plt.show()

            print(f"\n" + "="*70)
            print("✅ META-REGRESSION PLOT COMPLETE")
            print("="*70)
            print(f"Files: {', '.join(saved_files)}")

            # --- SAVE PLOT DATA ---
            ANALYSIS_CONFIG['meta_regression_plot_data'] = {
                'timestamp': datetime.datetime.now(),
                'plot_data': plot_data,
                'moderator': moderator_col_name,
                'effect': effect_col,
                'color_moderator': color_mod if color_mod != 'None' else None
            }

        except Exception as e:
            print(f"\n❌ ERROR: {e}")
            import traceback
            traceback.print_exc()

        finally:
            # Release the figure even when an error interrupted the run, so
            # repeated clicks do not pile up open figures or leave a
            # half-drawn one behind for a later cell to render.
            if fig is not None:
                plt.close(fig)

# --- 4. CREATE BUTTON AND DISPLAY ---
# Large green button that triggers plot generation.
plot_button = widgets.Button(
    description='📊 Generate Regression Plot',
    button_style='success',
    layout=widgets.Layout(width='450px', height='50px'),
    style=dict(font_weight='bold'),
)
plot_button.on_click(generate_regression_plot)

# Ready banner followed by short usage tips.
print(
    "\n" + "="*70,
    "✅ META-REGRESSION PLOT INTERFACE READY",
    "="*70,
    "👆 Customize your plot using the tabs above, then click Generate",
    "\n📝 Tips:",
    "  • Bubble size represents study precision (1/variance)",
    "  • Use 'Color By' to highlight patterns by categorical variables",
    "  • R² shows proportion of heterogeneity explained by moderator",
    "="*70 + "\n",
    sep="\n"
)

# Full panel: banner, option tabs, generate button, then the plot output area.
display(widgets.VBox(children=[
    header,
    widgets.HTML("<hr style='margin: 15px 0;'>"),
    tab,
    widgets.HTML("<hr style='margin: 15px 0;'>"),
    plot_button,
    plot_output,
]))


META-REGRESSION PLOT CONFIGURATION
✓ Moderator: kgPot
✓ Effect size: Hedges' g
✓ Found 8 categorical moderators for color
✓ Configuration loaded successfully

✅ META-REGRESSION PLOT INTERFACE READY
👆 Customize your plot using the tabs above, then click Generate

📝 Tips:
  • Bubble size represents study precision (1/variance)
  • Use 'Color By' to highlight patterns by categorical variables
  • R² shows proportion of heterogeneity explained by moderator



VBox(children=(HTML(value="<h3 style='color: #2E86AB;'>Meta-Regression Plot Setup</h3><p style='color: #666;'>…

In [None]:
#@title 📊 FUNNEL PLOT & PUBLICATION BIAS ASSESSMENT

# =============================================================================
# CELL 12: FUNNEL PLOT FOR PUBLICATION BIAS
# Purpose: Assess publication bias through visual and statistical methods
# Dependencies: Cell 6 (overall_results), Cell 5 (data)
# Outputs: Funnel plot (PDF/PNG) and bias test results
# =============================================================================

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output
from scipy.stats import norm, linregress
import datetime

print("="*70, "FUNNEL PLOT CONFIGURATION", "="*70, sep="\n")

# --- 1. LOAD CONFIGURATION ---
# Everything below is read from ANALYSIS_CONFIG, which is expected to have been
# populated by an earlier cell; any missing piece aborts the cell with a hint.
try:
    if not ('ANALYSIS_CONFIG' in locals() or 'ANALYSIS_CONFIG' in globals()):
        raise NameError("ANALYSIS_CONFIG not found.")

    # Column names and effect-size settings (lookup order decides which
    # KeyError surfaces first, so it is kept as-is)
    es_config = ANALYSIS_CONFIG['es_config']
    effect_col = ANALYSIS_CONFIG['effect_col']
    se_col = ANALYSIS_CONFIG['se_col']
    var_col = ANALYSIS_CONFIG['var_col']

    # Data source: the stored frame wins, the global data_filtered is the fallback
    if 'analysis_data' not in ANALYSIS_CONFIG and 'data_filtered' not in globals():
        raise ValueError("Cannot find analysis data")
    analysis_data = (
        ANALYSIS_CONFIG['analysis_data']
        if 'analysis_data' in ANALYSIS_CONFIG
        else data_filtered
    )

    overall_results = ANALYSIS_CONFIG['overall_results']

    # Defaults offered in the title / axis-label text boxes
    effect_label = es_config['effect_label']
    default_title = "Funnel Plot: Publication Bias Assessment"
    default_x_label = effect_label
    default_y_label = "Standard Error"

    print(f"✓ Effect size: {effect_label} ({effect_col})")
    print(f"✓ Standard error: {se_col}")
    print(f"✓ Data points: {len(analysis_data)}")
    print("✓ Configuration loaded successfully")

except (KeyError, NameError, ValueError) as load_error:
    print(f"❌ ERROR: {load_error}")
    print("   Please run Cell 6 (Overall Analysis) first")
    raise

# --- 2. CREATE WIDGETS ---
# The controls below share the same width and description-column styling, so
# they are built through small private factories. Each call creates a fresh
# Layout / style dict, exactly as the widgets would get if written out by hand.

def _funnel_layout():
    """Return a new 450px-wide Layout (one instance per widget)."""
    return widgets.Layout(width='450px')


def _funnel_desc_style():
    """Return a new style dict fixing the description column at 120px."""
    return {'description_width': '120px'}


def _funnel_heading(text):
    """Return the coloured <h3> heading shown at the top of a tab."""
    return widgets.HTML(f"<h3 style='color: #2E86AB;'>{text}</h3>")


def _funnel_rule():
    """Return a thin horizontal separator used inside a tab."""
    return widgets.HTML("<hr style='margin: 10px 0;'>")


def _funnel_checkbox(description, value):
    """Return an un-indented checkbox with the standard width."""
    return widgets.Checkbox(
        value=value,
        description=description,
        indent=False,
        layout=_funnel_layout()
    )


def _funnel_text(description, value):
    """Return a text box with the standard width and description column."""
    return widgets.Text(
        value=value,
        description=description,
        layout=_funnel_layout(),
        style=_funnel_desc_style()
    )


def _funnel_slider(slider_cls, description, value, lower, upper, step):
    """Return a slider of `slider_cls` that only fires when released."""
    return slider_cls(
        value=value, min=lower, max=upper, step=step,
        description=description,
        continuous_update=False,
        style=_funnel_desc_style(),
        layout=_funnel_layout()
    )


def _funnel_dropdown(description, options, value):
    """Return a dropdown with the standard width and description column."""
    return widgets.Dropdown(
        options=options,
        value=value,
        description=description,
        style=_funnel_desc_style(),
        layout=_funnel_layout()
    )


def _funnel_tabs(pages, captions):
    """Return a Tab container holding `pages`, titled in order by `captions`."""
    container = widgets.Tab(children=pages)
    for position, caption in enumerate(captions):
        container.set_title(position, caption)
    return container


# Header
header = widgets.HTML(
    "<h3 style='color: #2E86AB;'>Funnel Plot Setup</h3>"
    "<p style='color: #666;'><i>Visual and statistical assessment of publication bias</i></p>"
)

# ========== TAB 1: PLOT STYLE ==========
style_header = _funnel_heading("Plot Style")

show_title_widget = _funnel_checkbox('Show Plot Title', True)
title_widget = _funnel_text('Plot Title:', default_title)
xlabel_widget = _funnel_text('X-Axis Label:', default_x_label)
ylabel_widget = _funnel_text('Y-Axis Label:', default_y_label)
width_widget = _funnel_slider(widgets.FloatSlider, 'Plot Width (in):', 8.0, 5.0, 14.0, 0.5)
height_widget = _funnel_slider(widgets.FloatSlider, 'Plot Height (in):', 6.0, 4.0, 12.0, 0.5)

style_tab = widgets.VBox([
    style_header,
    show_title_widget,
    title_widget,
    _funnel_rule(),
    xlabel_widget,
    ylabel_widget,
    _funnel_rule(),
    width_widget,
    height_widget
])

# ========== TAB 2: PLOT ELEMENTS ==========
elements_header = _funnel_heading("Plot Elements")

reference_model_widget = _funnel_dropdown(
    'Reference Line:',
    [('Fixed-Effects', 'fixed'), ('Random-Effects', 'random')],
    'fixed'
)
show_ci_funnel_widget = _funnel_checkbox('Show 95% CI Funnel', True)
show_contours_widget = _funnel_checkbox('Show Significance Contours', False)
point_color_widget = _funnel_dropdown(
    'Point Color:',
    ['blue', 'black', 'red', 'green', 'purple'],
    'blue'
)
point_size_widget = _funnel_slider(widgets.IntSlider, 'Point Size:', 50, 20, 200, 10)
point_alpha_widget = _funnel_slider(widgets.FloatSlider, 'Transparency:', 0.6, 0.1, 1.0, 0.1)

elements_tab = widgets.VBox([
    elements_header,
    reference_model_widget,
    _funnel_rule(),
    show_ci_funnel_widget,
    show_contours_widget,
    _funnel_rule(),
    point_color_widget,
    point_size_widget,
    point_alpha_widget
])

# ========== TAB 3: BIAS TESTS ==========
tests_header = _funnel_heading("Publication Bias Tests")

run_egger_widget = _funnel_checkbox("Run Egger's Regression Test", True)
run_begg_widget = _funnel_checkbox("Run Begg's Rank Correlation Test", True)
show_trim_fill_widget = _funnel_checkbox('Show Trim-and-Fill Estimate', False)

tests_info = widgets.HTML(
    "<div style='background-color: #f0f0f0; padding: 10px; margin: 10px 0; border-radius: 5px;'>"
    "<b>About the Tests:</b><br>"
    "• <b>Egger's Test:</b> Tests for funnel plot asymmetry using regression.<br>"
    "• <b>Begg's Test:</b> Tests for correlation between effect sizes and variances.<br>"
    "• <b>Trim-and-Fill:</b> Estimates number of missing studies due to bias."
    "</div>"
)

tests_tab = widgets.VBox([
    tests_header,
    tests_info,
    run_egger_widget,
    run_begg_widget,
    show_trim_fill_widget
])

# ========== TAB 4: LAYOUT ==========
layout_header = _funnel_heading("Layout & Legend")

show_grid_widget = _funnel_checkbox('Show Grid', True)
legend_loc_widget = _funnel_dropdown(
    'Legend Position:',
    ['best', 'upper right', 'upper left', 'lower left', 'lower right',
     'right', 'center left', 'lower center', 'upper center', 'center'],
    'upper right'
)
legend_fontsize_widget = _funnel_slider(widgets.IntSlider, 'Legend Font:', 10, 6, 14, 1)

layout_tab = widgets.VBox([
    layout_header,
    show_grid_widget,
    _funnel_rule(),
    legend_loc_widget,
    legend_fontsize_widget
])

# ========== TAB 5: EXPORT ==========
export_header = _funnel_heading("Export Options")

save_pdf_widget = _funnel_checkbox('Save as PDF', True)
save_png_widget = _funnel_checkbox('Save as PNG', True)
png_dpi_widget = _funnel_slider(widgets.IntSlider, 'PNG DPI:', 300, 150, 600, 50)
filename_prefix_widget = _funnel_text('Filename Prefix:', 'FunnelPlot')
transparent_bg_widget = _funnel_checkbox('Transparent Background', False)

export_tab = widgets.VBox([
    export_header,
    save_pdf_widget,
    save_png_widget,
    png_dpi_widget,
    _funnel_rule(),
    filename_prefix_widget,
    transparent_bg_widget
])

# Create tabs
tab_children = [style_tab, elements_tab, tests_tab, layout_tab, export_tab]
tab = _funnel_tabs(
    tab_children,
    ['🎨 Style', '📊 Elements', '🔬 Tests', '📐 Layout', '💾 Export']
)

# --- 3. DEFINE PLOT GENERATION FUNCTION ---
# Output area the click handler renders into
plot_output = widgets.Output()

# Widgets owned by this cell, captured when the cell runs. Other cells in this
# notebook rebind several of these global names (the leave-one-out cell creates
# its own save_pdf_widget / save_png_widget / png_dpi_widget), so the click
# handler must not resolve them by global name at click time.
_FUNNEL_WIDGETS = {
    'show_title': show_title_widget,
    'graph_title': title_widget,
    'x_label': xlabel_widget,
    'y_label': ylabel_widget,
    'plot_width': width_widget,
    'plot_height': height_widget,
    'reference_model': reference_model_widget,
    'show_ci_funnel': show_ci_funnel_widget,
    'show_contours': show_contours_widget,
    'point_color': point_color_widget,
    'point_size': point_size_widget,
    'point_alpha': point_alpha_widget,
    'run_egger': run_egger_widget,
    'run_begg': run_begg_widget,
    'show_trim_fill': show_trim_fill_widget,
    'show_grid': show_grid_widget,
    'legend_loc': legend_loc_widget,
    'legend_fontsize': legend_fontsize_widget,
    'save_pdf': save_pdf_widget,
    'save_png': save_png_widget,
    'png_dpi': png_dpi_widget,
    'filename_prefix': filename_prefix_widget,
    'transparent_bg': transparent_bg_widget,
}
_FUNNEL_OUTPUT = plot_output


def _egger_regression_test(effects, std_errors):
    """Run Egger's regression test for funnel-plot asymmetry.

    The standard normal deviate (effect / SE) is regressed on precision (1 / SE).
    The quantity under test is the *intercept* of that regression, so the
    p-value comes from a two-sided t-test of the intercept on k - 2 degrees of
    freedom (the p-value that `linregress` reports belongs to the slope).

    Args:
        effects: effect sizes, one per study.
        std_errors: matching standard errors; all must be > 0.

    Returns:
        dict with keys 'intercept', 'intercept_se', 't_stat', 'df', 'slope',
        'p_value' and 'r_squared'. 't_stat' and 'p_value' are NaN when the
        intercept standard error is zero or not finite.

    Raises:
        ValueError: if fewer than 3 studies are supplied (also raised by
            `linregress` itself, e.g. when all precisions are identical).
    """
    from scipy.stats import t as student_t

    effects = np.asarray(effects, dtype=float)
    std_errors = np.asarray(std_errors, dtype=float)
    k = len(effects)
    if k < 3:
        raise ValueError("Egger's test needs at least 3 studies")

    precision = 1.0 / std_errors
    standardized_effect = effects / std_errors
    fit = linregress(precision, standardized_effect)

    dof = k - 2
    intercept_se = float(fit.intercept_stderr)
    if np.isfinite(intercept_se) and intercept_se > 0:
        t_stat = float(fit.intercept) / intercept_se
        p_value = float(2 * student_t.sf(abs(t_stat), dof))
    else:
        t_stat = float('nan')
        p_value = float('nan')

    return {
        'intercept': float(fit.intercept),
        'intercept_se': intercept_se,
        't_stat': t_stat,
        'df': dof,
        'slope': float(fit.slope),
        'p_value': p_value,
        'r_squared': float(fit.rvalue) ** 2
    }


def _begg_rank_test(effects, variances):
    """Run the Begg & Mazumdar rank correlation test.

    Kendall's tau is computed between the variances and the *standardized
    deviates* (y_i - pooled_FE) / sqrt(v_i - 1 / sum(1 / v_j)), rather than the
    raw effects.

    Args:
        effects: effect sizes, one per study.
        variances: matching sampling variances; all must be > 0.

    Returns:
        dict with keys 'tau' and 'p_value' (either may be NaN if the
        correlation is undefined for the supplied data).
    """
    from scipy.stats import kendalltau

    effects = np.asarray(effects, dtype=float)
    variances = np.asarray(variances, dtype=float)

    weights = 1.0 / variances
    sum_w = weights.sum()
    pooled_fe = (weights * effects).sum() / sum_w

    # Variance of (y_i - pooled_FE): v_i minus the variance of the pooled mean
    deviate_var = variances - 1.0 / sum_w
    with np.errstate(divide='ignore', invalid='ignore'):
        deviates = (effects - pooled_fe) / np.sqrt(deviate_var)

    tau, p_value = kendalltau(deviates, variances)
    return {'tau': float(tau), 'p_value': float(p_value)}


def generate_funnel_plot(b):
    """Button callback: run the publication-bias tests and draw the funnel plot.

    Reads the option values from the widgets captured in `_FUNNEL_WIDGETS`,
    loads the data and pooled estimates from `ANALYSIS_CONFIG`, prints the
    selected bias tests (Egger, Begg, simplified trim-and-fill), draws the
    funnel plot, optionally saves it as PDF/PNG in the working directory, and
    stores a summary under `ANALYSIS_CONFIG['funnel_plot_results']`.

    All output goes to the cell's Output widget. Errors are printed together
    with a traceback instead of being raised.

    Args:
        b: the object passed by `Button.on_click` (unused).

    Returns:
        None.
    """
    with _FUNNEL_OUTPUT:
        clear_output(wait=True)

        print("\n" + "="*70)
        print("FUNNEL PLOT & PUBLICATION BIAS ASSESSMENT")
        print("="*70)
        print(f"Timestamp: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

        fig = None  # created later; closed in `finally` on every path

        try:
            # --- GET WIDGET VALUES ---
            settings = {name: widget.value for name, widget in _FUNNEL_WIDGETS.items()}

            show_title = settings['show_title']
            graph_title = settings['graph_title']
            x_label = settings['x_label']
            y_label = settings['y_label']
            plot_width = settings['plot_width']
            plot_height = settings['plot_height']

            reference_model = settings['reference_model']
            show_ci_funnel = settings['show_ci_funnel']
            show_contours = settings['show_contours']
            point_color = settings['point_color']
            point_size = settings['point_size']
            point_alpha = settings['point_alpha']

            run_egger = settings['run_egger']
            run_begg = settings['run_begg']
            show_trim_fill = settings['show_trim_fill']

            show_grid = settings['show_grid']
            legend_loc = settings['legend_loc']
            legend_fontsize = settings['legend_fontsize']

            save_pdf = settings['save_pdf']
            save_png = settings['save_png']
            png_dpi = settings['png_dpi']
            filename_prefix = settings['filename_prefix']
            transparent_bg = settings['transparent_bg']

            print(f"📊 Configuration:")
            print(f"  Plot size: {plot_width}\" × {plot_height}\"")
            print(f"  Reference: {reference_model}-effects model")

            # --- LOAD DATA ---
            if 'analysis_data' in ANALYSIS_CONFIG:
                plot_data = ANALYSIS_CONFIG['analysis_data'].copy()
            elif 'data_filtered' in globals():
                plot_data = data_filtered.copy()
            else:
                print("❌ ERROR: Cannot find analysis data")
                return

            effect_col = ANALYSIS_CONFIG['effect_col']
            se_col = ANALYSIS_CONFIG['se_col']
            var_col = ANALYSIS_CONFIG['var_col']
            overall_results = ANALYSIS_CONFIG['overall_results']

            # Get reference effect size
            if reference_model == 'fixed':
                pooled_effect = overall_results['pooled_effect_fixed']
                model_label = 'Fixed-Effects'
            else:
                pooled_effect = overall_results['pooled_effect_random']
                model_label = 'Random-Effects'

            print(f"  Pooled effect ({model_label}): {pooled_effect:.4f}")

            # --- PREPARE DATA ---
            # Remove missing values
            plot_data = plot_data.dropna(subset=[effect_col, se_col])

            # A standard error of zero (or below) cannot be placed on the funnel
            # and makes precision = 1/SE infinite, so such rows are excluded.
            valid_se = plot_data[se_col] > 0
            n_invalid_se = int((~valid_se).sum())
            if n_invalid_se > 0:
                print(f"  ⚠️  Excluded {n_invalid_se} row(s) with non-positive standard error")
                plot_data = plot_data[valid_se]

            if len(plot_data) == 0:
                print("❌ ERROR: No valid data points for funnel plot")
                return

            print(f"  Data points: {len(plot_data)}")

            # --- RUN PUBLICATION BIAS TESTS ---
            print(f"\n" + "="*70)
            print("PUBLICATION BIAS TESTS")
            print("="*70)

            bias_results = {}

            # Egger's Regression Test
            if run_egger:
                print(f"\n🔬 Egger's Regression Test:")

                if len(plot_data) < 3:
                    print(f"  ⚠️  Skipped: needs at least 3 studies")
                else:
                    try:
                        egger = _egger_regression_test(
                            plot_data[effect_col], plot_data[se_col]
                        )

                        if np.isnan(egger['p_value']):
                            print(f"  ⚠️  Intercept test could not be computed for these data")
                        else:
                            bias_results['egger'] = egger
                            p_value = egger['p_value']

                            print(f"  • Intercept (bias): {egger['intercept']:.4f}")
                            print(f"  • Standard error: {egger['intercept_se']:.4f}")
                            print(f"  • t-statistic: {egger['t_stat']:.4f} (df = {egger['df']})")
                            print(f"  • P-value: {p_value:.4g}")

                            if p_value < 0.05:
                                print(f"  🔴 SIGNIFICANT ASYMMETRY DETECTED (p < 0.05)")
                                print(f"     Evidence of publication bias or small-study effects")
                            elif p_value < 0.10:
                                print(f"  🟡 MARGINAL ASYMMETRY (p < 0.10)")
                                print(f"     Possible publication bias")
                            else:
                                print(f"  ✓ NO SIGNIFICANT ASYMMETRY (p ≥ 0.10)")
                                print(f"     No strong evidence of publication bias")

                    except Exception as e:
                        print(f"  ⚠️  Error running Egger's test: {e}")

            # Begg's Rank Correlation Test
            if run_begg:
                print(f"\n🔬 Begg's Rank Correlation Test:")

                # The test needs a usable variance for every study it ranks
                begg_data = plot_data.dropna(subset=[var_col])
                begg_data = begg_data[begg_data[var_col] > 0]

                if len(begg_data) < 3:
                    print(f"  ⚠️  Skipped: needs at least 3 studies with a valid variance")
                else:
                    try:
                        begg = _begg_rank_test(begg_data[effect_col], begg_data[var_col])

                        if np.isnan(begg['p_value']):
                            print(f"  ⚠️  Rank correlation could not be computed for these data")
                        else:
                            bias_results['begg'] = begg
                            p_value = begg['p_value']

                            print(f"  • Kendall's tau: {begg['tau']:.4f}")
                            print(f"  • P-value: {p_value:.4g}")

                            if p_value < 0.05:
                                print(f"  🔴 SIGNIFICANT CORRELATION (p < 0.05)")
                                print(f"     Evidence of publication bias")
                            elif p_value < 0.10:
                                print(f"  🟡 MARGINAL CORRELATION (p < 0.10)")
                                print(f"     Possible publication bias")
                            else:
                                print(f"  ✓ NO SIGNIFICANT CORRELATION (p ≥ 0.10)")
                                print(f"     No strong evidence of publication bias")

                    except Exception as e:
                        print(f"  ⚠️  Error running Begg's test: {e}")

            # Trim-and-Fill (simplified version)
            if show_trim_fill:
                print(f"\n🔬 Trim-and-Fill Analysis:")
                print(f"  ℹ️  Simplified implementation")

                # Count studies on each side of pooled effect
                left_side = (plot_data[effect_col] < pooled_effect).sum()
                right_side = (plot_data[effect_col] >= pooled_effect).sum()

                print(f"  • Studies left of pooled effect: {left_side}")
                print(f"  • Studies right of pooled effect: {right_side}")

                imbalance = abs(left_side - right_side)
                total = left_side + right_side  # > 0: plot_data is non-empty here

                if imbalance / total > 0.2:  # More than 20% imbalance
                    print(f"  🟡 Notable asymmetry detected")
                    print(f"     Estimated missing studies: ~{imbalance}")
                else:
                    print(f"  ✓ Relatively symmetric distribution")

            # Overall interpretation
            print(f"\n" + "="*70)
            print("OVERALL INTERPRETATION")
            print("="*70)

            # Only tests that actually produced a result are counted
            total_tests = len(bias_results)
            sig_tests = sum(
                1 for result in bias_results.values() if result['p_value'] < 0.10
            )

            if total_tests > 0:
                print(f"\n📊 {sig_tests}/{total_tests} tests suggest possible publication bias")

                if sig_tests == 0:
                    print(f"\n✓ LOW RISK OF PUBLICATION BIAS")
                    print(f"  • No statistical tests show significant asymmetry")
                    print(f"  • Results appear to be unbiased")
                elif sig_tests < total_tests:
                    print(f"\n🟡 MODERATE RISK OF PUBLICATION BIAS")
                    print(f"  • Mixed evidence from statistical tests")
                    print(f"  • Visual inspection of funnel plot recommended")
                    print(f"  • Consider sensitivity analyses")
                else:
                    print(f"\n🔴 HIGH RISK OF PUBLICATION BIAS")
                    print(f"  • Multiple tests suggest asymmetry")
                    print(f"  • Possible small-study effects or selective reporting")
                    print(f"  • Interpret pooled effect with caution")

            # --- CREATE FIGURE ---
            print(f"\n" + "="*70)
            print("GENERATING PLOT")
            print("="*70 + "\n")

            fig, ax = plt.subplots(figsize=(plot_width, plot_height))

            if transparent_bg:
                fig.patch.set_alpha(0)
                ax.patch.set_alpha(0)

            # Shared SE grid for the funnel and the significance contours
            if show_ci_funnel or show_contours:
                se_max = plot_data[se_col].max()
                se_range = np.linspace(0, se_max * 1.1, 200)

            # --- PLOT CONFIDENCE FUNNEL ---
            if show_ci_funnel:
                # 95% CI limits around the pooled effect
                upper_ci = pooled_effect + 1.96 * se_range
                lower_ci = pooled_effect - 1.96 * se_range

                ax.plot(upper_ci, se_range,
                       color='gray', linestyle='--', linewidth=1.5,
                       label='95% CI', alpha=0.7)
                ax.plot(lower_ci, se_range,
                       color='gray', linestyle='--', linewidth=1.5, alpha=0.7)

                # Fill the funnel
                ax.fill_betweenx(se_range, lower_ci, upper_ci,
                               color='lightgray', alpha=0.2)

            # --- PLOT SIGNIFICANCE CONTOURS ---
            if show_contours:
                # P=0.05 contours (z = 1.96), centred on the null effect
                sig_upper = 0 + 1.96 * se_range
                sig_lower = 0 - 1.96 * se_range

                ax.plot(sig_upper, se_range,
                       color='red', linestyle=':', linewidth=1,
                       label='P=0.05 (if null)', alpha=0.5)
                ax.plot(sig_lower, se_range,
                       color='red', linestyle=':', linewidth=1, alpha=0.5)

            # --- PLOT DATA POINTS ---
            ax.scatter(
                plot_data[effect_col],
                plot_data[se_col],
                s=point_size,
                c=point_color,
                alpha=point_alpha,
                edgecolors='black',
                linewidths=0.5,
                label='Studies',
                zorder=3
            )

            # --- PLOT REFERENCE LINE ---
            ax.axvline(
                x=pooled_effect,
                color='darkred',
                linestyle='-',
                linewidth=2,
                label=f'Pooled Effect ({model_label})',
                zorder=2
            )

            # Add null effect line if different from pooled
            if abs(pooled_effect) > 0.01:
                ax.axvline(
                    x=0,
                    color='black',
                    linestyle='--',
                    linewidth=1,
                    alpha=0.5,
                    label='Null Effect',
                    zorder=1
                )

            # --- CUSTOMIZE AXES ---
            ax.set_xlabel(x_label, fontsize=12, fontweight='bold')
            ax.set_ylabel(y_label, fontsize=12, fontweight='bold')

            if show_title:
                ax.set_title(graph_title, fontsize=14, fontweight='bold', pad=15)

            # Invert y-axis (standard practice for funnel plots)
            ax.invert_yaxis()

            ax.tick_params(axis='both', which='major', labelsize=10)

            if show_grid:
                ax.grid(True, linestyle=':', alpha=0.4, zorder=0)

            # --- CREATE LEGEND ---
            ax.legend(loc=legend_loc, fontsize=legend_fontsize, framealpha=0.9)

            # --- ADD TEXT ANNOTATION WITH TEST RESULTS ---
            if (run_egger or run_begg) and bias_results:
                text_lines = []

                if 'egger' in bias_results:
                    p_egger = bias_results['egger']['p_value']
                    sig_egger = "***" if p_egger < 0.001 else "**" if p_egger < 0.01 else "*" if p_egger < 0.05 else "ns"
                    text_lines.append(f"Egger's test: p={p_egger:.3g} {sig_egger}")

                if 'begg' in bias_results:
                    p_begg = bias_results['begg']['p_value']
                    sig_begg = "***" if p_begg < 0.001 else "**" if p_begg < 0.01 else "*" if p_begg < 0.05 else "ns"
                    text_lines.append(f"Begg's test: p={p_begg:.3g} {sig_begg}")

                if text_lines:
                    text_str = '\n'.join(text_lines)
                    ax.text(
                        0.05, 0.05,
                        text_str,
                        transform=ax.transAxes,
                        fontsize=10,
                        verticalalignment='bottom',
                        bbox=dict(boxstyle='round', facecolor='white', alpha=0.8, edgecolor='gray'),
                        zorder=10
                    )

            # --- FINALIZE ---
            fig.tight_layout()

            # --- SAVE FILES ---
            print(f"💾 Saving files...")

            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            base_filename = f"{filename_prefix}_{timestamp}"

            saved_files = []

            if save_pdf:
                pdf_filename = f"{base_filename}.pdf"
                fig.savefig(pdf_filename, bbox_inches='tight', transparent=transparent_bg)
                saved_files.append(pdf_filename)
                print(f"  ✓ {pdf_filename}")

            if save_png:
                png_filename = f"{base_filename}.png"
                fig.savefig(png_filename, dpi=png_dpi, bbox_inches='tight', transparent=transparent_bg)
                saved_files.append(png_filename)
                print(f"  ✓ {png_filename} (DPI: {png_dpi})")

            plt.show()

            print(f"\n" + "="*70)
            print("✅ FUNNEL PLOT COMPLETE")
            print("="*70)
            print(f"Files: {', '.join(saved_files)}")

            # --- SAVE RESULTS ---
            ANALYSIS_CONFIG['funnel_plot_results'] = {
                'timestamp': datetime.datetime.now(),
                'bias_tests': bias_results,
                'n_studies': len(plot_data),
                'pooled_effect': pooled_effect,
                'reference_model': reference_model
            }

            print(f"✓ Results saved to ANALYSIS_CONFIG['funnel_plot_results']")

        except Exception as e:
            print(f"\n❌ ERROR: {e}")
            import traceback
            traceback.print_exc()

        finally:
            # Release the figure even when saving or drawing failed part-way
            if fig is not None:
                plt.close(fig)

# --- 4. CREATE BUTTON AND DISPLAY ---
# Large green button that triggers the funnel-plot callback
plot_button = widgets.Button(
    style={'font_weight': 'bold'},
    layout=widgets.Layout(width='450px', height='50px'),
    button_style='success',
    description='📊 Generate Funnel Plot'
)
plot_button.on_click(generate_funnel_plot)

# Readiness banner and reading tips (one print call, one line per argument)
print(
    "\n" + "="*70,
    "✅ FUNNEL PLOT INTERFACE READY",
    "="*70,
    "👆 Customize your plot using the tabs above, then click Generate",
    "\n📝 Tips:",
    "  • Symmetric funnel suggests no publication bias",
    "  • Missing studies in bottom corners indicate possible bias",
    "  • Run both Egger's and Begg's tests for robust assessment",
    "  • Consider heterogeneity as alternative explanation for asymmetry",
    "="*70 + "\n",
    sep="\n"
)

# Stack header, option tabs, button and output area into a single panel
display(
    widgets.VBox(children=[
        header,
        widgets.HTML("<hr style='margin: 15px 0;'>"),
        tab,
        widgets.HTML("<hr style='margin: 15px 0;'>"),
        plot_button,
        plot_output,
    ])
)

FUNNEL PLOT CONFIGURATION
✓ Effect size: Hedges' g (hedges_g)
✓ Standard error: SE_g
✓ Data points: 69
✓ Configuration loaded successfully

✅ FUNNEL PLOT INTERFACE READY
👆 Customize your plot using the tabs above, then click Generate

📝 Tips:
  • Symmetric funnel suggests no publication bias
  • Missing studies in bottom corners indicate possible bias
  • Run both Egger's and Begg's tests for robust assessment
  • Consider heterogeneity as alternative explanation for asymmetry



VBox(children=(HTML(value="<h3 style='color: #2E86AB;'>Funnel Plot Setup</h3><p style='color: #666;'><i>Visual…

In [11]:
#@title 🔄 LEAVE-ONE-OUT SENSITIVITY ANALYSIS

# =============================================================================
# CELL 13: LEAVE-ONE-OUT SENSITIVITY ANALYSIS
# Purpose: Assess influence of individual studies on pooled effect
# Dependencies: Cell 6 (overall_results), Cell 5 (data)
# Outputs: Influence plot and sensitivity metrics
# =============================================================================

import numpy as np
import pandas as pd
from scipy.stats import norm, chi2
import matplotlib.pyplot as plt
import datetime
import ipywidgets as widgets
from IPython.display import display, HTML, clear_output

# Cell banner
print("="*70, "LEAVE-ONE-OUT SENSITIVITY ANALYSIS", "="*70, sep="\n")

# --- 1. HELPER FUNCTIONS ---

def calculate_tau_squared_dl(df, effect_col, var_col):
    """
    Estimate between-study variance (tau²) for the rows of `df`.

    Despite the name this is a dispatcher:
      * if a global `calculate_tau_squared` exists and
        ANALYSIS_CONFIG['tau_method'] is something other than 'DL', that
        estimator is tried first and its value returned unless it raises or
        reports `success` as falsy;
      * otherwise (or on failure) the classic DerSimonian-Laird moment
        estimator is used.

    Args:
        df: DataFrame holding one row per effect size.
        effect_col: name of the effect-size column.
        var_col: name of the sampling-variance column.

    Returns:
        The tau² estimate; 0.0 for fewer than 2 rows, a non-positive weight
        sum, no excess heterogeneity, or any error in the DL computation
        (which is also printed).
    """
    n_rows = len(df)
    if n_rows < 2:
        return 0.0

    # --- Advanced estimator, only when one is defined and requested ---
    if 'calculate_tau_squared' in globals():
        requested_method = ANALYSIS_CONFIG.get('tau_method', 'DL')

        if requested_method != 'DL':
            try:
                estimate, details = calculate_tau_squared(
                    df, effect_col, var_col, method=requested_method
                )
                if details.get('success', True):
                    return estimate
            except Exception:
                # Deliberately quiet: the DL estimator below is the fallback
                pass

    # --- DerSimonian-Laird ---
    try:
        inv_var = 1 / df[var_col]
        weight_total = inv_var.sum()
        if weight_total <= 0:
            return 0.0

        effects = df[effect_col]

        # Fixed-effect mean and Cochran's Q around it
        fe_mean = (inv_var * effects).sum() / weight_total
        q_stat = (inv_var * (effects - fe_mean)**2).sum()
        dof = n_rows - 1

        # Scaling constant C = sum(w) - sum(w²) / sum(w)
        scale = weight_total - ((inv_var**2).sum() / weight_total)

        if not (scale > 0 and q_stat > dof):
            return 0.0
        return max(0.0, (q_stat - dof) / scale)

    except Exception as exc:
        print(f"  ⚠️  Error calculating tau²: {exc}")
        return 0.0

def calculate_re_pooled(df, tau_squared, effect_col, var_col, alpha=0.05):
    """Calculate the random-effects pooled estimate, its CI and I².

    The pooled effect uses random-effects weights 1 / (v_i + tau²). I² is
    derived from Cochran's Q, which is computed with inverse-variance
    (fixed-effect) weights 1 / v_i around the fixed-effect mean; using the
    random-effects weights there would understate heterogeneity whenever
    tau² > 0.

    Args:
        df: DataFrame holding one row per effect size.
        tau_squared: between-study variance added to every sampling variance.
        effect_col: name of the effect-size column.
        var_col: name of the sampling-variance column.
        alpha: two-sided significance level for the normal-theory CI.

    Returns:
        Tuple (pooled_effect, pooled_se, ci_lower, ci_upper, I_sq) with I_sq in
        percent. Five NaNs are returned for an empty frame, a non-positive
        weight sum, or any error (which is also printed).
    """
    k = len(df)
    if k < 1:
        return np.nan, np.nan, np.nan, np.nan, np.nan

    try:
        effects = df[effect_col]
        variances = df[var_col]

        # Random-effects weights
        w_re = 1 / (variances + tau_squared)
        sum_w_re = w_re.sum()

        if sum_w_re <= 0:
            return np.nan, np.nan, np.nan, np.nan, np.nan

        # Pooled effect and its standard error
        pooled_effect = (w_re * effects).sum() / sum_w_re
        pooled_var = 1 / sum_w_re
        pooled_se = np.sqrt(pooled_var)

        # Normal-theory confidence interval
        z_crit = norm.ppf(1 - alpha / 2)
        ci_lower = pooled_effect - z_crit * pooled_se
        ci_upper = pooled_effect + z_crit * pooled_se

        # I-squared from Cochran's Q (fixed-effect weights, fixed-effect mean)
        w_fe = 1 / variances
        pooled_effect_fe = (w_fe * effects).sum() / w_fe.sum()
        Q = (w_fe * (effects - pooled_effect_fe)**2).sum()
        df_Q = k - 1
        I_sq = max(0, ((Q - df_Q) / Q) * 100) if Q > 0 else 0

        return pooled_effect, pooled_se, ci_lower, ci_upper, I_sq

    except Exception as e:
        print(f"  ⚠️  Error calculating RE pooled: {e}")
        return np.nan, np.nan, np.nan, np.nan, np.nan

# --- 2. LOAD CONFIGURATION ---
# Pull the data, column names and the full-sample results that the
# leave-one-out run is compared against; any missing piece aborts the cell.
try:
    if not ('ANALYSIS_CONFIG' in locals() or 'ANALYSIS_CONFIG' in globals()):
        raise NameError("ANALYSIS_CONFIG not found.")

    # Data source: the stored frame wins, the global data_filtered is the fallback
    if 'analysis_data' not in ANALYSIS_CONFIG and 'data_filtered' not in globals():
        raise ValueError("Cannot find analysis data")
    analysis_data = (
        ANALYSIS_CONFIG['analysis_data']
        if 'analysis_data' in ANALYSIS_CONFIG
        else data_filtered
    )

    if analysis_data.empty:
        raise ValueError("Analysis data is empty")

    # Column names and effect-size settings
    effect_col = ANALYSIS_CONFIG['effect_col']
    var_col = ANALYSIS_CONFIG['var_col']
    es_config = ANALYSIS_CONFIG['es_config']
    overall_results = ANALYSIS_CONFIG['overall_results']

    # Full-sample random-effects results used as the reference
    original_effect = overall_results['pooled_effect_random']
    original_ci_lower = overall_results['ci_lower_random']
    original_ci_upper = overall_results['ci_upper_random']
    original_tau2 = overall_results['tau_squared']
    original_I2 = overall_results['I_squared']

    # Distinct values of the 'id' column vs. number of rows
    n_studies = analysis_data['id'].nunique()
    n_obs = len(analysis_data)

    print("✓ Configuration loaded")
    print(f"  Effect size: {es_config['effect_label']}")
    print(f"  Studies: {n_studies}")
    print(f"  Observations: {n_obs}")
    print(f"  Original effect: {original_effect:.4f} [{original_ci_lower:.4f}, {original_ci_upper:.4f}]")
    print(f"  Original I²: {original_I2:.1f}%")

except (NameError, KeyError, ValueError) as load_error:
    print(f"❌ ERROR: {load_error}")
    print("   Please run Cell 6 (Overall Analysis) first")
    raise

# --- 3. CREATE WIDGETS ---
# Checkboxes and sliders share their sizing, so they go through two small
# private factories; each call builds a fresh Layout / style dict.

def _loo_checkbox(description, value=True):
    """Return an un-indented, 450px-wide checkbox."""
    return widgets.Checkbox(
        value=value,
        description=description,
        indent=False,
        layout=widgets.Layout(width='450px')
    )


def _loo_slider(slider_cls, description, value, lower, upper, step):
    """Return a 450px-wide slider of `slider_cls` that only fires when released."""
    return slider_cls(
        value=value, min=lower, max=upper, step=step,
        description=description,
        continuous_update=False,
        style={'description_width': '120px'},
        layout=widgets.Layout(width='450px')
    )


header = widgets.HTML(
    "<h3 style='color: #2E86AB;'>Leave-One-Out Analysis Setup</h3>"
    "<p style='color: #666;'><i>Assess the influence of individual studies on pooled results</i></p>"
)

# Analysis options
removal_unit_widget = widgets.RadioButtons(
    options=[
        ('By Study (remove all observations from one paper)', 'study'),
        ('By Observation (remove one observation at a time)', 'observation')
    ],
    value='study',
    description='',
    layout=widgets.Layout(width='500px')
)

show_table_widget = _loo_checkbox('Show detailed results table')

# Plot options
plot_width_widget = _loo_slider(widgets.FloatSlider, 'Plot Width:', 10.0, 6.0, 14.0, 0.5)
plot_height_widget = _loo_slider(widgets.FloatSlider, 'Plot Height:', 8.0, 4.0, 16.0, 0.5)

sort_by_widget = widgets.Dropdown(
    options=[
        ('Effect Size', 'effect'),
        ('Study ID', 'id'),
        ('Influence (distance from original)', 'influence')
    ],
    value='effect',
    description='Sort By:',
    style={'description_width': '120px'},
    layout=widgets.Layout(width='450px')
)

# Export options
save_pdf_widget = _loo_checkbox('Save as PDF')
save_png_widget = _loo_checkbox('Save as PNG')
png_dpi_widget = _loo_slider(widgets.IntSlider, 'PNG DPI:', 300, 150, 600, 50)

# Run button
run_button = widgets.Button(
    style={'font_weight': 'bold'},
    layout=widgets.Layout(width='450px', height='50px'),
    button_style='success',
    description='▶ Run Leave-One-Out Analysis'
)

# Output area
analysis_output = widgets.Output()

# --- 4. DEFINE ANALYSIS FUNCTION ---

def _natural_sort_key(label):
    """Return a sort key that compares embedded digit runs numerically.

    ``re.split`` with a capturing group yields alternating text / digit
    tokens and always starts with a (possibly empty) text token, so keys
    built from different labels compare str-to-str and int-to-int position
    by position and never mix types.

    Args:
        label: Unit label such as ``"Obs 12"`` or ``"7"``; coerced with ``str``.

    Returns:
        list: Text tokens as ``str`` interleaved with digit tokens as ``int``.
    """
    import re  # local import: the notebook's import cell is outside this section

    tokens = re.split(r'(\d+)', str(label))
    # Odd positions are the captured digit runs.
    return [int(tok) if pos % 2 else tok for pos, tok in enumerate(tokens)]


def run_loo_analysis(b):
    """Perform leave-one-out sensitivity analysis.

    Button callback. Reads the option widgets, drops one unit at a time
    (all rows sharing an ``id``, or a single row), re-estimates tau-squared
    and the pooled effect on the remaining rows, then prints a summary,
    optionally prints a table, draws a plot of the leave-one-out estimates
    and optionally saves it as PDF/PNG under a timestamped relative filename.
    A summary of the run is stored in ``ANALYSIS_CONFIG['loo_results']``.

    Relies on names defined outside this function: ``original_effect``,
    ``original_I2``, ``original_ci_lower``, ``original_ci_upper``,
    ``effect_col``, ``var_col``, ``es_config``, ``ANALYSIS_CONFIG``,
    ``calculate_tau_squared_dl``, ``calculate_re_pooled`` and (as a data
    fallback) ``data_filtered``.

    Args:
        b: Object passed by the button's ``on_click`` hook; not used.

    Returns:
        None. All output goes to ``analysis_output``. Any exception is caught,
        reported with its traceback, and not re-raised.
    """
    with analysis_output:
        clear_output(wait=True)

        print("\n" + "="*70)
        print("LEAVE-ONE-OUT SENSITIVITY ANALYSIS")
        print("="*70)
        print(f"Timestamp: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}\n")

        try:
            # Get widget values
            removal_unit = removal_unit_widget.value
            show_table = show_table_widget.value
            plot_width = plot_width_widget.value
            plot_height = plot_height_widget.value
            sort_by = sort_by_widget.value
            save_pdf = save_pdf_widget.value
            save_png = save_png_widget.value
            png_dpi = png_dpi_widget.value

            print(f"📋 Configuration:")
            print(f"  Removal unit: {removal_unit}")
            print(f"  Original effect: {original_effect:.4f}")
            print(f"  Original I²: {original_I2:.1f}%")

            # Get data (work on a copy so the 'obs_id' helper column added
            # below never leaks into the shared frame)
            if 'analysis_data' in ANALYSIS_CONFIG:
                data = ANALYSIS_CONFIG['analysis_data'].copy()
            elif 'data_filtered' in globals():
                data = data_filtered.copy()
            else:
                print("❌ ERROR: Cannot find data")
                return

            # Determine removal units
            if removal_unit == 'study':
                removal_ids = data['id'].unique()
                removal_label = 'Study ID'
                print(f"  Studies to analyze: {len(removal_ids)}")
            else:
                # Positional row number serves as the observation identifier
                data['obs_id'] = range(len(data))
                removal_ids = data['obs_id'].values
                removal_label = 'Observation'
                print(f"  Observations to analyze: {len(removal_ids)}")

            if len(removal_ids) < 3:
                print(f"⚠️  WARNING: Too few units ({len(removal_ids)}) for meaningful analysis")
                print(f"   Need at least 3 units")
                return

            # Loop-invariant: whether the ORIGINAL CI strictly contains the null
            null_value = es_config['null_value']
            crosses_null_original = (original_ci_lower < null_value < original_ci_upper)

            # Run LOO analysis
            print(f"\n⚙️  Running leave-one-out analysis...")
            loo_results = []

            for i, remove_id in enumerate(removal_ids):
                # Create subset without this unit
                if removal_unit == 'study':
                    df_loo = data[data['id'] != remove_id].copy()
                    unit_label = str(remove_id)
                else:
                    df_loo = data[data['obs_id'] != remove_id].copy()
                    unit_label = f"Obs {remove_id}"

                # Fewer than two remaining rows: skip this unit entirely
                if len(df_loo) < 2:
                    continue

                # Recalculate tau-squared
                tau2_loo = calculate_tau_squared_dl(df_loo, effect_col, var_col)

                # Recalculate pooled effect
                effect_loo, se_loo, ci_lower_loo, ci_upper_loo, I2_loo = calculate_re_pooled(
                    df_loo, tau2_loo, effect_col, var_col
                )

                # Calculate influence metrics (percentage is relative to the
                # original effect and defined as 0 when that effect is 0)
                effect_diff = effect_loo - original_effect
                effect_diff_pct = (effect_diff / abs(original_effect)) * 100 if original_effect != 0 else 0

                # A unit "changes significance" when its removal flips whether
                # the CI strictly contains the null value
                crosses_null_loo = (ci_lower_loo < null_value < ci_upper_loo)
                changes_significance = (crosses_null_original != crosses_null_loo)

                loo_results.append({
                    'unit_removed': unit_label,
                    'k': len(df_loo),
                    'pooled_effect': effect_loo,
                    'se': se_loo,
                    'ci_lower': ci_lower_loo,
                    'ci_upper': ci_upper_loo,
                    'tau_squared': tau2_loo,
                    'I_squared': I2_loo,
                    'effect_diff': effect_diff,
                    'effect_diff_pct': effect_diff_pct,
                    'abs_diff': abs(effect_diff),
                    'changes_sig': changes_significance
                })

                # Progress indicator (every 10th unit and the last one);
                # '\r' makes each update overwrite the previous one
                if (i + 1) % 10 == 0 or (i + 1) == len(removal_ids):
                    print(f"  Progress: {i+1}/{len(removal_ids)}", end='\r')

            print(f"\n  ✓ Analysis complete")

            if len(loo_results) == 0:
                print("❌ ERROR: No valid results")
                return

            # Create results dataframe
            results_df = pd.DataFrame(loo_results)

            # Sort results
            if sort_by == 'effect':
                results_df = results_df.sort_values('pooled_effect')
            elif sort_by == 'influence':
                results_df = results_df.sort_values('abs_diff', ascending=False)
            else:  # id
                # Labels are strings; a plain sort would put "Obs 10" before
                # "Obs 2", so order them with a digit-aware key instead.
                unit_labels = results_df['unit_removed'].tolist()
                id_order = sorted(
                    range(len(unit_labels)),
                    key=lambda pos: _natural_sort_key(unit_labels[pos])
                )
                results_df = results_df.iloc[id_order]

            results_df = results_df.reset_index(drop=True)

            # --- DISPLAY RESULTS ---
            print(f"\n" + "="*70)
            print("RESULTS SUMMARY")
            print("="*70)

            # Range of effects
            min_effect = results_df['pooled_effect'].min()
            max_effect = results_df['pooled_effect'].max()
            effect_range = max_effect - min_effect

            print(f"\n📊 Effect Size Range:")
            print(f"  • Original: {original_effect:.4f}")
            print(f"  • Minimum: {min_effect:.4f} (removing {results_df.loc[results_df['pooled_effect'].idxmin(), 'unit_removed']})")
            print(f"  • Maximum: {max_effect:.4f} (removing {results_df.loc[results_df['pooled_effect'].idxmax(), 'unit_removed']})")
            print(f"  • Range: {effect_range:.4f}")
            print(f"  • Mean: {results_df['pooled_effect'].mean():.4f}")
            print(f"  • SD: {results_df['pooled_effect'].std():.4f}")

            # Heterogeneity changes
            min_I2 = results_df['I_squared'].min()
            max_I2 = results_df['I_squared'].max()

            print(f"\n📊 Heterogeneity (I²) Range:")
            print(f"  • Original: {original_I2:.1f}%")
            print(f"  • Minimum: {min_I2:.1f}%")
            print(f"  • Maximum: {max_I2:.1f}%")
            print(f"  • Mean: {results_df['I_squared'].mean():.1f}%")

            # Most influential units: up to five largest absolute effect changes
            print(f"\n🔍 Most Influential Units (by absolute effect change):")
            top_influential = results_df.nlargest(min(5, len(results_df)), 'abs_diff')

            for idx, row in top_influential.iterrows():
                direction = "increases" if row['effect_diff'] > 0 else "decreases"
                print(f"  • {row['unit_removed']}: Effect {direction} by {abs(row['effect_diff']):.4f} ({abs(row['effect_diff_pct']):.1f}%)")

            # Check for significance changes
            sig_changers = results_df[results_df['changes_sig'] == True]

            if len(sig_changers) > 0:
                print(f"\n🔴 WARNING: {len(sig_changers)} units change significance when removed:")
                for idx, row in sig_changers.iterrows():
                    print(f"  • {row['unit_removed']}: Effect = {row['pooled_effect']:.4f} [{row['ci_lower']:.4f}, {row['ci_upper']:.4f}]")
                print(f"\n  ⚠️  Results are sensitive to these studies")
                print(f"     Consider investigating these studies more closely")
            else:
                print(f"\n✓ No single unit changes the significance")
                print(f"  Results appear robust")

            # Overall stability assessment
            print(f"\n" + "="*70)
            print("STABILITY ASSESSMENT")
            print("="*70)

            # Coefficient of variation of the leave-one-out estimates, in
            # percent; defined as 0 when their mean is exactly 0
            mean_effect = results_df['pooled_effect'].mean()
            cv = (results_df['pooled_effect'].std() / abs(mean_effect)) * 100 if mean_effect != 0 else 0

            print(f"\n📊 Variability Metrics:")
            print(f"  • Coefficient of variation: {cv:.2f}%")
            print(f"  • Max absolute change: {results_df['abs_diff'].max():.4f}")
            print(f"  • Max percentage change: {results_df['effect_diff_pct'].abs().max():.1f}%")

            # Stability label from CV cut-offs at 5%, 10% and 20%
            if cv < 5:
                stability = "HIGHLY STABLE"
                color = "🟢"
                interpretation = "Results are very robust to single study removal"
            elif cv < 10:
                stability = "STABLE"
                color = "🟢"
                interpretation = "Results are generally robust"
            elif cv < 20:
                stability = "MODERATELY STABLE"
                color = "🟡"
                interpretation = "Some sensitivity to individual studies"
            else:
                stability = "UNSTABLE"
                color = "🔴"
                interpretation = "High sensitivity to individual studies"

            print(f"\n  {color} {stability}")
            print(f"  {interpretation}")

            # Display table if requested
            if show_table:
                print(f"\n" + "="*70)
                print("DETAILED RESULTS TABLE")
                print("="*70 + "\n")

                display_df = results_df[[
                    'unit_removed', 'k', 'pooled_effect', 'ci_lower', 'ci_upper',
                    'I_squared', 'effect_diff'
                ]].copy()

                display_df.columns = [
                    removal_label, 'k', 'Effect', 'CI Lower', 'CI Upper',
                    'I² (%)', 'Δ Effect'
                ]

                # Four decimals for magnitudes below 100, one decimal otherwise
                print(display_df.to_string(
                    index=False,
                    float_format=lambda x: f'{x:.4f}' if abs(x) < 100 else f'{x:.1f}'
                ))

            # --- CREATE PLOT ---
            print(f"\n" + "="*70)
            print("GENERATING PLOT")
            print("="*70 + "\n")

            # Adjust height based on number of units
            plot_height_adj = max(plot_height, len(results_df) * 0.2 + 2)

            fig, ax = plt.subplots(figsize=(plot_width, plot_height_adj))

            y_positions = np.arange(len(results_df))

            # Plot LOO effects with CIs, one point at a time so each point can
            # carry its own colour (red = removal changes significance)
            for idx, (_, row) in enumerate(results_df.iterrows()):
                color = 'red' if row['changes_sig'] else 'blue'

                # Plot error bar
                ax.errorbar(
                    x=row['pooled_effect'],
                    y=y_positions[idx],
                    xerr=[[row['pooled_effect'] - row['ci_lower']],
                          [row['ci_upper'] - row['pooled_effect']]],
                    fmt='o',
                    capsize=3,
                    color=color,
                    ecolor=color,
                    mfc=color,
                    mec='black',
                    markersize=5,
                    linewidth=1.5,
                    zorder=3
                )

            # Add legend entries manually (the per-point errorbars carry no labels)
            from matplotlib.lines import Line2D
            legend_elements = [
                Line2D([0], [0], marker='o', color='w', markerfacecolor='blue',
                       markeredgecolor='black', markersize=8, label='No significance change'),
                Line2D([0], [0], marker='o', color='w', markerfacecolor='red',
                       markeredgecolor='black', markersize=8, label='Changes significance'),
                Line2D([0], [0], color='darkred', linestyle='--', linewidth=2,
                       label=f'Original Effect ({original_effect:.3f})'),
                plt.Rectangle((0, 0), 1, 1, fc='red', alpha=0.1,
                             label='Original 95% CI'),
                Line2D([0], [0], color='gray', linestyle='-', linewidth=1, alpha=0.5,
                       label='Null Effect')
            ]

            # Original effect line
            ax.axvline(
                x=original_effect,
                color='darkred',
                linestyle='--',
                linewidth=2,
                zorder=1
            )

            # Original CI band
            ax.axvspan(
                original_ci_lower,
                original_ci_upper,
                color='red',
                alpha=0.1,
                zorder=0
            )

            # Null effect line
            ax.axvline(
                x=null_value,
                color='gray',
                linestyle='-',
                linewidth=1,
                alpha=0.5,
                zorder=0
            )

            # Customize axes
            ax.set_yticks(y_positions)
            ax.set_yticklabels(results_df['unit_removed'], fontsize=8)
            ax.set_xlabel(f"Pooled Effect ({es_config['effect_label']})", fontsize=12, fontweight='bold')
            ax.set_ylabel(f"{removal_label} Removed", fontsize=12, fontweight='bold')
            ax.set_title("Leave-One-Out Sensitivity Analysis", fontsize=14, fontweight='bold', pad=15)

            # Add legend
            ax.legend(handles=legend_elements, loc='best', fontsize=10, framealpha=0.9)
            ax.grid(axis='x', linestyle=':', alpha=0.4)

            fig.tight_layout()

            # Save files
            print(f"💾 Saving files...")

            timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
            base_filename = f"LeaveOneOut_{timestamp}"

            saved_files = []

            if save_pdf:
                pdf_filename = f"{base_filename}.pdf"
                fig.savefig(pdf_filename, bbox_inches='tight')
                saved_files.append(pdf_filename)
                print(f"  ✓ {pdf_filename}")

            if save_png:
                png_filename = f"{base_filename}.png"
                fig.savefig(png_filename, dpi=png_dpi, bbox_inches='tight')
                saved_files.append(png_filename)
                print(f"  ✓ {png_filename} (DPI: {png_dpi})")

            plt.show()
            # Release the figure so repeated button clicks do not accumulate
            # open figures in the kernel
            plt.close(fig)

            print(f"\n" + "="*70)
            print("✅ LEAVE-ONE-OUT ANALYSIS COMPLETE")
            print("="*70)
            print(f"Files: {', '.join(saved_files)}")

            # Save results
            ANALYSIS_CONFIG['loo_results'] = {
                'timestamp': datetime.datetime.now(),
                'results_df': results_df,
                'removal_unit': removal_unit,
                'original_effect': original_effect,
                'effect_range': effect_range,
                'stability': stability,
                'cv': cv,
                'n_sig_changers': len(sig_changers)
            }

            print(f"✓ Results saved to ANALYSIS_CONFIG['loo_results']")

        except Exception as e:
            # Report the failure inside the output area instead of raising
            # out of the button callback
            print(f"\n❌ ERROR: {e}")
            import traceback
            traceback.print_exc()

# --- 5. LINK BUTTON AND DISPLAY ---
# Clicking the button invokes the analysis callback.
run_button.on_click(run_loo_analysis)

# Ready banner: one print call per entry, in order.
_banner_rule = "=" * 70
for _banner_line in (
    "\n" + _banner_rule,
    "✅ LEAVE-ONE-OUT ANALYSIS INTERFACE READY",
    _banner_rule,
    "👆 Configure options and click Run to assess study influence",
    "\n📝 Tips:",
    "  • Use 'By Study' to remove all observations from one paper",
    "  • Use 'By Observation' for more granular sensitivity analysis",
    "  • Red points indicate units that change significance when removed",
    "  • Look for units far from the original effect line",
    _banner_rule + "\n",
):
    print(_banner_line)


def _section_divider():
    """Return a new horizontal-rule HTML widget used between panel sections."""
    return widgets.HTML("<hr style='margin: 15px 0;'>")


def _section_heading(title):
    """Return a new ``<h4>`` HTML widget showing ``title`` above a section."""
    return widgets.HTML(f"<h4 style='margin-bottom: 10px;'>{title}</h4>")


# Assemble the panel top to bottom: header, four option sections, button, output.
_panel_children = [header]

_panel_children += [
    _section_divider(),
    _section_heading("Removal Unit:"),
    removal_unit_widget,
]

_panel_children += [
    _section_divider(),
    _section_heading("Display Options:"),
    show_table_widget,
]

_panel_children += [
    _section_divider(),
    _section_heading("Plot Options:"),
    plot_width_widget,
    plot_height_widget,
    sort_by_widget,
]

_panel_children += [
    _section_divider(),
    _section_heading("Export Options:"),
    save_pdf_widget,
    save_png_widget,
    png_dpi_widget,
]

_panel_children += [
    _section_divider(),
    run_button,
    analysis_output,
]

display(widgets.VBox(_panel_children))

LEAVE-ONE-OUT SENSITIVITY ANALYSIS
✓ Configuration loaded
  Effect size: Hedges' g
  Studies: 23
  Observations: 69
  Original effect: 2.5788 [2.0292, 3.1284]
  Original I²: 85.7%

✅ LEAVE-ONE-OUT ANALYSIS INTERFACE READY
👆 Configure options and click Run to assess study influence

📝 Tips:
  • Use 'By Study' to remove all observations from one paper
  • Use 'By Observation' for more granular sensitivity analysis
  • Red points indicate units that change significance when removed
  • Look for units far from the original effect line



VBox(children=(HTML(value="<h3 style='color: #2E86AB;'>Leave-One-Out Analysis Setup</h3><p style='color: #666;…