# TabPFN + GGH Fusion Benchmark (Using GGH_2 Package)

Simplified benchmark notebook using the refactored GGH_2 package.

In [None]:
# CUDA determinism fix (must be before torch import)
import os
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'

import sys
import numpy as np
import torch
import warnings

# Add parent directory to path for packages
sys.path.insert(0, '..')

# Import from GGH_2 package
from GGH_2 import (
    HypothesisAmplifyingModel,
    set_to_deterministic,
    train_with_soft_weights,
    evaluate_on_test,
    run_ggh_soft_refinement,
)
from GGH_2.ggh import DEFAULT_CONFIG

# Import from GGH package (correct module paths)
from GGH.data_ops import DataOperator
from GGH.imputation_methods import Imputer
from GGH.models import initialize_model
from GGH.train_val_loop import TrainValidationManager
from GGH.selection_algorithms import AlgoModulators
from GGH.inspector import Inspector

# Device setup: run on the GPU when CUDA is usable, otherwise on the CPU
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')
print(f"Using device: {DEVICE}")

In [None]:
# =============================================================================
# CONFIGURATION
# =============================================================================
# Input CSV and the results directory (the latter is only defined here; the
# visible cells never read it).
data_path = '../data/dataset_photo_pce10/data.csv'
results_path = "../saved_results/TabPFN_GGH_Fusion"

# Column roles handed to DataOperator: observed inputs, regression target,
# and the variable treated as missing.
inpt_vars = ['P3HT', 'PTB7-Th']
target_vars = ['Degradation']
miss_vars = ['PCBM']

# Candidate values for the missing variable (6 PCBM concentration values),
# wrapped in an outer list with one entry per missing variable.
hypothesis = [[0.03, 0.11, 0.20, 0.32, 0.43, 0.6]]
hyp_per_sample = len(hypothesis[0])

# Benchmark parameters: seeds per setting and the partial-data fractions swept
BENCHMARK_N_RUNS = 3
PARTIAL_PERCENTAGES = [0.03, 0.10, 0.25]

# GGH config (can override defaults); key order is kept as originally written
GGH_CONFIG = dict(
    iter1_epochs=60,
    iter1_analysis_epochs=5,
    iter1_lr=0.01,
    iter2_epochs=30,
    iter2_lr=0.01,
    scoring_passes=5,
    min_weight=0.1,
    temperature_iter1=1.0,
    temperature_iter3=0.8,
    loss_influence=0.25,
    partial_base_weight=2.0,
    shared_hidden=16,
    hypothesis_hidden=32,
    final_hidden=32,
)

# Training parameters shared by every final model fit
GGH_FINAL_EPOCHS = 200
GGH_BENCHMARK_LR = 0.01

print("Dataset: Photocell Degradation")
print(f"Hypothesis values: {hypothesis[0]}")
print(f"Partial percentages: {PARTIAL_PERCENTAGES}")

In [None]:
# =============================================================================
# MAIN BENCHMARK
# =============================================================================
# For every partial-data percentage and every seed, four models are trained
# (full info, partial only, GGH-weighted, TabPFN-weighted) and the R2 value
# returned by evaluate_on_test is stored in `results`.
import time


def _train_and_score(DO, dims, weights, partial_gids, partial_weight, seed):
    """Seed, build a fresh HypothesisAmplifyingModel, train it and score it.

    Args:
        DO: DataOperator forwarded to the training and evaluation helpers.
        dims: (n_shared, n_hyp, out_size) sizes for the model constructor.
        weights: dict of hypothesis-row id -> weight, forwarded unchanged to
            train_with_soft_weights.
        partial_gids: set of row ids forwarded unchanged to
            train_with_soft_weights.
        partial_weight: weight value forwarded unchanged to
            train_with_soft_weights.
        seed: value passed to set_to_deterministic before the model is built,
            so that initialisation and training start from that seed.

    Returns:
        Tuple (trained model, third value returned by evaluate_on_test), the
        latter being what this notebook reports as R2.
    """
    n_shared, n_hyp, out_size = dims
    set_to_deterministic(seed)
    model = HypothesisAmplifyingModel(n_shared, n_hyp, GGH_CONFIG['shared_hidden'],
                                      GGH_CONFIG['hypothesis_hidden'],
                                      GGH_CONFIG['final_hidden'], out_size).to(DEVICE)
    model, _, _ = train_with_soft_weights(DO, model, weights, partial_gids,
                                          partial_weight, GGH_BENCHMARK_LR, GGH_FINAL_EPOCHS)
    _, _, r2 = evaluate_on_test(DO, model)
    return model, r2


results = {pct: {'full_info': [], 'partial': [], 'ggh': [], 'tabpfn': []} for pct in PARTIAL_PERCENTAGES}

for partial_pct in PARTIAL_PERCENTAGES:
    print(f"\n{'='*60}")
    print(f"Testing with {partial_pct*100:.0f}% partial data")
    print(f"{'='*60}")

    for run in range(BENCHMARK_N_RUNS):
        r_state = run
        set_to_deterministic(r_state)

        # Create DataOperator
        DO = DataOperator(data_path, inpt_vars, target_vars, miss_vars, hypothesis,
                          partial_pct, r_state, device=DEVICE)

        if DO.lack_partial_coverage:
            print(f"  Run {run+1}: Skipping (lack coverage)")
            continue

        n_shared = len(DO.inpt_vars)
        n_hyp = len(DO.miss_vars)
        out_size = len(DO.target_vars)
        model_dims = (n_shared, n_hyp, out_size)

        # NOTE(review): these ids are index *labels*, while the ids built
        # below are *positions*; the code assumes df_train_hypothesis carries
        # a default 0..N-1 index so both coincide - TODO confirm.
        partial_gids = set(DO.df_train_hypothesis[
            (DO.df_train_hypothesis['partial_full_info'] == 1) &
            (DO.df_train_hypothesis['correct_hypothesis'] == True)
        ].index.tolist())

        print(f"\n  Run {run+1}/{BENCHMARK_N_RUNS}")

        # === Full Info ===
        # Weight 1.0 for every row flagged as the correct hypothesis. The
        # column is read once instead of materialising a row per lookup.
        n_samples_full = len(DO.df_train_hypothesis) // hyp_per_sample
        correct_flags = DO.df_train_hypothesis['correct_hypothesis'].tolist()
        full_info_weights = {
            gid: 1.0
            for gid, is_correct in enumerate(correct_flags[:n_samples_full * hyp_per_sample])
            if is_correct
        }

        model_full, full_r2 = _train_and_score(DO, model_dims, full_info_weights, set(),
                                               1.0, r_state + 600)
        results[partial_pct]['full_info'].append(full_r2)
        print(f"    Full Info R2: {full_r2:.4f}")

        # === Partial Only ===
        model_partial, partial_r2 = _train_and_score(DO, model_dims, {}, partial_gids,
                                                     1.0, r_state + 400)
        results[partial_pct]['partial'].append(partial_r2)
        print(f"    Partial R2: {partial_r2:.4f}")

        # === GGH ===
        # The refinement runs before re-seeding, exactly as in the original order.
        ggh_weights, ggh_precision, _, ggh_partial_weight = run_ggh_soft_refinement(DO, r_state, GGH_CONFIG)
        model_ggh, ggh_r2 = _train_and_score(DO, model_dims, ggh_weights, partial_gids,
                                             ggh_partial_weight, r_state + 200)
        results[partial_pct]['ggh'].append(ggh_r2)
        print(f"    GGH R2: {ggh_r2:.4f} (precision: {ggh_precision:.1f}%)")

        # === TabPFN Standalone ===
        # NOTE(review): get_tabpfn_probabilities is neither imported nor
        # defined in the visible cells; unless another cell provides it this
        # line raises NameError - confirm where it should be imported from.
        tabpfn_probs, _ = get_tabpfn_probabilities(DO, r_state, verbose=False)
        tabpfn_weights = {}
        partial_sample_indices = {gid // hyp_per_sample for gid in partial_gids}
        n_samples = len(DO.df_train_hypothesis) // hyp_per_sample
        if tabpfn_probs:
            for sample_idx in range(n_samples):
                # Samples with known values keep their partial weight only;
                # samples without a TabPFN prediction get no weight at all.
                if sample_idx in partial_sample_indices or sample_idx not in tabpfn_probs:
                    continue
                # Plain int so the dict keys match the other weight dicts.
                pred_class = int(np.argmax(tabpfn_probs[sample_idx]))
                gid = sample_idx * hyp_per_sample + pred_class
                tabpfn_weights[gid] = 1.0

        model_tabpfn, tabpfn_r2 = _train_and_score(DO, model_dims, tabpfn_weights, partial_gids,
                                                   GGH_CONFIG['partial_base_weight'], r_state + 500)
        results[partial_pct]['tabpfn'].append(tabpfn_r2)
        print(f"    TabPFN R2: {tabpfn_r2:.4f}")

print("\n" + "="*60)
print("BENCHMARK COMPLETE")
print("="*60)

In [None]:
# =============================================================================
# RESULTS SUMMARY
# =============================================================================
# One table row per partial-data percentage with the mean R2 of each method.
print("\n" + "="*80)
print("BENCHMARK SUMMARY")
print("="*80)
print(f"{'Partial %':<12} | {'Full Info':<12} | {'Partial':<12} | {'GGH':<12} | {'TabPFN':<12}")
# Five 12-character cells joined by four " | " separators -> 72 characters.
print("-"*72)

for pct in PARTIAL_PERCENTAGES:
    # A method with no recorded run (every seed skipped) is reported as NaN;
    # printing 0 would be indistinguishable from a genuine R2 of zero.
    full_avg, partial_avg, ggh_avg, tabpfn_avg = (
        np.mean(results[pct][method]) if results[pct][method] else float('nan')
        for method in ('full_info', 'partial', 'ggh', 'tabpfn')
    )

    # Cell widths match the 12-character header cells so columns line up.
    print(f"{pct*100:>11.0f}% | {full_avg:>12.4f} | {partial_avg:>12.4f} | {ggh_avg:>12.4f} | {tabpfn_avg:>12.4f}")

print("="*80)

In [None]:
# =============================================================================
# SPEED BENCHMARK: CPU vs GPU
# =============================================================================
import time

# Partial-data fraction used for the timing runs and the number of seeds timed
SPEED_PARTIAL_PERC = 0.10
SPEED_N_RUNS = 2

# Banner: title framed by two 80-character rules, one item per output line
_speed_rule = "=" * 80
print(_speed_rule, "SPEED BENCHMARK: CPU vs GPU", _speed_rule, sep="\n")

def run_speed_test(device_name, device):
    """Time every benchmark method on one device.

    For seeds 0..SPEED_N_RUNS-1 a DataOperator is created on ``device`` with
    SPEED_PARTIAL_PERC partial data; seeds whose DataOperator reports
    ``lack_partial_coverage`` are skipped. Each recorded time covers weight
    preparation, seeding, model construction and training for that method
    (evaluation on the test set is not part of the measurement).

    Args:
        device_name: Label used only in the progress output.
        device: Device passed to DataOperator and used for the models
            (a ``torch.device`` or anything ``torch.device`` accepts).

    Returns:
        Dict mapping 'Full Info', 'Partial', 'GGH' and 'TabPFN' to a list of
        elapsed seconds, one entry per seed that was not skipped.
    """
    times = {'Full Info': [], 'Partial': [], 'GGH': [], 'TabPFN': []}
    torch_device = torch.device(device)
    on_cuda = torch_device.type == 'cuda'

    def _now():
        """Return a monotonic timestamp after pending CUDA work has finished."""
        # CUDA kernels are launched asynchronously; without a synchronize the
        # clock could be read while work is still queued on the GPU.
        if on_cuda:
            torch.cuda.synchronize(torch_device)
        return time.perf_counter()

    def _fit(DO, dims, weights, partial_gids, partial_weight, seed):
        """Seed, build a fresh model on ``device`` and train it."""
        n_shared, n_hyp, out_size = dims
        set_to_deterministic(seed)
        model = HypothesisAmplifyingModel(n_shared, n_hyp, GGH_CONFIG['shared_hidden'],
                                          GGH_CONFIG['hypothesis_hidden'],
                                          GGH_CONFIG['final_hidden'], out_size).to(device)
        train_with_soft_weights(DO, model, weights, partial_gids, partial_weight,
                                GGH_BENCHMARK_LR, GGH_FINAL_EPOCHS)

    for run in range(SPEED_N_RUNS):
        r_state = run
        set_to_deterministic(r_state)

        DO = DataOperator(data_path, inpt_vars, target_vars, miss_vars, hypothesis,
                          SPEED_PARTIAL_PERC, r_state, device=device)

        if DO.lack_partial_coverage:
            continue

        dims = (len(DO.inpt_vars), len(DO.miss_vars), len(DO.target_vars))

        # NOTE(review): index labels are used here while positional ids are
        # used below; assumes a default 0..N-1 index - TODO confirm.
        partial_gids = set(DO.df_train_hypothesis[
            (DO.df_train_hypothesis['partial_full_info'] == 1) &
            (DO.df_train_hypothesis['correct_hypothesis'] == True)
        ].index.tolist())

        print(f"\n  Run {run+1}/{SPEED_N_RUNS} on {device_name}")

        # Full Info
        start = _now()
        n_samples_full = len(DO.df_train_hypothesis) // hyp_per_sample
        # Read the flag column once; a per-row iloc lookup here would add
        # device-independent pandas overhead to the measured time.
        correct_flags = DO.df_train_hypothesis['correct_hypothesis'].tolist()
        full_info_weights = {
            gid: 1.0
            for gid, is_correct in enumerate(correct_flags[:n_samples_full * hyp_per_sample])
            if is_correct
        }
        _fit(DO, dims, full_info_weights, set(), 1.0, r_state + 600)
        times['Full Info'].append(_now() - start)
        print(f"    Full Info: {times['Full Info'][-1]:.2f}s")

        # Partial
        start = _now()
        _fit(DO, dims, {}, partial_gids, 1.0, r_state + 400)
        times['Partial'].append(_now() - start)
        print(f"    Partial: {times['Partial'][-1]:.2f}s")

        # GGH (the refinement step is part of the measured time)
        start = _now()
        ggh_weights, _, _, ggh_partial_weight = run_ggh_soft_refinement(DO, r_state, GGH_CONFIG)
        _fit(DO, dims, ggh_weights, partial_gids, ggh_partial_weight, r_state + 200)
        times['GGH'].append(_now() - start)
        print(f"    GGH: {times['GGH'][-1]:.2f}s")

        # TabPFN (the probability computation is part of the measured time)
        start = _now()
        # NOTE(review): get_tabpfn_probabilities is neither imported nor
        # defined in the visible cells; unless another cell provides it this
        # line raises NameError - confirm where it should be imported from.
        tabpfn_probs, _ = get_tabpfn_probabilities(DO, r_state, verbose=False)
        tabpfn_weights = {}
        partial_sample_indices = {gid // hyp_per_sample for gid in partial_gids}
        n_samples = len(DO.df_train_hypothesis) // hyp_per_sample
        if tabpfn_probs:
            for sample_idx in range(n_samples):
                if sample_idx in partial_sample_indices or sample_idx not in tabpfn_probs:
                    continue
                # Plain int so the dict keys match the other weight dicts.
                gid = sample_idx * hyp_per_sample + int(np.argmax(tabpfn_probs[sample_idx]))
                tabpfn_weights[gid] = 1.0

        _fit(DO, dims, tabpfn_weights, partial_gids, GGH_CONFIG['partial_base_weight'], r_state + 500)
        times['TabPFN'].append(_now() - start)
        print(f"    TabPFN: {times['TabPFN'][-1]:.2f}s")

    return times

# Run benchmarks: CPU always, GPU only when CUDA is available
print("\nTesting on CPU...")
cpu_times = run_speed_test("CPU", torch.device('cpu'))

gpu_times = None
if torch.cuda.is_available():
    print("\nTesting on GPU...")
    gpu_times = run_speed_test("GPU", torch.device('cuda'))
else:
    print("\nGPU not available")

# Summary: mean seconds per method, plus the CPU/GPU ratio when GPU timings exist
print("\n" + "="*60)
print("SPEED SUMMARY")
print("="*60)
for method in ('Full Info', 'Partial', 'GGH', 'TabPFN'):
    cpu_runs = cpu_times[method]
    cpu_avg = np.mean(cpu_runs) if cpu_runs else 0

    gpu_runs = gpu_times[method] if gpu_times else None
    if not gpu_runs:
        # No GPU at all, or every GPU run for this method was skipped
        print(f"{method:<12}: CPU={cpu_avg:.1f}s")
        continue

    gpu_avg = np.mean(gpu_runs)
    # Guard the division; a non-positive GPU mean is reported as 0x speedup
    speedup = cpu_avg / gpu_avg if gpu_avg > 0 else 0
    print(f"{method:<12}: CPU={cpu_avg:.1f}s, GPU={gpu_avg:.1f}s, Speedup={speedup:.1f}x")