# Wine_Claude: Targeted Parameter Search for GGH

This notebook systematically tests different parameter configurations to find
settings where gradient-guided hypothesis selection significantly outperforms baselines.

**Goal**: Find a configuration where the `use hypothesis` R2 score is at least 4.5 percentage points
higher than the best baseline (`partial info` or `use known only`).

In [1]:
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
import sys
sys.path.insert(0, '../')
sys.path.insert(0, '../GGH')

from GGH.data_ops import DataOperator
from GGH.selection_algorithms import AlgoModulators
from GGH.models import initialize_model, load_model
from GGH.train_val_loop import TrainValidationManager
from GGH.inspector import Inspector, visualize_train_val_error, selection_histograms, clean_final_analysis
from sklearn.metrics import r2_score
import warnings
warnings.filterwarnings('ignore')

def set_to_deterministic(rand_state):
    """Seed every random number generator used here and request deterministic torch execution.

    Args:
        rand_state: Seed applied to Python's `random`, NumPy's global RNG and PyTorch.
    """
    import random  # only needed for seeding, hence the function-local import

    # Apply the same seed to each library's global generator, in a fixed order.
    for seed_fn in (random.seed, np.random.seed, torch.manual_seed):
        seed_fn(rand_state)

    # Restrict torch to one intra-op thread and require deterministic algorithms.
    torch.set_num_threads(1)
    torch.use_deterministic_algorithms(True)


print("Imports successful!")

Imports successful!


In [2]:
import os

# --- Data configuration -------------------------------------------------
# Input CSV and the root folder that receives all run artifacts.
data_path = '../data/wine/red_wine.csv'
results_path = "../saved_results/Red Wine Claude"

# Column roles; these are handed to DataOperator by test_config.
inpt_vars = ['volatile acidity', 'total sulfur dioxide', 'citric acid']
target_vars = ['quality']
miss_vars = ['alcohol']
# Presumably the candidate values considered for each entry of miss_vars
# (one inner list per variable) -- TODO confirm against DataOperator.
hypothesis = [[9.35, 10, 11.5, 15]]

# --- Model parameters ---------------------------------------------------
hidden_size = 32
n_hypothesis_values = len(hypothesis[0])
batch_size = 100 * n_hypothesis_values  # 4 values -> 400
output_size = len(target_vars)

# --- Output directories -------------------------------------------------
# Create the root first, then one sub-folder per training mode.
os.makedirs(results_path, exist_ok=True)
for mode_dir in ('use hypothesis', 'partial info', 'use known only', 'full info'):
    os.makedirs(f'{results_path}/{mode_dir}', exist_ok=True)

# --- Inspector ----------------------------------------------------------
INSPECT = Inspector(results_path, hidden_size)
print(f"Results will be saved to: {results_path}")

Results will be saved to: ../saved_results/Red Wine Claude


In [3]:
def test_config(use_info, partial_perc, num_epochs, lr, nu, normalize, freqperc, 
                dropout=0.05, n_runs=5, verbose=False, use_enhanced_context=False,
                max_seeds=500):
    """
    Test a configuration and return R2 scores across multiple runs.

    Seeds 0, 1, 2, ... are tried in order. A seed only counts as a run when the
    DataOperator built from it does not report `lack_partial_coverage`. The
    search stops once `n_runs` such seeds have been trained and scored, or
    after `max_seeds` seeds have been tried.

    Args:
        use_info: Training mode passed to the dataloader and training manager
            ('use hypothesis', 'partial info', 'use known only' or 'full info').
        partial_perc: Forwarded to DataOperator (the notebook uses 0.015 and
            0.025, described there as 1.5% / 2.5% complete data).
        num_epochs: Number of epochs passed to TrainValidationManager.
        lr: Forwarded to AlgoModulators as `lr`.
        nu: Forwarded to AlgoModulators as `nu`.
        normalize: Forwarded to AlgoModulators as `normalize_grads_contx`.
        freqperc: Forwarded to AlgoModulators as `freqperc_cutoff`.
        dropout: Forwarded to initialize_model.
        n_runs: Number of valid runs to collect.
        verbose: If True, print the R2 score of every valid run.
        use_enhanced_context: If True, adds hypothesis class ID (one-hot) to enriched vectors
                             to better distinguish between hypotheses.
        max_seeds: Upper bound on the number of random states to try
            (default 500, the previously hard-coded limit).

    Returns:
        Tuple `(mean_r2, std_r2, per_run_r2_list)`. Mean and std are NaN when
        no seed produced a valid run.
    """
    results = []

    for r_state in range(max_seeds):
        set_to_deterministic(r_state)

        DO = DataOperator(data_path, inpt_vars, target_vars, miss_vars, hypothesis,
                          partial_perc, r_state, device='cpu')
        DO.problem_type = 'regression'

        # Seeds flagged with lack_partial_coverage are skipped and do not count.
        if DO.lack_partial_coverage:
            continue

        AM = AlgoModulators(DO, lr=lr, nu=nu, normalize_grads_contx=normalize,
                            use_context=True, freqperc_cutoff=freqperc,
                            use_enhanced_context=use_enhanced_context)
        dataloader = DO.prep_dataloader(use_info, batch_size)
        model = initialize_model(DO, dataloader, hidden_size, r_state, dropout=dropout)

        TVM = TrainValidationManager(use_info, num_epochs, dataloader, batch_size,
                                     r_state, results_path, final_analysis=False)
        TVM.train_model(DO, AM, model, final_analysis=False)

        # Load best model and evaluate.
        # NOTE(review): assumes train_model always wrote a checkpoint for THIS run to
        # TVM.weights_save_path, including runs that stop early -- confirm.
        model.load_state_dict(torch.load(TVM.weights_save_path))
        model.eval()

        # These three modes are scored on the full test inputs; any other mode
        # is scored on the known-only test inputs.
        if use_info in ('use hypothesis', 'partial info', 'full info'):
            test_input = DO.full_test_input_tensor
        else:
            test_input = DO.known_test_input_tensor

        # Pure inference: no autograd graph is needed.
        with torch.no_grad():
            test_pred = model(test_input)

        test_true = DO.df_test[target_vars].values
        r2 = r2_score(test_true, test_pred.detach().numpy())
        results.append(r2)

        if verbose:
            print(f"  Run {len(results)} (seed={r_state}): R2={r2:.4f}")

        if len(results) >= n_runs:
            break

    # Surface a shortfall explicitly: warnings are globally silenced in this
    # notebook, so the NaN mean of an empty list would otherwise go unnoticed.
    if len(results) < n_runs:
        print(f"  Note: only {len(results)}/{n_runs} valid runs found "
              f"within {max_seeds} random states.")

    return np.mean(results), np.std(results), results

print("Test function defined.")

Test function defined.


## Step 1: Establish Baselines

Test both `partial_perc=0.015` and `partial_perc=0.025` to find which gives a better opportunity for improvement.

In [4]:
# Baselines with partial_perc = 0.015 (1.5% complete data)
partial_perc = 0.015
n_baseline_runs = 5

rule = '=' * 60
print(f"\n{rule}")
print(f"BASELINES with partial_perc = {partial_perc}")
print(f"{rule}\n")

# Positional settings shared by both baseline modes:
# (num_epochs, lr, nu, normalize, freqperc)
baseline_args = (200, 0.001, 0.1, False, 0.25)

# Baseline 1: 'partial info'
print("Testing partial info...")
p_mean_015, p_std_015, p_results_015 = test_config(
    'partial info', partial_perc, *baseline_args, n_runs=n_baseline_runs
)
p_formatted = [f'{r:.4f}' for r in p_results_015]
print(f"  partial info: {p_mean_015:.4f} +/- {p_std_015:.4f}")
print(f"  Individual: {p_formatted}")

# Baseline 2: 'use known only'
print("\nTesting use known only...")
k_mean_015, k_std_015, k_results_015 = test_config(
    'use known only', partial_perc, *baseline_args, n_runs=n_baseline_runs
)
k_formatted = [f'{r:.4f}' for r in k_results_015]
print(f"  use known only: {k_mean_015:.4f} +/- {k_std_015:.4f}")
print(f"  Individual: {k_formatted}")

# The better of the two means is the bar to beat; the target sits 4.5pp above it.
best_baseline_015 = max(p_mean_015, k_mean_015)
target_015 = best_baseline_015 + 0.045
print(f"\nBest baseline: {best_baseline_015:.4f}")
print(f"Target (baseline + 4.5pp): {target_015:.4f}")


BASELINES with partial_perc = 0.015

Testing partial info...
  partial info: 0.1790 +/- 0.0835
  Individual: ['0.0220', '0.2613', '0.1854', '0.1905', '0.2360']

Testing use known only...
  use known only: 0.1758 +/- 0.0479
  Individual: ['0.2019', '0.2316', '0.1561', '0.0929', '0.1963']

Best baseline: 0.1790
Target (baseline + 4.5pp): 0.2240


In [5]:
# Baselines with partial_perc = 0.025 (2.5% complete data)
partial_perc = 0.025

rule = '=' * 60
print(f"\n{rule}")
print(f"BASELINES with partial_perc = {partial_perc}")
print(f"{rule}\n")

# Positional settings shared by both baseline modes:
# (num_epochs, lr, nu, normalize, freqperc)
baseline_args = (200, 0.001, 0.1, False, 0.25)

# Baseline 1: 'partial info'
print("Testing partial info...")
p_mean_025, p_std_025, p_results_025 = test_config(
    'partial info', partial_perc, *baseline_args, n_runs=n_baseline_runs
)
p_formatted = [f'{r:.4f}' for r in p_results_025]
print(f"  partial info: {p_mean_025:.4f} +/- {p_std_025:.4f}")
print(f"  Individual: {p_formatted}")

# Baseline 2: 'use known only'
print("\nTesting use known only...")
k_mean_025, k_std_025, k_results_025 = test_config(
    'use known only', partial_perc, *baseline_args, n_runs=n_baseline_runs
)
k_formatted = [f'{r:.4f}' for r in k_results_025]
print(f"  use known only: {k_mean_025:.4f} +/- {k_std_025:.4f}")
print(f"  Individual: {k_formatted}")

# The better of the two means is the bar to beat; the target sits 4.5pp above it.
best_baseline_025 = max(p_mean_025, k_mean_025)
target_025 = best_baseline_025 + 0.045
print(f"\nBest baseline: {best_baseline_025:.4f}")
print(f"Target (baseline + 4.5pp): {target_025:.4f}")


BASELINES with partial_perc = 0.025

Testing partial info...
  partial info: 0.1902 +/- 0.0485
  Individual: ['0.1530', '0.2789', '0.2042', '0.1662', '0.1487']

Testing use known only...
  use known only: 0.1809 +/- 0.0511
  Individual: ['0.2019', '0.2316', '0.1561', '0.2222', '0.0929']

Best baseline: 0.1902
Target (baseline + 4.5pp): 0.2352


In [6]:
# Summary of baselines
rule = "=" * 60
print("\n" + rule)
print("BASELINE SUMMARY")
print(rule)

# One row per partial_perc setting: (label, best baseline R2, target R2).
baseline_rows = (
    ("0.015", best_baseline_015, target_015),
    ("0.025", best_baseline_025, target_025),
)
print()  # blank line between the header and the rows
for label, base_r2, goal_r2 in baseline_rows:
    print(f"partial_perc={label}: best baseline={base_r2:.4f}, target={goal_r2:.4f}")


BASELINE SUMMARY

partial_perc=0.015: best baseline=0.1790, target=0.2240
partial_perc=0.025: best baseline=0.1902, target=0.2352


## Step 2: Targeted Parameter Search

Test promising configurations for the `use hypothesis` method.

**Key parameters:**
- `nu`: OneClassSVM parameter (lower = more permissive selection)
- `lr`: Learning rate
- `freqperc_cutoff`: Frequency threshold for final selection
- `normalize_grads_contx`: Whether to normalize gradients + context
- `num_epochs`: Training epochs

In [None]:
    # Baseline-like configs (no enhanced context)
    (0.10, 0.001, 0.25, False, 60, False, "baseline"),
    
    # Vary nu (selection restrictiveness)
    (0.05, 0.001, 0.25, False, 60, False, "low nu"),
    (0.08, 0.001, 0.25, False, 60, False, "med-low nu"),
    (0.15, 0.001, 0.25, False, 60, False, "med-high nu"),
    (0.20, 0.001, 0.25, False, 60, False, "high nu"),
    
    # Vary learning rate
    (0.10, 0.002, 0.25, False, 60, False, "higher lr"),
    (0.10, 0.004, 0.25, False, 60, False, "high lr"),
    (0.10, 0.0005, 0.25, False, 60, False, "low lr"),
    
    # Vary frequency cutoff
    (0.10, 0.001, 0.15, False, 60, False, "low freq cutoff"),
    (0.10, 0.001, 0.20, False, 60, False, "med-low freq cutoff"),
    (0.10, 0.001, 0.33, False, 60, False, "high freq cutoff"),
    
    # With normalization
    (0.10, 0.001, 0.25, True, 60, False, "with normalize"),
    (0.10, 0.002, 0.25, True, 60, False, "normalize + higher lr"),
    
    # Vary epochs
    (0.10, 0.001, 0.25, False, 40, False, "40 epochs"),
    (0.10, 0.001, 0.25, False, 80, False, "more epochs"),
    
    # Combined promising configs
    (0.08, 0.002, 0.20, False, 60, False, "combined 1"),
    (0.08, 0.002, 0.20, True, 60, False, "combined 2 + norm"),

In [7]:
# Configurations evaluated by the search cells below.
# Tuple layout: (nu, lr, freqperc_cutoff, normalize, epochs, use_enhanced_context, description)
configs_to_test = [

    # ==========================================
    # ENHANCED CONTEXT CONFIGURATIONS
    # (adds hypothesis class ID to enriched vectors)
    # This addresses the diagnostic finding that 75% of input features
    # are identical across hypotheses, diluting the gradient signal
    # ==========================================
    (0.10, 0.001, 0.25, False, 50, True, "enhanced context"),
    (0.10, 0.001, 0.25, True, 50, True, "enhanced + normalize"),
    # Further candidates, currently disabled:
    #(0.08, 0.001, 0.25, False, 60, True, "enhanced + low nu"),
    #(0.15, 0.001, 0.25, False, 60, True, "enhanced + high nu"),
    #(0.10, 0.002, 0.25, False, 60, True, "enhanced + higher lr"),
    #(0.10, 0.001, 0.20, False, 60, True, "enhanced + low freq"),
    #(0.08, 0.002, 0.20, True, 60, True, "enhanced combined"),
    #(0.10, 0.001, 0.25, True, 80, True, "enhanced + norm + epochs"),
]

# Position 5 of each tuple is the use_enhanced_context flag; tally both groups once.
enhanced_flags = [bool(cfg[5]) for cfg in configs_to_test]
n_enhanced_configs = sum(enhanced_flags)
n_standard_configs = len(enhanced_flags) - n_enhanced_configs

print(f"Total configurations to test: {len(configs_to_test)}")
print(f"  - Standard configs: {n_standard_configs}")
print(f"  - Enhanced context configs: {n_enhanced_configs}")

Total configurations to test: 2
  - Standard configs: 0
  - Enhanced context configs: 2


In [None]:
# Search with partial_perc = 0.015
partial_perc = 0.015
best_baseline = best_baseline_015
target = target_015

print(f"\n{'='*60}")
print(f"PARAMETER SEARCH with partial_perc = {partial_perc}")
print(f"Best baseline: {best_baseline:.4f}, Target: {target:.4f}")
print(f"{'='*60}\n")

results_015 = []
# R2 can be zero or negative, so start below every achievable score. Starting
# at 0 would leave best_config_015 as None whenever no config reaches a
# positive mean R2, and would report a fictitious best score of 0.0.
best_r2_015 = float('-inf')
best_config_015 = None

for i, (nu, lr, freqperc, normalize, epochs, enhanced_ctx, desc) in enumerate(configs_to_test):
    print(f"[{i+1}/{len(configs_to_test)}] Testing {desc}...", end=" ")

    h_mean, h_std, h_results = test_config(
        'use hypothesis', partial_perc, epochs, lr, nu, normalize, freqperc,
        n_runs=5, use_enhanced_context=enhanced_ctx
    )

    # Gap to the best baseline, in percentage points of R2.
    improvement = (h_mean - best_baseline) * 100
    results_015.append({
        'desc': desc, 'nu': nu, 'lr': lr, 'freqperc': freqperc,
        'normalize': normalize, 'epochs': epochs, 'enhanced_ctx': enhanced_ctx,
        'mean_r2': h_mean, 'std_r2': h_std, 'improvement_pp': improvement
    })

    # *** = target reached (>= 4.5pp), ** = >= 2pp, * = any gain over the baseline.
    if improvement >= 4.5:
        marker = "***"
    elif improvement >= 2.0:
        marker = "**"
    elif improvement > 0:
        marker = "*"
    else:
        marker = ""
    print(f"R2={h_mean:.4f} ({improvement:+.2f}pp) {marker}")

    # A NaN mean compares False here and is therefore never selected.
    if h_mean > best_r2_015:
        best_r2_015 = h_mean
        best_config_015 = (nu, lr, freqperc, normalize, epochs, enhanced_ctx, desc)

print(f"\n{'='*60}")
print(f"BEST for partial_perc={partial_perc}:")
print(f"  R2 = {best_r2_015:.4f} (improvement: {(best_r2_015-best_baseline)*100:.2f}pp)")
print(f"  Config: {best_config_015}")


PARAMETER SEARCH with partial_perc = 0.015
Best baseline: 0.1790, Target: 0.2240

[1/2] Testing enhanced context... 

100%|██████████| 50/50 [12:34<00:00, 15.08s/it]
100%|██████████| 50/50 [12:35<00:00, 15.10s/it]
100%|██████████| 50/50 [12:38<00:00, 15.17s/it]
100%|██████████| 50/50 [12:40<00:00, 15.21s/it]
100%|██████████| 50/50 [12:40<00:00, 15.22s/it]


R2=0.1525 (-2.65pp) 
[2/2] Testing enhanced + normalize... 

100%|██████████| 50/50 [12:37<00:00, 15.15s/it]
 34%|███▍      | 17/50 [04:28<08:40, 15.78s/it]

No gradients were selected, training will cease.



 38%|███▊      | 19/50 [04:47<07:58, 15.42s/it]

In [None]:
# Search with partial_perc = 0.025
partial_perc = 0.025
best_baseline = best_baseline_025
target = target_025

print(f"\n{'='*60}")
print(f"PARAMETER SEARCH with partial_perc = {partial_perc}")
print(f"Best baseline: {best_baseline:.4f}, Target: {target:.4f}")
print(f"{'='*60}\n")

results_025 = []
# R2 can be zero or negative, so start below every achievable score. Starting
# at 0 would leave best_config_025 as None whenever no config reaches a
# positive mean R2, and would report a fictitious best score of 0.0.
best_r2_025 = float('-inf')
best_config_025 = None

for i, (nu, lr, freqperc, normalize, epochs, enhanced_ctx, desc) in enumerate(configs_to_test):
    print(f"[{i+1}/{len(configs_to_test)}] Testing {desc}...", end=" ")

    h_mean, h_std, h_results = test_config(
        'use hypothesis', partial_perc, epochs, lr, nu, normalize, freqperc,
        n_runs=5, use_enhanced_context=enhanced_ctx
    )

    # Gap to the best baseline, in percentage points of R2.
    improvement = (h_mean - best_baseline) * 100
    results_025.append({
        'desc': desc, 'nu': nu, 'lr': lr, 'freqperc': freqperc,
        'normalize': normalize, 'epochs': epochs, 'enhanced_ctx': enhanced_ctx,
        'mean_r2': h_mean, 'std_r2': h_std, 'improvement_pp': improvement
    })

    # *** = target reached (>= 4.5pp), ** = >= 2pp, * = any gain over the baseline.
    if improvement >= 4.5:
        marker = "***"
    elif improvement >= 2.0:
        marker = "**"
    elif improvement > 0:
        marker = "*"
    else:
        marker = ""
    print(f"R2={h_mean:.4f} ({improvement:+.2f}pp) {marker}")

    # A NaN mean compares False here and is therefore never selected.
    if h_mean > best_r2_025:
        best_r2_025 = h_mean
        best_config_025 = (nu, lr, freqperc, normalize, epochs, enhanced_ctx, desc)

print(f"\n{'='*60}")
print(f"BEST for partial_perc={partial_perc}:")
print(f"  R2 = {best_r2_025:.4f} (improvement: {(best_r2_025-best_baseline)*100:.2f}pp)")
print(f"  Config: {best_config_025}")

In [None]:
# Show results as tables
# Columns shown in both leaderboards (top 12 rows by mean R2).
table_cols = ['desc', 'nu', 'lr', 'freqperc', 'normalize', 'enhanced_ctx', 'epochs', 'mean_r2', 'improvement_pp']

print("\nResults for partial_perc=0.015:")
df_015 = pd.DataFrame(results_015).sort_values('mean_r2', ascending=False)
print(df_015[table_cols].head(12).to_string())

print("\n\nResults for partial_perc=0.025:")
df_025 = pd.DataFrame(results_025).sort_values('mean_r2', ascending=False)
print(df_025[table_cols].head(12).to_string())

# Compare enhanced vs non-enhanced
print("\n\n" + "="*60)
print("ENHANCED CONTEXT vs STANDARD COMPARISON")
print("="*60)
for pp, df in [("0.015", df_015), ("0.025", df_025)]:
    is_enhanced = df['enhanced_ctx'].astype(bool)
    enhanced_scores = df.loc[is_enhanced, 'mean_r2']
    standard_scores = df.loc[~is_enhanced, 'mean_r2']

    # The mean of an empty group is NaN, which would print a meaningless
    # "nan" comparison; report the missing group explicitly instead.
    absent = [
        label
        for label, scores in (("enhanced-context", enhanced_scores), ("standard", standard_scores))
        if scores.empty
    ]
    if absent:
        print(f"partial_perc={pp}: no {' or '.join(absent)} configs were run; comparison skipped")
        continue

    enhanced = enhanced_scores.mean()
    standard = standard_scores.mean()
    print(f"partial_perc={pp}: Enhanced avg R2={enhanced:.4f}, Standard avg R2={standard:.4f}, diff={(enhanced-standard)*100:+.2f}pp")

## Step 3: Validate Best Configuration (15 runs)

In [None]:
# Choose which partial_perc showed better improvement
improvement_015 = (best_r2_015 - best_baseline_015) * 100
improvement_025 = (best_r2_025 - best_baseline_025) * 100

# Ties go to partial_perc=0.025.
if improvement_015 > improvement_025:
    final_partial_perc = 0.015
    final_config = best_config_015
    final_baseline = best_baseline_015
else:
    final_partial_perc = 0.025
    final_config = best_config_025
    final_baseline = best_baseline_025

# The search cells leave the best config as None when no configuration was
# selected; fail with a clear message instead of an opaque unpacking TypeError.
if final_config is None:
    raise ValueError(
        f"No configuration was selected by the parameter search for "
        f"partial_perc={final_partial_perc}; cannot run the final validation."
    )

nu, lr, freqperc, normalize, epochs, enhanced_ctx, desc = final_config

# Number of valid seeds used for the final validation.
n_validation_runs = 15

print(f"\n{'='*60}")
print(f"FINAL VALIDATION ({n_validation_runs} runs)")
print(f"{'='*60}")
print(f"\nSelected: partial_perc={final_partial_perc}")
print(f"Config: nu={nu}, lr={lr}, freqperc={freqperc}, normalize={normalize}, epochs={epochs}, enhanced_context={enhanced_ctx}")
print(f"Description: {desc}")
print(f"\nRunning {n_validation_runs} validation runs...")

h_mean_final, h_std_final, h_results_final = test_config(
    'use hypothesis', final_partial_perc, epochs, lr, nu, normalize, freqperc,
    n_runs=n_validation_runs, verbose=True, use_enhanced_context=enhanced_ctx
)

# NOTE(review): final_baseline is a mean over n_baseline_runs (5) seeds, while the
# score above averages n_validation_runs seeds, so the two are not seed-matched.
final_improvement = (h_mean_final - final_baseline) * 100

print(f"\n{'='*60}")
print(f"FINAL RESULTS")
print(f"{'='*60}")
print(f"Use Hypothesis R2: {h_mean_final:.4f} +/- {h_std_final:.4f}")
print(f"Best Baseline R2:  {final_baseline:.4f}")
print(f"Improvement:       {final_improvement:.2f} percentage points")
print(f"Target achieved:   {'YES!' if final_improvement >= 4.5 else 'Not yet'}")
print(f"Enhanced context:  {enhanced_ctx}")