# Wine_Claude: Optimizing Gradient Guided Hypotheses

This notebook explores configurations to achieve gradient selection performance
that significantly exceeds baseline methods.

In [None]:
import torch
import numpy as np
import pandas as pd
from tqdm import tqdm
import sys
sys.path.insert(0, '../')
sys.path.insert(0, '../GGH')

from GGH.data_ops import DataOperator
from GGH.selection_algorithms import AlgoModulators
from GGH.models import initialize_model, load_model
from GGH.train_val_loop import TrainValidationManager
from GGH.inspector import Inspector, visualize_train_val_error, selection_histograms, clean_final_analysis
import warnings
warnings.filterwarnings('ignore')

def set_to_deterministic(rand_state):
    """Seed all random number generators and make PyTorch deterministic.

    Seeds Python's ``random`` module, NumPy and PyTorch with ``rand_state``,
    limits PyTorch to one thread and turns on its deterministic-algorithms
    mode.

    Args:
        rand_state: Integer seed shared by every generator.
    """
    import random

    # Same seeding order as before: stdlib, then NumPy, then torch.
    for seed_rng in (random.seed, np.random.seed, torch.manual_seed):
        seed_rng(rand_state)

    torch.set_num_threads(1)
    torch.use_deterministic_algorithms(True)

In [None]:
# Data configuration
# These module-level names are read by the experiment helpers and by the
# driver cells further down in the notebook.
data_path = '../data/wine/red_wine.csv'  # CSV with the red-wine data set
results_path = "../saved_results/Red Wine Claude"  # where logs / results are written
inpt_vars = ['volatile acidity', 'total sulfur dioxide', 'citric acid']  # input columns
target_vars = ['quality']  # column(s) to predict
miss_vars = ['alcohol']  # column treated as missing
# NOTE(review): presumably the candidate values hypothesised for the missing
# variable ('alcohol'); confirm against DataOperator.
hypothesis = [[9.35, 10, 11.5, 15]]

# Model parameters
hidden_size = 32
output_size = len(target_vars)  # one output per target variable

# Initialize inspector
# A single Inspector instance is shared by every experiment in this notebook.
INSPECT = Inspector(results_path, hidden_size)

In [None]:
def full_experiment(
    use_info,
    DO,
    INSPECT,
    batch_size,
    hidden_size,
    output_size,
    num_epochs,
    rand_state,
    results_path,
    dropout=0.05,
    lr=0.004,
    nu=0.1,
    normalize_grads_contx=False,
    use_context=True,
    final_analysis=False,
    freqperc_cutoff=0.25,
):
    """Train one model on an already prepared data split and log the run.

    Args:
        use_info: Training mode string forwarded to the data loader and the
            training manager (e.g. "partial info", "use hypothesis").
        DO: Prepared ``DataOperator`` for this seed.
        INSPECT: ``Inspector`` used to persist the train/validation logs.
        batch_size: Batch size for the data loader and the training manager.
        hidden_size: Hidden layer width passed to ``initialize_model``.
        output_size: Accepted for call compatibility; not used in this function.
        num_epochs: Number of training epochs.
        rand_state: Seed passed to model initialisation and the training manager.
        results_path: Directory passed to the training manager.
        dropout: Dropout passed to ``initialize_model``.
        lr, nu, normalize_grads_contx, use_context, freqperc_cutoff:
            Forwarded unchanged to ``AlgoModulators``.
        final_analysis: Forwarded to the training manager, ``train_model``
            and ``save_train_val_logs``.

    Returns:
        Tuple ``(DO, TVM, model)``: the data operator, the training manager
        and the trained model.
    """
    selection_settings = dict(
        lr=lr,
        nu=nu,
        normalize_grads_contx=normalize_grads_contx,
        use_context=use_context,
        freqperc_cutoff=freqperc_cutoff,
    )
    AM = AlgoModulators(DO, **selection_settings)

    loader = DO.prep_dataloader(use_info, batch_size)
    net = initialize_model(DO, loader, hidden_size, rand_state, dropout=dropout)

    TVM = TrainValidationManager(
        use_info,
        num_epochs,
        loader,
        batch_size,
        rand_state,
        results_path,
        final_analysis=final_analysis,
    )
    TVM.train_model(DO, AM, net, final_analysis=final_analysis)
    INSPECT.save_train_val_logs(DO, AM, TVM, net, final_analysis=final_analysis)

    return DO, TVM, net

def multi_experiments(total_runs, use_info, num_epochs, data_path, inpt_vars, target_vars, miss_vars,
                      hypothesis, partial_perc, INSPECT, batch_size, hidden_size, output_size,
                      results_path, hyperparameters, final_analysis=True, max_rand_states=2000):
    """Run ``total_runs`` independent experiments, one per usable random seed.

    Seeds are tried in order ``0, 1, 2, ...``. A seed is skipped when the
    resulting ``DataOperator`` reports ``lack_partial_coverage``; otherwise a
    full experiment is trained and logged for it.

    Args:
        total_runs: Number of successful experiments to collect. Values
            ``<= 0`` run no experiment at all.
        use_info: Training mode string forwarded to ``full_experiment``.
        num_epochs: Epochs per experiment.
        data_path, inpt_vars, target_vars, miss_vars, hypothesis, partial_perc:
            Forwarded to ``DataOperator``.
        INSPECT, batch_size, hidden_size, output_size, results_path:
            Forwarded to ``full_experiment``.
        hyperparameters: Mapping of ``name -> {"value": ...}`` with the keys
            ``dropout``, ``lr``, ``nu``, ``normalize_grads_contx`` and
            ``use_context``; ``freqperc_cutoff`` is optional (default 0.25).
        final_analysis: Forwarded to ``full_experiment``.
        max_rand_states: Upper bound on the number of seeds tried before
            giving up (previously a hard-coded 2000).

    Raises:
        KeyError: If a required hyperparameter is missing.
    """
    clean_final_analysis(results_path, use_info)

    # Resolve the hyperparameters once; they do not change between seeds.
    experiment_kwargs = {
        "dropout": hyperparameters["dropout"]["value"],
        "lr": hyperparameters["lr"]["value"],
        "nu": hyperparameters["nu"]["value"],
        "normalize_grads_contx": hyperparameters["normalize_grads_contx"]["value"],
        "use_context": hyperparameters["use_context"]["value"],
        "final_analysis": final_analysis,
        "freqperc_cutoff": hyperparameters.get("freqperc_cutoff", {"value": 0.25})["value"],
    }

    completed = 0
    # The context manager closes the bar even when an experiment raises.
    with tqdm(total=total_runs) as progress_bar:
        for r_state in range(max_rand_states):
            # Checked before any work so that total_runs <= 0 does nothing
            # instead of never matching the stop condition.
            if completed >= total_runs:
                break

            set_to_deterministic(r_state)
            DO = DataOperator(data_path, inpt_vars, target_vars, miss_vars, hypothesis,
                              partial_perc, r_state, device="cpu")
            DO.problem_type = 'regression'

            if DO.lack_partial_coverage:
                continue

            full_experiment(use_info, DO, INSPECT, batch_size, hidden_size, output_size, num_epochs,
                            r_state, results_path, **experiment_kwargs)
            completed += 1
            progress_bar.update(1)

    if completed < total_runs:
        # Written to stderr rather than warnings.warn because this notebook
        # silences all warnings globally.
        print(f"multi_experiments: only {completed} of {total_runs} runs completed "
              f"after trying {max_rand_states} random states", file=sys.stderr)

## Step 1: Run Baselines

In [None]:
# Experiment parameters
num_loops = 15  # number of successful runs collected per method
partial_perc = 0.015  # Testing with 1.5% partial data

# Baseline hyperparameters
# Each entry is wrapped as {"value": ...} because multi_experiments reads
# hyperparameters[name]["value"].
baseline_hyperparams = {
    "lr": {"value": 0.001},
    "dropout": {"value": 0.05},
    "nu": {"value": 0.1},
    "normalize_grads_contx": {"value": False},
    "use_context": {"value": True},
    "freqperc_cutoff": {"value": 0.25}
}

# Batch size scales with the number of hypothesis values (100 per value).
batch_size = 100 * len(hypothesis[0])
num_epochs_baseline = 200  # epochs used for all three baseline methods

In [None]:
%%capture
# Baseline 1: train with the "partial info" mode
multi_experiments(
    total_runs=num_loops,
    use_info="partial info",
    num_epochs=num_epochs_baseline,
    data_path=data_path,
    inpt_vars=inpt_vars,
    target_vars=target_vars,
    miss_vars=miss_vars,
    hypothesis=hypothesis,
    partial_perc=partial_perc,
    INSPECT=INSPECT,
    batch_size=batch_size,
    hidden_size=hidden_size,
    output_size=output_size,
    results_path=results_path,
    hyperparameters=baseline_hyperparams,
)

In [None]:
%%capture
# Baseline 2: train with the "use known only" mode
multi_experiments(
    total_runs=num_loops,
    use_info="use known only",
    num_epochs=num_epochs_baseline,
    data_path=data_path,
    inpt_vars=inpt_vars,
    target_vars=target_vars,
    miss_vars=miss_vars,
    hypothesis=hypothesis,
    partial_perc=partial_perc,
    INSPECT=INSPECT,
    batch_size=batch_size,
    hidden_size=hidden_size,
    output_size=output_size,
    results_path=results_path,
    hyperparameters=baseline_hyperparams,
)

In [None]:
%%capture
# Baseline 3: train with the "full info" mode (oracle upper bound)
multi_experiments(
    total_runs=num_loops,
    use_info="full info",
    num_epochs=num_epochs_baseline,
    data_path=data_path,
    inpt_vars=inpt_vars,
    target_vars=target_vars,
    miss_vars=miss_vars,
    hypothesis=hypothesis,
    partial_perc=partial_perc,
    INSPECT=INSPECT,
    batch_size=batch_size,
    hidden_size=hidden_size,
    output_size=output_size,
    results_path=results_path,
    hyperparameters=baseline_hyperparams,
)

In [None]:
# Build the comparison table from the baseline runs and show the key metrics
df_baseline, df_baseline_notavg = INSPECT.create_test_comparison_table(
    data_path,
    inpt_vars,
    target_vars,
    miss_vars,
    hypothesis,
    partial_perc,
    batch_size,
    best_imput="",
)
print("Baseline Results:")
print(df_baseline.loc[:, ["Method", "avg_r2_score", "std_r2_score", "avg_mse"]])

## Step 2: Optimize Gradient Selection (Use Hypothesis)

In [None]:
# Optimized hyperparameters for gradient selection
# Differences from baseline_hyperparams: lr 0.001 -> 0.002, nu 0.1 -> 0.15,
# normalize_grads_contx False -> True, freqperc_cutoff 0.25 -> 0.20.
# NOTE(review): an earlier version of this comment said "lower nu", but the
# value here is HIGHER than the baseline. Whether a larger nu makes the
# selection more or less permissive depends on AlgoModulators; confirm.
optimized_hyperparams = {
    "lr": {"value": 0.002},
    "dropout": {"value": 0.05},
    "nu": {"value": 0.15},  # raised from the baseline 0.1 (intended to capture more correct hypotheses)
    "normalize_grads_contx": {"value": True},  # Normalize for better separation
    "use_context": {"value": True},
    "freqperc_cutoff": {"value": 0.20}  # Lower cutoff than the baseline 0.25, be more inclusive
}

num_epochs_hypothesis = 60  # Fewer epochs than the 200 used for the baselines

In [None]:
%%capture
# Gradient selection ("use hypothesis" mode) with the optimized parameters
multi_experiments(
    total_runs=num_loops,
    use_info="use hypothesis",
    num_epochs=num_epochs_hypothesis,
    data_path=data_path,
    inpt_vars=inpt_vars,
    target_vars=target_vars,
    miss_vars=miss_vars,
    hypothesis=hypothesis,
    partial_perc=partial_perc,
    INSPECT=INSPECT,
    batch_size=batch_size,
    hidden_size=hidden_size,
    output_size=output_size,
    results_path=results_path,
    hyperparameters=optimized_hyperparams,
)

In [None]:
# Rebuild the comparison table, which should now include the "use hypothesis" runs
df_results, df_results_notavg = INSPECT.create_test_comparison_table(
    data_path,
    inpt_vars,
    target_vars,
    miss_vars,
    hypothesis,
    partial_perc,
    batch_size,
    best_imput="",
)
print("Results with Gradient Selection:")
print(df_results.loc[:, ["Method", "avg_r2_score", "std_r2_score", "avg_mse"]])

# Improvement of gradient selection over the stronger of the two baselines,
# reported only when "use hypothesis" results are present in the table.
if (df_results["Method"] == "use hypothesis").any():
    hyp_r2 = df_results.loc[df_results["Method"] == "use hypothesis", "avg_r2_score"].iloc[0]
    partial_r2 = df_results.loc[df_results["Method"] == "partial info", "avg_r2_score"].iloc[0]
    known_r2 = df_results.loc[df_results["Method"] == "use known only", "avg_r2_score"].iloc[0]
    best_baseline = known_r2 if known_r2 > partial_r2 else partial_r2
    # R2 difference expressed in percentage points.
    improvement = 100 * (hyp_r2 - best_baseline)
    print(f"\nImprovement over best baseline: {improvement:.2f} percentage points")

## Step 3: Fine-tune Parameters

In [None]:
# Parameter search for best configuration
def quick_test(nu, lr, freqperc_cutoff, normalize, num_epochs, partial_perc, n_runs=5):
    """Quick test with fewer runs to find promising configurations.

    Trains "use hypothesis" models for successive random seeds (0..99),
    skipping seeds whose ``DataOperator`` reports ``lack_partial_coverage``,
    until ``n_runs`` models have been evaluated. Each model is reloaded from
    ``TVM.weights_save_path`` and scored on the test split.

    Reads the notebook-level names ``data_path``, ``inpt_vars``,
    ``target_vars``, ``miss_vars``, ``hypothesis``, ``INSPECT``,
    ``batch_size``, ``hidden_size``, ``output_size`` and ``results_path``.

    Args:
        nu, lr, freqperc_cutoff: Forwarded to ``full_experiment``.
        normalize: Forwarded as ``normalize_grads_contx``.
        num_epochs: Training epochs per run.
        partial_perc: Forwarded to ``DataOperator``.
        n_runs: Number of successful runs to average over. Values ``<= 0``
            train nothing.

    Returns:
        ``(mean_r2, std_r2)`` over the collected test R2 scores, or
        ``(nan, nan)`` when no run could be completed.
    """
    test_results = []
    for r_state in range(100):
        # Checked up front so that n_runs <= 0 does not train a model.
        if len(test_results) >= n_runs:
            break

        set_to_deterministic(r_state)
        DO = DataOperator(data_path, inpt_vars, target_vars, miss_vars, hypothesis,
                          partial_perc, r_state, device="cpu")
        DO.problem_type = 'regression'

        if DO.lack_partial_coverage:
            continue

        DO, TVM, model = full_experiment("use hypothesis", DO, INSPECT, batch_size, hidden_size,
                                          output_size, num_epochs, r_state, results_path,
                                          dropout=0.05, lr=lr, nu=nu,
                                          normalize_grads_contx=normalize,
                                          use_context=True, final_analysis=False,
                                          freqperc_cutoff=freqperc_cutoff)

        # Score the weights saved during training, not the in-memory model.
        best_model = load_model(DO, TVM.weights_save_path, batch_size)
        test_r2 = INSPECT.calculate_val_r2score(DO, TVM, best_model, data="test")
        test_results.append(test_r2)

    if not test_results:
        # Make the "no usable seed" case explicit instead of relying on
        # np.mean([]) yielding nan.
        return np.nan, np.nan

    return np.mean(test_results), np.std(test_results)

In [None]:
# Grid search over key parameters
from itertools import product

print("Running parameter search...")
results = []

# product() iterates with the last grid varying fastest, i.e. in the same
# order as nested for-loops over nu, lr, freqperc, normalize, epochs.
for nu, lr, freqperc, normalize, epochs in product(
    [0.05, 0.10, 0.20, 0.3],   # nu
    [0.001, 0.0025],           # lr
    [0.15, 0.25, 0.75],        # freqperc_cutoff
    [True, False],             # normalize_grads_contx
    [50],                      # epochs
):
    # Use the notebook-level partial_perc so the search evaluates the same
    # data regime as the baselines and the final benchmark.
    mean_r2, std_r2 = quick_test(nu, lr, freqperc, normalize, epochs,
                                 partial_perc=partial_perc, n_runs=3)
    results.append({
        'nu': nu, 'lr': lr, 'freqperc': freqperc,
        'normalize': normalize, 'epochs': epochs,
        'mean_r2': mean_r2, 'std_r2': std_r2
    })
    print(f"nu={nu}, lr={lr}, freq={freqperc}, norm={normalize}, ep={epochs}: R2={mean_r2:.4f}")

results_df = pd.DataFrame(results)
print("\nTop 10 configurations:")
print(results_df.nlargest(10, 'mean_r2'))

In [None]:
# Pick the row with the highest mean test R2 from the search results
best_config = results_df.nlargest(n=1, columns='mean_r2').iloc[0]
print("Best configuration:")
print(best_config)

## Step 4: Final Benchmark (15 loops)

In [None]:
# Final hyperparameters based on search
# best_config is a row taken from a pandas DataFrame, so its entries may be
# NumPy scalar types. Convert them to native Python types, as already done
# for the epoch count, so downstream code and the printed summary see plain
# float / bool values.
final_hyperparams = {
    "lr": {"value": float(best_config['lr'])},
    "dropout": {"value": 0.05},
    "nu": {"value": float(best_config['nu'])},
    "normalize_grads_contx": {"value": bool(best_config['normalize'])},
    "use_context": {"value": True},
    "freqperc_cutoff": {"value": float(best_config['freqperc'])}
}

final_epochs = int(best_config['epochs'])
print(f"Running final benchmark with: {final_hyperparams}")
print(f"Epochs: {final_epochs}")

In [None]:
%%capture
# Final benchmark: "use hypothesis" mode with the best configuration found
multi_experiments(
    total_runs=num_loops,
    use_info="use hypothesis",
    num_epochs=final_epochs,
    data_path=data_path,
    inpt_vars=inpt_vars,
    target_vars=target_vars,
    miss_vars=miss_vars,
    hypothesis=hypothesis,
    partial_perc=partial_perc,
    INSPECT=INSPECT,
    batch_size=batch_size,
    hidden_size=hidden_size,
    output_size=output_size,
    results_path=results_path,
    hyperparameters=final_hyperparams,
)

In [None]:
# Final comparison
df_final, df_final_notavg = INSPECT.create_test_comparison_table(
    data_path, inpt_vars, target_vars, miss_vars, hypothesis,
    partial_perc, batch_size, best_imput=""
)

print("="*60)
print("FINAL RESULTS")
print("="*60)
print(df_final[["Method", "avg_r2_score", "std_r2_score", "avg_mse", "avg_mae"]])

# Calculate and display improvement
hyp_r2 = df_final[df_final["Method"] == "use hypothesis"]["avg_r2_score"].values[0]
partial_r2 = df_final[df_final["Method"] == "partial info"]["avg_r2_score"].values[0]
known_r2 = df_final[df_final["Method"] == "use known only"]["avg_r2_score"].values[0]
full_r2 = df_final[df_final["Method"] == "full info"]["avg_r2_score"].values[0]

best_baseline = max(partial_r2, known_r2)
improvement = (hyp_r2 - best_baseline) * 100  # R2 difference in percentage points
# Headroom between the best baseline and the full-information oracle.
oracle_gap = full_r2 - best_baseline

print("\n" + "="*60)
print("IMPROVEMENT ANALYSIS")
print("="*60)
print(f"Use Hypothesis R2: {hyp_r2:.4f}")
print(f"Best Baseline R2:  {best_baseline:.4f} ({'partial info' if partial_r2 > known_r2 else 'use known only'})")
print(f"Full Info R2:      {full_r2:.4f} (oracle upper bound)")
print()
print(f"Improvement over best baseline: {improvement:.2f} percentage points")
if oracle_gap > 0:
    print(f"Gap to oracle closed: {((hyp_r2 - best_baseline) / oracle_gap) * 100:.1f}%")
else:
    # With no positive headroom the ratio is undefined (division by zero)
    # or has a misleading sign, so do not report a percentage.
    print("Gap to oracle closed: n/a (full info does not beat the best baseline)")

In [None]:
# Per-seed breakdown of the non-averaged results
print("\nPer-run R2 scores:")
print(df_final_notavg.loc[:, ["Method", "rand_states", "avg_r2_score"]])