# Photocell Degradation - Noise Detection Benchmark (v2)

## Goal
Benchmark GGH as a noise detection method on the Photocell Degradation dataset.

## Challenge
- **Simulated noise**: 40% of target values are corrupted (noise factor in the range 0.4–0.6 around the mean)
- **Unsupervised**: No labeled clean samples available during detection
- **Evaluation only**: Ground truth labels used only to measure detection performance

## Methods Compared
1. **Full Info (No Noise)**: Oracle upper bound - trained on clean data
2. **Full Info Noisy**: Baseline - trained on noisy data without removal
3. **Old GGH (DBSCAN)**: Gradient clustering approach from original notebooks
4. **New GGH (Soft Refinement)**: Bootstrap anchors from loss distribution, iterative refinement

## Expected Results (from original notebook)
- Full Info: R² ~0.845
- Old GGH (DBSCAN): R² ~0.799
- Full Info Noisy: R² ~0.674

In [None]:
# Environment setup and imports for the benchmark notebook.
# Statement order matters in this cell: the environment variable is set before
# torch is imported, and sys.path is extended before the project imports.

# CUDA determinism fix (must be before torch import)
# PyTorch's reproducibility notes list ':4096:8' as one of the cuBLAS workspace
# settings required for deterministic behavior on CUDA >= 10.2.
import os
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':4096:8'

import numpy as np
import torch
import sys
# NOTE(review): tqdm is not referenced by any of the cells reviewed here.
from tqdm import tqdm
from scipy import stats
import warnings
# Silences every warning category for the remainder of the session.
warnings.filterwarnings('ignore')

# Prepend the parent directory and ../GGH to the module search path.
# Presumably this is what makes the GGH and GGH_2 packages importable from the
# notebook's folder - verify against the repository layout.
sys.path.insert(0, '../')
sys.path.insert(0, '../GGH')

# Project-local pieces: the data wrapper, the four benchmarked runners plus the
# seeding helper, and the plotting entry point used in the last cell.
from GGH.data_ops import DataOperator
from GGH_2.noise_detection import (
    set_to_deterministic,
    run_full_info,
    run_full_info_noisy,
    run_old_ggh_dbscan,
    run_new_ggh_unsupervised,
)
from GGH_2.noise_detection_viz import plot_all_noise_detection_metrics

# Reaching this line means every import above resolved.
print('Imports successful!')
# Use the GPU when PyTorch reports one, otherwise fall back to the CPU.
DEVICE = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(f'Using device: {DEVICE}')

In [None]:
# =============================================================================
# CONFIGURATION - Photocell Degradation Dataset
# =============================================================================
# Input CSV and the directory created below for this benchmark's results.
data_path = '../data/dataset_photo_pce10/data.csv'
results_path = '../saved_results/Photocell_Noise_Detection_v2'

# Column roles handed to DataOperator (inputs, target, missing variables) and
# the accompanying hypothesis list.
inpt_vars = ['P3HT', 'PTB7-Th', 'PCBM']
target_vars = ['Degradation']
miss_vars = []
hypothesis = [[1, 2, 3]]

# Model hyper-parameters shared by every method.
hidden_size, dropout, lr = 32, 0.05, 0.001

# Noise simulation: fraction of corrupted samples and the noise factor range.
DATA_NOISE_PERC, NOISE_MINRANGE, NOISE_MAXRANGE = 0.40, 0.40, 0.60
noise_profile = dict(
    DATA_NOISE_PERC=DATA_NOISE_PERC,
    NOISE_MINRANGE=NOISE_MINRANGE,
    NOISE_MAXRANGE=NOISE_MAXRANGE,
)

# Training parameters.
partial_perc, batch_size = 0.03, 250

# Benchmark size and epoch budgets for the two full-information baselines.
BENCHMARK_N_RUNS = 15
FULL_INFO_EPOCHS = NOISY_EPOCHS = 600

# Old GGH (DBSCAN): 585 + 15 epochs, plus the eps / min-samples-ratio values.
OLD_GGH_EPOCHS, OLD_GGH_END_EPOCHS = 585, 15
OLD_GGH_EPS_VALUES = [0.15, 0.2]
OLD_GGH_MIN_SAMPLES_RATIOS = [0.15, 0.2, 0.25]

# New GGH (Soft Refinement): epoch budget of each stage ...
GGH_ITER1_EPOCHS = 60
GGH_ITER1_ANALYSIS_EPOCHS = 5
GGH_ITER2_EPOCHS = 30
GGH_FINAL_EPOCHS = 600
# ... and its weighting / thresholding knobs.
GGH_MIN_WEIGHT = 0.1
GGH_TEMPERATURE = 1.0
GGH_NOISE_THRESHOLD = 0.3
GGH_CLEAN_PERCENTILE = 0.60

# Single config mapping consumed by run_old_ggh_dbscan and
# run_new_ggh_unsupervised; key order is kept as originally declared.
nd_config = dict(
    data_path=data_path,
    inpt_vars=inpt_vars,
    target_vars=target_vars,
    miss_vars=miss_vars,
    hypothesis=hypothesis,
    partial_perc=partial_perc,
    batch_size=batch_size,
    hidden_size=hidden_size,
    lr=lr,
    dropout=dropout,
    # Old GGH
    old_ggh_epochs=OLD_GGH_EPOCHS,
    old_ggh_end_epochs=OLD_GGH_END_EPOCHS,
    old_ggh_eps_values=OLD_GGH_EPS_VALUES,
    old_ggh_min_samples_ratios=OLD_GGH_MIN_SAMPLES_RATIOS,
    # NOTE(review): fed from GGH_FINAL_EPOCHS; presumably the retraining length
    # used by both GGH variants - confirm in GGH_2.noise_detection.
    final_epochs=GGH_FINAL_EPOCHS,
    # New GGH
    ggh_iter1_epochs=GGH_ITER1_EPOCHS,
    ggh_iter1_analysis_epochs=GGH_ITER1_ANALYSIS_EPOCHS,
    ggh_iter2_epochs=GGH_ITER2_EPOCHS,
    ggh_min_weight=GGH_MIN_WEIGHT,
    ggh_temperature=GGH_TEMPERATURE,
    ggh_noise_threshold=GGH_NOISE_THRESHOLD,
    ggh_clean_percentile=GGH_CLEAN_PERCENTILE,
)

# Make sure the output directory exists before anything tries to write there.
os.makedirs(results_path, exist_ok=True)

print('Dataset: Photocell Degradation')
print(f'Noise simulation: {DATA_NOISE_PERC*100}% of data, range [{NOISE_MINRANGE}, {NOISE_MAXRANGE}]')
print(f'Benchmark runs: {BENCHMARK_N_RUNS}')
print(f'Results path: {results_path}')

## Main Benchmark

Run all methods once per random seed (`r_state` = 0 … `BENCHMARK_N_RUNS` − 1):
1. **Full Info (No Noise)** - Oracle upper bound
2. **Full Info Noisy** - Baseline with noise, no removal
3. **Old GGH (DBSCAN)** - Gradient clustering + retrain on cleaned data
4. **New GGH (Soft Refinement)** - Bootstrap anchors, iterative refinement + retrain

In [None]:
# =============================================================================
# BENCHMARK EXECUTION
# =============================================================================
_rule = '=' * 80
print(_rule)
print('BENCHMARK: Noise Detection on Photocell Degradation')
print(_rule)


def _seeded_operator(seed):
    """Call set_to_deterministic(seed), then build a CPU DataOperator for that seed."""
    set_to_deterministic(seed)
    return DataOperator(data_path, inpt_vars, target_vars, miss_vars, hypothesis,
                        partial_perc, seed, device='cpu', use_case='noise detection')


def _report_detection(result):
    """Print test R2 and detection precision/recall of one GGH result dict."""
    detection = result['detection']
    print(f'    R2: {result["test_r2"]:.4f}, '
          f'Detection P/R: {detection["precision"]:.3f}/{detection["recall"]:.3f}')


# One list of per-run result dicts for each benchmarked method.
all_results = {
    name: []
    for name in ('Full Info', 'Full Info Noisy', 'Old GGH (DBSCAN)', 'New GGH (Soft Ref)')
}

for run_idx in range(BENCHMARK_N_RUNS):
    r_state = run_idx  # the run index doubles as the random state
    print(f'\n--- RUN {run_idx + 1}/{BENCHMARK_N_RUNS} (r_state={r_state}) ---')

    # Noisy operator shared by the noisy baseline and both GGH variants.
    DO = _seeded_operator(r_state)
    DO.simulate_noise(DATA_NOISE_PERC, NOISE_MINRANGE, NOISE_MAXRANGE)

    noisy_train = DO.df_train_noisy
    n_total = len(noisy_train)
    n_noisy = noisy_train['noise_added'].sum()
    print(f'  True noisy: {n_noisy}/{n_total} ({n_noisy/n_total*100:.1f}%)')

    # Full Info (No Noise): a fresh operator with the same seed, without
    # simulate_noise applied.
    print('  Running Full Info (no noise)...')
    DO_clean = _seeded_operator(r_state)
    full_result = run_full_info(DO_clean, r_state, batch_size, hidden_size, lr,
                                FULL_INFO_EPOCHS, dropout, results_path)
    all_results['Full Info'].append(full_result)
    print(f'    R2: {full_result["test_r2"]:.4f}')

    # Full Info Noisy (Baseline): trained on the noisy operator as-is.
    print('  Running Full Info Noisy (baseline)...')
    noisy_result = run_full_info_noisy(DO, r_state, batch_size, hidden_size, lr,
                                       NOISY_EPOCHS, dropout, results_path)
    all_results['Full Info Noisy'].append(noisy_result)
    print(f'    R2: {noisy_result["test_r2"]:.4f}')

    # Old GGH (DBSCAN)
    print('  Running Old GGH (DBSCAN)...')
    old_result = run_old_ggh_dbscan(DO, r_state, nd_config)
    all_results['Old GGH (DBSCAN)'].append(old_result)
    _report_detection(old_result)

    # New GGH (Soft Refinement)
    print('  Running New GGH (Soft Refinement)...')
    new_result = run_new_ggh_unsupervised(DO, r_state, nd_config)
    all_results['New GGH (Soft Ref)'].append(new_result)
    _report_detection(new_result)

print(f'\n{_rule}')
print('BENCHMARK COMPLETE')
print(_rule)

## Results Summary

In [None]:
# =============================================================================
# SUMMARY TABLE
# =============================================================================
_wide_rule = '=' * 90


def _per_run(results, key):
    """Collect results[i][key] over all runs of one method."""
    return [run[key] for run in results]


def _stars(p_value):
    """Map a p-value to '***' / '**' / '*' / '' at the 0.001 / 0.01 / 0.05 levels."""
    for cutoff, marker in ((0.001, '***'), (0.01, '**'), (0.05, '*')):
        if p_value < cutoff:
            return marker
    return ''


def _report_paired_ttest(prefix, sample_a, sample_b):
    """Print mean difference, t statistic and p-value of a paired t-test (a vs b)."""
    t_value, p_value = stats.ttest_rel(sample_a, sample_b)
    delta = np.mean(sample_a) - np.mean(sample_b)
    print(f'{prefix}diff={delta:+.4f}, t={t_value:.3f}, p={p_value:.4f} {_stars(p_value)}')


print('\n' + _wide_rule)
print('RESULTS SUMMARY')
print(_wide_rule)

print(f'\n{"Method":<25} {"R2":>10} {"MSE":>12} {"MAE":>12}')
print('-' * 60)

# Regression quality per method: mean +/- std of R2, mean MSE, mean MAE.
for method, results in all_results.items():
    r2_runs = _per_run(results, 'test_r2')
    mse_runs = _per_run(results, 'test_mse')
    mae_runs = _per_run(results, 'test_mae')
    row = [
        f'{method:<25} ',
        f'{np.mean(r2_runs):>6.4f} +/- {np.std(r2_runs):.4f}  ',
        f'{np.mean(mse_runs):.6f}  ',
        f'{np.mean(mae_runs):.6f}',
    ]
    print(''.join(row))

# Detection metrics for GGH methods
print(f'\n{_wide_rule}')
print('DETECTION METRICS')
print(_wide_rule)
print(f'{"Method":<25} {"Precision":>10} {"Recall":>10} {"F1":>10} {"Accuracy":>10}')
print('-' * 70)

for method in ('Old GGH (DBSCAN)', 'New GGH (Soft Ref)'):
    detections = _per_run(all_results[method], 'detection')
    cells = []
    for metric in ('precision', 'recall', 'f1', 'accuracy'):
        values = _per_run(detections, metric)
        cells.append(f'{np.mean(values):>6.3f}+/-{np.std(values):.3f}')
    print(f'{method:<25} ' + ' '.join(cells))

# Statistical tests
print(f'\n{_wide_rule}')
print('STATISTICAL TESTS (Paired t-test on R2)')
print(_wide_rule)

# Runs are paired by index: run i of every method used the same r_state.
old_r2 = _per_run(all_results['Old GGH (DBSCAN)'], 'test_r2')
new_r2 = _per_run(all_results['New GGH (Soft Ref)'], 'test_r2')
noisy_r2 = _per_run(all_results['Full Info Noisy'], 'test_r2')

_report_paired_ttest('\nNew GGH vs Old GGH: ', new_r2, old_r2)
_report_paired_ttest('Old GGH vs Noisy:   ', old_r2, noisy_r2)
_report_paired_ttest('New GGH vs Noisy:   ', new_r2, noisy_r2)

## Visualizations

In [None]:
# =============================================================================
# VISUALIZATION: ALL METRICS
# =============================================================================
# Hand the per-method result lists collected by the benchmark loop to the
# plotting helper, together with the dataset label and the results directory.
# NOTE(review): presumably the figures are written under results_path -
# confirm in GGH_2.noise_detection_viz.
plot_all_noise_detection_metrics(all_results, 'Photocell Degradation', results_path)