In [None]:
# Demo setup: describe a PLS pipeline tuned with the GLOBAL_AVERAGE parameter strategy
print("🌍 Testing GLOBAL_AVERAGE Parameter Strategy", "=" * 50, sep="\n")

# Kept after the banner so the statement order (and therefore the output order) is unchanged
from nirs4all.controllers.models.base_model_controller import ParamStrategy
print("Available parameter strategies:")
for strategy in ParamStrategy:
    print(f"  - {strategy.value}")

# Create a simple test configuration using global_average
from sklearn.cross_decomposition import PLSRegression

# Fine-tuning settings, pulled out of the step definition for readability
global_avg_finetune_params = {
    "cv_mode": "per_fold",
    "param_strategy": "global_average",  # ⭐ NEW STRATEGY
    "n_trials": 5,  # Small number for quick test
    "verbose": 1,
    "model_params": {"n_components": ("int", 1, 8)},
    "train_params": {"verbose": 0},
}

# Single-step pipeline: one PLS model driven by the settings above
global_avg_config = {
    "name": "global_average_test",
    "steps": [
        {
            "name": "pls_global_avg",
            "controller": "sklearn",
            "model": PLSRegression(),
            "finetune_params": global_avg_finetune_params,
        }
    ],
}

# Static description of the strategy, emitted as one multi-line message
print(
    "\nGlobal Average Strategy:",
    "- Evaluates each parameter set on ALL folds simultaneously",
    "- Averages the validation scores across all folds",
    "- Selects the parameter set with best average performance",
    "- More computationally expensive but more generalizable",
    sep="\n",
)
print(f"\nConfiguration: {global_avg_config['steps'][0]['finetune_params']}")

# Test dataset (using smaller sample for quick demo)
test_dataset_config = {
    'source': ['sample_data/regression/Protein_NIR.xlsx'],
    'y': 'Protein',
    'folds': 3,  # Fewer folds for quick test
    'train': 0.7, 'val': 0.15, 'test': 0.15,
    'random_state': 42,
}

print(
    "\n🚀 Running global_average optimization...",
    "(This demonstrates simultaneous optimization across all folds)",
    sep="\n",
)

In [None]:
# Execute the global_average test
#
# Loads the dataset described by `test_dataset_config`, runs the pipeline from
# `global_avg_config` (both defined in the previous cell) and reports the
# metrics computed on the combined "test_fold" predictions.
import time
import traceback

# Imported unconditionally: later cells use np / mean_squared_error / r2_score,
# so they must not depend on this run succeeding and producing predictions.
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

# Load the test dataset
from nirs4all.dataset.loader import get_dataset
test_data = get_dataset(test_dataset_config)

# Create and run the pipeline
from nirs4all.pipeline.config import PipelineConfig
from nirs4all.pipeline.runner import PipelineRunner

config = PipelineConfig(global_avg_config, "global_avg_test")
runner = PipelineRunner()

print("🎯 Starting optimization with global_average strategy...")
start_time = time.time()

try:
    result_dataset, history, pipeline = runner.run(config, test_data)
    execution_time = time.time() - start_time

    print(f"✅ Global Average optimization completed in {execution_time:.1f} seconds!")

    # Analyze results
    predictions = result_dataset._predictions
    print(f"📊 Generated {len(predictions)} prediction sets")

    if len(predictions) > 0:
        # Find the global average predictions
        # (keys are matched by substring: pipeline name fragment + fold partition)
        pred_keys = predictions.list_keys()
        global_avg_preds = [k for k in pred_keys if 'global_avg' in k and 'test_fold' in k]

        if global_avg_preds:
            print(f"\nFound {len(global_avg_preds)} cross-validation predictions:")
            for key in global_avg_preds:
                print(f"  - {key}")

            # Combine all fold predictions
            combined = predictions.combine_folds(
                "sample_data", config.name, "PLSRegression", "test_fold"
            )

            if combined:
                # Flatten so the metrics receive 1-D vectors
                y_true = combined['y_true'].flatten()
                y_pred = combined['y_pred'].flatten()

                mse = mean_squared_error(y_true, y_pred)
                r2 = r2_score(y_true, y_pred)
                rmse = np.sqrt(mse)

                print(f"\n🎯 Global Average Cross-Validation Performance:")
                print(f"  RMSE: {rmse:.4f}")
                print(f"  R²:   {r2:.4f}")
                print(f"  Samples: {len(y_true)}")
                print(f"  Folds: {combined['metadata']['num_folds']}")

        print(f"\n✨ Key advantages of global_average:")
        print(f"  ✓ Single optimal parameter set for all folds")
        print(f"  ✓ Parameters optimized for average performance")
        print(f"  ✓ More generalizable than per-fold optimization")
        print(f"  ✓ Reduces fold-specific overfitting")

    else:
        print("⚠️ No predictions were generated")

except Exception as e:
    # Demo cell: report the failure (with traceback) instead of aborting the notebook
    execution_time = time.time() - start_time
    print(f"❌ Test failed after {execution_time:.1f} seconds: {e}")
    traceback.print_exc()

In [None]:
# Compare global_average vs per_fold_best strategies
print("🔬 Comparing Parameter Strategies")
print("=" * 40)

def quick_strategy_test(strategy_name, n_trials=3):
    """Run a quick test of a parameter strategy.

    Builds a one-step PLS pipeline whose ``param_strategy`` is
    ``strategy_name``, runs it on the notebook-level ``test_data`` and scores
    the combined "test_fold" predictions.

    Relies on names defined by earlier cells: ``PLSRegression``,
    ``PipelineConfig``, ``PipelineRunner``, ``time`` and ``test_data``.

    Args:
        strategy_name: Value passed as ``finetune_params["param_strategy"]``.
        n_trials: Value passed as ``finetune_params["n_trials"]``.

    Returns:
        dict: always contains ``'strategy'`` and ``'success'``. On success it
        also contains ``'time'`` (seconds spent in ``runner.run``) and
        ``'rmse'``; on failure it contains ``'error'`` (a message string).
    """
    # Imported locally so the helper does not depend on imports that another
    # cell performs only conditionally; a missing name here would otherwise be
    # swallowed by the except below and reported as a strategy failure.
    import numpy as np
    from sklearn.metrics import mean_squared_error

    print(f"\nTesting {strategy_name}...")

    test_config = {
        "name": f"test_{strategy_name}",
        "steps": [{
            "name": "pls_test",
            "controller": "sklearn",
            "model": PLSRegression(),
            "finetune_params": {
                "cv_mode": "per_fold",
                "param_strategy": strategy_name,
                "n_trials": n_trials,
                "verbose": 0,  # Silent for comparison
                "model_params": {
                    "n_components": ("int", 1, 6)
                }
            }
        }]
    }

    config = PipelineConfig(test_config, f"test_{strategy_name}")
    runner = PipelineRunner()

    start = time.time()
    try:
        result, _, _ = runner.run(config, test_data)
        elapsed = time.time() - start

        # Get performance
        combined = result._predictions.combine_folds(
            "sample_data", config.name, "PLSRegression", "test_fold"
        )

        if combined:
            y_true = combined['y_true'].flatten()
            y_pred = combined['y_pred'].flatten()
            rmse = np.sqrt(mean_squared_error(y_true, y_pred))

            return {
                'strategy': strategy_name,
                'time': elapsed,
                'rmse': rmse,
                'success': True
            }
        else:
            return {'strategy': strategy_name, 'success': False, 'error': 'No predictions'}

    except Exception as e:
        # Any pipeline or scoring error is reported to the caller as a failed strategy
        return {'strategy': strategy_name, 'success': False, 'error': str(e)}

# Test both strategies
strategies_to_test = ['per_fold_best', 'global_average']
results = []

# RMSE differences smaller than this are reported as "similar performance"
RMSE_TOLERANCE = 0.001

for strategy in strategies_to_test:
    result = quick_strategy_test(strategy)
    results.append(result)

    if result['success']:
        print(f"  ✅ {strategy}: RMSE={result['rmse']:.4f}, Time={result['time']:.1f}s")
    else:
        print(f"  ❌ {strategy}: Failed - {result.get('error', 'Unknown error')}")

# Summary comparison (only when both strategies produced a score)
successful_results = [r for r in results if r['success']]
if len(successful_results) >= 2:
    print(f"\n📊 Strategy Comparison:")

    per_fold = next(r for r in successful_results if r['strategy'] == 'per_fold_best')
    global_avg = next(r for r in successful_results if r['strategy'] == 'global_average')

    # Negative perf_diff means global_average has the lower (better) RMSE
    perf_diff = global_avg['rmse'] - per_fold['rmse']

    # Describe the timing ratio in the direction it actually points, and avoid
    # dividing by a zero duration.
    if per_fold['time'] > 0 and global_avg['time'] > 0:
        time_ratio = global_avg['time'] / per_fold['time']
        if time_ratio >= 1:
            speed_note = f"{time_ratio:.1f}x slower"
        else:
            speed_note = f"{1 / time_ratio:.1f}x faster"
    else:
        speed_note = "ratio unavailable"

    print(f"  Execution Time:")
    print(f"    per_fold_best:  {per_fold['time']:.1f}s")
    print(f"    global_average: {global_avg['time']:.1f}s ({speed_note})")

    print(f"  Performance (RMSE):")
    print(f"    per_fold_best:  {per_fold['rmse']:.4f}")
    print(f"    global_average: {global_avg['rmse']:.4f} ({'better' if perf_diff < 0 else 'worse'} by {abs(perf_diff):.4f})")

    # Three-way verdict; the bounds are arranged so that a difference of exactly
    # -RMSE_TOLERANCE counts for global_average instead of falling into the
    # per_fold_best branch.
    if perf_diff <= -RMSE_TOLERANCE:  # Significantly better
        print(f"  🏆 global_average achieved better generalization!")
    elif perf_diff < RMSE_TOLERANCE:  # Similar performance
        print(f"  📊 Similar performance, but global_average provides more consistent parameters")
    else:
        print(f"  📈 per_fold_best achieved better performance on this dataset")

print(f"\n💡 When to use each strategy:")
print(f"  per_fold_best:  Standard optimization, faster execution")
print(f"  global_average: More generalizable parameters, production deployment")

In [None]:
# Test the new use_full_train_for_final option
#
# Runs a PLS pipeline with use_full_train_for_final=True on `test_data` and
# prints the metrics of the first matching stored prediction.
# Relies on PLSRegression, PipelineConfig, PipelineRunner and test_data from
# earlier cells.
import time
from itertools import product

# Imported here so this cell does not depend on imports that an earlier cell
# performs only when its own run produced predictions.
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score

print("🎯 Testing use_full_train_for_final Option")
print("=" * 50)

print("NEW FEATURE: use_full_train_for_final=True")
print("- Use cross-validation for hyperparameter optimization")
print("- Train final model on FULL combined training data")
print("- Get single unified model instead of fold-specific models")

# Configuration with full training option
full_train_config = {
    "name": "full_train_demo",
    "steps": [
        {
            "name": "pls_full_train",
            "controller": "sklearn",
            "model": PLSRegression(),
            "finetune_params": {
                "cv_mode": "per_fold",
                "param_strategy": "global_average",
                "use_full_train_for_final": True,  # ⭐ NEW OPTION
                "n_trials": 5,  # Quick demo
                "verbose": 1,
                "model_params": {
                    "n_components": ("int", 1, 8)
                },
                "train_params": {
                    "verbose": 0
                }
            }
        }
    ]
}

print(f"\nRunning with use_full_train_for_final=True...")

# Run the test
config = PipelineConfig(full_train_config, "full_train_test")
runner = PipelineRunner()

start = time.time()
result_dataset, _, _ = runner.run(config, test_data)
elapsed = time.time() - start

print(f"✅ Completed in {elapsed:.1f} seconds")

# Check results
predictions = result_dataset._predictions
pred_keys = predictions.list_keys()

print(f"📊 Generated prediction keys: {pred_keys}")

# Look for full training predictions (should have different naming pattern)
full_train_preds = [k for k in pred_keys if 'global_avg' in k or 'full' in k]

if full_train_preds:
    print(f"\n🎯 Full training predictions found: {full_train_preds}")

    # Get performance
    # Keys have the form "<dataset>_<pipeline>_<model>_<partition>", and the
    # dataset / pipeline names themselves contain underscores (e.g.
    # "sample_data"), so a positional split('_', 3) cannot recover the parts.
    # Match the key against the component names the store reports instead.
    first_key = full_train_preds[0]
    key_parts = None
    for dataset_name, pipeline_name, model_name in product(
        predictions.list_datasets(),
        predictions.list_pipelines(),
        predictions.list_models(),
    ):
        prefix = f"{dataset_name}_{pipeline_name}_{model_name}_"
        if first_key.startswith(prefix):
            # Whatever follows the prefix is the partition part of the key
            key_parts = (dataset_name, pipeline_name, model_name, first_key[len(prefix):])
            break

    if key_parts is not None:
        pred_data = predictions.get_prediction_data(*key_parts)
        if pred_data:
            y_true = pred_data['y_true'].flatten()
            y_pred = pred_data['y_pred'].flatten()

            mse = mean_squared_error(y_true, y_pred)
            r2 = r2_score(y_true, y_pred)
            rmse = np.sqrt(mse)

            print(f"\n📈 Full Training Model Performance:")
            print(f"  RMSE: {rmse:.4f}")
            print(f"  R²:   {r2:.4f}")
            print(f"  Test samples: {len(y_true)}")
            print(f"  Model trained on: Combined training data from all folds")

print(f"\n🔄 Key Differences from Traditional Approach:")
print(f"  Traditional: 3 separate models (one per fold)")
print(f"  Full Train:  1 unified model (trained on all data)")
print(f"  Benefit:     More training data → Often better performance")

In [None]:
# Fresh reload - restart kernel then run this first
import importlib
import sys

# Forget every already-imported nirs4all module so the imports below load them again
modules_to_clear = [name for name in sys.modules if name.startswith('nirs4all')]
for module in modules_to_clear:
    sys.modules.pop(module, None)

# Now reimport everything
from nirs4all.dataset.predictions import Predictions
from nirs4all.pipeline.runner import PipelineRunner
from nirs4all.pipeline.config import PipelineConfig
from nirs4all.dataset import dataset
from sample import dataset_config, pipeline_config
from nirs4all.dataset.loader import get_dataset
from nirs4all.controllers.registry import reset_registry
from nirs4all.controllers import *

# Smoke-test the predictions container on its own before touching the pipeline
test_predictions = Predictions()
print(f"Empty predictions: {test_predictions}")
print(f"Length: {len(test_predictions)}")

# Store one hand-made two-sample prediction
import numpy as np
sample_prediction = dict(
    dataset="test",
    pipeline="test_pipe",
    model="TestModel",
    partition="test",
    y_true=np.array([1.0, 2.0]),
    y_pred=np.array([1.1, 2.1]),
)
test_predictions.add_prediction(**sample_prediction)

print(f"After adding: {test_predictions}")
print("✅ Predictions class is working correctly!")

print("\n" + "="*50, "Now running the actual pipeline...", sep="\n")

# Build the dataset and pipeline configuration, then run it
data = get_dataset(dataset_config)
config = PipelineConfig(pipeline_config, "demo_pipeline")
runner = PipelineRunner()

try:
    res_dataset, history, pipeline = runner.run(config, data)
    print(f"\nPipeline completed successfully!")
    print(f"Final dataset predictions: {res_dataset._predictions}")
except Exception as e:
    # Show the failure and its traceback without stopping the notebook
    print(f"Pipeline failed: {e}")
    import traceback
    traceback.print_exc()

Registering controller: DummyController
Registering controller: SklearnModelController
Registering controller: TensorFlowModelController
Registering controller: PyTorchModelController
Registering controller: TransformerMixinController
Registering controller: YTransformerMixinController
Registering controller: FeatureAugmentationController
Registering controller: SampleAugmentationController
Registering controller: CrossValidatorController
Registering controller: SpectraChartController
Registering controller: FoldChartController
Registering controller: YChartController
Empty predictions: 📈 Predictions: No predictions stored
Length: 0
After adding: 📈 Predictions: 1 entries
   Datasets: ['test']
   Pipelines: ['test_pipe']
   Models: ['TestModel']
✅ Predictions class is working correctly!

Now running the actual pipeline...
[94mLoading dataset:[0m
---------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
# Check the pipeline results and predictions
#
# Summarises what `res_dataset._predictions` holds after the pipeline run and
# prints the details of the first stored prediction.
from itertools import product

print("Pipeline Results Analysis:")
print("="*50)

print(f"Final dataset: {res_dataset}")
print(f"\nPredictions stored: {res_dataset._predictions}")
print(f"Number of predictions: {len(res_dataset._predictions)}")

if len(res_dataset._predictions) > 0:
    print(f"Prediction keys: {res_dataset._predictions.list_keys()}")
    print(f"Datasets: {res_dataset._predictions.list_datasets()}")
    print(f"Pipelines: {res_dataset._predictions.list_pipelines()}")
    print(f"Models: {res_dataset._predictions.list_models()}")
    print(f"Partitions: {res_dataset._predictions.list_partitions()}")

    # Get details of first prediction
    keys = res_dataset._predictions.list_keys()
    if keys:
        first_key = keys[0]

        # Keys have the form "<dataset>_<pipeline>_<model>_<partition>", and
        # the dataset / pipeline names themselves contain underscores (e.g.
        # "sample_data"), so a positional split('_', 3) cannot recover the
        # parts. Match the key against the component names the store reports.
        first_pred_parts = None
        for dataset_name, pipeline_name, model_name in product(
            res_dataset._predictions.list_datasets(),
            res_dataset._predictions.list_pipelines(),
            res_dataset._predictions.list_models(),
        ):
            prefix = f"{dataset_name}_{pipeline_name}_{model_name}_"
            if first_key.startswith(prefix):
                # Whatever follows the prefix is the partition part of the key
                first_pred_parts = (dataset_name, pipeline_name, model_name, first_key[len(prefix):])
                break

        if first_pred_parts is not None:
            dataset_name, pipeline_name, model_name, partition_name = first_pred_parts
            first_pred = res_dataset._predictions.get_prediction_data(
                dataset_name, pipeline_name, model_name, partition_name
            )
            if first_pred:
                print(f"\nFirst prediction details:")
                print(f"  Dataset: {first_pred['dataset']}")
                print(f"  Pipeline: {first_pred['pipeline']}")
                print(f"  Model: {first_pred['model']}")
                print(f"  Partition: {first_pred['partition']}")
                print(f"  Y_true shape: {first_pred['y_true'].shape}")
                print(f"  Y_pred shape: {first_pred['y_pred'].shape}")
                print(f"  Sample indices: {len(first_pred['sample_indices'])} samples")
                print(f"  Fold index: {first_pred['fold_idx']}")
                print(f"  Metadata: {first_pred['metadata']}")
        else:
            # Make the parsing failure visible instead of silently printing nothing
            print(f"\n⚠️ Could not split prediction key into its parts: {first_key}")
else:
    print("⚠️ No predictions were stored during pipeline execution")
    print("This might be because:")
    print("1. The prediction storage integration is not working correctly")
    print("2. The dataset parameter is not being passed properly")
    print("3. There were errors in the model training that prevented prediction storage")

Pipeline Results Analysis:
Final dataset: 📊 Dataset: sample_data
Features (samples=189, sources=1):
- Source 0: (189, 3, 2151), processings=['raw_MinMaxScaler_1', 'raw_MinMaxScaler_1_Gaussian_2', 'raw_MinMaxScaler_1_StandardScaler_3_Haar_4'], min=-0.215, max=0.89, mean=0.22, var=0.102)
Targets: (samples=189, targets=1, processings=['numeric', 'numeric_StandardScaler5'])
- numeric: min=1.33, max=128.31, mean=30.779
- numeric_StandardScaler5: min=-1.23, max=4.156, mean=0.019
Indexes:
- "train", ['raw_MinMaxScaler_1', 'raw_MinMaxScaler_1_Gaussian_2', 'raw_MinMaxScaler_1_StandardScaler_3_Haar_4']: 130 samples
- "test", ['raw_MinMaxScaler_1', 'raw_MinMaxScaler_1_Gaussian_2', 'raw_MinMaxScaler_1_StandardScaler_3_Haar_4']: 59 samples
Folds: [(97, 33), (97, 33), (97, 33)]

Predictions stored: 📈 Predictions: 8 entries
   Datasets: ['sample_data']
   Pipelines: ['config_demo_pipeline_00090c']
   Models: ['PLSRegression', 'function', 'Sequential']
Number of predictions: 8
Prediction keys: ['sampl

In [3]:
# Demonstrate prediction analysis capabilities
#
# Reads stored predictions from `res_dataset._predictions` and prints metrics
# for the PLS test partition, the combined PLS folds, and every model's test
# partition.

# Imported at the top of the cell: the metrics are used by the fold and
# model-comparison sections below regardless of whether `pls_test` is found.
import numpy as np
from sklearn.metrics import mean_squared_error, r2_score, mean_absolute_error

print("Prediction Analysis Example:")
print("="*50)

# Get all predictions from PLSRegression model
pls_predictions = res_dataset._predictions.get_predictions(model="PLSRegression")
print(f"PLSRegression predictions: {list(pls_predictions.keys())}")

# Get specific test set prediction for PLS
# NOTE(review): the pipeline name is hard-coded and ends in what looks like a
# generated suffix — confirm it still matches after changing pipeline_config.
pls_test = res_dataset._predictions.get_prediction_data(
    "sample_data", "config_demo_pipeline_00090c", "PLSRegression", "test"
)

if pls_test:
    y_true = pls_test['y_true'].flatten()
    y_pred = pls_test['y_pred'].flatten()

    print(f"\nPLS Test Set Performance:")
    print(f"  MSE: {mean_squared_error(y_true, y_pred):.4f}")
    print(f"  MAE: {mean_absolute_error(y_true, y_pred):.4f}")
    print(f"  R²:  {r2_score(y_true, y_pred):.4f}")
    print(f"  Y processing: {pls_test['metadata']['y_processing']}")

# Combine cross-validation folds for PLS
pls_cv_combined = res_dataset._predictions.combine_folds(
    "sample_data", "config_demo_pipeline_00090c", "PLSRegression", "test_fold"
)

if pls_cv_combined:
    y_true_cv = pls_cv_combined['y_true'].flatten()
    y_pred_cv = pls_cv_combined['y_pred'].flatten()

    print(f"\nPLS Cross-Validation Combined Performance:")
    print(f"  Samples: {len(y_true_cv)}")
    print(f"  MSE: {mean_squared_error(y_true_cv, y_pred_cv):.4f}")
    print(f"  MAE: {mean_absolute_error(y_true_cv, y_pred_cv):.4f}")
    print(f"  R²:  {r2_score(y_true_cv, y_pred_cv):.4f}")
    print(f"  Folds: {pls_cv_combined['metadata']['num_folds']}")

# Compare all models on test set
print(f"\nModel Comparison (Test Set):")
models = res_dataset._predictions.list_models()
for model in models:
    test_pred = res_dataset._predictions.get_predictions(
        model=model, partition="test"
    )
    # Models without a "test" partition entry are skipped
    if test_pred:
        first_key = list(test_pred.keys())[0]
        pred_data = test_pred[first_key]
        y_true = pred_data['y_true'].flatten()
        y_pred = pred_data['y_pred'].flatten()
        mse = mean_squared_error(y_true, y_pred)
        r2 = r2_score(y_true, y_pred)
        print(f"  {model}: MSE={mse:.4f}, R²={r2:.4f}")

print("\n✅ Prediction storage and analysis system is fully functional!")

Prediction Analysis Example:
PLSRegression predictions: ['sample_data_config_demo_pipeline_00090c_PLSRegression_test', 'sample_data_config_demo_pipeline_00090c_PLSRegression_test_fold_0', 'sample_data_config_demo_pipeline_00090c_PLSRegression_test_fold_1', 'sample_data_config_demo_pipeline_00090c_PLSRegression_test_fold_2']

PLS Test Set Performance:
  MSE: 0.3200
  MAE: 0.1108
  R²:  0.9991
  Y processing: numeric_StandardScaler5

PLS Cross-Validation Combined Performance:
  Samples: 99
  MSE: 336.5601
  MAE: 14.7830
  R²:  0.2857
  Folds: 3

Model Comparison (Test Set):
  Sequential: MSE=346.8119, R²=-0.0150
  PLSRegression: MSE=0.3200, R²=0.9991

✅ Prediction storage and analysis system is fully functional!


In [5]:
# Reset autoreload completely
try:
    %autoreload 0  # Disable autoreload
    %reload_ext autoreload
    %autoreload 2  # Re-enable with full reload
except:
    %load_ext autoreload
    %autoreload 2

from nirs4all.pipeline.runner import PipelineRunner
from nirs4all.pipeline.config import PipelineConfig
from nirs4all.dataset import dataset
from sample import dataset_config, pipeline_config
from nirs4all.dataset.loader import get_dataset
from nirs4all.controllers.registry import reset_registry
from nirs4all.controllers import *

data = get_dataset(dataset_config)
config = PipelineConfig(pipeline_config, "demo_pipeline")

runner = PipelineRunner()
res_dataset, history, pipeline = runner.run(config, data)


# json_config = PipelineConfig("sample.json")
# yaml_config = PipelineConfig("sample.yaml")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
[94mLoading dataset:[0m
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
⚠️ Dataset does not have data for train_group.
⚠️ Dataset does not have data for test_group.
[97m📊 Dataset: sample_data
Features (samples=189, sources=1):
- Source 0: (189, 1, 2151), processings=['raw'], min=-0.265, max=1.436, mean=0.466, var=0.149)
Targets: (samples=189, targets=1, processings=['numeric'])
- numeric: min=1.33, max=128.31, mean=30.779
Indexes:
- "train", ['raw']: 130 samples
- "test", ['raw']: 59 samples[0m
[94m🚀 Starting pipeline config_demo_pipeline_00090c on dataset sample_data[0m
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [None]:
# Exercise the Predictions store attached to the pipeline result
print("Testing Predictions functionality...", "\nDataset before running:", sep="\n")
print(f"Predictions: {res_dataset._predictions}")

# Report what the store currently holds
print(f"\nNumber of predictions stored: {len(res_dataset._predictions)}")
print(f"Prediction keys: {res_dataset._predictions.list_keys()}")

# Manually add a small hand-made prediction
import numpy as np

manual_prediction = dict(
    dataset="test_dataset",
    pipeline="test_pipeline",
    model="TestModel",
    partition="test",
    y_true=np.array([1.0, 2.0, 3.0]),
    y_pred=np.array([1.1, 1.9, 3.2]),
    metadata={"test": True},
)
res_dataset._predictions.add_prediction(**manual_prediction)

# Show the store again now that the extra entry has been added
print(f"\nAfter adding test prediction:")
print(f"Number of predictions: {len(res_dataset._predictions)}")
print(f"Prediction keys: {res_dataset._predictions.list_keys()}")

# Read the entry back through the four-part lookup
test_pred = res_dataset._predictions.get_prediction_data(
    "test_dataset", "test_pipeline", "TestModel", "test"
)
print(f"\nTest prediction data: {test_pred}")

Testing Predictions functionality...

Dataset before running:
Predictions: 📈 Predictions: 8 entries
   Datasets: ['sample_data']
   Pipelines: ['config_demo_pipeline_00090c']
   Models: ['PLSRegression', 'function', 'Sequential']

Number of predictions stored: 8
Prediction keys: ['sample_data_config_demo_pipeline_00090c_PLSRegression_test', 'sample_data_config_demo_pipeline_00090c_PLSRegression_test_fold_0', 'sample_data_config_demo_pipeline_00090c_PLSRegression_test_fold_1', 'sample_data_config_demo_pipeline_00090c_PLSRegression_test_fold_2', 'sample_data_config_demo_pipeline_00090c_Sequential_test', 'sample_data_config_demo_pipeline_00090c_function_test_fold_0', 'sample_data_config_demo_pipeline_00090c_function_test_fold_1', 'sample_data_config_demo_pipeline_00090c_function_test_fold_2']

After adding test prediction:
Number of predictions: 9
Prediction keys: ['sample_data_config_demo_pipeline_00090c_PLSRegression_test', 'sample_data_config_demo_pipeline_00090c_PLSRegression_test_fol