In [1]:
%load_ext autoreload
%autoreload 2

In [None]:
from sklearn.model_selection import ShuffleSplit
from sklearn.preprocessing import MinMaxScaler
from sklearn.cross_decomposition import PLSRegression

from nirs4all.operators.transformations import Gaussian, SavitzkyGolay, StandardNormalVariate, Haar
from nirs4all.pipeline.config import PipelineConfigs
from nirs4all.dataset.dataset_config import DatasetConfigs
from nirs4all.pipeline.runner import PipelineRunner
import json

# Pipeline variant with an explicit splitter step and a single swept PLS model.
# NOTE(review): this list is not the one passed to PipelineConfigs further down in this cell.
pipeline_separated = [
    # Scale the spectra features into the [0.1, 0.8] range
    MinMaxScaler(feature_range=(0.1, 0.8)),

    # Feature-augmentation generator drawing from the four transforms listed under "_or_".
    # NOTE(review): "size" and "count" are interpreted by nirs4all; presumably 3 slots holding
    # 1 to 2 transforms each (e.g. [SavitzkyGolay, [StandardNormalVariate, Gaussian], Haar])
    # with 2 generated combinations kept - confirm against the generator documentation.
    {
        "feature_augmentation": {
            "_or_": [
                Gaussian, StandardNormalVariate, SavitzkyGolay, Haar,
            ],
            "size": [3, (1,2)],
            "count": 2,
        }
    },

    # 3 random train/validation splits with 25% held out.
    # Seeded so that re-running the notebook reproduces the same folds.
    ShuffleSplit(n_splits=3, test_size=.25, random_state=42),

    # Normalize the y values
    {"y_processing": MinMaxScaler},

    # PLS regression with n_components swept over the "_range_" bounds [1, 4]
    {
        "model": PLSRegression,
        "model_params": {
            "n_components": {
                "_range_": [1, 4],
            }
        }
    }
]

# Shared preprocessing steps; fixed-size PLS models are appended right after the literal.
pipeline_commons = [
    # Scale the spectra with MinMaxScaler defaults
    MinMaxScaler(),

    # Feature-augmentation generator over the four transforms under "_or_"
    # NOTE(review): presumably 3 slots of 1 to 2 transforms each, 5 combinations kept - confirm.
    {
        "feature_augmentation": {
            "_or_": [Gaussian, StandardNormalVariate, SavitzkyGolay, Haar],
            "size": [3, (1, 2)],
            "count": 5,
        }
    },

    # No splitter step in this variant:
    # ShuffleSplit(n_splits=3, test_size=.25) is intentionally left disabled.

    # Normalize the y values
    {"y_processing": MinMaxScaler},
]

# One PLSRegression step per component count, n_components = 5 .. 44
pipeline_commons.extend(
    PLSRegression(n_components=n_comp) for n_comp in range(5, 45)
)

# Build the pipeline configuration from the shared step list (pipeline_commons).
config = PipelineConfigs(pipeline_commons)


# Dataset folder, given as a path relative to the notebook's working directory.
# Alternative kept for reference (presumably DatasetConfigs also accepts a list of folders - confirm):
# path = ['../../sample_data/regression', '../../sample_data/classification', '../../sample_data/binary']
path = '../../sample_data/regression'
dataset_config_obj = DatasetConfigs(path)

# Run the configured pipeline(s) on the dataset; the runner is created with save_binaries=False.
# `results` is read by the debugging cells below, which unpack results[-1]
# into (dataset, history, pipeline).
runner = PipelineRunner(save_binaries=False)
results = runner.run(config, dataset_config_obj)


✅ Loaded pipeline(s) with 2 configuration(s).
✅ Loaded dataset 'regression' with 130 training and 59 test samples.
📥 Loaded 20 predictions from results\regression\regression_predictions.json
[94m🚀 Starting pipeline config_a8a011f7 on dataset regression[0m
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
[94m🔄 Running 18 steps in sequential mode[0m
[92m🔷 Step 1: {'class': 'sklearn.preprocessing._data.MinMaxScaler', '_runtime_instance': MinMaxScaler()}[0m
🔹 Executing controller TransformerMixinController with operator MinMaxScaler
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
[92m🔷 Step 2: {'feature_augmentation': ['sklearn.preprocessing._data.StandardScaler', ['nirs4all.operator

In [None]:
# Exercise the predictions summary by hand on the most recent run result.
if 'results' not in locals() or len(results) == 0:
    print("No results available")
else:
    # A result unpacks into three items; only the dataset is used in this cell.
    dataset, history, pipeline = results[-1]
    predictions_store = dataset._predictions

    print("=== Manual Test of Predictions Summary ===")
    print(f"Dataset: {dataset.name}")
    print(f"Total predictions: {len(predictions_store.list_keys())}")

    # The second argument is presumably the number of predictions that existed
    # before this run (0 => everything counts as new) - confirm against the API.
    print("\n--- Testing with 0 previous predictions (should show all) ---")
    predictions_store.display_best_scores_summary(dataset.name, 0)

    print("\n--- Testing with 10 previous predictions (should show new ones) ---")
    predictions_store.display_best_scores_summary(dataset.name, 10)

=== Manual Test of Predictions Summary ===
Dataset: regression
Total predictions: 20

--- Testing with 0 previous predictions (should show all) ---
--------------------------------------------------------------------------------------------------------------------------------------------
🏆 Best from this run: 5 (config_cdd22a14_PLSRegression) - mse=116.5067↓
🥇 Best overall: 5 (config_cdd22a14_PLSRegression) - mse=116.5067↓

--- Testing with 10 previous predictions (should show new ones) ---
--------------------------------------------------------------------------------------------------------------------------------------------
🏆 Best from this run: 5 (config_cdd22a14_PLSRegression) - mse=116.5067↓


In [6]:
# Debug the higher_is_better value for MSE
from nirs4all.utils.model_utils import ModelUtils, TaskType
import numpy as np

# Tiny synthetic regression sample: three targets and three predictions.
y_true = np.asarray([1.0, 2.0, 3.0])
y_pred = np.asarray([1.1, 2.1, 3.1])

# Let ModelUtils infer the task from the targets alone.
task_type = ModelUtils.detect_task_type(y_true)
print(f"Task type: {task_type}")

# The ranking metric comes back as a (metric name, direction flag) pair.
metric_choice = ModelUtils.get_best_score_metric(task_type)
best_metric, higher_is_better = metric_choice
print(f"Best metric: {best_metric}")
print(f"Higher is better: {higher_is_better}")

scores = ModelUtils.calculate_scores(y_true, y_pred, task_type)
print(f"Sample scores: {scores}")

# The printed direction flag is the reference the summary arrow should agree with.

Task type: TaskType.REGRESSION
Best metric: mse
Higher is better: False
Sample scores: {'mse': 0.010000000000000018, 'mae': 0.10000000000000009}


In [None]:
# Follow one stored prediction end to end: key parsing, lookup, then scoring.
if 'results' in locals() and len(results) > 0:
    dataset, history, pipeline = results[-1]

    # Only the first stored key is examined.
    prediction_keys = dataset._predictions.list_keys()
    if prediction_keys:
        sample_key = prediction_keys[0]
        print(f"First prediction key: {sample_key}")

        # Underscore-separated key: first field = dataset, last two fields = model and
        # partition, everything in between is re-joined as the pipeline name.
        fields = sample_key.split('_')
        pred_dataset_name = fields[0]
        pipeline_name = '_'.join(fields[1:-2])
        model_name = fields[-2]
        partition_name = fields[-1]

        print(f"Dataset: {pred_dataset_name}, Pipeline: {pipeline_name}, Model: {model_name}, Partition: {partition_name}")

        # Look the prediction up again using the parsed fields.
        record = dataset._predictions.get_prediction_data(
            pred_dataset_name, pipeline_name, model_name, partition_name
        )

        if record:
            task_type = ModelUtils.detect_task_type(record['y_true'])
            metric_choice = ModelUtils.get_best_score_metric(task_type)
            best_metric, higher_is_better = metric_choice
            print(f"Task: {task_type}, Metric: {best_metric}, Higher is better: {higher_is_better}")

            scores = ModelUtils.calculate_scores(record['y_true'], record['y_pred'], task_type)
            print(f"Scores: {scores}")

            # The metric and direction printed above are what the summary should reflect.

First prediction key: regression_config_cb2255a1_PLSRegression_3_test
Dataset: regression, Pipeline: config_cb2255a1_PLSRegression, Model: 3, Partition: test
Task: TaskType.REGRESSION, Metric: mse, Higher is better: False
Scores: {'mse': 173.8910007539191, 'mae': 10.574806193325461}


In [9]:
# Recompute the best score by hand to cross-check the summary printed by the library.
def split_prediction_key(key):
    """Split a prediction key into (dataset, pipeline, model, partition).

    The key is cut on underscores: the first field is the dataset, the last two
    fields are the model and the partition, and everything in between is
    re-joined as the pipeline name.

    Returns:
        A 4-tuple of strings, or None when the key has fewer than four
        underscore-separated fields.
    """
    parts = key.split('_')
    if len(parts) < 4:
        return None
    return parts[0], '_'.join(parts[1:-2]), parts[-2], parts[-1]


def is_improvement(score, best_score, higher_is_better):
    """Return True when `score` should replace `best_score`.

    The first candidate (best_score is None) always wins. After that the
    comparison follows the metric direction: larger wins when
    `higher_is_better` is true, smaller wins otherwise.
    """
    if best_score is None:
        return True
    if higher_is_better:
        return score > best_score
    return score < best_score


if 'results' in locals() and len(results) > 0:
    last_result = results[-1]
    dataset, history, pipeline = last_result

    from nirs4all.utils.model_utils import ModelUtils

    # Get all predictions
    all_keys = dataset._predictions.list_keys()
    print(f"Total predictions: {len(all_keys)}")

    # Running best; stays None when no prediction can be scored
    best_score = None
    best_model = None
    best_metric = None
    higher_is_better = False

    for key in all_keys:
        key_fields = split_prediction_key(key)
        if key_fields is None:
            continue
        pred_dataset_name, pipeline_name, model_name, partition_name = key_fields

        # Only consider predictions stored for the dataset of this run
        if pred_dataset_name != dataset.name:
            continue

        pred_data = dataset._predictions.get_prediction_data(
            pred_dataset_name, pipeline_name, model_name, partition_name
        )
        if not (pred_data and 'y_true' in pred_data and 'y_pred' in pred_data):
            continue

        task_type = ModelUtils.detect_task_type(pred_data['y_true'])
        scores = ModelUtils.calculate_scores(pred_data['y_true'], pred_data['y_pred'], task_type)

        # Metric name and direction are expected to be the same for every prediction
        # of one dataset; the values from the last scored prediction are reported below.
        best_metric, higher_is_better = ModelUtils.get_best_score_metric(task_type)
        score = scores.get(best_metric)

        if score is not None and is_improvement(score, best_score, higher_is_better):
            best_score = score
            best_model = model_name

    # Display with correct direction
    direction = "↑" if higher_is_better else "↓"
    print("\nDebug results:")
    print(f"Higher is better: {higher_is_better}")
    print(f"Direction should be: {direction}")
    print(f"Best score: {best_score}")
    print(f"Best model: {best_model}")
    if best_score is None:
        # Nothing could be scored: formatting None with ':.4f' would raise TypeError
        print("🏆 Best: no scored predictions found")
    else:
        print(f"🏆 Best: {best_model} - {best_metric}={best_score:.4f}{direction}")

Total predictions: 20

Debug results:
Higher is better: False
Direction should be: ↓
Best score: 116.50674098677325
Best model: 5
🏆 Best: 5 - mse=116.5067↓
