In [None]:
from sklearn.model_selection import ShuffleSplit
from sklearn.preprocessing import MinMaxScaler
from sklearn.cross_decomposition import PLSRegression

from nirs4all.operators.transformations import Gaussian, SavitzkyGolay, StandardNormalVariate, Haar
from nirs4all.pipeline.config import PipelineConfigs
from nirs4all.dataset.dataset_config import DatasetConfigs
from nirs4all.pipeline.runner import PipelineRunner
import json

%load_ext autoreload
%autoreload 2
import numpy as np
from nirs4all.dataset.prediction_visualizer import PredictionVisualizer


# Ordered list of pipeline steps; it is wrapped by PipelineConfigs further down.
pipeline = [
    # Scale the spectra (features) with a min-max scaler.
    MinMaxScaler(),

    # Generate feature-augmentation variants built from the candidate
    # transforms listed under "_or_". "count": 5 caps the number of generated
    # variants at 5 (the run output reports 5 configurations).
    # NOTE(review): "size": [3, (1, 2)] presumably means 3 elements per variant,
    # each element chaining 1 to 2 transforms, e.g. [SG, [SNV, Gaussian], Haar]
    # -- confirm against the nirs4all generator documentation.
    {
        "feature_augmentation": {
            "_or_": [
                Gaussian, StandardNormalVariate, SavitzkyGolay, Haar,
            ],
            "size": [3, (1,2)],
            "count": 5,
        }
    },

    # Split the dataset into train and validation sets: 3 random splits, each
    # holding out 25% of the samples. No random_state is given, so the splits
    # differ from one run to the next.
    ShuffleSplit(n_splits=3, test_size=.25),

    # Normalize the y values. The scaler is passed as a class here (not an
    # instance, unlike the first step).
    {"y_processing": MinMaxScaler},
    # PLS regression model with n_components=10.
    {"model": PLSRegression(10)},
]

# Wrap the step list in a PipelineConfigs object.
# NOTE(review): presumably this is where the "feature_augmentation" generator
# is expanded into concrete configurations -- confirm in nirs4all.
p_configs = PipelineConfigs(pipeline)


# Disabled alternative: pass several dataset folders at once.
# path = ['../../sample_data/regression', '../../sample_data/classification', '../../sample_data/binary']
# Dataset folder, relative to the notebook's working directory.
path = '../../sample_data/regression'
d_configs = DatasetConfigs(path)

# Run the pipeline configuration(s) on the dataset configuration(s).
# `predictions` is consumed by the visualizer below; `results` is not used
# in the visible part of this cell.
runner = PipelineRunner()
predictions, results = runner.run(p_configs, d_configs)



# Rank the predictions from the training run and print the metrics of the
# five selected entries.
# NOTE(review): get_top_k(5, 'rmse') presumably returns the 5 best entries
# ordered by RMSE, best first -- confirm in PredictionVisualizer.
visualizer = PredictionVisualizer(predictions, dataset_name_override="dataset")
top_5 = visualizer.get_top_k(5, 'rmse')
for model in top_5:
    print(model['path'], f"RMSE: {model['rmse']:.6f}, R²: {model['r2']:.6f}, MAE: {model['mae']:.6f}")

# Re-run prediction with the saved pipeline of the first ranked entry on the
# same dataset configuration.
# NOTE: this rebinds `predictions`; the training-run predictions are no longer
# reachable under that name after this statement.
predictions = PipelineRunner.predict(
    path=top_5[0]['path'],
    dataset=d_configs,
    verbose=1
)

✅ Loaded pipeline(s) with 5 configuration(s).
📥 Loaded 30 predictions from results\regression\regression_predictions.json
[94m🚀 Starting pipeline config_2ac2ed07 on dataset regression[0m
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
[92m🔷 Step 1: {'class': 'sklearn.preprocessing._data.MinMaxScaler', '_runtime_instance': MinMaxScaler()}[0m
🔹 Executing controller TransformerMixinController with operator MinMaxScaler
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
[92m🔷 Step 2: {'feature_augmentation': [['nirs4all.operators.transformations.nirs.SavitzkyGolay', 'sklearn.preprocessing._data.StandardScaler'], ['nirs4all.operators.transformations.nirs.Haar', 'sklearn.preprocessing._dat