In [None]:
%reload_ext autoreload
%autoreload 2

import sys
import os
# Make the parent of the current working directory importable. A notebook has
# no __file__, so the location is taken relative to the working directory.
sys.path.append(os.path.abspath(os.pardir))

import time
from nirs4all.presets.ref_models import decon, nicon, customizable_nicon, nicon_classification
from nirs4all.presets.preprocessings import decon_set, nicon_set
from nirs4all.data_splitters import KennardStoneSplitter
from nirs4all.transformations import StandardNormalVariate as SNV, SavitzkyGolay as SG, Gaussian as GS, Derivate as  Dv
from nirs4all.transformations import Rotate_Translate as RT, Spline_X_Simplification as SXS, Random_X_Operation as RXO
from nirs4all.transformations import CropTransformer
from nirs4all.core.runner import ExperimentRunner
from nirs4all.core.config import Config

from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import KFold, RepeatedKFold, StratifiedKFold, RepeatedStratifiedKFold, ShuffleSplit, GroupKFold, StratifiedShuffleSplit, BaseCrossValidator, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
import warnings
from sklearn.exceptions import ConvergenceWarning

# Silence these warning categories for the whole notebook session.
for _category in (ConvergenceWarning, UserWarning, FutureWarning):
    warnings.filterwarnings("ignore", category=_category)


# Model description given by dotted class path plus constructor parameters.
model_sklearn = {
    "class": "sklearn.cross_decomposition.PLSRegression",
    "model_params": {"n_components": 21},
}

# Finetuning experiment for the model above: searches `n_components` with the
# ('int', 5, 20) specification and selects the 'sklearn' tuner.
finetune_pls_experiment = dict(
    action="finetune",
    finetune_params=dict(
        model_params=dict(n_components=("int", 5, 20)),
        training_params={},
        tuner="sklearn",
    ),
)

# Long training run: 2000 epochs with patience 200 and a cyclic learning rate
# between base_lr and max_lr.
bacon_train = {
    "action": "train",
    "training_params": {
        "epochs": 2000,
        "batch_size": 500,
        "patience": 200,
        "cyclic_lr": True,
        "base_lr": 1e-6,
        "max_lr": 1e-3,
        "step_size": 400,
    },
}

# Quick variant of the run above (10 epochs) for smoke-testing.
bacon_train_short = {
    "action": "train",
    "training_params": {
        "epochs": 10,
        "batch_size": 500,
        "patience": 20,
        "cyclic_lr": True,
        "base_lr": 1e-6,
        "max_lr": 1e-3,
        "step_size": 40,
    },
}

# Small finetuning run: 5 trials over the three filter counts, each chosen
# from the same candidate list, with 10 silent training epochs.
bacon_finetune = {
    "action": "finetune",
    "finetune_params": {
        "n_trials": 5,
        "model_params": {
            f"filters_{layer}": [8, 16, 32, 64] for layer in (1, 2, 3)
        },
    },
    "training_params": {"epochs": 10, "verbose": 0},
}

# Extensive finetuning run: 150 trials, each trained for up to 500 epochs with
# patience 100.
#
# The trial budget is keyed "n_trials", matching the other finetune
# experiments in this notebook (it was previously spelled "nb_trials").
#
# Search-space entries are either a list of candidate values or a
# (type, low, high) range.
# NOTE(review): ranges here use the builtin `float`, while the sklearn
# finetune experiments in this notebook use the string 'int' - confirm the
# tuner accepts both spellings.
full_bacon_finetune = {
    "action": "finetune",
    "training_params": {
        "epochs": 500,
        "patience": 100,
    },
    "finetune_params": {
        "n_trials": 150,
        "model_params": {
            'spatial_dropout': (float, 0.01, 0.5),
            'filters1': [4, 8, 16, 32, 64, 128, 256],
            'kernel_size1': [3, 5, 7, 9, 11, 13, 15],
            # 'strides1': [1, 2, 3, 4, 5],
            # 'activation1': ['relu', 'selu', 'elu', 'swish'],
            'dropout_rate': (float, 0.01, 0.5),
            'filters2': [4, 8, 16, 32, 64, 128, 256],
            # 'kernel_size2': [3, 5, 7, 9, 11, 13, 15],
            # 'strides2': [1, 2, 3, 4, 5],
            'activation2': ['relu', 'selu', 'elu', 'swish'],
            'normalization_method1': ['BatchNormalization', 'LayerNormalization'],
            'filters3': [4, 8, 16, 32, 64, 128, 256],
            # 'kernel_size3': [3, 5, 7, 9, 11, 13, 15],
            # 'strides3': [1, 2, 3, 4, 5],
            'activation3': ['relu', 'selu', 'elu', 'swish'],
            # 'normalization_method2': ['BatchNormalization', 'LayerNormalization'],
            # 'dense_units': [4, 8, 16, 32, 64, 128, 256],
            'dense_activation': ['relu', 'selu', 'elu', 'swish'],
        },
        # Optional search over training parameters (disabled):
        # "training_params": {
        #     "batch_size": [32, 64, 128, 256, 512],
        #     "cyclic_lr": [True, False],
        #     "base_lr": (float, 1e-6, 1e-2),
        #     "max_lr": (float, 1e-3, 1e-1),
        #     "step_size": (int, 500, 5000),
        # },
    }
}


# Full feature pipeline: scaling, sample-level step, cross-validation split,
# feature-level preprocessings, final scaling.
# NOTE(review): judging from the shapes logged later in this notebook, each
# "samples" entry appears to add one augmented copy of the training set and
# each "features" entry one block of columns - confirm against the
# nirs4all documentation.
x_pipeline_full = [
    RobustScaler(),
    {"samples": [None] * 4 + [SXS, RXO]},
    {"split": RepeatedKFold(n_splits=3, n_repeats=1)},
    {
        "features": [
            None,
            GS(2, 1),
            SG,
            SNV,
            Dv,
            [GS, SNV],
            [GS, GS],
            [GS, SG],
            [SG, SNV],
            [GS, Dv],
            [SG, Dv],
        ]
    },
    MinMaxScaler(),
]


# Classification finetuning for the neural model: 5 trials over the three
# filter counts, each chosen from the same candidates, 5 silent epochs.
bacon_finetune_classif = {
    "action": "finetune",
    "task": "classification",
    "finetune_params": {
        "n_trials": 5,
        "model_params": {
            f"filters_{layer}": [8, 16, 32, 64] for layer in (1, 2, 3)
        },
    },
    "training_params": {"epochs": 5, "verbose": 0},
}

# Classification finetuning for a random forest: searches `n_estimators`
# with the ('int', 5, 20) specification and selects the 'sklearn' tuner.
finetune_randomForestclassifier = dict(
    action="finetune",
    task="classification",
    finetune_params=dict(
        model_params=dict(n_estimators=("int", 5, 20)),
        training_params={},
        tuner="sklearn",
    ),
)

# Pipeline used with PLS: scaling, 3-fold split, feature-level
# preprocessings (single transforms and chained pairs), final scaling.
x_pipeline_PLS = [
    RobustScaler(),
    {"split": RepeatedKFold(n_splits=3, n_repeats=1)},
    {
        "features": [
            None,
            GS(2, 1),
            SG,
            SNV,
            Dv,
            [GS, SNV],
            [GS, GS],
            [GS, SG],
            [SG, SNV],
            [GS, Dv],
            [SG, Dv],
        ]
    },
    MinMaxScaler(),
]


# Minimal pipeline: scaling and a 3-fold split only.
x_pipeline = [
    RobustScaler(),
    {"split": RepeatedKFold(n_splits=3, n_repeats=1)},
    # bacon_set(),
    MinMaxScaler(),
]

# Minimal pipeline plus a sample-level RT(6) step with "balance" enabled.
x_pipelineb = [
    RobustScaler(),
    {"samples": [RT(6)], "balance": True},
    # {"samples": [None, RT]},
    {"split": RepeatedKFold(n_splits=3, n_repeats=1)},
    # {"features": [None, GS(2,1), SG, SNV, Dv, [GS, SNV], [GS, GS],[GS, SG],[SG, SNV], [GS, Dv], [SG, Dv]]},
    MinMaxScaler(),
]


# Target scaler used by the regression configs; classification configs pass
# None as the y pipeline.
y_pipeline = MinMaxScaler()

seed = 123459456

# Positional Config arguments, as named by the Config repr logged in this
# notebook: dataset, x_pipeline, y_pipeline, model, experiment, seed.

# processing only
config1 = Config("../sample_data/regression", x_pipeline_full, y_pipeline, None, None, seed)
## TRAINING
# regression
config2 = Config("../sample_data/regression", x_pipeline, y_pipeline, nicon, bacon_train_short, seed)
config3 = Config("../sample_data/regression", x_pipeline_PLS, y_pipeline, PLSRegression(n_components=10), None, seed)
# classification
config4 = Config("../sample_data/classification", x_pipeline, None, nicon_classification, {"task":"classification", "training_params":{"epochs":10, "patience": 100, "verbose":0}}, seed*2)
config4b = Config("../sample_data/binary", x_pipelineb, None, nicon_classification, {"task":"classification", "training_params":{"epochs":10, "patience": 100, "verbose":0}}, seed*2)
# "verbose" belongs inside "training_params", as in config4/config4b; it was
# previously a top-level key of the experiment dict.
config5 = Config("../sample_data/binary", x_pipeline, None, nicon_classification, {"task":"classification", "training_params":{"epochs":5, "verbose":0}}, seed*2)
config6 = Config("../sample_data/classification", x_pipeline, None, RandomForestClassifier, {"task":"classification"}, seed*2)
config7 = Config("../sample_data/binary", x_pipeline, None, RandomForestClassifier, {"task":"classification"}, seed*2)
## FINETUNING
# regression
config8 = Config("../sample_data/regression", x_pipeline, y_pipeline, nicon, bacon_finetune, seed)
config9 = Config("../sample_data/regression", x_pipeline, y_pipeline, model_sklearn, finetune_pls_experiment, seed)
# classification
config10 = Config("../sample_data/classification", x_pipeline, None, nicon_classification, bacon_finetune_classif, seed*2)
config10b = Config("../sample_data/binary", x_pipeline, None, nicon_classification, bacon_finetune_classif, seed*2)
config11 = Config("../sample_data/classification", x_pipelineb, None, RandomForestClassifier, finetune_randomForestclassifier, seed*2)
config11b = Config("../sample_data/binary", x_pipeline, None, RandomForestClassifier, finetune_randomForestclassifier, seed*2)


# Registry pairing every configuration with its label, so the banner printed
# for a run always names the configuration actually being executed. Labels
# were previously looked up by position in the selected list, which printed
# "config1" when only config3 was selected.
all_configs = {
    "config1": config1,
    "config2": config2,
    "config3": config3,
    "config4": config4,
    "config4b": config4b,
    "config5": config5,
    "config6": config6,
    "config7": config7,
    "config8": config8,
    "config9": config9,
    "config10": config10,
    "config10b": config10b,
    "config11": config11,
    "config11b": config11b,
}
config_names = list(all_configs)

# Labels of the configurations to run, in execution order.
# selected_names = list(all_configs)
# selected_names = ["config10b", "config11", "config11b"]
selected_names = ["config3"]
configs = [all_configs[name] for name in selected_names]

for i, (name, config) in enumerate(zip(selected_names, configs)):
    print("#" * 20)
    print(f"Config {i}: {name}")
    print("#" * 20)
    start = time.time()
    # Each configuration gets its own runner, started in "restart" mode.
    runner = ExperimentRunner([config], resume_mode="restart")
    datasets, predictions, scores, best_params = runner.run()
    end = time.time()
    print(f"Time elapsed: {end-start} seconds")


In [3]:
%load_ext autoreload
%autoreload 2

from nirs4all.core.config import Config
from nirs4all.core.runner import ExperimentRunner

# Build the configuration from a JSON file instead of Python objects.
config = Config.from_json_file("../sample_data/fast_train.json")
# Override the dataset location on the loaded configuration.
config.dataset = "../sample_data/regression"
print(config)
# A single Config is passed here (not a list); run() returns four results.
runner = ExperimentRunner(config, resume_mode="restart")
datasets, predictions, scores, best_params = runner.run()

2025-05-14 00:28:22,406 - INFO - Running config: Config(dataset='../sample_data/regression', x_pipeline=['sklearn.preprocessing.RobustScaler', {'samples': ['nirs4all.transformations.Rotate_Translate', {'class': 'nirs4all.transformations.Rotate_Translate', 'params': {'p_range': 3, 'y_factor': 5}}]}, {'split': {'class': 'sklearn.model_selection.RepeatedKFold', 'params': {'n_splits': 3, 'n_repeats': 1}}}, {'features': [None, {'class': 'nirs4all.transformations.Gaussian', 'params': {'order': 2, 'sigma': 2}}, 'nirs4all.transformations.SavitzkyGolay', 'nirs4all.transformations.StandardNormalVariate', 'nirs4all.transformations.Derivate', 'nirs4all.transformations.Haar']}, 'sklearn.preprocessing.MinMaxScaler'], y_pipeline='sklearn.preprocessing.MinMaxScaler', model={'class': 'sklearn.cross_decomposition.PLSRegression', 'model_params': {'n_components': 21}}, experiment={'action': 'train'}, seed=None)
2025-05-14 00:28:22,407 - INFO - ### LOADING DATASET ###


The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Config(dataset='../sample_data/regression', x_pipeline=['sklearn.preprocessing.RobustScaler', {'samples': ['nirs4all.transformations.Rotate_Translate', {'class': 'nirs4all.transformations.Rotate_Translate', 'params': {'p_range': 3, 'y_factor': 5}}]}, {'split': {'class': 'sklearn.model_selection.RepeatedKFold', 'params': {'n_splits': 3, 'n_repeats': 1}}}, {'features': [None, {'class': 'nirs4all.transformations.Gaussian', 'params': {'order': 2, 'sigma': 2}}, 'nirs4all.transformations.SavitzkyGolay', 'nirs4all.transformations.StandardNormalVariate', 'nirs4all.transformations.Derivate', 'nirs4all.transformations.Haar']}, 'sklearn.preprocessing.MinMaxScaler'], y_pipeline='sklearn.preprocessing.MinMaxScaler', model={'class': 'sklearn.cross_decomposition.PLSRegression', 'model_params': {'n_components': 21}}, experiment={'action': 'train'}, seed=None)
>> Browsing ../sample_data/regression
No train_group fil

2025-05-14 00:28:23,674 - INFO - Dataset(x_train:(130, 2151) - y_train:(130, 1), x_test:(59, 2151) - y_test:(59, 1))
2025-05-14 00:28:23,674 - INFO - ### PROCESSING DATASET ###
2025-05-14 00:28:24,923 - INFO - Dataset(x_train:(260, 12906) - y_train:(260, 1), x_test:(59, 12906) - y_test:(59, 1))
Folds size: 86-44, 87-43, 87-43
2025-05-14 00:28:24,924 - INFO - ### PREPARING MODEL ###
2025-05-14 00:28:24,927 - INFO - Running config > {'dataset': '../sample_data/regression', 'x_pipeline': ['sklearn.preprocessing.RobustScaler', {'samples': ['nirs4all.transformations.Rotate_Translate', {'class': 'nirs4all.transformations.Rotate_Translate', 'params': {'p_range': 3, 'y_factor': 5}}]}, {'split': {'class': 'sklearn.model_selection.RepeatedKFold', 'params': {'n_splits': 3, 'n_repeats': 1}}}, {'features': [None, {'class': 'nirs4all.transformations.Gaussian', 'params': {'order': 2, 'sigma': 2}}, 'nirs4all.transformations.SavitzkyGolay', 'nirs4all.transformations.StandardNormalVariate', 'nirs4all.tr

Splitter method: sklearn.model_selection.RepeatedKFold
Splitter params: {'n_splits': 3, 'n_repeats': 1}
Loaded splitter class: <class 'sklearn.model_selection._split.RepeatedKFold'>
Using framework: sklearn
Training fold 1, with shapes: (172, 12906) (172, 1) (88, 12906) (88, 1)
Training fold 2, with shapes: (174, 12906) (174, 1) (86, 12906) (86, 1)


2025-05-14 00:28:25,100 - INFO - Saved model to results\sample_dataregression\PLSRegression\experiment_72ea00aa
2025-05-14 00:28:25,118 - INFO - Evaluation Metrics fold_0: {'mse': 399.68833504057557, 'mae': 16.08065514648115}
2025-05-14 00:28:25,120 - INFO - Evaluation Metrics fold_1: {'mse': 407.0605190522869, 'mae': 16.487930924229502}
2025-05-14 00:28:25,121 - INFO - Evaluation Metrics fold_2: {'mse': 427.2875456387469, 'mae': 16.258452634014162}
2025-05-14 00:28:25,122 - INFO - Evaluation Metrics mean: {'mse': 398.55595761149385, 'mae': 15.878677111684443}
2025-05-14 00:28:25,124 - INFO - Evaluation Metrics best: {'mse': 399.68833504057557, 'mae': 16.08065514648115}
2025-05-14 00:28:25,125 - INFO - Evaluation Metrics weighted: {'mse': 398.2234778021958, 'mae': 15.878048109731408}
2025-05-14 00:28:25,126 - INFO - Metrics saved to results\sample_dataregression\PLSRegression\experiment_72ea00aa\metrics.json
2025-05-14 00:28:25,129 - INFO - Predictions saved to results\sample_dataregre

Training fold 3, with shapes: (174, 12906) (174, 1) (86, 12906) (86, 1)
