In [10]:
%reload_ext autoreload
%autoreload 2

import sys
import os
# Make the parent of the current working directory importable so that the
# local `nirs4all` checkout can be imported (presumably the notebook lives one
# level below the repository root -- confirm if the notebook is moved).
# The previous expression passed the *string* '__file__' to dirname(), which
# always yields '', so it already resolved to the parent of the working
# directory; that is now written out directly.
_repo_root = os.path.abspath(os.pardir)
# Re-running the cell must not pile up duplicate sys.path entries.
if _repo_root not in sys.path:
    sys.path.append(_repo_root)

import time
from nirs4all.presets.ref_models import decon, nicon, customizable_nicon, nicon_classification
from nirs4all.presets.preprocessings import decon_set, nicon_set
from nirs4all.data_splitters import KennardStoneSplitter
from nirs4all.transformations import StandardNormalVariate as SNV, SavitzkyGolay as SG, Gaussian as GS, Derivate as  Dv
from nirs4all.transformations import Rotate_Translate as RT, Spline_X_Simplification as SXS, Random_X_Operation as RXO
from nirs4all.transformations import CropTransformer
from nirs4all.core.runner import ExperimentRunner
from nirs4all.core.config import Config

from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import KFold, RepeatedKFold, StratifiedKFold, RepeatedStratifiedKFold, ShuffleSplit, GroupKFold, StratifiedShuffleSplit, BaseCrossValidator, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
import warnings
from sklearn.exceptions import ConvergenceWarning

# Silence noisy warnings for the rest of the kernel session.
# These filters are process-wide: the UserWarning and FutureWarning entries
# hide warnings from every library, not only scikit-learn's convergence notes.
warnings.filterwarnings("ignore", category=ConvergenceWarning)
warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", category=FutureWarning)


# Declarative description of a scikit-learn PLS model: the estimator is named
# by its dotted import path and its constructor arguments are listed apart.
model_sklearn = {
    "class": "sklearn.cross_decomposition.PLSRegression",
    "model_params": {"n_components": 21},
}

# Finetuning experiment for the PLS model above.
# The ('int', 5, 20) tuple is presumably (type tag, low, high) for the tuner's
# search range -- confirm against the nirs4all finetuning documentation.
finetune_pls_experiment = {
    "action": "finetune",
    "finetune_params": {
        "model_params": {"n_components": ("int", 5, 20)},
        "training_params": {},
        "tuner": "sklearn",
    },
}

# Long training run with a cyclic learning-rate schedule.
bacon_train = dict(
    action="train",
    training_params=dict(
        epochs=2000,
        batch_size=500,
        patience=200,
        cyclic_lr=True,
        base_lr=1e-6,
        max_lr=1e-3,
        step_size=400,
    ),
)

# Same layout as `bacon_train`, with far fewer epochs for quick smoke runs.
bacon_train_short = dict(
    action="train",
    training_params=dict(
        epochs=10,
        batch_size=500,
        patience=20,
        cyclic_lr=True,
        base_lr=1e-6,
        max_lr=1e-3,
        step_size=40,
    ),
)

# Small finetuning experiment: five trials over three filter-count choices,
# each candidate trained for ten epochs without console output.
bacon_finetune = dict(
    action="finetune",
    finetune_params=dict(
        n_trials=5,
        model_params=dict(
            filters_1=[8, 16, 32, 64],
            filters_2=[8, 16, 32, 64],
            filters_3=[8, 16, 32, 64],
        ),
    ),
    training_params=dict(epochs=10, verbose=0),
)

# Exhaustive finetuning experiment: 150 trials, each candidate trained for up
# to 500 epochs with a patience of 100.
#
# The trial-count key is spelled "n_trials", matching `bacon_finetune` and
# `bacon_finetune_classif`; the earlier "nb_trials" spelling did not agree
# with the sibling experiments in this notebook.
#
# NOTE(review): the continuous ranges below use the `float` *type object* as
# their first element, whereas the other experiments in this notebook use a
# string tag ('int'). Confirm which form the tuner expects; a type object is
# also not JSON-serialisable.
full_bacon_finetune = {
    "action": "finetune",
    "training_params": {
        "epochs": 500,
        "patience": 100,
    },
    "finetune_params": {
        "n_trials": 150,
        "model_params": {
            'spatial_dropout': (float, 0.01, 0.5),
            'filters1': [4, 8, 16, 32, 64, 128, 256],
            'kernel_size1': [3, 5, 7, 9, 11, 13, 15],
            # 'strides1': [1, 2, 3, 4, 5],
            # 'activation1': ['relu', 'selu', 'elu', 'swish'],
            'dropout_rate': (float, 0.01, 0.5),
            'filters2': [4, 8, 16, 32, 64, 128, 256],
            # 'kernel_size2': [3, 5, 7, 9, 11, 13, 15],
            # 'strides2': [1, 2, 3, 4, 5],
            'activation2': ['relu', 'selu', 'elu', 'swish'],
            'normalization_method1': ['BatchNormalization', 'LayerNormalization'],
            'filters3': [4, 8, 16, 32, 64, 128, 256],
            # 'kernel_size3': [3, 5, 7, 9, 11, 13, 15],
            # 'strides3': [1, 2, 3, 4, 5],
            'activation3': ['relu', 'selu', 'elu', 'swish'],
            # 'normalization_method2': ['BatchNormalization', 'LayerNormalization'],
            # 'dense_units': [4, 8, 16, 32, 64, 128, 256],
            'dense_activation': ['relu', 'selu', 'elu', 'swish'],
        },
        # Optional search over training hyper-parameters (currently disabled):
        # "training_params": {
        #     "batch_size": [32, 64, 128, 256, 512],
        #     "cyclic_lr": [True, False],
        #     "base_lr": (float, 1e-6, 1e-2),
        #     "max_lr": (float, 1e-3, 1e-1),
        #     "step_size": (int, 500, 5000),
        # },
    }
}


# Full X pipeline: robust scaling, a "samples" step, a 3-fold split, a
# "features" step listing single transformers and chained pairs, then a final
# min-max scaling to [0, 0.9].
# Most transformers are passed as classes here; only GS(3, 1) is instantiated.
# NOTE(review): the meaning of the repeated `None` entries under "samples" is
# not visible in this notebook -- confirm against the nirs4all pipeline docs.
x_pipeline_full = [
    RobustScaler(quantile_range=(0.05, 0.95)),
    {"samples": [None, None, None, None, SXS, RXO]},
    {"split": RepeatedKFold(n_splits=3, n_repeats=1)},
    {
        "features": [
            None,
            GS(3, 1),
            SG,
            SNV,
            Dv,
            [GS, SNV],
            [GS, GS],
            [GS, SG],
            [SG, SNV],
            [GS, Dv],
            [SG, Dv],
        ]
    },
    MinMaxScaler(feature_range=(0, 0.9), clip=False),
]


# Classification counterpart of the small neural-network finetuning run:
# five trials over three filter-count choices, five epochs per candidate.
bacon_finetune_classif = dict(
    action="finetune",
    task="classification",
    finetune_params=dict(
        n_trials=5,
        model_params=dict(
            filters_1=[8, 16, 32, 64],
            filters_2=[8, 16, 32, 64],
            filters_3=[8, 16, 32, 64],
        ),
    ),
    training_params=dict(epochs=5, verbose=0),
)

# Finetuning experiment for a random-forest classifier using the 'sklearn'
# tuner; the ('int', 5, 20) tuple is presumably (type tag, low, high) --
# confirm against the nirs4all finetuning documentation.
finetune_randomForestclassifier = dict(
    action="finetune",
    task="classification",
    finetune_params=dict(
        model_params=dict(n_estimators=("int", 5, 20)),
        training_params=dict(),
        tuner="sklearn",
    ),
)

# Pipeline used with the PLS model: default robust scaling, a 3-fold split,
# a "features" step listing single transformers and chained pairs, then
# default min-max scaling.
x_pipeline_PLS = [
    RobustScaler(),
    {"split": RepeatedKFold(n_splits=3, n_repeats=1)},
    {
        "features": [
            None,
            GS(2, 1),
            SG,
            SNV,
            Dv,
            [GS, SNV],
            [GS, GS],
            [GS, SG],
            [SG, SNV],
            [GS, Dv],
            [SG, Dv],
        ]
    },
    MinMaxScaler(),
]


# Minimal pipeline: scale, split, scale.
x_pipeline = [
    RobustScaler(),
    {"split": RepeatedKFold(n_splits=3, n_repeats=1)},
    # bacon_set(),
    MinMaxScaler(),
]

# Variant of the minimal pipeline with a "samples" step using
# Rotate_Translate(6) and the "balance" flag switched on.
x_pipelineb = [
    RobustScaler(),
    {"samples": [RT(6)], "balance": True},
    # {"samples": [None, RT]},
    {"split": RepeatedKFold(n_splits=3, n_repeats=1)},
    # {"features": [None, GS(2,1), SG, SNV, Dv, [GS, SNV], [GS, GS],[GS, SG],[SG, SNV], [GS, Dv], [SG, Dv]]},
    MinMaxScaler(),
]


# Target (y) pipeline: a single min-max scaler.
y_pipeline = MinMaxScaler()

# Base random seed; the classification experiments below use `seed*2`.
seed = 123459456

# Config arguments are positional, in the order shown by Config's repr:
# (dataset, x_pipeline, y_pipeline, model, experiment, seed).

# processing only
config1 = Config("../sample_data/regression", x_pipeline_full, y_pipeline, None, None, seed)
## TRAINING
# regression
config2 = Config("../sample_data/regression", x_pipeline, y_pipeline, nicon, bacon_train_short, seed)
config3 = Config("../sample_data/regression", x_pipeline_PLS, y_pipeline, PLSRegression(n_components=10), None, seed)
# classification
config4 = Config("../sample_data/classification", x_pipeline, None, nicon_classification, {"task":"classification", "training_params":{"epochs":10, "patience": 100, "verbose":0}}, seed*2)
config4b = Config("../sample_data/binary", x_pipelineb, None, nicon_classification, {"task":"classification", "training_params":{"epochs":10, "patience": 100, "verbose":0}}, seed*2)
# "verbose" belongs inside "training_params", as in config4/config4b and the
# finetune experiments; it previously sat next to "training_params".
config5 = Config("../sample_data/binary", x_pipeline, None, nicon_classification, {"task":"classification", "training_params":{"epochs":5, "verbose":0}}, seed*2)
config6 = Config("../sample_data/classification", x_pipeline, None, RandomForestClassifier, {"task":"classification"}, seed*2)
config7 = Config("../sample_data/binary", x_pipeline, None, RandomForestClassifier, {"task":"classification"}, seed*2)
## FINETUNING
# regression
config8 = Config("../sample_data/regression", x_pipeline, y_pipeline, nicon, bacon_finetune, seed)
config9 = Config("../sample_data/regression", x_pipeline, y_pipeline, model_sklearn, finetune_pls_experiment, seed)
# classification
config10 = Config("../sample_data/classification", x_pipeline, None, nicon_classification, bacon_finetune_classif, seed*2)
config10b = Config("../sample_data/binary", x_pipeline, None, nicon_classification, bacon_finetune_classif, seed*2)
config11 = Config("../sample_data/classification", x_pipelineb, None, RandomForestClassifier, finetune_randomForestclassifier, seed*2)
config11b = Config("../sample_data/binary", x_pipeline, None, RandomForestClassifier, finetune_randomForestclassifier, seed*2)


# Single source of truth pairing each configuration with its display name, so
# `configs` and `config_names` can never drift out of step.
# To run a subset, filter this mapping (e.g. keep only "config3") rather than
# reassigning `configs` alone, which would mislabel the entries.
named_configs = {
    "config1": config1,
    "config2": config2,
    "config3": config3,
    "config4": config4,
    "config4b": config4b,
    "config5": config5,
    "config6": config6,
    "config7": config7,
    "config8": config8,
    "config9": config9,
    "config10": config10,
    "config10b": config10b,
    "config11": config11,
    "config11b": config11b,
}
configs = list(named_configs.values())
config_names = list(named_configs)
# for i, config in enumerate(configs):
#     print("#" * 20)
#     print(f"Config {i}: {config_names[i]}")
#     print("#" * 20)
#     start = time.time()
#     runner = ExperimentRunner([config], resume_mode="restart")
#     datasets, predictions, scores, best_params = runner.run()
#     end = time.time()
#     print(f"Time elapsed: {end-start} seconds")
    



In [11]:
%load_ext autoreload
%autoreload 2

from nirs4all.utils.serialization import _serialize_component, _deserialize_component
# Round-trip each configuration through its dict form and report the ones
# whose string representation changes on the way back.
for config, name in zip(configs, config_names):
    serialized_config = config.to_dict()
    deserialized_config = Config.from_dict(serialized_config)
    print("--" * 40)
    if str(config) == str(deserialized_config):
        # Textual representation survived the round trip; nothing to report.
        continue
    print(
        f"{name}",
        "Config and deserialized config are not equal",
        f"OConfig: {config}",
        "-" * 20,
        f"SConfig: {serialized_config}",
        "-" * 20,
        f"DConfig: {deserialized_config}",
        sep="\n",
    )
        
    
#     # start = time.time()
#     # runner = ExperimentRunner(deserialized_config, resume_mode="restart")
#     # datasets, predictions, scores, best_params = runner.run()
#     # end = time.time()
#     # print(f"Time elapsed: {end-start} seconds")
# import json
# # json load ../nirs4all/presets/configs/fast_train.json
# json_config = json.load(open("../nirs4all/presets/configs/fast_train.json"))
# print(json_config)
# deserialized_config = Config.from_dict(json_config)
# print(deserialized_config)

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
Type mismatch: <class 'int'> != <class 'str'>
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
--------------------------------------------------------------------------------
config9
Config and deserialized config are not equal
OCo

In [12]:
%load_ext autoreload
%autoreload 2

import inspect
from typing import Any, Union, Callable, Dict, Tuple
from typing import Type
import inspect
import importlib

from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import KFold, RepeatedKFold, StratifiedKFold, RepeatedStratifiedKFold, ShuffleSplit, GroupKFold, StratifiedShuffleSplit, BaseCrossValidator, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler

from nirs4all.utils.serialization import _serialize_component, _deserialize_component


print("=" * 20)
class custom_obj:
    """Toy class used to exercise component (de)serialization.

    It stores its three constructor arguments unchanged as attributes of the
    same names; `c` defaults to the tuple (0.0, 0.1).
    """

    def __init__(self, a, b: str, c = (0.0, 0.1)):
        self.a, self.b, self.c = a, b, c

    def __repr__(self):
        return f"custom_obj(a={self.a}, b={self.b}, c={self.c})"

    # str() and repr() share one textual form.
    __str__ = __repr__
        
# Round-trip a user-defined instance through the serialization helpers.
# In the recorded output the serialized form is a dict with 'instance' and
# 'params' keys, the tuple `c` appears as a list in it, and the restored
# object prints `c` as a tuple again.
a = custom_obj(1, "test", (0.25,0))
print(a)
print(a.c)
ser = _serialize_component(a)
print(ser)
deser = _deserialize_component(ser)
print(deser)


print("="*20)
def test_func(a: int, b: str, c = (0.0, 0.1)):
    return a + b + c[0] + c[1]

# Round-trip a plain function. In the recorded output it serializes to
# {'function': '__main__.test_func'} and deserializes to a function printed
# at the same address as the original.
ser_func = _serialize_component(test_func)
print(test_func)
print(ser_func)
deser_func = _deserialize_component(ser_func)
print(deser_func)


# Round-trip a scikit-learn splitter instance. In the recorded output the
# serialized 'params' contain only n_splits and n_repeats (random_state is
# absent), and the restored object prints identically to the original.
print("="*20)
obj = RepeatedKFold(n_splits=3, n_repeats=1)
ser_obj = _serialize_component(obj)
print("="*20)
print(obj)
print(ser_obj)
deser_obj = _deserialize_component(ser_obj)
print(deser_obj)


print("=" * 20)
class Foo:
    """Toy class with a single tuple-annotated constructor argument.

    Used to check how a `Tuple[int, int]` parameter survives serialization.
    """

    def __init__(self, coords: Tuple[int, int]):
        self.coords = coords

    def __repr__(self):
        return f"Foo(coords={self.coords})"

    # str() and repr() share one textual form.
    __str__ = __repr__

# Round-trip a Foo instance. In the recorded output the tuple `coords` is
# written as a list in the serialized dict and printed as a tuple again on
# the restored object.
f = Foo((1, 2))
print(f)
blob = _serialize_component(f)
print(blob)
g = _deserialize_component(blob)
print(g)



The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
custom_obj(a=1, b=test, c=(0.25, 0))
(0.25, 0)
{'instance': '__main__.custom_obj', 'params': {'a': 1, 'b': 'test', 'c': [0.25, 0]}}
custom_obj(a=1, b=test, c=(0.25, 0))
<function test_func at 0x0000029E4EA43D00>
{'function': '__main__.test_func'}
<function test_func at 0x0000029E4EA43D00>
RepeatedKFold(n_repeats=1, n_splits=3, random_state=None)
{'instance': 'sklearn.model_selection._split.RepeatedKFold', 'params': {'n_splits': 3, 'n_repeats': 1}}
RepeatedKFold(n_repeats=1, n_splits=3, random_state=None)
Foo(coords=(1, 2))
{'instance': '__main__.Foo', 'params': {'coords': [1, 2]}}
Foo(coords=(1, 2))


In [13]:
%load_ext autoreload
%autoreload 2

from nirs4all.core.config import Config
from nirs4all.core.runner import ExperimentRunner

# Load a preset configuration from JSON, point it at the sample regression
# dataset, and run it from scratch (resume_mode="restart").
# NOTE(review): in the recorded run this cell stops during dataset processing
# with "ValueError: Class <...Rotate_Translate> not found: type object
# 'Rotate_Translate' has no attribute 'split'". The printed config shows
# 'class' entries already holding class objects rather than dotted-path
# strings, which may be what the loader trips over -- confirm in nirs4all.
config = Config.from_json_file("../nirs4all/presets/configs/fast_train.json")
# Overrides the dataset on the loaded config in place.
config.dataset = "../sample_data/regression"
print(config)
runner = ExperimentRunner(config, resume_mode="restart")
datasets, predictions, scores, best_params = runner.run()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
Config(dataset='../sample_data/regression', x_pipeline=['sklearn.preprocessing.RobustScaler', {'samples': ['nirs4all.transformations.Rotate_Translate', {'class': <class 'nirs4all.transformations._random_augmentation.Rotate_Translate'>, 'params': {'p_range': 3, 'y_factor': 5}}]}, {'split': {'class': <class 'sklearn.model_selection._split.RepeatedKFold'>, 'params': {'n_splits': 3, 'n_repeats': 1}}}, {'features': [None, {'class': <class 'nirs4all.transformations._standard.Gaussian'>, 'params': {'order': 2, 'sigma': 2}}, 'nirs4all.transformations.SavitzkyGolay', 'nirs4all.transformations.StandardNormalVariate', 'nirs4all.transformations.Derivate', 'nirs4all.transformations.Haar']}, 'sklearn.preprocessing.MinMaxScaler'], y_pipeline='sklearn.preprocessing.MinMaxScaler', model=<class 'sklearn.cross_decomposition._pls.PLSRegression'>, experiment={'action': 'train'}, seed=None)


2025-05-16 16:23:41,735 - INFO - Running config: Config(dataset='../sample_data/regression', x_pipeline=['sklearn.preprocessing.RobustScaler', {'samples': ['nirs4all.transformations.Rotate_Translate', {'class': <class 'nirs4all.transformations._random_augmentation.Rotate_Translate'>, 'params': {'p_range': 3, 'y_factor': 5}}]}, {'split': {'class': <class 'sklearn.model_selection._split.RepeatedKFold'>, 'params': {'n_splits': 3, 'n_repeats': 1}}}, {'features': [None, {'class': <class 'nirs4all.transformations._standard.Gaussian'>, 'params': {'order': 2, 'sigma': 2}}, 'nirs4all.transformations.SavitzkyGolay', 'nirs4all.transformations.StandardNormalVariate', 'nirs4all.transformations.Derivate', 'nirs4all.transformations.Haar']}, 'sklearn.preprocessing.MinMaxScaler'], y_pipeline='sklearn.preprocessing.MinMaxScaler', model=<class 'sklearn.cross_decomposition._pls.PLSRegression'>, experiment={'action': 'train'}, seed=None)
2025-05-16 16:23:41,736 - INFO - ### LOADING DATASET ###


>> Browsing ../sample_data/regression
No train_group file found for ../sample_data/regression.
No test_group file found for ../sample_data/regression.


2025-05-16 16:23:42,514 - INFO - Dataset(x_train:(130, 2151) - y_train:(130, 1), x_test:(59, 2151) - y_test:(59, 1))
2025-05-16 16:23:42,515 - INFO - ### PROCESSING DATASET ###


ValueError: Class <class 'nirs4all.transformations._random_augmentation.Rotate_Translate'> not found: type object 'Rotate_Translate' has no attribute 'split'

In [None]:
# Same layout as the earlier full pipeline, but every transformer is an
# instance here (the earlier definition passed most of them as classes).
x_pipeline_full = [
    RobustScaler(quantile_range=(0.05, 0.95)),
    {"samples": [None, None, None, None, SXS(), RXO()]},
    {"split": RepeatedKFold(n_splits=3, n_repeats=1)},
    {
        "features": [
            None,
            GS(3, 1),
            SG(),
            SNV(),
            Dv(),
            [GS(), SNV()],
            [GS(), GS()],
            [GS(), SG()],
            [SG(), SNV()],
            [GS(), Dv()],
            [SG(), Dv()],
        ]
    },
    MinMaxScaler(feature_range=(0, 0.9), clip=False),
]

print(x_pipeline_full)

# Pickle round trip: write the pipeline to a file in the working directory,
# then read it back over the same variable and print it again.
# Only unpickle files this notebook wrote itself; pickle.load can run
# arbitrary code from an untrusted file.
import pickle

with open("x_pipeline_full.pkl", "wb") as f:
    pickle.dump(x_pipeline_full, f)

with open("x_pipeline_full.pkl", "rb") as f:
    x_pipeline_full = pickle.load(f)

print(x_pipeline_full)

In [None]:
from hypothesis_jsonschema import from_schema
import json
from nirs4all.data.schema import schema

# Build the strategy from the JSON schema only once
strategy = from_schema(schema)

# Then draw "concrete" examples from it and pretty-print each one as JSON.
# The samples are generated by the strategy, so they are expected to differ
# from run to run.
for _ in range(10):
    sample = strategy.example()
    print(json.dumps(sample, ensure_ascii=False, indent=2))
    print("=" * 20)

{
  "data": {
    "train": {
      "X": {
        "path": "/STnh6Foy.npy"
      },
      "Y": {
        "to": 51
      }
    }
  }
}
{
  "data": "/2_mUp/B4/"
}
{
  "data": {
    "train": {
      "X": "/0.csv",
      "Y": "/0.csv"
    }
  }
}
{
  "data": {
    "train": {
      "X": "/4g-e9MNQLYT1kObN.csv.gz",
      "Y": {
        "from": 160
      }
    }
  }
}
{
  "data": {
    "train": {
      "X": "/Id9-R/o.csv.gz",
      "Y": {
        "path": "/yMm/gqyT/hKdpY/N3/KRE/F.mY0/21unRqqI.csv"
      }
    }
  }
}
{
  "data": {
    "path": "/JQ/MFW/"
  }
}
{
  "data": {
    "path": "/0/"
  }
}
{
  "data": {
    "path": "/P08nm/"
  }
}
{
  "data": "/-8b/iSUpby/n/F/wWx3C/ft5j/lH2ay/"
}
{
  "data": "/jgY/HoUgaM/HwgqhpHC0-XfkLZbZ0aD_/t.7-u/rc/ss9kO5vcWHVaS.mWT/3D99/"
}
