In [20]:
%reload_ext autoreload
%autoreload 2

import sys
import os
sys.path.append(os.path.abspath(os.path.join(os.path.dirname('__file__'), '..')))

import time
from nirs4all.presets.ref_models import decon, nicon, customizable_nicon, nicon_classification
from nirs4all.presets.preprocessings import decon_set, nicon_set
from nirs4all.data_splitters import KennardStoneSplitter
from nirs4all.transformations import StandardNormalVariate as SNV, SavitzkyGolay as SG, Gaussian as GS, Derivate as  Dv
from nirs4all.transformations import Rotate_Translate as RT, Spline_X_Simplification as SXS, Random_X_Operation as RXO
from nirs4all.transformations import CropTransformer
from nirs4all.core.runner import ExperimentRunner
from nirs4all.core.config import Config

from sklearn.ensemble import RandomForestClassifier
from sklearn.cross_decomposition import PLSRegression
from sklearn.model_selection import KFold, RepeatedKFold, StratifiedKFold, RepeatedStratifiedKFold, ShuffleSplit, GroupKFold, StratifiedShuffleSplit, BaseCrossValidator, TimeSeriesSplit
from sklearn.preprocessing import StandardScaler, MinMaxScaler, RobustScaler
import warnings
from sklearn.exceptions import ConvergenceWarning

# Install an "ignore" filter for each of these warning categories.
for _ignored_category in (ConvergenceWarning, UserWarning, FutureWarning):
    warnings.filterwarnings("ignore", category=_ignored_category)


# Model described by its dotted import path plus constructor arguments.
model_sklearn = {
    "class": "sklearn.cross_decomposition.PLSRegression",
    "model_params": {"n_components": 21},
}

# Finetuning experiment for the PLS model above.
# The tuple is presumably (type tag, low, high) -- confirm against the tuner.
finetune_pls_experiment = {
    "action": "finetune",
    "finetune_params": {
        "model_params": {"n_components": ("int", 5, 20)},
        "training_params": {},
        "tuner": "sklearn",
    },
}

# Long training run with a cyclic learning-rate schedule.
bacon_train = {
    "action": "train",
    "training_params": {
        "epochs": 2000,
        "batch_size": 500,
        "patience": 200,
        "cyclic_lr": True,
        "base_lr": 1e-6,
        "max_lr": 1e-3,
        "step_size": 400,
    },
}

# Same settings with far fewer epochs, for quick smoke runs.
bacon_train_short = {
    "action": "train",
    "training_params": {
        "epochs": 10,
        "batch_size": 500,
        "patience": 20,
        "cyclic_lr": True,
        "base_lr": 1e-6,
        "max_lr": 1e-3,
        "step_size": 40,
    },
}

# Small finetuning run: 5 trials over three filter-count choices.
bacon_finetune = {
    "action": "finetune",
    "finetune_params": {
        "n_trials": 5,
        "model_params": {
            "filters_1": [8, 16, 32, 64],
            "filters_2": [8, 16, 32, 64],
            "filters_3": [8, 16, 32, 64],
        },
    },
    "training_params": {
        "epochs": 10,
        "verbose": 0,
    },
}

# Wide hyper-parameter search space for the convolutional model.
# List values enumerate the candidate choices; tuples are presumably
# (type, low, high) ranges -- confirm against the tuner.
# NOTE(review): no Config(...) call visible in this notebook uses this dict.
full_bacon_finetune = {
    "action": "finetune",
    "training_params": {
        "epochs": 500,
        "patience": 100,
    },
    "finetune_params": {
        # NOTE(review): the other finetune dicts in this notebook spell this key
        # "n_trials"; confirm which spelling the tuner actually reads.
        "nb_trials": 150,
        # NOTE(review): keys here are "filters1"/"filters2"/"filters3" while
        # bacon_finetune uses "filters_1"/"filters_2"/"filters_3"; the range
        # tuples use the `float` type object while finetune_pls_experiment
        # uses the string 'int'. Verify both against the model/tuner.
        "model_params": {
            'spatial_dropout': (float, 0.01, 0.5),
            'filters1': [4, 8, 16, 32, 64, 128, 256],
            'kernel_size1': [3, 5, 7, 9, 11, 13, 15],
            # 'strides1': [1, 2, 3, 4, 5],
            # 'activation1': ['relu', 'selu', 'elu', 'swish'],
            'dropout_rate': (float, 0.01, 0.5),
            'filters2': [4, 8, 16, 32, 64, 128, 256],
            # 'kernel_size2': [3, 5, 7, 9, 11, 13, 15],
            # 'strides2': [1, 2, 3, 4, 5],
            'activation2': ['relu', 'selu', 'elu', 'swish'],
            'normalization_method1': ['BatchNormalization', 'LayerNormalization'],
            'filters3': [4, 8, 16, 32, 64, 128, 256],
            # 'kernel_size3': [3, 5, 7, 9, 11, 13, 15],
            # 'strides3': [1, 2, 3, 4, 5],
            'activation3': ['relu', 'selu', 'elu', 'swish'],
            # 'normalization_method2': ['BatchNormalization', 'LayerNormalization'],
            # 'dense_units': [4, 8, 16, 32, 64, 128, 256],
            'dense_activation': ['relu', 'selu', 'elu', 'swish'],
        },
        # Training-parameter search is currently disabled.
        # "training_params": {
        #     "batch_size": [32, 64, 128, 256, 512],
        #     "cyclic_lr": [True, False],
        #     "base_lr": (float, 1e-6, 1e-2),
        #     "max_lr": (float, 1e-3, 1e-1),
        #     "step_size": (int, 500, 5000),
        # },
    }
}


# Most complete x pipeline in this notebook (passed to config1).
# Steps run in list order; dict steps carry a "samples", "split" or
# "features" key -- presumably augmentation, cross-validation and
# preprocessing alternatives respectively (confirm against the runner).
x_pipeline_full = [
    RobustScaler(),
    {"samples": [None, None, None, None, SXS, RXO]},
    {"split": RepeatedKFold(n_splits=3, n_repeats=1)},
    {
        "features": [
            None,
            GS(2, 1),
            SG,
            SNV,
            Dv,
            [GS, SNV],
            [GS, GS],
            [GS, SG],
            [SG, SNV],
            [GS, Dv],
            [SG, Dv],
        ]
    },
    MinMaxScaler(),
]


# Classification counterpart of the small filter-count finetuning run.
bacon_finetune_classif = {
    "action": "finetune",
    "task": "classification",
    "finetune_params": {
        "n_trials": 5,
        "model_params": {
            "filters_1": [8, 16, 32, 64],
            "filters_2": [8, 16, 32, 64],
            "filters_3": [8, 16, 32, 64],
        },
    },
    "training_params": {
        "epochs": 5,
        "verbose": 0,
    },
}

# Finetuning of a random-forest classifier with the 'sklearn' tuner.
# The tuple is presumably (type tag, low, high) -- confirm against the tuner.
finetune_randomForestclassifier = {
    "action": "finetune",
    "task": "classification",
    "finetune_params": {
        "model_params": {"n_estimators": ("int", 5, 20)},
        "training_params": {},
        "tuner": "sklearn",
    },
}

# Pipeline passed to the PLS run (config3): the same "features"
# alternatives as x_pipeline_full, but without a "samples" step.
x_pipeline_PLS = [
    RobustScaler(),
    {"split": RepeatedKFold(n_splits=3, n_repeats=1)},
    {
        "features": [
            None,
            GS(2, 1),
            SG,
            SNV,
            Dv,
            [GS, SNV],
            [GS, GS],
            [GS, SG],
            [SG, SNV],
            [GS, Dv],
            [SG, Dv],
        ]
    },
    MinMaxScaler(),
]


# Minimal pipeline: scale, split, rescale.
x_pipeline = [
    RobustScaler(),
    {"split": RepeatedKFold(n_splits=3, n_repeats=1)},
    # bacon_set(),
    MinMaxScaler(),
]

# Variant of the minimal pipeline with a "samples" step using RT(6) and
# "balance": True.
x_pipelineb = [
    RobustScaler(),
    {"samples": [RT(6)], "balance": True},
    # {"samples": [None, RT]},
    {"split": RepeatedKFold(n_splits=3, n_repeats=1)},
    # {"features": [None, GS(2,1), SG, SNV, Dv, [GS, SNV], [GS, GS],[GS, SG],[SG, SNV], [GS, Dv], [SG, Dv]]},
    MinMaxScaler(),
]


# Target (y) transformation passed to every regression config below;
# the classification configs pass None instead.
y_pipeline = MinMaxScaler()

# Base seed; the classification configs below pass seed*2.
seed = 123459456

# Config positional arguments, as shown by the Config repr printed later in
# this notebook: dataset, x_pipeline, y_pipeline, model, experiment, seed.

# processing only
config1 = Config("../sample_data/regression", x_pipeline_full, y_pipeline, None, None, seed)
## TRAINING
# regression
config2 = Config("../sample_data/regression", x_pipeline, y_pipeline, nicon, bacon_train_short, seed)
config3 = Config("../sample_data/regression", x_pipeline_PLS, y_pipeline, PLSRegression(n_components=10), None, seed)
# classification
config4 = Config("../sample_data/classification", x_pipeline, None, nicon_classification, {"task":"classification", "training_params":{"epochs":10, "patience": 100, "verbose":0}}, seed*2)
config4b = Config("../sample_data/binary", x_pipelineb, None, nicon_classification, {"task":"classification", "training_params":{"epochs":10, "patience": 100, "verbose":0}}, seed*2)
# NOTE(review): in config5 "verbose" sits beside "training_params", whereas
# config4/config4b nest it inside "training_params" -- confirm which is intended.
config5 = Config("../sample_data/binary", x_pipeline, None, nicon_classification, {"task":"classification", "training_params":{"epochs":5}, "verbose":0}, seed*2)
# config6/config7 pass the RandomForestClassifier class itself, not an instance.
config6 = Config("../sample_data/classification", x_pipeline, None, RandomForestClassifier, {"task":"classification"}, seed*2)
config7 = Config("../sample_data/binary", x_pipeline, None, RandomForestClassifier, {"task":"classification"}, seed*2)
## FINETUNING
# regression
config8 = Config("../sample_data/regression", x_pipeline, y_pipeline, nicon, bacon_finetune, seed)
config9 = Config("../sample_data/regression", x_pipeline, y_pipeline, model_sklearn, finetune_pls_experiment, seed)
# classification
config10 = Config("../sample_data/classification", x_pipeline, None, nicon_classification, bacon_finetune_classif, seed*2)
config10b = Config("../sample_data/binary", x_pipeline, None, nicon_classification, bacon_finetune_classif, seed*2)
config11 = Config("../sample_data/classification", x_pipelineb, None, RandomForestClassifier, finetune_randomForestclassifier, seed*2)
config11b = Config("../sample_data/binary", x_pipeline, None, RandomForestClassifier, finetune_randomForestclassifier, seed*2)


# configs = [config1, config2, config3, config4, config4b, config5, config6, config7, config8, config9, config10, config10b, config11, config11b]
# configs = [config10b, config11, config11b]
# configs = [config3]
# config_names = ["config1", "config2", "config3", "config4", "config4b", "config5", "config6", "config7", "config8", "config9", "config10", "config10b", "config11", "config11b"]
# for i, config in enumerate(configs):
#     print("#" * 20)
#     print(f"Config {i}: {config_names[i]}")
#     print("#" * 20)
#     start = time.time()
#     runner = ExperimentRunner([config], resume_mode="restart")
#     datasets, predictions, scores, best_params = runner.run()
#     end = time.time()
#     print(f"Time elapsed: {end-start} seconds")
    



In [18]:
%load_ext autoreload
%autoreload 2
# Serialization round trip: print config4, its dict form, and the Config
# rebuilt from that dict, each preceded by a "#" separator line.
print("#" * 20)
print(config4)
config_dict = config4.to_dict()
print("#" * 20)
print(config_dict)
# NOTE(review): despite its name, new_config10 is rebuilt from config4's dict,
# not from config10.
new_config10 = Config.from_dict(config_dict)
print("#" * 20)
print(new_config10)
# Running the rebuilt config is currently disabled.
# runner = ExperimentRunner([new_config10], resume_mode="restart")
# datasets, predictions, scores, best_params = runner.run()

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
####################
Config(dataset='../sample_data/classification', x_pipeline=[RobustScaler(), {'split': RepeatedKFold(n_repeats=1, n_splits=3, random_state=None)}, MinMaxScaler()], y_pipeline=None, model=<function nicon_classification at 0x00000226D9EEBD00>, experiment={'task': 'classification', 'training_params': {'epochs': 10, 'patience': 100, 'verbose': 0}}, seed=246918912)
RobustScaler()
obj.__dict__: {'with_centering': True, 'with_scaling': True, 'quantile_range': (25.0, 75.0), 'unit_variance': False, 'copy': True}
> get_params: {'copy': True, 'quantile_range': (25.0, 75.0), 'unit_variance': False, 'with_centering': True, 'with_scaling': True}
> inspect init: (self, *, with_centering=True, with_scaling=True, quantile_range=(25.0, 75.0), copy=True, unit_variance=False)
> inspect dict: {'with_centering': True, 'with_scaling': True, 'quantile_range': (25.0, 75.0), 'unit_variance': False, 'copy'

In [None]:
import inspect

def changed_kwargs(obj):
    """Report the ``__init__`` parameters of *obj* that hold a non-default value.

    The signature of ``type(obj).__init__`` is inspected and, for every named
    parameter, the value currently held by *obj* is compared with the
    parameter's default.

    The current value is looked up as an attribute of the same name. If no
    such attribute exists, a ``cvargs`` dict stored on the instance is
    consulted (scikit-learn's repeated splitters keep their constructor
    kwargs there). If that fails too, the default is assumed.

    Parameters without a default (required parameters) are compared against
    ``None``, so a required parameter whose current value is ``None`` is not
    reported. Variadic ``*args`` / ``**kwargs`` entries are skipped because
    they do not name a single constructor argument.

    Args:
        obj: Any instance whose class ``__init__`` can be introspected with
            ``inspect.signature``.

    Returns:
        dict: ``{param_name: (current_value, type(current_value))}`` for each
        parameter whose current value differs (``!=``) from its default.
    """
    sig = inspect.signature(obj.__class__.__init__)

    # Instances of __slots__ classes have no __dict__; treat that as "no cvargs"
    # instead of letting the fallback lookup raise AttributeError.
    fallback_kwargs = getattr(obj, "__dict__", {}).get("cvargs", {})
    variadic_kinds = (
        inspect.Parameter.VAR_POSITIONAL,
        inspect.Parameter.VAR_KEYWORD,
    )

    out = {}
    for name, param in sig.parameters.items():
        if name == "self" or param.kind in variadic_kinds:
            continue

        # Required parameters have no default; None stands in for it.
        default = None if param.default is inspect.Parameter.empty else param.default

        # getattr() sees properties as well as real attributes.
        try:
            current = getattr(obj, name)
        except AttributeError:
            current = fallback_kwargs.get(name, default)

        if current != default:
            out[name] = (current, type(current))
    return out

from typing import Any, get_type_hints
import inspect

def resolve_type(obj_or_cls: Any, name: str) -> type | Any | None:
    cls = obj_or_cls if inspect.isclass(obj_or_cls) else obj_or_cls.__class__
    sig = inspect.signature(obj_or_cls.__class__.__init__)
    if name in sig.parameters:
        print(">>", name)
        if sig.parameters[name].default is inspect._empty:
            if sig.parameters[name].annotation is not inspect._empty:
                return sig.parameters[name].annotation
            else:
                if hasattr(obj_or_cls, name):
                    return type(getattr(obj_or_cls, name))
                else:
                    return None
        else:
            return type(sig.parameters[name].default)
    
    class_hints = get_type_hints(cls, include_extras=True)
    if name in class_hints:
        return class_hints[name]
    
    init_hints = get_type_hints(cls.__init__, include_extras=True)
    init_hints.pop('return', None)
    if name in init_hints:
        return init_hints[name]

    if not inspect.isclass(obj_or_cls) and hasattr(obj_or_cls, name):
        return type(getattr(obj_or_cls, name))

    return None

class custom_obj:
    """Tiny demo class with one plain, one annotated and one defaulted parameter."""

    def __init__(self, a, b: str, c=0.0):
        # Store each constructor argument under the attribute of the same name.
        self.a, self.b, self.c = a, b, c
        
# Exercise both helpers on the hand-written class and on a few scikit-learn
# objects: print the changed kwargs first, then the resolved type of each
# listed parameter.
for obj, attr_names in (
    (custom_obj(1, "test", 3.14), ("a", "b", "c")),
    (RepeatedKFold(n_splits=3, n_repeats=1), ("n_splits", "n_repeats")),
    (StandardScaler(), ("with_mean", "with_std")),
    (MinMaxScaler(feature_range=(0, 0.9)), ("feature_range",)),
):
    print(changed_kwargs(obj))
    for attr_name in attr_names:
        print(resolve_type(obj, attr_name))


# print(obj)  # RepeatedKFold(n_repeats=1, n_splits=3, random_state=None)
# sig = inspect.signature(obj.__class__.__init__)
# print(sig)  # (self, *, n_splits=5, n_repeats=10, random_state=None)

{'a': (1, <class 'int'>), 'b': ('test', <class 'str'>), 'c': (3.14, <class 'float'>)}
>> a
<class 'int'>
>> b
<class 'str'>
>> c
<class 'float'>
{'n_splits': (3, <class 'int'>), 'n_repeats': (1, <class 'int'>)}
>> n_splits
<class 'int'>
>> n_repeats
<class 'int'>
{}
>> with_mean
<class 'bool'>
>> with_std
<class 'bool'>
{'feature_range': ((0, 0.9), <class 'tuple'>)}
>> feature_range
<class 'tuple'>


In [None]:
%load_ext autoreload
%autoreload 2

from nirs4all.core.config import Config
from nirs4all.core.runner import ExperimentRunner

# Load the "fast_train" preset from JSON, point it at the regression sample
# data, print it, and run it.
config = Config.from_json_file("../nirs4all/presets/configs/fast_train.json")
config.dataset = "../sample_data/regression"
print(config)
# NOTE(review): the other ExperimentRunner calls in this notebook (commented
# out) pass a list of configs; here a single Config is passed -- confirm the
# runner accepts both forms.
runner = ExperimentRunner(config, resume_mode="restart")
datasets, predictions, scores, best_params = runner.run()