In [14]:
# Test TensorFlow train_params specifically
%reload_ext autoreload

from nirs4all.dataset.loader import get_dataset
from nirs4all.pipeline.runner import PipelineRunner
from nirs4all.pipeline.config import PipelineConfig
from sample import dataset_config
from sklearn.preprocessing import MinMaxScaler, StandardScaler
from nirs4all.operators.models.cirad_tf import nicon

print("🧪 Testing TensorFlow controller with train_params...")

# Test with your exact configuration
test_data = get_dataset(dataset_config)
tf_config = {
    "pipeline": [
        MinMaxScaler(feature_range=(0.1, 0.8)),
        {"y_processing": StandardScaler()},
        {
            "model": nicon,
            "train_params": {
                "epochs": 10,
                "patience": 5,
                "learning_rate": 0.001,
                "optimizer": "adam",
                "loss": "mse",
                "metrics": ["mae", "mse"],
                "early_stopping": True,
                # "reduce_lr_on_plateau": True,
                # "reduce_lr_factor": 0.5,
                # "reduce_lr_patience": 3,
                "best_model_memory": True,
                "cyclic_lr": True,
                "step_size": 5,
                "batch_size": 8,
                "verbose": 0
            },
        }
    ]
}

config = PipelineConfig(tf_config, "tf_train_params_test")
runner = PipelineRunner()

try:
    result = runner.run(config, test_data)
    print("\n✅ SUCCESS: TensorFlow train_params are now working!")
    print("    - epochs: 500 should be used instead of default 100")
    print("    - patience: 100 should be used instead of default 10")
except Exception as e:
    print(f"\n❌ Failed: {e}")
    import traceback
    traceback.print_exc()

🧪 Testing TensorFlow controller with train_params...
[94mLoading dataset:[0m
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
⚠️ Dataset does not have data for train_group.
⚠️ Dataset does not have data for test_group.
[97m📊 Dataset: sample_data
Features (samples=189, sources=1):
- Source 0: (189, 1, 2151), processings=['raw'], min=-0.265, max=1.436, mean=0.466, var=0.149)
Targets: (samples=189, targets=1, processings=['numeric'])
- numeric: min=1.33, max=128.31, mean=30.779
Indexes:
- "train", ['raw']: 130 samples
- "test", ['raw']: 59 samples[0m
[94m🚀 Starting pipeline config_tf_train_params_test_8bdc1c on dataset sample_data[0m
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
[

In [15]:
# Reset autoreload completely
try:
    %reload_ext autoreload
    %autoreload 0  # Disable autoreload
    %autoreload 2  # Re-enable with full reload
except:
    %load_ext autoreload
    %autoreload 2

from nirs4all.pipeline.runner import PipelineRunner
from nirs4all.pipeline.config import PipelineConfig
from nirs4all.dataset import dataset
from sample import dataset_config, pipeline_config
from nirs4all.dataset.loader import get_dataset
from nirs4all.controllers.registry import reset_registry
from nirs4all.controllers import *

data = get_dataset(dataset_config)
config = PipelineConfig(pipeline_config, "demo_pipeline")

runner = PipelineRunner()
res_dataset, history, pipeline = runner.run(config, data)


# json_config = PipelineConfig("sample.json")
# yaml_config = PipelineConfig("sample.yaml")

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
[94mLoading dataset:[0m
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
⚠️ Dataset does not have data for train_group.
⚠️ Dataset does not have data for test_group.
[97m📊 Dataset: sample_data
Features (samples=189, sources=1):
- Source 0: (189, 1, 2151), processings=['raw'], min=-0.265, max=1.436, mean=0.466, var=0.149)
Targets: (samples=189, targets=1, processings=['numeric'])
- numeric: min=1.33, max=128.31, mean=30.779
Indexes:
- "train", ['raw']: 130 samples
- "test", ['raw']: 59 samples[0m
[94m🚀 Starting pipeline config_demo_pipeline_9a390a on dataset sample_data[0m
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------

In [19]:
# Direct test of nested configuration structure (just test the config parsing)
#
# Purpose: feed a model step whose tunable parameters live under
# finetune_params["model_params"] to the sklearn controller's private helpers
# and check that both expected hyperparameters come back from sampling.
# No pipeline is run.
from unittest.mock import Mock

from sklearn.ensemble import RandomForestRegressor
from nirs4all.controllers.sklearn.op_model import SklearnModelController

print("🧪 Testing nested configuration parsing...")

# Test nested structure config
nested_config = {
    "model": RandomForestRegressor(random_state=42),
    "train_params": {
        # Final training parameters
        "oob_score": True,
        "n_jobs": -1
    },
    "finetune_params": {
        "n_trials": 4,
        "approach": "grid",
        "model_params": {
            # Parameters to optimize
            "n_estimators": [10, 30],
            "max_depth": [3, 5]
        },
        "train_params": {
            # Training parameters during trials
            "n_jobs": 1
        }
    }
}

# Test the extraction logic directly
print("🔍 Testing model config extraction...")

controller = SklearnModelController()
extracted_config = controller._extract_model_config(nested_config)

print(f"✓ Extracted model config: {extracted_config}")
print(f"✓ Found train_params: {extracted_config.get('train_params', {})}")
print(f"✓ Found finetune_params: {extracted_config.get('finetune_params', {})}")

if 'finetune_params' in extracted_config:
    finetune = extracted_config['finetune_params']
    print(f"✓ Found model_params in finetune_params: {finetune.get('model_params', {})}")
    print(f"✓ Found train_params in finetune_params: {finetune.get('train_params', {})}")

# Test hyperparameter sampling with nested structure
print("\n🔍 Testing hyperparameter sampling...")

# Stand-in for a tuning trial: every categorical suggestion deterministically
# returns the first candidate, so the sampled values are predictable.
mock_trial = Mock()
mock_trial.suggest_categorical = Mock(side_effect=lambda name, choices: choices[0])

finetune_params = nested_config['finetune_params']
sampled_params = controller._sample_hyperparameters(mock_trial, finetune_params)

print(f"✓ Sampled parameters: {sampled_params}")
expected_params = ['n_estimators', 'max_depth']
missing_params = []
for param in expected_params:
    if param in sampled_params:
        print(f"  ✓ {param}: {sampled_params[param]}")
    else:
        print(f"  ❌ Missing {param}")
        missing_params.append(param)

# Only claim success when every expected parameter was actually sampled;
# otherwise the summary would contradict the per-parameter report above.
if missing_params:
    print(f"\n❌ Configuration structure parsing test failed: missing {missing_params}")
else:
    print("\n✅ Configuration structure parsing test completed successfully!")
    print("The new nested structure (model_params within finetune_params) is working!")

🧪 Testing nested configuration parsing...
🔍 Testing model config extraction...
✓ Extracted model config: {'model': RandomForestRegressor(random_state=42), 'train_params': {'oob_score': True, 'n_jobs': -1}, 'finetune_params': {'n_trials': 4, 'approach': 'grid', 'model_params': {'n_estimators': [10, 30], 'max_depth': [3, 5]}, 'train_params': {'n_jobs': 1}}, 'model_instance': RandomForestRegressor(random_state=42)}
✓ Found train_params: {'oob_score': True, 'n_jobs': -1}
✓ Found finetune_params: {'n_trials': 4, 'approach': 'grid', 'model_params': {'n_estimators': [10, 30], 'max_depth': [3, 5]}, 'train_params': {'n_jobs': 1}}
✓ Found model_params in finetune_params: {'n_estimators': [10, 30], 'max_depth': [3, 5]}
✓ Found train_params in finetune_params: {'n_jobs': 1}

🔍 Testing hyperparameter sampling...
✓ Sampled parameters: {'n_estimators': 10, 'max_depth': 3}
  ✓ n_estimators: 10
  ✓ max_depth: 3

✅ Configuration structure parsing test completed successfully!
The new nested structure (mo

In [20]:
# Test backward compatibility with old flat structure
#
# Purpose: same check as the nested-structure test, but with the tunable
# parameters placed directly inside finetune_params (the old layout).
# Imports are repeated here so the cell runs on its own instead of relying on
# names left in the kernel by an earlier cell.
from unittest.mock import Mock

from sklearn.ensemble import RandomForestRegressor
from nirs4all.controllers.sklearn.op_model import SklearnModelController

print("🔄 Testing backward compatibility with old flat structure...")

# Test old flat structure (parameters directly in finetune_params)
old_flat_config = {
    "model": RandomForestRegressor(random_state=42),
    "train_params": {
        "oob_score": True,
        "n_jobs": -1
    },
    "finetune_params": {
        # Old style - parameters directly in finetune_params
        "n_trials": 4,
        "approach": "grid",
        "n_estimators": [10, 30],  # Direct parameter
        "max_depth": [3, 5]        # Direct parameter
    }
}

controller = SklearnModelController()
extracted_config = controller._extract_model_config(old_flat_config)

print(f"✓ Extracted old format config: {extracted_config}")

# Test hyperparameter sampling with old flat structure.
# The mock trial always returns the first candidate of each choice list.
mock_trial = Mock()
mock_trial.suggest_categorical = Mock(side_effect=lambda name, choices: choices[0])

finetune_params = old_flat_config['finetune_params']
sampled_params = controller._sample_hyperparameters(mock_trial, finetune_params)

print(f"✓ Sampled parameters from old format: {sampled_params}")
expected_params = ['n_estimators', 'max_depth']
missing_params = []
for param in expected_params:
    if param in sampled_params:
        print(f"  ✓ {param}: {sampled_params[param]}")
    else:
        print(f"  ❌ Missing {param}")
        missing_params.append(param)

# Report a pass only if nothing was missing, so the summary cannot contradict
# the per-parameter lines printed above.
if missing_params:
    print(f"\n❌ Backward compatibility test failed: missing {missing_params}")
else:
    print("\n✅ Backward compatibility test passed!")
    print("Both old flat structure and new nested structure work correctly!")

🔄 Testing backward compatibility with old flat structure...
✓ Extracted old format config: {'model': RandomForestRegressor(random_state=42), 'train_params': {'oob_score': True, 'n_jobs': -1}, 'finetune_params': {'n_trials': 4, 'approach': 'grid', 'n_estimators': [10, 30], 'max_depth': [3, 5]}, 'model_instance': RandomForestRegressor(random_state=42)}
✓ Sampled parameters from old format: {'n_estimators': 10, 'max_depth': 3}
  ✓ n_estimators: 10
  ✓ max_depth: 3

✅ Backward compatibility test passed!
Both old flat structure and new nested structure work correctly!
