In [22]:
import importlib
import nirs4all.pipeline.generator as gen_module
importlib.reload(gen_module)
from nirs4all.pipeline.generator import expand_spec, count_combinations

In [7]:
pipeline_config = [
        {"_or_": [None, "A", "B"]},  # scale the features
        [
            {"_or_": ["a", "b"]},
            None,
            [{"_or_": ["1", "2"]}, {"_or_": ["x", "y"]}],
        ]
    ]

results = expand_spec(pipeline_config)
print(f"Number of combinations (24): {len(results)}")
# for i, cfg in enumerate(results):
    # print(f"  {i+1}: {cfg}")

Number of combinations (24): 24


In [8]:
pipeline_config_with_size = [
        {"_or_": [None, "A", "B", "C", "D"], "size": 4},  # scale the features
        [
            {"_or_": ["a", "b"]},
            None,
            [{"_or_": ["1", "2"]}, {"_or_": ["x", "y"]}],
        ]
    ]

results_with_size = expand_spec(pipeline_config_with_size)
# print(f"Expected: C(5,3) = 10 c(5,4) = 5")
print(f"Number of combinations (40): {len(results_with_size)}")
# for i, cfg in enumerate(results_with_size):
    # print(f"  {i+1}: {cfg}")

Number of combinations (40): 40


In [9]:
pipeline_config_with_tuple_size = [
        {"_or_": [None, "A", "B", "C", "D"], "size": (4, 5)},  # scale the features
        [
            {"_or_": ["a", "b"]},
            None,
            [{"_or_": ["1", "2"]}, {"_or_": ["x", "y"]}],
        ]
    ]

results_with_tuple_size = expand_spec(pipeline_config_with_tuple_size)
# print(f"Expected: C(5,3) + C(5,4) + C(5,5) = 10 + 5 + 1 = 16 combinations (before multiplying by other expansions)")
print(f"Number of combinations (48): {len(results_with_tuple_size)}")
# for i, cfg in enumerate(results_with_tuple_size):
    # print(f"  {i+1}: {cfg}")

Number of combinations (48): 48


In [10]:
pipeline_config_with_tuple_size = [
        {"_or_": [None, "A", "B", "C", {"_or_": ["a", "b"]}], "size": 4},  # scale the features
    ]
results_with_tuple_size = expand_spec(pipeline_config_with_tuple_size)
print(f"Number of combinations: {len(results_with_tuple_size)}")
# for i, cfg in enumerate(results_with_tuple_size):
    # print(f"  {i+1}: {cfg}")

pipeline_config_with_tuple_size = [
        {"_or_": [None, "A", "B", "C", {"_or_": ["a", "b"]}], "size": (3, 4)},  # scale the features
    ]
results_with_tuple_size = expand_spec(pipeline_config_with_tuple_size)
print(f"Number of combinations: {len(results_with_tuple_size)}")
# for i, cfg in enumerate(results_with_tuple_size):
    # print(f"  {i+1}: {cfg}")

Number of combinations: 9
Number of combinations: 25


In [11]:
p = [{"_or_": ["A", "B", "C"], "size": (1, 3)}]
r = expand_spec(p)
for item in r:
    print(item)

p = [{"_or_": ["A", "B", "C"]}]
r = expand_spec(p)
for item in r:
    print(item)

['A']
['B']
['C']
['A', 'B']
['A', 'C']
['B', 'C']
['A', 'B', 'C']
['A']
['B']
['C']


In [12]:
# Second-order combinations: [outer, inner] where inner uses permutations
config = [{"_or_": ["A", "B", "C"], "size": [2, 2]}]  # outer=2, inner=2
results = expand_spec(config)

print(f"Second-order [2, 2] results: {len(results)}")
for i, result in enumerate(results):
    print(f"  {i+1}: {result}")
# Expected: Inner permutations like ['A','B'] and ['B','A'] are different

Second-order [2, 2] results: 15
  1: [['A', 'B'], ['A', 'C']]
  2: [['A', 'B'], ['B', 'A']]
  3: [['A', 'B'], ['B', 'C']]
  4: [['A', 'B'], ['C', 'A']]
  5: [['A', 'B'], ['C', 'B']]
  6: [['A', 'C'], ['B', 'A']]
  7: [['A', 'C'], ['B', 'C']]
  8: [['A', 'C'], ['C', 'A']]
  9: [['A', 'C'], ['C', 'B']]
  10: [['B', 'A'], ['B', 'C']]
  11: [['B', 'A'], ['C', 'A']]
  12: [['B', 'A'], ['C', 'B']]
  13: [['B', 'C'], ['C', 'A']]
  14: [['B', 'C'], ['C', 'B']]
  15: [['C', 'A'], ['C', 'B']]


In [13]:
# Compare first-order vs second-order
choices = ["A", "B"]

# First-order: combinations (order doesn't matter)
config1 = [{"_or_": choices, "size": 2}]
results1 = expand_spec(config1)
print("First-order size=2:")
for i, result in enumerate(results1):
    print(f"  {i+1}: {result}")

# Second-order: permutations within inner (order matters)
config2 = [{"_or_": choices, "size": [1, 2]}]  # outer=1, inner=2
results2 = expand_spec(config2)
print(f"\nSecond-order [1, 2]:")
for i, result in enumerate(results2):
    print(f"  {i+1}: {result}")
# Expected: Both ['A','B'] and ['B','A'] appear

First-order size=2:
  1: ['A', 'B']

Second-order [1, 2]:
  1: [['A', 'B']]
  2: [['B', 'A']]


In [14]:
# Test inner permutations: [A, [B,C]] != [A, [C,B]]
config = [{"_or_": ["A", "B", "C"], "size": [(1,2), 2]}]
results = expand_spec(config)

print("Testing inner permutations:")
for i, result in enumerate(results):
    print(f"  {i+1}: {result}")
# Expected: Different orderings within sub-arrays

Testing inner permutations:
  1: [['A', 'B']]
  2: [['A', 'C']]
  3: [['B', 'A']]
  4: [['B', 'C']]
  5: [['C', 'A']]
  6: [['C', 'B']]
  7: [['A', 'B'], ['A', 'C']]
  8: [['A', 'B'], ['B', 'A']]
  9: [['A', 'B'], ['B', 'C']]
  10: [['A', 'B'], ['C', 'A']]
  11: [['A', 'B'], ['C', 'B']]
  12: [['A', 'C'], ['B', 'A']]
  13: [['A', 'C'], ['B', 'C']]
  14: [['A', 'C'], ['C', 'A']]
  15: [['A', 'C'], ['C', 'B']]
  16: [['B', 'A'], ['B', 'C']]
  17: [['B', 'A'], ['C', 'A']]
  18: [['B', 'A'], ['C', 'B']]
  19: [['B', 'C'], ['C', 'A']]
  20: [['B', 'C'], ['C', 'B']]
  21: [['C', 'A'], ['C', 'B']]


In [15]:
# Summary of all features
examples = [
    ("Basic or", [{"_or_": ["A", "B", "C"]}]),
    ("Size", [{"_or_": ["A", "B", "C", "D"], "size": 2}]),
    ("Size range", [{"_or_": ["A", "B", "C", "D"], "size": (2, 3)}]),
    ("Second-order", [{"_or_": ["A", "B", "C"], "size": [2, 2]}]),
    ("With count", [{"_or_": ["A", "B", "C", "D"], "size": (2, 3), "count": 5}]),
]

for name, config in examples:
    result = expand_spec(config)
    print(f"{name:12}: {len(result):4} combinations")

Basic or    :    3 combinations
Size        :    6 combinations
Size range  :   10 combinations
Second-order:   15 combinations
With count  :    5 combinations


In [16]:
# Count feature: random sampling
config = [{"_or_": ["A", "B", "C", "D"], "size": (2, 3), "count": 5}]
results = expand_spec(config)

print("Random 5 combinations from size (2,3):")
for i, result in enumerate(results):
    print(f"  {i+1}: {result}")
# Expected: Random 5 from total 16 combinations

Random 5 combinations from size (2,3):
  1: ['A', 'C']
  2: ['A', 'D']
  3: ['A', 'B', 'C']
  4: ['B', 'C']
  5: ['A', 'B']


In [17]:
# Second-order with count
config = [{"_or_": ["A", "B", "C", "D"], "size": [2, 2], "count": 4}]
results = expand_spec(config)

print("Second-order [2, 2] with count=4:")
for i, result in enumerate(results):
    print(f"  {i+1}: {result}")
# Expected: Random 4 from many permutation-based combinations

Second-order [2, 2] with count=4:
  1: [['A', 'B'], ['B', 'C']]
  2: [['B', 'A'], ['C', 'A']]
  3: [['B', 'A'], ['B', 'D']]
  4: [['A', 'C'], ['B', 'D']]


In [18]:
# Second-order with count
config = [{"_or_": ["A", "B", "C", "D"], "size": [3, (1,4)]}]#, "count": 4}]
results = expand_spec(config)

print(len(results), "results")
# print 10 random results
for i, result in enumerate(random.sample(results, min(10, len(results)))):
    print(f"  {i+1}: {result}")

# for i, result in enumerate(results):
    # print(f"  {i+1}: {result}")
# Expected: Random 4 from many permutation-based combinations

41664 results


NameError: name 'random' is not defined

## ✅ FINAL SYNTAX REFERENCE

### Basic Features:
- `{"or": ["A", "B", "C"]}` → All choices
- `{"or": ["A", "B", "C"], "size": 2}` → Combinations of size 2  
- `{"or": ["A", "B", "C"], "size": (1, 3)}` → Combinations of size 1 to 3
- `{"or": ["A", "B", "C"], "count": 5}` → Random 5 choices

### Second-Order (Array Syntax):
- `{"or": ["A", "B", "C"], "size": [outer, inner]}`
- **Inner**: Uses **permutations** (order matters within sub-arrays)  
- **Outer**: Uses **combinations** (order doesn't matter for selection)
- **Key**: `['A', 'B']` ≠ `['B', 'A']` within inner arrays

### Examples:
- `[2, 2]`: Select 2 arrangements of 2 elements each
- `[(1,3), 2]`: Select 1-3 arrangements of exactly 2 elements  
- `[2, (1,3)]`: Select exactly 2 arrangements of 1-3 elements
- `[2, 2, "count": 4]`: Random 4 from all second-order combinations

In [None]:
config = [
    # {"_or_": ["MinmaxScaler", "StandardScaler"]},
    # {"y": {"_or_": ["MinmaxScaler", "RobustScaler"]}},
    {
        "feature":[
            {"_or_": ["detrend", "savgol", "snv", "msc", "haar", "gaussian","derivate","gauss2","acp"], "size": [(1, 7),(1,3)]}
        ]
    }
]

count = count_combinations(config)
print(count)

In [None]:
# Test the counting function vs actual generation
test_configs = [
    ("Basic OR", [{"_or_": ["A", "B", "C"]}]),
    ("Size 2", [{"_or_": ["A", "B", "C", "D"], "size": 2}]),
    ("Size range", [{"_or_": ["A", "B", "C", "D"], "size": (2, 3)}]),
    ("Second-order", [{"_or_": ["A", "B", "C"], "size": [2, 2]}]),
    ("With count", [{"_or_": ["A", "B", "C", "D"], "size": (2, 3), "count": 5}]),
]

print("Configuration Counting vs Actual Generation:")
print("=" * 50)
for name, config in test_configs:
    # Count without generating
    estimated_count = count_combinations(config)

    # Actually generate to verify
    actual_results = expand_spec(config)
    actual_count = len(actual_results)

    match = "✅" if estimated_count == actual_count else "❌"
    print(f"{name:12}: Estimated {estimated_count:4}, Actual {actual_count:4} {match}")

# Test large configuration without actually generating
print(f"\nLarge configuration analysis:")
large_config = [{"_or_": ["A", "B", "C", "D", "E"], "size": [(1, 4), (1, 3)]}]
estimated = count_combinations(large_config)
print(f"Estimated combinations: {estimated:,}")
print(f"(Would be expensive to generate all {estimated:,} combinations!)")

In [None]:
# Quick utility: estimate before generating
def estimate_and_generate(config, max_safe=1000):
    """Estimate count and conditionally generate based on safety threshold."""
    estimated = count_combinations(config)

    print(f"Estimated combinations: {estimated:,}")

    if estimated <= max_safe:
        print("✅ Safe to generate - proceeding...")
        results = expand_spec(config)
        print(f"Generated {len(results)} results")
        return results
    else:
        print(f"⚠️  Large space detected! Consider adding 'count' limit.")
        print(f"   Recommend: add '\"count\": {max_safe}' to sample randomly")
        return None

# Example usage
print("🔍 Smart generation with safety check:")
safe_config = [{"_or_": ["A", "B", "C"], "size": 2}]
results = estimate_and_generate(safe_config)

print(f"\n🚨 Large configuration example:")
large_config = [{"_or_": ["A", "B", "C", "D", "E"], "size": [(1, 4), (1, 3)]}]
estimate_and_generate(large_config, max_safe=100)

# 🆕 Testing _range_ Feature

Testing the new `_range_` keyword for numeric sequences with both array and dictionary syntax.

In [23]:
# Test 1: Basic array syntax [from, to] and [from, to, step]
print("🔧 Test 1: Array syntax")

# Basic range with default step=1
config1 = [{"_range_": [1, 5]}]
results1 = expand_spec(config1)
print(f"Range [1, 5]: {results1}")

# Range with custom step
config2 = [{"_range_": [0, 10, 2]}]
results2 = expand_spec(config2)
print(f"Range [0, 10, 2]: {results2}")

# Negative step (descending)
config3 = [{"_range_": [10, 1, -2]}]
results3 = expand_spec(config3)
print(f"Range [10, 1, -2]: {results3}")

print(f"✅ Array syntax tests completed")

🔧 Test 1: Array syntax
Range [1, 5]: [[1], [2], [3], [4], [5]]
Range [0, 10, 2]: [[0], [2], [4], [6], [8], [10]]
Range [10, 1, -2]: [[10], [8], [6], [4], [2]]
✅ Array syntax tests completed


In [None]:
# Test 2: Dictionary syntax with from/to/step
print("🔧 Test 2: Dictionary syntax")

# Basic range with dictionary syntax
config1 = [{"_range_": {"from": 1, "to": 5}}]
results1 = expand_spec(config1)
print(f"Range dict {{\"from\": 1, \"to\": 5}}: {results1}")

# Range with custom step using dict
config2 = [{"_range_": {"from": 0, "to": 20, "step": 5}}]
results2 = expand_spec(config2)
print(f"Range dict {{\"from\": 0, \"to\": 20, \"step\": 5}}: {results2}")

# Mixed with count sampling
config3 = [{"_range_": {"from": 1, "to": 100, "step": 7}, "count": 5}]
results3 = expand_spec(config3)
print(f"Range with count=5: {results3} (random 5 from 1 to 100 step 7)")

print(f"✅ Dictionary syntax tests completed")

In [24]:
# Test 3: Combinations with other features
print("🔧 Test 3: Combining _range_ with _or_ and complex structures")

# Combine range with OR choices
config1 = [
    {"_or_": ["A", "B"]},
    {"param": {"_range_": [1, 3]}},
    {"_range_": [10, 12]}
]
results1 = expand_spec(config1)
print(f"Complex combination count: {len(results1)}")
print("Sample results:")
for i, result in enumerate(results1[:4]):  # Show first 4
    print(f"  {i+1}: {result}")

# Use counting to verify
estimated_count = count_combinations(config1)
print(f"Estimated: {estimated_count}, Actual: {len(results1)} ({'✅' if estimated_count == len(results1) else '❌'})")

# Range in value position with other constraints
config2 = {
    "model_params": {
        "n_estimators": {"_range_": [10, 50, 10]},
        "max_depth": {"_or_": [3, 5, 7]}
    }
}
results2 = expand_spec(config2)
print(f"\nModel params combinations: {len(results2)}")
for i, result in enumerate(results2):
    print(f"  {i+1}: {result}")

print(f"✅ Complex combination tests completed")

🔧 Test 3: Combining _range_ with _or_ and complex structures
Complex combination count: 18
Sample results:
  1: ['A', {'param': 1}, 10]
  2: ['A', {'param': 1}, 11]
  3: ['A', {'param': 1}, 12]
  4: ['A', {'param': 2}, 10]
Estimated: 18, Actual: 18 (✅)

Model params combinations: 15
  1: {'model_params': {'n_estimators': 10, 'max_depth': 3}}
  2: {'model_params': {'n_estimators': 10, 'max_depth': 5}}
  3: {'model_params': {'n_estimators': 10, 'max_depth': 7}}
  4: {'model_params': {'n_estimators': 20, 'max_depth': 3}}
  5: {'model_params': {'n_estimators': 20, 'max_depth': 5}}
  6: {'model_params': {'n_estimators': 20, 'max_depth': 7}}
  7: {'model_params': {'n_estimators': 30, 'max_depth': 3}}
  8: {'model_params': {'n_estimators': 30, 'max_depth': 5}}
  9: {'model_params': {'n_estimators': 30, 'max_depth': 7}}
  10: {'model_params': {'n_estimators': 40, 'max_depth': 3}}
  11: {'model_params': {'n_estimators': 40, 'max_depth': 5}}
  12: {'model_params': {'n_estimators': 40, 'max_depth