In [71]:
# Fix: Add list handling to expand_spec function
from itertools import product, combinations
from collections.abc import Mapping

def expand_spec_fixed(node):
    """Expand a declarative spec into every concrete configuration it describes.

    Rules, applied recursively:

    * ``list``: every element is expanded and the Cartesian product of the
      per-element expansions is returned, one ``list`` per combination. An
      empty list expands to ``[[]]``. A single-element list whose element
      already expands to lists is returned as-is, so no extra nesting level
      is added.
    * non-``Mapping`` value: returned unchanged as ``[node]``.
    * ``{"or": choices}``: the concatenated expansions of every choice.
    * ``{"or": choices, "size": k}``: every ``k``-element combination of
      ``choices``; each combination is expanded and yielded as a list.
    * ``{"or": choices, "size": (lo, hi)}``: the same for every size in
      ``lo..hi`` inclusive. Sizes larger than ``len(choices)`` are skipped.
    * ``{"or": choices, "size": ((lo, hi), (lo2, hi2))}``: second-order
      combinations, delegated to ``_handle_nested_combinations``.
    * mapping holding ``"or"`` plus other keys: the other keys are expanded
      as a normal mapping and each result is merged with each expanded
      choice (the choice wins on key clashes).
    * any other mapping: Cartesian product over the expansions of its values.

    Args:
        node: A scalar, list or mapping describing the search space.

    Returns:
        list: All concrete expansions of ``node``.

    Raises:
        ValueError: If a mapping mixes ``"or"`` with other keys and one of
            the expanded choices is not itself a mapping.
    """
    if isinstance(node, list):
        if not node:
            return [[]]  # Empty list -> single empty result

        if len(node) == 1:
            element_result = expand_spec_fixed(node[0])

            # The lone element already expands to lists (combinations):
            # return them directly to prevent an extra level of wrapping.
            if element_result and isinstance(element_result[0], list):
                return element_result

            # Otherwise wrap every expansion in its own one-item list. The
            # expansion computed above is reused instead of expanding the
            # element a second time, which previously doubled the work at
            # every list-wrapped mapping in a nested spec.
            return [[item] for item in element_result]

        # Expand each element, then take the Cartesian product of the expansions
        expanded_elements = [expand_spec_fixed(element) for element in node]
        return [list(combo) for combo in product(*expanded_elements)]

    # Scalars (anything that is neither a list nor a mapping) stand for themselves
    if not isinstance(node, Mapping):
        return [node]

    # Case 1: pure OR node (with optional size)
    node_keys = set(node.keys())
    if node_keys == {"or"} or node_keys == {"or", "size"}:
        choices = node["or"]
        size = node.get("size", None)

        if size is None:
            # No size constraint: every choice contributes its own expansions
            out = []
            for choice in choices:
                out.extend(expand_spec_fixed(choice))
            return out

        is_pair = isinstance(size, tuple) and len(size) == 2

        # Nested tuple size notation ((inner), (outer)) -> second-order combinations
        if is_pair and isinstance(size[0], tuple) and isinstance(size[1], tuple):
            return _handle_nested_combinations(choices, size)

        # A (from, to) pair covers every size in the inclusive range; a single
        # value is treated as a one-element range so both share one loop.
        if is_pair:
            from_size, to_size = size
            wanted_sizes = range(from_size, to_size + 1)
        else:
            wanted_sizes = (size,)

        out = []
        for s in wanted_sizes:
            # Sizes larger than the number of choices yield nothing
            if s <= len(choices):
                for combo in combinations(choices, s):
                    out.extend(_expand_combination(combo))
        return out

    # Case 2: dict that also contains "or" -> branch and merge
    if "or" in node:
        size = node.get("size", None)
        base = {k: v for k, v in node.items() if k not in ["or", "size"]}
        base_expanded = expand_spec_fixed(base)            # list[dict]

        # Re-expand the OR part on its own so Case 1 handles choices and size
        or_node = {"or": node["or"]}
        if size is not None:
            or_node["size"] = size
        choice_expanded = expand_spec_fixed(or_node)       # list[dict or scalar]

        results = []
        for b in base_expanded:
            for c in choice_expanded:
                if not isinstance(c, Mapping):
                    # Non-dict choices only make sense as values under a key,
                    # not as something to merge into the surrounding dict
                    raise ValueError("Top-level 'or' choices must be dicts.")
                results.append({**b, **c})
        return results

    # Case 3: normal dict -> product over keys
    if not node:
        return [{}]
    keys = []
    options = []
    for k, v in node.items():
        keys.append(k)
        options.append(_expand_value_fixed(v))
    return [dict(zip(keys, combo)) for combo in product(*options)]

def _handle_nested_combinations(choices, nested_size):
    """Handle nested combination sizes like ((1,3), (1,4))"""
    inner_size, outer_size = nested_size
    inner_from, inner_to = inner_size
    outer_from, outer_to = outer_size

    # Step 1: Generate all possible inner combinations (combinations of 1-3 elements)
    inner_combinations = []
    for inner_s in range(inner_from, inner_to + 1):
        if inner_s > len(choices):
            continue
        for combo in combinations(choices, inner_s):
            # Each inner combination is a simple list
            if len(combo) == 1:
                # Single element - don't wrap in extra list
                inner_combinations.append(combo[0])
            else:
                # Multiple elements - keep as list
                inner_combinations.append(list(combo))

    # Step 2: Treat these inner combinations as new "choices" and select outer combinations
    out = []
    for outer_s in range(outer_from, outer_to + 1):
        if outer_s > len(inner_combinations):
            continue
        for outer_combo in combinations(inner_combinations, outer_s):
            # Convert tuple to list and add to results
            out.append(list(outer_combo))

    return out

def _expand_combination(combo):
    """Expand a specific combination of choices by taking their cartesian product."""
    expanded_choices = []
    for choice in combo:
        expanded_choices.append(expand_spec_fixed(choice))

    # Take cartesian product
    results = []
    for expanded_combo in product(*expanded_choices):
        # Return the combination as a single list (flatten one level)
        results.append(list(expanded_combo))

    return results

def _expand_value_fixed(v):
    # Value position returns a list of *values* (scalars or dicts)
    if isinstance(v, Mapping) and ("or" in v.keys()):
        # Value-level OR can yield scalars or dicts as values (with optional size)
        choices = v["or"]
        size = v.get("size", None)

        if size is not None:
            # NEW: Check for nested tuple size notation
            if (isinstance(size, tuple) and len(size) == 2 and
                isinstance(size[0], tuple) and isinstance(size[1], tuple)):
                return _handle_nested_combinations(choices, size)

            # Apply size constraints first, then expand
            vals = []

            # Handle tuple size (from, to) or single size
            if isinstance(size, tuple) and len(size) == 2:
                from_size, to_size = size
                # Generate combinations for all sizes from from_size to to_size (inclusive)
                for s in range(from_size, to_size + 1):
                    if s > len(choices):
                        continue
                    for combo in combinations(choices, s):
                        # Expand this specific combination
                        combo_results = _expand_combination(combo)
                        vals.extend(combo_results)
            else:
                # Single size value
                if size <= len(choices):
                    for combo in combinations(choices, size):
                        # Expand this specific combination
                        combo_results = _expand_combination(combo)
                        vals.extend(combo_results)

            return vals
        else:
            # Original behavior: expand all choices
            vals = []
            for choice in choices:
                ex = expand_spec_fixed(choice)
                # expand_spec returns list; extend with each item (scalar or dict value)
                vals.extend(ex)
            return vals
    elif isinstance(v, Mapping):
        # Nested object: expand to list of dict values
        return expand_spec_fixed(v)
    elif isinstance(v, list):
        # Handle lists in value positions
        return expand_spec_fixed(v)
    else:
        return [v]

In [72]:
# Two-step pipeline: an OR over three scalers, then a nested list of steps.
# Expected count: 3 (first OR) x [2 x 1 x (2 x 2)] (nested list) = 24.
pipeline_config = [
        {"or": [None, "A", "B"]},  # scale the features
        [
            {"or": ["a", "b"]},
            None,
            [{"or": ["1", "2"]}, {"or": ["x", "y"]}],
        ]
    ]

results = expand_spec_fixed(pipeline_config)
print(f"Number of combinations (24): {len(results)}")
# Check the count instead of relying on the reader comparing it with the label
assert len(results) == 24, f"expected 24 combinations, got {len(results)}"
# for i, cfg in enumerate(results):
    # print(f"  {i+1}: {cfg}")

Number of combinations (24): 24


In [73]:
# Same pipeline, but the first step picks exactly 4 of its 5 choices.
# Expected count: C(5,4) = 5 selections x 8 nested-list expansions = 40.
pipeline_config_with_size = [
        {"or": [None, "A", "B", "C", "D"], "size": 4},  # scale the features
        [
            {"or": ["a", "b"]},
            None,
            [{"or": ["1", "2"]}, {"or": ["x", "y"]}],
        ]
    ]

results_with_size = expand_spec_fixed(pipeline_config_with_size)
print(f"Number of combinations (40): {len(results_with_size)}")
# Check the count instead of relying on the reader comparing it with the label
assert len(results_with_size) == 40, f"expected 40 combinations, got {len(results_with_size)}"
# for i, cfg in enumerate(results_with_size):
    # print(f"  {i+1}: {cfg}")

Number of combinations (40): 40


In [83]:
# Same pipeline, but the first step picks 4 or 5 of its 5 choices.
# Expected count: (C(5,4) + C(5,5)) = 6 selections x 8 nested-list expansions = 48.
pipeline_config_with_tuple_size = [
        {"or": [None, "A", "B", "C", "D"], "size": (4, 5)},  # scale the features
        [
            {"or": ["a", "b"]},
            None,
            [{"or": ["1", "2"]}, {"or": ["x", "y"]}],
        ]
    ]

results_with_tuple_size = expand_spec_fixed(pipeline_config_with_tuple_size)
print(f"Number of combinations (48): {len(results_with_tuple_size)}")
# Check the count instead of relying on the reader comparing it with the label
assert len(results_with_tuple_size) == 48, (
    f"expected 48 combinations, got {len(results_with_tuple_size)}"
)
# for i, cfg in enumerate(results_with_tuple_size):
    # print(f"  {i+1}: {cfg}")

Number of combinations (48): 48


In [84]:
# One of the choices is itself an OR node, so every selection that contains it
# fans out into two results ("a" / "b").
choices_with_nested_or = [None, "A", "B", "C", {"or": ["a", "b"]}]

# size -> expected number of results:
#   4      : 1 selection without the nested OR + 4 selections with it (x2) = 9
#   (3, 4) : size 3 gives 4 + 6 x 2 = 16, plus the 9 results of size 4     = 25
for combo_size, expected_count in ((4, 9), ((3, 4), 25)):
    pipeline_config_with_tuple_size = [
        {"or": choices_with_nested_or, "size": combo_size},  # scale the features
    ]
    results_with_tuple_size = expand_spec_fixed(pipeline_config_with_tuple_size)
    print(f"Number of combinations: {len(results_with_tuple_size)}")
    assert len(results_with_tuple_size) == expected_count, (
        f"size={combo_size!r}: expected {expected_count}, got {len(results_with_tuple_size)}"
    )
    # for i, cfg in enumerate(results_with_tuple_size):
        # print(f"  {i+1}: {cfg}")

Number of combinations: 9
Number of combinations: 25


In [85]:
# Print every expansion for a sized OR (1 to 3 of 3 choices -> 3 + 3 + 1 = 7
# results) and for a plain OR (one result per choice -> 3 results).
for p, expected_count in (
    ([{"or": ["A", "B", "C"], "size": (1, 3)}], 7),
    ([{"or": ["A", "B", "C"]}], 3),
):
    r = expand_spec_fixed(p)
    for item in r:
        print(item)
    assert len(r) == expected_count, f"expected {expected_count} results, got {len(r)}"

['A']
['B']
['C']
['A', 'B']
['A', 'C']
['B', 'C']
['A', 'B', 'C']
['A']
['B']
['C']


In [86]:
print("🚀 NEW: Second-order combinations!")
print("="*50)

# Example: size = ((1,3), (1,4)) means:
# - Inner: combinations of 1-3 elements from [A, B, C, D]
# - Outer: select 1-4 of those inner combinations

letters = ["A", "B", "C", "D"]
test_config = [{"or": letters, "size": ((1, 3), (1, 4))}]
results = expand_spec_fixed(test_config)

print(f"Total combinations: {len(results)}")
print("\nBreakdown:")

# Count the first-order (inner) groups independently of expand_spec_fixed
inner_1_combos = len(list(combinations(letters, 1)))  # C(4,1) = 4
inner_2_combos = len(list(combinations(letters, 2)))  # C(4,2) = 6
inner_3_combos = len(list(combinations(letters, 3)))  # C(4,3) = 4
total_inner = inner_1_combos + inner_2_combos + inner_3_combos     # 4+6+4 = 14

print(f"Inner combinations (size 1-3): {total_inner}")
print(f"  Size 1: {inner_1_combos} combinations")
print(f"  Size 2: {inner_2_combos} combinations")
print(f"  Size 3: {inner_3_combos} combinations")

# Now we select 1-4 of these 14 inner combinations
outer_combos = sum(len(list(combinations(range(total_inner), s))) for s in range(1, 5))
print(f"\nOuter combinations (select 1-4 from {total_inner}): {outer_combos}")
print(f"Expected total: {outer_combos}")

# Compare the independently derived expectation with what the expander produced
assert len(results) == outer_combos, (
    f"expected {outer_combos} second-order combinations, got {len(results)}"
)

print("\nFirst 10 results:")
for i, result in enumerate(results[:10]):
    print(f"  {i+1}: {result}")

if len(results) > 10:
    print(f"  ... and {len(results) - 10} more")

🚀 NEW: Second-order combinations!
Total combinations: 1470

Breakdown:
Inner combinations (size 1-3): 14
  Size 1: 4 combinations
  Size 2: 6 combinations
  Size 3: 4 combinations

Outer combinations (select 1-4 from 14): 1470
Expected total: 1470

First 10 results:
  1: ['A']
  2: ['B']
  3: ['C']
  4: ['D']
  5: [['A', 'B']]
  6: [['A', 'C']]
  7: [['A', 'D']]
  8: [['B', 'C']]
  9: [['B', 'D']]
  10: [['C', 'D']]
  ... and 1460 more


In [87]:
print("\n" + "="*60)
print("🔥 ADVANCED EXAMPLES: Different nested combination patterns")
print("="*60)

# Example 1: Your original request - ((1,3), (1,4))
# 14 inner groups (4 + 6 + 4); choosing 1-4 of them: 14 + 91 + 364 + 1001 = 1470
print("\n1️⃣ Original request: size = ((1,3), (1,4))")
print("   Inner: 1-3 elements, Outer: select 1-4 of those")
config1 = [{"or": ["A", "B", "C", "D"], "size": ((1, 3), (1, 4))}]
results1 = expand_spec_fixed(config1)
print(f"   Results: {len(results1)} combinations")
print(f"   Sample: {results1[20:25]}")  # Show middle results
assert len(results1) == 1470, f"expected 1470 combinations, got {len(results1)}"

# Example 2: Smaller nested combinations
# 6 inner groups (3 + 3); choosing 1-2 of them: 6 + 15 = 21
print("\n2️⃣ Smaller example: size = ((1,2), (1,2))")
print("   Inner: 1-2 elements, Outer: select 1-2 of those")
config2 = [{"or": ["X", "Y", "Z"], "size": ((1, 2), (1, 2))}]
results2 = expand_spec_fixed(config2)
print(f"   Results: {len(results2)} combinations")
print("   All results:")
for i, result in enumerate(results2):
    print(f"     {i+1}: {result}")
assert len(results2) == 21, f"expected 21 combinations, got {len(results2)}"

# Example 3: Fixed inner, variable outer
# C(4,2) = 6 inner pairs; choosing 1-3 of them: 6 + 15 + 20 = 41
print("\n3️⃣ Fixed inner size: size = ((2,2), (1,3))")
print("   Inner: exactly 2 elements, Outer: select 1-3 of those")
config3 = [{"or": ["P", "Q", "R", "S"], "size": ((2, 2), (1, 3))}]
results3 = expand_spec_fixed(config3)
print(f"   Results: {len(results3)} combinations")
print(f"   First 5: {results3[:5]}")
assert len(results3) == 41, f"expected 41 combinations, got {len(results3)}"



🔥 ADVANCED EXAMPLES: Different nested combination patterns

1️⃣ Original request: size = ((1,3), (1,4))
   Inner: 1-3 elements, Outer: select 1-4 of those
   Results: 1470 combinations
   Sample: [['A', ['B', 'C']], ['A', ['B', 'D']], ['A', ['C', 'D']], ['A', ['A', 'B', 'C']], ['A', ['A', 'B', 'D']]]

2️⃣ Smaller example: size = ((1,2), (1,2))
   Inner: 1-2 elements, Outer: select 1-2 of those
   Results: 21 combinations
   All results:
     1: ['X']
     2: ['Y']
     3: ['Z']
     4: [['X', 'Y']]
     5: [['X', 'Z']]
     6: [['Y', 'Z']]
     7: ['X', 'Y']
     8: ['X', 'Z']
     9: ['X', ['X', 'Y']]
     10: ['X', ['X', 'Z']]
     11: ['X', ['Y', 'Z']]
     12: ['Y', 'Z']
     13: ['Y', ['X', 'Y']]
     14: ['Y', ['X', 'Z']]
     15: ['Y', ['Y', 'Z']]
     16: ['Z', ['X', 'Y']]
     17: ['Z', ['X', 'Z']]
     18: ['Z', ['Y', 'Z']]
     19: [['X', 'Y'], ['X', 'Z']]
     20: [['X', 'Y'], ['Y', 'Z']]
     21: [['X', 'Z'], ['Y', 'Z']]

3️⃣ Fixed inner size: size = ((2,2), (1,3))
   Inn

In [88]:
print("🔧 TESTING BRACKET FIX:")
print("="*35)

# Test the bracket fix with simple example:
# single-element inner groups must appear bare ('A'), multi-element ones as
# nested lists (['A', 'B']).
test_simple = [{"or": ["A", "B", "C"], "size": ((1, 2), (1, 2))}]
results_simple = expand_spec_fixed(test_simple)

print("Results after bracket fix:")
for i, result in enumerate(results_simple):
    print(f"  {i+1}: {result}")

# 6 inner groups (3 singles + 3 pairs); choosing 1-2 of them: 6 + 15 = 21
assert len(results_simple) == 21, f"expected 21 combinations, got {len(results_simple)}"
assert results_simple[0] == ["A"], "single-element groups must not be wrapped twice"
assert results_simple[3] == [["A", "B"]], "multi-element groups must stay nested lists"


🔧 TESTING BRACKET FIX:
Results after bracket fix:
  1: ['A']
  2: ['B']
  3: ['C']
  4: [['A', 'B']]
  5: [['A', 'C']]
  6: [['B', 'C']]
  7: ['A', 'B']
  8: ['A', 'C']
  9: ['A', ['A', 'B']]
  10: ['A', ['A', 'C']]
  11: ['A', ['B', 'C']]
  12: ['B', 'C']
  13: ['B', ['A', 'B']]
  14: ['B', ['A', 'C']]
  15: ['B', ['B', 'C']]
  16: ['C', ['A', 'B']]
  17: ['C', ['A', 'C']]
  18: ['C', ['B', 'C']]
  19: [['A', 'B'], ['A', 'C']]
  20: [['A', 'B'], ['B', 'C']]
  21: [['A', 'C'], ['B', 'C']]


In [89]:
# Summary table: one representative spec per supported "size" form.
examples = [
    ("Basic or", [{"or": ["A", "B", "C"]}]),
    ("Size", [{"or": ["A", "B", "C", "D"], "size": 2}]),
    ("Size range", [{"or": ["A", "B", "C", "D"], "size": (2, 3)}]),
    ("Second-order", [{"or": ["A", "B", "C"], "size": ((1, 2), (1, 2))}]),
]

# Expected result counts, so a regression fails loudly instead of only
# changing a printed number:
#   Basic or     : one result per choice            = 3
#   Size         : C(4,2)                           = 6
#   Size range   : C(4,2) + C(4,3) = 6 + 4          = 10
#   Second-order : 6 inner groups, pick 1-2: 6 + 15 = 21
expected_counts = {
    "Basic or": 3,
    "Size": 6,
    "Size range": 10,
    "Second-order": 21,
}

for name, config in examples:
    result = expand_spec_fixed(config)
    print(f"  • {name:12}: {len(result):4} combinations")
    assert len(result) == expected_counts[name], (
        f"{name}: expected {expected_counts[name]} combinations, got {len(result)}"
    )



  • Basic or    :    3 combinations
  • Size        :    6 combinations
  • Size range  :   10 combinations
  • Second-order:   21 combinations


In [None]:
# Second-order combinations in a value position (under the "feature" key).
# Inner groups: every non-empty subset of the 6 names = 2**6 - 1 = 63.
# Outer: pick 1-3 of those groups = 63 + 1953 + 39711 = 41727 results.
pipeline = [
    # {"or":['MinMaxScaler', 'StandardScaler']},
    {"feature": {"or":["None", "savgol", "snv", "msc", "haar", "gaussian"], "size": ((1,6),(1,3))}}
]

r = expand_spec_fixed(pipeline)
print(f"Number of combinations: {len(r)}")
# Check the count instead of relying on the printed number alone
assert len(r) == 41727, f"expected 41727 combinations, got {len(r)}"
# for i, cfg in enumerate(r):
    # print(f"  {i+1}: {cfg}")

Number of combinations: 41727
