In [1]:
import pandas as pd
from src.executor.match_recognize import match_recognize

# Baseline for the PREV investigation: one customer, and a DEFINE clause that
# compares price against a constant only (no navigation functions involved).
data = [
    ('cust_1', day, amount)
    for day, amount in (
        ('2020-05-11', 100),
        ('2020-05-12', 200),
        ('2020-05-14', 100),
        ('2020-05-16', 50),
        ('2020-05-17', 100),
    )
]

# Build the frame in a single expression; order_date is parsed to datetime
# while keeping its original column position.
df = pd.DataFrame(
    data,
    columns=['customer_id', 'order_date', 'price'],
).assign(order_date=lambda frame: pd.to_datetime(frame['order_date']))

print("Test data:", df, sep="\n", end="\n\n")

# Query under test: START is undefined, DOWN is any row priced under 150.
query_simple = """
SELECT customer_id, start_price, final_price, start_date, final_date
    FROM orders
        MATCH_RECOGNIZE (
            PARTITION BY customer_id
            ORDER BY order_date
            MEASURES
                START.price AS start_price,
                LAST(DOWN.price) AS final_price,
                START.order_date AS start_date,
                LAST(DOWN.order_date) AS final_date
            ONE ROW PER MATCH
            AFTER MATCH SKIP PAST LAST ROW
            PATTERN (START DOWN+)
            DEFINE
                DOWN AS price < 150
            );
"""

print("Testing simple query without PREV:")
try:
    result = match_recognize(query_simple, df)
    print(result)
except Exception as err:
    # Report the message and the full stack so a failure is visible in the
    # cell output instead of aborting the notebook run.
    import traceback
    print(f"Error: {err}")
    traceback.print_exc()
print()

DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT customer_id, start_price, final_price, start_date, final_date FROM orders MATCH_RECOGNIZE ( PARTITION BY customer_id ORDER BY order_date MEASURES START.price AS start_price, LAST(DOWN.price) AS final_price, START.order_date AS start_date, LAST(DOWN.order_date) AS final_date ONE ROW PER MATCH AFTER MATCH SKIP PAST LAST ROW PATTERN (START DOWN+) DEFINE DOWN AS price < 150 );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=customer_id, metadata={}), SelectItem(expression=start_price, metadata={}), SelectItem(expression=final_price, metadata={}), SelectItem(expression=start_date, metadata={}), SelectItem(expression=final_date, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='orders')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extrac

Test data:
  customer_id order_date  price
0      cust_1 2020-05-11    100
1      cust_1 2020-05-12    200
2      cust_1 2020-05-14    100
3      cust_1 2020-05-16     50
4      cust_1 2020-05-17    100

Testing simple query without PREV:
Pattern value: 'START DOWN+'
Pattern value: 'START DOWN+'
Creating transition for variable 'START' with condition: 'TRUE'
Creating transition for variable 'DOWN' with condition: 'price < 150'
Initialized matcher with excluded variables: set()
Find matches with all_rows=False, show_empty=True, include_unmatched=False
Starting match at index 0, state: State 0 (Non-accept, Vars: START)
Testing row 0, data: {'customer_id': 'cust_1', 'order_date': Timestamp('2020-05-11 00:00:00'), 'price': 100}
  Evaluating condition for var: START
    Condition passed for START
  Assigned row 0 to variable START
Testing row 1, data: {'customer_id': 'cust_1', 'order_date': Timestamp('2020-05-12 00:00:00'), 'price': 200}
  Evaluating condition for var: DOWN
    Condition fa

In [6]:

import pandas as pd
from src.executor.match_recognize import match_recognize
# Orders for two customers, interleaved by date. The query below partitions
# by customer_id and orders each partition by order_date.
data = [
    ('cust_1', '2020-05-11', 100),
    ('cust_1', '2020-05-12', 200),
    ('cust_2', '2020-05-13',   8),
    ('cust_1', '2020-05-14', 100),
    ('cust_2', '2020-05-15',   4),
    ('cust_1', '2020-05-16',  50),
    ('cust_1', '2020-05-17', 100),
    ('cust_2', '2020-05-18',   6),
]

# Create DataFrame
df = pd.DataFrame(data, columns=['customer_id', 'order_date', 'price'])

# Convert order_date column to datetime
df['order_date'] = pd.to_datetime(df['order_date'])

# Display the DataFrame
print(df)

# V-shape query: an undefined START row, one or more DOWN rows (price below
# PREV(price)), then one or more UP rows (price above PREV(price)).
# This cell does not use PERMUTE; the variable name and the printed label
# below describe the query that is actually executed.
query_v_shape = """
SELECT customer_id, start_price, bottom_price, final_price, start_date, final_date
    FROM orders
        MATCH_RECOGNIZE (
            PARTITION BY customer_id
            ORDER BY order_date
            MEASURES
                START.price AS start_price,
                LAST(DOWN.price) AS bottom_price,
                LAST(UP.price) AS final_price,
                START.order_date AS start_date,
                LAST(UP.order_date) AS final_date
            ONE ROW PER MATCH
            AFTER MATCH SKIP PAST LAST ROW
            PATTERN (START DOWN+ UP+)
            DEFINE
                DOWN AS price < PREV(price),
                UP AS price > PREV(price)
            );
"""

print("Test: V-shape (START DOWN+ UP+) with PREV - Should match a price drop followed by a rise within each customer")
output_df = match_recognize(query_v_shape, df)
print(output_df)
print("\n")

DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT customer_id, start_price, bottom_price, final_price, start_date, final_date FROM orders MATCH_RECOGNIZE ( PARTITION BY customer_id ORDER BY order_date MEASURES START.price AS start_price, LAST(DOWN.price) AS bottom_price, LAST(UP.price) AS final_price, START.order_date AS start_date, LAST(UP.order_date) AS final_date ONE ROW PER MATCH AFTER MATCH SKIP PAST LAST ROW PATTERN (START DOWN+ UP+) DEFINE DOWN AS price < PREV(price), UP AS price > PREV(price) );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=customer_id, metadata={}), SelectItem(expression=start_price, metadata={}), SelectItem(expression=bottom_price, metadata={}), SelectItem(expression=final_price, metadata={}), SelectItem(expression=start_date, metadata={}), SelectItem(expression=final_date, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='orde

  customer_id order_date  price
0      cust_1 2020-05-11    100
1      cust_1 2020-05-12    200
2      cust_2 2020-05-13      8
3      cust_1 2020-05-14    100
4      cust_2 2020-05-15      4
5      cust_1 2020-05-16     50
6      cust_1 2020-05-17    100
7      cust_2 2020-05-18      6
Test 1: Basic PERMUTE - Should match all sequences with A, B, C in any order
Pattern value: 'START DOWN+ UP+'
Pattern value: 'START DOWN+ UP+'
Creating transition for variable 'START' with condition: 'TRUE'
Creating transition for variable 'DOWN' with condition: 'price < PREV(price)'
Creating transition for variable 'UP' with condition: 'price > PREV(price)'
Initialized matcher with excluded variables: set()
Find matches with all_rows=False, show_empty=True, include_unmatched=False
Starting match at index 0, state: State 0 (Non-accept, Vars: START)
Testing row 0, data: {'customer_id': 'cust_1', 'order_date': Timestamp('2020-05-11 00:00:00'), 'price': 100}
  Evaluating condition for var: START
    Condit

In [1]:
import pandas as pd
from src.executor.match_recognize import match_recognize

# Four partitions (seq 1-4); each holds the three event types in a different
# order. Rows are written as compact tuples and expanded into dicts.
data = [
    dict(zip(("id", "seq", "step", "event_type", "value"), row))
    for row in (
        # seq 1: A-B-C (start, middle, end)
        (1, 1, 1, "start", 100),
        (2, 1, 2, "middle", 200),
        (3, 1, 3, "end", 300),
        # seq 2: B-A-C (middle, start, end)
        (4, 2, 1, "middle", 250),
        (5, 2, 2, "start", 150),
        (6, 2, 3, "end", 350),
        # seq 3: A-C-B (start, end, middle)
        (7, 3, 1, "start", 175),
        (8, 3, 2, "end", 275),
        (9, 3, 3, "middle", 375),
        # seq 4: C-B-A (end, middle, start)
        (10, 4, 1, "end", 225),
        (11, 4, 2, "middle", 325),
        (12, 4, 3, "start", 425),
    )
]

df = pd.DataFrame(data)

print("Testing PERMUTE Patterns\n")

# Test 1: plain PERMUTE over A, B and C, one summary row per match.
query_basic_permute = """
SELECT * FROM memory.default.op2 MATCH_RECOGNIZE(
    PARTITION BY seq
    ORDER BY step
    MEASURES 
        CLASSIFIER() AS pattern_var,
        MATCH_NUMBER() AS match_num,
        A.value AS a_value,
        B.value AS b_value,
        C.value AS c_value
    ONE ROW PER MATCH
    PATTERN (PERMUTE(A, B, C))
    DEFINE 
        A AS event_type = 'start',
        B AS event_type = 'middle',
        C AS event_type = 'end'
);
"""

print("Test 1: Basic PERMUTE - Should match all sequences with A, B, C in any order")
output_df = match_recognize(query_basic_permute, df)
# Result followed by the same two trailing blank lines as before.
print(output_df, "\n", sep="\n")

INFO:row_match_recognize.src.matcher.production_aggregates:MeasureEvaluator enhanced with production aggregate support
INFO:row_match_recognize.src.executor.match_recognize:Production aggregates enabled for MeasureEvaluator
DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.op2 MATCH_RECOGNIZE( PARTITION BY seq ORDER BY step MEASURES CLASSIFIER() AS pattern_var, MATCH_NUMBER() AS match_num, A.value AS a_value, B.value AS b_value, C.value AS c_value ONE ROW PER MATCH PATTERN (PERMUTE(A, B, C)) DEFINE A AS event_type = 'start', B AS event_type = 'middle', C AS event_type = 'end' );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: Part

Testing PERMUTE Patterns

Test 1: Basic PERMUTE - Should match all sequences with A, B, C in any order
Pattern value: 'PERMUTE(A, B, C)'
Pattern value: 'PERMUTE(A, B, C)'


DEBUG:row_match_recognize.src.matcher.condition_evaluator:[DEBUG] COMPARE RESULT: start Eq start = True (evaluating for var=A)
DEBUG:row_match_recognize.src.matcher.matcher:    Condition passed for A
DEBUG:row_match_recognize.src.matcher.matcher:    DEBUG: condition result=True, type=<class 'bool'>
DEBUG:row_match_recognize.src.matcher.matcher:  [DEBUG] Clearing context.current_var (was A)
DEBUG:row_match_recognize.src.matcher.matcher:  Transition A: accepting=False, has_back_ref=False, is_prerequisite=False
DEBUG:row_match_recognize.src.matcher.matcher:Selected simple transition: A -> state 1 (alternation priority: N/A)
DEBUG:row_match_recognize.src.matcher.matcher:  Assigned row 0 to variable A
DEBUG:row_match_recognize.src.matcher.matcher:Testing row 1, data: {'id': 8, 'seq': 3, 'step': 2, 'event_type': 'end', 'value': 275}
DEBUG:row_match_recognize.src.matcher.matcher:  Current var_assignments: {'A': [0]}
DEBUG:row_match_recognize.src.matcher.matcher:  Evaluating condition for var:

   seq  step event_type pattern_var  match_num  a_value  b_value  c_value
0    1     1      start           C          1      100      200      300




In [14]:
import pandas as pd
from src.executor.match_recognize import match_recognize

# Four partitions (seq 1-4); each holds the three event types in a different
# order. Rows are written as compact tuples and expanded into dicts.
data = [
    dict(zip(("id", "seq", "step", "event_type", "value"), row))
    for row in (
        # seq 1: A-B-C (start, middle, end)
        (1, 1, 1, "start", 100),
        (2, 1, 2, "middle", 200),
        (3, 1, 3, "end", 300),
        # seq 2: B-A-C (middle, start, end)
        (4, 2, 1, "middle", 250),
        (5, 2, 2, "start", 150),
        (6, 2, 3, "end", 350),
        # seq 3: A-C-B (start, end, middle)
        (7, 3, 1, "start", 175),
        (8, 3, 2, "end", 275),
        (9, 3, 3, "middle", 375),
        # seq 4: C-B-A (end, middle, start)
        (10, 4, 1, "end", 225),
        (11, 4, 2, "middle", 325),
        (12, 4, 3, "start", 425),
    )
]

df = pd.DataFrame(data)

print("Testing PERMUTE Patterns\n")

# Test 1 again, but the query carries no ROWS PER MATCH clause (a
# whitespace-only line sits where it would be).
# NOTE(review): presumably meant to exercise the default row mode - confirm.
query_basic_permute = """
SELECT * FROM memory.default.op2 MATCH_RECOGNIZE(
    PARTITION BY seq
    ORDER BY step
    MEASURES 
        CLASSIFIER() AS pattern_var,
        MATCH_NUMBER() AS match_num,
        A.value AS a_value,
        B.value AS b_value,
        C.value AS c_value
  
    PATTERN (PERMUTE(A, B, C))
    DEFINE 
        A AS event_type = 'start',
        B AS event_type = 'middle',
        C AS event_type = 'end'
);
"""

print("Test 1: Basic PERMUTE - Should match all sequences with A, B, C in any order")
output_df = match_recognize(query_basic_permute, df)
# Result followed by the same two trailing blank lines as before.
print(output_df, "\n", sep="\n")

DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.op2 MATCH_RECOGNIZE( PARTITION BY seq ORDER BY step MEASURES CLASSIFIER() AS pattern_var, MATCH_NUMBER() AS match_num, A.value AS a_value, B.value AS b_value, C.value AS c_value PATTERN (PERMUTE(A, B, C)) DEFINE A AS event_type = 'start', B AS event_type = 'middle', C AS event_type = 'end' );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['seq'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='step', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASUR

Testing PERMUTE Patterns

Test 1: Basic PERMUTE - Should match all sequences with A, B, C in any order
Pattern value: 'PERMUTE(A, B, C)'
Pattern value: 'PERMUTE(A, B, C)'
Creating transition for variable 'A' with condition: 'event_type = 'start''
Creating transition for variable 'B' with condition: 'event_type = 'middle''
Creating transition for variable 'C' with condition: 'event_type = 'end''
Initialized matcher with excluded variables: set()
Find matches with all_rows=False, show_empty=True, include_unmatched=False
Starting match at index 0, state: State 0 (Non-accept, Vars: A, B, C)
Testing row 0, data: {'id': 1, 'seq': 1, 'step': 1, 'event_type': 'start', 'value': 100}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 0 to variable A
Reached accepting state 1 at row 0
  Current longest match: 0-0, vars: ['A']
Testing row 1, data: {'id': 2, 'seq': 1, 'step': 2, 'event_type': 'middle', 'value': 200}
  Evaluating condition for var: B
    Condition passed for

In [15]:
import pandas as pd
from src.executor.match_recognize import match_recognize

# Four partitions (seq 1-4); each holds the three event types in a different
# order. Rows are written as compact tuples and expanded into dicts.
data = [
    dict(zip(("id", "seq", "step", "event_type", "value"), row))
    for row in (
        # seq 1: A-B-C (start, middle, end)
        (1, 1, 1, "start", 100),
        (2, 1, 2, "middle", 200),
        (3, 1, 3, "end", 300),
        # seq 2: B-A-C (middle, start, end)
        (4, 2, 1, "middle", 250),
        (5, 2, 2, "start", 150),
        (6, 2, 3, "end", 350),
        # seq 3: A-C-B (start, end, middle)
        (7, 3, 1, "start", 175),
        (8, 3, 2, "end", 275),
        (9, 3, 3, "middle", 375),
        # seq 4: C-B-A (end, middle, start)
        (10, 4, 1, "end", 225),
        (11, 4, 2, "middle", 325),
        (12, 4, 3, "start", 425),
    )
]

df = pd.DataFrame(data)

print("Testing PERMUTE Patterns\n")

# Test 2: the whole PERMUTE group carries a `+` quantifier; the measures take
# the first A value and the last C value of the match.
query_permute_quantifier = """
SELECT * FROM memory.default.op2 MATCH_RECOGNIZE(
    PARTITION BY seq
    ORDER BY step
    MEASURES 
        CLASSIFIER() AS pattern_var,
        MATCH_NUMBER() AS match_num,
        FIRST(A.value) AS first_a_value,
        LAST(C.value) AS last_c_value
    ONE ROW PER MATCH
    PATTERN (PERMUTE(A, B, C)+)
    DEFINE 
        A AS event_type = 'start',
        B AS event_type = 'middle',
        C AS event_type = 'end'
);
"""

print("Test 2: PERMUTE with Quantifier - Should match one or more occurrences of permutations")
output_df = match_recognize(query_permute_quantifier, df)
# Result followed by the same two trailing blank lines as before.
print(output_df, "\n", sep="\n")

DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.op2 MATCH_RECOGNIZE( PARTITION BY seq ORDER BY step MEASURES CLASSIFIER() AS pattern_var, MATCH_NUMBER() AS match_num, FIRST(A.value) AS first_a_value, LAST(C.value) AS last_c_value ONE ROW PER MATCH PATTERN (PERMUTE(A, B, C)+) DEFINE A AS event_type = 'start', B AS event_type = 'middle', C AS event_type = 'end' );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['seq'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='step', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_ext

Testing PERMUTE Patterns

Test 2: PERMUTE with Quantifier - Should match one or more occurrences of permutations
Pattern value: 'PERMUTE(A, B, C)+'
Pattern value: 'PERMUTE(A, B, C)+'
Creating transition for variable 'A' with condition: 'event_type = 'start''
Creating transition for variable 'B' with condition: 'event_type = 'middle''
Creating transition for variable 'C' with condition: 'event_type = 'end''
Initialized matcher with excluded variables: set()
Find matches with all_rows=False, show_empty=True, include_unmatched=False
Starting match at index 0, state: State 0 (Non-accept, Vars: A, B, C)
Testing row 0, data: {'id': 1, 'seq': 1, 'step': 1, 'event_type': 'start', 'value': 100}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 0 to variable A
Reached accepting state 1 at row 0
  Current longest match: 0-0, vars: ['A']
Testing row 1, data: {'id': 2, 'seq': 1, 'step': 2, 'event_type': 'middle', 'value': 200}
  Evaluating condition for var: A
    Conditio

In [16]:
import pandas as pd
from src.executor.match_recognize import match_recognize

# Four partitions (seq 1-4); each holds the three event types in a different
# order. Rows are written as compact tuples and expanded into dicts.
data = [
    dict(zip(("id", "seq", "step", "event_type", "value"), row))
    for row in (
        # seq 1: A-B-C (start, middle, end)
        (1, 1, 1, "start", 100),
        (2, 1, 2, "middle", 200),
        (3, 1, 3, "end", 300),
        # seq 2: B-A-C (middle, start, end)
        (4, 2, 1, "middle", 250),
        (5, 2, 2, "start", 150),
        (6, 2, 3, "end", 350),
        # seq 3: A-C-B (start, end, middle)
        (7, 3, 1, "start", 175),
        (8, 3, 2, "end", 275),
        (9, 3, 3, "middle", 375),
        # seq 4: C-B-A (end, middle, start)
        (10, 4, 1, "end", 225),
        (11, 4, 2, "middle", 325),
        (12, 4, 3, "start", 425),
    )
]

df = pd.DataFrame(data)

print("Testing PERMUTE Patterns\n")

# Test 3: same PERMUTE pattern, but requesting ALL ROWS PER MATCH together
# with a RUNNING LAST(A.value) measure.
query_permute_all_rows = """
SELECT * FROM memory.default.op2 MATCH_RECOGNIZE(
    PARTITION BY seq
    ORDER BY step
    MEASURES 
        CLASSIFIER() AS pattern_var,
        MATCH_NUMBER() AS match_num,
        RUNNING LAST(A.value) AS running_a_value
    ALL ROWS PER MATCH
    PATTERN (PERMUTE(A, B, C))
    DEFINE 
        A AS event_type = 'start',
        B AS event_type = 'middle',
        C AS event_type = 'end'
);
"""

print("Test 3: PERMUTE with ALL ROWS PER MATCH - Shows all matched rows")
output_df = match_recognize(query_permute_all_rows, df)
# Result followed by the same two trailing blank lines as before.
print(output_df, "\n", sep="\n")


Testing PERMUTE Patterns

Test 3: PERMUTE with ALL ROWS PER MATCH - Shows all matched rows


DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.op2 MATCH_RECOGNIZE( PARTITION BY seq ORDER BY step MEASURES CLASSIFIER() AS pattern_var, MATCH_NUMBER() AS match_num, RUNNING LAST(A.value) AS running_a_value ALL ROWS PER MATCH PATTERN (PERMUTE(A, B, C)) DEFINE A AS event_type = 'start', B AS event_type = 'middle', C AS event_type = 'end' );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['seq'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='step', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASU

Pattern value: 'PERMUTE(A, B, C)'
Pattern value: 'PERMUTE(A, B, C)'
Creating transition for variable 'A' with condition: 'event_type = 'start''
Creating transition for variable 'B' with condition: 'event_type = 'middle''
Creating transition for variable 'C' with condition: 'event_type = 'end''
Initialized matcher with excluded variables: set()
Find matches with all_rows=True, show_empty=True, include_unmatched=False
Starting match at index 0, state: State 0 (Non-accept, Vars: A, B, C)
Testing row 0, data: {'id': 1, 'seq': 1, 'step': 1, 'event_type': 'start', 'value': 100}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 0 to variable A
Reached accepting state 1 at row 0
  Current longest match: 0-0, vars: ['A']
Testing row 1, data: {'id': 2, 'seq': 1, 'step': 2, 'event_type': 'middle', 'value': 200}
  Evaluating condition for var: B
    Condition passed for B
  Assigned row 1 to variable B
Reached accepting state 2 at row 1
  Current longest match: 0-1, vars:

In [17]:

import pandas as pd
from src.executor.match_recognize import match_recognize

# Four partitions (seq 1-4); each holds the three event types in a different
# order. Rows are written as compact tuples and expanded into dicts.
data = [
    dict(zip(("id", "seq", "step", "event_type", "value"), row))
    for row in (
        # seq 1: A-B-C (start, middle, end)
        (1, 1, 1, "start", 100),
        (2, 1, 2, "middle", 200),
        (3, 1, 3, "end", 300),
        # seq 2: B-A-C (middle, start, end)
        (4, 2, 1, "middle", 250),
        (5, 2, 2, "start", 150),
        (6, 2, 3, "end", 350),
        # seq 3: A-C-B (start, end, middle)
        (7, 3, 1, "start", 175),
        (8, 3, 2, "end", 275),
        (9, 3, 3, "middle", 375),
        # seq 4: C-B-A (end, middle, start)
        (10, 4, 1, "end", 225),
        (11, 4, 2, "middle", 325),
        (12, 4, 3, "start", 425),
    )
]

df = pd.DataFrame(data)

print("Testing PERMUTE Patterns\n")

# Test 4: PERMUTE combined with SUBSET; the measures read through the union
# variables X = (A, B) and Y = (B, C) rather than A/B/C directly.
query_permute_subset = """
SELECT * FROM memory.default.op2 MATCH_RECOGNIZE(
    PARTITION BY seq
    ORDER BY step
    MEASURES 
        CLASSIFIER() AS pattern_var,
        MATCH_NUMBER() AS match_num,
        X.value AS x_value,
        Y.value AS y_value
    ONE ROW PER MATCH
    PATTERN (PERMUTE(A, B, C))
    SUBSET
        X = (A, B),
        Y = (B, C)
    DEFINE 
        A AS event_type = 'start',
        B AS event_type = 'middle',
        C AS event_type = 'end'
);
"""

print("Test 4: PERMUTE with Subset Variables - Using subset groupings")
output_df = match_recognize(query_permute_subset, df)
# Result followed by the same two trailing blank lines as before.
print(output_df, "\n", sep="\n")


DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.op2 MATCH_RECOGNIZE( PARTITION BY seq ORDER BY step MEASURES CLASSIFIER() AS pattern_var, MATCH_NUMBER() AS match_num, X.value AS x_value, Y.value AS y_value ONE ROW PER MATCH PATTERN (PERMUTE(A, B, C)) SUBSET X = (A, B), Y = (B, C) DEFINE A AS event_type = 'start', B AS event_type = 'middle', C AS event_type = 'end' );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['seq'])


Testing PERMUTE Patterns

Test 4: PERMUTE with Subset Variables - Using subset groupings


DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='step', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='CLASSIFIER()', alias='pattern_var', metadata={'semantics': 'RUNNING'}, is_classifier=True, is_match_number=False), Measure(expression='MATCH_NUMBER()', alias='match_num', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=True), Measure(expression='X.value', alias='x_value', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False), Measure(expression='Y.value', alias='y_value', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False)])
DEBUG:src.parser.match_recognize_extractor:Extracted ROWS PER MATCH: ONE ROW PER MATCH
DEBUG:src.parser.match_recognize_extractor:Extracted ROWS PER MATCH: ONE ROW PER MATCH
DEBUG:src.parser.match_recognize_extractor:Updated Pattern to

Pattern value: 'PERMUTE(A, B, C)'
Extracted subset definition: X = (A, B)
Extracted subset definition: Y = (B, C)


DEBUG:src.parser.match_recognize_extractor:Updated Pattern tokens: {'variables': ['B', 'C', 'A'], 'base_variables': ['B', 'C', 'A'], 'permute': True, 'nested_permute': False}
DEBUG:src.parser.match_recognize_extractor:PATTERN clause validated successfully: PERMUTE(A, B, C)
DEBUG:src.parser.match_recognize_extractor:PERMUTE pattern detected - skipping variable validation
DEBUG:src.parser.match_recognize_extractor:PERMUTE pattern detected - skipping variable validation
DEBUG:src.parser.match_recognize_extractor:Validated function usage for measure: CLASSIFIER()
DEBUG:src.parser.match_recognize_extractor:Validated function usage for measure: MATCH_NUMBER()
DEBUG:src.parser.match_recognize_extractor:Validated function usage for measure: X.value
DEBUG:src.parser.match_recognize_extractor:Validated function usage for measure: Y.value
DEBUG:src.parser.match_recognize_extractor:Extracted MATCH_RECOGNIZE clause via recursive search.


Pattern value: 'PERMUTE(A, B, C)'
Creating transition for variable 'A' with condition: 'event_type = 'start''
Creating transition for variable 'B' with condition: 'event_type = 'middle''
Creating transition for variable 'C' with condition: 'event_type = 'end''
Initialized matcher with excluded variables: set()
Find matches with all_rows=False, show_empty=True, include_unmatched=False
Starting match at index 0, state: State 0 (Non-accept, Vars: A, B, C)
Testing row 0, data: {'id': 1, 'seq': 1, 'step': 1, 'event_type': 'start', 'value': 100}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 0 to variable A
Reached accepting state 1 at row 0
  Current longest match: 0-0, vars: ['A']
Testing row 1, data: {'id': 2, 'seq': 1, 'step': 2, 'event_type': 'middle', 'value': 200}
  Evaluating condition for var: B
    Condition passed for B
  Assigned row 1 to variable B
Reached accepting state 2 at row 1
  Current longest match: 0-1, vars: ['A', 'B']
Testing row 2, data: 

In [1]:

import pandas as pd
from src.executor.match_recognize import match_recognize

# Four partitions (seq 1-4); each holds the three event types in a different
# order. Rows are written as compact tuples and expanded into dicts.
data = [
    dict(zip(("id", "seq", "step", "event_type", "value"), row))
    for row in (
        # seq 1: A-B-C (start, middle, end)
        (1, 1, 1, "start", 100),
        (2, 1, 2, "middle", 200),
        (3, 1, 3, "end", 300),
        # seq 2: B-A-C (middle, start, end)
        (4, 2, 1, "middle", 250),
        (5, 2, 2, "start", 150),
        (6, 2, 3, "end", 350),
        # seq 3: A-C-B (start, end, middle)
        (7, 3, 1, "start", 175),
        (8, 3, 2, "end", 275),
        (9, 3, 3, "middle", 375),
        # seq 4: C-B-A (end, middle, start)
        (10, 4, 1, "end", 225),
        (11, 4, 2, "middle", 325),
        (12, 4, 3, "start", 425),
    )
]

df = pd.DataFrame(data)

print("Testing PERMUTE Patterns\n")

# Test 5: a PERMUTE whose second element is itself a PERMUTE(B, C).
query_nested_permute = """
SELECT * FROM memory.default.op2 MATCH_RECOGNIZE(
    PARTITION BY seq
    ORDER BY step
    MEASURES 
        CLASSIFIER() AS pattern_var,
        MATCH_NUMBER() AS match_num,
        A.value AS a_value,
        B.value AS b_value,
        C.value AS c_value
    ONE ROW PER MATCH
    PATTERN (PERMUTE(A, PERMUTE(B, C)))
    DEFINE 
        A AS event_type = 'start',
        B AS event_type = 'middle',
        C AS event_type = 'end'
);
"""

print("Test 5: Nested PERMUTE - Testing nested permutation patterns")
output_df = match_recognize(query_nested_permute, df)
# Result followed by the same two trailing blank lines as before.
print(output_df, "\n", sep="\n")

DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.op2 MATCH_RECOGNIZE( PARTITION BY seq ORDER BY step MEASURES CLASSIFIER() AS pattern_var, MATCH_NUMBER() AS match_num, A.value AS a_value, B.value AS b_value, C.value AS c_value ONE ROW PER MATCH PATTERN (PERMUTE(A, PERMUTE(B, C))) DEFINE A AS event_type = 'start', B AS event_type = 'middle', C AS event_type = 'end' );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['seq'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='step', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize

Testing PERMUTE Patterns

Test 5: Nested PERMUTE - Testing nested permutation patterns
Pattern value: 'PERMUTE(A, PERMUTE(B, C))'
Pattern value: 'PERMUTE(A, PERMUTE(B, C))'


DEBUG:row_match_recognize.src.matcher.condition_evaluator:[DEBUG] COMPARE: left=start (<class 'str'>), right=start (<class 'str'>)
DEBUG:row_match_recognize.src.matcher.condition_evaluator:[DEBUG] COMPARE AST: left=Name(id='event_type', ctx=Load()), right=Constant(value='start')
DEBUG:row_match_recognize.src.matcher.condition_evaluator:[DEBUG] COMPARE RESULT: start Eq start = True (evaluating for var=A)
DEBUG:row_match_recognize.src.matcher.matcher:    Condition passed for A
DEBUG:row_match_recognize.src.matcher.matcher:    DEBUG: condition result=True, type=<class 'bool'>
DEBUG:row_match_recognize.src.matcher.matcher:  [DEBUG] Clearing context.current_var (was A)
DEBUG:row_match_recognize.src.matcher.matcher:  Assigned row 2 to variable A
DEBUG:row_match_recognize.src.matcher.matcher:Reached accepting state 10 at row 2
DEBUG:row_match_recognize.src.matcher.matcher:  Updated longest match: 0-2, vars: ['C', 'B', 'A']
DEBUG:row_match_recognize.src.matcher.matcher:Found non-empty match: {

   seq pattern_var  match_num  a_value  b_value  c_value
0    1           C          1      100      200      300
1    3           B          1      175      375      275
2    4           A          1      425      325      225




In [1]:
import pandas as pd
from src.executor.match_recognize import match_recognize

# Test 6: PERMUTE with Complex Conditions
# Each DEFINE pairs an event_type check with a navigation comparison
# (NEXT for A, PREV for B, FIRST(A.value) for C).
query_permute_complex = """
SELECT * FROM memory.default.op2 MATCH_RECOGNIZE(
    PARTITION BY seq
    ORDER BY step
    MEASURES 
        CLASSIFIER() AS pattern_var,
        MATCH_NUMBER() AS match_num,
        A.value AS start_value,
        B.value AS middle_value,
        C.value AS end_value
    ONE ROW PER MATCH
    PATTERN (PERMUTE(A, B, C))
    DEFINE 
        A AS event_type = 'start' AND A.value < NEXT(A.value),
        B AS event_type = 'middle' AND B.value > PREV(B.value),
        C AS event_type = 'end' AND C.value > FIRST(A.value)
);
"""

# Four partitions (seq 1-4); each holds one 'start' (A), one 'middle' (B) and
# one 'end' (C) event, arranged in a different order per partition.
data = [
    dict(zip(("id", "seq", "step", "event_type", "value"), row))
    for row in (
        # seq 1: A-B-C
        (1, 1, 1, "start", 100),
        (2, 1, 2, "middle", 200),
        (3, 1, 3, "end", 300),
        # seq 2: B-A-C
        (4, 2, 1, "middle", 250),
        (5, 2, 2, "start", 150),
        (6, 2, 3, "end", 350),
        # seq 3: A-C-B
        (7, 3, 1, "start", 175),
        (8, 3, 2, "end", 275),
        (9, 3, 3, "middle", 375),
        # seq 4: C-B-A
        (10, 4, 1, "end", 225),
        (11, 4, 2, "middle", 325),
        (12, 4, 3, "start", 425),
    )
]
df = pd.DataFrame(data)

print("Testing PERMUTE Patterns\n")
print("Test 6: PERMUTE with Complex Conditions - Testing complex pattern definitions")
output_df = match_recognize(query_permute_complex, df)
print(output_df)
print("\n")

DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.op2 MATCH_RECOGNIZE( PARTITION BY seq ORDER BY step MEASURES CLASSIFIER() AS pattern_var, MATCH_NUMBER() AS match_num, A.value AS start_value, B.value AS middle_value, C.value AS end_value ONE ROW PER MATCH PATTERN (PERMUTE(A, B, C)) DEFINE A AS event_type = 'start' AND A.value < NEXT(A.value), B AS event_type = 'middle' AND B.value > PREV(B.value), C AS event_type = 'end' AND C.value > FIRST(A.value) );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['seq'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem

Testing PERMUTE Patterns

Test 6: PERMUTE with Complex Conditions - Testing complex pattern definitions
Pattern value: 'PERMUTE(A, B, C)'
Pattern value: 'PERMUTE(A, B, C)'


DEBUG:row_match_recognize.src.matcher.matcher:  Evaluating condition for var: A
DEBUG:row_match_recognize.src.matcher.matcher:  [DEBUG] Set context.current_var = A
DEBUG:row_match_recognize.src.matcher.matcher:    DEBUG: Calling condition function with row={'id': 6, 'seq': 2, 'step': 3, 'event_type': 'end', 'value': 350}
DEBUG:row_match_recognize.src.matcher.condition_evaluator:Universal pattern variable 'event_type' resolved to: end
DEBUG:row_match_recognize.src.matcher.condition_evaluator:[DEBUG] COMPARE: left=end (<class 'str'>), right=start (<class 'str'>)
DEBUG:row_match_recognize.src.matcher.condition_evaluator:[DEBUG] COMPARE AST: left=Name(id='event_type', ctx=Load()), right=Constant(value='start')
DEBUG:row_match_recognize.src.matcher.condition_evaluator:[DEBUG] COMPARE RESULT: end Eq start = False (evaluating for var=A)
DEBUG:row_match_recognize.src.matcher.condition_evaluator:Universal pattern variable 'event_type' resolved to: end
DEBUG:row_match_recognize.src.matcher.condi

   seq pattern_var  match_num  start_value  middle_value  end_value
0    1           C          1          100           200        300
1    3           B          1          175           375        275




In [20]:
import pandas as pd
from src.executor.match_recognize import match_recognize

# Test 6: PERMUTE with Complex Conditions
# A, B and C may appear in any order; every variable combines an event_type
# filter with a NEXT / PREV / FIRST comparison on `value`.
query_permute_complex = """
SELECT * FROM memory.default.op2 MATCH_RECOGNIZE(
    PARTITION BY seq
    ORDER BY step
    MEASURES 
        CLASSIFIER() AS pattern_var,
        MATCH_NUMBER() AS match_num,
        A.value AS start_value,
        B.value AS middle_value,
        C.value AS end_value
    ONE ROW PER MATCH
    PATTERN (PERMUTE(A, B, C))
    DEFINE 
        A AS event_type = 'start' AND A.value < NEXT(A.value),
        B AS event_type = 'middle' AND B.value > PREV(B.value),
        C AS event_type = 'end' AND C.value > FIRST(A.value)
);
"""

# Input rows as (id, seq, step, event_type, value); one group of three rows
# per partition, each group ordering start/middle/end differently.
data = [
    {"id": row_id, "seq": seq, "step": step, "event_type": kind, "value": amount}
    for row_id, seq, step, kind, amount in (
        # Partition 1 -> start, middle, end
        (1, 1, 1, "start", 100),
        (2, 1, 2, "middle", 200),
        (3, 1, 3, "end", 300),
        # Partition 2 -> middle, start, end
        (4, 2, 1, "middle", 250),
        (5, 2, 2, "start", 150),
        (6, 2, 3, "end", 350),
        # Partition 3 -> start, end, middle
        (7, 3, 1, "start", 175),
        (8, 3, 2, "end", 275),
        (9, 3, 3, "middle", 375),
        # Partition 4 -> end, middle, start
        (10, 4, 1, "end", 225),
        (11, 4, 2, "middle", 325),
        (12, 4, 3, "start", 425),
    )
]
df = pd.DataFrame(data)

print("Testing PERMUTE Patterns\n")
print("Test 6: PERMUTE with Complex Conditions - Testing complex pattern definitions")
output_df = match_recognize(query_permute_complex, df)
print(output_df)
print("\n")

DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.op2 MATCH_RECOGNIZE( PARTITION BY seq ORDER BY step MEASURES CLASSIFIER() AS pattern_var, MATCH_NUMBER() AS match_num, A.value AS start_value, B.value AS middle_value, C.value AS end_value ONE ROW PER MATCH PATTERN (PERMUTE(A, B, C)) DEFINE A AS event_type = 'start' AND A.value < NEXT(A.value), B AS event_type = 'middle' AND B.value > PREV(B.value), C AS event_type = 'end' AND C.value > FIRST(A.value) );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['seq'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem

Testing PERMUTE Patterns

Test 6: PERMUTE with Complex Conditions - Testing complex pattern definitions


DEBUG:src.parser.match_recognize_extractor:Updated Pattern tokens: {'variables': ['B', 'C', 'A'], 'base_variables': ['B', 'C', 'A'], 'permute': True, 'nested_permute': False}
DEBUG:src.parser.match_recognize_extractor:PATTERN clause validated successfully: PERMUTE(A, B, C)
DEBUG:src.parser.match_recognize_extractor:Extracted Pattern: PatternClause(pattern='PERMUTE(A, B, C)', metadata={'variables': ['A', 'B', 'C'], 'base_variables': ['A', 'B', 'C'], 'permute': True, 'nested_permute': False})
DEBUG:src.parser.match_recognize_extractor:Extracted DEFINE: DefineClause(definitions=[Define(variable='A', condition="event_type = 'start' AND A.value < NEXT(A.value)"), Define(variable='B', condition="event_type = 'middle' AND B.value > PREV(B.value)"), Define(variable='C', condition="event_type = 'end' AND C.value > FIRST(A.value)")])
DEBUG:src.parser.match_recognize_extractor:Updated Pattern tokens: {'variables': ['B', 'C', 'A'], 'base_variables': ['B', 'C', 'A'], 'permute': True, 'nested_permut

Pattern value: 'PERMUTE(A, B, C)'
Pattern value: 'PERMUTE(A, B, C)'
Creating transition for variable 'A' with condition: 'event_type = 'start' AND A.value < NEXT(A.value)'
Creating transition for variable 'B' with condition: 'event_type = 'middle' AND B.value > PREV(B.value)'
Creating transition for variable 'C' with condition: 'event_type = 'end' AND C.value > FIRST(A.value)'
Initialized matcher with excluded variables: set()
Find matches with all_rows=False, show_empty=True, include_unmatched=False
Starting match at index 0, state: State 0 (Non-accept, Vars: A, B, C)
Testing row 0, data: {'id': 1, 'seq': 1, 'step': 1, 'event_type': 'start', 'value': 100}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 0 to variable A
Reached accepting state 1 at row 0
  Current longest match: 0-0, vars: ['A']
Testing row 1, data: {'id': 2, 'seq': 1, 'step': 2, 'event_type': 'middle', 'value': 200}
  Evaluating condition for var: B
    Condition passed for B
  Assigned row 

In [21]:
import pandas as pd
from src.executor.match_recognize import match_recognize

# Test data for PERMUTE with optional elements: four partitions (seq 1-4),
# each holding one 'start', one 'middle' and one 'end' event in a different
# order.
data = [
    # Sequence 1: Has A-B-C pattern
    {"id": 1, "seq": 1, "step": 1, "event_type": "start", "value": 100},  # A
    {"id": 2, "seq": 1, "step": 2, "event_type": "middle", "value": 200}, # B
    {"id": 3, "seq": 1, "step": 3, "event_type": "end", "value": 300},    # C

    # Sequence 2: Has B-A-C pattern
    {"id": 4, "seq": 2, "step": 1, "event_type": "middle", "value": 250}, # B
    {"id": 5, "seq": 2, "step": 2, "event_type": "start", "value": 150},  # A
    {"id": 6, "seq": 2, "step": 3, "event_type": "end", "value": 350},    # C

    # Sequence 3: Has A-C-B pattern
    {"id": 7, "seq": 3, "step": 1, "event_type": "start", "value": 175},  # A
    {"id": 8, "seq": 3, "step": 2, "event_type": "end", "value": 275},    # C
    {"id": 9, "seq": 3, "step": 3, "event_type": "middle", "value": 375}, # B

    # Sequence 4: Has C-B-A pattern
    {"id": 10, "seq": 4, "step": 1, "event_type": "end", "value": 225},   # C
    {"id": 11, "seq": 4, "step": 2, "event_type": "middle", "value": 325}, # B
    {"id": 12, "seq": 4, "step": 3, "event_type": "start", "value": 425},  # A
]

df = pd.DataFrame(data)

# Banner matches the other PERMUTE test cells; the query below contains no
# SUBSET clause, so a "Subset Variables" heading would misdescribe it.
print("Testing PERMUTE Patterns\n")


# Test 7: PERMUTE with Edge Cases
# Only A is mandatory in the pattern; B? and C? are optional elements.
# ALL ROWS PER MATCH is requested, so measures are reported per matched row.
query_permute_edge_cases = """
SELECT * FROM memory.default.op2 MATCH_RECOGNIZE(
    PARTITION BY seq
    ORDER BY step
    MEASURES 
        CLASSIFIER() AS pattern_var,
        MATCH_NUMBER() AS match_num,
        A.value AS a_value,
        LAST(B.value) AS last_b_value,
        FIRST(C.value) AS first_c_value
    ALL ROWS PER MATCH
    PATTERN (PERMUTE(A, B?, C?))
    DEFINE 
        A AS event_type = 'start',
        B AS event_type = 'middle',
        C AS event_type = 'end'
);
"""

print("Test 7: PERMUTE with Edge Cases - Testing optional elements")
output_df = match_recognize(query_permute_edge_cases, df)
print(output_df)

DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.op2 MATCH_RECOGNIZE( PARTITION BY seq ORDER BY step MEASURES CLASSIFIER() AS pattern_var, MATCH_NUMBER() AS match_num, A.value AS a_value, LAST(B.value) AS last_b_value, FIRST(C.value) AS first_c_value ALL ROWS PER MATCH PATTERN (PERMUTE(A, B?, C?)) DEFINE A AS event_type = 'start', B AS event_type = 'middle', C AS event_type = 'end' );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['seq'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='step', ordering='ASC', nulls_ordering=None)])
DEBUG:src.pars

Testing PERMUTE with Subset Variables - Trino Compatibility

Test 7: PERMUTE with Edge Cases - Testing optional elements
Pattern value: 'PERMUTE(A, B?, C?)'
Pattern value: 'PERMUTE(A, B?, C?)'
Creating transition for variable 'A' with condition: 'event_type = 'start''
Creating transition for variable 'B' with condition: 'event_type = 'middle''
Creating transition for variable 'C' with condition: 'event_type = 'end''
Initialized matcher with excluded variables: set()
Find matches with all_rows=True, show_empty=True, include_unmatched=False
Starting match at index 0, state: State 0 (Non-accept, Vars: A)
Testing row 0, data: {'id': 1, 'seq': 1, 'step': 1, 'event_type': 'start', 'value': 100}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 0 to variable A
Reached accepting state 1 at row 0
  Current longest match: 0-0, vars: ['A']
Testing row 1, data: {'id': 2, 'seq': 1, 'step': 2, 'event_type': 'middle', 'value': 200}
  Evaluating condition for var: B
    Condi

# Example 2

In [22]:
import pandas as pd
from src.executor.match_recognize import match_recognize

# Single (Sales, West) partition; salaries are 1200, 1300, 900, 1100, so the
# third row is the only one that fails `salary > 1000`.
data = [
    {
        "id": emp_id,
        "name": emp_name,
        "department": "Sales",
        "region": "West",
        "hire_date": hired_on,
        "salary": pay,
    }
    for emp_id, emp_name, hired_on, pay in (
        (1, "Alice", "2021-01-01", 1200),
        (2, "Bob", "2021-01-02", 1300),
        (3, "Charlie", "2021-01-03", 900),
        (4, "Diana", "2021-01-04", 1100),
    )
]

# ALL ROWS PER MATCH validation: A+ over rows with salary above 1000,
# reporting the current salary, a running sum and the match number.
query = """
    SELECT * FROM memory.default.employees MATCH_RECOGNIZE (
        PARTITION BY department, region
        ORDER BY hire_date
        MEASURES 
            salary AS current_salary,
            RUNNING SUM(salary) AS running_sum,
            MATCH_NUMBER() AS match_num
        ALL ROWS PER MATCH
        PATTERN (A+)
        DEFINE A AS salary > 1000
    );
    """

output_df = match_recognize(query, pd.DataFrame(data))
print("Match Recognize Output:")
print(output_df)


DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.employees MATCH_RECOGNIZE ( PARTITION BY department, region ORDER BY hire_date MEASURES salary AS current_salary, RUNNING SUM(salary) AS running_sum, MATCH_NUMBER() AS match_num ALL ROWS PER MATCH PATTERN (A+) DEFINE A AS salary > 1000 );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['department', 'region'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='hire_date', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Meas

Pattern value: 'A+'
Pattern value: 'A+'
Creating transition for variable 'A' with condition: 'salary > 1000'
Initialized matcher with excluded variables: set()
Find matches with all_rows=True, show_empty=True, include_unmatched=False
Starting match at index 0, state: State 0 (Non-accept, Vars: A)
Testing row 0, data: {'id': 1, 'name': 'Alice', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-01', 'salary': 1200}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 0 to variable A
Reached accepting state 1 at row 0
  Current longest match: 0-0, vars: ['A']
Testing row 1, data: {'id': 2, 'name': 'Bob', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-02', 'salary': 1300}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 1 to variable A
Reached accepting state 1 at row 1
  Current longest match: 0-1, vars: ['A']
Testing row 2, data: {'id': 3, 'name': 'Charlie', 'department': 'Sales', 'region': 'West', 'hire_date'

In [23]:
import pandas as pd
from src.executor.match_recognize import match_recognize
# Use an absolute import for match_recognize.

# Four employees in one (Sales, West) partition, built from
# (id, name, hire_date, salary) tuples.
data = [
    dict(
        zip(
            ("id", "name", "department", "region", "hire_date", "salary"),
            (emp_id, emp_name, "Sales", "West", hired_on, pay),
        )
    )
    for emp_id, emp_name, hired_on, pay in (
        (1, "Alice", "2021-01-01", 1200),
        (2, "Bob", "2021-01-02", 1300),
        (3, "Charlie", "2021-01-03", 900),
        (4, "Diana", "2021-01-04", 1100),
    )
]

# A+ query with a single measure and no explicit ROWS PER MATCH clause.
# NOTE(review): the measure is the raw `salary` even though it is aliased
# `avg_salary` -- confirm whether AVG(salary) was intended.
query = """
    SELECT * FROM memory.default.employees MATCH_RECOGNIZE (
        PARTITION BY department, region
        ORDER BY hire_date
        MEASURES salary AS avg_salary
        PATTERN (A+)
        DEFINE A AS salary > 1000
    );
    """

output_df = match_recognize(query, pd.DataFrame(data))
print("Match Recognize Output:")
print(output_df)


DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.employees MATCH_RECOGNIZE ( PARTITION BY department, region ORDER BY hire_date MEASURES salary AS avg_salary PATTERN (A+) DEFINE A AS salary > 1000 );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['department', 'region'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='hire_date', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='salary', alias='avg_salary', metadata={'semantics': 'RUNNING'}, is_class

Pattern value: 'A+'
Pattern value: 'A+'
Creating transition for variable 'A' with condition: 'salary > 1000'
Initialized matcher with excluded variables: set()
Find matches with all_rows=False, show_empty=True, include_unmatched=False
Starting match at index 0, state: State 0 (Non-accept, Vars: A)
Testing row 0, data: {'id': 1, 'name': 'Alice', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-01', 'salary': 1200}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 0 to variable A
Reached accepting state 1 at row 0
  Current longest match: 0-0, vars: ['A']
Testing row 1, data: {'id': 2, 'name': 'Bob', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-02', 'salary': 1300}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 1 to variable A
Reached accepting state 1 at row 1
  Current longest match: 0-1, vars: ['A']
Testing row 2, data: {'id': 3, 'name': 'Charlie', 'department': 'Sales', 'region': 'West', 'hire_date

In [24]:
import pandas as pd
from src.executor.match_recognize import match_recognize

# Employees of one (Sales, West) partition as (id, name, hire_date, salary);
# only Charlie (900) does not satisfy `salary > 1000`.
data = [
    {
        "id": emp_id,
        "name": emp_name,
        "department": "Sales",
        "region": "West",
        "hire_date": hired_on,
        "salary": pay,
    }
    for emp_id, emp_name, hired_on, pay in [
        (1, "Alice", "2021-01-01", 1200),
        (2, "Bob", "2021-01-02", 1300),
        (3, "Charlie", "2021-01-03", 900),
        (4, "Diana", "2021-01-04", 1100),
    ]
]

# ALL ROWS PER MATCH validation with PATTERN (A*): same measures as the A+
# variant, but the star quantifier also admits zero occurrences of A.
query = """
    SELECT * FROM memory.default.employees MATCH_RECOGNIZE (
        PARTITION BY department, region
        ORDER BY hire_date
        MEASURES 
            salary AS current_salary,
            RUNNING SUM(salary) AS running_sum,
            MATCH_NUMBER() AS match_num
        ALL ROWS PER MATCH
        PATTERN (A*)
        DEFINE A AS salary > 1000
    );
    """

output_df = match_recognize(query, pd.DataFrame(data))
print("Match Recognize Output:")
print(output_df)


DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.employees MATCH_RECOGNIZE ( PARTITION BY department, region ORDER BY hire_date MEASURES salary AS current_salary, RUNNING SUM(salary) AS running_sum, MATCH_NUMBER() AS match_num ALL ROWS PER MATCH PATTERN (A*) DEFINE A AS salary > 1000 );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['department', 'region'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='hire_date', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Meas

Pattern value: 'A*'
Pattern value: 'A*'
Creating transition for variable 'A' with condition: 'salary > 1000'
Pattern allows empty matches - adding epsilon transition
Initialized matcher with excluded variables: set()
Find matches with all_rows=True, show_empty=True, include_unmatched=False
Starting match at index 0, state: State 0 (Accept, Vars: A)
Found potential empty match at index 0 - start state is accepting
Testing row 0, data: {'id': 1, 'name': 'Alice', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-01', 'salary': 1200}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 0 to variable A
Reached accepting state 1 at row 0
  Current longest match: 0-0, vars: ['A']
Testing row 1, data: {'id': 2, 'name': 'Bob', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-02', 'salary': 1300}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 1 to variable A
Reached accepting state 1 at row 1
  Current longest match: 

In [25]:
import pandas as pd
from src.executor.match_recognize import match_recognize

# Validation query with ONE ROW PER MATCH: each A B+ C+ match is summarised by
# its starting salary (A), its last C salary and the match number, and the
# search resumes past the last matched row (AFTER MATCH SKIP PAST LAST ROW).
query = """
SELECT *
FROM memory.default.employees 
MATCH_RECOGNIZE (
  PARTITION BY department, region
  ORDER BY hire_date
  MEASURES 
    A.salary AS starting_salary,
    LAST(C.salary) AS ending_salary,
    MATCH_NUMBER() AS match_num
  ONE ROW PER MATCH
  AFTER MATCH SKIP PAST LAST ROW
  PATTERN (A B+ C+)
  DEFINE 
    A AS salary > 1000,
    B AS salary < 1000,
    C AS salary > 1000
);


    """

# One (Sales, West) partition ordered by hire_date; salaries are
# 1200, 1300, 900, 1100, i.e. above, above, below, above the 1000 threshold.
data = [
    {"id": 1, "name": "Alice",   "department": "Sales", "region": "West", "hire_date": "2021-01-01", "salary": 1200},
    {"id": 2, "name": "Bob",     "department": "Sales", "region": "West", "hire_date": "2021-01-02", "salary": 1300},
    {"id": 3, "name": "Charlie", "department": "Sales", "region": "West", "hire_date": "2021-01-03", "salary": 900},
    {"id": 4, "name": "Diana",   "department": "Sales", "region": "West", "hire_date": "2021-01-04", "salary": 1100},
]

output_df = match_recognize(query, pd.DataFrame(data))
print("Match Recognize Output:")
print(output_df)


DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.employees MATCH_RECOGNIZE ( PARTITION BY department, region ORDER BY hire_date MEASURES A.salary AS starting_salary, LAST(C.salary) AS ending_salary, MATCH_NUMBER() AS match_num ONE ROW PER MATCH AFTER MATCH SKIP PAST LAST ROW PATTERN (A B+ C+) DEFINE A AS salary > 1000, B AS salary < 1000, C AS salary > 1000 );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['department', 'region'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='hire_date', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.

Pattern value: 'A B+ C+'
Pattern value: 'A B+ C+'
Creating transition for variable 'A' with condition: 'salary > 1000'
Creating transition for variable 'B' with condition: 'salary < 1000'
Creating transition for variable 'C' with condition: 'salary > 1000'
Initialized matcher with excluded variables: set()
Find matches with all_rows=False, show_empty=True, include_unmatched=False
Starting match at index 0, state: State 0 (Non-accept, Vars: A)
Testing row 0, data: {'id': 1, 'name': 'Alice', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-01', 'salary': 1200}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 0 to variable A
Testing row 1, data: {'id': 2, 'name': 'Bob', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-02', 'salary': 1300}
  Evaluating condition for var: B
    Condition failed for B
No valid transition from state 1 at row 1
No match found starting at index 0
Starting match at index 1, state: State 0 (Non-accept, Vars: 

In [None]:
import pandas as pd
from src.executor.match_recognize import match_recognize

# One (Sales, West) partition; salaries 1200, 1300, 900, 1100 in hire order.
data = [
    dict(
        zip(
            ("id", "name", "department", "region", "hire_date", "salary"),
            (emp_id, emp_name, "Sales", "West", hired_on, pay),
        )
    )
    for emp_id, emp_name, hired_on, pay in (
        (1, "Alice", "2021-01-01", 1200),
        (2, "Bob", "2021-01-02", 1300),
        (3, "Charlie", "2021-01-03", 900),
        (4, "Diana", "2021-01-04", 1100),
    )
]

# CLASSIFIER() example over PATTERN (A C* {- B+ -} C+): the B+ run sits inside
# the {- ... -} exclusion syntax, with an optional C* run before it.
query = """
SELECT * FROM memory.default.employees  MATCH_RECOGNIZE(
    PARTITION BY department, region
    ORDER BY hire_date
    MEASURES 
        CLASSIFIER() AS pattern_var,
        salary AS current_salary,
        RUNNING SUM(salary) AS running_sum
    ALL ROWS PER MATCH
    PATTERN (A C* {- B+ -} C+)
    DEFINE 
        A AS salary > 1000,
        B AS salary < 1000,
        C AS salary > 1000
);

"""

output_df = match_recognize(query, pd.DataFrame(data))
print("Match Recognize Output:")
print(output_df)

DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.employees MATCH_RECOGNIZE( PARTITION BY department, region ORDER BY hire_date MEASURES CLASSIFIER() AS pattern_var, salary AS current_salary, RUNNING SUM(salary) AS running_sum ALL ROWS PER MATCH PATTERN (A C* {- B+ -} C+) DEFINE A AS salary > 1000, B AS salary < 1000, C AS salary > 1000 );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['department', 'region'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='hire_date', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extrac

Pattern value: 'A C* {- B+ -} C+'
Pattern value: 'A C* {- B+ -} C+'


DEBUG:src.parser.match_recognize_extractor:Subset components: set()
DEBUG:src.parser.match_recognize_extractor:Subset variables: {}
DEBUG:src.parser.match_recognize_extractor:Validated function usage for measure: CLASSIFIER()
DEBUG:src.parser.match_recognize_extractor:Validated function usage for measure: salary
DEBUG:src.parser.match_recognize_extractor:Validated function usage for measure: SUM(salary)
DEBUG:src.parser.match_recognize_extractor:Extracted MATCH_RECOGNIZE clause via recursive search.


Creating transition for variable 'A' with condition: 'salary > 1000'
Creating transition for variable 'C' with condition: 'salary > 1000'
Creating transition for variable 'B' with condition: 'salary < 1000'
Creating transition for variable 'C' with condition: 'salary > 1000'
Pattern allows empty matches - adding epsilon transition
Exclusion handler found content: 'B+'
Exclusion handler added variable: 'B'
Initialized matcher with excluded variables: {'B'}
Find matches with all_rows=True, show_empty=True, include_unmatched=False
Starting match at index 0, state: State 0 (Accept, Vars: A)
Found potential empty match at index 0 - start state is accepting
Testing row 0, data: {'id': 1, 'name': 'Alice', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-01', 'salary': 1200}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 0 to variable A
Testing row 1, data: {'id': 2, 'name': 'Bob', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-02', 's

In [27]:
import pandas as pd
from src.executor.match_recognize import match_recognize

# Employees as (id, name, hire_date, salary); all share department "Sales"
# and region "West", so they form a single partition.
data = [
    {
        "id": emp_id,
        "name": emp_name,
        "department": "Sales",
        "region": "West",
        "hire_date": hired_on,
        "salary": pay,
    }
    for emp_id, emp_name, hired_on, pay in (
        (1, "Alice", "2021-01-01", 1200),
        (2, "Bob", "2021-01-02", 1300),
        (3, "Charlie", "2021-01-03", 900),
        (4, "Diana", "2021-01-04", 1100),
    )
]

# CLASSIFIER() example over PATTERN (A {- B+ -} C+): A must be followed
# directly by the B+ run that is wrapped in the {- ... -} exclusion syntax.
query = """
SELECT * FROM memory.default.employees  MATCH_RECOGNIZE(
    PARTITION BY department, region
    ORDER BY hire_date
    MEASURES 
        CLASSIFIER() AS pattern_var,
        salary AS current_salary,
        RUNNING SUM(salary) AS running_sum
    ALL ROWS PER MATCH
    PATTERN (A {- B+ -} C+)
    DEFINE 
        A AS salary > 1000,
        B AS salary < 1000,
        C AS salary > 1000
);

"""

output_df = match_recognize(query, pd.DataFrame(data))
print("Match Recognize Output:")
print(output_df)

DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.employees MATCH_RECOGNIZE( PARTITION BY department, region ORDER BY hire_date MEASURES CLASSIFIER() AS pattern_var, salary AS current_salary, RUNNING SUM(salary) AS running_sum ALL ROWS PER MATCH PATTERN (A {- B+ -} C+) DEFINE A AS salary > 1000, B AS salary < 1000, C AS salary > 1000 );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['department', 'region'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='hire_date', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor

Pattern value: 'A {- B+ -} C+'
Pattern value: 'A {- B+ -} C+'
Creating transition for variable 'A' with condition: 'salary > 1000'
Creating transition for variable 'B' with condition: 'salary < 1000'
Creating transition for variable 'C' with condition: 'salary > 1000'
Exclusion handler found content: 'B+'
Exclusion handler added variable: 'B'
Initialized matcher with excluded variables: {'B'}
Find matches with all_rows=True, show_empty=True, include_unmatched=False
Starting match at index 0, state: State 0 (Non-accept, Vars: A)
Testing row 0, data: {'id': 1, 'name': 'Alice', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-01', 'salary': 1200}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 0 to variable A
Testing row 1, data: {'id': 2, 'name': 'Bob', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-02', 'salary': 1300}
  Evaluating condition for var: B
    Condition failed for B
No valid transition from state 1 at row 1
No match 

In [5]:
import pandas as pd
from src.executor.match_recognize import match_recognize

# Four Sales/West hires in hire-date order. Charlie (900) is the only row
# below the 1000 threshold used by the DEFINE clause.
data = [
    dict(zip(("id", "name", "department", "region", "hire_date", "salary"), record))
    for record in (
        (1, "Alice", "Sales", "West", "2021-01-01", 1200),
        (2, "Bob", "Sales", "West", "2021-01-02", 1300),
        (3, "Charlie", "Sales", "West", "2021-01-03", 900),
        (4, "Diana", "Sales", "West", "2021-01-04", 1100),
    )
]

# CLASSIFIER() is used both bare and with a variable argument (A, C),
# next to an exclusion ({- B+ -}) in the pattern; ONE ROW PER MATCH
# collapses each match to a single summary row.
query = """
SELECT * FROM  memory.default.employees MATCH_RECOGNIZE(
    PARTITION BY department, region
    ORDER BY hire_date
    MEASURES 
        CLASSIFIER() AS pattern_var,
        CLASSIFIER(A) AS is_a_var,
        CLASSIFIER(C) AS is_c_var,
        salary AS current_salary,
        RUNNING SUM(salary) AS running_sum
    ONE ROW PER MATCH
    PATTERN (A {- B+ -} C+)
    DEFINE 
        A AS salary > 1000,
        B AS salary < 1000,
        C AS salary > 1000
);
"""

output_df = match_recognize(query, pd.DataFrame(data))
# Header line first, then the frame on the following line(s).
print("Match Recognize Output:", output_df, sep="\n")

DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.employees MATCH_RECOGNIZE( PARTITION BY department, region ORDER BY hire_date MEASURES CLASSIFIER() AS pattern_var, CLASSIFIER(A) AS is_a_var, CLASSIFIER(C) AS is_c_var, salary AS current_salary, RUNNING SUM(salary) AS running_sum ONE ROW PER MATCH PATTERN (A {- B+ -} C+) DEFINE A AS salary > 1000, B AS salary < 1000, C AS salary > 1000 );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['department', 'region'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='hire_date', ordering='ASC', nulls_orderi

Pattern value: 'A {- B+ -} C+'
Pattern value: 'A {- B+ -} C+'
Match Recognize Output:
  department region   hire_date  salary pattern_var is_a_var is_c_var  \
0      Sales   West  2021-01-02    1300           C        A        C   

   current_salary  running_sum  
0            1100       3300.0  


In [30]:
import pandas as pd
from src.executor.match_recognize import match_recognize

# Start-anchor example: PATTERN (^A+) may only begin a match on the first
# row of a partition. CLASSIFIER() and MATCH_NUMBER() are reported once per
# match (ONE ROW PER MATCH).
# With the single Sales partition below (salaries 1200, 1300, 900, 1100) the
# expected match covers the first two rows (Alice, Bob); Charlie's 900 fails
# A and ends the run.
query = """
SELECT * FROM memory.default.employees MATCH_RECOGNIZE(
    PARTITION BY department
    ORDER BY hire_date
    MEASURES 
        CLASSIFIER() AS pattern_var,
        MATCH_NUMBER() AS match_num
    ONE ROW PER MATCH
    PATTERN (^A+)
    DEFINE 
        A AS salary > 1000
);
"""

# hire_date values are ISO-formatted strings, so they sort chronologically.
data = [
    {"id": 1, "name": "Alice", "department": "Sales", "region": "West", "hire_date": "2021-01-01", "salary": 1200},
    {"id": 2, "name": "Bob",   "department": "Sales", "region": "West", "hire_date": "2021-01-02", "salary": 1300},
    {"id": 3, "name": "Charlie", "department": "Sales", "region": "West", "hire_date": "2021-01-03", "salary": 900},
    {"id": 4, "name": "Diana", "department": "Sales", "region": "West", "hire_date": "2021-01-04", "salary": 1100},
]


output_df = match_recognize(query, pd.DataFrame(data))
print("Match Recognize Output:")
print(output_df)

DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.employees MATCH_RECOGNIZE( PARTITION BY department ORDER BY hire_date MEASURES CLASSIFIER() AS pattern_var, MATCH_NUMBER() AS match_num ONE ROW PER MATCH PATTERN (^A+) DEFINE A AS salary > 1000 );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['department'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='hire_date', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='CLASSIFIER()', alias='pattern_var', 

Pattern value: '^A+'
Pattern value: '^A+'
Creating transition for variable 'A' with condition: 'salary > 1000'
Initialized matcher with excluded variables: set()
Find matches with all_rows=False, show_empty=True, include_unmatched=False
Starting match at index 0, state: State 0 (Non-accept, Vars: A)
Testing row 0, data: {'id': 1, 'name': 'Alice', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-01', 'salary': 1200}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 0 to variable A
Reached accepting state 1 at row 0
  Current longest match: 0-0, vars: ['A']
Testing row 1, data: {'id': 2, 'name': 'Bob', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-02', 'salary': 1300}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 1 to variable A
Reached accepting state 1 at row 1
  Current longest match: 0-1, vars: ['A']
Testing row 2, data: {'id': 3, 'name': 'Charlie', 'department': 'Sales', 'region': 'West', 'hire_da

# Example 3


In [31]:
import pandas as pd
from src.executor.match_recognize import match_recognize

# Four departments with four hires each, so the anchor tests see partitions
# that start high, end high, are high throughout, or are never high
# (relative to the salary > 1000 condition).
data = [
    dict(zip(("id", "name", "department", "region", "hire_date", "salary"), record))
    for record in (
        # Sales: first hire is above 1000, third hire dips below
        (1, "Alice", "Sales", "West", "2021-01-01", 1200),
        (2, "Bob", "Sales", "West", "2021-01-02", 1300),
        (3, "Charlie", "Sales", "West", "2021-01-03", 900),
        (4, "Diana", "Sales", "West", "2021-01-04", 1100),
        # Marketing: only the last hire is above 1000
        (5, "Eve", "Marketing", "East", "2021-01-01", 900),
        (6, "Frank", "Marketing", "East", "2021-01-02", 950),
        (7, "Grace", "Marketing", "East", "2021-01-03", 980),
        (8, "Henry", "Marketing", "East", "2021-01-04", 1200),
        # IT: every hire is above 1000
        (9, "Ivy", "IT", "North", "2021-01-01", 1500),
        (10, "Jack", "IT", "North", "2021-01-02", 1600),
        (11, "Kate", "IT", "North", "2021-01-03", 1700),
        (12, "Leo", "IT", "North", "2021-01-04", 1800),
        # HR: no hire is above 1000
        (13, "Mike", "HR", "South", "2021-01-01", 950),
        (14, "Nina", "HR", "South", "2021-01-02", 980),
        (15, "Oscar", "HR", "South", "2021-01-03", 990),
        (16, "Pam", "HR", "South", "2021-01-04", 995),
    )
]

# `df` is reused by the later anchor / PERMUTE cells.
df = pd.DataFrame(data)

print("Testing Pattern Anchors\n")

# Test 1: start anchor (^) - a match may only begin on the first row of a
# partition.
query_start_anchor = """
SELECT * FROM memory.default.orders MATCH_RECOGNIZE(
    PARTITION BY department
    ORDER BY hire_date
    MEASURES 
        CLASSIFIER() AS pattern_var,
        MATCH_NUMBER() AS match_num
    ONE ROW PER MATCH
    PATTERN (^A+)
    DEFINE 
        A AS salary > 1000
);
"""

print("Test 1: Start Anchor (^) - Should only match departments where first employee has salary > 1000")
output_df = match_recognize(query_start_anchor, df)
print(output_df)
print("\n")


Testing Pattern Anchors

Test 1: Start Anchor (^) - Should only match departments where first employee has salary > 1000


DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.orders MATCH_RECOGNIZE( PARTITION BY department ORDER BY hire_date MEASURES CLASSIFIER() AS pattern_var, MATCH_NUMBER() AS match_num ONE ROW PER MATCH PATTERN (^A+) DEFINE A AS salary > 1000 );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['department'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='hire_date', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='CLASSIFIER()', alias='pattern_var', met

Pattern value: '^A+'
Pattern value: '^A+'
Creating transition for variable 'A' with condition: 'salary > 1000'
Initialized matcher with excluded variables: set()
Find matches with all_rows=False, show_empty=True, include_unmatched=False
Starting match at index 0, state: State 0 (Non-accept, Vars: A)
Testing row 0, data: {'id': 1, 'name': 'Alice', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-01', 'salary': 1200}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 0 to variable A
Reached accepting state 1 at row 0
  Current longest match: 0-0, vars: ['A']
Testing row 1, data: {'id': 2, 'name': 'Bob', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-02', 'salary': 1300}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 1 to variable A
Reached accepting state 1 at row 1
  Current longest match: 0-1, vars: ['A']
Testing row 2, data: {'id': 3, 'name': 'Charlie', 'department': 'Sales', 'region': 'West', 'hire_da

In [32]:

# Test 2: End anchor ($) - Should match patterns ending at the end of a partition
# Depends on `df` (and `match_recognize`) from the Test 1 cell; run that first.
# Expected from those rows: Sales (Diana only), Marketing (Henry only) and
# IT (all four rows) end on a salary > 1000; HR has no qualifying row.
query_end_anchor = """
SELECT * FROM memory.default.orders MATCH_RECOGNIZE(
    PARTITION BY department
    ORDER BY hire_date
    MEASURES 
        CLASSIFIER() AS pattern_var,
        MATCH_NUMBER() AS match_num
    ONE ROW PER MATCH
    PATTERN (A+$)
    DEFINE 
        A AS salary > 1000
);
"""

print("Test 2: End Anchor ($) - Should only match departments where last employee has salary > 1000")
output_df = match_recognize(query_end_anchor, df)
print(output_df)
print("\n")



DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.orders MATCH_RECOGNIZE( PARTITION BY department ORDER BY hire_date MEASURES CLASSIFIER() AS pattern_var, MATCH_NUMBER() AS match_num ONE ROW PER MATCH PATTERN (A+$) DEFINE A AS salary > 1000 );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['department'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='hire_date', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='CLASSIFIER()', alias='pattern_var', met

Test 2: End Anchor ($) - Should only match departments where last employee has salary > 1000
Pattern value: 'A+$'
Pattern value: 'A+$'
Creating transition for variable 'A' with condition: 'salary > 1000'
Initialized matcher with excluded variables: set()
Find matches with all_rows=False, show_empty=True, include_unmatched=False
Starting match at index 0, state: State 0 (Non-accept, Vars: A)
Testing row 0, data: {'id': 1, 'name': 'Alice', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-01', 'salary': 1200}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 0 to variable A
End anchor failed: row_idx=0 is not at partition end
End anchor check failed for accepting state 1 at row 0
Testing row 1, data: {'id': 2, 'name': 'Bob', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-02', 'salary': 1300}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 1 to variable A
End anchor failed: row_idx=1 is not at partition end

In [33]:
# Test 3: Both anchors (^$) - Should match patterns spanning the entire partition
# Depends on `df` (and `match_recognize`) from the Test 1 cell; run that first.
# Expected from those rows: only IT qualifies, since it is the only
# department where every salary is > 1000.
query_both_anchors = """
SELECT * FROM memory.default.orders MATCH_RECOGNIZE(
    PARTITION BY department
    ORDER BY hire_date
    MEASURES 
        CLASSIFIER() AS pattern_var,
        MATCH_NUMBER() AS match_num
    ONE ROW PER MATCH
    PATTERN (^A+$)
    DEFINE 
        A AS salary > 1000
);
"""

print("Test 3: Both Anchors (^$) - Should only match departments where ALL employees have salary > 1000")
output_df = match_recognize(query_both_anchors, df)
print(output_df)
print("\n")


DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.orders MATCH_RECOGNIZE( PARTITION BY department ORDER BY hire_date MEASURES CLASSIFIER() AS pattern_var, MATCH_NUMBER() AS match_num ONE ROW PER MATCH PATTERN (^A+$) DEFINE A AS salary > 1000 );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['department'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='hire_date', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='CLASSIFIER()', alias='pattern_var', me

Test 3: Both Anchors (^$) - Should only match departments where ALL employees have salary > 1000
Pattern value: '^A+$'
Pattern value: '^A+$'
Creating transition for variable 'A' with condition: 'salary > 1000'
Initialized matcher with excluded variables: set()
Find matches with all_rows=False, show_empty=True, include_unmatched=False
Starting match at index 0, state: State 0 (Non-accept, Vars: A)
Testing row 0, data: {'id': 1, 'name': 'Alice', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-01', 'salary': 1200}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 0 to variable A
End anchor failed: row_idx=0 is not at partition end
End anchor check failed for accepting state 1 at row 0
Testing row 1, data: {'id': 2, 'name': 'Bob', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-02', 'salary': 1300}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 1 to variable A
End anchor failed: row_idx=1 is not at partiti

In [34]:

# Test 4: Start anchor with ALL ROWS PER MATCH to see the actual matched rows
# Same pattern as Test 1 (^A+); only the rows-per-match mode differs, so the
# output carries one row per matched input row instead of one summary row.
# Depends on `df` (and `match_recognize`) from the Test 1 cell; run that first.
# Expected from those rows: Sales contributes Alice and Bob, IT contributes
# all four rows; Marketing and HR start at or below 1000 and do not match.
query_start_all_rows = """
SELECT * FROM memory.default.orders MATCH_RECOGNIZE(
    PARTITION BY department
    ORDER BY hire_date
    MEASURES 
        CLASSIFIER() AS pattern_var,
        MATCH_NUMBER() AS match_num
    ALL ROWS PER MATCH
    PATTERN (^A+)
    DEFINE 
        A AS salary > 1000
);
"""

print("Test 4: Start Anchor (^) with ALL ROWS PER MATCH - Shows matched rows")
output_df = match_recognize(query_start_all_rows, df)
print(output_df)


DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.orders MATCH_RECOGNIZE( PARTITION BY department ORDER BY hire_date MEASURES CLASSIFIER() AS pattern_var, MATCH_NUMBER() AS match_num ALL ROWS PER MATCH PATTERN (^A+) DEFINE A AS salary > 1000 );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['department'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='hire_date', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='CLASSIFIER()', alias='pattern_var', me

Test 4: Start Anchor (^) with ALL ROWS PER MATCH - Shows matched rows
Pattern value: '^A+'
Pattern value: '^A+'


DEBUG:src.parser.match_recognize_extractor:Subset union variables: set()
DEBUG:src.parser.match_recognize_extractor:Subset components: set()
DEBUG:src.parser.match_recognize_extractor:Subset variables: {}
DEBUG:src.parser.match_recognize_extractor:Validated function usage for measure: CLASSIFIER()
DEBUG:src.parser.match_recognize_extractor:Validated function usage for measure: MATCH_NUMBER()
DEBUG:src.parser.match_recognize_extractor:Extracted MATCH_RECOGNIZE clause via recursive search.


Creating transition for variable 'A' with condition: 'salary > 1000'
Initialized matcher with excluded variables: set()
Find matches with all_rows=True, show_empty=True, include_unmatched=False
Starting match at index 0, state: State 0 (Non-accept, Vars: A)
Testing row 0, data: {'id': 1, 'name': 'Alice', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-01', 'salary': 1200}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 0 to variable A
Reached accepting state 1 at row 0
  Current longest match: 0-0, vars: ['A']
Testing row 1, data: {'id': 2, 'name': 'Bob', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-02', 'salary': 1300}
  Evaluating condition for var: A
    Condition passed for A
  Assigned row 1 to variable A
Reached accepting state 1 at row 1
  Current longest match: 0-1, vars: ['A']
Testing row 2, data: {'id': 3, 'name': 'Charlie', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-03', 'salary': 900}
  Evaluat

In [35]:
# Test PERMUTE functionality
# PERMUTE(A, B) accepts the variables in either order (A B or B A).
# Depends on `df` (and `match_recognize`) from the Test 1 cell; run that first.
# NOTE: with those rows only the A-then-B ordering is actually exercised:
#   - Sales: Bob (1300 -> A) followed by Charlie (900 -> B); Alice's 1200 is
#     not > 1200, so she satisfies neither variable.
#   - Marketing / HR contain no salary > 1200 (no A); IT contains no
#     salary < 1000 (no B), so none of them is expected to match.
query_permute = """
SELECT * FROM memory.default.orders MATCH_RECOGNIZE(
    PARTITION BY department
    ORDER BY hire_date
    MEASURES 
        CLASSIFIER() AS pattern_var,
        MATCH_NUMBER() AS match_num
    ONE ROW PER MATCH
    PATTERN (PERMUTE(A, B))
    DEFINE 
        A AS salary > 1200,
        B AS salary < 1000
);
"""

print("Test PERMUTE - Should match both orderings of A and B")
output_df = match_recognize(query_permute, df)
print(output_df)
print("\n")


DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.orders MATCH_RECOGNIZE( PARTITION BY department ORDER BY hire_date MEASURES CLASSIFIER() AS pattern_var, MATCH_NUMBER() AS match_num ONE ROW PER MATCH PATTERN (PERMUTE(A, B)) DEFINE A AS salary > 1200, B AS salary < 1000 );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['department'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='hire_date', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='CLASSIFIE

Test PERMUTE - Should match both orderings of A and B
Pattern value: 'PERMUTE(A, B)'
Pattern value: 'PERMUTE(A, B)'
Creating transition for variable 'A' with condition: 'salary > 1200'
Creating transition for variable 'B' with condition: 'salary < 1000'
Initialized matcher with excluded variables: set()
Find matches with all_rows=False, show_empty=True, include_unmatched=False
Starting match at index 0, state: State 0 (Non-accept, Vars: A, B)
Testing row 0, data: {'id': 1, 'name': 'Alice', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-01', 'salary': 1200}
  Evaluating condition for var: A
    Condition failed for A
  Evaluating condition for var: B
    Condition failed for B
No valid transition from state 0 at row 0
No match found starting at index 0
Starting match at index 1, state: State 0 (Non-accept, Vars: A, B)
Testing row 1, data: {'id': 2, 'name': 'Bob', 'department': 'Sales', 'region': 'West', 'hire_date': '2021-01-02', 'salary': 1300}
  Evaluating condition fo

## Exclusion Pattern Test Case

Testing the exclusion pattern `A C* {- B+ -} C+` that should match:
- Alice (A): salary > 1000 ✓
- Bob (C): salary > 1000 ✓  
- Charlie (excluded B): salary < 1000 - should be excluded but allow pattern to continue
- Diana (C): salary > 1000 ✓

Expected: a single match spanning all 4 rows, with Charlie's row (the excluded `B`) omitted from the output.

In [36]:
# Exclusion-pattern check on the four-row Sales/West sample seen in the
# earlier debug output.
import pandas as pd
from src.executor.match_recognize import match_recognize

# Pattern shape: A opens the match, C* is optional, the B+ run sits inside
# an exclusion ({- ... -}), and C+ closes the match.
exclusion_query = """
SELECT * FROM memory.default.employees MATCH_RECOGNIZE(
    PARTITION BY department, region
    ORDER BY hire_date
    MEASURES 
        CLASSIFIER() AS pattern_var,
        salary AS current_salary,
        RUNNING SUM(salary) AS running_sum
    ALL ROWS PER MATCH
    PATTERN (A C* {- B+ -} C+)
    DEFINE 
        A AS salary > 1000,
        B AS salary < 1000,
        C AS salary > 1000
);
"""

# Salaries 1200, 1300, 900, 1100 - Charlie is the only row below 1000.
exclusion_data = [
    dict(zip(("id", "name", "department", "region", "hire_date", "salary"), record))
    for record in (
        (1, "Alice", "Sales", "West", "2021-01-01", 1200),
        (2, "Bob", "Sales", "West", "2021-01-02", 1300),
        (3, "Charlie", "Sales", "West", "2021-01-03", 900),
        (4, "Diana", "Sales", "West", "2021-01-04", 1100),
    )
]
exclusion_df = pd.DataFrame(exclusion_data)

# Show the input first, then run the query and show what comes back.
print("Exclusion Test Data:")
print(exclusion_df)

print("\nRunning exclusion pattern test...")
result = match_recognize(exclusion_query, exclusion_df)
print("\nResult:")
print(result)

DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM memory.default.employees MATCH_RECOGNIZE( PARTITION BY department, region ORDER BY hire_date MEASURES CLASSIFIER() AS pattern_var, salary AS current_salary, RUNNING SUM(salary) AS running_sum ALL ROWS PER MATCH PATTERN (A C* {- B+ -} C+) DEFINE A AS salary > 1000, B AS salary < 1000, C AS salary > 1000 );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='memory')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['department', 'region'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='hire_date', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extrac

Exclusion Test Data:
   id     name department region   hire_date  salary
0   1    Alice      Sales   West  2021-01-01    1200
1   2      Bob      Sales   West  2021-01-02    1300
2   3  Charlie      Sales   West  2021-01-03     900
3   4    Diana      Sales   West  2021-01-04    1100

Running exclusion pattern test...
Pattern value: 'A C* {- B+ -} C+'
Pattern value: 'A C* {- B+ -} C+'
Creating transition for variable 'A' with condition: 'salary > 1000'
Creating transition for variable 'C' with condition: 'salary > 1000'
Creating transition for variable 'B' with condition: 'salary < 1000'
Creating transition for variable 'C' with condition: 'salary > 1000'
Pattern allows empty matches - adding epsilon transition
Exclusion handler found content: 'B+'
Exclusion handler added variable: 'B'
Initialized matcher with excluded variables: {'B'}
Find matches with all_rows=True, show_empty=True, include_unmatched=False
Starting match at index 0, state: State 0 (Accept, Vars: A)
Found potential e