In [1]:
import pandas as pd
from src.executor.match_recognize import match_recognize
# Demo: run a single-row pattern (A) over employees spread across several
# department/region partitions and print whatever match_recognize returns.
query = """
SELECT id, name 
FROM employees 
MATCH_RECOGNIZE (
    PARTITION BY department, region
    ORDER BY hire_date
    MEASURES salary AS avg_salary
    PATTERN (A)
    DEFINE A AS salary > 1000
);
"""

data = [
    # Sales / West: four consecutive hire dates, one salary below 1000.
    dict(id=1, name="Alice", department="Sales", region="West",
         hire_date="2021-01-01", salary=1200),
    dict(id=2, name="Bob", department="Sales", region="West",
         hire_date="2021-01-02", salary=1300),
    dict(id=3, name="Charlie", department="Sales", region="West",
         hire_date="2021-01-03", salary=900),
    dict(id=4, name="Diana", department="Sales", region="West",
         hire_date="2021-01-04", salary=1100),
    # Rows that fall into other (department, region) partitions.
    dict(id=5, name="Eve", department="Sales", region="East",
         hire_date="2021-01-01", salary=1250),
    dict(id=6, name="Frank", department="Marketing", region="West",
         hire_date="2021-01-01", salary=1150),
]
df = pd.DataFrame(data)

result = match_recognize(query, df)
# Heading and result on separate lines, exactly as two consecutive prints would emit.
print("Match Recognize Output:", result, sep="\n")



DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT id, name FROM employees MATCH_RECOGNIZE ( PARTITION BY department, region ORDER BY hire_date MEASURES salary AS avg_salary PATTERN (A) DEFINE A AS salary > 1000 );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=id, metadata={}), SelectItem(expression=name, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='employees')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['department', 'region'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='hire_date', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='salary', alias='avg_salary', metada

Match Recognize Output:
   id   name department region   hire_date  salary  avg_salary  MATCH_NUMBER
0   6  Frank  Marketing   West  2021-01-01    1150        1150             1
1   5    Eve      Sales   East  2021-01-01    1250        1250             1
2   1  Alice      Sales   West  2021-01-01    1200        1200             1
3   2    Bob      Sales   West  2021-01-02    1300        1300             2
4   4  Diana      Sales   West  2021-01-04    1100        1100             3


In [1]:
# test_match_recognize.py

import pandas as pd
from src.executor.match_recognize import match_recognize

def test_basic_pattern():
    """Test basic single-row pattern matching with PATTERN (A).

    All four rows share one (department, region) partition and are ordered
    by hire_date. Variable A is defined as ``salary > 1000``, so each
    qualifying row forms its own one-row match and the 900-salary row
    (Charlie) must not appear in the result.

    Raises:
        AssertionError: if the returned rows are not exactly Alice, Bob and
            Diana in hire_date order.
    """
    data = [
        {"id": 1, "name": "Alice", "department": "Sales", "region": "West", "hire_date": "2021-01-01", "salary": 1200},
        {"id": 2, "name": "Bob", "department": "Sales", "region": "West", "hire_date": "2021-01-02", "salary": 1300},
        {"id": 3, "name": "Charlie", "department": "Sales", "region": "West", "hire_date": "2021-01-03", "salary": 900},
        {"id": 4, "name": "Diana", "department": "Sales", "region": "West", "hire_date": "2021-01-04", "salary": 1100},
    ]
    df = pd.DataFrame(data)
    
    query = """
    SELECT id, name 
    FROM employees 
    MATCH_RECOGNIZE (
        PARTITION BY department, region
        ORDER BY hire_date
        MEASURES salary AS avg_salary
        PATTERN (A)
        DEFINE A AS salary > 1000
    );
    """
    
    result = match_recognize(query, df)
    # Print before asserting so the actual output is visible when a check fails.
    print("Basic Pattern Test:")
    print(result)

    # NOTE(review): assumes match_recognize returns a DataFrame exposing the
    # selected `id` and `name` columns - confirm against the executor.
    matched_ids = list(result["id"])
    assert matched_ids == [1, 2, 4], (
        f"expected rows with salary > 1000 in hire_date order, got ids {matched_ids}"
    )
    matched_names = list(result["name"])
    assert matched_names == ["Alice", "Bob", "Diana"], (
        f"unexpected names in result: {matched_names}"
    )

def test_price_trend():
    """Exercise a rise-then-fall price pattern (A+ B+) per customer.

    Builds eight orders for two customers, runs the query through
    match_recognize and prints the returned result. Nothing is asserted.
    """
    sql = """
    SELECT *
    FROM orders
    MATCH_RECOGNIZE (
        PARTITION BY customer_id
        ORDER BY order_date
        MEASURES 
            FIRST(A.price) AS start_price,
            LAST(A.price) AS peak_price,
            LAST(B.price) AS end_price
        ONE ROW PER MATCH
        AFTER MATCH SKIP TO NEXT ROW
        PATTERN (A+ B+)
        DEFINE
            A AS PREV(price) IS NULL OR price > PREV(price),
            B AS price < PREV(price)
    );
    """

    fields = ("order_id", "customer_id", "price", "order_date")
    rows = (
        # Customer C1: prices climb to 200, then drop.
        (1, "C1", 100, "2023-01-01"),
        (2, "C1", 150, "2023-01-02"),
        (3, "C1", 200, "2023-01-03"),
        (4, "C1", 180, "2023-01-04"),
        (5, "C1", 160, "2023-01-05"),
        # Customer C2: one step up, one step down.
        (6, "C2", 120, "2023-01-01"),
        (7, "C2", 140, "2023-01-02"),
        (8, "C2", 130, "2023-01-03"),
    )
    orders = pd.DataFrame([dict(zip(fields, row)) for row in rows])

    outcome = match_recognize(sql, orders)
    print("\nPrice Trend Test:", outcome, sep="\n")

def test_complex_pattern():
    """Exercise an alternation of two quantified sequences: (A B+) | (B A+).

    Feeds six timestamped events labelled A, B or C through match_recognize
    with ALL ROWS PER MATCH and prints the returned result. Nothing is
    asserted.
    """
    sql = """
    SELECT *
    FROM events
    MATCH_RECOGNIZE (
        ORDER BY timestamp
        MEASURES 
            FIRST(A.value) AS first_value,
            LAST(B.value) AS last_value,
            COUNT(*) AS pattern_length
        ALL ROWS PER MATCH
        PATTERN ((A B+) | (B A+))
        SUBSET U = (A, B)
        DEFINE
            A AS sequence = 'A',
            B AS sequence = 'B'
    );
    """

    # One label per event; ids, values and minute offsets follow the position.
    labels = ["A", "B", "A", "C", "B", "A"]
    records = []
    for position, label in enumerate(labels):
        records.append(
            {
                "event_id": position + 1,
                "sequence": label,
                "value": (position + 1) * 10,
                "timestamp": f"2023-01-01 10:{position:02d}:00",
            }
        )
    events = pd.DataFrame(records)

    outcome = match_recognize(sql, events)
    print("\nComplex Pattern Test:", outcome, sep="\n")

def test_running_measures():
    """Exercise RUNNING and FINAL measure semantics over an A+ pattern.

    Sends five daily metric readings through match_recognize with
    ALL ROWS PER MATCH and prints the returned result. Nothing is asserted.
    """
    sql = """
    SELECT *
    FROM metrics
    MATCH_RECOGNIZE (
        ORDER BY timestamp
        MEASURES 
            RUNNING AVG(A.metric) AS running_avg,
            FINAL AVG(A.metric) AS final_avg,
            RUNNING COUNT(*) AS running_count
        ALL ROWS PER MATCH
        PATTERN (A+)
        DEFINE
            A AS PREV(metric) IS NULL OR metric >= PREV(metric)
    );
    """

    # Readings for 2023-01-01 .. 2023-01-05; the fourth one dips below its predecessor.
    readings = [100, 110, 120, 115, 125]
    records = [
        {"id": day, "metric": reading, "timestamp": f"2023-01-{day:02d}"}
        for day, reading in enumerate(readings, start=1)
    ]
    metrics = pd.DataFrame(records)

    outcome = match_recognize(sql, metrics)
    print("\nRunning Measures Test:", outcome, sep="\n")

def test_unmatched_rows():
    """Exercise ALL ROWS PER MATCH WITH UNMATCHED ROWS on two categories.

    Sends five rows, alternating between categories A and B, through
    match_recognize and prints the returned result. Nothing is asserted.
    """
    sql = """
    SELECT *
    FROM data
    MATCH_RECOGNIZE (
        PARTITION BY category
        ORDER BY id
        MEASURES 
            FIRST(A.value) AS start_value,
            LAST(A.value) AS end_value
        ALL ROWS PER MATCH WITH UNMATCHED ROWS
        PATTERN (A+)
        DEFINE
            A AS PREV(value) IS NULL OR value > PREV(value)
    );
    """

    values = (100, 90, 80, 110, 120)
    categories = ("A", "B", "A", "B", "A")
    records = []
    for row_id, (value, category) in enumerate(zip(values, categories), start=1):
        records.append({"id": row_id, "value": value, "category": category})
    frame = pd.DataFrame(records)

    outcome = match_recognize(sql, frame)
    print("\nUnmatched Rows Test:", outcome, sep="\n")

if __name__ == "__main__":
    # Execute every scenario in declaration order; an exception raised by one
    # scenario propagates and stops the remaining ones from running.
    for scenario in (
        test_basic_pattern,
        test_price_trend,
        test_complex_pattern,
        test_running_measures,
        test_unmatched_rows,
    ):
        scenario()

DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT id, name FROM employees MATCH_RECOGNIZE ( PARTITION BY department, region ORDER BY hire_date MEASURES salary AS avg_salary PATTERN (A) DEFINE A AS salary > 1000 );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=id, metadata={}), SelectItem(expression=name, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='employees')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['department', 'region'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='hire_date', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='salary', alias='avg_salary', metada

AttributeError: 'dict' object has no attribute 'condition'