In [3]:
from src.parser.match_recognize_extractor import parse_full_query

if __name__ == "__main__":
    # Smoke test for the parser: one complete MATCH_RECOGNIZE statement that uses
    # PARTITION BY, ORDER BY, MEASURES, ONE ROW PER MATCH, PATTERN and DEFINE.
    # The literal is passed to the parser verbatim, so its indentation and
    # line breaks are part of the input and must not be reformatted.
    sample_query = """
       SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    A.totalprice AS starting_price,
                    LAST(B.totalprice) AS bottom_price
                ONE ROW PER MATCH
                PATTERN (A B+)
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """



    # Parse the statement and print the result; the recorded output of this
    # cell shows a FullQueryAST with select, from and match_recognize parts.
    components = parse_full_query(sample_query)
    print(components)


DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM orders MATCH_RECOGNIZE( PARTITION BY custkey ORDER BY orderdate MEASURES A.totalprice AS starting_price, LAST(B.totalprice) AS bottom_price ONE ROW PER MATCH PATTERN (A B+) DEFINE B AS totalprice < PREV(totalprice) );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='orders')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['custkey'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='A.totalprice', alias='starting_pr

FullQueryAST(
  select_clause=SelectClause(items=[SelectItem(expression=*, metadata={})]),
  from_clause=FromClause(table='orders'),
  match_recognize=MatchRecognizeClause(
  partition_by=PartitionByClause(columns=['custkey']),
  order_by=OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)]),
  measures=MeasuresClause(measures=[Measure(expression='A.totalprice', alias='starting_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False), Measure(expression='LAST(B.totalprice)', alias='bottom_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False)]),
  rows_per_match=ONE ROW PER MATCH,
  after_match_skip=None,
  pattern=PatternClause(pattern='A B+', metadata={'variables': ['A', 'B+'], 'base_variables': ['A', 'B']}),
  subset=[],
  define=DefineClause(definitions=[Define(variable='B', condition='totalprice<PREV(totalprice)')])
),
  metadata={}
)


In [5]:
from src.parser.match_recognize_extractor import parse_full_query
from src.ast.ast_nodes import FullQueryAST

# Parser fixtures. Each entry is a dict with two keys:
#   "name"  - label printed before the case is run
#   "query" - SQL text handed to parse_full_query unchanged
# Cases 1-12 are well-formed statements; cases 13-16 (names prefixed with
# "Error:") are deliberately malformed. The query literals are
# whitespace-sensitive test input, so they must not be re-indented.
test_queries = [
    # 1. Basic ONE ROW PER MATCH
    {
        "name": "Basic One Row Per Match",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    A.totalprice AS starting_price,
                    LAST(B.totalprice) AS bottom_price
                ONE ROW PER MATCH
                PATTERN (A B+)
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """
    },
    # 2. ALL ROWS PER MATCH with SHOW EMPTY MATCHES, SUBSET clause and union variable
    #    (U is declared as the union of C and D and is referenced in MEASURES)
    {
        "name": "All Rows Per Match - Show Empty Matches",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    A.totalprice AS starting_price,
                    LAST(B.totalprice) AS bottom_price,
                    LAST(U.totalprice) AS top_price
                ALL ROWS PER MATCH SHOW EMPTY MATCHES
                PATTERN (A | B | C | D)
                SUBSET U = (C, D)
                DEFINE
                    B AS totalprice < PREV(totalprice),
                    C AS totalprice > PREV(totalprice) AND totalprice <= A.totalprice,
                    D AS totalprice > PREV(totalprice)
            );
        """
    },
    # 3. ALL ROWS PER MATCH with OMIT EMPTY MATCHES
    #    (the pattern is written "A | B |C" - no space before C - on purpose or by
    #    accident; either way it is part of the input)
    {
        "name": "All Rows Per Match - Omit Empty Matches",
        "query": """
            SELECT * FROM transactions MATCH_RECOGNIZE(
                PARTITION BY account_id
                ORDER BY transaction_date
                MEASURES
                    A.amount AS start_amount,
                    C.amount AS final_amount
                ALL ROWS PER MATCH OMIT EMPTY MATCHES
                PATTERN (A | B |C)
                DEFINE
                    B AS amount < PREV(amount),
                    C AS amount > PREV(amount)
            );
        """
    },
    # 4. ALL ROWS PER MATCH WITH UNMATCHED ROWS
    {
        "name": "All Rows Per Match - With Unmatched Rows",
        "query": """
            SELECT * FROM sales MATCH_RECOGNIZE(
                PARTITION BY region
                ORDER BY sale_date
                MEASURES
                    A.price AS initial_price,
                    D.price AS final_price,
                    MATCH_NUMBER() AS match_id
                ALL ROWS PER MATCH WITH UNMATCHED ROWS
                PATTERN (A B C D)
                DEFINE
                    B AS price < PREV(price),
                    C AS price > PREV(price),
                    D AS price > A.price
            );
        """
    },
    # 5. Nested pattern: alternation inside a repeated group "(B C | D E)+",
    #    combined with WITH UNMATCHED ROWS. There is no SUBSET clause here, so
    #    no union variable is involved.
    {
        "name": "Nested Pattern with Unmatched Rows",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    A.totalprice AS starting_price,
                    F.totalprice AS ending_price
                ALL ROWS PER MATCH WITH UNMATCHED ROWS
                PATTERN (A (B C | D E)+ F)
                DEFINE
                    B AS totalprice < PREV(totalprice),
                    C AS totalprice > PREV(totalprice),
                    D AS totalprice = PREV(totalprice),
                    E AS totalprice >= PREV(totalprice)
            );
        """
    },
    # 6. Using CLASSIFIER() and MATCH_NUMBER() functions
    {
        "name": "Classifier and Match Number Functions",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    CLASSIFIER() AS pattern_type,
                    MATCH_NUMBER() AS match_id,
                    A.totalprice AS starting_price,
                    LAST(B.totalprice) AS bottom_price
                ONE ROW PER MATCH
                PATTERN (A B+ C)
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """
    },
    # 7. Running vs Final Semantics in MEASURES
    {
        "name": "Running vs Final Semantics in Measures",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    RUNNING LAST(A.totalprice) AS running_last_price,
                    FINAL LAST(A.totalprice) AS final_last_price
                ONE ROW PER MATCH
                PATTERN (A B+ C)
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """
    },
    # 8. Aggregates with Running vs Final semantics (avg and count)
    {
        "name": "Running Avg vs Final Count Functions",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    RUNNING avg(A.totalprice) AS running_avg,
                    FINAL count(A.*) AS final_count
                ONE ROW PER MATCH
                PATTERN (A B+ C)
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """
    },
    # 9. Empty match: the pattern is the empty group "()" and there is no DEFINE.
    #    NOTE(review): the name mentions unmatched rows, but the query uses
    #    ONE ROW PER MATCH; the first measure also references A, which does not
    #    appear in the pattern - confirm this is the intended input.
    {
        "name": "Empty Match and Unmatched Rows",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    A.totalprice AS starting_price,
                    MATCH_NUMBER() AS match_id
                ONE ROW PER MATCH
                PATTERN (())
            );
        """
    },
    # 10. Physical Navigation Functions: PREV() and NEXT() with explicit offsets
    {
        "name": "Physical Navigation Functions",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    PREV(A.totalprice, 2) AS prev_price,
                    NEXT(A.totalprice, 1) AS next_price
                ONE ROW PER MATCH
                PATTERN (A B+ C)
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """
    },
    # 11. Nested Navigation Function: FIRST() wrapped in PREV()
    {
        "name": "Nested Navigation Function",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    PREV(FIRST(A.totalprice, 3), 2) AS nested_nav_price
                ONE ROW PER MATCH
                PATTERN (A B+ C)
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """
    },
    # --- Additional Edge Cases ---
    # 12. Unconventional whitespace: same statement as case 11, but with extra
    #     spaces between keywords, inside function calls and inside the pattern
    {
        "name": "Complex Pattern with Unconventional Whitespace",
        "query": """
            SELECT *
            FROM   orders
            MATCH_RECOGNIZE(
                PARTITION BY   custkey
                ORDER BY   orderdate
                MEASURES
                  PREV(  FIRST( A.totalprice , 3 )  , 2 ) AS nested_nav_price
                ONE ROW PER MATCH
                PATTERN (  A   B+   C )
                DEFINE
                  B AS    totalprice < PREV(  totalprice )
            );
        """
    },
    # 13. Missing ORDER BY clause (should raise an error)
    {
        "name": "Error: Missing ORDER BY Clause",
        "query": """
            SELECT *
            FROM orders
            MATCH_RECOGNIZE(
                PARTITION BY custkey
                MEASURES
                    A.totalprice AS starting_price
                ONE ROW PER MATCH
                PATTERN (A B+ C)
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """
    },
    # 14. Missing PATTERN clause when DEFINE is provided (should raise an error)
    {
        "name": "Error: Missing PATTERN Clause",
        "query": """
            SELECT *
            FROM orders
            MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    A.totalprice AS starting_price
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """
    },
    # 15. Malformed PATTERN clause (missing closing parenthesis)
    {
        "name": "Error: Malformed PATTERN Clause",
        "query": """
            SELECT *
            FROM orders
            MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    A.totalprice AS starting_price
                ONE ROW PER MATCH
                PATTERN (A B+ C
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """
    },
    # 16. Malformed DEFINE clause (missing 'AS' keyword)
    {
        "name": "Error: Malformed DEFINE Clause",
        "query": """
            SELECT *
            FROM orders
            MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    A.totalprice AS starting_price
                ONE ROW PER MATCH
                PATTERN (A B+ C)
                DEFINE
                    B totalprice < PREV(totalprice)
            );
        """
    }
]

# Walk the fixtures in order. For each one, print a banner with its name,
# then either the parsed AST or the text of whatever exception was raised,
# and finish with a separator line.
for case in test_queries:
    banner = f"🔹 Running Test: {case['name']}"
    print(banner)
    try:
        # Parsing and printing stay inside the same try so that a failure in
        # either step is reported through the "Error: ..." line below.
        print(parse_full_query(case["query"]))
    except Exception as exc:
        print(f"Error: {exc}")
    print("=" * 80)


DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM orders MATCH_RECOGNIZE( PARTITION BY custkey ORDER BY orderdate MEASURES A.totalprice AS starting_price, LAST(B.totalprice) AS bottom_price ONE ROW PER MATCH PATTERN (A B+) DEFINE B AS totalprice < PREV(totalprice) );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='orders')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['custkey'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='A.totalprice', alias='starting_pr

🔹 Running Test: Basic One Row Per Match
FullQueryAST(
  select_clause=SelectClause(items=[SelectItem(expression=*, metadata={})]),
  from_clause=FromClause(table='orders'),
  match_recognize=MatchRecognizeClause(
  partition_by=PartitionByClause(columns=['custkey']),
  order_by=OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)]),
  measures=MeasuresClause(measures=[Measure(expression='A.totalprice', alias='starting_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False), Measure(expression='LAST(B.totalprice)', alias='bottom_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False)]),
  rows_per_match=ONE ROW PER MATCH,
  after_match_skip=None,
  pattern=PatternClause(pattern='A B+', metadata={'variables': ['A', 'B+'], 'base_variables': ['A', 'B']}),
  subset=[],
  define=DefineClause(definitions=[Define(variable='B', condition='totalprice<PREV(totalprice)')])
),
  metadata={}
)
🔹 Runn

In [None]:
# testing  -- scratch placeholder, kept only as a comment: a bare `testing`
# name is undefined and would raise NameError when this cell is executed
# (for example during Restart Kernel -> Run All).

In [3]:
from src.parser.match_recognize_extractor import parse_full_query

# Pattern-focused parser fixtures. Each entry is a dict with two keys:
#   "description" - label printed before the case is run
#   "query"       - SQL text handed to parse_full_query unchanged
# Descriptions prefixed with "Error:" label the negative cases. The query
# literals are test input, so their spacing must be left exactly as written.
sample_queries = [
    # Pattern tokens separated by single spaces; variables in mixed case.
    {
        "description": "Simple pattern with whitespace",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price
    PATTERN (A b+ c*)
    DEFINE A AS totalprice > 0, b AS totalprice < PREV(totalprice), c AS totalprice < 100
);
"""
    },
    # Same variables and quantifiers written with no separators: "Ab+c*".
    {
        "description": "Pattern without whitespace",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price
    PATTERN (Ab+c*)
    DEFINE A AS totalprice > 0, b AS totalprice < PREV(totalprice), c AS totalprice < 100
);
"""
    },
    # A multi-character identifier used as the only pattern variable.
    {
        "description": "Single word pattern (one token)",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES vfggf.totalprice AS starting_price
    PATTERN (vfggf)
    DEFINE vfggf AS totalprice > 0
);
"""
    },
    # PERMUTE over three variables, plus a SUBSET union variable U used in MEASURES.
    {
        "description": "Pattern with PERMUTE construct",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES LAST(U.totalprice) AS top_price
    PATTERN (PERMUTE(A,B,C))
    SUBSET U = (A,B,C)
    DEFINE A AS totalprice > 0, B AS totalprice < PREV(totalprice), C AS totalprice < 100
);
"""
    },
    # An extra pair of parentheses around a sequence that contains quantifiers.
    {
        "description": "Pattern with grouping and quantifiers",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price
    PATTERN ((A B+ C+))
    DEFINE A AS totalprice > 0, B AS totalprice < PREV(totalprice), C AS totalprice < 100
);
"""
    },
    # NOTE(review): this query text is identical to the first entry
    # ("Simple pattern with whitespace"). Presumably the pattern was meant to
    # mix spaced and unspaced tokens - confirm and adjust the fixture.
    {
        "description": "Mixed pattern: whitespace vs. no whitespace",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price
    PATTERN (A b+ c*)
    DEFINE A AS totalprice > 0, b AS totalprice < PREV(totalprice), c AS totalprice < 100
);
"""
    },
    # Bounded quantifier {2,4} in its default (greedy) form.
    {
        "description": "Pattern with quantifiers (greedy)",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price, LAST(A.totalprice) AS ending_price
    PATTERN (A{2,4})
    DEFINE A AS totalprice > 0
);
"""
    },
    # Same bounded quantifier with the trailing "?" that marks it reluctant.
    {
        "description": "Pattern with quantifiers (reluctant)",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price, LAST(A.totalprice) AS ending_price
    PATTERN (A{2,4}?)
    DEFINE A AS totalprice > 0
);
"""
    },
    # Empty pattern "()". Note that DEFINE still declares A, which the pattern
    # does not use.
    {
        "description": "Empty PATTERN clause (allowed as empty match)",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES NULL AS starting_price
    PATTERN ()
    DEFINE A AS totalprice > 0
);
"""
    },
    # The literal below is not a raw string, so each \t escape becomes a real
    # tab character in the text the parser receives.
    {
        "description": "Pattern with irregular whitespace and tabs",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY   custkey
    ORDER BY    orderdate
    MEASURES   A.totalprice   AS   starting_price
    PATTERN (   A   \t   b+   \t  c*   )
    DEFINE   A   AS totalprice > 0,   b   AS totalprice < PREV(totalprice),   c   AS totalprice < 100
);
"""
    },
    # Negative case: the pattern uses c, but DEFINE has no entry for it.
    {
        "description": "Error: Undefined variable in DEFINE (missing c)",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price
    PATTERN (A b+ c*)
    DEFINE A AS totalprice > 0, b AS totalprice < PREV(totalprice)
);
"""
    },
    # Negative case: DEFINE declares c, which the pattern never uses.
    # This query also has no MEASURES clause.
    {
        "description": "Error: Extra variable in DEFINE (c extra)",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    PATTERN (A b+)
    DEFINE A AS totalprice > 0, b AS totalprice < PREV(totalprice), c AS totalprice < 100
);
"""
    },
    # A bounded quantifier followed by a second variable; the comma inside
    # {2,4} sits next to the commas that separate DEFINE entries.
    {
        "description": "Pattern with bounded quantifiers preserving commas",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price
    PATTERN (A{2,4} B+)
    DEFINE A AS totalprice > 0, B AS totalprice < PREV(totalprice)
);
"""
    },
    # Three levels of grouping, unspaced variables, and a reluctant bounded
    # quantifier on D; E's condition uses BETWEEN ... AND ...
    {
        "description": "Complex nested grouping with mixed quantifiers",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price
    PATTERN (((AB+)(CD{2,4}?))E+)
    DEFINE A AS totalprice > 0, B AS totalprice < PREV(totalprice), C AS totalprice > 50, D AS totalprice < 100, E AS totalprice BETWEEN 50 AND 150
);
"""
    },
    # NOTE(review): PATTERN and DEFINE match the earlier "missing c" case; the
    # only difference is that this query has no MEASURES clause. The wording of
    # the description is ambiguous - confirm what this case is meant to cover.
    {
        "description": "Error: Undefined variable in DEFINE (extra variable missing in pattern)",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    PATTERN (A b+ c*)
    DEFINE A AS totalprice > 0, b AS totalprice < PREV(totalprice)
);
"""
    }
]

# Run test cases.
#
# A description that starts with "Error:" marks a negative case: the query is
# expected to be rejected. For those cases an exception is the passing outcome
# and a successful parse is the failure; for every other case it is the other
# way round. Without this distinction a correctly rejected query would be
# reported as "Test Failed" and a wrongly accepted one as "Test Passed".
for test in sample_queries:
    description = test["description"]
    expects_error = description.startswith("Error:")
    print(f"--- Running Test: {description} ---")
    try:
        result = parse_full_query(test["query"])
        if expects_error:
            print("Test Failed: Query parsed successfully but an error was expected.")
        else:
            print("Test Passed: Query parsed successfully.")
        # Printing stays inside the try so a failure while rendering the AST
        # is still reported instead of aborting the whole run.
        print(result)
    except Exception as e:
        if expects_error:
            print("Test Passed: Query was rejected as expected:")
        else:
            print("Test Failed with error:")
        print(e)
    print("=" * 80)


DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM orders MATCH_RECOGNIZE( PARTITION BY custkey ORDER BY orderdate MEASURES A.totalprice AS starting_price PATTERN (A b+ c*) DEFINE A AS totalprice > 0, b AS totalprice < PREV(totalprice), c AS totalprice < 100 );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='orders')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['custkey'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='A.totalprice', alias='starting_price', m

--- Running Test: Simple pattern with whitespace ---
Test Passed: Query parsed successfully.
FullQueryAST(
  select_clause=SelectClause(items=[SelectItem(expression=*, metadata={})]),
  from_clause=FromClause(table='orders'),
  match_recognize=MatchRecognizeClause(
  partition_by=PartitionByClause(columns=['custkey']),
  order_by=OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)]),
  measures=MeasuresClause(measures=[Measure(expression='A.totalprice', alias='starting_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False)]),
  rows_per_match=None,
  after_match_skip=None,
  pattern=PatternClause(pattern='Ab+c*', metadata={'variables': ['A', 'b+', 'c*'], 'base_variables': ['A', 'b', 'c']}),
  subset=[],
  define=DefineClause(definitions=[Define(variable='A', condition='totalprice>0'), Define(variable='b', condition='totalprice<PREV(totalprice)'), Define(variable='c', condition='totalprice<100')])
),
  metadata={}
)

In [None]:
from src.parser.match_recognize_extractor import parse_full_query

def run_test(query, description, expect_error=None):
    """Parse ``query`` and print a pass/fail report for it.

    Args:
        query: SQL text passed unchanged to ``parse_full_query``.
        description: Label printed in the banner line.
        expect_error: ``True`` if the query is a negative case that must be
            rejected, ``False`` if it must parse. When left as ``None`` the
            expectation is inferred from ``description``: it is treated as a
            negative case if it contains "should fail" or starts with
            "Error:" (both checked case-insensitively).

    Returns:
        None. The report is written to stdout.

    Any exception raised while parsing or printing is caught and reported;
    nothing is re-raised.
    """
    if expect_error is None:
        label = description.lower()
        expect_error = "should fail" in label or label.startswith("error:")

    print(f"--- Running Test: {description} ---")
    try:
        # Named query_ast rather than ast so it cannot be confused with the
        # standard-library ast module.
        query_ast = parse_full_query(query)
        if expect_error:
            print("Test Failed: Query parsed successfully but an error was expected.")
        else:
            print("Test Passed: Query parsed successfully.")
        print(query_ast)
    except Exception as e:
        # For a negative case the exception is the outcome being tested for.
        if expect_error:
            print("Test Passed: Query was rejected as expected:")
        else:
            print("Test Failed with error:")
        print(e)
    print("=" * 80)

# Driver: feeds twelve MATCH_RECOGNIZE statements to run_test, one call per
# statement, in the order listed. Descriptions ending in "(should fail)" mark
# the statements that are expected to be rejected. When the cell runs in a
# notebook kernel __name__ is "__main__", so the guard passes.
# The query literals are test input and must keep their exact spacing.
if __name__ == "__main__":
    # 1. Alternation using '|' operator
    test_query1 = """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price
    PATTERN (A | B | C)
    DEFINE A AS totalprice > 100, B AS totalprice < 50, C AS totalprice BETWEEN 50 AND 100
);
"""
    run_test(test_query1, "Alternate forms: Alternation using '|' operator")

    # 2. Grouping: (A B) is parenthesised, C follows the group
    test_query2 = """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price
    PATTERN ((A B) C)
    DEFINE A AS totalprice > 0, B AS totalprice < PREV(totalprice), C AS totalprice < 100
);
"""
    run_test(test_query2, "Alternate forms: Grouping")

    # 3. Permutation: PERMUTE over A, B, C plus a SUBSET union variable U
    test_query3 = """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES LAST(U.totalprice) AS top_price
    PATTERN (PERMUTE(A, B, C))
    SUBSET U = (A, B, C)
    DEFINE A AS totalprice > 0, B AS totalprice < PREV(totalprice), C AS totalprice < 100
);
"""
    run_test(test_query3, "Alternate forms: Permutation")

    # 4a. Quantifiers with bounds (Greedy)
    test_query4a = """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price, LAST(A.totalprice) AS ending_price
    PATTERN (A{2,4})
    DEFINE A AS totalprice > 0
);
"""
    run_test(test_query4a, "Quantifiers with bounds: Greedy")

    # 4b. Quantifiers with bounds (Reluctant): same bounds with a trailing '?'
    test_query4b = """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price, LAST(A.totalprice) AS ending_price
    PATTERN (A{2,4}?)
    DEFINE A AS totalprice > 0
);
"""
    run_test(test_query4b, "Quantifiers with bounds: Reluctant")

    # 5. Valid Navigation and Aggregates
    test_query5 = """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES LAST(A.totalprice) AS final_price, avg(A.totalprice) AS avg_price
    PATTERN (A+)
    DEFINE A AS totalprice > 0
);
"""
    run_test(test_query5, "Valid Navigation and Aggregates")

    # 6. Invalid Navigation Function (should fail): LAST() is given the
    #    literal 1 instead of a column reference
    test_query6 = """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES LAST(1) AS invalid_nav
    PATTERN (A+)
    DEFINE A AS totalprice > 0
);
"""
    run_test(test_query6, "Invalid Navigation Function (should fail)")

    # 7. Invalid Nested Aggregate in Navigation (should fail): avg() appears
    #    as the argument of FIRST()
    test_query7 = """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES FIRST(avg(A.totalprice),2) AS invalid_agg_nav
    PATTERN (A+)
    DEFINE A AS totalprice > 0
);
"""
    run_test(test_query7, "Invalid Nested Aggregate in Navigation (should fail)")

    # 8. Case-sensitive identifier validation: upper-case A mixed with
    #    lower-case b and c, spelled the same way in PATTERN and DEFINE
    test_query8 = """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price
    PATTERN (A b+ c)
    DEFINE A AS totalprice > 0, b AS totalprice < PREV(totalprice), c AS totalprice < 100
);
"""
    run_test(test_query8, "Case-sensitive identifier validation")

    # 9. Grouping with Quantifiers
    test_query9 = """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price
    PATTERN ((A B+ C+))
    DEFINE A AS totalprice > 0, B AS totalprice < PREV(totalprice), C AS totalprice < 100
);
"""
    run_test(test_query9, "Alternate forms: Grouping with quantifiers")

    # 10. Aggregate with inconsistent variable references (should fail):
    #     max_by() takes one argument from A and the other from B
    test_query10 = """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES max_by(A.totalprice, B.totalprice) AS invalid_agg
    PATTERN (A B+ C)
    DEFINE B AS totalprice < PREV(totalprice)
);
"""
    run_test(test_query10, "Aggregate with inconsistent variable references (should fail)")

    # 11. Aggregate with consistent variable references: both aggregates read
    #     only from A
    test_query11 = """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES avg(A.totalprice) AS avg_price, count(A.*) AS count_price
    PATTERN (A B+ C)
    DEFINE B AS totalprice < PREV(totalprice)
);
"""
    run_test(test_query11, "Aggregate with consistent variable references")

    # 12. Navigation and Aggregate with MATCH_NUMBER and avg
    test_query12 = """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES MATCH_NUMBER() AS match_id, avg(A.totalprice) AS avg_price
    PATTERN (A B+ C)
    DEFINE B AS totalprice < PREV(totalprice)
);
"""
    run_test(test_query12, "MATCH_NUMBER with aggregates")


DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM orders MATCH_RECOGNIZE( PARTITION BY custkey ORDER BY orderdate MEASURES A.totalprice AS starting_price PATTERN (A | B | C) DEFINE A AS totalprice > 100, B AS totalprice < 50, C AS totalprice BETWEEN 50 AND 100 );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='orders')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['custkey'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='A.totalprice', alias='starting_price'

--- Running Test: Alternate forms: Alternation using '|' operator ---
Test Passed: Query parsed successfully.
FullQueryAST(
  select_clause=SelectClause(items=[SelectItem(expression=*, metadata={})]),
  from_clause=FromClause(table='orders'),
  match_recognize=MatchRecognizeClause(
  partition_by=PartitionByClause(columns=['custkey']),
  order_by=OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)]),
  measures=MeasuresClause(measures=[Measure(expression='A.totalprice', alias='starting_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False)]),
  rows_per_match=None,
  after_match_skip=None,
  pattern=PatternClause(pattern='A | B | C', metadata={'variables': ['A', 'B', 'C'], 'base_variables': ['A', 'B', 'C']}),
  subset=[],
  define=DefineClause(definitions=[Define(variable='A', condition='totalprice>100'), Define(variable='B', condition='totalprice<50'), Define(variable='C', condition='totalpriceBETWEEN50AND100')])


In [None]:
from src.parser.match_recognize_extractor import parse_full_query

def run_test(query: str, description: str, expect_failure: bool = False) -> None:
    """Parse ``query`` with ``parse_full_query`` and print a pass/fail report.

    Args:
        query: SQL text handed to ``parse_full_query``.
        description: Label printed in the report header.
        expect_failure: Set to ``True`` for negative tests, i.e. queries the
            parser is supposed to reject. The verdict is then inverted: an
            exception counts as a pass and a successful parse as a failure.
            Defaults to ``False``, which keeps the original reporting.

    Returns:
        None. The report is written to stdout and always ends with a line of
        80 ``=`` characters.
    """
    print(f"--- Running Test: {description} ---")
    try:
        ast = parse_full_query(query)
    except Exception as e:
        # Any exception raised by the parser is reported, never propagated,
        # so the remaining cases still run.
        if expect_failure:
            print("Test Passed: Query was rejected as expected:")
        else:
            print("Test Failed with error:")
        print(e)
    else:
        if expect_failure:
            # A negative test that parses cleanly means validation is missing.
            print("Test Failed: Query parsed successfully but was expected to be rejected.")
        else:
            print("Test Passed: Query parsed successfully.")
        print(ast)
    print("=" * 80)

if __name__ == "__main__":
    # 1. Alternate forms: Alternation using '|' operator
    query1 = """
    SELECT * FROM orders MATCH_RECOGNIZE(
        PARTITION BY custkey
        ORDER BY orderdate
        MEASURES A.totalprice AS starting_price
        PATTERN (A | B | C)
        DEFINE A AS totalprice > 100, B AS totalprice < 50, C AS totalprice BETWEEN 50 AND 100
    );
    """
    run_test(query1, "Alternate forms: Alternation using '|' operator")

    # 2. Alternate forms: Grouping (using parentheses)
    query2 = """
    SELECT * FROM orders MATCH_RECOGNIZE(
        PARTITION BY custkey
        ORDER BY orderdate
        MEASURES A.totalprice AS starting_price
        PATTERN ((A B C))
        DEFINE A AS totalprice > 0, B AS totalprice < PREV(totalprice), C AS totalprice < 100
    );
    """
    run_test(query2, "Alternate forms: Grouping")

    # 3. Alternate forms: Permutation
    query3 = """
    SELECT * FROM orders MATCH_RECOGNIZE(
        PARTITION BY custkey
        ORDER BY orderdate
        MEASURES LAST(U.totalprice) AS top_price
        PATTERN (PERMUTE(A, B, C))
        SUBSET U = (A, B, C)
        DEFINE A AS totalprice > 0, B AS totalprice < PREV(totalprice), C AS totalprice < 100
    );
    """
    run_test(query3, "Alternate forms: Permutation")

    # 4a. Quantifiers with bounds: Greedy
    query4a = """
    SELECT * FROM orders MATCH_RECOGNIZE(
        PARTITION BY custkey
        ORDER BY orderdate
        MEASURES A.totalprice AS starting_price, LAST(A.totalprice) AS ending_price
        PATTERN (A{2,4})
        DEFINE A AS totalprice > 0
    );
    """
    run_test(query4a, "Quantifiers with bounds: Greedy")

    # 4b. Quantifiers with bounds: Reluctant
    query4b = """
    SELECT * FROM orders MATCH_RECOGNIZE(
        PARTITION BY custkey
        ORDER BY orderdate
        MEASURES A.totalprice AS starting_price, LAST(A.totalprice) AS ending_price
        PATTERN (A{2,4}?)
        DEFINE A AS totalprice > 0
    );
    """
    run_test(query4b, "Quantifiers with bounds: Reluctant")

    # 5. Valid Navigation and Aggregates
    query5 = """
    SELECT * FROM orders MATCH_RECOGNIZE(
        PARTITION BY custkey
        ORDER BY orderdate
        MEASURES LAST(A.totalprice) AS final_price, avg(A.totalprice) AS avg_price
        PATTERN (A+)
        DEFINE A AS totalprice > 0
    );
    """
    run_test(query5, "Valid Navigation and Aggregates")

    # 6. Invalid Navigation Function (should fail)
    query6 = """
    SELECT * FROM orders MATCH_RECOGNIZE(
        PARTITION BY custkey
        ORDER BY orderdate
        MEASURES LAST(1) AS invalid_nav
        PATTERN (A+)
        DEFINE A AS totalprice > 0
    );
    """
    run_test(query6, "Invalid Navigation Function (should fail)", expect_failure=True)

    # 7. Invalid Nested Aggregate in Navigation (should fail)
    query7 = """
    SELECT * FROM orders MATCH_RECOGNIZE(
        PARTITION BY custkey
        ORDER BY orderdate
        MEASURES FIRST(avg(A.totalprice),2) AS invalid_agg_nav
        PATTERN (A+)
        DEFINE A AS totalprice > 0
    );
    """
    run_test(query7, "Invalid Nested Aggregate in Navigation (should fail)", expect_failure=True)

    # 8. Case-Sensitive Identifier Validation
    query8 = """
    SELECT * FROM orders MATCH_RECOGNIZE(
        PARTITION BY custkey
        ORDER BY orderdate
        MEASURES A.totalprice AS starting_price
        PATTERN (A b+ c)
        DEFINE A AS totalprice > 0, b AS totalprice < PREV(totalprice), c AS totalprice < 100
    );
    """
    run_test(query8, "Case-Sensitive Identifier Validation")

    # 9. Grouping with Quantifiers
    query9 = """
    SELECT * FROM orders MATCH_RECOGNIZE(
        PARTITION BY custkey
        ORDER BY orderdate
        MEASURES A.totalprice AS starting_price
        PATTERN ((A B+ C+))
        DEFINE A AS totalprice > 0, B AS totalprice < PREV(totalprice), C AS totalprice < 100
    );
    """
    run_test(query9, "Grouping with Quantifiers")

    # 10. Aggregate with inconsistent variable references (should fail)
    query10 = """
    SELECT * FROM orders MATCH_RECOGNIZE(
        PARTITION BY custkey
        ORDER BY orderdate
        MEASURES max_by(A.totalprice, B.totalprice) AS invalid_agg
        PATTERN (A B+ C)
        DEFINE B AS totalprice < PREV(totalprice)
    );
    """
    run_test(
        query10,
        "Aggregate with Inconsistent Variable References (should fail)",
        expect_failure=True,
    )

    # 11. Aggregate with consistent variable references
    query11 = """
    SELECT * FROM orders MATCH_RECOGNIZE(
        PARTITION BY custkey
        ORDER BY orderdate
        MEASURES avg(A.totalprice) AS avg_price, count(A.*) AS count_price
        PATTERN (A B+ C)
        DEFINE B AS totalprice < PREV(totalprice), A AS TRUE, C AS TRUE
    );
    """
    run_test(query11, "Aggregate with Consistent Variable References")

    # 12. MATCH_NUMBER with aggregates
    query12 = """
    SELECT * FROM orders MATCH_RECOGNIZE(
        PARTITION BY custkey
        ORDER BY orderdate
        MEASURES MATCH_NUMBER() AS match_id, avg(A.totalprice) AS avg_price
        PATTERN (A B+ C)
        DEFINE B AS totalprice < PREV(totalprice), A AS TRUE, C AS TRUE
    );
    """
    run_test(query12, "MATCH_NUMBER with Aggregates")


DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM orders MATCH_RECOGNIZE( PARTITION BY custkey ORDER BY orderdate MEASURES A.totalprice AS starting_price PATTERN (A | B | C) DEFINE A AS totalprice > 100, B AS totalprice < 50, C AS totalprice BETWEEN 50 AND 100 );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause via robust splitting: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause via regex: FromClause(table='orders')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['custkey'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='A.tota

--- Running Test: Alternate forms: Alternation using '|' operator ---
Test Passed: Query parsed successfully.
FullQueryAST(
  select_clause=SelectClause(items=[SelectItem(expression=*, metadata={})]),
  from_clause=FromClause(table='orders'),
  match_recognize=MatchRecognizeClause(
  partition_by=PartitionByClause(columns=['custkey']),
  order_by=OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)]),
  measures=MeasuresClause(measures=[Measure(expression='A.totalprice', alias='starting_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False)]),
  rows_per_match=None,
  after_match_skip=None,
  pattern=PatternClause(pattern='A | B | C', metadata={'variables': ['A', 'B', 'C'], 'base_variables': ['A', 'B', 'C']}),
  subset=[],
  define=DefineClause(definitions=[Define(variable='A', condition='totalprice>100'), Define(variable='B', condition='totalprice<50'), Define(variable='C', condition='totalpriceBETWEEN50AND100')])


In [2]:
from src.parser.match_recognize_extractor import parse_match_recognize_query

# PATTERN-clause test cases. Each entry pairs a human-readable description
# with the MATCH_RECOGNIZE query that is handed to the parser.
sample_queries = [
    {
        "description": "Simple pattern with whitespace",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price
    PATTERN (A b+ c*)
    DEFINE A AS totalprice > 0, b AS totalprice < PREV(totalprice), c AS totalprice < 100
);
"""
    },
    {
        "description": "Pattern without whitespace",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price
    PATTERN (Ab+c*)
    DEFINE A AS totalprice > 0, b AS totalprice < PREV(totalprice), c AS totalprice < 100
);
"""
    },
    {
        "description": "Single word pattern (one token)",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES vfggf.totalprice AS starting_price
    PATTERN (vfggf)
    DEFINE vfggf AS totalprice > 0
);
"""
    },
    {
        "description": "Pattern with PERMUTE construct",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES LAST(U.totalprice) AS top_price
    PATTERN (PERMUTE(A,B,C))
    SUBSET U = (A,B,C)
    DEFINE A AS totalprice > 0, B AS totalprice < PREV(totalprice), C AS totalprice < 100
);
"""
    },
    {
        "description": "Pattern with grouping and quantifiers",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price
    PATTERN ((A B+ C+))
    DEFINE A AS totalprice > 0, B AS totalprice < PREV(totalprice), C AS totalprice < 100
);
"""
    },
    {
        # Mixed spacing: "A" is separated by whitespace while "b+" and "c*"
        # are adjacent. This case used to repeat the fully spaced pattern of
        # the first entry and therefore exercised nothing new.
        "description": "Mixed pattern: whitespace vs. no whitespace",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price
    PATTERN (A b+c*)
    DEFINE A AS totalprice > 0, b AS totalprice < PREV(totalprice), c AS totalprice < 100
);
"""
    },
    {
        "description": "Pattern with quantifiers (greedy)",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price, LAST(A.totalprice) AS ending_price
    PATTERN (A{2,4})
    DEFINE A AS totalprice > 0
);
"""
    },
    {
        "description": "Pattern with quantifiers (reluctant)",
        "query": """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price, LAST(A.totalprice) AS ending_price
    PATTERN (A{2,4}?)
    DEFINE A AS totalprice > 0
);
"""
    }
]

# Run every case in sample_queries and report its outcome. Exceptions raised
# while parsing are printed instead of propagated, so all cases are attempted.
for test in sample_queries:
    print("--- Running Test: {} ---".format(test["description"]))
    try:
        result = parse_match_recognize_query(test["query"])
        print("Test Passed: Query parsed successfully.", result, sep="\n")
    except Exception as e:
        print("Test Failed with error:", e, sep="\n")
    print(80 * "=")


DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['custkey'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='A.totalprice', alias='starting_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False)])
DEBUG:src.parser.match_recognize_extractor:Extracted Pattern: PatternClause(pattern='A b+ c*', metadata={'variables': ['A', 'b+', 'c*'], 'base_variables': ['A', 'b', 'c']})
DEBUG:src.parser.match_recognize_extractor:Extracted DEFINE: DefineClause(definitions=[Define(variable='A', condition='totalprice>0'), Define(variable='b', condition='totalprice<PREV(totalprice)'), Define(variable='c', condition='totalprice<100')])
DEBUG:sr

--- Running Test: Simple pattern with whitespace ---
Test Passed: Query parsed successfully.
MatchRecognizeClause(
  partition_by=PartitionByClause(columns=['custkey']),
  order_by=OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)]),
  measures=MeasuresClause(measures=[Measure(expression='A.totalprice', alias='starting_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False)]),
  rows_per_match=None,
  after_match_skip=None,
  pattern=PatternClause(pattern='A b+ c*', metadata={'variables': ['A', 'b+', 'c*'], 'base_variables': ['A', 'b', 'c']}),
  subset=[],
  define=DefineClause(definitions=[Define(variable='A', condition='totalprice>0'), Define(variable='b', condition='totalprice<PREV(totalprice)'), Define(variable='c', condition='totalprice<100')])
)
--- Running Test: Pattern without whitespace ---
Test Passed: Query parsed successfully.
MatchRecognizeClause(
  partition_by=PartitionByClause(columns=['custkey']),

In [1]:
from src.parser.match_recognize_extractor import parse_match_recognize_query

# PATTERN-clause cases as {"description", "query"} records, built from
# (label, SQL) pairs so that each case reads as a single unit.
sample_queries = [
    dict(description=label, query=sql)
    for label, sql in (
        (
            "Simple pattern with whitespace",
            """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price
    PATTERN (A b+ c*)
    DEFINE A AS totalprice > 0, b AS totalprice < PREV(totalprice), c AS totalprice < 100
);
""",
        ),
        (
            "Pattern without whitespace",
            """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price
    PATTERN (AB+C*)
    DEFINE A AS totalprice > 0, B AS totalprice < PREV(totalprice), C AS totalprice < 100
);
""",
        ),
        (
            "Single word pattern (one token)",
            """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES vfggf.totalprice AS starting_price
    PATTERN (vfggf)
    DEFINE vfggf AS totalprice > 0
);
""",
        ),
        (
            "Pattern with PERMUTE construct",
            """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES LAST(U.totalprice) AS top_price
    PATTERN (PERMUTE(A,B,C))
    SUBSET U = (A,B,C)
    DEFINE A AS totalprice > 0, B AS totalprice < PREV(totalprice), C AS totalprice < 100
);
""",
        ),
        (
            "Pattern with grouping and quantifiers",
            """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price
    PATTERN ((A B+ C+))
    DEFINE A AS totalprice > 0, B AS totalprice < PREV(totalprice), C AS totalprice < 100
);
""",
        ),
    )
]

# Feed each case to the MATCH_RECOGNIZE parser; a raised exception is reported
# as a failed case and the loop moves on to the next one.
for test in sample_queries:
    print("--- Running Test: " + "{} ---".format(test["description"]))
    try:
        result = parse_match_recognize_query(test["query"])
        for line in ("Test Passed: Query parsed successfully.", result):
            print(line)
    except Exception as e:
        for line in ("Test Failed with error:", e):
            print(line)
    print("=" * 40 + "=" * 40)


--- Running Test: Simple pattern with whitespace ---


DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['custkey'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='A.totalprice', alias='starting_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False)])
DEBUG:src.parser.match_recognize_extractor:Extracted Pattern: PatternClause(pattern='A b+ c*', metadata={'variables': ['A', 'b+', 'c*'], 'base_variables': ['A', 'b', 'c']})
DEBUG:src.parser.match_recognize_extractor:Extracted DEFINE: DefineClause(definitions=[Define(variable='A', condition='totalprice>0'), Define(variable='b', condition='totalprice<PREV(totalprice)'), Define(variable='c', condition='totalprice<100')])
DEBUG:sr

Test Passed: Query parsed successfully.
MatchRecognizeClause(
  partition_by=PartitionByClause(columns=['custkey']),
  order_by=OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)]),
  measures=MeasuresClause(measures=[Measure(expression='A.totalprice', alias='starting_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False)]),
  rows_per_match=None,
  after_match_skip=None,
  pattern=PatternClause(pattern='A b+ c*', metadata={'variables': ['A', 'b+', 'c*'], 'base_variables': ['A', 'b', 'c']}),
  subset=[],
  define=DefineClause(definitions=[Define(variable='A', condition='totalprice>0'), Define(variable='b', condition='totalprice<PREV(totalprice)'), Define(variable='c', condition='totalprice<100')])
)
--- Running Test: Pattern without whitespace ---
Test Passed: Query parsed successfully.
MatchRecognizeClause(
  partition_by=PartitionByClause(columns=['custkey']),
  order_by=OrderByClause(sort_items=[SortItem(column

In [3]:
from src.parser.match_recognize_extractor import parse_match_recognize_query

if __name__ == "__main__":
    # Single ad-hoc case: a PERMUTE pattern whose middle element carries a
    # quantifier (B*), with a DEFINE entry for each of A, B and C.
    sample_query = """
SELECT * FROM orders MATCH_RECOGNIZE(
    PARTITION BY custkey
    ORDER BY orderdate
    MEASURES A.totalprice AS starting_price
    PATTERN (PERMUTE(A,B*,C))
    DEFINE A AS totalprice > 0, B AS totalprice < PREV(totalprice), C AS totalprice < 100
);

    """

    # Parse the query and print whatever the parser returns; no exception
    # handling here, so a parser error surfaces as a traceback.
    components = parse_match_recognize_query(sample_query)
    print(components)


DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['custkey'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='A.totalprice', alias='starting_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False)])
DEBUG:src.parser.match_recognize_extractor:Extracted Pattern: PatternClause(pattern='PERMUTE(A,B*,C)', metadata={'variables': ['AB*', 'C'], 'base_variables': ['AB', 'C']})
DEBUG:src.parser.match_recognize_extractor:Extracted DEFINE: DefineClause(definitions=[Define(variable='A', condition='totalprice>0'), Define(variable='B', condition='totalprice<PREV(totalprice)'), Define(variable='C', condition='totalprice<100')])
DEBUG:src

MatchRecognizeClause(
  partition_by=PartitionByClause(columns=['custkey']),
  order_by=OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)]),
  measures=MeasuresClause(measures=[Measure(expression='A.totalprice', alias='starting_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False)]),
  rows_per_match=None,
  after_match_skip=None,
  pattern=PatternClause(pattern='PERMUTE(A,B*,C)', metadata={'variables': ['A', 'B*', 'C'], 'base_variables': ['A', 'B', 'C']}),
  subset=[],
  define=DefineClause(definitions=[Define(variable='A', condition='totalprice>0'), Define(variable='B', condition='totalprice<PREV(totalprice)'), Define(variable='C', condition='totalprice<100')])
)


In [None]:
from src.parser.match_recognize_extractor import parse_full_query
import logging

# Show the parser's DEBUG log records in the cell output.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Full-query test cases (SELECT list variants combined with MATCH_RECOGNIZE).
# Each entry pairs a description with the SQL text handed to the parser.
test_queries = [
    {
        "description": "Simple pattern with wildcard",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES A.totalprice AS starting_price
                PATTERN (A b+ c*)
                DEFINE A AS totalprice > 0, b AS totalprice < PREV(totalprice), c AS totalprice < 100
            );
        """
    },
    {
        "description": "Pattern without whitespace in pattern",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES A.totalprice AS starting_price
                PATTERN (AB+C)
                DEFINE A AS totalprice > 0, B AS totalprice < PREV(totalprice), C AS totalprice < 100
            );
        """
    },
    {
        "description": "Pattern with PERMUTE, grouping, and quantifiers",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES LAST(U.totalprice) AS top_price
                PATTERN (PERMUTE(A,B,C))
                SUBSET U = (A,B,C)
                DEFINE A AS totalprice > 0, B AS totalprice < PREV(totalprice), C AS totalprice < 100
            );
        """
    },
    {
        "description": "Select specific columns",
        "query": """
            SELECT orderid, custkey, orderdate FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES A.totalprice AS starting_price
                PATTERN (A b+ c*)
                DEFINE A AS totalprice > 0, b AS totalprice < PREV(totalprice), c AS totalprice < 100
            );
        """
    },
    {
        "description": "Select with aliasing",
        "query": """
            SELECT orderid AS id, totalprice AS price FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES LAST(A.totalprice) AS ending_price
                PATTERN (A{2,4}?)
                DEFINE A AS totalprice > 0
            );
        """
    },
    {
        "description": "Select with DISTINCT",
        "query": """
            SELECT DISTINCT orderid, totalprice FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES A.totalprice AS starting_price
                PATTERN (A b+ c*)
                DEFINE A AS totalprice > 0, b AS totalprice < PREV(totalprice), c AS totalprice < 100
            );
        """
    },
    {
        "description": "Select with function calls",
        "query": """
            SELECT MAX(totalprice) AS max_price, MIN(totalprice) AS min_price FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES A.totalprice AS starting_price
                PATTERN (A b+ c*)
                DEFINE A AS totalprice > 0, b AS totalprice < PREV(totalprice), c AS totalprice < 100
            );
        """
    },
    # The quantifier coverage is split in two: the former single case was
    # labelled "greedy and reluctant" but only contained the reluctant form.
    {
        "description": "Pattern with quantifiers (greedy)",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES A.totalprice AS starting_price, LAST(A.totalprice) AS ending_price
                PATTERN (A{2,4})
                DEFINE A AS totalprice > 0
            );
        """
    },
    {
        # The space before the closing parenthesis is kept from the original case.
        "description": "Pattern with quantifiers (reluctant)",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES A.totalprice AS starting_price, LAST(A.totalprice) AS ending_price
                PATTERN (A{2,4}? )
                DEFINE A AS totalprice > 0
            );
        """
    },
]


def run_tests():
    """Run every entry of the module-level ``test_queries`` through ``parse_full_query``.

    For each case a header with its description is printed, followed either by
    the parser's result or, if the parser raises, by the error and the query
    text. Exceptions are reported rather than propagated, so every case is
    attempted.
    """
    for case in test_queries:
        print(f"\n--- Running Test: {case['description']} ---")
        try:
            tree = parse_full_query(case["query"])
            print("Test Passed: Query parsed successfully.", tree, sep="\n")
        except Exception as err:
            print("Test Failed with error:", err, "Query:", case["query"], sep="\n")


# Kick off the suite when this code runs as the main program.
if __name__ == "__main__":
    run_tests()


In [4]:
import logging

# Import your SQL parser function here; adjust the import as needed.
from src.parser.match_recognize_extractor import parse_full_query

# Emit DEBUG-level log records so the parser's trace shows up in the output.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# Plain SELECT statements, one per SQL feature, as {"description", "query"}
# records built from (label, SQL) pairs.
test_queries = [
    dict(description=label, query=sql)
    for label, sql in (
        ("Wildcard Column Selection", "SELECT * FROM orders;"),
        ("Specific Column Selection", "SELECT orderid, custkey, orderdate FROM orders;"),
        ("Expressions & Aliases", "SELECT orderid, totalprice * 0.9 AS discounted_price FROM orders;"),
        (
            "Aggregate Functions",
            "SELECT COUNT(*) AS total_orders, SUM(totalprice) AS total_sales FROM orders;",
        ),
        ("WHERE Clause", "SELECT orderid, totalprice FROM orders WHERE totalprice > 100;"),
        ("ORDER BY Clause", "SELECT orderid, orderdate FROM orders ORDER BY orderdate DESC;"),
        (
            "GROUP BY & HAVING Clauses",
            "SELECT custkey, COUNT(*) AS order_count FROM orders GROUP BY custkey HAVING COUNT(*) > 5;",
        ),
        ("LIMIT & OFFSET", "SELECT * FROM orders LIMIT 10 OFFSET 20;"),
        ("DISTINCT", "SELECT DISTINCT custkey FROM orders;"),
        (
            "JOIN",
            "SELECT o.orderid, c.custname FROM orders o JOIN customers c ON o.custkey = c.custkey;",
        ),
        (
            "Subquery",
            "SELECT orderid FROM orders WHERE totalprice > (SELECT AVG(totalprice) FROM orders);",
        ),
        (
            "UNION / UNION ALL",
            "SELECT orderid FROM orders_2024 UNION ALL SELECT orderid FROM orders_2025;",
        ),
        (
            "CASE Expression",
            "SELECT orderid, CASE WHEN totalprice > 1000 THEN 'High' ELSE 'Low' END AS price_category FROM orders;",
        ),
    )
]

def run_tests():
    """Parse each ``test_queries`` entry with ``parse_full_query`` and print the outcome.

    A successful parse prints the result prefixed with ``AST:``. If the parser
    raises, the error and the query text are printed and the loop continues
    with the next case.
    """
    for entry in test_queries:
        banner = "\n--- Running Test: {0} ---".format(entry["description"])
        print(banner)
        try:
            parsed = parse_full_query(entry["query"])
            print("Test Passed: Query parsed successfully.")
            print("AST:", parsed)
        except Exception as problem:
            for line in ("Test Failed with error:", problem, "Query:", entry["query"]):
                print(line)


# Run the whole list when this code is executed as the main program.
if __name__ == "__main__":
    run_tests()


DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM orders;
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='orders')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted MATCH_RECOGNIZE clause via recursive search.
DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT orderid, custkey, orderdate FROM orders;
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=orderid, metadata={}), SelectItem(expression=custkey, metadata={}), SelectItem(expression=orderdate, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='orders')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:


--- Running Test: Wildcard Column Selection ---
Test Passed: Query parsed successfully.
AST: FullQueryAST(
  select_clause=SelectClause(items=[SelectItem(expression=*, metadata={})]),
  from_clause=FromClause(table='orders'),
  match_recognize=MatchRecognizeClause(
  partition_by=None,
  order_by=None,
  measures=None,
  rows_per_match=None,
  after_match_skip=None,
  pattern=None,
  subset=[],
  define=None
),
  metadata={}
)

--- Running Test: Specific Column Selection ---
Test Passed: Query parsed successfully.
AST: FullQueryAST(
  select_clause=SelectClause(items=[SelectItem(expression=orderid, metadata={}), SelectItem(expression=custkey, metadata={}), SelectItem(expression=orderdate, metadata={})]),
  from_clause=FromClause(table='orders'),
  match_recognize=MatchRecognizeClause(
  partition_by=None,
  order_by=None,
  measures=None,
  rows_per_match=None,
  after_match_skip=None,
  pattern=None,
  subset=[],
  define=None
),
  metadata={}
)

--- Running Test: Expressions & Alias

In [None]:
from src.parser.match_recognize_extractor import parse_match_recognize_query
import logging

# Show the parser's DEBUG log records in the cell output.
logging.basicConfig(level=logging.DEBUG)
logger = logging.getLogger(__name__)

# PATTERN-clause test cases. Each entry pairs a description with the
# MATCH_RECOGNIZE query handed to the parser.
test_queries = [
    {
        "description": "Simple pattern with whitespace",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES A.totalprice AS starting_price
                PATTERN (A b+ c*)
                DEFINE A AS totalprice > 0, b AS totalprice < PREV(totalprice), c AS totalprice < 100
            );
        """
    },
    {
        "description": "Pattern without whitespace",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES A.totalprice AS starting_price
                PATTERN (AB+C)
                DEFINE A AS totalprice > 0, B AS totalprice < PREV(totalprice), C AS totalprice < 100
            );
        """
    },
    {
        "description": "Pattern with PERMUTE, grouping, and quantifiers",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES LAST(U.totalprice) AS top_price
                PATTERN (PERMUTE(A,B,C))
                SUBSET U = (A,B,C)
                DEFINE A AS totalprice > 0, B AS totalprice < PREV(totalprice), C AS totalprice < 100
            );
        """
    },
    {
        # Mixed spacing: "A" is separated by whitespace while "b+" and "c*"
        # are adjacent. This case used to repeat the fully spaced pattern of
        # the first entry and therefore exercised nothing new.
        "description": "Mixed pattern: whitespace vs. no whitespace",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES A.totalprice AS starting_price
                PATTERN (A b+c*)
                DEFINE A AS totalprice > 0, b AS totalprice < PREV(totalprice), c AS totalprice < 100
            );
        """
    },
    # The quantifier coverage is split in two: the former single case was
    # labelled "greedy and reluctant" but only contained the reluctant form.
    {
        "description": "Pattern with quantifiers (greedy)",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES A.totalprice AS starting_price, LAST(A.totalprice) AS ending_price
                PATTERN (A{2,4})
                DEFINE A AS totalprice > 0
            );
        """
    },
    {
        # The space before the closing parenthesis is kept from the original case.
        "description": "Pattern with quantifiers (reluctant)",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES A.totalprice AS starting_price, LAST(A.totalprice) AS ending_price
                PATTERN (A{2,4}? )
                DEFINE A AS totalprice > 0
            );
        """
    },
]

def run_tests():
    """Parse each entry of ``test_queries`` and print a pass/fail report.

    Every entry is a dict with a ``"description"`` and a ``"query"`` key.  A
    query passes when ``parse_match_recognize_query`` returns without raising;
    the returned object is then printed.  Any ``Exception`` is caught and its
    message is printed together with the offending query, so the remaining
    entries still run.
    """
    for case in test_queries:
        print(f"\n--- Running Test: {case['description']} ---")
        try:
            parsed = parse_match_recognize_query(case["query"])
            print("Test Passed: Query parsed successfully.")
            print(parsed)
        except Exception as err:
            # Failure report: fixed header, the error message, then the query text.
            for line in ("Test Failed with error:", err, "Query:"):
                print(line)
            print(case["query"])

# Entry point for this cell: run every query in test_queries through the parser.
# NOTE: inside a notebook kernel __name__ is "__main__", so this guard always passes.
if __name__ == "__main__":
    run_tests()


DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM orders MATCH_RECOGNIZE( PARTITION BY custkey ORDER BY orderdate MEASURES A.totalprice AS starting_price PATTERN (A b+ c*) DEFINE A AS totalprice > 0, b AS totalprice < PREV(totalprice), c AS totalprice < 100 );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='orders')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['custkey'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='A.totalprice', alias='starting_price', m


--- Running Test: Simple pattern with wildcard ---
Test Passed: Query parsed successfully.
FullQueryAST(
  select_clause=SelectClause(items=[SelectItem(expression=*, metadata={})]),
  from_clause=FromClause(table='orders'),
  match_recognize=MatchRecognizeClause(
  partition_by=PartitionByClause(columns=['custkey']),
  order_by=OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)]),
  measures=MeasuresClause(measures=[Measure(expression='A.totalprice', alias='starting_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False)]),
  rows_per_match=None,
  after_match_skip=None,
  pattern=PatternClause(pattern='A b+ c*', metadata={'variables': ['A', 'b+', 'c*'], 'base_variables': ['A', 'b', 'c']}),
  subset=[],
  define=DefineClause(definitions=[Define(variable='A', condition='totalprice>0'), Define(variable='b', condition='totalprice<PREV(totalprice)'), Define(variable='c', condition='totalprice<100')])
),
  metadata={}


usage: ipykernel_launcher.py [-h] [-v] [-q] [--locals] [--durations N] [-f]
                             [-c] [-b] [-k TESTNAMEPATTERNS]
                             [tests ...]
ipykernel_launcher.py: error: argument -f/--failfast: ignored explicit argument '/run/user/1000/jupyter/runtime/kernel-v3affc58adf2dafecf2238112aa7beb336f44801ee.json'


SystemExit: 2

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


In [1]:
!python -m unittest src/tests/test_enhanced_features.py


E
ERROR: src/tests/test_enhanced_features (unittest.loader._FailedTest.src/tests/test_enhanced_features)
----------------------------------------------------------------------
ImportError: Failed to import test module: src/tests/test_enhanced_features
Traceback (most recent call last):
  File "/home/monierashraf/anaconda3/lib/python3.12/unittest/loader.py", line 137, in loadTestsFromName
    module = __import__(module_name)
             ^^^^^^^^^^^^^^^^^^^^^^^
ModuleNotFoundError: No module named 'src/tests/test_enhanced_features'


----------------------------------------------------------------------
Ran 1 test in 0.000s

FAILED (errors=1)


In [1]:
# test_pattern.py
from src.parser.match_recognize_extractor import parse_full_query
import json

# Query whose PATTERN combines the ^ and $ anchors, an exclusion ({- B -}) and an
# alternation (C | D), to check that the parser accepts this pattern syntax.
query = """
SELECT *
FROM orders
MATCH_RECOGNIZE (
    PARTITION BY orderkey
    ORDER BY orderdate
    MEASURES
        RUNNING COUNT(*) as cnt
    PATTERN (^A {-B-} (C | D)$)
    DEFINE
        A AS A.totalprice > 100,
        B AS B.totalprice > 200,
        C AS C.totalprice > 300,
        D AS D.totalprice > 400
)
"""

# NOTE(review): despite the "Pattern:" label, this prints the whole object returned by
# parse_full_query, not only its pattern clause.
# NOTE(review): the name `ast` would shadow the standard-library `ast` module if that
# module were imported in this notebook.
ast = parse_full_query(query)
print("Pattern:", ast)



NameError: name 'Optional' is not defined

In [None]:
# Test coverage: run the parser over one query per MATCH_RECOGNIZE feature

In [4]:
from src.parser.match_recognize_extractor import parse_full_query
from src.ast.ast_nodes import FullQueryAST

# Test queries covering each MATCH_RECOGNIZE feature plus a few edge cases
test_queries = [
    # 1. Basic ONE ROW PER MATCH
    {
        "name": "Basic One Row Per Match",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    A.totalprice AS starting_price,
                    LAST(B.totalprice) AS bottom_price
                ONE ROW PER MATCH
                PATTERN (A B+ C)
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """
    },
    # 2. ALL ROWS PER MATCH with SHOW EMPTY MATCHES, SUBSET clause and union variable
    {
        "name": "All Rows Per Match - Show Empty Matches",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    A.totalprice AS starting_price,
                    LAST(B.totalprice) AS bottom_price,
                    LAST(U.totalprice) AS top_price
                ALL ROWS PER MATCH SHOW EMPTY MATCHES
                PATTERN (A | B | C | D)
                SUBSET U = (C, D)
                DEFINE
                    B AS totalprice < PREV(totalprice),
                    C AS totalprice > PREV(totalprice) AND totalprice <= A.totalprice,
                    D AS totalprice > PREV(totalprice)
            );
        """
    },
    # 3. ALL ROWS PER MATCH with OMIT EMPTY MATCHES
    {
        "name": "All Rows Per Match - Omit Empty Matches",
        "query": """
            SELECT * FROM transactions MATCH_RECOGNIZE(
                PARTITION BY account_id
                ORDER BY transaction_date
                MEASURES
                    A.amount AS start_amount,
                    C.amount AS final_amount
                ALL ROWS PER MATCH OMIT EMPTY MATCHES
                PATTERN (A | B | C)
                DEFINE
                    B AS amount < PREV(amount),
                    C AS amount > PREV(amount)
            );
        """
    },
    # 4. ALL ROWS PER MATCH WITH UNMATCHED ROWS
    {
        "name": "All Rows Per Match - With Unmatched Rows",
        "query": """
            SELECT * FROM sales MATCH_RECOGNIZE(
                PARTITION BY region
                ORDER BY sale_date
                MEASURES
                    A.price AS initial_price,
                    D.price AS final_price
                ALL ROWS PER MATCH WITH UNMATCHED ROWS
                PATTERN (A B C D)
                DEFINE
                    B AS price < PREV(price),
                    C AS price > PREV(price),
                    D AS price > A.price
            );
        """
    },
    # 5. Nested pattern: alternation inside a repeated group, with WITH UNMATCHED ROWS
    {
        "name": "Nested Pattern with Unmatched Rows",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    A.totalprice AS starting_price,
                    F.totalprice AS ending_price
                ALL ROWS PER MATCH WITH UNMATCHED ROWS
                PATTERN (A (B C | D E)+ F)
                DEFINE
                    B AS totalprice < PREV(totalprice),
                    C AS totalprice > PREV(totalprice),
                    D AS totalprice = PREV(totalprice),
                    E AS totalprice >= PREV(totalprice)
            );
        """
    },
    # 6. Using Classifier() and MATCH_NUMBER() functions
    {
        "name": "Classifier and Match Number Functions",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    CLASSIFIER() AS pattern_type,
                    MATCH_NUMBER() AS match_id,
                    A.totalprice AS starting_price,
                    LAST(B.totalprice) AS bottom_price
                ONE ROW PER MATCH
                PATTERN (A B+ C)
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """
    },
    # 7. Running vs Final Semantics in MEASURES
    {
        "name": "Running vs Final Semantics in Measures",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    RUNNING LAST(A.totalprice) AS running_last_price,
                    FINAL LAST(A.totalprice) AS final_last_price
                ONE ROW PER MATCH
                PATTERN (A B+ C)
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """
    },
    # 8. Aggregates with Running vs Final semantics (avg and count)
    {
        "name": "Running Avg vs Final Count Functions",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    RUNNING avg(A.totalprice) AS running_avg,
                    FINAL count(A.*) AS final_count
                ONE ROW PER MATCH
                PATTERN (A B+ C)
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """
    },
    # 9. Empty Match: pattern that produces an empty match for every row
    {
        "name": "Empty Match and Unmatched Rows",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    A.totalprice AS starting_price,
                    count(A.*) AS match_count
                ONE ROW PER MATCH
                PATTERN (())
            );
        """
    },
    # 10. Physical Navigation Functions: PREV() and NEXT()
    {
        "name": "Physical Navigation Functions",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    PREV(A.totalprice, 2) AS prev_price,
                    NEXT(A.totalprice, 1) AS next_price
                ONE ROW PER MATCH
                PATTERN (A B+ C)
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """
    },
    # 11. Nested Navigation Function
    {
        "name": "Nested Navigation Function",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    PREV(FIRST(A.totalprice, 3), 2) AS nested_nav_price
                ONE ROW PER MATCH
                PATTERN (A B+ C)
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """
    }
]

# Run all test cases and print the resulting AST.
# Each query is parsed independently: a failure is reported as "Error: <message>" and the
# loop moves on, so one bad query does not stop the remaining cases.
for test in test_queries:
    print(f"🔹 Running Test: {test['name']}")
    try:
        query_ast = parse_full_query(test["query"])
        print(query_ast)
    except Exception as e:
        # Only the exception message is shown; the exception type and traceback are dropped.
        print(f"Error: {e}")
    # Separator line between test cases.
    print("=" * 80)


DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM orders MATCH_RECOGNIZE( PARTITION BY custkey ORDER BY orderdate MEASURES A.totalprice AS starting_price, LAST(B.totalprice) AS bottom_price ONE ROW PER MATCH PATTERN (A B+ C) DEFINE B AS totalprice < PREV(totalprice) );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='orders')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['custkey'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='A.totalprice', alias='starting_

🔹 Running Test: Basic One Row Per Match
FullQueryAST(
  select_clause=SelectClause(items=[SelectItem(expression=*, metadata={})]),
  from_clause=FromClause(table='orders'),
  match_recognize=MatchRecognizeClause(
  partition_by=PartitionByClause(columns=['custkey']),
  order_by=OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)]),
  measures=MeasuresClause(measures=[Measure(expression='A.totalprice', alias='starting_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False), Measure(expression='LAST(B.totalprice)', alias='bottom_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False)]),
  rows_per_match=ONE ROW PER MATCH,
  after_match_skip=None,
  pattern=PatternClause(pattern='AB+C', metadata={'variables': ['A', 'B+', 'C'], 'base_variables': ['A', 'B', 'C']}),
  subset=[],
  define=DefineClause(definitions=[Define(variable='B', condition='totalprice<PREV(totalprice)')])
),
  metadata={

In [3]:
from src.parser.match_recognize_extractor import parse_full_query
from src.ast.ast_nodes import FullQueryAST

# Test queries covering each MATCH_RECOGNIZE feature plus a few edge cases
test_queries = [
    # 1. Basic ONE ROW PER MATCH
    {
        "name": "Basic One Row Per Match",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    A.totalprice AS starting_price,
                    LAST(B.totalprice) AS bottom_price
                ONE ROW PER MATCH
                PATTERN (A B+ C)
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """
    },
    # 2. ALL ROWS PER MATCH with SHOW EMPTY MATCHES, SUBSET clause and union variable
    {
        "name": "All Rows Per Match - Show Empty Matches",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    A.totalprice AS starting_price,
                    LAST(B.totalprice) AS bottom_price,
                    LAST(U.totalprice) AS top_price
                ALL ROWS PER MATCH SHOW EMPTY MATCHES
                PATTERN (A | B | C | D)
                SUBSET U = (C, D)
                DEFINE
                    B AS totalprice < PREV(totalprice),
                    C AS totalprice > PREV(totalprice) AND totalprice <= A.totalprice,
                    D AS totalprice > PREV(totalprice)
            );
        """
    },
    # 3. ALL ROWS PER MATCH with OMIT EMPTY MATCHES
    {
        "name": "All Rows Per Match - Omit Empty Matches",
        "query": """
            SELECT * FROM transactions MATCH_RECOGNIZE(
                PARTITION BY account_id
                ORDER BY transaction_date
                MEASURES
                    A.amount AS start_amount,
                    C.amount AS final_amount
                ALL ROWS PER MATCH OMIT EMPTY MATCHES
                PATTERN (A | B | C)
                DEFINE
                    B AS amount < PREV(amount),
                    C AS amount > PREV(amount)
            );
        """
    },
    # 4. ALL ROWS PER MATCH WITH UNMATCHED ROWS
    {
        "name": "All Rows Per Match - With Unmatched Rows",
        "query": """
            SELECT * FROM sales MATCH_RECOGNIZE(
                PARTITION BY region
                ORDER BY sale_date
                MEASURES
                    A.price AS initial_price,
                    D.price AS final_price
                ALL ROWS PER MATCH WITH UNMATCHED ROWS
                PATTERN (A B C D)
                DEFINE
                    B AS price < PREV(price),
                    C AS price > PREV(price),
                    D AS price > A.price
            );
        """
    },
    # 5. Nested pattern: alternation inside a repeated group, with WITH UNMATCHED ROWS
    {
        "name": "Nested Pattern with Unmatched Rows",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    A.totalprice AS starting_price,
                    F.totalprice AS ending_price
                ALL ROWS PER MATCH WITH UNMATCHED ROWS
                PATTERN (A (B C | D E)+ F)
                DEFINE
                    B AS totalprice < PREV(totalprice),
                    C AS totalprice > PREV(totalprice),
                    D AS totalprice = PREV(totalprice),
                    E AS totalprice >= PREV(totalprice)
            );
        """
    },
    # 6. Using Classifier() and MATCH_NUMBER() functions
    {
        "name": "Classifier and Match Number Functions",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    CLASSIFIER() AS pattern_type,
                    MATCH_NUMBER() AS match_id,
                    A.totalprice AS starting_price,
                    LAST(B.totalprice) AS bottom_price
                ONE ROW PER MATCH
                PATTERN (A B+ C)
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """
    },
    # 7. Running vs Final Semantics in MEASURES
    {
        "name": "Running vs Final Semantics in Measures",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    RUNNING LAST(A.totalprice) AS running_last_price,
                    FINAL LAST(A.totalprice) AS final_last_price
                ONE ROW PER MATCH
                PATTERN (A B+ C)
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """
    },
    # 8. Aggregates with Running vs Final semantics (avg and count)
    {
        "name": "Running Avg vs Final Count Functions",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    RUNNING avg(A.totalprice) AS running_avg,
                    FINAL count(A.*) AS final_count
                ONE ROW PER MATCH
                PATTERN (A B+ C)
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """
    },
    # 9. Empty Match: pattern that produces an empty match for every row
    {
        "name": "Empty Match and Unmatched Rows",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    A.totalprice AS starting_price,
                    count(A.*) AS match_count
                ONE ROW PER MATCH
                PATTERN (())
            );
        """
    },
    # 10. Physical Navigation Functions: PREV() and NEXT()
    {
        "name": "Physical Navigation Functions",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    PREV(A.totalprice, 2) AS prev_price,
                    NEXT(A.totalprice, 1) AS next_price
                ONE ROW PER MATCH
                PATTERN (A B+ C)
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """
    },
    # 11. Nested Navigation Function
    {
        "name": "Nested Navigation Function",
        "query": """
            SELECT * FROM orders MATCH_RECOGNIZE(
                PARTITION BY custkey
                ORDER BY orderdate
                MEASURES
                    PREV(FIRST(A.totalprice, 3), 2) AS nested_nav_price
                ONE ROW PER MATCH
                PATTERN (A B+ C)
                DEFINE
                    B AS totalprice < PREV(totalprice)
            );
        """
    }
]

# Run all test cases and print the resulting AST.
# NOTE(review): this cell appears to repeat the earlier test-coverage cell verbatim.
# Each query is parsed independently: a failure is reported as "Error: <message>" and the
# loop moves on, so one bad query does not stop the remaining cases.
for test in test_queries:
    print(f"🔹 Running Test: {test['name']}")
    try:
        query_ast = parse_full_query(test["query"])
        print(query_ast)
    except Exception as e:
        # Only the exception message is shown; the exception type and traceback are dropped.
        print(f"Error: {e}")
    # Separator line between test cases.
    print("=" * 80)


DEBUG:src.parser.match_recognize_extractor:Full statement text: SELECT * FROM orders MATCH_RECOGNIZE( PARTITION BY custkey ORDER BY orderdate MEASURES A.totalprice AS starting_price, LAST(B.totalprice) AS bottom_price ONE ROW PER MATCH PATTERN (A B+ C) DEFINE B AS totalprice < PREV(totalprice) );
DEBUG:src.parser.match_recognize_extractor:Extracted SELECT clause: SelectClause(items=[SelectItem(expression=*, metadata={})])
DEBUG:src.parser.match_recognize_extractor:Extracted FROM clause: FromClause(table='orders')
DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['custkey'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='A.totalprice', alias='starting_

🔹 Running Test: Basic One Row Per Match
FullQueryAST(
  select_clause=SelectClause(items=[SelectItem(expression=*, metadata={})]),
  from_clause=FromClause(table='orders'),
  match_recognize=MatchRecognizeClause(
  partition_by=PartitionByClause(columns=['custkey']),
  order_by=OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)]),
  measures=MeasuresClause(measures=[Measure(expression='A.totalprice', alias='starting_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False), Measure(expression='LAST(B.totalprice)', alias='bottom_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False)]),
  rows_per_match=ONE ROW PER MATCH,
  after_match_skip=None,
  pattern=PatternClause(pattern='AB+C', metadata={'variables': ['A', 'B+', 'C'], 'base_variables': ['A', 'B', 'C']}),
  subset=[],
  define=DefineClause(definitions=[Define(variable='B', condition='totalprice<PREV(totalprice)')])
),
  metadata={

In [2]:
from src.parser.match_recognize_extractor import parse_match_recognize_query
# Parse a sample query that uses CLASSIFIER() and MATCH_NUMBER() measures and print the
# result; the recorded output of this cell shows a MatchRecognizeClause.
# NOTE: inside a notebook kernel __name__ is "__main__", so this guard always passes.
if __name__ == "__main__":
    sample_query = """
SELECT *
FROM orders
MATCH_RECOGNIZE(
  PARTITION BY custkey
  ORDER BY orderdate
  MEASURES
    CLASSIFIER() AS pattern_type,
    MATCH_NUMBER() AS match_id,
    A.totalprice AS starting_price,
    LAST(B.totalprice) AS bottom_price
  ONE ROW PER MATCH
  PATTERN (A B+ C)
  DEFINE
    B AS totalprice < PREV(totalprice)
);
    """
    # The full SELECT statement text is passed to the parser, not just the clause.
    components = parse_match_recognize_query(sample_query)

    print(components)

DEBUG:src.parser.match_recognize_extractor:Visiting PatternRecognition context
DEBUG:src.parser.match_recognize_extractor:Extracted PARTITION BY: PartitionByClause(columns=['custkey'])
DEBUG:src.parser.match_recognize_extractor:Extracted ORDER BY: OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)])
DEBUG:src.parser.match_recognize_extractor:Extracted MEASURES: MeasuresClause(measures=[Measure(expression='CLASSIFIER()', alias='pattern_type', metadata={'semantics': 'RUNNING'}, is_classifier=True, is_match_number=False), Measure(expression='MATCH_NUMBER()', alias='match_id', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=True), Measure(expression='A.totalprice', alias='starting_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False), Measure(expression='LAST(B.totalprice)', alias='bottom_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False)])
DEBUG:src.parser.

MatchRecognizeClause(
  partition_by=PartitionByClause(columns=['custkey']),
  order_by=OrderByClause(sort_items=[SortItem(column='orderdate', ordering='ASC', nulls_ordering=None)]),
  measures=MeasuresClause(measures=[Measure(expression='CLASSIFIER()', alias='pattern_type', metadata={'semantics': 'RUNNING'}, is_classifier=True, is_match_number=False), Measure(expression='MATCH_NUMBER()', alias='match_id', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=True), Measure(expression='A.totalprice', alias='starting_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False), Measure(expression='LAST(B.totalprice)', alias='bottom_price', metadata={'semantics': 'RUNNING'}, is_classifier=False, is_match_number=False)]),
  rows_per_match=ONE ROW PER MATCH,
  after_match_skip=None,
  pattern=PatternClause(pattern='AB+C', metadata={'variables': ['A', 'B+', 'C'], 'base_variables': ['A', 'B', 'C']}),
  subset=[],
  define=DefineClause(definitions=[

In [1]:
!find . -type f -name "*.py" | sort  

./1.py
./2.py
./3.py
./__init__.py
./m3.py
./maiaas22222n.py
./main.py
./src/ast/ast_nodes.py
./src/ast/__init__.py
./src/executor/__init__.py
./src/grammar/__init__.py
./src/grammar/TrinoLexer.py
./src/grammar/TrinoParserListener.py
./src/grammar/TrinoParser.py
./src/grammar/TrinoParserVisitor.py
./src/__init__.py
./src/optimizer/__init__.py
./src/parser/error_listeners.py
./src/parser/__init__.py
./src/parser/match_recognize_extractor.py
./src/parser/query_parser.py
./src/validator/__init__.py
./tests/__init__.py
./tests/test_ast.py
./tests/test_parser_edge_cases.py
./tests/test_parser.py
./tests/test_validator.py


Final version: works as we expected.

In [None]:
import sys
import pandas as pd
# Use an absolute import for match_recognize.
from transformations.match_recognize import match_recognize

# Sample query: a single-variable pattern over rows partitioned by department and region.
query = """
    SELECT id, name FROM employees MATCH_RECOGNIZE (
        PARTITION BY department, region
        ORDER BY hire_date
        MEASURES salary AS avg_salary
        PATTERN (A)
        DEFINE A AS salary > 1000
    );
    """

# Four employees in the same department and region; only Charlie's salary (900)
# does not satisfy the DEFINE condition salary > 1000.
data = [
    {
        "id": emp_id,
        "name": emp_name,
        "department": "Sales",
        "region": "West",
        "hire_date": hired_on,
        "salary": pay,
    }
    for emp_id, emp_name, hired_on, pay in (
        (1, "Alice", "2021-01-01", 1200),
        (2, "Bob", "2021-01-02", 1300),
        (3, "Charlie", "2021-01-03", 900),
        (4, "Diana", "2021-01-04", 1100),
    )
]

# Run the query against the sample rows; any failure is reported as a one-line message.
try:
    output_df = match_recognize(query, pd.DataFrame(data))
    print("Match Recognize Output:")
    print(output_df)
except Exception as e:
    print(f"Error: {e!s}")

ImportError: cannot import name 'AutomatonBuilder' from partially initialized module 'transformations.automaton.automaton_builder' (most likely due to a circular import) (/home/monierashraf/Desktop/llm/Match_recognize/project/transformations/automaton/automaton_builder.py)