In [1]:
from functools import cache
from pathlib import Path

import chrysalis as chry
import duckdb
import polars as pl
from itables import show

import transformations as transformations
import invariants as invariants

In [2]:
@cache
def _test_connection() -> duckdb.DuckDBPyConnection:
    """Return the shared DuckDB connection used to evaluate test queries.

    The parquet file at ``./player_stats.parquet`` is loaded with polars, its
    ``round`` column is cast to ``Int8``, and the resulting frame is registered
    on a fresh connection under the name ``player_stats``. Because the function
    is wrapped in ``functools.cache`` and takes no arguments, the connection is
    built on the first call and the same object is returned afterwards.

    Raises:
        RuntimeError: if ``./player_stats.parquet`` does not exist.
    """
    dataset = Path("./player_stats.parquet")
    if not dataset.exists():
        raise RuntimeError("Input dataset does not exist, exiting.")

    connection = duckdb.connect()
    player_stats = pl.read_parquet(dataset).cast({"round": pl.Int8})
    connection.register("player_stats", player_stats)
    return connection


def evaluate_query(query: str) -> pl.DataFrame:
    """Run ``query`` on the cached test connection and return the result as a polars frame."""
    return _test_connection().query(query).pl()


# Seed query passed to `chry.run` below; it selects six columns from the
# `player_stats` relation registered by `_test_connection`.
_TEST_QUERY = "SELECT name, position, college, team, round, draft FROM player_stats;"

In [3]:
# Column add/remove rewrites, each registered with the `length_equals` invariant.
for _transform in (
    transformations.add_college_column,
    transformations.add_team_column,
    transformations.add_round_column,
    transformations.add_draft_column,
    transformations.remove_college_column,
    transformations.remove_team_column,
    transformations.remove_round_column,
    transformations.remove_draft_column,
):
    chry.register(_transform, invariants.length_equals)

# ORDER BY rewrites, also registered with `length_equals`.
for _transform in (
    transformations.add_order_by_asc,
    transformations.add_order_by_desc,
    transformations.remove_order_by,
):
    chry.register(_transform, invariants.length_equals)

# Intentional bug: adding `LIMIT 400` may remove an existing `LIMIT 200`,
# which can make the registered invariant fail.
for _transform, _invariant in (
    (transformations.add_limit_400, invariants.length_less_than_equals),
    (transformations.add_limit_200, invariants.length_less_than_equals),
    (transformations.remove_limit, invariants.length_greater_than_equals),
):
    chry.register(_transform, _invariant)

In [4]:
# Run chrysalis with `evaluate_query` and the seed query. The returned
# connection is queried in the following cells to inspect what was recorded.
conn = chry.run(
    evaluate_query,
    [_TEST_QUERY],
    chain_length=50,
    num_chains=10,
)

In [5]:
# Row count of the `applied_transformation` table.
show(
    conn.query(
        "SELECT COUNT(*) AS count FROM applied_transformation;"
    ).pl()
)

count
Loading ITables v2.3.0 from the internet... (need help?)


In [14]:
# List every chain id present in `applied_transformation`.
# ORDER BY pins the row order: DISTINCT alone gives no ordering guarantee, so
# without it `relation_chain_ids[0]` could refer to a different chain each time
# this cell is re-run against the same results.
relation_chain_ids = conn.query(
    """
    SELECT DISTINCT relation_chain_id
    FROM applied_transformation
    ORDER BY relation_chain_id;
    """
).pl()
show(relation_chain_ids)

relation_chain_id
Loading ITables v2.3.0 from the internet... (need help?)


In [19]:
# Fetch (name, id) of every transformation applied in the first listed chain,
# ordered by `link_index`. The chain id is bound as a query parameter.
relation_chain = conn.execute(
    """
SELECT t.name, t.id
FROM applied_transformation at
INNER JOIN transformation t ON t.id == at.transformation
WHERE at.relation_chain_id = ?
ORDER BY link_index;
""",
    (relation_chain_ids[0].item(),),
).pl()
relation_chain

name,id
str,str
"""remove_limit""","""844fce6a561a4b63921ce7ec57377b…"
"""remove_round_column""","""5ffd0003c0cf4887a4af1a467581c6…"
"""remove_round_column""","""5ffd0003c0cf4887a4af1a467581c6…"
"""add_round_column""","""8ecfec5763bf48259bde3882645c38…"
"""add_team_column""","""673665dcd77d40418e56a296d57916…"
…,…
"""add_team_column""","""673665dcd77d40418e56a296d57916…"
"""remove_draft_column""","""5d82f70fbbd940918883a991dd1229…"
"""remove_round_column""","""5ffd0003c0cf4887a4af1a467581c6…"
"""add_draft_column""","""9a29d2c6fb39467bb09b59c794040d…"


In [20]:
# Replay the first 15 links of the selected chain on the seed query, printing
# the query text after every step.
# NOTE(review): this reads a private chrysalis attribute path
# (`_internal._controller`); confirm whether a public accessor exists.
knowledge_base = chry._internal._controller._CURRENT_KNOWLEDGE_BASE
query = _TEST_QUERY

# Labels are left-aligned in a 30-character column so the queries line up.
print(f"{'start':<30} {query}")
for name, transform_id in relation_chain.head(15).iter_rows():
    relation = knowledge_base.relations[transform_id]
    query = relation.apply_transform(query)
    print(f"{name:<30} {query}")

start                          SELECT name, position, college, team, round, draft FROM player_stats;
remove_limit                   SELECT name, position, college, team, round, draft FROM player_stats
remove_round_column            SELECT name, position, college, team, draft FROM player_stats
remove_round_column            SELECT name, position, college, team, draft FROM player_stats
add_round_column               SELECT name, position, college, team, draft, round FROM player_stats
add_team_column                SELECT name, position, college, draft, round, team FROM player_stats
remove_draft_column            SELECT name, position, college, round, team FROM player_stats
remove_college_column          SELECT name, position, round, team FROM player_stats
add_round_column               SELECT name, position, team, round FROM player_stats
add_order_by_asc               SELECT name, position, team, round FROM player_stats ORDER BY draft ASC, round ASC
add_order_by_desc              SELECT 

In [21]:
# Distinct `name` values for rows of `failed_invariant`, joined through
# `applied_transformation` to `transformation`.
show(
    conn.query(
        """
SELECT DISTINCT name FROM failed_invariant
INNER JOIN applied_transformation ON failed_invariant.applied_transformation = applied_transformation.id
INNER JOIN transformation ON applied_transformation.transformation = transformation.id
"""
    ).pl()
)

name
Loading ITables v2.3.0 from the internet... (need help?)
