In [4]:
import sqlglot

def extract_ctes_and_tables(sql):
    """Map each CTE of a SQL statement to the table names it references.

    The statement is parsed with ``sqlglot.parse_one``. For every CTE
    exposed by the parsed root's ``ctes`` attribute, all ``Table`` nodes
    found anywhere inside that CTE are collected.

    Args:
        sql: SQL text to parse.

    Returns:
        A dict keyed by each CTE's ``alias_or_name``. Each value is the
        list of ``Table.name`` values in the order ``find_all`` yields
        them; a name appears once per reference, and references to other
        CTEs are included alongside base tables. The dict is empty when
        the parsed root exposes no CTEs.
    """
    root = sqlglot.parse_one(sql)

    # Not every parsed statement type is guaranteed to expose ``ctes``;
    # fall back to "no CTEs" instead of raising AttributeError on those.
    ctes = getattr(root, "ctes", None) or []

    return {
        cte.alias_or_name: [
            table.name for table in cte.find_all(sqlglot.exp.Table)
        ]
        for cte in ctes
    }

# Sample query with two chained CTEs: cte1 joins the tables `hellow` and
# `world3`, cte2 aggregates over cte1, and the outer SELECT reads cte2.
sql_query = """
WITH cte1 AS (
    SELECT a.id, b.name
    FROM hellow a
    JOIN world3 b ON a.id = b.id
),
cte2 AS (
    SELECT id, COUNT(*)
    FROM cte1
    GROUP BY id
)
SELECT * FROM cte2;
"""

# Build the {CTE name: [referenced table names]} mapping for the sample
# query and print it.
ctes_and_tables = extract_ctes_and_tables(sql_query)
print(ctes_and_tables)


{'cte1': ['hellow', 'world3'], 'cte2': ['cte1']}
