In [3]:
!pip install sql-metadata
!pip install pyparsing



In [4]:
from sql_metadata import Parser
import csv

def extract_tables_and_columns(sql_query):
    """
    Parse a SQL query with sql-metadata and return its tables and columns.

    When the parser reports exactly one table, every column name that does
    not already contain a '.' is prefixed with that table's name. Otherwise
    the column names are returned exactly as the parser produced them.

    Returns a ``(tables, columns)`` tuple.
    """
    parsed = Parser(sql_query)
    tables = parsed.tables
    columns = parsed.columns

    # Zero or several tables: an unqualified column cannot be attributed
    # to a single table, so hand back the parser's output untouched.
    if len(tables) != 1:
        return tables, columns

    sole_table = tables[0]
    qualified = []
    for name in columns:
        if '.' not in name:
            name = f"{sole_table}.{name}"
        qualified.append(name)

    return tables, qualified

def process_csv(input_csv, output_csv):
    """
    Annotate each query in a CSV file with the tables and columns it uses.

    Reads ``input_csv`` with ``csv.DictReader``, passes every row's
    ``sql_query`` value to ``extract_tables_and_columns``, stores the
    comma-joined results under the ``tables`` and ``columns`` keys, and
    writes the rows to ``output_csv``.

    Args:
        input_csv: Path of the CSV file to read. Each row must provide a
            ``sql_query`` column.
        output_csv: Path of the CSV file to write; it is opened in ``'w'``
            mode, so an existing file is overwritten.

    Raises:
        KeyError: If a row has no ``sql_query`` key.
    """
    # newline='' on both files, as the csv module recommends, so that line
    # breaks embedded in quoted fields (e.g. multi-line SQL) pass through
    # without universal-newline translation.
    with open(input_csv, 'r', newline='') as infile, \
            open(output_csv, 'w', newline='') as outfile:
        reader = csv.DictReader(infile)

        # fieldnames is None when the input file is empty; fall back to an
        # empty header instead of failing inside writeheader().
        fieldnames = list(reader.fieldnames or [])

        # The result columns must be part of the header: DictWriter raises
        # ValueError for row keys that are not listed in fieldnames.
        for extra in ('tables', 'columns'):
            if extra not in fieldnames:
                fieldnames.append(extra)

        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()

        for row in reader:
            tables, columns = extract_tables_and_columns(row['sql_query'])

            # Update the row with the extracted tables and columns
            row['tables'] = ', '.join(tables)
            row['columns'] = ', '.join(columns)

            writer.writerow(row)

if __name__ == "__main__":
    # Script entry point: annotate the evaluation CSV and report where the
    # result was written.
    input_csv, output_csv = "./sql_rag_eval.csv", "./processed_sql_rag_eval.csv"
    process_csv(input_csv, output_csv)
    print(f"Processed file saved as {output_csv}")


Processed file saved as ./processed_sql_rag_eval.csv
