In [1]:

import sqlite3
import io
import csv
import os
import sys
from pathlib import Path

# Put the directory above the notebook's working directory on the import path
# so that modules living one level up can be imported from this notebook.
current_dir = Path.cwd()
parent_dir = os.fspath(current_dir.parent)
sys.path += [parent_dir]

from dotenv import load_dotenv
from openai import OpenAI
import json
from typing import Union, List
from pydantic import BaseModel, Field

# Database schema description; generate_sql embeds `schema_info` in its user prompt.
# NOTE(review): absolute, machine-specific path — consider deriving it from a configurable base directory.
schema_path = "/Users/virounikamina/Desktop/PIMCO-Text2SQL/chatgpt_api/schema.json"
schema_info = json.loads(Path(schema_path).read_text())

# Read environment variables from a local .env file, then build the shared OpenAI client
# used by generate_sql and call_openai_model.
load_dotenv()
client = OpenAI(api_key=os.environ.get("OPENAI_API_KEY"))

# Simple Reasonings Schema
# JSON Schema (draft-07) serialised to a string: an array of objects that each
# carry a free-text `thought` and a boolean `helpful` flag.
_reasoning_item_properties = {
    "thought": {
        "type": "string",
        "description": "A thought about the user's question",
    },
    "helpful": {
        "type": "boolean",
        "description": "Whether the thought is helpful to solving the user's question",
    },
}
_reasonings_schema = {
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "array",
    "items": {"type": "object", "properties": _reasoning_item_properties},
}
reasonings_schema_json = json.dumps(_reasonings_schema)

# Simple Final Output Schema
# JSON Schema (draft-07) serialised to a string: an object holding the original
# question, the list of reasoning steps and the generated SQL text.
_final_reasoning_item = {
    "type": "object",
    "properties": {
        "thought": {
            "type": "string",
            "description": "A thought about the user's question",
        },
        "helpful": {
            "type": "boolean",
            "description": "Whether the thought is helpful to solving the user's question",
        },
    },
}
_final_output_properties = {
    "user_nlp_query": {
        "type": "string",
        "description": "The original natural language query to be translated into SQL",
    },
    "reasonings": {
        "type": "array",
        "items": _final_reasoning_item,
        "description": "Step-by-step reasoning process for query generation",
    },
    "generated_sql_query": {
        "type": "string",
        "description": "The final SQL query that answers the natural language question",
    },
}
final_output_schema_json = json.dumps({
    "$schema": "http://json-schema.org/draft-07/schema#",
    "type": "object",
    "properties": _final_output_properties,
})

# Prompt fragment describing, step by step, which "thoughts" the model should
# produce before writing SQL (CTE analysis, column extraction, filter values,
# period selection, grouping, closing summary). generate_sql interpolates it
# into its user prompt. The text is sent to the model verbatim, so whitespace
# and wording are part of the program's behaviour.
# The f-prefix is inert: the literal contains no {placeholders}.
# NOTE(review): the text contains typos ("whether a the query", "should on",
# an unbalanced backtick in "select`") and refers to "examples given above",
# but the prompt assembled in generate_sql includes no examples — confirm
# whether examples were meant to be supplied.
thought_instructions = f"""
```
Thought Instructions:
```

```
Generate thoughts of increasing complexity.
Each thought should build on the previous ones and thoughts 
should progressively cover the nuances of the problem at hand.
```

```
First set of thoughts should be on whether a the query requires 
Common Table Expressions (CTEs) to calculate the
results for sub queries. 

Prefer using Common Table Expressions rather than
case when statements or nested subqueries.

If CTEs are required then for each CTE, an analysis of the purpose of each
CTE should be done.
An overall structure should be outlined as to what will be calculated in 
each CTE.
```

```
Next set of thoughts should on 
extracting out the names of as many of 
the relevant columns as possible for all CTEs and for all the sql clauses such as the 
`select`, `where` and `group_by` clauses.
There might be additions or deletions from this list based on the 
following additional thoughts to be generated.
```


```
Generate a thought to figure out the possible phrases in the query 
which can be used as values of the columns present in the table so as to use them 
in the `where` clause.
```

```
Generate a thought to compare these extracted values with the list of possible values
of columns listed in the information for the columns so as to use the exact string
in the `where` clause.
```

```
Generate a thought to reason whether `IS_TOP_TIER_ENTITY` flag is required or not.
```

```
Generate a thought to figure out which time period is being queried.
If nothing is specified use `PERIOD_ID = 2023Y`.
```

```
Generate a thought to figure out if a group_by clause is required.
Since the table is structured so that for a single entity multiple securities are listed,
`group_by` is often required over `INS_ENTITY_NAME_LONG` column.
```

```
The above thoughts about 
1. phrases for values of columns
2. query phrase to column value mapping
3. filters such as `IS_TOP_TIER_ENTITY` and others in the where clause
4. Period_id value to use
5. Group by column

should be generated for each of the CTE separately.
```

```
If the input question is similar to any of the examples given above,
then a thought should be generated to detect that and then that example 
should be followed closely to get the SQL for the input question given.
```

```
Closing Thoughts and Observations
```
These should summarize:
1. The structure of the SQL query:
    - This states whether the query has any nested query.
    If so, the structure of the nested query is also mentioned.
    If not, a summary of the function of each of the select`, `where`, `group_by` etc. clauses
    should be mentioned.
2. An explanation of how the query solves the user question.
"""

# Prompt fragment listing what the model's reasoning must cover (nested-query
# choice, query plan / column mapping, per-clause justification, use of
# aggregate functions) and how the generated SQL should be indented.
# generate_sql interpolates it into its user prompt directly above the
# reasoning JSON schema. The text is sent to the model verbatim.
reasoning_instructions = """
```
1. Reasoning you provide should first focus on why a nested query was chosen or why it wasn't chosen.
2. It should give a query plan on how to solve this question - explain 
the mapping of the columns to the words in the input question.
3. It should explain each of the clauses and why they are structured the way they are structured. 
For example, if there is a `group_by`, an explanation should be given as to why it exists.
4. If there's any sum() or any other function used it should be explained as to why it was required.
```

```
Format the generated sql with proper indentation - the columns in the
(`select` statement should have more indentation than keyword `select`
and so on for each SQL clause.)
```
"""
def load_schema_from_json(
    schema_path: str = "/Users/virounikamina/Desktop/PIMCO-Text2SQL/chatgpt_api/schema.json",
) -> dict:
    """Load the database schema description from a JSON file.

    Args:
        schema_path: Location of the schema JSON file. Defaults to the path
            that was previously hard-coded, so existing zero-argument calls
            behave as before.

    Returns:
        The parsed JSON content.

    Raises:
        Exception: Any error raised while opening or parsing the file is
            reported on stdout and then re-raised unchanged.
    """
    # NOTE(review): a function with this same name is defined again later in this cell;
    # the later definition is the one that stays bound. One of the two should be removed.
    try:
        # JSON is read explicitly as UTF-8 instead of the platform default encoding.
        with open(schema_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except Exception as e:
        print(f"Error loading schema: {str(e)}")
        # Bare raise keeps the original traceback intact.
        raise

class Background(BaseModel):
    """A setup description providing context for the user's question"""
    # The class docstring and the Field description are emitted by
    # model_json_schema(), so their wording is kept exactly as-is.
    # `background` must be at least 10 characters long.
    background: str = Field(description="Background for the user's question", min_length=10)

class Thought(BaseModel):
    """A thought about the user's question"""
    # One reasoning step: free text plus a flag saying whether the step helps.
    # generate_sql builds these from the items of the model's `reasonings` list.
    thought: str = Field(description="Text of the thought")
    helpful: bool = Field(description="Whether the thought is helpful to solving the user's question")

class Observation(BaseModel):
    """An observation summarizing insights from the reasoning process"""
    # Single free-text field; the description string below is part of the generated JSON schema.
    observation: str = Field(description="An insightful observation on the sequence of thoughts and observations generated so far")

class FinalOutput(BaseModel):
    """Complete output structure containing the query, reasoning, and SQL"""
    user_nlp_query: str = Field(
        description="The original natural language query to be translated into SQL"
    )
    reasonings: List[Thought] = Field(
        description="Step-by-step reasoning process for query generation"
    )
    generated_sql_query: str = Field(
        description="The final SQL query that answers the natural language question"
    )

    # pydantic v2 configuration (this notebook already relies on the v2-only
    # model_json_schema()); replaces the deprecated v1-style inner `class Config`.
    # The example is emitted as part of model_json_schema(), which generate_sql
    # places in the prompt. Every example reasoning item therefore has the
    # {thought, helpful} shape that `reasonings: List[Thought]` accepts; the
    # previous example showed `background` / `observation` items, which
    # generate_sql's `Thought(**item)` construction rejects.
    model_config = {
        "arbitrary_types_allowed": True,
        "json_schema_extra": {
            "example": {
                "user_nlp_query": "Show top funds by total assets",
                "reasonings": [
                    {"thought": "Analyzing fund asset query", "helpful": True},
                    {"thought": "Need to sort by total assets", "helpful": True},
                    {"thought": "Simple ranking query required", "helpful": True}
                ],
                "generated_sql_query": "SELECT * FROM fund_table ORDER BY total_assets DESC LIMIT 10"
            }
        }
    }

    def get_sql(self) -> str:
        """Return the generated SQL text."""
        return self.generated_sql_query

    def get_reasoning_steps(self) -> List[str]:
        """Render each reasoning item as a labelled, human-readable line.

        Returns:
            One string per recognised item, in the order stored in `reasonings`.
        """
        steps = []
        for item in self.reasonings:
            # `reasonings` is declared as List[Thought], so validated instances are
            # expected to hold only Thought items; the Background / Observation
            # branches are kept for items that reach here some other way.
            if isinstance(item, Background):
                steps.append(f"Background: {item.background}")
            elif isinstance(item, Thought):
                steps.append(f"Thought: {item.thought} (Helpful: {item.helpful})")
            elif isinstance(item, Observation):
                steps.append(f"Observation: {item.observation}")
        return steps

def load_schema_from_json(
    schema_path: str = "/Users/virounikamina/Desktop/PIMCO-Text2SQL/chatgpt_api/schema.json",
) -> dict:
    """Load the database schema description from a JSON file.

    Args:
        schema_path: Location of the schema JSON file. Defaults to the path
            that was previously hard-coded, so existing zero-argument calls
            behave as before.

    Returns:
        The parsed JSON content.

    Raises:
        Exception: Any error raised while opening or parsing the file is
            reported on stdout and then re-raised unchanged.
    """
    # NOTE(review): this re-defines a same-named function declared earlier in this cell
    # and shadows it; one of the two definitions should be removed.
    try:
        # JSON is read explicitly as UTF-8 instead of the platform default encoding.
        with open(schema_path, 'r', encoding='utf-8') as f:
            return json.load(f)
    except Exception as e:
        print(f"Error loading schema: {str(e)}")
        # Bare raise keeps the original traceback intact.
        raise

def execute_sql(
    query: str,
    db_path: str = "/Users/virounikamina/Desktop/PIMCO-Text2SQL/sqlite/nport.db",
) -> str:
    """Run a SQL statement against a SQLite database and return the result as CSV text.

    Args:
        query: SQL text to execute.
        db_path: SQLite database file. Defaults to the path that was previously
            hard-coded, so existing one-argument calls behave as before.

    Returns:
        CSV text (header row followed by data rows, csv module default dialect).
        An empty string is returned for statements that produce no result set.

    Raises:
        sqlite3.Error: Database errors are reported on stdout and re-raised.
    """
    # NOTE(review): the connection is opened read-write. If `query` can come from
    # a language model, consider opening the database read-only.
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()
        cursor.execute(query)

        # cursor.description is None for statements without a result set;
        # iterating it would raise a TypeError that the handler below does not catch.
        if cursor.description is None:
            return ""

        columns = [description[0] for description in cursor.description]
        rows = cursor.fetchall()

        with io.StringIO() as output:
            writer = csv.writer(output)
            writer.writerow(columns)
            writer.writerows(rows)
            return output.getvalue()
    except sqlite3.Error as e:
        print(f"Database error: {str(e)}")
        # Bare raise keeps the original traceback intact.
        raise
    finally:
        if conn:
            conn.close()

def generate_sql(question: str, max_retries: int = 3) -> FinalOutput:
    """Ask the chat model for a SQL query (with reasoning) that answers `question`.

    The user prompt combines the question, the database schema (`schema_info`),
    the reasoning instructions and schemas defined at module level, and the
    JSON schema of `FinalOutput`. The model is asked for a JSON object, which
    is parsed into a `FinalOutput`.

    Args:
        question: Natural-language question to translate into SQL.
        max_retries: Maximum number of attempts. An attempt fails when the API
            call raises or when the response cannot be parsed into a
            `FinalOutput`; failed attempts are retried until this limit.

    Returns:
        The parsed `FinalOutput`. If every attempt fails (or `max_retries` is
        not positive) a placeholder is returned whose SQL is "SELECT 1" and
        whose single thought names the last failure ("Error in process",
        "Failed to parse response" or "Maximum retries exceeded").
    """
    system_prompt = f"""
You are the most intelligent person in the world.

You will receive a $500 tip if you follow ALL the instructions specified.

Instructions:
Provide an explanation of why the given sql query is correct based 
on the input request and the description of the columns.

Use step by step reasoning and at each step generate thoughts of increasing complexity.

Getting this answer right is important for my career. Please do your best.
"""

    # Both values are dicts. Serialise them as JSON so the prompt contains real
    # JSON (double quotes, true/false/null) rather than a Python repr, matching
    # the "JSON Schema" wording of the prompt and the JSON response requested.
    database_schema_json = json.dumps(schema_info)
    output_schema_json = json.dumps(FinalOutput.model_json_schema())

    user_prompt = f"""
Generate a SQL query that retrieves from the database the answer to this question: {question}

Database Schema:
{database_schema_json}

Use the following JSON Schema as the grammar to create the structure 
for the step by step reasoning, and then to create the final SQL query.

Schema for Reasoning:
{reasoning_instructions}
{reasonings_schema_json}


The instructions on how to structure the reasoning is provided below:
{thought_instructions}

Schema for Overall Output:
{output_schema_json}

The final response should be a json with names as:
- user_nlp_query: exactly the same as the user query in string format
- reasonings: reasoning steps adhering to the Reasonings schema
- generated_sql_query: the SQL query generated in string format

This is the final answer format required.
"""

    def _fallback(reason: str) -> FinalOutput:
        # Placeholder result used when no usable answer could be obtained.
        return FinalOutput(
            user_nlp_query=question,
            reasonings=[Thought(thought=reason, helpful=False)],
            generated_sql_query="SELECT 1"
        )

    failure_reason = "Maximum retries exceeded"
    for attempt in range(max_retries):
        try:
            response = client.chat.completions.create(
                model="gpt-4o",
                messages=[
                    {"role": "system", "content": system_prompt},
                    {"role": "user", "content": user_prompt}
                ],
                response_format={"type": "json_object"}
            )
            final_response = response.choices[0].message.content
        except Exception as exc:
            # Report the cause instead of discarding it, then try again.
            print(f"generate_sql: API call failed (attempt {attempt + 1}/{max_retries}): {exc}")
            failure_reason = "Error in process"
            continue

        try:
            result = json.loads(final_response)
            return FinalOutput(
                user_nlp_query=result["user_nlp_query"],
                reasonings=[
                    Thought(**thought) for thought in result["reasonings"]
                ],
                generated_sql_query=result["generated_sql_query"]
            )
        except Exception as exc:
            # A malformed or incomplete response is worth another attempt
            # rather than an immediate placeholder.
            print(f"generate_sql: could not parse response (attempt {attempt + 1}/{max_retries}): {exc}")
            failure_reason = "Failed to parse response"

    return _fallback(failure_reason)

In [2]:
############################################ COLUMN MAPPING

from typing import Union, Tuple, List, Optional

class CMBackground(BaseModel):
    """A setup to the background for the user."""

    # The docstring and description text are part of the JSON schema produced by
    # model_json_schema(); `background` must be at least 10 characters long.
    background: str = Field(
        description="Background for the user's question",
        min_length=10,
    )


class CMThought(BaseModel):
    """A thought about the user's question."""

    # Unlike the `Thought` model defined earlier in this notebook, no `helpful`
    # flag is declared here; a column-mapping thought carries text only.
    thought: str = Field(
        description="Text of the thought.",
    )


class CMObservation(BaseModel):
    """An observation on the sequence of thoughts and observations generated so far."""

    # Single free-text field; its description is emitted in the JSON schema.
    observation: str = Field(
        description="An insightful observation on the sequence of thoughts and observations generated so far.",
    )
    

class CMReasonings(BaseModel):
    """Returns a detailed reasoning to the user's question."""

    # Ordered reasoning items; each is a background, a thought or an observation.
    # No minimum number of items is enforced.
    reasonings: list[Union[CMBackground, CMThought, CMObservation]] = Field(description="Reasonings to solve the users questions.")


# Schema of the reasoning list as a dict (not a JSON string), as returned by pydantic.
reasonings_schema_json_cm = CMReasonings.model_json_schema()

class FinalQueryOutput(BaseModel):
    # Response structure for the column-mapping task: both input queries echoed
    # back, the reasoning items, and the resulting list of column-name pairs.
    # (Kept without a class docstring: pydantic would add one to the JSON schema.)

    input_sql_query_1: str = Field(description="Returns the exact same first query that the user gave as input.")

    input_sql_query_2: str = Field(description="Returns the exact same second query that the user gave as input.")

    # No minimum number of reasoning items is enforced.
    reasonings: list[Union[CMBackground, CMThought, CMObservation]] = Field(description="Reasonings to solve the users questions.")

    # Each tuple is a pair of column names; the description text below
    # (including its embedded line break) is part of the generated schema.
    column_mapping_list: List[Tuple[str, str]] = Field(
        description="""Returns the list of the corresponding column names in first sql query, sql 1, which
        corresponds to the column name in the other sql query, sql 2, as a list of tuple entries"""
    )


# Schema of the full column-mapping response as a dict (not a JSON string).
column_mapping_schema_json = FinalQueryOutput.model_json_schema()

# Task description for the column-mapping prompt: given two SQL queries that are
# meant to be equivalent, produce a list of (query-1 column, query-2 column)
# tuples, keeping any table aliases. compare_csv_din passes this text to
# get_user_prompt_for_question, which embeds it in the user prompt verbatim.
# NOTE(review): the ``` fence markers in this text are not evenly paired.
complete_user_prompts = """
```
Task Overview
```
Given two sql queries which are supposed to be equivalent, as inputs, 
the task is to give a column mapping between the output columns in one sql query
to the other sql query.

The mapping should include any table aliases present in the column names.
For example, if one query uses 'COLUMN_NAME' and another uses 'alias.column_name',
the mapping should be ['COLUMN_NAME', 'alias.column_name'].
```

```
The mapping is to be generated as a list of tuples.
```

```
For each element of the list which would be a tuple, 
the first entry in the tuple would be the column name used in sql query 1,
and the second entry in the tuple would be the corresponding column name in the sql query 2.
```
"""

# Reasoning requirements for the column-mapping prompt (nested-query check, plan,
# per-clause explanation) plus SQL indentation guidance. Read as a module-level
# name by get_user_prompt_for_question and embedded in the prompt verbatim.
reasoning_instructions_cm = """
```
1. Reasoning you provide should first focus on whether the input sql queries contain 
a nested query or not.
2. It should give a plan on how to solve this question.
3. It should explain each of the clauses and why they are structured the way they are structured. 
For example, if there is a `group_by`, an explanation should be given as to why it exists.
```

```
Format the generated sql with proper indentation - the columns in the
(`select` statement should have more indentation than keyword `select`
and so on for each SQL clause.)
```
"""

# Step-by-step "thought" instructions for the column-mapping prompt: list the
# output columns of each query, then the columns common to both, the columns
# only in query 1, and the columns only in query 2. Embedded in
# reasoning_schema_instructions and sent to the model verbatim.
# Fix: the third comparison previously asked for "columns in sql query 1 which
# are in sql query 2 but not present in sql query 1", which is self-contradictory;
# it now asks for the columns of query 2 that are missing from query 1.
# The inert f-prefix (no placeholders) was dropped.
thought_instructions_cm= """
```
Thought Instructions:
```

```
Generate thoughts of increasing complexity.
Each thought should build on the previous ones and thoughts 
should progressively cover the nuances of the problem at hand.
```

```
Generate two separate thoughts, one each for the two input sql queries, 
to figure out the list of output columns in each of the sql queries.
```

```
Generate a thought to figure out the list of columns in sql query 1
which are present in both the sql queries.
```

```
Generate a thought to figure out the list of columns in sql query 1 
which are in sql query 1 but 
which are not present in sql query 2.
```

```
Generate a thought to figure out the list of columns in sql query 2
which are in sql query 2 but 
which are not present in sql query 1.
```

```
If the query uses common table expressions or nested queries, 
the above thoughts should be generated for each of the CTE separately.
```


```
Closing Thoughts and Observations
```
These should summarize:
1. The structure of the SQL query:
    - This states whether the query has any nested query.
    If so, the structure of the nested query is also mentioned.
    If not, a summary of the function of each of the select`, `where`, `group_by` etc. clauses
    should be mentioned.
2. An explanation of why the mapping is correct.
"""

# Output-format section of the column-mapping prompt: the reasoning schema, the
# thought instructions and the overall response schema, followed by the list of
# top-level keys the response must contain. Read as a module-level name by
# get_user_prompt_for_question.
# Fix: `reasonings_schema_json_cm` and `column_mapping_schema_json` are dicts;
# they are now serialised with json.dumps so the prompt contains real JSON
# (as the "JSON Schema" wording promises) instead of a Python dict repr.
reasoning_schema_instructions = f"""
```
Use the following JSON Schema as the grammar to create the structure 
for the step by step reasoning, and then to 
create the final SQL query.
```

```
Schema for Reasoning:
```
{json.dumps(reasonings_schema_json_cm)}
```

```
The instructions on how to structure the reasoning is provided below:
```
{thought_instructions_cm}
```

```
Schema for Overall Output:
(This includes the reasonings schema above as an element)
```
{json.dumps(column_mapping_schema_json)}
```

```
The final response should be a json with `names` as 
    `input_sql_query_1`,
    `input_sql_query_2`,
    `reasonings`,
    `column_mapping_list`.
```
"""


def get_user_prompt_for_question(input_sql_query_1, input_sql_query_2, input_table_schema, complete_user_prompts):
    """Assemble the user prompt for the column-mapping request.

    The two queries, the table schema and the task description are formatted
    into a fixed template, followed by the module-level
    `reasoning_instructions_cm` and `reasoning_schema_instructions` texts.
    Arguments are inserted with normal f-string formatting.

    Args:
        input_sql_query_1: First SQL query of the pair.
        input_sql_query_2: Second SQL query of the pair.
        input_table_schema: Description of the table(s) the queries run on.
        complete_user_prompts: Detailed task instructions to embed.

    Returns:
        The complete prompt text.
    """
    return f"""
```
Here are the two sql statements that are to be compared:
```

```
SQL Query 1:
```
{input_sql_query_1}
```

```
SQL Query 2:
```
{input_sql_query_2}
```

```
Generate a column mapping corresponding to the given input sql queries
and the description of the table provided below.
```
{input_table_schema}
```

```
Here's a more detailed set of instructions:
```
{complete_user_prompts}
```

```
Reasoning as to why the query is correct:
```
{reasoning_instructions_cm}


{reasoning_schema_instructions}

```
Response for Column Mapping Generation:
```
"""


def call_openai_model(system_prompt, user_prompt, model_name):
    """Send one system + user message pair to the chat model and return its reply.

    The request asks for a JSON-object response via the module-level `client`.

    Args:
        system_prompt: Content of the system message.
        user_prompt: Content of the user message.
        model_name: Model identifier passed to the API.

    Returns:
        On success, the message content of the first choice. If anything raises
        during the call, a dict with a single "content" key holding a generic
        retry message is returned instead, so callers must check the type
        before parsing the result.
    """
    chat_history = [
        {'role': 'system', 'content': system_prompt},
        {'role': 'user', 'content': user_prompt},
    ]

    try:
        response = client.chat.completions.create(
            model=model_name,
            messages=chat_history,
            response_format={"type": "json_object"},
        )
        return response.choices[0].message.content
    except Exception as exc:
        # Previously the exception was discarded, leaving no trace of the real cause.
        print(f"call_openai_model failed: {type(exc).__name__}: {exc}")

        response = {
            "content": "An error occured. Please retry your chat. \
                If you keep getting this error, you may be out of OpenAI \
                completion tokens. Contact #help-ai on slack for assistance."
        }
        return response


# System prompt for the column-mapping request, built from four fragments that
# are concatenated (each on its own line) into `system_prompt` below.
# compare_csv_din passes `system_prompt` to call_openai_model.
# All text here is sent to the model verbatim.

# Fragment 1: persona statement.
system_prompt_snippet_001 = """
```
You are the most intelligent person in the world.
```
"""

# Fragment 2: incentive statement.
system_prompt_snippet_002 = """

```
You will receive a $500 tip if you follow ALL the instructions specified.
```
"""

# Fragment 3: the actual task — map columns between two equivalent SQL
# statements, reasoning step by step.
# NOTE(review): the ``` fence markers in this fragment are not evenly paired.
system_prompt_snippet_003 = """

```
Instructions
```
Give a column mapping between two equivalent sql statements
which may differ in the names of columns used in the output
and may also differ in the structure, but the overall meaning
and function of the query is meant to be the same.
```

```
Use step by step reasoning and at each step generate thoughts of increasing complexity.
```
"""

# Fragment 4: closing emphasis.
system_prompt_snippet_004 = """

```
Getting this answer right is important for my career. Please do your best.
```
"""

# Final system prompt: the four fragments in order.
system_prompt = f"""
{system_prompt_snippet_001}
{system_prompt_snippet_002}
{system_prompt_snippet_003}
{system_prompt_snippet_004}
"""

In [3]:
# Smoke test on a single question: print the generated SQL first, then the
# full structured result (question, reasoning steps, SQL).
final_output = generate_sql("Show top funds by total assets")
print(final_output.generated_sql_query, final_output, sep="\n")


SELECT 
    SERIES_NAME, 
    CAST(TOTAL_ASSETS AS NUMERIC) AS TOTAL_ASSETS_NUMERIC 
FROM 
    FUND_REPORTED_INFO 
ORDER BY 
    TOTAL_ASSETS_NUMERIC DESC 
LIMIT 10;

user_nlp_query='Show top funds by total assets' reasonings=[Thought(thought='Considering whether the query requires Common Table Expressions (CTEs): Since we are interested in extracting a list of top funds based on their total assets, a single straightforward query should suffice. Typically, CTEs are preferable in scenarios where multiple intermediate calculations or recursive operations are needed, neither of which is required here. Thus, a CTE is not necessary.', helpful=True), Thought(thought="Mapping columns to the words in the input question: From the database schema, we identify that 'FUND_REPORTED_INFO' contains a column named 'TOTAL_ASSETS'. This maps directly to 'total assets' in the question. For identifying the 'funds', the 'SERIES_NAME' column from the same table can be used to represent the fund identity.",

In [4]:
# Log files are written next to the notebook as og_all_outputs<qnum>.txt.
curr = os.getcwd()
print(curr)
output_file = os.path.join(curr, 'og_all_outputs')
def append_to_file(output, qnum, filename=output_file):
    """Append one entry to the log file of question `qnum`.

    The log path is `filename + str(qnum) + '.txt'`. A file that does not exist
    yet is created with a "Test_OG Output Log" title and a separator line;
    every entry is followed by a separator line of 80 '=' characters.

    Args:
        output: Text of the entry.
        qnum: Question number used in the file name.
        filename: Path prefix of the log file (defaults to `output_file`).
    """
    output_filename = filename + str(qnum) + '.txt'
    separator = "=" * 80 + "\n"
    # Decide about the header before opening: append mode creates the file.
    is_new_file = not os.path.exists(output_filename)
    # Explicit UTF-8: entries contain model-generated text that may not be
    # representable in the platform default encoding.
    with open(output_filename, 'a', encoding='utf-8') as file:
        if is_new_file:
            file.write("Test_OG Output Log\n")
            file.write(separator)
        # Append the output
        file.write(output + "\n" + separator)

/Users/virounikamina/Desktop/PIMCO-Text2SQL/test


In [5]:
import pandas as pd
import re

def get_aggregate_columns(sql_query):
    """
    Extract resulting output column names of aggregate functions in the SQL query,
    handling duplicates and default naming conventions.

    Recognised functions are SUM, AVG, COUNT, MAX and MIN (case-insensitive).
    A call followed by `AS alias` contributes the alias. A call without an alias
    contributes `func(argument)` with the function name lower-cased; the second
    and later un-aliased calls of the same function get a `_1`, `_2`, ... suffix.

    Arguments are read up to the matching closing parenthesis, so nested calls
    such as `SUM(CAST(x AS REAL)) AS total` are captured whole and keep their
    alias. Function names are matched on a word boundary, so an identifier that
    merely ends in an aggregate name (e.g. `CUSTOM_SUM(`) is ignored.

    Args:
        sql_query: SQL text to scan.

    Returns:
        List of column names in order of appearance.
    """
    aggregate_functions = ["SUM", "AVG", "COUNT", "MAX", "MIN"]
    call_pattern = re.compile(rf"\b({'|'.join(aggregate_functions)})\s*\(", re.IGNORECASE)
    alias_pattern = re.compile(r"\s+AS\s+(\w+)", re.IGNORECASE)

    output_columns = []
    unaliased_seen = {}  # Number of un-aliased calls seen so far, per function
    position = 0

    while True:
        call = call_pattern.search(sql_query, position)
        if call is None:
            break

        # Walk forward to the parenthesis that closes this call.
        depth = 1
        index = call.end()
        while index < len(sql_query) and depth:
            char = sql_query[index]
            if char == "(":
                depth += 1
            elif char == ")":
                depth -= 1
            index += 1

        if depth:
            # Unbalanced parentheses: skip this opener and keep scanning.
            position = call.end()
            continue

        inner = sql_query[call.end():index - 1]
        position = index

        alias = alias_pattern.match(sql_query, position)
        if alias:  # Explicit alias defined
            output_columns.append(alias.group(1))
            position = alias.end()
            continue

        # No alias, use default naming conventions (e.g., sum(x), sum(y)_1, ...)
        func_lower = call.group(1).lower()
        occurrence = unaliased_seen.get(func_lower, 0)
        unaliased_seen[func_lower] = occurrence + 1
        default_name = f"{func_lower}({inner.strip()})"
        if occurrence:
            default_name = f"{default_name}_{occurrence}"
        output_columns.append(default_name)

    return output_columns

def evaluate_sql_accuracy(generated_sql, ground_truth_sql, generated_csv, ground_truth_csv, qnum):
    """
    Compare the CSV result of a generated query with the ground-truth CSV result.

    Both CSV texts are parsed into DataFrames. The check fails if any
    ground-truth column name is absent from the generated result. Otherwise the
    aggregate output columns detected in `ground_truth_sql` are dropped from
    both frames, the generated frame is reduced to the remaining ground-truth
    columns, and the two frames are compared with `DataFrame.equals`.
    The verdict is also written to the log file of question `qnum`.

    `generated_sql` is accepted but not used.

    Returns:
        True when the compared frames are equal, False otherwise.
    """
    generated_frame = pd.read_csv(io.StringIO(generated_csv))
    truth_frame = pd.read_csv(io.StringIO(ground_truth_csv))

    # Every ground-truth column must exist in the generated result.
    missing_columns = [name for name in truth_frame.columns if name not in generated_frame.columns]
    if missing_columns:
        append_to_file("False, not all ground truth columns are in generated csv",qnum)
        return False

    # Aggregate output columns (as named in the ground-truth SQL) are excluded from the comparison.
    aggregate_names = get_aggregate_columns(ground_truth_sql)
    generated_frame = generated_frame.drop(columns=aggregate_names, errors='ignore')
    truth_frame = truth_frame.drop(columns=aggregate_names, errors='ignore')

    # Same columns, same order as the ground truth, then a strict frame comparison.
    rows_match = generated_frame[truth_frame.columns].equals(truth_frame)
    if rows_match:
        append_to_file("True, all ground truth columns exist, and rows match", qnum)
        return True

    append_to_file("False, all ground truth columns exist, but rows mismatch",qnum)
    return False





def compare_csv_din(ground_truth_query: str, qnum: int) -> bool:
    """Generate SQL for one benchmark entry and log a column mapping against it.

    Steps, all logged to the file of question `qnum`:
      1. `ground_truth_query` is logged and passed to `generate_sql`.
      2. The returned reasoning steps and generated SQL are logged.
      3. The model is asked for a column mapping between `ground_truth_query`
         and the generated SQL; the mapping (or the error) is logged.

    No query is executed and no result sets are compared here.

    Args:
        ground_truth_query: Ground-truth SQL text for the question.
        qnum: Question number, used to pick the log file.

    Returns:
        Always True; failures in the mapping step are logged, not raised.
    """
    append_to_file(f"Ground Truth Query: {ground_truth_query}", qnum)

    # NOTE(review): generate_sql expects a natural-language question, but it is
    # given the ground-truth SQL text here — confirm this is intended.
    final_output = generate_sql(ground_truth_query)
    append_to_file(f"Thoughts: {final_output.reasonings}", qnum)
    append_to_file(f"SQL: {final_output.generated_sql_query}", qnum)

    # Add print statements to debug
    print("About to start column mapping...")
    print(f"schema_info available: {schema_info is not None}")
    print(f"complete_user_prompts available: {complete_user_prompts is not None}")
    print(f"system_prompt available: {system_prompt is not None}")

    try:
        print("Getting column mappings prompt...")
        column_mappings_prompt = get_user_prompt_for_question(
            ground_truth_query,
            # Only the SQL text belongs in the "SQL Query 2" slot; passing the whole
            # FinalOutput object put its repr (question, thoughts, SQL) there.
            final_output.generated_sql_query,
            schema_info,
            complete_user_prompts
        )

        print("Calling OpenAI model...")
        column_mappings_response = call_openai_model(
            system_prompt=system_prompt,
            user_prompt=column_mappings_prompt,
            model_name='gpt-4o'
        )

        # call_openai_model returns a dict with a "content" message when the API
        # call fails; report that instead of a confusing json.loads TypeError.
        if isinstance(column_mappings_response, dict):
            raise RuntimeError(
                f"OpenAI call failed: {column_mappings_response.get('content', 'no details available')}"
            )

        print("Parsing response...")
        response_parsed = json.loads(column_mappings_response)
        append_to_file(f"Column Mappings: {json.dumps(response_parsed['column_mapping_list'], indent=2)}", qnum)
        print("Column mappings appended to file")

    except Exception as e:
        err_string = f"Error Mapping Columns: {str(e)}"
        print(err_string)
        append_to_file(err_string, qnum)

    return True

In [6]:
import os
import pandas as pd

# Configure paths
curr = os.getcwd()
parent_dir = os.path.dirname(curr)  # Get parent directory
print(f"Current working directory: {curr}")
print(f"Parent directory: {parent_dir}")

# Input CSV lives one directory up; per-question logs are written next to the notebook.
input_file = os.path.join(parent_dir, "query_summary.csv")
output_file = os.path.join(curr, 'og_all_outputs')

print(f"Looking for input file at: {input_file}")

def append_to_file(output: str, qnum: int, filename=output_file):
    """Write one entry to the UTF-8 log file of question `qnum`.

    The log path is `filename`, the question number and '.txt' joined together.
    A missing file is first created with a "Test_Din Output Log" title and a
    separator line; each entry is followed by a line of 80 '=' characters.

    Note: this re-binds the name of a function defined earlier in the notebook.
    """
    divider = "=" * 80 + "\n"
    log_path = "".join((filename, str(qnum), '.txt'))

    # Create the file with its header on first use.
    if not os.path.exists(log_path):
        with open(log_path, 'w', encoding='utf-8') as file:
            file.writelines(["Test_Din Output Log\n", divider])

    # Then append the entry followed by a separator.
    with open(log_path, 'a', encoding='utf-8') as file:
        file.write("".join((output, "\n", divider)))

def process_queries(input_file: str):
    """Run compare_csv_din for every row of the query CSV.

    The CSV is read with pandas; each row's 'SQL' value is passed to
    `compare_csv_din` together with a 1-based question number derived from the
    row index. A failure while handling one row is printed and the loop moves
    on to the next row. Any other failure (including reading the file) is
    printed and re-raised.

    Args:
        input_file: Path of the CSV file to process.
    """
    try:
        queries = pd.read_csv(input_file, encoding='utf-8')
        print(f"Successfully loaded {len(queries)} queries from {input_file}")

        for index, row in queries.iterrows():
            try:
                qnum = index + 1
                ground_truth_query = row['SQL']  # Adjust column name if needed

                print(f"Processing question {qnum}...")

                # compare_csv_din logs the generated SQL and the column mapping.
                compare_csv_din(ground_truth_query, qnum)

                print(f"Successfully generated log file for question {qnum}")

            except Exception as row_error:
                # Best effort: report the row and carry on with the next one.
                print(f"Error processing question {qnum}: {row_error}")

    except Exception as load_error:
        print(f"Error reading input file: {load_error}")
        raise

# Entry point: process every query in `input_file`; a failure is reported on
# stdout instead of surfacing as a traceback.
if __name__ == "__main__":
    try:
        process_queries(input_file)
    except Exception as fatal_error:
        print(f"Fatal error: {fatal_error}")
    else:
        print("Processing complete!")

Current working directory: /Users/virounikamina/Desktop/PIMCO-Text2SQL/test
Parent directory: /Users/virounikamina/Desktop/PIMCO-Text2SQL
Looking for input file at: /Users/virounikamina/Desktop/PIMCO-Text2SQL/query_summary.csv
Successfully loaded 119 queries from /Users/virounikamina/Desktop/PIMCO-Text2SQL/query_summary.csv
Processing question 1...
About to start column mapping...
schema_info available: True
complete_user_prompts available: True
system_prompt available: True
Getting column mappings prompt...
Calling OpenAI model...
Parsing response...
Column mappings appended to file
Successfully generated log file for question 1
Processing question 2...
About to start column mapping...
schema_info available: True
complete_user_prompts available: True
system_prompt available: True
Getting column mappings prompt...
Calling OpenAI model...
Parsing response...
Column mappings appended to file
Successfully generated log file for question 2
Processing question 3...
About to start column map

KeyboardInterrupt: 