### Using the OpenAI API to query an LLM

In [7]:
import json
import random
from openai import OpenAI
import os
import pandas as pd
from dotenv import load_dotenv
from typing import List, Tuple

# Load settings from a local .env file into the process environment before the
# client is built (presumably this is where the OpenAI API key comes from —
# confirm against the deployment setup).
load_dotenv()
# Module-level client shared by every request sent from query_openai().
client = OpenAI()

# load ATIS Schema
def load_schema(csv_file_path: str) -> str:
    """Render the schema CSV as the prompt text that lists every table.

    The CSV header is normalised (stripped and lower-cased) and must then
    contain ``table name`` and ``field name`` columns.  Rows in which either
    value is ``-`` or missing (``nan`` once stringified) are ignored.
    Adjacent rows that share a table name are collapsed into a single
    ``- table(col1, col2)`` line; a table whose rows are not adjacent is
    therefore listed once per run of rows.

    Args:
        csv_file_path: Path of the schema CSV file.

    Returns:
        The instruction sentence followed by one newline-terminated line per
        table run.
    """
    placeholders = ("-", "nan")
    frame = pd.read_csv(csv_file_path)
    frame.columns = [name.strip().lower() for name in frame.columns]

    # Each entry is (table, [fields]) for one run of adjacent rows.
    runs = []
    for _, record in frame.iterrows():
        table_name = str(record['table name']).strip()
        field_name = str(record['field name']).strip()
        if table_name in placeholders or field_name in placeholders:
            continue
        if runs and runs[-1][0] == table_name:
            runs[-1][1].append(field_name)
        else:
            runs.append((table_name, [field_name]))

    header = "Your task is to generate an SQL query based on the following database schema:\n"
    table_lines = [
        f"- {table_name}({', '.join(fields)})\n"
        for table_name, fields in runs
        if table_name  # a run with an empty table name never produces a line
    ]
    return header + "".join(table_lines)

# load datasets
def load_generation_data(file_path: str) -> List[Tuple[str, str]]:
    """Read a JSON Lines dataset into ``(input, output)`` pairs.

    Every non-blank line must be a JSON object with ``input`` and ``output``
    keys.  Blank or whitespace-only lines (for example a trailing empty line)
    are skipped instead of aborting the load.

    Args:
        file_path: Path of the ``.jsonl`` file.

    Returns:
        One ``(input, output)`` tuple per record, in file order.

    Raises:
        json.JSONDecodeError: If a non-blank line is not valid JSON.
        KeyError: If a record lacks the ``input`` or ``output`` key.
    """
    data = []
    # Decode explicitly as UTF-8 rather than relying on the platform's
    # locale-dependent default encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            if not line.strip():
                continue
            ex = json.loads(line)
            data.append((ex['input'], ex['output']))
    return data

# choose few-shot/many-shot examples
def select_examples(data, num_examples):
    """Pick ``num_examples`` items from ``data`` at random, without repeats.

    The draw is delegated to ``random.sample``, so the result depends on the
    state of the global ``random`` generator.
    """
    sampled = random.sample(data, k=num_examples)
    return sampled

# build openai message
def build_messages(question, examples=None, schema=None):
    """Assemble the chat message list for one question.

    Order of the result: an optional ``system`` message carrying ``schema``,
    then a ``user``/``assistant`` pair for each ``(question, answer)`` example,
    and finally the ``user`` message with ``question``.  A falsy ``schema`` or
    ``examples`` contributes nothing.

    Returns:
        A list of ``{"role": ..., "content": ...}`` dicts.
    """
    conversation = [{"role": "system", "content": schema}] if schema else []

    for shot_question, shot_answer in examples or ():
        conversation.extend((
            {"role": "user", "content": shot_question},
            {"role": "assistant", "content": shot_answer},
        ))

    conversation.append({"role": "user", "content": question})
    return conversation

# send the conversation to the OpenAI chat completions API
def query_openai(question, examples=None, schema=None, model="gpt-4o-mini"):
    """Ask the chat model one question and return its stripped text reply.

    Args:
        question: The user question to answer.
        examples: Optional ``(question, answer)`` pairs used as in-context
            demonstrations.
        schema: Optional text sent as the system message.
        model: Name of the chat model to call; defaults to the model this
            function previously hard-coded.

    Returns:
        The content of the first choice with surrounding whitespace removed,
        or ``""`` when the reply carries no text content.
    """
    messages = build_messages(question, examples, schema)
    response = client.chat.completions.create(
        model=model,
        messages=messages,
    )
    content = response.choices[0].message.content
    # The API declares the message content as nullable; treat a missing
    # content as an empty answer instead of failing on None.strip().
    return (content or "").strip()


def ask_zero_shot(question, schema_info):
    """Answer ``question`` with the schema as the only context (no examples)."""
    answer = query_openai(question, schema=schema_info)
    return answer

def ask_few_shot(question, examples, schema_info):
    """Answer ``question`` with the schema plus the given in-context examples."""
    answer = query_openai(question, examples, schema_info)
    return answer

def ask_many_shot(question, examples, schema_info):
    """Answer ``question`` with the schema plus the given in-context examples.

    Makes the same ``query_openai`` call as ``ask_few_shot``; the two differ
    only in how many examples the caller passes in.
    """
    answer = query_openai(question, examples, schema_info)
    return answer

# evaluation
def evaluate_model(strategy_func, test_data, examples=None, schema_info=None):
    """Score a prompting strategy by exact-match accuracy over ``test_data``.

    A prediction counts as correct when it equals the gold SQL after
    stripping surrounding whitespace and lower-casing both strings.  Progress
    and the final accuracy are printed to stdout.

    Args:
        strategy_func: Callable invoked as
            ``strategy_func(question, examples, schema_info)`` when
            ``examples`` is truthy, otherwise as
            ``strategy_func(question, schema_info)``.
        test_data: Sequence of ``(question, gold_sql)`` pairs.
        examples: Optional in-context examples forwarded to the strategy.
        schema_info: Schema text forwarded to the strategy.

    Returns:
        The fraction of examples answered correctly, or ``0.0`` when
        ``test_data`` is empty.
    """
    correct = 0
    total = len(test_data)
    for i, (question, gold_sql) in enumerate(test_data):
        try:
            if examples:
                pred_sql = strategy_func(question, examples, schema_info)
            else:
                pred_sql = strategy_func(question, schema_info)
            is_correct = pred_sql.strip().lower() == gold_sql.strip().lower()
        except Exception as e:
            # Best effort: a failing example is reported and left uncounted
            # (i.e. scored as wrong) so one bad call does not end the run.
            print(f"❌ Error at example {i + 1}: {e}")
        else:
            correct += int(is_correct)
            print(f"\n[{i + 1}/{total}]")
            print(f"Q: {question}")
            print(f"Gold: {gold_sql}")
            print(f"Pred : {pred_sql}")
            print(f"Correct: {is_correct}")
    # An empty test set has no meaningful accuracy; report 0.0 instead of
    # dividing by zero.
    accuracy = correct / total if total else 0.0
    print(f"\n✅ Final Accuracy: {accuracy:.3f}")
    return accuracy

# main routine: run every evaluation
def run_all_evaluations(
    schema_path: str = './sources/atis-schema.csv',
    train_path: str = './datasets/generation_train.jsonl',
    test_path: str = './datasets/generation_test.jsonl',
    few_shot_k: int = 5,
    many_shot_k: int = 40,
):
    """Evaluate the zero-, few- and many-shot strategies on the test set.

    Loads the schema and both datasets, samples the in-context examples from
    the training data, then runs ``evaluate_model`` once per strategy.  The
    defaults reproduce the previously hard-coded paths and shot counts.

    Args:
        schema_path: CSV file describing the database schema.
        train_path: JSON Lines file the in-context examples are drawn from.
        test_path: JSON Lines file the strategies are scored on.
        few_shot_k: Number of examples sampled for the few-shot run.
        many_shot_k: Number of examples sampled for the many-shot run.

    Returns:
        A dict mapping ``"zero_shot"``, ``"few_shot"`` and ``"many_shot"`` to
        the accuracy returned by ``evaluate_model`` for that strategy.
    """
    print("=== Loading schema... ===")
    schema_info = load_schema(schema_path)

    print("\n=== Loading datasets... ===")
    train_data = load_generation_data(train_path)
    test_data = load_generation_data(test_path)

    print("\n=== Sampling examples... ===")
    few_shot_examples = select_examples(train_data, few_shot_k)
    many_shot_examples = select_examples(train_data, many_shot_k)

    results = {}

    print("\n=== 🔍 Evaluating Zero-shot ===")
    results["zero_shot"] = evaluate_model(
        ask_zero_shot, test_data, schema_info=schema_info
    )

    print(f"\n=== 🔍 Evaluating Few-shot ({few_shot_k}) ===")
    results["few_shot"] = evaluate_model(
        ask_few_shot, test_data, few_shot_examples, schema_info
    )

    print(f"\n=== 🔍 Evaluating Many-shot ({many_shot_k}) ===")
    results["many_shot"] = evaluate_model(
        ask_many_shot, test_data, many_shot_examples, schema_info
    )

    return results

# Start the evaluation only when this file is executed directly, not when it
# is imported as a module.
if __name__ == "__main__":
    run_all_evaluations()

=== Loading schema... ===

=== Loading datasets... ===

=== Sampling examples... ===

=== üîç Evaluating Zero-shot ===

[1/447]
Q: i need a flight from DENVER to SALT LAKE CITY on monday
Gold: SELECT DISTINCT FLIGHTalias0.FLIGHT_ID FROM AIRPORT_SERVICE AS AIRPORT_SERVICEalias0 , AIRPORT_SERVICE AS AIRPORT_SERVICEalias1 , CITY AS CITYalias0 , CITY AS CITYalias1 , DATE_DAY AS DATE_DAYalias0 , DAYS AS DAYSalias0 , FLIGHT AS FLIGHTalias0 WHERE ( CITYalias0.CITY_CODE = AIRPORT_SERVICEalias0.CITY_CODE AND CITYalias0.CITY_NAME = "DENVER" AND CITYalias1.CITY_CODE = AIRPORT_SERVICEalias1.CITY_CODE AND CITYalias1.CITY_NAME = "SALT LAKE CITY" AND FLIGHTalias0.FROM_AIRPORT = AIRPORT_SERVICEalias0.AIRPORT_CODE AND FLIGHTalias0.TO_AIRPORT = AIRPORT_SERVICEalias1.AIRPORT_CODE ) AND DATE_DAYalias0.DAY_NUMBER = day_number0 AND DATE_DAYalias0.MONTH_NUMBER = month_number0 AND DATE_DAYalias0.YEAR = year0 AND DAYSalias0.DAY_NAME = DATE_DAYalias0.DAY_NAME AND FLIGHTalias0.FLIGHT_DAYS = DAYSalias0.DAYS_CODE

KeyboardInterrupt: 