In [1]:
import os, sys, json, re, time, ast
import pandas as pd
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from dotenv import load_dotenv
sys.path.append('../')
from filtering_schema.Description_base_linking import SchemaLinking

In [2]:
load_dotenv()
# Set environment variables
base_dir = "../filtering_schema"
os.environ['nsql_model_path'] = os.path.join(base_dir, 'models', 'nsql-350M')
os.environ['sentence_emb_model_path'] = os.path.join(base_dir, 'models', 'all-MiniLM-L6-v2')
os.environ['schema_description_folder_path'] = os.path.join(base_dir, 'src', 'schemas', 'column-descriptions')
os.environ['schema_data_types_folder_path'] = os.path.join(base_dir, 'src', 'schemas', 'column-datatypes')
os.environ['column_threshold'] = '0.2'
os.environ['table_threshold'] = '0.2'
os.environ['max_select_column'] = '10'
os.environ['filter_table'] = 'False'
os.environ['verbose'] = 'False'


schema_link = SchemaLinking()
schema_link.selected_domain(schema_description_folder_path=os.environ.get('schema_description_folder_path'),
                            schema_data_types_folder_path=os.environ.get('schema_data_types_folder_path'))

verbose = bool(os.environ.get('verbose').lower() == 'true')

In [3]:
model = 'gpt-3.5-turbo-1106'
pointx_nlqsql_df = pd.read_csv("../src/pointx/PointX_nlqsql_pair.csv")
pointx_nlqsql_df

Unnamed: 0,Table,Question,Actual SQL
0,pointx_keymatrix_dly,What is the total number of all financial tran...,"SELECT month_id, SUM(ntx_pointx_financial) FRO..."
1,pointx_keymatrix_dly,What is the total amount of points generated b...,SELECT SUM(amt_point_topup) FROM pointx_keymat...
2,pointx_keymatrix_dly,What is the total amount of points generated b...,"SELECT month_id, SUM(amt_point_pay) FROM point..."
3,pointx_keymatrix_dly,What is the average rate of released points fo...,SELECT AVG(rate_point_per_baht_pay) FROM point...
4,pointx_keymatrix_dly,Can you determine the average number of custom...,"SELECT month_id, AVG(ncust_visit) FROM pointx_..."
...,...,...,...
100,pointx_fbs_rpt_dly,What's the average sessions duration for users...,"SELECT event_date, AVG(time_difference_second..."
101,pointx_fbs_rpt_dly,What's the percentage of users who have used t...,SELECT (COUNT(DISTINCT CASE WHEN WEEKDAY(eve...
102,pointx_fbs_rpt_dly,How many users have performed a specific event...,SELECT COUNT(DISTINCT user_pseudo_id) FROM poi...
103,pointx_fbs_rpt_dly,How many users have opened the app on at least...,SELECT SUM(user_with_multiple_devices) AS tota...


In [4]:
with open('../filtering_schema/src/schemas/column-descriptions/pointx_keymatrix_dly_description.json') as f:
    pointx_keymatrix_dly_description = json.load(f)

with open('../filtering_schema/src/schemas/column-descriptions/pointx_fbs_rpt_dly_description.json') as f:
    pointx_fbs_rpt_dly_description = json.load(f)

In [5]:
def SQL_columns(sql_query, table_name):
    if table_name == 'pointx_keymatrix_dly':
        shema_description = pointx_keymatrix_dly_description
    elif table_name == 'pointx_fbs_rpt_dly':
        shema_description = pointx_fbs_rpt_dly_description
    used_cols = [col for col in shema_description['columns'].keys() if col in re.split(r'[()\.\,\s]+', sql_query) ] 
    return used_cols

In [6]:
def safe_divide(numerator, denominator):
    try:
        result = round(numerator / denominator,2)
    except :
        result = "ZeroDivisionError"
    return result

In [7]:
def f1_columns(actual_columns, result_columns):
    col_TP = len(set(actual_columns) & set(result_columns))
    col_FP = len(set(result_columns) - set(actual_columns))
    col_FN = len(set(actual_columns) - set(result_columns))

    if col_TP is not None and col_FN is not None and col_FP is not None:
        if len(actual_columns) == 0 : col_recall = 1
        else: col_recall = safe_divide(col_TP, col_TP + col_FN)
        col_precision = safe_divide(col_TP, col_TP + col_FP)
        
    if col_precision is not None and col_recall is not None and col_recall != "ZeroDivisionError" and col_precision != "ZeroDivisionError":
        col_f1 = 2 * safe_divide(col_precision * col_recall, col_precision + col_recall)
        if type(col_f1) == str:
            col_f1 = "ZeroDivisionError"
    else: col_f1 = "ERROR"
    
    return col_recall, col_precision, col_f1

In [8]:
pointx_keymatrix_dly_context = """
{
    "table": "pointx_keymatrix_dly",
    "description": "The Key Matrix Dashboard Design table provides a detailed overview of dashboard-related database columns, \nincluding data types, status indicators, descriptions, conditions, business logic, and sample data, \nenabling a comprehensive understanding of the data structure for effective dashboard design.",
    "columns": {
        "month_id": "Transaction month",
        "ntx_pointx_financial": "All financial transactions, both Payment, Top Up, Transfer",
        "ntx_pointx_financial_out": "Number of Payment Transaction, Transfer",
        "ncust_user": "All the number of Customer in the systemAnd can use Point XPP",
        "ncust_pointx": "Point X Customers, both Customer Type, Easy and Non Easy, not including Guest.",
        "ncust_visit": "All the number of Customer in the systemAnd come to use the point x app on that day",
        "ncust_pointx_visit": "The number of point x Customers, both Customer Type, is Easy and Non Easy, not including Guest that came to use the point x app on that day.",
        "ncust_pointx_financial": "The number of point x Customers, both Customer Type, is Easy and Non Easy, not including Guest at the financial transaction.",
        "ncust_guest": "The number of point x Customers, both Customer Type is Guest.",
        "ncust_guest_visit": "Point X Customers, both Customer Type, is a guest that came to use the point x app on that day.",
        "ncust_register_success": "New customers that Register comes each day.",
        "amt_point_topup": "The number of points caused by all topups",
        "amt_point_topup_auto": "Auto Converse Point Topup number",
        "amt_point_topup_onboard": "Point Topup number onboard",
        "amt_point_topup_onetime": "Number of Point Topup Manual",
        "amt_point_transfer_out": "The number of points that transferred to other users",
        "amt_point_pay": "The number of points caused by all payment",
        "amt_point_pay_sku": "The number of points caused by payment via X-Store.",
        "amt_point_pay_qr": "The number of points caused by payment via Scan & Pay.",
        "amt_point_pay_pyw": "The number of points caused by payment via Paywise.",
        "amt_point_pay_pyw_rbh": "The number of points caused by payment via Robinhood Platform.",
        "amt_point_pay_qr_29": "The number of points caused by payment via Scan & Pay type QR 29.",
        "amt_point_pay_qr_30": "The number of points caused by payment via Scan & Pay type QR 30.",
        "amt_point_pay_qr_cs": "The number of points caused by payment via Scan & Pay type QRCS.",
        "amt_point_payment_p_only": "The number of points caused by payment by Payment Method is the use of Point Only.",
        "amt_point_payment_p_casa": "The number of points caused by payment by Payment Method is the use of Point, plus payment by transferring money.",
        "amt_point_payment_p_cc": "The number of points caused by payment by Payment Method is to use Point, plus payment through credit cards.",
        "rate_point_per_baht_pay": "The average of the Relased Rate at all Payment customers.",
        "rate_point_per_baht_pay_sku": "The average rate of the customer Payment via X-Store (Weigthed Average with Point AMOUNT)",
        "rate_point_per_baht_pay_qr": "The rate of the Rate that Payment customers via Scan & Pay (Weigthed Average with Point Amount paid).",
        "rate_point_per_baht_pay_pyw": "The average rate of the Payment customers via Paywise (Weighted Average with Point Amount paid).",
        "rate_point_per_baht_pay_pyw_rbh": "The average rate of the Payment customers via Robinhood Platform (Weigthed Average with Point AMOUNT)",
        "rate_point_per_baht_pay_qr_29": "The average rate of the customer Payment with Transaction Type type QR 29 (Weigthed Average with Point AMOUNT)",
        "rate_point_per_baht_pay_qr_30": "The average rate of the customer Payment with Transaction Type type QR 30 (Weigthed Average with Point AMOUNT)",
        "rate_point_per_baht_pay_qr_cs": "The average rate of the customer Payment with Transaction Type type QRCS (Weigted Average with Point Amount paid)",
        "rate_point_per_baht_pay_weight": "The average of the Relased Rate at all Payment customers.",
        "rate_point_per_baht_pay_sku_weight": "The average rate of the customer Payment via X-Store (Weigthed Average with Point AMOUNT)",
        "rate_point_per_baht_pay_qr_weight": "The rate of the Rate that Payment customers via Scan & Pay (Weigthed Average with Point Amount paid).",
        "rate_point_per_baht_pay_pyw_weight": "The average rate of the Payment customers via Paywise (Weighted Average with Point Amount paid).",
        "rate_point_per_baht_pay_pyw_rbh_weight": "The average rate of the Payment customers via Robinhood Platform (Weigthed Average with Point AMOUNT)",
        "rate_point_per_baht_pay_qr_29_weight": "The average rate of the customer Payment with Transaction Type type QR 29 (Weigthed Average with Point AMOUNT)",
        "rate_point_per_baht_pay_qr_30_weight": "The average rate of the customer Payment with Transaction Type type QR 30 (Weigthed Average with Point AMOUNT)",
        "rate_point_per_baht_pay_qr_cs_weight": "The average rate of the customer Payment with Transaction Type type QRCS (Weigted Average with Point Amount paid)",
        "rate_baht_per_point_pay": "The average of the Cost Per Point at all Payment customers (Weigted Average with Point Amount paid)",
        "rate_baht_per_point_pay_sku": "The average of the Cost Per Point that the Payment customers via X-Store (Weigthed Average with Point AMOUNT).",
        "rate_baht_per_point_pay_qr": "The average Cost Per Point that Payment customers via Scan & Pay (Weigthed Average with Point Amount paid)",
        "rate_baht_per_point_pay_pyw": "The average of the Cost Per Point that the Paywise (Weigthed Average with Point AMOUNT) is calculated from 1 divided by release rate.",
        "rate_baht_per_point_pay_pyw_rbh": "The average of the Cost Per Point that Payment customers via Robinhood Platform (Weigthed Average with Point AMOUNT)",
        "rate_baht_per_point_pay_qr_29": "The average of Cost Per Point at Payment customers with Transaction Type type QR 29 (Weigthed Average with Point AMOUNT)",
        "rate_baht_per_point_pay_qr_30": "The average of Cost Per Point at Payment customers with Transaction Type type QR 30 (Weigthed Average with Point AMOUNT)",
        "rate_baht_per_point_pay_qr_cs": "The average of Cost Per Point at Payment customers with Transaction Type type QRCS (Weigthed Average with Point AMOUNT)",
        "rate_baht_per_point_pay_weight": "The average of the Cost Per Point at all Payment customers (Weigted Average with Point Amount paid)",
        "rate_baht_per_point_pay_sku_weight": "The average of the Cost Per Point that the Payment customers via X-Store (Weigthed Average with Point AMOUNT).",
        "rate_baht_per_point_pay_qr_weight": "The average of the Cost Per Point at Payment customers via Scan & Pay (Weigthed Average with Point Amount paid).",
        "rate_baht_per_point_pay_pyw_weight": "The average of the Cost Per Point that the Paywise (Weigthed Average with Point AMOUNT) is calculated from 1 divided by release rate.",
        "rate_baht_per_point_pay_pyw_rbh_weight": "The average Cost Per Point that Payment customers via Robinhood Platform (Weigthed Average with Point Amount paid)",
        "rate_baht_per_point_pay_qr_29_weight": "The average of the Cost Per Point at Payment customers with Transaction Type type QR 29 (Weigthed Average with Point AMOUNT)",
        "rate_baht_per_point_pay_qr_30_weight": "The average of Cost Per Point at Payment customers with Transaction Type type QR 30 (Weigthed Average with Point AMOUNT)",
        "rate_baht_per_point_pay_qr_cs_weight": "The average of Cost Per Point at Payment customers with Transaction Type type QRCS (Weigthed Average with Point AMOUNT)",
        "n_topup_point": "The number of transactions caused by all topups.",
        "n_topup_point_onboard": "Number of Transaction Topup Episode Onboard",
        "n_topup_point_onetime": "Number of Transaction Topup Manual",
        "n_topup_point_auto": "Auto converse transaction top",
        "n_transfer_point_out": "The number of transactions that transferred to other users",
        "n_purchase": "The number of transactions caused by payment does not include Reverse Scan & Pay (2002A), Reverse Paywise (2004A) and Cancel Order (7015A) of X-Store.",
        "n_purchase_sku": "The number of transactions caused by payment via X-Store does not include the Cancel Order (7015A) of X-Store.",
        "n_purchase_qr": "The number of transactions caused by payment via Scan & Pay does not include Reverse Scan & Pay (2002A).",
        "n_purchase_pyw": "The number of transactions caused by payment via Paywise does not include Reverse Paywise (2004A).",
        "n_purchase_pyw_rbh": "The number of transactions caused by payment via Robinhood Platform does not include Reverse Scan & Pay (2002A).",
        "n_purchase_qr_29": "The number of transactions caused by payment via Scan & Pay with Transaction Type type QR 29, not including Reverse Scan & Pay (2002A).",
        "n_purchase_qr_30": "The number of transactions caused by payment via Scan & Pay with Transaction Type QR 30, not including Reverse Scan & Pay (2002A).",
        "n_purchase_qr_cs": "The number of transactions caused by payment via Scan & Pay with Transaction Type QRCS excluding Reverse Scan & Pay (2002A).",
        "n_point_payment_p_only": "The number of transactions caused by payment by Payment Method is to use Point Only, not including Reverse Scan & Pay (2002A), Reverse Paywise (2004A) and Cancel Order (7015A) of X-Store.",
        "n_point_payment_p_casa": "The number of transactions caused by payment by Payment Method is to use POINT, plus the payment of accounting does not include the Reverse Scan & Pay (2002A), Reverse Paywise (2004A) and Cancel (7015A) of X-Store.",
        "n_point_payment_p_cc": "The number of transactions caused by payment by Payment Method is to use POINT, combined with credit card payments, not including Reverse Scan & Pay (2002A), Reverse Paywise (2004A) and Cancel Order (7015A) of X-STORE.",
        "mtd1_ncust_user": "All the number of Customer in the systemAnd can use Point XPP",
        "mtd1_ncust_pointx": "Point X Customers, both Customer Type, Easy and Non Easy, not including Guest.",
        "mtd1_ncust_visit": "All the number of Customer in the systemAnd use Point XPP from 1 of the month to the latest transaction information",
        "mtd1_ncust_pointx_visit": "The number of point x Customers, both Customer Type, is Easy and Non Easy, not including the Guest that has been active from the 1st day of the month to the latest transaction data.",
        "mtd1_ncust_pointx_financial": "The number of point x Customers, both Customer Type, is Easy and Non Easy, not including the Guest at the financial transaction (broken program at Reverse) from 1 of the month to the latest transaction data.",
        "mtd1_ncust_guest": "The number of point x Customers, both Customer Type is Guest.",
        "mtd1_ncust_guest_visit": "The number of point x Customers, both Customer Type, is a guest that comes to use Point XP from the 1st day to the latest transaction data.",
        "mtd1_amt_point_topup": "The number of points caused by topups from 1 of the month to the latest transaction day.",
        "mtd1_amt_point_topup_auto": "Auto Converse Point Topup from 1 of the month until the latest transaction",
        "mtd1_amt_point_topup_onboard": "Point Topup number onboard from 1 of the month until the latest transaction",
        "mtd1_amt_point_topup_onetime": "Manual number points from 1 of the month until the latest transaction",
        "mtd1_amt_point_transfer_out": "The number of points that transferred to other users from 1 of the month until the latest transaction",
        "mtd1_amt_point_pay": "The number of points caused by Payment from the 1st day of the month until the latest transaction.",
        "mtd1_amt_point_pay_sku": "The number of points caused by payment via X-Store from 1 of the month until the latest transaction day.",
        "mtd1_amt_point_pay_qr": "The number of points caused by payment via Scan & Pay from 1 of the month until the latest transaction.",
        "mtd1_amt_point_pay_pyw": "The number of points caused by payment via Paywise from the 1st day of the month until the latest transaction.",
        "mtd1_amt_point_pay_pyw_rbh": "The number of points caused by payment via Robinhood Platform from the 1st day of the month until the latest transaction.",
        "mtd1_amt_point_pay_qr_29": "The number of points caused by payment via Scan & Pay with Transaction Type type QR 29 from the 1st day of the month to the latest transaction.",
        "mtd1_amt_point_pay_qr_30": "The number of points caused by payment via Scan & Pay with Transaction Type QR 30 from 1 day of the month until the latest transaction.",
        "mtd1_amt_point_pay_qr_cs": "The number of points caused by payment via Scan & Pay with Transaction Type QRCS from 1 day of the month until the latest transaction.",
        "mtd1_amt_point_payment_p_only": "The number of points caused by payment by Payment Method is the use of Point Only from 1 day of the month until the latest transaction.",
        "mtd1_amt_point_payment_p_casa": "The number of points caused by payment by Payment Method is the use of Point, plus payment by transferring money.From the 1st day of the month until the latest transaction",
        "mtd1_amt_point_payment_p_cc": "The number of points caused by payment by Payment Method is to use Point, plus payment through credit cards.From the 1st day of the month until the latest transaction",
        "mtd1_rate_point_per_baht_pay": "The average rate of all the customers from 1 of the month until the latest transaction",
        "mtd1_rate_point_per_baht_pay_sku": "The rate of the Rate that Payment customers via X-Store from 1 of the month until the latest transaction.",
        "mtd1_rate_point_per_baht_pay_qr": "The rate of the Rate that Payment customers via Scan & Pay from 1 of the month until the latest transaction.",
        "mtd1_rate_point_per_baht_pay_pyw": "The rate of the Rate that Payment customers via Paywise from 1 of the month until the latest transaction.",
        "mtd1_rate_point_per_baht_pay_pyw_rbh": "The rate of the Rate that Payment customers via Robinhood Platform from 1 of the month until the latest transaction.",
        "mtd1_rate_point_per_baht_pay_qr_29": "The average rate of the customer Payment with Transaction Type type QR 29 from 1 day of the month until the latest transaction.",
        "mtd1_rate_point_per_baht_pay_qr_30": "The average rate of the customer Payment with Transaction Type type QR 30 from 1 day of the month until the latest transaction.",
        "mtd1_rate_point_per_baht_pay_qr_cs": "The average rate of the customer Payment with Transaction Type type QRCS from the 1st day of the month to the latest transaction.",
        "mtd1_rate_point_per_baht_pay_weight": "The average rate of all Payment customers (Weigthed Average with Point AMOUNT) from 1 day of the month until the latest transaction.",
        "mtd1_rate_point_per_baht_pay_sku_weight": "The average rate of the customer Payment via X-Store (Weigthed Average with Point AMOUNT) from 1 day of the month until the latest transaction.",
        "mtd1_rate_point_per_baht_pay_qr_weight": "The rate of the Rate that Payment customers via Scan & Pay (Weigted Average with Point Amount paid from 1 day of the month until the latest transaction.",
        "mtd1_rate_point_per_baht_pay_pyw_weight": "The average rate of the Payment customers via Paywise (Weigthed Average with Point AMOUNT) from 1 day of the month until the latest transaction.",
        "mtd1_rate_point_per_baht_pay_pyw_rbh_weight": "The average rate of the Payment customers via Robinhood Platform (Weigthed Average with Point AMOUNT) from 1 day of the month to the latest transaction.",
        "mtd1_rate_point_per_baht_pay_qr_29_weight": "The average rate of the customer Payment with Transaction Type type QR 29 (Weigthed Average with Point Amount paid from 1 day of the month to the latest transaction.",
        "mtd1_rate_point_per_baht_pay_qr_30_weight": "The average rate of the customer Payment with Transaction Type type QR 30 (Weigthed Average with Point Amount paid from 1 day of the month until the latest transaction.",
        "mtd1_rate_point_per_baht_pay_qr_cs_weight": "The rate of the Rate that the customer payment with the type of transaction type QRCS (Weigted Average with Point Amount paid from 1 day of the month to the latest transaction date.",
        "mtd1_rate_baht_per_point_pay": "The average of the Cost Per Point at all Payment customers from 1 of the month until the latest transaction day.",
        "mtd1_rate_baht_per_point_pay_sku": "The Cost Per Point, the Payment customers via X-Store from 1 of the month until the latest transaction.",
        "mtd1_rate_baht_per_point_pay_qr": "The average of Cost Per Point at Payment customers via Scan & Pay from 1 of the month until the latest transaction.",
        "mtd1_rate_baht_per_point_pay_pyw": "The average of the Cost Per Point that Payment customers via Paywise from 1 day of the month until the latest transaction.",
        "mtd1_rate_baht_per_point_pay_pyw_rbh": "The average of the Cost Per Point that Payment customers via Robinhood Platform from 1 day of the month until the latest transaction.",
        "mtd1_rate_baht_per_point_pay_qr_29": "The average of Cost Per Point at Payment customers with Transaction Type type QR 29 from the 1st day of the month until the latest transaction.",
        "mtd1_rate_baht_per_point_pay_qr_30": "The average of Cost Per Point at Payment customers with Transaction Type type QR 30 from the 1st day of the month until the latest transaction.",
        "mtd1_rate_baht_per_point_pay_qr_cs": "The average of Cost Per Point at Payment customers with Transaction Type type QRCS from the 1st day of the month to the latest transaction.",
        "mtd1_rate_baht_per_point_pay_weight": "The average of the Cost Per Point at all Payment customers (Weigted Average with Point Amount paid from 1 day of the month until the latest transaction.",
        "mtd1_rate_baht_per_point_pay_sku_weight": "The average of the Cost Per Point that the Payment customers via X-Store (Weigthed Average with Point AMOUNT) from 1 day of the month to the latest transaction.",
        "mtd1_rate_baht_per_point_pay_qr_weight": "The average of Cost Per Point at Payment customers via Scan & Pay (Weigted Average with Point Amount paid from 1 day of the month until the latest transaction.",
        "mtd1_rate_baht_per_point_pay_pyw_weight": "The average of the Cost Per Point at Paywise (Weigthed Average with Point AMOUNT) from 1 day of the month until the latest transaction.",
        "mtd1_rate_baht_per_point_pay_pyw_rbh_weight": "The average of the Cost Per Point that Payment customers via Robinhood Platform (Weigthed Average with Point AMOUNT) from 1 day of the month until the latest transaction.",
        "mtd1_rate_baht_per_point_pay_qr_29_weight": "The average of Cost Per Point at Payment Customers with Transaction Type type QR 29 (Weigted Average with Point AMOUNT) from 1 day of the month until the latest transaction.",
        "mtd1_rate_baht_per_point_pay_qr_30_weight": "The average of the Cost Per Point at Payment Customers with Transaction Type type QR 30 (Weigted Average with Point AMOUNT) from 1 day of the month until the latest transaction.",
        "mtd1_rate_baht_per_point_pay_qr_cs_weight": "The average of Cost Per Point at Payment Customers with Transaction Type type QRCS (Weigthed Average with Point Amount paid from 1 day of the month until the latest transaction.",
        "mtd1_n_topup_point": "The number of transactions caused by topups from 1 of the month to the latest transaction day.",
        "mtd1_n_topup_point_onboard": "The number of Transaction Topup onboard from 1 of the month until the latest transaction",
        "mtd1_n_topup_point_onetime": "Number of Transaction Topup Manual from 1 of the month until the latest transaction",
        "mtd1_n_topup_point_auto": "Auto converse transaction topg from 1 of the month until the latest transaction",
        "mtd1_n_transfer_point_out": "The number of transactions that transferred to other users from 1 of the month until the latest transaction",
        "mtd1_n_purchase": "The number of transactions caused by payment from 1 day of the month to the latest transaction.",
        "mtd1_n_purchase_sku": "The number of transactions caused by payment via X-Store from 1 of the month until the latest transaction.",
        "mtd1_n_purchase_qr": "The number of transactions caused by payment via Scan & Pay from the 1st day of the month until the latest transaction.",
        "mtd1_n_purchase_pyw": "The number of transactions caused by payment via Paywise from the 1st day of the month until the latest transaction.",
        "mtd1_n_purchase_pyw_rbh": "The number of transactions caused by payment via Robinhood Platform from 1 day of the month until the latest transaction.",
        "mtd1_n_purchase_qr_29": "The number of transactions caused by payment via Scan & Pay with Transaction Type type QR 29 from the 1st day of the month to the latest transaction.",
        "mtd1_n_purchase_qr_30": "The number of transactions caused by payment via Scan & Pay with Transaction Type QR 30 from 1 day of the month until the latest transaction.",
        "mtd1_n_purchase_qr_cs": "The number of transactions caused by payment via Scan & Pay with Transaction Type QRCS from 1 day of the month until the latest transaction.",
        "mtd1_n_point_payment_p_only": "The number of transactions caused by payment by Payment Method is the use of Point Only from the 1st day of the month until the latest transaction.",
        "mtd1_n_point_payment_p_casa": "The number of transactions caused by payment by Payment Method is the use of Point, plus payment by transferring money.From the 1st day of the month until the latest transaction",
        "mtd1_n_point_payment_p_cc": "The number of transactions caused by payment by Payment Method is to use Point, plus payment through credit cards.From the 1st day of the month until the latest transaction",
        "amt_point_topup_auto_cardx": "Point Topup number onboard of Cardx customers",
        "amt_point_topup_auto_wealth": "Point Topup number onboard of wealth customers",
        "amt_point_topup_extnl": "Point Topup number via External Partner",
        "mtd1_amt_point_topup_auto_cardx": "The Auto Converse Point Topup number of Cardx customers from 1 of the month until the latest transaction.",
        "mtd1_amt_point_topup_auto_wealth": "The Auto Converse Point Topup number of Wealth customers from 1 of the month until the latest transaction.",
        "mtd1_amt_point_topup_extnl": "Point Topup number via partner from 1 of the month until the latest transaction day",
        "mtd1_n_topup_point_auto_cardx": "The number of transactions caused by the Auto Converse top of the Cardx customer group from 1 of the month until the latest transaction.",
        "mtd1_n_topup_point_auto_wealth": "The number of transactions caused by the Auto Converse top of the Wealth customer group from 1 of the month until the latest transaction.",
        "mtd1_n_topup_point_extnl": "The number of transactions caused by the auto converse top via partner from the 1st day of the month to the latest transaction.",
        "mtd1_ncust_partner": "The number of users obtained from partner companiesCollecting the 1st day of the month",
        "mtd1_ncust_partner_new": "The number of users that register comes from partner companies.Collecting the 1st day of the month",
        "n_topup_point_auto_cardx": "The number of Transaction Topup Auto Converse of Cardx customers",
        "n_topup_point_auto_wealth": "The number of Transaction Topup Auto Converse of Wealth Customers",
        "n_topup_point_extnl": "Number of Transaction Topup via External Partner",
        "ncust_partner": "The number of users obtained from the partner company that day",
        "ncust_partner_new": "The number of users that register came from the partner company that day.",
        "n_transfer_point_out_extnl": "The number of transactions that transferred to other users via External Partner.",
        "mtd1_n_transfer_point_out_extnl": "The number of transactions transferred to other users via Partner from the 1st day until the latest transaction.",
        "amt_point_transfer_out_extnl": "The number of points that transferred to other User via External Partner",
        "mtd1_amt_point_transfer_out_extnl": "The number of points that transferred to other users via Partner from 1 of the month until the latest transaction.",
        "_dl_load_ts": "Data download date",
        "_date": "Transaction date"
}
"""

In [9]:
pointx_fbs_rpt_dly_context = """
{
    "table": "pointx_fbs_rpt_dly",
    "description": "Table records user interactions with the PointX app daily, capturing events such as app opens and deletions, \nproviding key insights into user behavior, app version usage, and device characteristics ",
    "columns": {
        "event_date": "The date on which the event was logged (YYYYMMDD format in the registered timezone of your app).",
        "event_month": "The year month on which the event was logged (YYYY-MM format).",
        "event_bundle_sequence_id": "The sequential ID of the bundle in which these events were uploaded.",
        "event_timestamp": "The time (in microseconds, UTC) at which the event was logged on the client.",
        "event_name": "The name of the event activity that occurred from the user's use.",
        "customer_id": "customer identification same values as pointx id in application",
        "user_pseudo_id": "The pseudonymous id (e.g., app instance ID) for the user.",
        "user_id": "The user ID set via the setUserId API.",
        "event_previous_timestamp": "The time (in microseconds, UTC) at which the event was previously logged on the client.",
        "event_value_in_usd": "The currency-converted value (in USD) of the event's \"value\" parameter.",
        "event_server_timestamp_offset": "Timestamp offset between collection time and upload time in micros.",
        "privacy_info_analytics_storage": "Whether Analytics storage is enabled for the user.",
        "privacy_info_ads_storage": "Whether ad targeting is enabled for a user.",
        "privacy_info_uses_transient_token": "Whether a web user has denied Analytics storage and the developer has enabled measurement without cookies based on transient tokens in server data.",
        "user_properties_ga_session_number": "Session number identifies the number of sessions that a user has started up to the current session (e.g., a user's third or fifth session on your site).",
        "user_properties_ga_session_number_set_timestamp_micros": "Timestamp of sessions that a user has started up to the current session (e.g., a user's third or fifth session on your site).",
        "user_properties_ga_session_id": "Session ID identifies the session that an event came from. For example, two different session IDs are generated when a user has two separate sessions on your site.",
        "user_properties_ga_session_id_set_timestamp_micros": "Timestamp that an event came from. For example, two different session IDs are generated when a user has two separate sessions on your site.",
        "user_properties_first_open_time": "Timestamp of user's first open",
        "user_properties_first_open_time_set_timestamp_micros": "Timestamp of user's first open in micros timestamp",
        "user_first_touch_timestamp": "Timestamp of user's first touch",
        "user_ltv_revenue": "The Lifetime Value (revenue) of the user. This field is not populated in intraday tables.",
        "user_ltv_currency": "The Lifetime Value (currency) of the user. This field is not populated in intraday tables.",
        "device_category": "The device category (mobile, tablet, desktop).",
        "device_mobile_brand_name": "The device brand name.",
        "device_mobile_model_name": "The device model name.",
        "device_mobile_marketing_name": "The device marketing name.",
        "device_mobile_os_hardware_model": "The device model information retrieved directly from the operating system.",
        "device_operating_system": "The operating system of the device.",
        "device_operating_system_version": "The OS version.",
        "device_vendor_id": "IDFV (present only if IDFA is not collected).",
        "device_advertising_id": "Advertising ID/IDFA.",
        "device_language": "The OS language.",
        "device_is_limited_ad_tracking": "The device's Limit Ad Tracking setting.",
        "device_time_zone_offset_seconds": "The offset from GMT in seconds.",
        "device_browser": "The browser in which the user viewed content.",
        "device_browser_version": "The version of the browser in which the user viewed content.",
        "device_web_info_browser": "The browser in which the user viewed content",
        "device_web_info_browser_version": "The version of the browser in which the user viewed content.",
        "device_web_info_hostname": "The hostname associated with the logged event.",
        "geo_continent": "The continent from which events were reported, based on IP address.",
        "geo_country": "The country from which events were reported, based on IP address.",
        "geo_region": "The region from which events were reported, based on IP address.",
        "geo_city": "The city from which events were reported, based on IP address.",
        "geo_sub_continent": "The subcontinent from which events were reported, based on IP address.",
        "geo_metro": "The metro from which events were reported, based on IP address.",
        "app_info_id": "The package name or bundle ID of the app.",
        "app_info_version": "The app's versionName (Android) or short bundle version.",
        "app_info_install_store": "The store that installed the app.",
        "app_info_firebase_app_id": "The Firebase App ID associated with the app",
        "app_info_install_source": "The source that installed the app.",
        "traffic_source_name": "Name of the marketing campaign that first acquired the user. This field is not populated in intraday tables.",
        "traffic_source_medium": "Name of the medium (paid search, organic search, email, etc.) that first acquired the user. This field is not populated in intraday tables.",
        "traffic_source_source": "Name of the network that first acquired the user. This field is not populated in intraday tables.",
        "stream_id": "The numeric ID of the stream.",
        "platform": "The platform on which the app was built.",
        "event_dimensions_hostname": "Includes the subdomain and domain names of a URL; for example, the Host Name of www.example.com/contact.html is www.example.com.",
        "ecommerce": "A record of information about ecommerce. (Currenntly collect as string)",
        "items": "A repeated record of items included in this event. (Currenntly collect as string)",
        "source_date": "Source date of firebase file name",
        "address_id": "Delivery address identification",
        "auto_earn_display": "In case of event delete account the values of auto_earn flag",
        "banner_description": "Highlight banner description",
        "banner_rank": "Highlight banner item index",
        "banner_title": "Highlight banner title",
        "campaign": "Firebase's campagin name",
        "campaign_info_source": "Firebase's campagin source information",
        "card_sub_product": "Possible Values : SCB Beyond , SCB Toyota platinum",
        "change_language": "Possible Values : EN, TH",
        "coupon_id": "Coupon Identification Number",
        "customer_device_lat": "Customer's device latitude",
        "customer_device_long": "Customer's device longtitude",
        "customer_lat": "Customer's latitude",
        "customer_long": "Customer's longtitude",
        "customer_type": "Customer Type e.g. guest, N/A",
        "deal_title": "Deal of the day title",
        "deal_type": "Deal of the day type",
        "debug_event": "Debug event",
        "deleteacount_button": "Delete account button",
        "delivery_address": "Delivery address ",
        "delivery_fee": "Delivery fee e.g. Fee ",
        "delivery_option": "Delivery option e.g. Standard Shipping ",
        "delivery_type": "Delivery type e.g. Standard Shipping ",
        "e_coupon_display": "e coupon display",
        "each_point_card": "Point in each credit cards, This value in array format",
        "ecatalog_list": "ecatalog list",
        "ecatalog_rank": "ecatalog rank",
        "ecoupon_rank": "ecoupon rank",
        "ecoupon_title": "ecoupon title",
        "engaged_session_event": "Engaged session event",
        "engagement_time_msec": "Engagement time millisecond",
        "entrances": "Number of entrance",
        "error_message": "Error message",
        "error_value": "Error value",
        "event_id": "Event Identification",
        "fatal": "Fatal",
        "firebase_conversion": "Firebase conversion",
        "firebase_error": "Firebase error",
        "firebase_event_origin": "Firebase event origin",
        "firebase_previous_class": "Firebase previous class",
        "firebase_previous_id": "Firebase previous identification",
        "firebase_previous_screen": "Firebase previous screen",
        "firebase_screen": "Firebase screen",
        "firebase_screen_class": "Firebase screen claass",
        "firebase_screen_id": "Firebase screen identification",
        "flashdeals_rank": "Flash deals rank",
        "flashdeals_title": "Flash deals title",
        "from_customer_name": "Full customer name who transferred point to another customer",
        "from_customer_profile_name": "Customer profile name in application  who transferred point to another customer",
        "ga_session_id": "Session Identification",
        "ga_session_number": "Sesion Number",
        "id": "Category Identification",
        "ignore_referrer": "Ignore referer",
        "item_code": "Item code",
        "latitude": "Latitude",
        "longitude": "Longitude",
        "link_classes": "Link classes",
        "link_domain": "Link domain",
        "link_url": "Link url",
        "list_card_sub_product": "List card sub product",
        "list_each_point_card": "List each point card",
        "medium": "Medium",
        "merchant_id": "Merchant id",
        "message_type": "Message type",
        "offer_type": "Offer type",
        "order_id": "Order id",
        "order_status": "Order status",
        "outbound": "Outbound",
        "page": "Page",
        "page_location": "Page location",
        "page_referrer": "Page referrer",
        "page_title": "Page title",
        "payment_method": "Payment method",
        "percent_scrolled": "Percent scrolled",
        "place_id": "Place id",
        "place_lat": "Place lat",
        "place_long": "Place long",
        "place_name": "Place name",
        "point_balance_display": "Point balance display",
        "points": "Points",
        "points_per_unit": "Points per unit",
        "points_remaining": "Points remaining",
        "previous_app_version": "Previous app version",
        "previous_first_open_count": "Previous first open count",
        "previous_os_version": "Previous os version",
        "primary_address": "Primary address",
        "product_id": "Product id",
        "quantity": "Quantity",
        "reason_id": "Reason id",
        "recommendedForYou_rank": "RecommendedForYou rank",
        "search_list_id": "Search list id",
        "search_list_id_scan_and_pay": "Search list id scan and pay",
        "search_list_id_xstore": "Search list id xstore",
        "session_engaged": "Session engaged",
        "shop_list_id": "Shop list id",
        "sku_catagory_name": "Sku catagory name",
        "sku_group_type": "Sku group type",
        "sku_id": "The SKU (Stock Keeping Unit) ID serves as a unique identifier assigned to a specific product variant, enabling efficient inventory management and tracking within a retail or e-commerce system.",
        "source": "Source",
        "source_page_name": "Source page name",
        "stock_code": "Stock code",
        "system_app": "System app",
        "system_app_update": "System app update",
        "tab_name": "Tab name",
        "term": "Term",
        "text_search": "Text search",
        "timestamp": "Timestamp",
        "to_customer_name": "To customer name",
        "to_customer_profile_name": "To customer profile name",
        "toggle": "Toggle",
        "total_amount": "Total amount",
        "total_point": "Total point",
        "total_points": "Total points",
        "transaction_id": "Transaction id",
        "transaction_status": "Transaction status",
        "transaction_type": "Transaction type",
        "unable_to_proceed": "Unable to proceed",
        "update_with_analytics": "Update with analytics",
        "client_code": "Client code",
        "client_member_id": "Client member id",
        "_dl_load_ts": "Date of data loading",
        "_date": "Transaction's occurrence date"
    }
}"""

In [10]:
llm_schemalink_template = """Your task is to select the columns related to the question to create the sql query in the next step.
Please selected related columns following by the question from this schema.
Answer the list of columns that you think are relevant and sufficient to create sql.
Example result : ['column1', 'column3']
{domain_knowledge}

Question: {question}
Result:"""

schemalink_prompt = PromptTemplate(
        input_variables=["domain_knowledge","question"],
        template = llm_schemalink_template                      
)

schemalink_chain = LLMChain(
        llm=ChatOpenAI(temperature=0, model=model, request_timeout=120),
        prompt=schemalink_prompt,
        output_key="result"
)

In [None]:
measurement_data = {
    "Question" : [],
    "Actual result" : [],
    "Schemalink result" : [],
    "LLM result" : [],
    "SL recall" : [],
    "SL precision" : [],
    "SL f1" : [],
    "LLM recall" : [],
    "LLM precision" : [],
    "LLM f1" : []
}
for i, row in pointx_nlqsql_df.iterrows():
    
    if row['Table'] == 'pointx_keymatrix_dly':
        domain_context = pointx_keymatrix_dly_context
    elif row['Table'] == 'pointx_fbs_rpt_dly':
        domain_context = pointx_fbs_rpt_dly_context

    actual_columns = SQL_columns(row['Actual SQL'], row['Table'])
    module_result = list(schema_link.filter_schema(row['Question'])[row['Table']].keys())
    llm_result = ast.literal_eval(schemalink_chain({"domain_knowledge":domain_context,"question":row['Question']})['result'])

    module_recall, module_precision, module_f1 = f1_columns(actual_columns, module_result)
    llm_recall, llm_precision, llm_f1 = f1_columns(actual_columns, llm_result)

    measurement_data['Question'].append(row['Question'])
    measurement_data["Actual result"].append(actual_columns)
    measurement_data["Schemalink result"].append(module_result)
    measurement_data["LLM result"].append(llm_result)
    measurement_data["SL recall"].append(module_recall)
    measurement_data["SL precision"].append(module_precision)
    measurement_data["SL f1"].append(module_f1)
    measurement_data['LLM recall'].append(llm_recall)
    measurement_data['LLM precision'].append(llm_precision)
    measurement_data['LLM f1'].append(llm_f1)

    print(actual_columns)
    print(module_result)
    # print(module_recall, module_precision, module_f1)
    print(llm_result)
    # print(llm_recall, llm_precision, llm_f1)
    print()

    if not i % 5 and i > 1:
        print(f"Complete {i} of {pointx_nlqsql_df.shape[0]}")
        time.sleep(60)
    

In [None]:
measurement_df = pd.DataFrame(measurement_data)
merged_df = pd.merge(pointx_nlqsql_df, measurement_df, on='Question', how='outer')

merged_df.to_excel('EXP_LLM_schemalink.xlsx', index=False)

# splite list of question by schema

In [24]:
llm_schemalink_loq_template = """Your task is to select the columns related to the question to create the sql query in the next step.
Please selected related columns following by the question from this schema.
Answer the list of columns that you think are relevant and sufficient to create sql.
The input will be a list of questions, and the answers you give will be a list of columns that list the answers to each question in order.

Example List of questions : ['question1', 'question2', 'question3', 'question4']
Example Result : [['column1', 'column3'], ['column2', 'column4', 'column5'], ['column1'], ['column6', 'column3']]

Schema: {domain_knowledge}

List of questions: {list_of_questions}
Result:"""

schemalink_loq_prompt = PromptTemplate(
        input_variables=["domain_knowledge","list_of_questions"],
        template = llm_schemalink_loq_template                      
)

schemalink_chain = LLMChain(
        llm=ChatOpenAI(temperature=0, model=model, request_timeout=120),
        prompt=schemalink_loq_prompt,
        output_key="result"
)

In [66]:
def dict_append_val(d, table, list_of_questions, list_of_actual_columns):
    if table not in d:
        d[table] = {'loq' : [], 'loc' : [] }

    d[table]['loq'].extend(list_of_questions)
    d[table]['loc'].extend(list_of_actual_columns)
    return d

In [67]:
measurement_data = {
    "Question" : [],
    "Actual result" : [],
    "Schemalink result" : [],
    "LLM result" : [],
    "SL recall" : [],
    "SL precision" : [],
    "SL f1" : [],
    "LLM recall" : [],
    "LLM precision" : [],
    "LLM f1" : []
}

llm_loq = list()
list_of_actualcols = list()
table_loq_dict = dict()

for i, row in pointx_nlqsql_df.iterrows():

    # first time
    if not i : temp_domain = row['Table']

    actual_columns = SQL_columns(row['Actual SQL'], row['Table'])
    module_result = list(schema_link.filter_schema(row['Question'])[row['Table']].keys())
    module_recall, module_precision, module_f1 = f1_columns(actual_columns, module_result)
    
    if temp_domain != row['Table']:
        table_loq_dict = dict_append_val(table_loq_dict, temp_domain, llm_loq, list_of_actualcols)
        temp_domain = row['Table']
        list_of_actualcols = [actual_columns]
        llm_loq = [row['Question']]
    else:
        llm_loq.append(row['Question'])
        list_of_actualcols.append(actual_columns)

    if not (pointx_nlqsql_df.shape[0] - 1 - i):   # last record
        table_loq_dict = dict_append_val(table_loq_dict, temp_domain, llm_loq, list_of_actualcols)

    measurement_data['Question'].append(row['Question'])
    measurement_data["Actual result"].append(actual_columns)
    measurement_data["Schemalink result"].append(module_result)
    measurement_data["SL recall"].append(module_recall)
    measurement_data["SL precision"].append(module_precision)
    measurement_data["SL f1"].append(module_f1)
    # print(actual_columns)
    # print(module_result)
    

Column matching  ----> points
Column matching  ----> points
Column matching  ----> points
Column matching  ----> points
Column matching  ----> points
Column matching  ----> points
Column matching  ----> points
Column matching  ----> points
Column matching  ----> points
Column matching  ----> points
Column matching  ----> points
Column matching  ----> engagement_time_msec


In [47]:
def split_list(input_list, chunk_size):
    for i in range(0, len(input_list), chunk_size):
        yield input_list[i:i + chunk_size]

In [74]:
for table, list_of_value in table_loq_dict.items():
    if table == 'pointx_keymatrix_dly':
        domain_context = pointx_keymatrix_dly_context
    elif table == 'pointx_fbs_rpt_dly':
        domain_context = pointx_fbs_rpt_dly_context
    loq = list_of_value.get('loq')
    loc = list_of_value.get('loc')

    chunk_size = 10
    loq_chunks = list(split_list(loq, chunk_size))
    
    for loq_chunk in loq_chunks:
        str_list_of_result = schemalink_chain({"domain_knowledge": domain_context, "list_of_questions": str(loq_chunk)})
        llm_result = ast.literal_eval(str_list_of_result['result'])
        
        for i in range(len(llm_result)):
            print(loq_chunk[i],'\n', llm_result[i],'\n', loc[i],'\n')
            llm_recall, llm_precision, llm_f1 = f1_columns(loc[i], llm_result[i])
            measurement_data["LLM result"].append(llm_result[i])
            measurement_data['LLM recall'].append(llm_recall)
            measurement_data['LLM precision'].append(llm_precision)
            measurement_data['LLM f1'].append(llm_f1)
            
        time.sleep(10)
    print(loq_chunks)

What is the total number of all financial transactions for each month? 
 ['month_id', 'ntx_pointx_financial'] 
 ['month_id', 'ntx_pointx_financial'] 

What is the total amount of points generated by all top-up transactions in August 2022? 
 ['month_id', 'amt_point_topup'] 
 ['month_id', 'amt_point_topup'] 

What is the total amount of points generated by all payment transactions for each month in 2022? 
 ['month_id', 'amt_point_pay'] 
 ['month_id', 'amt_point_pay'] 

What is the average rate of released points for all payment customers? 
 ['rate_point_per_baht_pay'] 
 ['rate_point_per_baht_pay'] 

Can you determine the average number of customers who visit and use the pointX app each month? 
 ['month_id', 'ncust_visit'] 
 ['month_id', 'ncust_visit'] 

What is the total number of transactions caused by all top-ups in 2022? 
 ['month_id', 'n_topup_point'] 
 ['month_id', 'n_topup_point'] 

Total transactions are caused by payment of X-store. 
 ['month_id', 'n_purchase_sku'] 
 ['n_purchase

In [75]:
measurement_df = pd.DataFrame(measurement_data)
merged_df = pd.merge(pointx_nlqsql_df, measurement_df, on='Question', how='outer')

# merged_df.to_excel('EXP_LLM_schemalink.xlsx', index=False)

### Score re-check

In [77]:
chech_df = pd.read_excel("EXP_LLM_schemalink.xlsx")
chech_df.head()

Unnamed: 0,Table,Question,Actual SQL,Actual result,Schemalink result,LLM result,SL recall,SL precision,SL f1,LLM recall,LLM precision,LLM f1
0,pointx_keymatrix_dly,What is the total number of all financial tran...,"SELECT month_id, SUM(ntx_pointx_financial) FRO...","['month_id', 'ntx_pointx_financial']","['mtd1_n_transfer_point_out', 'mtd1_n_purchase...","['month_id', 'ntx_pointx_financial']",1.0,0.4,0.58,1.0,1.0,1
1,pointx_keymatrix_dly,What is the total amount of points generated b...,SELECT SUM(amt_point_topup) FROM pointx_keymat...,"['month_id', 'amt_point_topup']","['mtd1_amt_point_topup', 'mtd1_n_topup_point',...","['month_id', 'amt_point_topup']",0.5,0.2,0.28,1.0,1.0,1
2,pointx_keymatrix_dly,What is the total amount of points generated b...,"SELECT month_id, SUM(amt_point_pay) FROM point...","['month_id', 'amt_point_pay']","['amt_point_pay', 'mtd1_amt_point_pay', 'mtd1_...","['month_id', 'amt_point_pay']",0.5,0.2,0.28,1.0,1.0,1
3,pointx_keymatrix_dly,What is the average rate of released points fo...,SELECT AVG(rate_point_per_baht_pay) FROM point...,['rate_point_per_baht_pay'],"['rate_baht_per_point_pay', 'rate_baht_per_poi...",['rate_point_per_baht_pay'],1.0,0.2,0.34,1.0,1.0,1
4,pointx_keymatrix_dly,Can you determine the average number of custom...,"SELECT month_id, AVG(ncust_visit) FROM pointx_...","['month_id', 'ncust_visit']","['ncust_visit', 'ncust_pointx_visit', 'mtd1_nc...","['month_id', 'ncust_visit']",0.5,0.2,0.28,1.0,1.0,1


In [78]:
for i, row in chech_df[['Actual result', 'Schemalink result', 'LLM result']].iterrows():
    actual_cols = ast.literal_eval(row['Actual result'])
    module_cols = ast.literal_eval(row['Schemalink result'])
    llm_cols = ast.literal_eval(row['LLM result'])
    
    module_recall, module_precision, module_f1 = f1_columns(actual_cols, module_cols)
    llm_recall, llm_precision, llm_f1 = f1_columns(actual_cols, llm_cols)

    chech_df.at[i, 'SL recall'] = module_recall
    chech_df.at[i, 'SL precision'] = module_precision
    chech_df.at[i, 'SL f1'] = module_f1
    chech_df.at[i, 'LLM recall'] = llm_recall
    chech_df.at[i, 'LLM precision'] = llm_precision
    chech_df.at[i, 'LLM f1'] = llm_f1

In [81]:
# chech_df.to_excel('EXP_LLM_schemalink.xlsx', index=False)