In [12]:
# Select the backend to evaluate by indexing into the list of known model names.
# The chosen name also appears in the cache/output file names used by later cells.
model_name = ['llama', 'gpt', 'deepseek'][1]

import getpass
import os
from langchain_ollama import ChatOllama
from langchain_openai import ChatOpenAI
from langchain_core.messages import HumanMessage, SystemMessage
from glob import glob
import json
from num2words import num2words
import pandas as pd
import tqdm

In [17]:
# The local Llama model is always constructed: the JSON-conversion stage further
# down calls `llama.invoke(...)` regardless of which model is being evaluated.
llama = ChatOllama(
    model="llama3.2",
    temperature=0,
    num_ctx=4096,
    num_predict=2048,
)

# `model` is the LLM under evaluation (text-to-SQL and answer generation).
model = None
if model_name == 'llama':
    model = llama
elif model_name == 'gpt':
    # Ask for the key interactively only when the environment does not provide it.
    if not os.environ.get("OPENAI_API_KEY"):
        os.environ["OPENAI_API_KEY"] = getpass.getpass("Enter your OpenAI API key: ")
    model = ChatOpenAI(
        model="gpt-4o",
        temperature=0,
        max_tokens=None,
        timeout=None,
        max_retries=2,
        # api_key="...",  # if you prefer to pass api key in directly instead of using env vars
        # base_url="...",
        # organization="...",
        # other params...
    )
else:
    # Previously every name other than 'llama' silently ran GPT-4o, so results were
    # written to files labelled with a model that never produced them.
    raise ValueError(
        "No client is configured for model_name=%r; supported values are 'llama' and 'gpt'."
        % model_name
    )

In [2]:
def load_questions(paths, limit=100):
    """Read up to `limit` questions from each JSON Lines file in `paths`.

    Each non-blank line is parsed as one JSON object. The file's base name without
    its extension is stored under the object's 'type' key.

    Args:
        paths: iterable of paths to ``.jsonl`` files.
        limit: maximum number of questions taken from each file.

    Returns:
        A list of question dicts, in file order and then line order.

    Raises:
        json.JSONDecodeError: if a non-blank line is not valid JSON.
    """
    loaded = []
    for path in paths:
        # basename/splitext instead of slicing on '/' so other path separators work too.
        question_type = os.path.splitext(os.path.basename(path))[0]
        kept = 0
        with open(path, 'r', encoding='utf-8') as file:
            # Iterating the file stops at EOF, so files with fewer than `limit`
            # lines no longer end in json.loads('') raising.
            for line in file:
                if kept >= limit:
                    break
                if not line.strip():
                    continue
                question = json.loads(line)
                question['type'] = question_type
                loaded.append(question)
                kept += 1
    return loaded


# Sorted so that the question order is the same on every run and every machine.
files = sorted(glob('./selected_questions/*.jsonl'))
questions = load_questions(files)

In [4]:
# This SQL query lists the column names and data types of a table (here: `lakes`):
"""
SELECT column_name, data_type
from Information_schema.Columns
where table_name = 'lakes';
"""

# System prompt for the text-to-SQL stage. It lists the five tables (pois, lakes,
# parks, roads, regions) with a description of each column, followed by the rules
# the generated query must follow (PostGIS-compatible, ILIKE matching, fenced in
# ```sql ... ```).
# NOTE(review): the prompt text contains a few typos ("resturant", "bycycle",
# "adminstrative") and the Table 5 header says `data_type` although its rows hold
# descriptions. The literal is left untouched here because it is sent verbatim to
# the model.
system_prompt1 = str(
"""Convert the provided user question to a SQL query.
The following are the tables that exist in the database:

Table 1: pois
contains the points of interest
Schema:
   column_name    |     description     
------------------+-------------------
 id               | unique identifier
 geometry         | geography type represents the shape and position on earth
 poi_name         | name of the poi
 wikidata         | unique identifier reference to wikidata
 wikipedia        | unique name reference to wikipedia page
 addr_state       | the state where the poi is located
 addr_city        | the city where the poi is located
 cuisine          | type of cuisine the associated with restaurant pois
 leisure          | type of leisure
 tourism          | type of tourism
 takeaway         | indicates if a resturant offers takeaway
 drive_through    | indicates if a resturant offers drive through
 museum           | type of museum
 healthcare       | type of healthcare service
 outdoor_seating  | indicates if an amenity has outdoor seating
 emergency        | indicates if a healthcare service provides emergency service
 restaurant       | attribute related to restaurants
 amenity          | type of amenity provided


Table 2: lakes
contains lakes, rivers, waterbodies, etc.
Schema:
   column_name    |     description     
------------------+-------------------
 id               | unique identifier
 geometry         | geography type represents the shape and position on earth
 lake_name        | name of the lake
 wikidata         | unique identifier reference to wikidata
 wikipedia        | unique name reference to wikipedia page
 addr_country     | the country where the lake is located
 addr_state       | the state where the lake is located
 addr_county      | the county where the lake is located
 addr_city        | the city where the lake is located
 addr_postcode    | postcode where the lake is located
 addr_street      | street where the lake is located
 addr_housenumber | house number specific to this lake
 waterway         | type of waterway
 water            | type of waterbody

Table 3: parks
contains parks, gardens, etc.
Schema:
   column_name    |     description     
------------------+-------------------
 id               | unique identifier
 geometry         | geography type represents the shape and position on earth
 park_name        | name of the park
 wikidata         | unique identifier reference to wikidata
 wikipedia        | unique name reference to wikipedia page
 leisure          | type of leisure
 park             | type of park
 tourism          | type of tourism

Table 4: roads
contains roads, walkways, etc.
Schema:
   column_name    |     description     
------------------+-------------------
 id               | unique identifier
 geometry         | geography type represents the shape and position on earth
 road_name        | name of the road
 wikidata         | unique identifier reference to wikidata
 wikipedia        | unique name reference to wikipedia page
 highway          | attribute associated with roads of type highway
 sidewalk         | attribute associated with roads of type sidewalk
 foot             | attribute associated with roads of type foot
 bicycle          | attribute associated with roads of type bycycle
 cycleway         | attribute associated with roads of type cycleway

Table 5: regions
contains adminstrative region boundaries, like cities and states, etc.
Schema:
  column_name  |     data_type     
---------------+-------------------
 id            | integer
 geometry         | geography type represents the shape and position on earth
 region_name   | name of the region
 border_type   | the type of border
 wikidata      | unique identifier reference to wikidata
 wikipedia     | unique name reference to wikipedia page

When creating the sql queries make sure of the following:
- they match the table names provided
- the columns must exist in the provided table schema
- the queries are compatible with PostGIS.
- make sure to cast the geography columns to geometry if needed
- do not include the geometry column or location in the output
- string matching must be flexible, use ILIKE and the '%' sign.
- do not include any description or text, just the sql query.
- the query starts with ```sql and ends with ```
Note all tables are based on OpenStreetMap data.
"""
) # TODO: possibly add instruction to limit the outputs records

# System prompt for the answer-generation stage. The SQL result rows are appended
# after the final line when the prompt is used.
system_prompt2 = "".join([
    "Answer the provided user question while satisfying the following requirements:\n",
    "1. do not include any parts of the question in the answer you must provide the answer directly.\n",
    "2. provide only the property the user is asking for like name of an entity, its location, distance, direction.\n",
    "3. don't provide information the user didn't ask for.\n",
    "4. any number must be written as words and rounded to the nearest ten.\n",
    "5. only use metric units.\n\n",
    "The following records might be relavant to answering this question:\n",
])


# System prompt for the JSON-conversion stage. `%OTHER_ATT%` is a placeholder that
# the conversion loop replaces with an extra `"<attribute>": string` line for
# multi-hop questions and with an empty string otherwise.
# Fixes relative to the earlier wording: the "name" schema line now has its colon and
# trailing comma like every other field, and "measurement"/"degrees" are spelled
# correctly.
system_prompt3 = (
    "Given a question and a text answer, parse the text answer to json format."
    " The location must be provided as a complete address,"
    " any measurement must be in metric units,"
    " and directions must be converted to azimuth angle in degrees."
    " Try to match the following schema:"
    """
                        {
                            "name": string,
                            "address": string,
                            "count": integer,
                            "distance": integer,
                            "length": integer,
                            "area": integer,
                            "azimuth_angle": integer,
                            %OTHER_ATT%
                        }
                    If a value is missing don't include it in the output, and don't write any comments.
                    All json blocks must be enclosed with ```json and ```
                     """
)

# You can use the following examples as a guide:
# Example 1: What is the largest park in Tucson, Arizona?
# ```sql
# SELECT *, ST_Area(parks.geometry::geography) AS computed_area FROM parks\nWHERE leisure = 'park'\nAND ST_Intersects(parks.geometry::geography, (SELECT geometry FROM regions WHERE wikipedia = `en:Tucson, Arizona` LIMIT 1)::geography) ORDER BY computed_area DESC LIMIT 1;
# ```
# Example 2: 
# What is the total area of all gardens in Riverside, California?
# ```sql
# SELECT SUM(ST_Area(parks.geometry::geography)) AS area FROM parks\nWHERE leisure = 'garden'\nAND ST_Intersects(parks.geometry::geography, (SELECT geometry FROM regions WHERE wikipedia = `en:Riverside, California` LIMIT 1)::geography)
# ```
# """
#)



In [34]:
import signal
def safe_invoke(messages, timeout=30):
    """Invoke the global `model` on `messages`, giving up after `timeout` seconds.

    The time limit is enforced with SIGALRM, so this only works where that signal
    is available and `signal.signal` may be called (see the `signal` module docs).

    Args:
        messages: the message list passed straight to `model.invoke`.
        timeout: whole seconds to wait before the call is abandoned.

    Returns:
        Whatever `model.invoke` returns. If the call times out or raises any other
        `Exception`, a placeholder object whose `content` is the empty string.
    """
    def handler(signum, frame):
        raise TimeoutError("Request timed out")

    previous_handler = signal.signal(signal.SIGALRM, handler)
    signal.alarm(timeout)
    try:
        return model.invoke(messages)
    except Exception:
        # Best effort by design: a failed or timed-out request yields an empty answer.
        EmptyClass = type('EmptyClass', (), {'content': ''})
        return EmptyClass()
    finally:
        # Always cancel the pending alarm. Previously it stayed armed when
        # model.invoke raised, and would fire later inside unrelated code.
        signal.alarm(0)
        # A handler installed from outside Python is reported as None and cannot
        # be re-installed through signal.signal.
        if previous_handler is not None:
            signal.signal(signal.SIGALRM, previous_handler)


In [35]:
# Load previously generated text-to-SQL responses, keyed by question id, so that
# only questions without a cached response are sent to the model again.
# A missing file (first run for this model) simply means nothing is cached yet.
if os.path.exists('./text2sql_output_%s.json' % model_name):
    with open('./text2sql_output_%s.json' % model_name, 'r') as file:
        _text2sql_answers = {a['id']: a for a in json.load(file)}
else:
    _text2sql_answers = {}


In [36]:
# Text-to-SQL stage: collect one response per question, in question order.
# Cached responses are reused; everything else is requested from the model.
progress = tqdm.tqdm(questions)

text2sql_answers = []
for q in progress:
    if q['id'] in _text2sql_answers:
        text2sql_answers.append(_text2sql_answers[q['id']])
        continue
    messages = [
        SystemMessage(content=system_prompt1),
        HumanMessage(content=q['question']),
    ]
    response = safe_invoke(messages)
    text2sql_answers.append({'content': response.content, 'id': q['id']})


100%|██████████| 2800/2800 [00:00<00:00, 1867989.69it/s]


In [37]:
# Persist the text-to-SQL responses, then read them back so the in-memory list
# holds exactly what was written to disk.
with open('./text2sql_output_%s.json' % model_name, 'w') as file:
    json.dump(text2sql_answers, file, indent=2)
with open('./text2sql_output_%s.json' % model_name, 'r') as file:
    text2sql_answers = json.load(file)

In [5]:
import re

def flatten_if_nested(array):
    """Flatten a list that contains lists; return anything else unchanged.

    Nested lists are expanded recursively, keeping element order. An input that is
    not a list, or a list without any list element, is returned as the same object.
    """
    if not isinstance(array, list):
        return array
    if not any(isinstance(element, list) for element in array):
        return array

    result = []
    for element in array:
        if isinstance(element, list):
            result.extend(flatten_if_nested(element))
        else:
            result.append(element)
    return result

def extract_json_blocks(text, i):
    """Extract and parse every ```json fenced block found in `text`.

    Before parsing, each block is cleaned up:
      * digit groups joined by underscores (``1_000``) lose the underscores;
      * thousands separators (``1,200``) lose the commas;
      * ``//`` line comments are removed;
      * a trailing comma before ``}`` is removed;
      * escaped quotes/ampersands (``\\'``, ``\\&``) are unescaped;
      * adjacent objects (``} {``) are wrapped into one JSON array;
      * a `` acres,`` unit suffix is stripped, and a numeric top-level ``area`` is
        converted to square metres;
      * any remaining ``json`` token is removed.

    Args:
        text: model output that may contain fenced JSON blocks.
        i: index of the answer being parsed; only printed when a block is invalid.

    Returns:
        The parsed blocks passed through `flatten_if_nested`. Blocks that still
        fail to parse are reported on stdout and skipped.
    """
    fence_pattern = r'```[\s]*json(.*?)```'
    underscore_digits = r'\b\d+(?:_\d+)*\b'
    # Only well-formed thousands groups; the looser `\d+(?:,\d+)*` also merged
    # separate numbers, turning e.g. `[1,2]` into `[12]`.
    thousands_groups = r'\b\d{1,3}(?:,\d{3})+\b'
    # The look-behind keeps `://` in URLs from being treated as a comment start.
    line_comment = r'(?<!:)//.*?\n'
    trailing_comma = r',\s*}'
    adjacent_objects = r'}\s*{'

    json_blocks = []
    for match in re.findall(fence_pattern, text, re.DOTALL):
        s = match.strip()
        s = re.sub(underscore_digits, lambda m: m.group().replace('_', ''), s)
        s = re.sub(thousands_groups, lambda m: m.group().replace(',', ''), s)
        s = re.sub(line_comment, '', s)
        s = re.sub(trailing_comma, '}', s)
        s = s.replace("\\\\'", "'").replace("\\'", "'").replace("\\&", "&")
        if re.search(adjacent_objects, s):
            s = re.sub(adjacent_objects, '},\n{', s)
            s = '[\n%s\n]' % s
        convert_area = 'acres' in s
        if convert_area:
            s = s.replace(' acres,', ',')
        s = s.replace('json', '')

        try:
            json_data = json.loads(s)
        except json.JSONDecodeError as w:
            print(w)
            print(i)
            print(s)
            print("Warning: Found an invalid JSON block.")
            continue

        # Convert only a numeric area on a dict; a string or missing area used to
        # raise TypeError and abort the whole parsing run.
        if convert_area and isinstance(json_data, dict):
            area = json_data.get('area')
            if isinstance(area, (int, float)) and not isinstance(area, bool):
                json_data['area'] = area * 4046.8564224
        json_blocks.append(json_data)
    return flatten_if_nested(json_blocks)

def extract_sql_blocks(text):
    """Return the raw contents of every ```sql fenced block in `text`, in order.

    The text between the opening ```sql marker and the closing ``` is returned
    unmodified (surrounding whitespace included). An empty list means no fenced
    SQL block was found.
    """
    fenced_sql = re.compile(r'```[\s]*sql(.*?)```', re.DOTALL)
    return [found.group(1) for found in fenced_sql.finditer(text)]



In [39]:
import psycopg
from psycopg.rows import dict_row

def run_sql(sql, conn, timeout):
    """Execute `sql` on `conn` and return at most 100 result rows.

    Args:
        sql: the statement to run (here: SQL produced by the model).
        conn: an open connection whose cursors return rows as dicts.
        timeout: integer assigned to PostgreSQL's `statement_timeout` before the
            statement runs.
            NOTE(review): a unit-less statement_timeout is read as milliseconds —
            confirm that the values passed in use that unit.

    Returns:
        ``{'output': rows, 'error': ''}`` on success, where each row is a dict
        without its None-valued columns; ``{'output': [], 'error': message}`` when
        executing the statement or fetching its rows fails. The transaction is
        rolled back in the failure case.
    """
    cur = conn.cursor()
    try:
        cur.execute("SET statement_timeout = %d" % timeout)
        try:
            cur.execute(sql)
            # Fetching is inside the guarded section: a statement that produces no
            # result set makes the fetch raise, which previously aborted the run.
            records = cur.fetchmany(size=100)
        except Exception as e:
            conn.rollback()
            return {'output': [], 'error': str(e)}
    finally:
        # Close the cursor on every path; the error path used to leak it.
        cur.close()
    return {'output': [{k: row[k] for k in row if row[k] is not None} for row in records], 'error': ''}


In [40]:
# Statement timeout per question type: twice the largest `time` recorded for that
# type in the CSV (truncated to an int), but never below 10.
# NOTE(review): the value is later handed to `SET statement_timeout`, which reads a
# unit-less number as milliseconds — confirm the CSV's `time` column uses that unit.
sql_time = {
    question_type: max(int(slowest) * 2, 10)
    for question_type, slowest in (
        pd.read_csv('./sql_answers_time.csv').groupby('type')['time'].max().items()
    )
}

In [12]:
# Load previously collected SQL results, keyed by question id, so that only
# questions without a cached result are executed against the database again.
# A missing file (first run for this model) simply means nothing is cached yet.
if os.path.exists('./sql_outputs_%s.json' % model_name):
    with open('./sql_outputs_%s.json' % model_name, 'r') as file:
        _sql_output = {a['id']: a for a in json.load(file)}
else:
    _sql_output = {}

In [None]:
# SQL execution stage: run every generated SQL block for each question that has no
# cached result, and collect the rows (or the error message) per block.
conn = psycopg.connect(
    host='localhost',
    dbname='osm_ca',
    user='postgres',
    # Read the password from the environment when provided; the fallback keeps the
    # previous local-development default working.
    password=os.environ.get('PGPASSWORD', 'postgres'),
    port=5432,
    row_factory=dict_row,
    # options="-c statement_timeout=180000"
)

# Index the responses by question id once instead of scanning the list for every
# question. Building from the reversed list keeps the first entry per id.
text2sql_by_id = {a['id']: a for a in reversed(text2sql_answers)}

sql_output = []
progress = tqdm.tqdm(range(len(text2sql_answers)))
try:
    for i in progress:
        q = questions[i]
        if q['id'] in _sql_output:
            sql_output.append(_sql_output[q['id']])
            continue
        a = text2sql_by_id.get(q['id'])
        # A question without a response gets no SQL; the old positional fallback
        # would have run another question's query.
        sql_blocks = extract_sql_blocks(a['content']) if a is not None else []
        records = [run_sql(sql, conn, sql_time[q['type']]) for sql in sql_blocks]
        sql_output.append({'id': q['id'], 'records': records})
finally:
    # Release the connection even when the loop is interrupted or fails.
    conn.close()

100%|██████████| 2800/2800 [00:00<00:00, 1029096.67it/s]


In [14]:
# Persist the SQL results, then read them back so the in-memory list holds exactly
# what was written to disk.
with open('./sql_outputs_%s.json' % model_name, 'w') as file:
    json.dump(sql_output, file, indent=2)
with open('./sql_outputs_%s.json' % model_name, 'r') as file:
    sql_output = json.load(file)

In [5]:
# Load previously generated text answers, keyed by question id, so that only
# questions without a cached answer are sent to the model again.
# A missing file (first run for this model) simply means nothing is cached yet.
if os.path.exists('./text2sql_answers_%s.json' % model_name):
    with open('./text2sql_answers_%s.json' % model_name, 'r') as file:
        _answers = {a['id']: a for a in json.load(file)}
else:
    _answers = {}

In [7]:
# Answer-generation stage: for every question without a cached answer, ask the
# model to answer it using up to 20 of the rows returned by its SQL queries.
answers = []

# Index the SQL results by question id once. This loop used to rebind
# `_sql_output` (the SQL cache dict) to a single record, which broke the cache
# lookup if the SQL cell was run again afterwards.
sql_output_by_id = {o['id']: o for o in reversed(sql_output)}

progress = tqdm.tqdm(range(len(questions)))
for i in progress:
    q = questions[i]
    if q['id'] in _answers:
        answers.append(_answers[q['id']])
        continue
    # A question without SQL results gets an empty context instead of the rows
    # of whichever record happened to be examined last.
    question_sql_output = sql_output_by_id.get(q['id'], {'records': []})
    sql_context = []
    for c in question_sql_output['records']:
        sql_context += [str(r) for r in c['output']]
    # Cap the context at the first 20 rows to keep the prompt short.
    sql_context = sql_context[:20]
    messages = [
        SystemMessage(content=system_prompt2 + '\n'.join(sql_context)),
        HumanMessage(content=q['question']),
    ]
    answers.append({'id': q['id'], 'content': model.invoke(messages).content})


100%|██████████| 2800/2800 [00:00<00:00, 1096857.31it/s]


In [27]:
# The write-back of `answers` is commented out, so this cell only replaces
# `answers` with the list stored on disk.
# with open('./text2sql_answers_%s.json' % model_name, 'w') as file:
#     json.dump(answers, file, indent=2)
with open('./text2sql_answers_%s.json' % model_name, 'r') as file:
    answers = json.load(file)

In [8]:
# Load previously generated JSON conversions, keyed by question id, so that only
# questions without a cached conversion are sent to the model again.
# A missing file (first run for this model) simply means nothing is cached yet.
if os.path.exists('./text2sql_json_answers_%s.json' % model_name):
    with open('./text2sql_json_answers_%s.json' % model_name, 'r') as file:
        _json_answers = {a['id']: a for a in json.load(file)}
else:
    _json_answers = {}

In [9]:
# JSON-conversion stage: ask the local Llama model to turn each free-text answer
# into a JSON block, reusing cached conversions where available.
json_answers = []

# Index the text answers by question id once instead of scanning the list for
# every question. Building from the reversed list keeps the first entry per id.
answers_by_id = {a['id']: a for a in reversed(answers)}

for q in questions:
    if q['id'] in _json_answers:
        json_answers.append(_json_answers[q['id']])
        continue

    a = answers_by_id.get(q['id'])
    if a is None:
        # No text answer exists for this question, so there is nothing to convert.
        # The old positional fallback converted another question's answer.
        json_answers.append({'id': q['id'], 'content': ''})
        continue

    # Multi-hop questions ask for one extra attribute, which is added to the schema.
    if 'multihop1' in q['type']:
        extra_attribute = '"%s": string' % q['answers'][0]['multihop_attribute']
    else:
        extra_attribute = ''
    sys_prompt = system_prompt3.replace('%OTHER_ATT%', extra_attribute)

    messages = [
        SystemMessage(content=sys_prompt),
        HumanMessage(content="Question: %s\nAnswer: %s" % (q['question'], a['content'])),
    ]
    json_answers.append({'id': q['id'], 'content': llama.invoke(messages).content})

In [13]:
# The write-back of `json_answers` is commented out, so this cell only replaces
# `json_answers` with the list stored on disk.
# with open('./text2sql_json_answers_%s.json' % model_name, 'w') as file:
#     json.dump(json_answers, file, indent=2)
with open('./text2sql_json_answers_%s.json' % model_name, 'r') as file:
    json_answers = json.load(file)

In [14]:
# Parse the JSON blocks out of every converted answer; `parsed_answers[i]` belongs
# to `questions[i]`.
# The conversions are indexed by question id once instead of being scanned for
# every question. Building from the reversed list keeps the first entry per id.
json_answers_by_id = {a['id']: a for a in reversed(json_answers)}

parsed_answers = []
for i in range(len(json_answers)):
    q = questions[i]
    a = json_answers_by_id.get(q['id'])
    # A question without a conversion gets no parsed blocks; the old positional
    # fallback parsed another question's answer.
    parsed_answers.append(extract_json_blocks(a['content'], i) if a is not None else [])

In [46]:
# Spot-check: display the parsed JSON blocks extracted for the first question.
parsed_answers[0]

[{'name': 'Mediterranean restaurant',
  'address': 'Bowling Green, Kentucky',
  'distance': None,
  'length': None,
  'area': None,
  'azimuth_angle': None}]

In [7]:
from geopy.geocoders import Nominatim
from pyproj import Geod

# Geodesic calculator on the WGS84 ellipsoid; passed to `evaluate.evaluate_location`
# in the parsed-answer evaluation.
geod = Geod(ellps='WGS84')
# Nominatim geocoder; passed to `evaluate.get_location_by_address` to resolve
# predicted addresses.
geocoder = Nominatim(user_agent="Geocoder")

In [8]:
import importlib
import evaluate
importlib.reload(evaluate)
import numpy as np

[nltk_data] Downloading package punkt to /Users/majid/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /Users/majid/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package punkt to /Users/majid/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to /Users/majid/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [40]:

text_evaluation = []

# evaluate text answers

# Ordered (question-type marker, ground-truth field) pairs. The first marker found
# in the question type decides which field the answer is compared against, so the
# order matters.
type_to_key = [
    ('multihop1', 'multihop_long_answer'),
    ('name', 'name'),
    ('loc', 'address'),
    ('angle', 'angle_description'),
    ('area', 'area'),
    ('length', 'length'),
    ('count', 'count'),
    ('distance', 'distance'),
]

# Index the text answers by question id once instead of scanning the list for
# every question. Building from the reversed list keeps the first entry per id.
answers_by_id = {a['id']: a for a in reversed(answers)}

for q in questions:
    # A question without a stored answer counts as not attempted.
    model_answer = answers_by_id.get(q['id'])
    text_answer = model_answer['content'] if model_answer is not None else ''

    key = next((field for marker, field in type_to_key if marker in q['type']), '')

    # Build the reference text: one line per ground-truth answer that has a value.
    # The loop variable no longer reuses the name holding the model's answer.
    true_answer = []
    for reference in q['answers']:
        v = evaluate.get_osm_value(reference, key)
        if v is None:
            continue
        # Numeric references are spelled out as words and given their unit.
        if key in ['area', 'length', 'count', 'distance']:
            v = num2words(v)
        if key == 'area':
            v += ' meters squared'
        elif key in ['length', 'distance']:
            v += ' meters'
        true_answer.append(v)

    if len(text_answer):
        P, R, F1 = evaluate.evaluate_entity_names(text_answer, '\n'.join(true_answer))
        text_evaluation.append({'attempted': True, 'P': P, 'R': R, 'F1': F1})
    else:
        text_evaluation.append({'attempted': False, 'P': 0, 'R': 0, 'F1': 0})
    

In [31]:
# One row per question: the text-answer scores plus the question's type and id,
# written to a per-model CSV.
df = pd.DataFrame(text_evaluation).assign(
    type=[q['type'] for q in questions],
    id=[q['id'] for q in questions],
)
df.to_csv(f'./{model_name}_text2sql_text_eval.csv', index=False)

In [13]:
import tqdm

In [9]:
def get_recursive(data, search_key):
    """Collect every string stored under `search_key` anywhere inside `data`.

    Dicts and lists are walked depth-first in their own iteration order. A value
    under `search_key` that is not a string is searched in turn rather than
    collected. Anything that is neither a dict nor a list yields no results.

    Returns:
        A list with the matching strings, in the order they were encountered.
    """
    if isinstance(data, list):
        return [hit for element in data for hit in get_recursive(element, search_key)]
    if not isinstance(data, dict):
        return []

    found = []
    for name, child in data.items():
        if name == search_key and isinstance(child, str):
            found.append(child)
            continue
        found += get_recursive(child, search_key)
    return found


In [15]:
# evaluate parsed_answers
parsed_evaluation = []
progress = tqdm.tqdm(range(len(questions)))
def imporved_f1(new_f1, scores):
    return ('F1' not in scores) or (new_f1 > scores['F1'])

for i in progress:
    q = questions[i]
    parsed_answer = parsed_answers[i]
    scores = {'attempted': False}
    if 'multihop1' in q['type']:
        for a in q['answers']:
            v = evaluate.get_osm_value(a, 'multihop_answer')
            if v == None:
                continue
            for p in parsed_answer:
                pred_answer = p.get(a['multihop_attribute'], None)
                if pred_answer == None or len(pred_answer) == 0:
                    continue
                P, R, F1 = evaluate.evaluate_entity_names(pred_answer, v)
                if imporved_f1(F1, scores):
                    scores = {'attempted': True, 'P': P, 'R': R, 'F1': F1}
    elif 'name' in q['type']:
        for a in q['answers']:
            v = evaluate.get_osm_value(a, 'name')
            if v == None:
                continue
            for p in parsed_answer:
                # pred_answer = p.get('name', None)
                pred_answer = get_recursive(p, 'name')
                if pred_answer == None or len(pred_answer) == 0:
                    continue
                pred_answer = pred_answer[0]
                P, R, F1 = evaluate.evaluate_entity_names(pred_answer, v)
                if imporved_f1(F1, scores):
                    scores = { 'attempted': True, 'P': P, 'R': R, 'F1': F1}
    elif 'loc' in q['type']:
        for a in q['answers']:
            v = evaluate.get_osm_value(a, 'address')
            loc = evaluate.get_osm_value(a, 'location')
            if v == None:
                continue
            for p in parsed_answer:
                # pred_answer = p.get('address', None)
                pred_answer = get_recursive(p, 'address')
                if pred_answer == None or len(pred_answer) == 0:
                    continue
                pred_answer = pred_answer[0]
                # if type(pred_answer) == type([]):
                #     pred_answer = ', '.join(pred_answer)
                P, R, F1 = evaluate.evaluate_entity_names(pred_answer, v)
                if imporved_f1(F1, scores):
                    scores.update({'attempted': True,'P': P, 'R': R, 'F1': F1})
                pred_loc = evaluate.get_location_by_address(geocoder, pred_answer)
                if pred_loc == None:
                    continue
                distance_error = evaluate.evaluate_location(geod, [pred_loc], [loc])[0]
                if distance_error > 5*10**5:
                    distance_error = 1.0 #float('inf')
                else:
                    distance_error /= 5*10**5
                if distance_error < scores.get('distance_error', float('inf')):
                    scores['distance_error'] = distance_error
    elif 'angle' in q['type']:
        for a in q['answers']:
            angle = evaluate.get_osm_value(a, 'angle')
            angle_desc = evaluate.get_osm_value(a, 'angle_description')
            if angle == None:
                continue
            for p in parsed_answer:
                pred_angle = p.get('azimuth_angle', None)
                try:
                    pred_angle = int(pred_angle)
                except:
                    continue
                pred_answer = evaluate.get_angle_desc(pred_angle)
                if pred_answer == None or len(pred_answer) == 0:
                    continue
                P, R, F1 = evaluate.evaluate_entity_names(pred_answer, angle_desc)
                if imporved_f1(F1, scores):
                    scores.update({'attempted': True,'P': P, 'R': R, 'F1': F1})
                angle_error = evaluate.evaluate_angle([pred_angle], [angle])[0]
                if angle_error < scores.get('angle_error', float('inf')):
                    scores['angle_error'] = angle_error
    elif 'area' in q['type']:
        for a in q['answers']:
            v = evaluate.get_osm_value(a, 'area')
            if v == None:
                continue
            for p in parsed_answer:
                pred_v = p.get('area', None)
                if pred_v == None:
                    continue
                try:
                    pred_v = int(pred_v)
                except:
                    continue
                relative_error = evaluate.evaluate_measurement(pred_v, v)
                if relative_error < scores.get('relative_error', float('inf')):
                    scores['relative_error'] = relative_error
                    scores['attempted'] = True
    elif 'length' in q['type']:
        for a in q['answers']:
            v = evaluate.get_osm_value(a, 'length')
            if v == None:
                continue
            for p in parsed_answer:
                pred_v = p.get('length', None)
                if pred_v == None:
                    continue
                try:
                    pred_v = int(pred_v)
                except:
                    continue
                relative_error = evaluate.evaluate_measurement(pred_v, v)
                if relative_error < scores.get('relative_error', float('inf')):
                    scores['relative_error'] = relative_error
                    scores['attempted'] = True
    elif 'count' in q['type']:
        for a in q['answers']:
            v = evaluate.get_osm_value(a, 'count')
            if v == None:
                continue
            for p in parsed_answer:
                pred_v = p.get('count', None)
                if pred_v == None:
                    continue
                try:
                    pred_v = int(pred_v)
                except:
                    continue
                relative_error = evaluate.evaluate_measurement(pred_v, v)
                if relative_error < scores.get('relative_error', float('inf')):
                    scores['relative_error'] = relative_error
                    scores['attempted'] = True
    elif 'distance' in q['type']:
        for a in q['answers']:
            v = evaluate.get_osm_value(a, 'distance')
            if v == None:
                continue
            for p in parsed_answer:
                pred_v = p.get('distance', None)
                if pred_v == None:
                    continue
                try:
                    pred_v = int(pred_v)
                except:
                    continue
                relative_error = evaluate.evaluate_measurement(pred_v, v)
                if relative_error < scores.get('relative_error', float('inf')):
                    scores['relative_error'] = relative_error
                    scores['attempted'] = True
    parsed_evaluation.append(scores)

100%|██████████| 2800/2800 [00:02<00:00, 1094.90it/s]


In [16]:
# Value assigned to every score a question did not receive: the worst possible
# value for that metric (0 for P/R/F1, 1.0 for the error measures).
missing_score_defaults = {
    'P': 0,
    'R': 0,
    'F1': 0,
    'distance_error': 1.0,
    'angle_error': 1.0,
    'relative_error': 1.0,
}

df = pd.DataFrame(parsed_evaluation)
for column, default in missing_score_defaults.items():
    if column in df.columns:
        df[column] = df[column].fillna(default)
    else:
        # No question produced this score at all; selecting the column used to
        # raise KeyError in that case.
        df[column] = default
df['type'] = [q['type'] for q in questions]
df['id'] = [q['id'] for q in questions]

df.to_csv(f'./{model_name}_text2sql_parsed_eval.csv', index=False)