## Working with the CSVs and Excel sheets

### Train dataset

In [1]:
# Load the 'training' dataset into `df` and preview its first rows
import pandas as pd
df = pd.read_csv(filepath_or_buffer="datasets/txt2sql_alerce_train_v4_0.csv")
df.head(n=5)

Unnamed: 0,req_id,request,table_info,external_knowledge,domain_knowledge,gold_query,difficulty,type,nested_type,rephrased_request,rephrased_request_gpt-3.5-turbo-0125_t0.4,rephrased_request_gpt-4o-2024-05-13_t0.2
0,13,Give me all the SNe that were first detected b...,"['object', 'probability']",\n-- mjd date for December = 59914.0\n-- mjd d...,\n-- Super Nova (SNe) is a large explosion tha...,"\nSELECT\n object.oid, probability.class_na...",simple,object,none,,,
1,10,Get the object identifiers and probabilities i...,"['probability', 'object']",0,0,"\nSELECT\n sq1.oid, sq1.probability as SN_pro...",medium,object,simple,,,
2,15,"Get the object identifiers, probabilities in t...","['object', 'probability', 'detection', 'magstat']",-- mjd date for September 01 = 60188.0\n-- mjd...,\n-- A fast riser is defined as an object whos...,"\nSELECT\n sq.oid, sq.probability, sq.candi...",advanced,other,multi,,,
3,4,"Get the object identifier, candidate identifie...","['object', 'probability', 'magstat', 'detection']",0,0,"\nSELECT\n sq.oid, sq.fid, sq.dmdt_first,\n ...",advanced,other,multi,,,
4,25,Query objects within 10 degress of the next po...,"['probability', 'object']",-- mjd date for February 01 = 59976.0\n-- mjd ...,0,"\nWITH catalog ( source_id, ra, dec) AS (\n ...",advanced,spatial,simple,,,


In [2]:
# Full text of the training request with req_id 15 (.item() requires exactly one matching row)
df.loc[df["req_id"] == 15, "request"].item()

"Get the object identifiers, probabilities in the stamp classifier and light curves (only detections) for objects whose highest probability in the stamp classifier is obtained for class SN, that had their first detection in the first 2 days of september, and that qualify as fast risers. Also, return the filter ID and candid of the detections, and filter ID of magstat as 'magstat_fid', with the initial rise estimate. Order the results by the oid."

In [3]:
# Frequency table of the distinct `external_knowledge` values in the training set
print("External knowledge for the train dataset")
vc_df = (
    df["external_knowledge"]
    .value_counts()
    .rename_axis("Value")        # name the index so it becomes the "Value" column
    .reset_index(name="Count")   # counts become the "Count" column
)

vc_df

External knowledge for the train dataset


Unnamed: 0,Value,Count
0,0,43
1,\n-- mjd date for December = 59914.0\n-- mjd d...,1
2,-- mjd date for September 01 = 60188.0\n-- mjd...,1
3,-- mjd date for February 01 = 59976.0\n-- mjd ...,1
4,-- mjd date for the start of January = 59945.0...,1
5,"\n-- last june in mjd date: [start=60096.0, en...",1
6,\n-- it is important to return all the probabi...,1
7,-- consider the list of objects identifiers: [...,1
8,\n-- mjd date for the start of September = 601...,1
9,-- mjd date for the start of July = 60126.0\n-...,1


In [4]:
# Frequency table of the distinct `domain_knowledge` values in the training set
print("Domain knowledge for the train dataset")
vc_df_domain = (
    df["domain_knowledge"]
    .value_counts()
    .rename_axis("Value")        # name the index so it becomes the "Value" column
    .reset_index(name="Count")   # counts become the "Count" column
)

vc_df_domain

Domain knowledge for the train dataset


Unnamed: 0,Value,Count
0,0,54
1,\n-- Super Nova (SNe) is a large explosion tha...,1
2,\n-- A fast riser is defined as an object whos...,1
3,-- 1000 arcmin = 16.16667 degrees\n,1
4,\n-- The galactic coordinate system locates ob...,1


### Test dataset

In [5]:
# Load the test dataset into `df_test` and preview its first rows
df_test = pd.read_csv(filepath_or_buffer="datasets/txt2sql_alerce_test_v4_0.csv")
df_test.head(n=5)

Unnamed: 0,req_id,request,table_info,external_knowledge,domain_knowledge,gold_query,difficulty,type,nested_type,rephrased_request,rephrased_request_gpt-3.5-turbo-0125_t0.4,rephrased_request_gpt-4o-2024-05-13_t0.2
0,8,Query all objects that were first classified a...,"['probability', 'object']",\n-- mjd date for august 17 = 60173.0\n-- mjd ...,0,\nSELECT\n *\nFROM\n object\nINNER JOIN\...,simple,object,none,,,
1,27,"Return the oids, meanra, meandec, ndet, firstm...","['probability', 'object']",0,0,"\nSELECT\n object.oid, object.meanra, objec...",simple,object,simple,,,
2,17,Return the objects classified as AGN that show...,"['probability', 'object', 'detection']",0,0,\nSELECT\n *\nFROM\n (\nSELECT *\nFROM (\nSE...,advanced,object,tree,,,
3,37,"""Query the top 200 objects classified SNIa acc...",['probability'],0,0,\nSELECT\n *\nFROM\n probability\nWHERE\n ...,simple,object,none,,,
4,34,Given this list of oids ['ZTF17aaadpsi' 'ZTF19...,['feature'],0,0,\nSELECT\n *\nFROM\n feature\nWHERE\n ...,simple,other,none,,,


In [6]:
# Frequency table of the distinct `external_knowledge` values in the test set
print("External knowledge for the test dataset")
vc_df_test = (
    df_test["external_knowledge"]
    .value_counts()
    .rename_axis("Value")        # name the index so it becomes the "Value" column
    .reset_index(name="Count")   # counts become the "Count" column
)

vc_df_test

External knowledge for the test dataset


Unnamed: 0,Value,Count
0,0,44
1,Note that 'fainter' means 'larger' for magnitudes,2
2,\n-- mjd date for august 17 = 60173.0\n-- mjd ...,1
3,\n-- mjd date for September 01 = 60188.0\n-- m...,1
4,\n-- mjd date for March 1st 2021 = 59274.0\n--...,1
5,\n-- mjd date for June 1st 2018 = 58239.0\n-- ...,1
6,-- mjd date for February 13 = 59988.0\n-- mjd ...,1
7,"Note that filters W1 to W4 means returning W1,...",1


In [7]:
# Frequency table of the distinct `domain_knowledge` values in the test set.
# The printed label names the test dataset because the counts come from `df_test`.
print("Domain knowledge for the test dataset")
vc_df_domain_test = df_test["domain_knowledge"].value_counts().reset_index()
vc_df_domain_test.columns = ["Value", "Count"]

vc_df_domain_test

Domain knowledge for the train dataset


Unnamed: 0,Value,Count
0,0,48
1,\n-- A fast riser is defined as an object whos...,1
2,"-- Asteroids move, so it is necessary to choos...",1
3,\n-- SNe refers to all types of SuperNova\n,1


### Additional info

In [8]:
# Read the use-case sheet from the Excel file and, in the same chain, drop the
# two unnamed columns ("Unnamed: 0.1" and "Unnamed: 0") that are not needed.
excel = (
    pd.read_excel(
        "datasets/SQLusecases_alerce.xlsx",
        sheet_name="examples_alerce_usecasesV3_1",
    )
    .drop(columns=["Unnamed: 0.1", "Unnamed: 0"])
)
excel.head()

Unnamed: 0,req_id,request,table_info,external_knowledge,domain_knowledge,gold_query,difficulty,type,nested_type,Set,python_format
0,0,Get objects that are likely to be YSOs (possib...,"['probability', 'feature']",\n-- feature.name can be 'Multiband_period'\n-...,\n-- Multiband_period: Period obtained using t...,"\nSELECT\n oid, probability, value, name, fid...",advanced,object,tree,Train,"sub_query_1='''\nSELECT\n feature.oid, prob_o..."
1,1,Get all the objects classified as AGN with a p...,"['object', 'probability', 'feature', 'magstat']",\n-- object.ndet represents the number of dete...,\n-- Amplitude: Half of the difference between...,"\nSELECT\n sq.oid, sq.value, sq.name, sq.fid ...",advanced,object,tree,Test,\nsub_query_object='''\nSELECT\n object.oid...
2,2,Give me the objects classified as YSO by their...,['probability'],,,"\nSELECT\n oid, probability\nFROM\n prob...",simple,object,none,Train,"query='''\nSELECT\n oid, probability\nFROM\..."
3,3,Give me the objects classified as YSO by the l...,"['object', 'probability']","\n-- last june in mjd date: [start=60096.0, en...",,\nSELECT\n *\nFROM\n probability\nWHERE\...,simple,object,simple,Train,query=f'''\nSELECT\n *\nFROM\n probabili...
4,4,"Get the object identifier, candidate identifie...","['object', 'probability', 'magstat', 'detection']",\n-- mjd date for the start of the year 2019 =...,,"\nSELECT\n sq.oid, sq.fid, sq.dmdt_first,\n ...",advanced,other,multi,Train,# objects classified as SN II with probability...


## Tests

### Gold values and query to test

In [9]:
# Select a query
query = "Give me all the SNe that were first detected between december first 2022 and september first 2023. Return the probability class, the last and the first detection date and the oids of the objects."

# Look the request up in the Excel sheet first (it also carries the Python
# version of the gold query); otherwise fall back to the training CSV.
found_in_excel = query in excel["request"].to_list()
gold_source = excel if found_in_excel else df
gold_rows = gold_source[gold_source["request"] == query]

# Fail with an explicit message instead of the opaque error `.item()` raises
# when the request is missing from both sources or appears more than once.
if len(gold_rows) != 1:
    raise ValueError(
        f"Expected exactly one row matching the request, found {len(gold_rows)}"
    )

# Obtain the gold SQL query and the necessary tables
sql_gold = gold_rows["gold_query"].item()
gold_tables = gold_rows["table_info"].item()

# Print all in orderly fashion
print("Gold values\n")
print("Tables needed for the query:")
print(gold_tables + "\n")
print("SQL gold query:")
print(sql_gold)

# The Python gold query only exists in the Excel sheet, so `python_gold` is
# defined (and printed) only when the request was found there.
if found_in_excel:
    python_gold = gold_rows["python_format"].item()
    print("Python gold query:\n")
    print(python_gold)

Gold values

Tables needed for the query:
['object', 'probability']

SQL gold query:

SELECT
    object.oid, probability.class_name, object.lastmjd, object.firstmjd
FROM
    object INNER JOIN
    probability
    ON object.oid = probability.oid
WHERE
    probability.classifier_name='lc_classifier'
    AND probability.class_name IN ('SNIa', 'SNIbc', 'SNII', 'SLSN')
    AND probability.ranking = 1
    AND object.firstmjd < 60217.0
    AND object.firstmjd > 59914.0



In [10]:
# Running the gold query
from urllib.parse import quote

from secret.config import SQL_URL
import requests
import sqlalchemy as sa

# Setup params for query engine.
# Fail fast on an HTTP error or an unresponsive server instead of hanging or
# failing later with a confusing JSON/KeyError.
response = requests.get(SQL_URL, timeout=30)
response.raise_for_status()
params = response.json()['params']

# Percent-encode the credentials so characters such as '@', ':' or '/' in the
# user name or password cannot corrupt the connection URL.
db_user = quote(str(params['user']), safe="")
db_password = quote(str(params['password']), safe="")
engine = sa.create_engine(
    f"postgresql+psycopg2://{db_user}:{db_password}@{params['host']}/{params['dbname']}"
)
# No bare `engine.begin()` here: it returns a transaction context that was
# never entered or closed, and pandas manages its own connection below.

resultGold = pd.read_sql_query(sql_gold, con=engine)
resultGold

Unnamed: 0,oid,class_name,lastmjd,firstmjd
0,ZTF23aaocetr,SNIa,60133.210428,60109.208229
1,ZTF23aamzkrs,SNIa,60124.189074,60097.242882
2,ZTF23aamxbpd,SNIa,60120.218900,60100.190417
3,ZTF23aafwzmi,SNIa,60078.406505,60050.470231
4,ZTF23aackdba,SNII,60073.174653,59992.212407
...,...,...,...,...
4542,ZTF23aagaoax,SNII,60761.252708,60049.210880
4543,ZTF23aaefijg,SNII,60786.461586,60036.459398
4544,ZTF20aanlftl,SNII,60786.408137,59977.563657
4545,ZTF23aaavavg,SNIa,60770.256586,59967.353044


### Trying out the pipelines

#### Singular

In [10]:
from pipeline.queryPipeline import queryPipeline

# Working query
# This is the same text as the training request with req_id 15 shown earlier.
# NOTE: this rebinds `query`, which the gold-values cell set to a different request.
query = "Get the object identifiers, probabilities in the stamp classifier and light curves (only detections) for objects whose highest probability in the stamp classifier is obtained for class SN, that had their first detection in the first 2 days of september, and that qualify as fast risers. Also, return the filter ID and candid of the detections, and filter ID of magstat as 'magstat_fid', with the initial rise estimate. Order the results by the oid."

# Model to use (the commented-out line is an alternative model name kept for reference)
#model = "claude-3-5-sonnet-20240620"
model = "gpt-4o"

# Format for the pipeline
# Both values are passed positionally to queryPipeline in a later cell.
# NOTE(review): max_tokens is presumably the LLM token budget — confirm in pipeline.queryPipeline.
lang_type = "sql"
max_tokens = 10000

In [None]:
from pipeline.process import *
from prompts.base.prompts import *

# Prompt dictionary guideline and used by Jorge.
# One entry per pipeline stage; the "placeholder" knowledge slots are filled in
# at the bottom of this cell from the row of `df` that matches `query`.
prompts = {
    "Schema Linking": {
        "base_prompt": tables_linking_prompt_V2,
        "context1": schema_all_cntxV1,
        "context2": schema_all_cntxV2_indx,
        "context3": schema_all_cntxV2,
    },
    "Classify": {
        "base_prompt": diff_class_prompt_v7,
        "final_instructions": final_instructions_diff_v2
    },
    "Decomposition": {
        "simple": {
            "query_task": simple_query_task_v2,
            "query_context": simple_query_cntx,
            "external_knowledge": "placeholder",
            "domain_knowledge": "placeholder",
            "query_instructions": simple_query_instructions_v2
        },
        "medium": {
            "decomp_plan": {
                "base_prompt": medium_decomp_prompt,
                "decomp_task": medium_decomp_task_v3 + gpt4turbo1106_decomposed_prompt_2,
                "query_context": medium_query_cntx,
                "query_instructions": medium_query_instructions_1_v2
            },
            "decomp_gen": {
                "sql": {
                    "base_prompt": medium_decomp_gen,
                    "query_task": medium_query_task_v2,
                    "query_instructions": medium_query_instructions_2_v2,
                },
                "python": {
                    "base_prompt": "placeholder",
                    "query_task": "placeholder",
                    "query_instructions": "placeholder",
                }
            }
        },
        "advanced": {
            "decomp_plan": {
                "base_prompt": adv_decomp_prompt,
                "decomp_task": adv_decomp_task_v3 + gpt4turbo1106_decomposed_prompt_2,
                "query_context": adv_query_cntx,
                "query_instructions": adv_query_instructions_1_v3
            },
            "decomp_gen": {
                "sql": {
                    "base_prompt": adv_decomp_gen,
                    "query_task": adv_query_task_v2,
                    "query_instructions": adv_query_instructions_2_v3,
                },
                "python": {
                    "base_prompt": "placeholder",
                    "query_task": "placeholder",
                    "query_instructions": "placeholder",
                }
            }
        }
    },
    "Direct": {
        "base_prompt": {
            "general_task": general_taskv18,
            "general_context": general_contextv15,
            "final_instructions": final_instructions_v19
        },
        "request_prompt": {
            "external_knowledge": "placeholder",
            "domain_knowledge": "placeholder"
        }
    }
}

# Updating ext_kn and dom_kn: find the row for `query` once, then write the same
# two values into both prompt sections that carry knowledge placeholders.
request_row = df.loc[df["request"] == query]
ext_kn = request_row["external_knowledge"].item()
dom_kn = request_row["domain_knowledge"].item()
for knowledge_slot in (prompts["Decomposition"]["simple"], prompts["Direct"]["request_prompt"]):
    knowledge_slot["external_knowledge"] = ext_kn
    knowledge_slot["domain_knowledge"] = dom_kn

In [None]:
# Pipeline object
# Built from the request text, model name, output language, token limit and the
# in-memory `prompts` dictionary defined in the previous cell.
pipe = queryPipeline(query, model, lang_type, max_tokens, prompts)

# Schema Linking
# (the return value is stored but not used again in this cell)
schema_usage = pipe.schema_linking()

# Classification
# (the return value is stored but not used again in this cell)
class_usage = pipe.classify()

# Decomposition
pipe.decomposition()

# Direct prompt
pipe.direct()

# Generating the queries
# NOTE(review): query_generation() is called twice with identical arguments, and
# both calls happen after decomposition() AND direct() have already run. Whether
# the first result really is the decomposition-based query and the second the
# direct one depends on queryPipeline internals that are not visible here —
# confirm, or interleave each preparation step with its generation call.
decomp_gen_query = pipe.query_generation()
print("Query generated with decomposition prompts")
print(decomp_gen_query)
print("-"*50)
direct_gen_query = pipe.query_generation()
print("Query generated with direct prompts")
print(direct_gen_query)

Query generated with decomposition prompts

-- Subquery to get object identifiers (oids) that qualify as fast risers
WITH fast_risers AS (
    SELECT DISTINCT oid
    FROM magstat
    WHERE dmdt_first <= -0.25
),

-- Subquery to get object identifiers (oids) with first detection in the first 2 days of September
first_september_detections AS (
    SELECT oid
    FROM object
    WHERE firstmjd BETWEEN 60188.0 AND 60189.0
),

-- Subquery to get object identifiers (oids) with highest probability in the stamp classifier for class SN
high_prob_sn AS (
    SELECT oid
    FROM probability
    WHERE classifier_name = 'stamp_classifier'
    AND class_name = 'SN'
    AND ranking = 1
)

-- Main query to get the required information
SELECT 
    d.oid,
    p.probability,
    d.fid,
    d.candid,
    m.fid AS magstat_fid,
    m.dmdt_first
FROM detection d
JOIN fast_risers fr ON d.oid = fr.oid
JOIN first_september_detections fsd ON d.oid = fsd.oid
JOIN high_prob_sn hps ON d.oid = hps.oid
JOIN probabil

#### Multiple

##### Query generation

In [None]:
from pipeline.queryPipeline import queryPipeline
from pipeline.process import *
from prompts.base.prompts import *

# Prompt dictionary guideline and used by Jorge.
# NOTE(review): this cell repeats the prompt dictionary built in the "Singular"
# section; consider moving it into a shared helper/module so the two copies
# cannot drift apart.
prompts = {
    "Schema Linking": {
        "base_prompt": tables_linking_prompt_V2,
        "context1": schema_all_cntxV1,
        "context2": schema_all_cntxV2_indx,
        "context3": schema_all_cntxV2,
    },
    "Classify": {
        "base_prompt": diff_class_prompt_v7,
        "final_instructions": final_instructions_diff_v2
    },
    "Decomposition": {
        "simple": {
            "query_task": simple_query_task_v2,
            "query_context": simple_query_cntx,
            "external_knowledge": "placeholder",
            "domain_knowledge": "placeholder",
            "query_instructions": simple_query_instructions_v2
        },
        "medium": {
            "decomp_plan": {
                "base_prompt": medium_decomp_prompt,
                "decomp_task": medium_decomp_task_v3 + gpt4turbo1106_decomposed_prompt_2,
                "query_context": medium_query_cntx,
                "query_instructions": medium_query_instructions_1_v2
            },
            "decomp_gen": {
                "sql": {
                    "base_prompt": medium_decomp_gen,
                    "query_task": medium_query_task_v2,
                    "query_instructions": medium_query_instructions_2_v2,
                },
                "python": {
                    "base_prompt": "placeholder",
                    "query_task": "placeholder",
                    "query_instructions": "placeholder",
                }
            }
        },
        "advanced": {
            "decomp_plan": {
                "base_prompt": adv_decomp_prompt,
                "decomp_task": adv_decomp_task_v3 + gpt4turbo1106_decomposed_prompt_2,
                "query_context": adv_query_cntx,
                "query_instructions": adv_query_instructions_1_v3
            },
            "decomp_gen": {
                "sql": {
                    "base_prompt": adv_decomp_gen,
                    "query_task": adv_query_task_v2,
                    "query_instructions": adv_query_instructions_2_v3,
                },
                "python": {
                    "base_prompt": "placeholder",
                    "query_task": "placeholder",
                    "query_instructions": "placeholder",
                }
            }
        }
    },
    "Direct": {
        "base_prompt": {
            "general_task": general_taskv18,
            "general_context": general_contextv15,
            "final_instructions": final_instructions_v19
        },
        "request_prompt": {
            "external_knowledge": "placeholder",
            "domain_knowledge": "placeholder"
        }
    }
}

# Updating ext_kn and dom_kn: find the row for `query` once, then write the same
# two values into both prompt sections that carry knowledge placeholders.
request_row = df.loc[df["request"] == query]
ext_kn = request_row["external_knowledge"].item()
dom_kn = request_row["domain_knowledge"].item()
for knowledge_slot in (prompts["Decomposition"]["simple"], prompts["Direct"]["request_prompt"]):
    knowledge_slot["external_knowledge"] = ext_kn
    knowledge_slot["domain_knowledge"] = dom_kn

In [22]:
from pipeline.queryPipeline import queryPipeline

# Model to use (the commented-out line is an alternative model name kept for reference)
#model = "claude-3-5-sonnet-20240620"
model = "gpt-4o-2024-11-20"

# Format for the pipeline
lang_type = "sql"
max_tokens = 10000

# Path to the JSON file holding the prompts
prompts_path = "final_prompts/prompts_v4.json"

# Pipeline object
# Here the first argument is None and the last one is a prompts file path, whereas
# the single-query cell passed the request string and the in-memory `prompts` dict.
# NOTE(review): confirm queryPipeline accepts both forms for these two arguments.
pipe = queryPipeline(None, model, lang_type, max_tokens, prompts_path)

# -----------------------------------------------------------------------------
# Testing

# Working query
query = "Get the object identifiers, probabilities in the stamp classifier and light curves (only detections) for objects whose highest probability in the stamp classifier is obtained for class SN, that had their first detection in the first 2 days of september, and that qualify as fast risers. Also, return the filter ID and candid of the detections, and filter ID of magstat as 'magstat_fid', with the initial rise estimate. Order the results by the oid."

# Trying out the new query pipeline with only 1 query
# (the training rows whose request text equals `query`)
df_1 = df[df["request"] == query]
# Trying out the new query pipeline with 3 queries
# (.loc slicing is label-based and inclusive, so this selects index labels 0, 1 and 2)
df_2 = df.loc[:2]
# Example call on the 3-query subset, left disabled:
# pipe.run_experiments(df_2, total_exps=1, restart=True)

In [23]:
# Applying the restart
# Runs the experiments on the test set (`df_test`), not on the `df_1` / `df_2`
# subsets prepared in the previous cell.
# NOTE(review): the exact meaning of total_exps, restart and use_rag is defined in
# pipeline.queryPipeline, which is not visible here — confirm before changing them.
pipe.run_experiments(df=df_test, total_exps=10, restart=True, use_rag=False)

2025-07-23 17:57:55,222 - INFO - Running process
2025-07-23 17:57:55,223 - INFO - Running pipeline
2025-07-23 17:58:21,074 - INFO - Saving backup
2025-07-23 17:58:21,082 - INFO - Running pipeline
2025-07-23 17:58:42,516 - INFO - Saving backup
2025-07-23 17:58:42,525 - INFO - Running pipeline
2025-07-23 17:59:05,289 - INFO - Saving backup
2025-07-23 17:59:05,299 - INFO - Running pipeline
2025-07-23 17:59:28,932 - INFO - Saving backup
2025-07-23 17:59:28,947 - INFO - Running pipeline
2025-07-23 17:59:49,800 - INFO - Saving backup
2025-07-23 17:59:49,815 - INFO - Running pipeline
2025-07-23 18:00:17,506 - INFO - Saving backup
2025-07-23 18:00:17,519 - INFO - Running pipeline
2025-07-23 18:00:41,325 - INFO - Saving backup
2025-07-23 18:00:41,333 - INFO - Running pipeline
2025-07-23 18:01:10,986 - INFO - Saving backup
2025-07-23 18:01:11,008 - INFO - Running pipeline
2025-07-23 18:01:46,967 - INFO - Saving backup
2025-07-23 18:01:46,977 - INFO - Running pipeline
2025-07-23 18:02:11,060 - IN

##### Metrics generation

In [9]:
# Model to use (the commented-out line is an alternative model name kept for reference)
#model = "claude-3-5-sonnet-20240620"
model = "gpt-4o-2024-11-20"

# Format for the pipeline
lang_type = "sql"
max_tokens = 10000

# Paths
# pred_path: predictions CSV; the timestamp in its name matches the start of the
# run_experiments log shown above (presumably written by that run — verify).
# prompts_path: JSON file holding the prompts.
pred_path = "experiments/preds_gpt-4o-2024-11-20_2025-07-23T17-57-55.csv"
prompts_path = "final_prompts/prompts_v4.json"

# Trying out the metrics pipeline with only 1 query
#df_1 = df[df["request"] == query]
# Trying out the metrics pipeline with 3 queries
# (.loc slicing is label-based and inclusive, so this selects index labels 0, 1 and 2)
df_2 = df.loc[:2]

Run times: direct / no self-correction (18/05/2025)

279m 29.9s
12m 46.5s

Run times: direct / self-correction (19/05/2025)

5m 4.7s
409m 22s
139m 15.1s

Run times: step-by-step / self-correction (20/05/2025)

6m 32.4s
159m 0s
135m 0s
4m 7.7s
210m 18.8s

In [12]:
from pipeline.metricsPipeline import metricsPipeline

# Model to use (the commented-out line is an alternative model name kept for reference)
#model = "claude-3-5-sonnet-20240620"
model = "gpt-4o-2024-11-20"

# Format for the pipeline
lang_type = "sql"
max_tokens = 10000

# Paths
# pred_path: predictions CSV to evaluate; prompts_path: JSON file holding the prompts.
pred_path = "experiments/preds_gpt-4o-2024-11-20_2025-07-23T17-57-55.csv"
prompts_path = "final_prompts/prompts_v4.json"

# Restrict the evaluation to the test-set requests whose difficulty is "medium".
# NOTE: this rebinds `df_2`, which earlier cells used for the first three training rows.
df_2 = df_test[df_test["difficulty"] == "medium"]

# Running the metrics on that medium-difficulty subset of the test set.
# NOTE(review): the meaning of t_conn, n_tries, direct, self_corr and the empty
# self_corr_prompts dict is defined in pipeline.metricsPipeline, which is not
# visible here — confirm before changing them.
metrics = metricsPipeline(model, lang_type, max_tokens, 
                          t_conn=2, n_tries=3, direct=True, self_corr=True, 
                          self_corr_prompts={}, prompts_path=prompts_path)
metrics.run_metrics(sql_preds_path=pred_path, df=df_2, total_exps=10, 
                    restart=True)

2025-07-24 09:18:12,889 - INFO - Restarting
2025-07-24 09:18:12,957 - INFO - Query ID: 90, Run ID: 5
2025-07-24 09:18:12,957 - INFO - Getting the gold values to compare
2025-07-24 09:18:17,632 - INFO - Query ID: 90, Run ID: 5, Query gold:               oid classifier_name     classifier_version class_name  \
0    ZTF19abdyprc   lc_classifier   lc_classifier_1.1.13        CEP   
1    ZTF19abdyprc   lc_classifier  hierarchical_rf_1.1.0       DSCT   
2    ZTF19abdyprc   lc_classifier   lc_classifier_1.1.13        RRL   
3    ZTF19abdyprc   lc_classifier  hierarchical_rf_1.1.0        RRL   
4    ZTF18abadjvy   lc_classifier  hierarchical_rf_1.1.0          E   
..            ...             ...                    ...        ...   
341  ZTF18abasovn   lc_classifier  hierarchical_rf_1.1.0          E   
342  ZTF18abasovn   lc_classifier   lc_classifier_1.1.13          E   
343  ZTF18abasovn   lc_classifier  hierarchical_rf_1.1.0        RRL   
344  ZTF22aabwnvh   lc_classifier  hierarchical_rf_

In [13]:
# Display the current contents of `df_2`.
# NOTE: `df_2` is rebound in several cells, so what is shown depends on which of them ran last.
df_2

Unnamed: 0,req_id,request,table_info,external_knowledge,domain_knowledge,gold_query,difficulty,type,nested_type,rephrased_request,rephrased_request_gpt-3.5-turbo-0125_t0.4,rephrased_request_gpt-4o-2024-05-13_t0.2
8,14,For the next list of oids: ['ZTF23aavzgjg' 'ZT...,"['detection', 'ps1_ztf']",0,0,"SELECT\n det.oid, det.candid,\n det.drb,...",medium,other,simple,,,
11,12,Give me all the SNe that first occurred betwee...,"['object', 'probability']",-- mjd date for February 13 = 59988.0\n-- mjd ...,\n-- SNe refers to all types of SuperNova\n,"\nSELECT o.oid, o.meanra, o.meandec\n\nFROM (\...",medium,spatial,simple,,,
15,89,Find at most 50 ZTF objects that have a probab...,"['probability', 'dataquality']",0,0,SELECT\n *\nFROM\n dataquality\nWHERE\n ...,medium,,,,Retrieve all the data from the 'dataquality' t...,Retrieve all columns from the 'dataquality' ta...
25,92,Find at most 500 ZTF objects ordered by their ...,"['probability', 'magstat']",0,0,"SELECT\n oid, classifier_version, class_nam...",medium,,,,"Retrieve the ZTF identifier, classifier versio...",Retrieve up to 500 objects from the ZTF datase...
34,83,Get all columns in the 'allwise' table for the...,"['xmatch', 'allwise']",0,0,SELECT\n *\nFROM\n allwise\nWHERE\n o...,medium,,,,Retrieve all the information from the 'allwise...,Retrieve every column from the 'allwise' table...
35,84,For the ZTF object 'ZTF19aascdol' get the foll...,"['xmatch', 'allwise']","Note that filters W1 to W4 means returning W1,...",0,"SELECT\n oid, xmatch.oid_catalog, dist,\n ...",medium,,,,Retrieve the ZTF object 'ZTF19aascdol' and gat...,"Retrieve the ZTF identifier, catalog identifie..."
36,96,For Solar System identifiers '2003FP134' and '...,"['detection', 'ss_ztf']",0,0,"SELECT\n ss_ztf.*, detection.mjd, detection...",medium,,,,Retrieve all detections for ZTF objects that a...,Retrieve all detections for ZTF objects that a...
41,94,Get all columns from the 'object' and 'referen...,"['object', 'reference']",0,0,SELECT\n *\nFROM\n object INNER JOIN\n ...,medium,,,,Retrieve the complete information from both th...,Retrieve all columns from both the 'object' an...
42,90,Find at most 100 ZTF objects that have a multi...,"['probability', 'feature']",0,0,SELECT\n *\nFROM\n probability\nWHERE\n ...,medium,,,,Retrieve all information from the 'probability...,Retrieve up to 100 ZTF objects whose multiband...
51,39,Get the columns that contains: object identifi...,"['probability', 'object', 'detection']",0,0,"SELECT\n sq.oid, sq.meanra, sq.meandec,\n de...",medium,other,simple,,,
