# Testing notebook

## Working with the CSVs and Excel sheets

In [1]:
# Load the 'training' dataset from its CSV file and preview the first rows
import pandas as pd

TRAIN_CSV = "datasets/txt2sql_alerce_train_v3_1.csv"
df = pd.read_csv(TRAIN_CSV)
df.head()

Unnamed: 0,req_id,request,table_info,external_knowledge,domain_knowledge,gold_query,difficulty,type,nested_type,rephrased_request,rephrased_request_gpt-3.5-turbo-0125_t0.4,rephrased_request_gpt-4o-2024-05-13_t0.2
0,13,Give me all the SNe that were first detected b...,"['object', 'probability']",\r\n-- mjd date for December = 59914.0\r\n-- m...,\r\n-- Super Nova (SNe) is a large explosion t...,"\r\nSELECT\r\n object.oid, probability.clas...",simple,object,none,,,
1,10,Get the object identifiers and probabilities i...,"['probability', 'object']",0,0,"\r\nSELECT\r\n sq1.oid, sq1.probability as SN...",medium,object,simple,,,
2,15,"Get the object identifiers, probabilities in t...","['object', 'probability', 'detection', 'magstat']",\r\n-- mjd date for September 01 = 60188.0\r\n...,\r\n-- A fast riser is defined as an object wh...,"\r\nSELECT\r\n sq.oid, sq.probability, sq.c...",advanced,other,multi,,,
3,4,"Get the object identifier, candidate identifie...","['object', 'probability', 'magstat', 'detection']",\r\n-- mjd date for the start of the year 2019...,0,"\r\nSELECT\r\n sq.oid, sq.fid, sq.dmdt_first,...",advanced,other,multi,,,
4,25,Query objects within 10 degress of the next po...,"['probability', 'object']",\r\n-- mjd date for February 01 = 59976.0\r\n-...,0,"\r\nWITH catalog ( source_id, ra, dec) AS (\r\...",advanced,spatial,simple,,,


In [2]:
# Show the external-knowledge note stored for one specific request.
# `.item()` requires exactly one matching row and raises otherwise.
linear_trend_request = "Find at most 10 cases where the feature called 'LinearTrend' is larger than 5 and their ZTF objects have a difference between last and first detection date larger than 1 day. Return the following columns. For the ZTF objects: ZTF identifier and number of detections; for their light curve classifier data at ranking 1: classifier version, class and probability; for the 'LinearTrend' feature: value and filter identifier"
df.loc[df["request"] == linear_trend_request, "external_knowledge"].item()

"Since no feature version is specified, the combination between ZTF oid and fid can be non-unique in the 'feature' table output"

In [3]:
# Load the use-cases sheet from the Excel workbook
USECASES_XLSX = "datasets/SQLusecases_alerce.xlsx"
excel_raw = pd.read_excel(USECASES_XLSX, sheet_name="examples_alerce_usecasesV3_1")

# Drop the two unnamed columns (presumably stale index columns from earlier
# exports -- TODO confirm) and preview the cleaned sheet
excel = excel_raw.drop(columns=["Unnamed: 0.1", "Unnamed: 0"])
excel.head()

Unnamed: 0,req_id,request,table_info,external_knowledge,domain_knowledge,gold_query,difficulty,type,nested_type,Set,python_format
0,0,Get objects that are likely to be YSOs (possib...,"['probability', 'feature']",\n-- feature.name can be 'Multiband_period'\n-...,\n-- Multiband_period: Period obtained using t...,"\nSELECT\n oid, probability, value, name, fid...",advanced,object,tree,Train,"sub_query_1='''\nSELECT\n feature.oid, prob_o..."
1,1,Get all the objects classified as AGN with a p...,"['object', 'probability', 'feature', 'magstat']",\n-- object.ndet represents the number of dete...,\n-- Amplitude: Half of the difference between...,"\nSELECT\n sq.oid, sq.value, sq.name, sq.fid ...",advanced,object,tree,Test,\nsub_query_object='''\nSELECT\n object.oid...
2,2,Give me the objects classified as YSO by their...,['probability'],,,"\nSELECT\n oid, probability\nFROM\n prob...",simple,object,none,Train,"query='''\nSELECT\n oid, probability\nFROM\..."
3,3,Give me the objects classified as YSO by the l...,"['object', 'probability']","\n-- last june in mjd date: [start=60096.0, en...",,\nSELECT\n *\nFROM\n probability\nWHERE\...,simple,object,simple,Train,query=f'''\nSELECT\n *\nFROM\n probabili...
4,4,"Get the object identifier, candidate identifie...","['object', 'probability', 'magstat', 'detection']",\n-- mjd date for the start of the year 2019 =...,,"\nSELECT\n sq.oid, sq.fid, sq.dmdt_first,\n ...",advanced,other,multi,Train,# objects classified as SN II with probability...


## Tests

### Gold values and query to test

In [4]:
# Select a query
query = "Get the object identifiers, probabilities in the stamp classifier and light curves (only detections) for objects whose highest probability in the stamp classifier is obtained for class SN, that had their first detection in the first 2 days of september, and that qualify as fast risers."

# Prefer the Excel sheet when it contains the request: it is the only source
# with a "python_format" column. Otherwise fall back to the training CSV.
in_excel = query in excel["request"].to_list()
gold_source = excel if in_excel else df

# Filter once and make sure the lookup is unambiguous, instead of letting
# `.item()` fail with an opaque "can only convert an array of size 1" error.
gold_rows = gold_source[gold_source["request"] == query]
if len(gold_rows) != 1:
    raise ValueError(
        f"Expected exactly one row matching the request, found {len(gold_rows)}"
    )

# Gold SQL query, tables needed, and (Excel only) the Python-formatted query.
# python_gold is reset to None on the CSV path so a stale value from a
# previous run of this cell can never be mistaken for the current one.
sql_gold = gold_rows["gold_query"].item()
gold_tables = gold_rows["table_info"].item()
python_gold = gold_rows["python_format"].item() if in_excel else None

# Print all in orderly fashion
print("Gold values\n")
print("Tables needed for the query:")
print(gold_tables + "\n")
print("SQL gold query:")
print(sql_gold)
if in_excel:
    print("Python gold query:\n")
    print(python_gold)

Gold values

Tables needed for the query:
['object', 'probability', 'detection', 'magstat']

SQL gold query:

SELECT
    sq.oid, sq.probability, sq.candid, sq.fid, sq.mjd,
    magstat.fid as magstat_fid, magstat.dmdt_first
FROM
  (
SELECT
  detection.oid, detection.candid, detection.fid, detection.mjd,
  obj_oids.probability
FROM
  (
SELECT
    object.oid, probability.probability
FROM
    object INNER JOIN
    probability
    ON object.oid = probability.oid
WHERE
    probability.classifier_name='stamp_classifier'
    AND probability.class_name='SN'
    AND probability.ranking=1
    AND object.firstmjd > 60188.0
    AND object.firstmjd < 60189.0
) as obj_oids
    INNER JOIN
    detection ON detection.oid = obj_oids.oid
) AS sq
  INNER JOIN magstat
  ON sq.oid = magstat.oid
WHERE
  magstat.dmdt_first < -0.25
ORDER BY oid

Python gold query:

sub_query_object=f'''
SELECT
    object.oid, probability.probability
FROM
    object INNER JOIN
    probability
    ON object.oid = probability.oid


In [5]:
# Running the gold query
from secret.config import SQL_URL
import requests
import sqlalchemy as sa

# Fetch the connection parameters for the query engine.
# A timeout keeps the cell from hanging forever on an unresponsive service,
# and raise_for_status() surfaces HTTP errors directly instead of as a
# confusing JSON/KeyError further down.
response = requests.get(SQL_URL, timeout=30)
response.raise_for_status()
params = response.json()['params']
engine = sa.create_engine(f"postgresql+psycopg2://{params['user']}:{params['password']}@{params['host']}/{params['dbname']}")

# No bare `engine.begin()` here: its return value was discarded, so it was
# either a no-op or a leaked connection/transaction depending on the
# SQLAlchemy version. pandas opens and closes its own connection.
resultGold = pd.read_sql_query(sql_gold, con=engine)
resultGold

Unnamed: 0,oid,probability,candid,fid,mjd,magstat_fid,dmdt_first
0,ZTF18aaiaclp,0.520409,2434223984915015031,2,60188.223982,2,-0.445361
1,ZTF18aakjgzj,0.319556,2434219693515015039,2,60188.219699,2,-0.341342
2,ZTF18aaleydm,0.613368,2434217801715015066,2,60188.217801,2,-0.83061
3,ZTF18aamxief,0.323881,2434223031815015040,2,60188.223032,2,-0.298997
4,ZTF18aanglcr,0.336435,2434215873315015004,2,60188.21588,2,-0.713267
5,ZTF18aaouvke,0.282244,2434220161815015099,2,60188.220162,2,-0.333054
6,ZTF18aaqsuyn,0.414416,2434215875015015115,2,60188.21588,2,-0.880376
7,ZTF18aattenu,0.599858,2434216851815015097,2,60188.216852,2,-0.651601
8,ZTF18aaxkcvg,0.492333,2434224440015015034,2,60188.224444,2,-0.346522
9,ZTF18abbinqq,0.49701,2434182954115015170,1,60188.182951,1,-0.529858


### Trying out the pipelines

In [None]:
from pprint import pprint
from pipeline.eval import run_pipeline
from testing.tests import engine

# Check database connectivity up front. The original bare `engine.begin()`
# discarded its result (a no-op or a leaked connection/transaction depending
# on the SQLAlchemy version); this form releases the connection immediately.
with engine.connect():
    pass

# Model to use (alternative kept for quick switching)
#model = "claude-3-5-sonnet-20240620"
model = "gpt-4o"

# Format for the pipeline
format = "python"

# RAG parameters
max_tokens = 10000
size = 700
overlap = 300
quantity = 10

# Running the pipeline on `query`, which is defined in the
# "Gold values and query to test" cell.
# NOTE(review): the trailing positional arguments (False, True, True, 2, 3)
# are passed through unchanged; their meaning is defined by
# pipeline.eval.run_pipeline -- consider keyword arguments once confirmed.
result, error, total_usage, prompts, _ = run_pipeline(query, model, max_tokens, size, 
                                            overlap, quantity, format, False,
                                            True, True, 2, 3)
print("Resulting table:")
print(result)
print("Total usage of the pipeline:")
pprint(total_usage)

# The prompts used will be saved in this file.
# Explicit UTF-8 so non-ASCII prompt text does not depend on the platform's
# default encoding.
with open(f"prompts/examples/prompts_query_{model}.txt", "w", encoding="utf-8") as f:
    f.write(str(prompts))

  from .autonotebook import tqdm as notebook_tqdm


Tables needed: [object, feature, detection]
Difficulty: advanced
Raw response: ```python
# Sub-query for SN II Classification
sub_query_sn_ii = f'''
SELECT
    oid
FROM
    probability
WHERE
    classifier_name = 'lc_classifier'
    AND class_name = 'SNII'
    AND probability > 0.6
    AND ranking = 1
'''

# Sub-query for Number of Detections
sub_query_detections = f'''
SELECT
    oid
FROM
    object
WHERE
    ndet > 50
'''

# Sub-query for Magnitude Difference in g-band
sub_query_mag_diff = f'''
SELECT
    oid
FROM
    feature
WHERE
    name = 'delta_mag_fid'
    AND value > 2
    AND fid = 1  -- Assuming fid=1 corresponds to ZTF g-band
'''

# Combine Sub-queries with INTERSECT
sub_query_combined = f'''
SELECT oid FROM ({sub_query_sn_ii}) AS sn_ii
INTERSECT
SELECT oid FROM ({sub_query_detections}) AS detections
INTERSECT
SELECT oid FROM ({sub_query_mag_diff}) AS mag_diff
'''

# Main Query to Retrieve Required Information
full_query = f'''
SELECT
    detection.oid,
    detection.candid

In [7]:
# Run the same request through the pipeline several times and log each run,
# together with the gold result, to a CSV file.
from secret.config import SQL_URL
import requests
import sqlalchemy as sa
import csv
from pipeline.eval import run_pipeline

# Select a query
query = "Get the object identifiers, probabilities in the stamp classifier and light curves (only detections) for objects whose highest probability in the stamp classifier is obtained for class SN, that had their first detection in the first 2 days of september, and that qualify as fast risers."

# Obtain the gold SQL query (`.item()` requires exactly one matching row)
sql_gold = excel[excel["request"] == query]["gold_query"].item()

# Setup params for query engine.
# Timeout + raise_for_status(): fail clearly on an unresponsive or erroring
# service instead of hanging or raising a confusing JSON/KeyError.
response = requests.get(SQL_URL, timeout=30)
response.raise_for_status()
params = response.json()['params']
engine = sa.create_engine(f"postgresql+psycopg2://{params['user']}:{params['password']}@{params['host']}/{params['dbname']}")

# No bare `engine.begin()`: its result was discarded (a no-op or a leaked
# connection/transaction depending on the SQLAlchemy version). pandas manages
# its own connection for the query below.
resultGold = pd.read_sql_query(sql_gold, con=engine)

# Gold-side values are identical for every run, so compute them once.
result_gold_cols = resultGold.columns.to_numpy()
result_gold_rows = resultGold["oid"].to_numpy()

# Model to use
model = "gpt-4o"

# Format for the pipeline
format = "python"
rag = True

# RAG parameters
max_tokens = 10000
size = 700
overlap = 300
quantity = 10

# Number of repetitions and numbering offset for the "exp" column.
# NOTE(review): the offset of 40 presumably continues the numbering of
# previously logged runs -- confirm before reusing.
n_runs = 10
exp_offset = 40

to_csv = []
for i in range(n_runs):
    print(f"Experimento número: {i+1}")
    result, error, total_usage, _, table = run_pipeline(query, model, max_tokens, size, 
                                            overlap, quantity, format, False,
                                            rag, True, 2, 3)
    has_result = result is not None
    # A generated query is not guaranteed to return an "oid" column; without
    # this guard a single such run raised KeyError and discarded every
    # experiment collected so far.
    has_oid = has_result and "oid" in result.columns
    for_list = {
        "exp": exp_offset + i + 1,
        "request": query,
        "model": model,
        "format": format,
        "rag": rag,
        "genQuery": table,
        "goldQuery": sql_gold,
        "results": result,
        "resultsCols": result.columns.to_numpy() if has_result else None,
        "resultsRows": result["oid"].to_numpy() if has_oid else None,
        "resultGold": resultGold,
        "resultGoldCols": result_gold_cols,
        "resultGoldRows": result_gold_rows
    }
    to_csv.append(for_list)

headers = to_csv[0].keys()
file_name = f"experiments/{format}_RAG_{rag}.csv"
# NOTE(review): csv.DictWriter stores non-string values via str(), so large
# DataFrames/arrays may be abbreviated by their text representation; serialise
# them explicitly if the full results are needed later.
with open(file_name, mode='w', newline='', encoding='utf-8') as file:
    writer = csv.DictWriter(file, fieldnames=headers)

    # Write the header row
    writer.writeheader()

    # Write the data rows
    writer.writerows(to_csv)

Experimento número: 1
Tables needed: [object, probability, magstat]
Difficulty: advanced
Raw response: ```python
# Sub-query to get object identifiers with the highest probability for class 'SN' in the stamp classifier
sub_query_sn_probability = f'''
SELECT
    oid
FROM
    probability
WHERE
    classifier_name = 'stamp_classifier'
    AND class_name = 'SN'
    AND ranking = 1
'''

# Sub-query to get object identifiers with first detection in the first 2 days of September 2024
sub_query_first_detection_september = f'''
SELECT
    oid
FROM
    object
WHERE
    firstmjd >= 60275.0  -- MJD for September 1, 2024
    AND firstmjd < 60277.0  -- MJD for September 3, 2024
'''

# Sub-query to get object identifiers that qualify as fast risers
sub_query_fast_risers = f'''
SELECT
    oid
FROM
    magstat
WHERE
    dmdt_first < -0.25
'''

# Combine the sub-queries using INTERSECT to get the final list of object identifiers
sub_query_combined = f'''
SELECT
    oid
FROM
    ({sub_query_sn_probabilit

In [13]:
# Reload the logged experiment runs from disk and display them
results_csv = "experiments/Astroinformatics_2024/python_RAG_True.csv"
data = pd.read_csv(results_csv)
data

Unnamed: 0.1,Unnamed: 0,exp,request,model,format,rag,genQuery,goldQuery,results,resultsCols,resultsRows,resultGold,resultGoldCols,resultGoldRows
0,0,31,"Get the object identifiers, probabilities in t...",gpt-4o,python,True,# Sub-query to get object identifiers with the...,"\nSELECT\n sq.oid, sq.probability, sq.candi...",,,,oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...
1,1,32,"Get the object identifiers, probabilities in t...",gpt-4o,python,True,# Sub-query to get object identifiers with the...,"\nSELECT\n sq.oid, sq.probability, sq.candi...",,,,oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...
2,2,33,"Get the object identifiers, probabilities in t...",gpt-4o,python,True,# Sub-query to get object identifiers with the...,"\nSELECT\n sq.oid, sq.probability, sq.candi...",,,,oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...
3,3,34,"Get the object identifiers, probabilities in t...",gpt-4o,python,True,# Sub-query to get object identifiers with the...,"\nSELECT\n sq.oid, sq.probability, sq.candi...",oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...,oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...
4,4,35,"Get the object identifiers, probabilities in t...",gpt-4o,python,True,# Sub-query to get object identifiers with the...,"\nSELECT\n sq.oid, sq.probability, sq.candi...",,,,oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...
5,5,36,"Get the object identifiers, probabilities in t...",gpt-4o,python,True,# Sub-query to get object identifiers with the...,"\nSELECT\n sq.oid, sq.probability, sq.candi...",oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...,oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...
6,6,37,"Get the object identifiers, probabilities in t...",gpt-4o,python,True,# Sub-query to get object identifiers with the...,"\nSELECT\n sq.oid, sq.probability, sq.candi...",oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...,oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...
7,7,38,"Get the object identifiers, probabilities in t...",gpt-4o,python,True,# Sub-query to get object identifiers with the...,"\nSELECT\n sq.oid, sq.probability, sq.candi...",,,,oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...
8,8,39,"Get the object identifiers, probabilities in t...",gpt-4o,python,True,# Sub-query to get object identifiers with the...,"\nSELECT\n sq.oid, sq.probability, sq.candi...",oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...,oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...
9,9,40,"Get the object identifiers, probabilities in t...",gpt-4o,python,True,"\nSELECT oid,\n probability,\n fir...","\nSELECT\n sq.oid, sq.probability, sq.candi...",,,,oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...


In [9]:
# Shift every experiment id down by 10.
# NOTE: this mutates `data` in place and is not idempotent -- each execution
# of the cell subtracts another 10, so run it exactly once per load.
data["exp"] = data["exp"] - 10
data

Unnamed: 0,exp,request,model,format,rag,genQuery,goldQuery,results,resultsCols,resultsRows,resultGold,resultGoldCols,resultGoldRows
0,31,"Get the object identifiers, probabilities in t...",gpt-4o,python,True,# Sub-query to get object identifiers with the...,"\nSELECT\n sq.oid, sq.probability, sq.candi...",,,,oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...
1,32,"Get the object identifiers, probabilities in t...",gpt-4o,python,True,# Sub-query to get object identifiers with the...,"\nSELECT\n sq.oid, sq.probability, sq.candi...",,,,oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...
2,33,"Get the object identifiers, probabilities in t...",gpt-4o,python,True,# Sub-query to get object identifiers with the...,"\nSELECT\n sq.oid, sq.probability, sq.candi...",,,,oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...
3,34,"Get the object identifiers, probabilities in t...",gpt-4o,python,True,# Sub-query to get object identifiers with the...,"\nSELECT\n sq.oid, sq.probability, sq.candi...",oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...,oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...
4,35,"Get the object identifiers, probabilities in t...",gpt-4o,python,True,# Sub-query to get object identifiers with the...,"\nSELECT\n sq.oid, sq.probability, sq.candi...",,,,oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...
5,36,"Get the object identifiers, probabilities in t...",gpt-4o,python,True,# Sub-query to get object identifiers with the...,"\nSELECT\n sq.oid, sq.probability, sq.candi...",oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...,oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...
6,37,"Get the object identifiers, probabilities in t...",gpt-4o,python,True,# Sub-query to get object identifiers with the...,"\nSELECT\n sq.oid, sq.probability, sq.candi...",oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...,oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...
7,38,"Get the object identifiers, probabilities in t...",gpt-4o,python,True,# Sub-query to get object identifiers with the...,"\nSELECT\n sq.oid, sq.probability, sq.candi...",,,,oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...
8,39,"Get the object identifiers, probabilities in t...",gpt-4o,python,True,# Sub-query to get object identifiers with the...,"\nSELECT\n sq.oid, sq.probability, sq.candi...",oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...,oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...
9,40,"Get the object identifiers, probabilities in t...",gpt-4o,python,True,"\nSELECT oid,\n probability,\n fir...","\nSELECT\n sq.oid, sq.probability, sq.candi...",,,,oid probability ca...,['oid' 'probability' 'candid' 'fid' 'mjd' 'mag...,['ZTF18aaiaclp' 'ZTF18aakjgzj' 'ZTF18aaleydm' ...


In [None]:
# Persist the table back to the same file.
# index=False: writing the index adds a new "Unnamed: 0" column on every
# read/write round trip of this CSV.
data.to_csv("experiments/Astroinformatics_2024/python_RAG_True.csv", index=False)

#### o1 tests

In [None]:
from pprint import pprint
from pipeline.main import run_pipeline, engine

# Check database connectivity up front. The original bare `engine.begin()`
# discarded its result (a no-op or a leaked connection/transaction depending
# on the SQLAlchemy version); this form releases the connection immediately.
with engine.connect():
    pass

# Model to use (alternative kept for quick switching)
#model = "claude-3-5-sonnet-20240620"
model = "o1-preview"

# Format for the pipeline
format = "python"

# RAG parameters
max_tokens = 10000
size = 700
overlap = 300
quantity = 10

# Running the pipeline on `query`, which must already be defined by an
# earlier cell of this notebook.
result, total_usage, prompts = run_pipeline(query, model, max_tokens, size, 
                                            overlap, quantity, format, False, 
                                            engine, rag_pipe=True, 
                                            self_corr=True)
print("Resulting table:")
print(result)
print("Total usage of the pipeline:")
pprint(total_usage)

# The prompts used will be saved in this file.
# Explicit UTF-8 so non-ASCII prompt text does not depend on the platform's
# default encoding.
with open(f"prompts/examples/prompts_query_{model}.txt", "w", encoding="utf-8") as f:
    f.write(str(prompts))

Tables needed: [object, detection, magstat]
Difficulty: advanced
```python
# Get oids of objects classified as 'SN II' with probability > 0.6
sub_query_probability = f'''
SELECT oid
FROM probability
WHERE classifier_name='lc_classifier'
AND class_name='SN II'
AND ranking=1
AND probability > 0.6
'''

# Get oids of objects with first observation between 2019-01-01 and 2022-12-31
# MJD range from 58484 (2019-01-01) to 59580 (2022-01-01)
sub_query_object = f'''
SELECT oid
FROM object
WHERE firstmjd BETWEEN 58484 AND 59946
'''

# Combine previous subqueries to get oids satisfying both class and date criteria
sub_query_oid1 = f'''
SELECT prob_oids.oid
FROM ({sub_query_probability}) AS prob_oids
INNER JOIN ({sub_query_object}) AS obj_oids
ON prob_oids.oid = obj_oids.oid
'''

# Get oids with more than 30 detections
sub_query_ndet = f'''
SELECT oid
FROM detection
GROUP BY oid
HAVING COUNT(*) > 30
'''

# Combine to get oids satisfying class, date, and number of detections criteria
sub_query_oid2

Exception: Failed again: local variable 'results' referenced before assignment