## Parameters

In [35]:
import random

# --- Source data ------------------------------------------------------------
# Name of the OSM extract; every data path below is derived from it.
osm_dataset = "cemetery"
csv_path_local = f"./data/csv/{osm_dataset}.csv"
adm_path_local = f"./data/adm/{osm_dataset}.adm"
# Path interpolated into the LOAD DATASET statement
# (presumably the location of the ADM file inside the database container — TODO confirm).
adm_path_container = f"/opt/asterixdb/datasets/adm/{osm_dataset}.adm"

# --- Target database --------------------------------------------------------
dbHost = "http://localhost:19002"
dataverse = "LocationDb"
dataset_name = "Locations_RTree_CorrelatedPrefix"

# --- Workload ---------------------------------------------------------------
# Number of CSV rows sampled as query-rectangle centers.
query_count = 10_000
# Upper bounds handed to random.uniform(1, ...) when sizing each query rectangle.
max_mbr_width = max_mbr_height = 5  # TODO Update with standard dev

# Seed the global RNG once so the sampled centers and rectangle sizes are repeatable.
random.seed(42)

## Create Database

In [36]:
import requests

# Read the whole SQL script from disk and submit it to the query service
# as a single statement, then show the raw response body.
create_db_path = "CREATE_DATABASE.sql"
with open(create_db_path, "r") as ddl_file:
    create_database_script = ddl_file.read()

create_db_payload = {'statement': create_database_script}
response = requests.post(f"{dbHost}/query/service", data=create_db_payload)
print(response.text)

{
	"requestID": "dab48946-d953-4225-ad62-46d194557ccb",
	"plans":{},
	"status": "success",
	"metrics": {
		"elapsedTime": "3.782436876s",
		"executionTime": "3.219411335s",
		"compileTime": "0ns",
		"queueWaitTime": "0ns",
		"resultCount": 0,
		"resultSize": 0,
		"processedObjects": 0,
		"bufferCacheHitRatio": "0.00%",
		"bufferCachePageReadCount": 0
	}
}



## Load Data

In [37]:
# Build the LOAD DATASET statement for the configured dataset and ADM file,
# run it in the configured dataverse, and show the raw response body.
load_command = f"""
LOAD DATASET {dataset_name} USING localfs
    (("path"="127.0.0.1:///{adm_path_container}"),("format"="adm"));
"""
load_payload = {'statement': load_command, 'dataverse': dataverse}
response = requests.post(f"{dbHost}/query/service", data=load_payload)
print(response.text)

{
	"requestID": "c27dd64a-a541-4928-8d12-5170ddff262e",
	"plans":{},
	"status": "success",
	"metrics": {
		"elapsedTime": "22.849247136s",
		"executionTime": "22.808055719s",
		"compileTime": "0ns",
		"queueWaitTime": "0ns",
		"resultCount": 0,
		"resultSize": 0,
		"processedObjects": 0,
		"bufferCacheHitRatio": "0.00%",
		"bufferCachePageReadCount": 0
	}
}



## Plan Query

In [20]:
import random

# Collect the (x, y) pair of every data row in the CSV export, then draw
# `query_count` of them (without replacement) to use as query centers.
all_bounding_box_centers = []
with open(csv_path_local, "r") as csv_file:
    csv_file.readline()  # Skip header
    for line in csv_file:
        # Skip blank lines (e.g. a trailing newline at end of file) and rows
        # starting with "lat" (presumably stray header rows — TODO confirm).
        if not line.strip() or line.startswith("lat"):
            continue
        # Only the first three commas separate columns; capping the split at 3
        # keeps a description that itself contains commas in a single field
        # instead of failing the unpack. The names avoid shadowing builtin `id`.
        _record_id, x, y, _description = line.strip().split(",", 3)
        all_bounding_box_centers.append((float(x), float(y)))

# random.sample draws without replacement, so fail with a message that names
# the actual cause when the CSV has fewer rows than requested.
if query_count > len(all_bounding_box_centers):
    raise ValueError(
        f"query_count={query_count} exceeds the {len(all_bounding_box_centers)} "
        f"rows read from {csv_path_local}"
    )
bounding_box_centers = random.sample(all_bounding_box_centers, query_count)

In [22]:
import random
from utils import point, rectangle, create_offset_rectangle
# Write one spatial_intersect query per sampled center into the plan file,
# using a randomly sized rectangle centered on that point.
query_benchmark_plan_path = f"commands/benchmark_query_only_plan_{dataverse}_{dataset_name}_{query_count}.sql"

with open(query_benchmark_plan_path, 'w') as plan_file:
    for center_x, center_y in bounding_box_centers:
        # Width is drawn before height; the order of RNG calls determines the
        # generated plan for a given seed, so it must not be swapped.
        mbr_width = random.uniform(1, max_mbr_width)
        mbr_height = random.uniform(1, max_mbr_height)
        half_width, half_height = mbr_width/2, mbr_height/2
        start_x, start_y = center_x - half_width, center_y - half_height
        end_x, end_y = center_x + half_width, center_y + half_height
        # "table_name" is a placeholder that is substituted when the plan is executed.
        query = f"SELECT * FROM table_name WHERE spatial_intersect(location, create_rectangle(create_point({start_x},{start_y}), create_point({end_x},{end_y})));"
        plan_file.write(query + "\n")

# Preview the first five generated statements.
with open(query_benchmark_plan_path, 'r') as plan_file:
    for _ in range(5):
        print(plan_file.readline().strip())

SELECT * FROM table_name WHERE spatial_intersect(location, create_rectangle(create_point(32.75425089702681,46.30926325080799), create_point(37.07498771145207,51.207432859479866)));
SELECT * FROM table_name WHERE spatial_intersect(location, create_rectangle(create_point(135.14376006162507,35.00700258745946), create_point(139.7932432983032,38.72208223273814)));
SELECT * FROM table_name WHERE spatial_intersect(location, create_rectangle(create_point(10.198663634658354,49.98518157687668), create_point(11.791897561752128,51.95208881311698)));
SELECT * FROM table_name WHERE spatial_intersect(location, create_rectangle(create_point(14.173785940939641,68.09614389393309), create_point(18.950676865832794,69.55073945234224)));
SELECT * FROM table_name WHERE spatial_intersect(location, create_rectangle(create_point(-2.106174588474235,46.68534263244907), create_point(0.7342847041863191,50.963711677178516)));


## Execute Queries

In [38]:
from utils import Logger

# Replay every statement of the query plan against the query service and
# write one log record per request (error records for failed requests).
logs_filename_parts = [dataset_name, query_count]
logger = Logger(logs_filename_parts, True)

# Read the plan up front so the file is not held open for the whole run.
with open(query_benchmark_plan_path, 'r') as f:
    lines = f.readlines()

for index, line in enumerate(lines):
    # The plan uses the placeholder "table_name"; bind it to the dataset under test.
    line = line.strip().replace("table_name", dataset_name)
    if not line:
        continue
    # Zero-padded plan line index, sent as client_context_id so each log
    # record can be matched back to the statement that produced it.
    trace_id = str(index).zfill(10)
    response = requests.post(f"{dbHost}/query/service", data={'statement': line, 'dataverse': dataverse, 'client_context_id': trace_id})
    try:
        json_response = response.json()
    except ValueError:
        # The body was not valid JSON. Record what is known and keep going
        # rather than aborting the remaining queries with an unlogged exception.
        logger.error({
            "trace_id": trace_id,
            "status": None,
            "http.status": response.status_code,
            "metrics": None,
            "error": response.text[:500],
        })
        continue
    if response.status_code != 200:
        logger.error({
            "trace_id": json_response.get("clientContextID"),
            "status": json_response.get("status"),
            "http.status": response.status_code,
            "metrics": json_response.get("metrics"),
            "error": json_response.get("errors"),
        })
    else:
        results = json_response.get("results")
        logger.log({
            "trace_id": json_response.get("clientContextID"),
            "status": json_response.get("status"),
            "http.status": response.status_code,
            # Distinguish "no results field" (None) from "zero matches" (0);
            # a truthiness check would report an empty result list as None.
            "result_count": len(results) if results is not None else None,
            "metrics": json_response.get("metrics"),
        })

1745862836 - INFO - {"trace_id": "0000000000", "status": "success", "http.status": 200, "result_count": 4841, "metrics": {"elapsedTime": "1.540309793s", "executionTime": "1.483192209s", "compileTime": "205.537375ms", "queueWaitTime": "0ns", "resultCount": 4841, "resultSize": 1829240, "processedObjects": 9682, "bufferCacheHitRatio": "100.00%", "bufferCachePageReadCount": 6768}}
1745862836 - INFO - {"trace_id": "0000000001", "status": "success", "http.status": 200, "result_count": 4756, "metrics": {"elapsedTime": "479.372792ms", "executionTime": "475.565792ms", "compileTime": "58.912334ms", "queueWaitTime": "0ns", "resultCount": 4756, "resultSize": 1799050, "processedObjects": 9512, "bufferCacheHitRatio": "100.00%", "bufferCachePageReadCount": 6675}}
1745862837 - INFO - {"trace_id": "0000000002", "status": "success", "http.status": 200, "result_count": 2337, "metrics": {"elapsedTime": "110.252ms", "executionTime": "108.990584ms", "compileTime": "30.89475ms", "queueWaitTime": "0ns", "resu