In [None]:
# --- Benchmark workload ---
input_dataset = "data/csv/sports.csv"  # CSV file the rows are read from
insert_batch_size = 10_000  # rows per generated INSERT statement
query_count = 1000  # queries generated after every insert batch

# --- Target dataset ---
dataverse = "LocationDb"
table_name = "Locations_RTree_Constant"

# --- Service endpoint and file locations ---
dbHost = "http://localhost:19002"
create_db_path = "CREATE_DATABASE.sql"
# The plan file name carries every parameter that shapes the generated workload.
_plan_suffix = "_".join(str(part) for part in (dataverse, table_name, insert_batch_size, query_count))
benchmark_plan_path = f"commands/benchmark_plan_{_plan_suffix}.sql"

# Upper bound on the number of insert batches to generate.
# None means no limit
debug_max_batches = None

## Generate Commands

In [None]:
import random

# Load the dataset into `data` as [id, x, y, description] rows, then shuffle it.
data = []
with open(input_dataset, "r") as f:
    f.readline()  # discard the first line (presumably the CSV header -- confirm)
    # Numbering starts at 2 because the first line was consumed above.
    for line_number, line in enumerate(f, start=2):
        stripped = line.strip()
        if not stripped:
            # A blank line carries no row; indexing into it would raise IndexError.
            continue
        fields = stripped.split(",")
        if len(fields) < 4:
            raise ValueError(
                f"{input_dataset}:{line_number}: expected at least 4 "
                f"comma-separated fields, got {len(fields)}"
            )
        # Keep the first four columns only; anything after them is ignored.
        # Slicing (instead of assigning to `id`, `locationX`, ...) also avoids
        # shadowing the builtin `id` in the notebook namespace.
        # NOTE(review): a plain split(",") does not understand quoted fields, so
        # a description containing a comma is cut at that comma.
        data.append(fields[:4])

# Randomise the insertion order (unseeded, so it differs from run to run).
random.shuffle(data)

In [None]:
import random
import requests
import os

def point(x, y):
    """Return the text of a ``point("x,y")`` constructor call for the given coordinates."""
    coordinates = "{},{}".format(x, y)
    return 'point("' + coordinates + '")'

def rectangle(x1, y1, x2, y2):
    """Return the text of a ``rectangle("x1,y1 x2,y2")`` constructor call.

    The two corners are written as comma-separated pairs with a single space
    between them.
    """
    first_corner = "{},{}".format(x1, y1)
    second_corner = "{},{}".format(x2, y2)
    return 'rectangle("' + first_corner + " " + second_corner + '")'

def generate_insert_command(batch):
    """Build one single-line INSERT statement covering every row of ``batch``.

    Args:
        batch: Sequence of rows, each indexed as ``[id, x, y, description]``.

    Returns:
        ``INSERT INTO <table_name> ([`` followed by one object literal per row,
        separated by commas, followed by ``])``. An empty batch yields an
        empty list literal.
    """
    # Separators are placed by position via str.join. Deciding "is this the last
    # row?" by comparing each row with batch[-1] drops the comma after any
    # earlier row that merely has the same values as the last one.
    # NOTE(review): `description` is interpolated verbatim; a value containing a
    # double quote would terminate the string literal early.
    records = [
        f'{{"id": {row[0]}, "location": {point(row[1], row[2])}, "description": "{row[3]}"}}'
        for row in batch
    ]
    return f"INSERT INTO {table_name} ([" + ",".join(records) + "])"

def generate_query_commands(inserted_points):
    """Generate rectangle-intersection queries centred on randomly sampled points.

    Args:
        inserted_points: List of ``(x, y)`` pairs; each coordinate must be
            convertible with ``float()``.

    Returns:
        A list of ``SELECT`` statements, one per sampled point. The list has
        ``query_count`` entries, or fewer when fewer points are available.
    """
    # random.sample raises ValueError when asked for more items than the
    # population holds, so cap the sample size at what is available.
    sample_size = min(query_count, len(inserted_points))
    selected_points = random.sample(inserted_points, sample_size)
    sigma_values = [3, 4, 5]  # Possible selectivities (σ)

    queries = []
    for selected_point in selected_points:
        sigma = random.choice(sigma_values)
        # Rectangle extent: 360 x 180 scaled by 10^-sigma.
        width = 360 * (10 ** -sigma)
        height = 180 * (10 ** -sigma)

        # Ensure coordinates are floats before calculations
        center_x = float(selected_point[0])
        center_y = float(selected_point[1])

        # Rectangle centred on the sampled point
        start_x = center_x - (width / 2)
        end_x = center_x + (width / 2)
        start_y = center_y - (height / 2)
        end_y = center_y + (height / 2)

        queries.append(
            f"SELECT * FROM {table_name} WHERE spatial_intersect(location, {rectangle(start_x, start_y, end_x, end_y)});"
        )

    return queries


In [None]:
# Build the benchmark plan: for each batch, one INSERT statement followed by
# queries sampled from every point inserted so far (not just the current batch).
inserted_points = []
commands = []
current_batch = 0
while debug_max_batches is None or current_batch < debug_max_batches:
    batch_start = current_batch * insert_batch_size
    batch_to_insert = data[batch_start:batch_start + insert_batch_size]
    if not batch_to_insert:  # no more data to insert
        break

    commands.append(generate_insert_command(batch_to_insert))
    # Columns 1 and 2 of each row are the x and y coordinates.
    inserted_points.extend((row[1], row[2]) for row in batch_to_insert)
    commands.extend(generate_query_commands(inserted_points))

    current_batch += 1

# write the commands to a file, one command per line
benchmark_dir = os.path.dirname(benchmark_plan_path)
# dirname is "" when the plan path has no directory component, and
# os.makedirs("") raises; exist_ok=True also avoids a check-then-create race.
if benchmark_dir:
    os.makedirs(benchmark_dir, exist_ok=True)
with open(benchmark_plan_path, "w") as f:
    f.writelines(command + "\n" for command in commands)
    

In [None]:
import logging
import os

# Make sure the ./logs directory is present before anything else runs.
logs_dir = "./logs"
logs_dir_present = os.path.exists(logs_dir)
if not logs_dir_present:
    os.makedirs(logs_dir, exist_ok=True)


## Generate Database Schema

In [None]:
# Send the schema script to the query service and show the raw reply.
with open(create_db_path, "r") as sql_file:
    create_database_script = sql_file.read()
response = requests.post(f"{dbHost}/query/service", data={'statement': create_database_script})
print(response.text)
# The body is printed first so the service's error details stay visible; then
# stop on an HTTP error status instead of letting later cells run against a
# schema that was never created.
response.raise_for_status()

## Execute Commands

In [None]:
from utils import Logger

# The logger name combines dataverse, table, batch size and query count.
# NOTE(review): the meaning of the second positional argument (True) is defined
# by utils.Logger, which is not visible here -- confirm before changing it.
log_name = "_".join(str(part) for part in (dataverse, table_name, insert_batch_size, query_count))
logger = Logger(log_name, True)
def execute_command(command, client_context_id=None):
    """POST one statement to the ``/query/service`` endpoint under ``dbHost``.

    Args:
        command: Statement text, sent as the ``statement`` form field.
        client_context_id: Optional value sent as the ``client_context_id``
            form field.

    Returns:
        Whatever ``requests.post`` returns for the call.
    """
    endpoint = f"{dbHost}/query/service"
    form_fields = {
        'statement': command,
        'dataverse': dataverse,
        'client_context_id': client_context_id,
    }
    return requests.post(endpoint, data=form_fields)
# Replay the benchmark plan line by line, logging a start event and a
# completion (or error) event for every command.
with open(benchmark_plan_path, "r") as f:
    # Count from 1 so the trace id equals the line number in the plan file
    # (blank lines still consume a number).
    for index, line in enumerate(f, start=1):
        command = line.strip()
        if not command:
            continue

        command_type = "insert" if command.startswith("INSERT") else "query"
        event = f"command.{command_type}"
        trace_id = str(index).zfill(10)
        # Use the same "trace_id" key as the completion records below so start
        # and completion events can be matched on one field.
        logger.log({"trace_id": trace_id, "event": event, "status": "start"})
        response = execute_command(command, client_context_id=trace_id)

        # Parse the body once. A non-JSON body makes .json() raise a ValueError
        # subclass; record that as an error for this command instead of
        # aborting the whole run.
        try:
            json_response = response.json()
        except ValueError:
            print(f"Error: {response.text}")
            logger.log({
                "trace_id": trace_id,
                "event": event,
                "http.status": response.status_code,
                "status": None,
                "metrics": None,
                "error": response.text,
            })
            continue

        if response.status_code != 200:
            print(f"Error: {json_response}")
            logger.log({
                "trace_id": json_response.get("clientContextID"),
                "event": event,
                "http.status": response.status_code,
                "status": json_response.get("status"),
                "metrics": json_response.get("metrics"),
                "error": json_response.get("errors"),
            })
        else:
            results = json_response.get("results")
            logger.log({
                "trace_id": json_response.get("clientContextID"),
                "event": event,
                "http.status": response.status_code,
                "status": json_response.get("status"),
                # A missing or empty result list is logged as None, not 0.
                "result_count": len(results) if results else None,
                "metrics": json_response.get("metrics"),
            })
                    
