In [2]:
# --- Target database -------------------------------------------------------
dbHost = "http://localhost:19002"
dataverse = "LocationDb"
table_name = "Locations_RTree_Constant"

# --- Workload shape --------------------------------------------------------
# Number of CSV rows packed into each generated INSERT statement.
insert_batch_size = 1000
# Number of range queries generated after each inserted batch.
query_count = 20

# Upper bound on the number of batches placed in the plan.
# None means no limit
debug_max_batches = 10

# --- Derived paths ---------------------------------------------------------
# File the generated benchmark plan is written to (one command per line).
benchmark_plan_path = f"commands/benchmark_plan_{dataverse}_{table_name}_{insert_batch_size}_{query_count}.sql"

In [3]:
# Load the point data set into `data` as a list of
# [id, x, y, description] rows (all values kept as strings).
import csv  # imported here because this cell runs before the notebook's import cell

data = []
# newline="" is what the csv module expects so that it can handle line endings
# (and newlines embedded in quoted fields) itself.
with open("data/point_data_centroid.csv", "r", newline="") as f:
    reader = csv.reader(f)
    next(reader, None)  # skip the header row; tolerate a completely empty file
    for row in reader:
        # Blank (or whitespace-only) lines carry no record; skip them instead
        # of failing with an IndexError.
        if not row or (len(row) == 1 and not row[0].strip()):
            continue
        if len(row) < 4:
            raise ValueError(
                f"data/point_data_centroid.csv line {reader.line_num}: "
                f"expected at least 4 columns, got {len(row)}"
            )
        # csv.reader honours quoting, so a description containing commas is
        # kept whole rather than being cut at the first comma.
        record_id, location_x, location_y, description = (
            field.strip() for field in row[:4]
        )
        data.append([record_id, location_x, location_y, description])

In [16]:
import random
import requests
import math

def point(x, y):
    """Return the text ``point("x,y")`` for the given coordinates.

    The coordinates are formatted as-is (strings or numbers both work); the
    result is meant to be spliced into a generated statement.
    """
    coordinates = "{},{}".format(x, y)
    return 'point("' + coordinates + '")'

def rectangle(x1, y1, x2, y2):
    """Return the text ``rectangle("x1,y1 x2,y2")`` for two corner points.

    The corners are formatted as-is and separated by a single space; the
    result is meant to be spliced into a generated statement.
    """
    first_corner = "{},{}".format(x1, y1)
    second_corner = "{},{}".format(x2, y2)
    return 'rectangle("{} {}")'.format(first_corner, second_corner)

def _escape_string_literal(text):
    """Escape ``text`` so it can sit inside a double-quoted string literal on one line."""
    return (
        str(text)
        .replace("\\", "\\\\")  # must come first so later escapes are not doubled
        .replace('"', '\\"')
        .replace("\n", "\\n")
        .replace("\r", "\\r")
        .replace("\t", "\\t")
    )


def generate_insert_command(batch):
    """Build a single-line INSERT statement for a batch of rows.

    Parameters
    ----------
    batch : sequence of rows, each indexable as ``[id, x, y, description]``.

    Returns
    -------
    str
        ``INSERT INTO <table_name> ([{...},{...}])`` with one object per row.
        The id is emitted unquoted, the location via ``point(x, y)`` and the
        description as an escaped double-quoted string. An empty batch yields
        ``INSERT INTO <table_name> ([])``.
    """
    records = []
    for row in batch:
        record_id = row[0]
        location_x = row[1]
        location_y = row[2]
        description = _escape_string_literal(row[3])
        records.append(
            f'{{"id": {record_id}, "location": {point(location_x, location_y)}, "description": "{description}"}}'
        )
    # Joining by position (rather than comparing each row to the last one by
    # value) keeps the separators correct when the batch contains rows equal
    # to its final row.
    return f"INSERT INTO {table_name} ([" + ",".join(records) + "])"

def generate_query_commands(inserted_points):
    """Generate random rectangle-intersection queries around inserted points.

    Up to ``query_count`` distinct entries are sampled from ``inserted_points``;
    for each one a rectangle centred on the point is built, with a random
    half-width in ``[0, stdev_x * stdev_scale)`` and half-height in
    ``[0, stdev_y * stdev_scale)``.

    Parameters
    ----------
    inserted_points : list of ``(x, y)`` pairs; values may be strings or
        numbers (they are converted with ``float``).

    Returns
    -------
    list of str
        One SELECT statement per sampled point. Fewer than ``query_count``
        statements (possibly none) are returned when fewer points are
        available, instead of ``random.sample`` raising ``ValueError``.
    """
    stdev_x = 60
    stdev_y = 20
    stdev_scale = .3

    # random.sample() refuses a sample larger than the population, which
    # happens when the first batch is smaller than query_count.
    sample_size = min(query_count, len(inserted_points))
    selected_points = random.sample(inserted_points, sample_size)

    queries = []
    for selected_point in selected_points:
        center_x = float(selected_point[0])
        center_y = float(selected_point[1])
        offset_x = random.random() * stdev_x * stdev_scale
        offset_y = random.random() * stdev_y * stdev_scale
        start_x = center_x - offset_x
        start_y = center_y - offset_y
        end_x = center_x + offset_x
        end_y = center_y + offset_y

        query = f"SELECT location, description FROM {table_name} WHERE spatial_intersect(location, {rectangle(start_x, start_y, end_x, end_y)});"
        queries.append(query)
    return queries

In [18]:
# Build the benchmark plan: for every batch of rows, one INSERT statement
# followed by queries sampled from all points inserted so far.
import os  # imported here because the cell that imports os runs later in the notebook

inserted_points = []
current_batch = 0
commands = []
# debug_max_batches of None means "process every batch".
while debug_max_batches is None or current_batch < debug_max_batches:
    batch_start = current_batch * insert_batch_size
    batch_to_insert = data[batch_start:batch_start + insert_batch_size]
    if not batch_to_insert:  # no more data to insert
        break

    insert_command = generate_insert_command(batch_to_insert)
    # Remember the (x, y) of every inserted row so queries can target them.
    inserted_points.extend((row[1], row[2]) for row in batch_to_insert)

    query_commands = generate_query_commands(inserted_points)
    commands.append(insert_command)
    commands.extend(query_commands)

    current_batch += 1

# write the commands to a file, one command per line
plan_dir = os.path.dirname(benchmark_plan_path)
if plan_dir:
    # open(..., "w") does not create missing parent directories.
    os.makedirs(plan_dir, exist_ok=True)
with open(benchmark_plan_path, "w") as f:
    f.writelines(command + "\n" for command in commands)
    

In [12]:
import logging
import datetime
import os

# Directory that receives the benchmark log files. It is created up front
# (a no-op when it already exists) so the log file can be opened later.
logs_dir = "./logs"
os.makedirs(logs_dir, exist_ok=True)


In [19]:
import time

def execute_command(command):
    """POST one statement to the query service and return the HTTP response.

    The statement is sent together with the configured ``dataverse`` as the
    request's ``data`` payload to ``<dbHost>/query/service``. The response
    object is returned untouched; status handling is left to the caller.
    """
    endpoint = f"{dbHost}/query/service"
    payload = {
        'statement': command,
        'dataverse': dataverse,
    }
    return requests.post(endpoint, data=payload)

# Execute the benchmark plan line by line, logging the outcome of every
# command to a fresh, timestamped log file.
log_file_path = f"{logs_dir}/benchmark_{dataverse}_{table_name}_{insert_batch_size}_{query_count}_{str(time.time())}.log"

logger = logging.getLogger()
# logging.basicConfig() does nothing when the root logger already has
# handlers, e.g. when this cell is re-run in the same kernel. Without this
# reset the new log path would be printed while records kept going to the
# previous run's file.
for stale_handler in list(logger.handlers):
    logger.removeHandler(stale_handler)
    stale_handler.close()

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    filename=log_file_path,
    filemode='a'
)
print("Logging to file")
print(f"To see the logs in real time: tail -f {log_file_path}")

with open(benchmark_plan_path, "r") as f:
    for line in f:
        command = line.strip()
        if not command:
            continue  # ignore blank lines in the plan
        logger.info("Executing command")
        response = execute_command(command)
        if response.status_code != 200:
            logger.error(f"Error executing command: {response.status_code}")
        else:
            logger.info(f"Command executed successfully: {response.json()}")


Logging to file
To see the logs in real time: tail -f ./logs/benchmark_LocationDb_Locations_RTree_Constant_1000_20_1745685807.316108.log
