In [17]:
import json

# Path of the JSON file describing the experiments; it is opened and parsed
# with json.load further down when the `experiments` list is built.
exp_config_file = "../merged.json"

In [105]:
class RTreeProperties:
    """Attribute bag for the named properties passed in as keyword arguments.

    Every attribute listed in ``_FIELDS`` is always set on the instance; a
    property whose key is absent from ``kwargs`` is stored as ``None``.
    Keys that are not listed are ignored.
    """

    # (attribute name, key looked up in kwargs), in assignment order.
    # The order matters: __str__ prints attributes in the order they were set.
    _FIELDS = (
        ('Dimension', 'Dimension'),
        ('FillFactor', 'Fill factor'),
        ('IndexCapacity', 'Index capacity'),
        ('LeafCapacity', 'Leaf capacity'),
        ('TightMbrs', 'Tight MBRs'),
        ('NearMinimumOverlapFactor', 'Near minimum overlap factor'),
        ('ReinsertFactor', 'Reinsert factor'),
        ('SplitDistributionFactor', 'Split distribution factor'),
        ('Utilization', 'Utilization'),
        ('Reads', 'Reads'),
        ('Writes', 'Writes'),
        ('Hits', 'Hits'),
        ('Misses', 'Misses'),
        ('TreeHeight', 'Tree height'),
        ('NumberOfData', 'Number of data'),
        ('NumberOfNodes', 'Number of nodes'),
        ('Splits', 'Splits'),
        ('Adjustments', 'Adjustments'),
        ('QueryResults', 'Query results'),
        ('BufferHits', 'Buffer hits'),
        ('IndexId', 'IndexId'),
        ('Status', 'Status'),
        ('ElapsedTime', 'Elapsed Time'),
        # Attributes added later; their names follow the same
        # capitalised-first-letter convention as the ones above.
        ('KnnQuery', 'knn query'),
        ('IndexedSpace', 'Indexed space'),
        ('Operations', 'Operations'),
        ('IndexIo', 'Index I/O'),
        ('LeafIo', 'Leaf I/O'),
    )

    def __init__(self, **kwargs):
        """Copy each known property from ``kwargs`` onto the instance."""
        for attr_name, source_key in self._FIELDS:
            setattr(self, attr_name, kwargs.get(source_key))

    def __str__(self):
        """Return one ``Name: value`` line per instance attribute."""
        rendered = []
        for attr_name, attr_value in vars(self).items():
            rendered.append(f"{attr_name}: {attr_value}")
        return '\n'.join(rendered)

In [82]:
class DataInfo:
    """Plain record describing a data set of an experiment.

    Stores the five constructor arguments unchanged under attributes of
    the same name.
    """

    # Attributes shown by __str__, in display order.
    _SHOWN = ("size", "dimensions", "distribution", "skewness", "bounds")

    def __init__(self, size, dimensions, distribution, skewness, bounds):
        self.size = size
        self.dimensions = dimensions
        self.distribution = distribution
        self.skewness = skewness
        self.bounds = bounds

    def __str__(self):
        """Return ``DataInfo(size=..., dimensions=..., ...)``."""
        parts = [f"{field}={getattr(self, field)}" for field in self._SHOWN]
        return "DataInfo(" + ", ".join(parts) + ")"

class QueryInfo:
    """Description of one query workload of an experiment.

    Attributes:
        query_type: value of the ``type`` argument (the code in this file
            compares it against ``"range"`` and ``"knn"``).
        size, dimensions, distribution, skewness, bounds: stored unchanged.
        query_ranges: value of ``query_range``; for a range query this is
            iterated as a sequence of per-query extents.
        ks: value of ``k``, stored unchanged.
        query_strs: for range queries, one string per entry of
            ``query_ranges`` with its components joined by ``"x"``
            (e.g. ``[0.01, 0.01]`` -> ``"0.01x0.01"``); empty otherwise.
    """

    # NOTE: the first parameter shadows the builtin ``type``; it is kept
    # because instances are created with ``QueryInfo(**query)`` and the
    # keyword name is therefore part of the interface.
    def __init__(self, type, size, dimensions, distribution, skewness, query_range=None, k=None, bounds=None):
        self.query_type = type
        self.size = size
        self.dimensions = dimensions
        self.distribution = distribution
        self.skewness = skewness
        self.query_ranges = query_range
        self.ks = k
        self.bounds = bounds
        self.query_strs = []

        # A range query created without ``query_range`` (its default is
        # None) used to raise TypeError here; it now simply has no strings.
        if self.query_type == "range" and self.query_ranges is not None:
            self.query_strs = [
                "x".join(str(extent) for extent in q_range)
                for q_range in self.query_ranges
            ]

    def __str__(self):
        """Return a one-line ``QueryInfo(...)`` summary of the stored values."""
        return (f"QueryInfo(type={self.query_type}, size={self.size}, dimensions={self.dimensions}, "
                f"distribution={self.distribution}, skewness={self.skewness}, "
                f"query_range={self.query_ranges}, k={self.ks}, bounds={self.bounds})")

class BaselineInfo:
    """Pairs a baseline's name with its configuration value."""

    def __init__(self, name, config):
        self.name, self.config = name, config

    def __str__(self):
        """Return ``BaselineInfo(name=..., config=...)``."""
        return "BaselineInfo(name={}, config={})".format(self.name, self.config)

class Experiment:
    """One experiment: a data description, its queries and its baselines.

    Attributes:
        data_info: object exposing ``size``, ``distribution`` and ``skewness``.
        query_info: iterable of objects exposing ``query_type``, ``size``,
            ``distribution``, ``skewness``, ``query_strs`` and ``ks``.
        baseline_info: iterable of objects exposing ``name``.

    Each ``is_*`` method returns a ``(matched, label)`` tuple; ``label`` is
    ``None`` when ``matched`` is ``False``.
    """

    def __init__(self, data_info, query_info, baseline_info):
        self.data_info = data_info
        self.query_info = query_info
        self.baseline_info = baseline_info

    def __str__(self):
        """Return a multi-line summary of the data, queries and baselines."""
        queries_str = "\n  ".join(str(q) for q in self.query_info)
        baseline_str = "\n  ".join(str(b) for b in self.baseline_info)
        return (f"Experiment:\n"
                f"Data Info:\n  {self.data_info}\n"
                f"Queries Info:\n  {queries_str}\n"
                f"Baseline Info:\n  {baseline_str}")

    def is_baseline(self, baseline_name):
        """Return ``(True, result_dir)`` if a baseline has this name."""
        if any(info.name == baseline_name for info in self.baseline_info):
            return True, f"result/libspatialindex/{baseline_name}"
        return False, None

    def is_data(self, data_size, dimensions, distribution, skewness):
        """Return ``(True, label)`` if the data description matches.

        Only size, distribution and skewness are compared; ``dimensions``
        is not checked and is used solely to build the label.
        """
        expected = self.data_info
        if expected.size != data_size:
            return False, None
        if expected.distribution != distribution:
            return False, None
        if expected.skewness != skewness:
            return False, None
        return True, f"data_{data_size}_{dimensions}_{distribution}_{skewness}"

    def is_query(self, type, size, dimensions, distribution, skewness, query_str=None, k=None):
        """Return ``(True, label)`` for the first matching query description.

        A description matches when its type, size, distribution and
        skewness equal the arguments and, additionally, ``query_str`` is
        one of its ``query_strs`` (type ``"range"``) or ``k`` is one of
        its ``ks`` (type ``"knn"``). ``dimensions`` is not compared; it is
        only used in the label.
        """
        for info in self.query_info:
            if info.query_type != type or info.size != size:
                continue
            if info.distribution != distribution or info.skewness != skewness:
                continue

            # ``query_strs`` / ``ks`` may be None when the description was
            # created without them; treat that as "contains nothing"
            # instead of raising TypeError on the membership test.
            if type == "range":
                if query_str not in (info.query_strs or ()):
                    continue
                return True, f"{type}_{size}_{dimensions}_{distribution}_{skewness}_{query_str}"

            if type == "knn" and k not in (info.ks or ()):
                continue
            return True, f"{type}_{size}_{dimensions}_{distribution}_{skewness}_k_{k}"
        return False, None


In [83]:
# Build one Experiment per entry of the "experiments" list in the JSON config.
experiments = []

# Decode the JSON explicitly as UTF-8 rather than with the platform's default
# encoding, and keep the file open only while it is being parsed.
with open(exp_config_file, "r", encoding="utf-8") as exp_config:
    config = json.load(exp_config)

for experiment in config["experiments"]:
    # The "data" mapping and each "query" mapping are expanded into keyword
    # arguments, so their keys must match the constructors' parameter names.
    data_info = DataInfo(**experiment["data"])

    query_info = [QueryInfo(**query) for query in experiment["query"]]

    baseline_info = [
        BaselineInfo(name=baseline["name"], config=baseline["config"])
        for baseline in experiment["baseline"]
    ]

    exp = Experiment(data_info=data_info, query_info=query_info, baseline_info=baseline_info)
    experiments.append(exp)



In [84]:
def get_exp_data(baseline, type, data_size, data_distribution, data_skewness, query_size, query_distribution, query_skewness, query_str=None, k=None):
    """Find the first experiment matching a baseline, data set and query.

    Iterates over the module-level ``experiments`` list and returns the
    labels produced by the first experiment for which ``is_baseline``,
    ``is_data`` and ``is_query`` all report a match. The dimension passed
    to ``is_data`` and ``is_query`` is fixed to 2.

    Args:
        baseline: baseline name passed to ``Experiment.is_baseline``.
        type: query type passed to ``Experiment.is_query``.
        data_size, data_distribution, data_skewness: data set to look for.
        query_size, query_distribution, query_skewness: query to look for.
        query_str: passed through to ``is_query`` (range queries).
        k: passed through to ``is_query`` (knn queries).

    Returns:
        A ``(path, data, query)`` tuple of the three labels, or ``None``
        when no experiment matches.
    """
    for experiment in experiments:
        matched, path = experiment.is_baseline(baseline)
        if not matched:
            continue

        # The data set must be matched on the *data* skewness; the query
        # skewness was previously passed here by mistake.
        matched, data = experiment.is_data(data_size, 2, data_distribution, data_skewness)
        if not matched:
            continue

        matched, query = experiment.is_query(type, query_size, 2, query_distribution, query_skewness, query_str, k)
        if not matched:
            continue

        return path, data, query

    return None


In [100]:
def _coerce_number(text):
    """Return *text* as an int or float if it parses as one, else unchanged."""
    for convert in (int, float):
        try:
            return convert(text)
        except ValueError:
            continue
    return text


def parse_rtree_properties(file_path):
    """Read ``key: value`` lines from a text file into an RTreeProperties.

    Each line is stripped and split at the first ``": "``; lines without
    that separator (including a key followed by an empty value) are
    skipped. Values accepted by ``int()`` or ``float()`` - including
    signed and scientific-notation numbers - are converted; every other
    value is kept as a string. When a key occurs more than once the last
    occurrence wins.

    Args:
        file_path: path of the text file to read.

    Returns:
        ``RTreeProperties(**properties)`` built from the parsed pairs.
    """
    properties = {}
    with open(file_path, 'r') as file:
        for line in file:
            # Test for the separator on the stripped line: checking the raw
            # line let "key: \n" through, which then failed to unpack.
            key, separator, value = line.strip().partition(": ")
            if not separator:
                continue
            properties[key] = _coerce_number(value)
    return RTreeProperties(**properties)

In [103]:
# Sweep settings shared by the loops below (3200000 is left out of the
# cardinality sweep).
varying_cardinality = [100000, 200000, 400000, 800000, 1600000]
dimensions = 2
distribution = "uniform"
skewness = 1
varying_query_range = ["0.01x0.01", "0.02x0.02", "0.04x0.04", "0.08x0.08", "0.16x0.16"]
varying_k = [1, 5, 25, 125, 625]
baselines = ["zorder", "bmtree"]

# One (initially empty) result list per baseline.
build = {name: [] for name in baselines}
range_query = {name: [] for name in baselines}
knn_query = {name: [] for name in baselines}

# Print the build and range-query result file names for every
# baseline / cardinality combination.
for baseline in baselines:
    for cardinality in varying_cardinality:
        path, data, query = get_exp_data(
            baseline, "range", cardinality, distribution, skewness,
            1000, distribution, skewness, "0.01x0.01",
        )
        # The range file name has the same shape for every baseline; only
        # the build file name differs for baselines other than "zorder".
        range_query_file_name = f"../{path}/range/{data}_{query}.txt"
        if baseline != "zorder":
            build_file_name = f"../{path}/build/{data}_{query}_depth_1.txt"
        else:
            build_file_name = f"../{path}/build/{data}.txt"

        print(build_file_name)
        print(range_query_file_name)

print("--------")

# Parse and print the knn-query result file of every
# baseline / cardinality combination (k fixed to 25).
for baseline in baselines:
    for cardinality in varying_cardinality:
        path, data, query = get_exp_data(
            baseline, "knn", cardinality, distribution, skewness,
            1000, distribution, skewness, k=25,
        )
        knn_query_file_name = f"../{path}/knn/{data}_{query}.txt"
        print(knn_query_file_name)
        rTreeProperties = parse_rtree_properties(knn_query_file_name)
        print(rTreeProperties)



../result/libspatialindex/zorder/build/data_100000_2_uniform_1.txt
../result/libspatialindex/zorder/range/data_100000_2_uniform_1_range_1000_2_uniform_1_0.01x0.01.txt
../result/libspatialindex/zorder/build/data_200000_2_uniform_1.txt
../result/libspatialindex/zorder/range/data_200000_2_uniform_1_range_1000_2_uniform_1_0.01x0.01.txt
../result/libspatialindex/zorder/build/data_400000_2_uniform_1.txt
../result/libspatialindex/zorder/range/data_400000_2_uniform_1_range_1000_2_uniform_1_0.01x0.01.txt
../result/libspatialindex/zorder/build/data_800000_2_uniform_1.txt
../result/libspatialindex/zorder/range/data_800000_2_uniform_1_range_1000_2_uniform_1_0.01x0.01.txt
../result/libspatialindex/zorder/build/data_1600000_2_uniform_1.txt
../result/libspatialindex/zorder/range/data_1600000_2_uniform_1_range_1000_2_uniform_1_0.01x0.01.txt
../result/libspatialindex/bmtree/build/data_100000_2_uniform_1_range_1000_2_uniform_1_0.01x0.01_depth_1.txt
../result/libspatialindex/bmtree/range/data_100000_2_un