# BI 11


In [1]:
from dg_builder import *

In [2]:
""" build `original data graph` """

# Build the original BI 11 data graph; both graph paths are handed to the builder.
# NOTE(review): why the optimized path is also required here is not visible in
# this notebook — confirm against `dg_builder.build_original_dg`.
build_original_dg(
    optimized_dg_filepath=BI_11_DG_OPTIMIZED,
    original_dg_filepath=BI_11_DG,
)

Mapping `origin_id` to `uni_id`: 100%|██████████| 3181724/3181724 [00:02<00:00, 1088159.17it/s]
Build map of `vertex.uni_id -> label`: 100%|██████████| 3181724/3181724 [00:01<00:00, 1612742.41it/s]
Build edges in format: `(src_id, dst_id)`: 100%|██████████| 17256038/17256038 [00:13<00:00, 1236264.96it/s]

File `./out/original/data_graph.txt` already exists





In [3]:
""" build `optimized data graph` """

# Name(s) of the index CSV file(s) forwarded to the optimized-graph builder.
# NOTE(review): presumably resolved (directory + extension) inside dg_builder — confirm.
index_csv_filenames = [
    "person_city_country",
]

# Write the optimized BI 11 data graph using the index file(s) listed above.
build_optimized_dg(
    index_csv_filenames=index_csv_filenames,
    optimized_dg_filepath=BI_11_DG_OPTIMIZED,
)

Adding `index edge` into `edges`: 100%|██████████| 7949/7949 [00:00<00:00, 772844.75it/s]
Writing `labels` into `./out/optimized/BI_11/data_graph.txt`: 100%|██████████| 3181724/3181724 [00:01<00:00, 1798285.45it/s]
Writing `edges` into `./out/optimized/BI_11/data_graph.txt`: 100%|██████████| 17263982/17263982 [00:18<00:00, 933585.94it/s] 


## Original


In [4]:
""" labels & edges """

# Query vertices by index: 0-2 are labelled "person", 3-5 "city", 6 "country".
labels_template = [
    "person", "person", "person",
    "city", "city", "city",
    "country",
]

edges = [
    (0, 1), (1, 2), (2, 0),  # the three person vertices form a triangle
    (0, 3), (1, 4), (2, 5),  # person i is linked to city i + 3
    (3, 6), (4, 6), (5, 6),  # every city is linked to the single country vertex
]

# Nested list, passed through unchanged as `raw_task_names`.
task_names = [["person^-city~country"]]

# Query builder bound to the original (non-indexed) data graph settings.
original_builder = QueryBuilder(
    raw_task_names=task_names,
    labels=labels_template,
    edges=edges,
    args_starting=bi_11_original_args_starting,
    QG_PRE=BI_11_ORIGINAL_Q_PRE,
    LOG_PRE=BI_11_ORIGINAL_L_PRE,
    kwargs={},
)

# Kept as the last expression of the cell so any returned value is still displayed.
original_builder.build()

QueryBuilder {
    edges: [(0, 1), (1, 2), (2, 0), (0, 3), (1, 4), (2, 5), (3, 6), (4, 6), (5, 6)],
    labels: ['person', 'person', 'person', 'city', 'city', 'city', 'country'],
    raw_task_names: [['person^-city~country']],
    QG_PRE: ./out/original/BI_11,
    LOG_PRE: ./log/original/BI_11,
    args_starting: ['wsl', './VEQ_M_100k', '-dg', './out/original/data_graph.txt', '-qg'],
    replace_indices: [],
    replace_wrapper: <function QueryBuilder.<lambda> at 0x00000200C4E7D120>,
    kwargs: {'person^-city~country': QGMetaRecord(labels=['person', 'person', 'person', 'city', 'city', 'city', 'country'], edges=[(0, 1), (1, 2), (2, 0), (0, 3), (1, 4), (2, 5), (3, 6), (4, 6), (5, 6)])},
}

In [5]:
""" exec """

# Run the original query; the builder returns two timing tables
# (matcher-reported time and outer elapsed time, per the column names used later).
(time_table, outer_time_table) = original_builder.run_with_elapsed_time_table_ret()

# Exactly one entry is expected in each table for this notebook's single task.
assert len(time_table) == 1
assert len(outer_time_table) == 1

>>> Running: person^-city~country...
    Data file: ./out/original/data_graph.txt
    Query file: ./out/original/BI_11/person^-city~country.txt
    Output file: 
    Sum of |C(u)|: 16553
    Total Recursive Call Count: 154296
    Number of Matches: 100000
    Filtering Time (ms): 101.666
    Verification Time (ms): 9227.58
    Processing Time (ms): 9329.24
<<< Done! (Outer Elapsed Time: 29680.0744 ms)


## Optimized


In [6]:
""" labels & edges """

# Query vertices by index: 0-2 are labelled "person", 3 is the "country".
labels_template_optimized = ["person", "person", "person", "country"]

# Person triangle, plus one edge from each person straight to the country vertex 3
# (the intermediate city vertices of the original query are gone).
edges_optimized = [(0, 1), (1, 2), (2, 0)] + [(person, 3) for person in range(3)]

# Nested list, passed through unchanged as `raw_task_names`.
task_names_optimized = [["person^~country"]]

# Query builder bound to the optimized (index-augmented) data graph settings.
optimized_builder = QueryBuilder(
    raw_task_names=task_names_optimized,
    labels=labels_template_optimized,
    edges=edges_optimized,
    args_starting=bi_11_optimized_args_starting,
    QG_PRE=BI_11_OPTIMIZED_Q_PRE,
    LOG_PRE=BI_11_OPTIMIZED_L_PRE,
    kwargs={},
)

# Kept as the last expression of the cell so any returned value is still displayed.
optimized_builder.build()

QueryBuilder {
    edges: [(0, 1), (1, 2), (2, 0), (0, 3), (1, 3), (2, 3)],
    labels: ['person', 'person', 'person', 'country'],
    raw_task_names: [['person^~country']],
    QG_PRE: ./out/optimized/BI_11,
    LOG_PRE: ./log/optimized/BI_11,
    args_starting: ['wsl', './VEQ_M_100k', '-dg', './out/optimized/BI_11/data_graph.txt', '-qg'],
    replace_indices: [],
    replace_wrapper: <function QueryBuilder.<lambda> at 0x00000200C4E7D120>,
    kwargs: {'person^~country': QGMetaRecord(labels=['person', 'person', 'person', 'country'], edges=[(0, 1), (1, 2), (2, 0), (0, 3), (1, 3), (2, 3)])},
}

In [7]:
""" exec """

# Run the optimized query; the builder returns the same pair of timing tables
# as for the original run.
(
    time_table_optimized,
    outer_time_table_optimized,
) = optimized_builder.run_with_elapsed_time_table_ret()

# Exactly one entry is expected in each table for this notebook's single task.
assert len(time_table_optimized) == 1
assert len(outer_time_table_optimized) == 1

>>> Running: person^~country...
    Data file: ./out/optimized/BI_11/data_graph.txt
    Query file: ./out/optimized/BI_11/person^~country.txt
    Output file: 
    Sum of |C(u)|: 11928
    Total Recursive Call Count: 22687
    Number of Matches: 75570
    Filtering Time (ms): 77.824
    Verification Time (ms): 2642.24
    Processing Time (ms): 2720.06
<<< Done! (Outer Elapsed Time: 18133.2707 ms)


In [8]:
""" Show `comparison data-frame` """

print("Comparison between: `original_match` & `optimized_match`")

# One column per measurement; each list holds one value per task.
# - `task` reuses the ORIGINAL task names only, and because `task_names` is a
#   nested list the column is rendered with a list[str] dtype.
# NOTE(review): in the recorded runs the original query reports 100000 matches
# (presumably the cap of the `VEQ_M_100k` binary) while the optimized query
# reports 75570 — confirm both timings measure comparable work.
comparison_columns = {
    "task": task_names,
    "original (ms)": time_table,
    "optimized (ms)": time_table_optimized,
    "original outer (ms)": outer_time_table,
    "optimized outer (ms)": outer_time_table_optimized,
}

df = pl.DataFrame(comparison_columns)

# Bare last expression so the notebook renders the frame with its rich repr.
df

Comparison between: `original_match` & `optimized_match`


task,original (ms),optimized (ms),original outer (ms),optimized outer (ms)
list[str],f64,f64,f64,f64
"[""person^-city~country""]",9329.24,2720.06,29680.0744,18133.2707
