# BI 14


In [1]:
from dg_builder import *

In [2]:
""" build `original data graph` """

build_original_dg(
    original_dg_filepath=BI_14_DG, optimized_dg_filepath=BI_14_DG_OPTIMIZED
)

File `./out/optimized/BI_14/data_graph.txt` & `./out/original/data_graph.txt` already exists


In [3]:
""" build `optimized data graph` """

index_csv_filenames = [
    "person_city_country",
    "comment_comment_person",
    "comment_post_person",
]

build_optimized_dg(
    optimized_dg_filepath=BI_14_DG_OPTIMIZED,
    index_csv_filenames=index_csv_filenames,
)

File `./out/optimized/BI_14/data_graph.txt` already exists


## Original


In [4]:
edges = [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)]
labels_template = ["country", "city", "person", "person", "city", "country"]
task_names = [["sub_task_1"]]

original_builder = QueryBuilder(
    edges=edges,
    labels=labels_template,
    raw_task_names=task_names,
    QG_PRE=BI_14_ORIGINAL_Q_PRE,
    LOG_PRE=BI_14_ORIGINAL_L_PRE,
    args_starting=bi_14_original_args_starting,
    kwargs={},
)

original_builder.build()

QueryBuilder {
    edges: [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)],
    labels: ['country', 'city', 'person', 'person', 'city', 'country'],
    raw_task_names: [['sub_task_1']],
    QG_PRE: ./out/original/BI_14,
    LOG_PRE: ./log/original/BI_14,
    args_starting: ['wsl', './VEQ_M_100k', '-dg', './out/original/data_graph.txt', '-qg'],
    replace_indices: [],
    replace_wrapper: <function QueryBuilder.<lambda> at 0x000001777D1FD120>,
    kwargs: {'sub_task_1': QGMetaRecord(labels=['country', 'city', 'person', 'person', 'city', 'country'], edges=[(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)])},
}

In [5]:
edges2 = [(0, 1), (1, 2), (2, 3)]
labels_template2 = ["person", "comment", "message", "person"]
task_names2 = [["comment"], ["post"]]

original_builder2 = QueryBuilder(
    edges=edges2,
    labels=labels_template2,
    raw_task_names=task_names2,
    QG_PRE=BI_14_ORIGINAL_Q_PRE,
    LOG_PRE=BI_14_ORIGINAL_L_PRE,
    args_starting=bi_14_original_args_starting,
    kwargs={},
)

original_builder2.with_replace_indices([2]).build()

QueryBuilder {
    edges: [(0, 1), (1, 2), (2, 3)],
    labels: ['person', 'comment', 'message', 'person'],
    raw_task_names: [['comment'], ['post']],
    QG_PRE: ./out/original/BI_14,
    LOG_PRE: ./log/original/BI_14,
    args_starting: ['wsl', './VEQ_M_100k', '-dg', './out/original/data_graph.txt', '-qg'],
    replace_indices: [2],
    replace_wrapper: <function QueryBuilder.<lambda> at 0x000001777D1FD120>,
    kwargs: {'comment': QGMetaRecord(labels=['person', 'comment', 'comment', 'person'], edges=[(0, 1), (1, 2), (2, 3)]), 'post': QGMetaRecord(labels=['person', 'comment', 'post', 'person'], edges=[(0, 1), (1, 2), (2, 3)])},
}

In [6]:
""" exec """

time_table, outer_time_table = original_builder.run_with_elapsed_time_table_ret()
assert len(time_table) == 1 and len(outer_time_table) == 1

time_table2, outer_time_table2 = original_builder2.run_with_elapsed_time_table_ret()
assert len(time_table2) == 2 and len(outer_time_table2) == 2

merged_time_table2 = [time_table2[0] + time_table2[1]]
merged_outer_time_table2 = [outer_time_table2[0] + outer_time_table2[1]]

File `./log/original/BI_14/sub_task_1.txt` already exists
    lines[-2] ~> Processing Time (ms): 26142.1
    lines[-1] ~> Outer Elapsed Time (ms): 47484.3858

File `./log/original/BI_14/comment.txt` already exists
    lines[-2] ~> Processing Time (ms): 19132.3
    lines[-1] ~> Outer Elapsed Time (ms): 32394.6119

File `./log/original/BI_14/post.txt` already exists
    lines[-2] ~> Processing Time (ms): 8891.63
    lines[-1] ~> Outer Elapsed Time (ms): 22713.3127



## Optimized


In [7]:
""" labels & edges """

edges_optimized: list[tuple[int, int]] = [(0, 1), (1, 2), (2, 3)]
labels_template_optimized = ["country", "person", "person", "country"]
task_names_optimized = task_names

optimized_builder = QueryBuilder(
    edges=edges_optimized,
    labels=labels_template_optimized,
    raw_task_names=task_names_optimized,
    QG_PRE=BI_14_OPTIMIZED_Q_PRE,
    LOG_PRE=BI_14_OPTIMIZED_L_PRE,
    args_starting=bi_14_optimized_args_starting,
    kwargs={},
)

optimized_builder.build()

QueryBuilder {
    edges: [(0, 1), (1, 2), (2, 3)],
    labels: ['country', 'person', 'person', 'country'],
    raw_task_names: [['sub_task_1']],
    QG_PRE: ./out/optimized/BI_14,
    LOG_PRE: ./log/optimized/BI_14,
    args_starting: ['wsl', './VEQ_M_100k', '-dg', './out/optimized/BI_14/data_graph.txt', '-qg'],
    replace_indices: [],
    replace_wrapper: <function QueryBuilder.<lambda> at 0x000001777D1FD120>,
    kwargs: {'sub_task_1': QGMetaRecord(labels=['country', 'person', 'person', 'country'], edges=[(0, 1), (1, 2), (2, 3)])},
}

In [8]:
edges2_optimized: list[tuple[int, int]] = [(0, 1), (1, 2)]
labels_template2_optimized = ["person", "comment", "person"]
task_names2_optimized = [["comment_and_post"]]
merged_task_names2 = task_names2_optimized

optimized_builder2 = QueryBuilder(
    edges=edges2_optimized,
    labels=labels_template2_optimized,
    raw_task_names=task_names2_optimized,
    QG_PRE=BI_14_OPTIMIZED_Q_PRE,
    LOG_PRE=BI_14_OPTIMIZED_L_PRE,
    args_starting=bi_14_optimized_args_starting,
    kwargs={},
)

optimized_builder2.build()

QueryBuilder {
    edges: [(0, 1), (1, 2)],
    labels: ['person', 'comment', 'person'],
    raw_task_names: [['comment_and_post']],
    QG_PRE: ./out/optimized/BI_14,
    LOG_PRE: ./log/optimized/BI_14,
    args_starting: ['wsl', './VEQ_M_100k', '-dg', './out/optimized/BI_14/data_graph.txt', '-qg'],
    replace_indices: [],
    replace_wrapper: <function QueryBuilder.<lambda> at 0x000001777D1FD120>,
    kwargs: {'comment_and_post': QGMetaRecord(labels=['person', 'comment', 'person'], edges=[(0, 1), (1, 2)])},
}

In [9]:
""" exec """

time_table_optimized, outer_time_table_optimized = (
    optimized_builder.run_with_elapsed_time_table_ret()
)
assert len(time_table_optimized) == 1 and len(outer_time_table_optimized) == 1

time_table2_optimized, outer_time_table2_optimized = (
    optimized_builder2.run_with_elapsed_time_table_ret()
)
assert len(time_table2_optimized) == 1 and len(outer_time_table2_optimized) == 1

File `./log/optimized/BI_14/sub_task_1.txt` already exists
    lines[-2] ~> Processing Time (ms): 18691.6
    lines[-1] ~> Outer Elapsed Time (ms): 33470.9367

>>> Running: comment_and_post...


    Data file: ./out/optimized/BI_14/data_graph.txt
    Query file: ./out/optimized/BI_14/comment_and_post.txt
    Output file: 
    Sum of |C(u)|: 2012832
    Total Recursive Call Count: 120
    Number of Matches: 100172
    Filtering Time (ms): 5777.89
    Verification Time (ms): 206.808
    Processing Time (ms): 5984.7
<<< Done! (Outer Elapsed Time: 24364.6866 ms)


In [11]:
""" Show `comparison data-frame` """

print("Comparison between: `original_match` & `optimized_match`")

df = pl.DataFrame(
    {
        "task": task_names + merged_task_names2,
        "original (ms)": time_table + merged_time_table2,
        "optimized (ms)": time_table_optimized + time_table2_optimized,
        "original outer (ms)": outer_time_table + merged_outer_time_table2,
        "optimized outer (ms)": outer_time_table_optimized
        + outer_time_table2_optimized,
    }
)
df

Comparison between: `original_match` & `optimized_match`


task,original (ms),optimized (ms),original outer (ms),optimized outer (ms)
list[str],f64,f64,f64,f64
"[""sub_task_1""]",26142.1,18691.6,47484.3858,33470.9367
"[""comment_and_post""]",28023.93,5984.7,55107.9246,24364.6866
