# BI 3


In [1]:
from dg_builder import *

In [2]:
""" build `original data graph` """

build_original_dg(original_dg_filepath=BI_3_DG, optimized_dg_filepath=BI_3_DG_OPTIMIZED)

File `./out/optimized/BI_3/data_graph.txt` & `./out/original/data_graph.txt` already exists


In [3]:
""" build `optimized data graph` """

index_csv_filenames = ["forum_person_city_country"]

build_optimized_dg(
    optimized_dg_filepath=BI_3_DG_OPTIMIZED,
    index_csv_filenames=index_csv_filenames,
)

File `./out/optimized/BI_3/data_graph.txt` already exists


## Original


In [4]:
edges = [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7)]
labels_template = [
    "country",
    "city",
    "person",
    "forum",
    "post",
    "comment",
    "tag",
    "tagclass",
]
task_names = [["bi_3_query"]]

original_builder = QueryBuilder(
    edges=edges,
    labels=labels_template,
    raw_task_names=task_names,
    QG_PRE=BI_3_ORIGINAL_Q_PRE,
    LOG_PRE=BI_3_ORIGINAL_L_PRE,
    args_starting=bi_3_original_args_starting,
    kwargs={},
)

original_builder.build()

QueryBuilder {
    edges: [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7)],
    labels: ['country', 'city', 'person', 'forum', 'post', 'comment', 'tag', 'tagclass'],
    raw_task_names: [['bi_3_query']],
    QG_PRE: ./out/original/BI_3,
    LOG_PRE: ./log/original/BI_3,
    args_starting: ['wsl', './VEQ_M_100k', '-dg', './out/original/data_graph.txt', '-qg'],
    replace_indices: [],
    replace_wrapper: <function QueryBuilder.<lambda> at 0x0000021B4C931120>,
    kwargs: {'bi_3_query': QGMetaRecord(labels=['country', 'city', 'person', 'forum', 'post', 'comment', 'tag', 'tagclass'], edges=[(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7)])},
}

In [5]:
""" exec """

time_table, outer_time_table = original_builder.run_with_elapsed_time_table_ret()
assert len(time_table) == 1 and len(outer_time_table) == 1

>>> Running: bi_3_query...
    Data file: ./out/original/data_graph.txt
    Query file: ./out/original/BI_3/bi_3_query.txt
    Output file: 
    Sum of |C(u)|: 536384
    Total Recursive Call Count: 103854
    Number of Matches: 100000
    Filtering Time (ms): 2205
    Verification Time (ms): 28173.5
    Processing Time (ms): 30378.5
<<< Done! (Outer Elapsed Time: 49121.8554 ms)


## Optimized


In [6]:
""" labels & edges """

edges_optimized = [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)]
labels_template_optimized = [
    "country",
    "forum",
    "post",
    "comment",
    "tag",
    "tagclass",
]

optimized_builder = QueryBuilder(
    edges=edges_optimized,
    labels=labels_template_optimized,
    raw_task_names=task_names,
    QG_PRE=BI_3_OPTIMIZED_Q_PRE,
    LOG_PRE=BI_3_OPTIMIZED_L_PRE,
    args_starting=bi_3_optimized_args_starting,
    kwargs={},
)

optimized_builder.build()

QueryBuilder {
    edges: [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)],
    labels: ['country', 'forum', 'post', 'comment', 'tag', 'tagclass'],
    raw_task_names: [['bi_3_query']],
    QG_PRE: ./out/optimized/BI_3,
    LOG_PRE: ./log/optimized/BI_3,
    args_starting: ['wsl', './VEQ_M_100k', '-dg', './out/optimized/BI_3/data_graph.txt', '-qg'],
    replace_indices: [],
    replace_wrapper: <function QueryBuilder.<lambda> at 0x0000021B4C931120>,
    kwargs: {'bi_3_query': QGMetaRecord(labels=['country', 'forum', 'post', 'comment', 'tag', 'tagclass'], edges=[(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)])},
}

In [7]:
""" exec """

time_table_optimized, outer_time_table_optimized = (
    optimized_builder.run_with_elapsed_time_table_ret()
)
assert len(time_table_optimized) == 1 and len(outer_time_table_optimized) == 1

>>> Running: bi_3_query...
    Data file: ./out/optimized/BI_3/data_graph.txt
    Query file: ./out/optimized/BI_3/bi_3_query.txt
    Output file: 
    Sum of |C(u)|: 318926
    Total Recursive Call Count: 200494
    Number of Matches: 100000
    Filtering Time (ms): 1434.08
    Verification Time (ms): 32297.5
    Processing Time (ms): 33731.5
<<< Done! (Outer Elapsed Time: 51257.1005 ms)


In [8]:
""" Show `comparison data-frame` """

print("Comparison between: `original_match` & `optimized_match`")

df = pl.DataFrame(
    {
        "task": task_names,
        "original (ms)": time_table,
        "optimized (ms)": time_table_optimized,
        "original outer (ms)": outer_time_table,
        "optimized outer (ms)": outer_time_table_optimized,
    }
)
df

Comparison between: `original_match` & `optimized_match`


task,original (ms),optimized (ms),original outer (ms),optimized outer (ms)
list[str],f64,f64,f64,f64
"[""bi_3_query""]",30378.5,33731.5,49121.8554,51257.1005
