# BI 3


In [1]:
from prelude import *

## Original


In [2]:
""" labels & edges """

# Query-graph edges: a chain linking consecutive vertex ids 0-1, 1-2, ..., 6-7.
edges = [(src, src + 1) for src in range(7)]

# One label per vertex, listed in vertex-id order. Positions 7, 0 and 5 are
# the ones handed to `replace` when the per-task records are built below.
labels_template = [
    "country",  # 0
    "city",  # 1
    "person",  # 2
    "forum",  # 3
    "post",  # 4
    "message",  # 5 -- could use "comment" / "post"
    "tag",  # 6
    "tagclass",  # 7
]


def wrapper(tagClass: str, country: str, msgType: str = "post") -> list[str]:
    """Build the three task-specific labels for one BI 3 task.

    Args:
        tagClass: Tag-class name; appended to ``BaseLabel.TagClass``.
        country: Country name; appended to ``BaseLabel.Country``.
        msgType: Returned unchanged as the third element (default ``"post"``).

    Returns:
        ``[tag-class label, country label, msgType]``, in that order.
    """
    tag_class_label = BaseLabel.TagClass + tagClass
    country_label = BaseLabel.Country + country
    return [tag_class_label, country_label, msgType]


# Every task is encoded as "<tagClass>,<country>,<msgType>"; the same string
# is reused as the file stem for that task's query and result files.
task_names = [
    "Saint,India,post",
    "Saint,India,comment",
    "President,India,post",
    "President,India,comment",
    "Song,China,post",
    "Song,China,comment",
]

# task name -> QGMetaRecord built from the template labels and the shared edges.
kwargs = {}
for task_name in task_names:
    tagClass, country, msgType = task_name.split(",")
    task_labels = wrapper(tagClass, country, msgType)
    # Indices 7 / 0 / 5 are the tagclass / country / message positions of
    # `labels_template`, matching the order of the list `wrapper` returns.
    original_labels = replace(labels_template, [7, 0, 5], task_labels)
    kwargs[task_name] = QGMetaRecord(original_labels, edges)

In [3]:
""" Init Original Query Graph """

# Emit one query-graph file per task under BI_3_ORIGINAL_Q_PRE.
# The record's fields are passed straight to `qg_emitter` instead of being
# unpacked into `labels` / `edges` first: `edges` is also the notebook-level
# template edge list, and rebinding it inside this loop would leave hidden
# state behind for any later cell (or re-run) that reads it.
for qg_name, meta_record in kwargs.items():
    qg_path = f"{BI_3_ORIGINAL_Q_PRE}/{qg_name}.txt"
    qg_emitter(qg_path, meta_record.labels, meta_record.edges)

In [4]:
""" args """

# Path of each task's query-graph file, in `task_names` order.
original_query_paths = [f"{BI_3_ORIGINAL_Q_PRE}/{name}.txt" for name in task_names]

# One argument list per task: the shared prefix followed by that task's
# query-graph path.
args_list = [bi_3_original_args_starting + [path] for path in original_query_paths]

In [5]:
""" exec """

# Per-task result path, in `task_names` order.
result_path_list = [f"{BI_3_ORIGINAL_L_PRE}/{name}.txt" for name in task_names]

# Handed to the runner below and later used as the "original (ms)" column of
# the comparison frame; presumably the runner appends one timing per task --
# confirm against `run_multiple_veq_m_100k` in prelude.
time_table = []

run_multiple_veq_m_100k(result_path_list, task_names, args_list, time_table)

>>> Running: Saint,India,post...
    Data file: ./out/original/data_graph.txt
    Query file: ./out/original/BI_3/Saint,India,post.txt
    Output file: 
    Total Recursive Call Count: 0
    Number of Matches: 0
    Filtering Time (ms): 28.6267
    Verification Time (ms): 0
    Processing Time (ms): 28.6267
<<< Done!
>>> Running: Saint,India,comment...
    Data file: ./out/original/data_graph.txt
    Query file: ./out/original/BI_3/Saint,India,comment.txt
    Output file: 
    Sum of |C(u)|: 36100
    Total Recursive Call Count: 178205
    Number of Matches: 100000
    Filtering Time (ms): 229.923
    Verification Time (ms): 26599.1
    Processing Time (ms): 26829.1
<<< Done!
>>> Running: President,India,post...
    Data file: ./out/original/data_graph.txt
    Query file: ./out/original/BI_3/President,India,post.txt
    Output file: 
    Total Recursive Call Count: 0
    Number of Matches: 0
    Filtering Time (ms): 24.6046
    Verification Time (ms): 0
    Processing Time (ms): 24.604

## Optimized


In [6]:
""" labels & edges """

# Optimized query-graph edges: a chain linking consecutive vertex ids 0-1 ... 4-5.
edges_optimized = [(src, src + 1) for src in range(5)]

# One label per vertex, listed in vertex-id order. Positions 5, 0 and 3 are
# the ones handed to `replace` when the per-task records are built below.
labels_template_optimized = [
    "country",  # 0
    "forum",  # 1
    "post",  # 2
    "message",  # 3 -- could use "comment" / "post"
    "tag",  # 4
    "tagclass",  # 5
]

# task name -> QGMetaRecord built from the optimized template and edges.
kwargs_optimized = {}
for task_name in task_names:
    tagClass, country, msgType = task_name.split(",")
    task_labels = wrapper(tagClass, country, msgType)
    # Indices 5 / 0 / 3 are the tagclass / country / message positions of
    # `labels_template_optimized`, matching the order `wrapper` returns.
    optimized_labels = replace(labels_template_optimized, [5, 0, 3], task_labels)
    kwargs_optimized[task_name] = QGMetaRecord(optimized_labels, edges_optimized)

In [7]:
""" Init Original Query Graph """

for qg_name, meta_record in kwargs_optimized.items():
    qg_path = f"{BI_3_OPTIMIZED_Q_PRE}/{qg_name}.txt"
    labels = meta_record.labels
    edges = meta_record.edges
    qg_emitter(qg_path, labels, edges)

In [8]:
""" args """

# Path of each task's optimized query-graph file, in `task_names` order.
optimized_query_paths = [f"{BI_3_OPTIMIZED_Q_PRE}/{name}.txt" for name in task_names]

# One argument list per task: the shared prefix followed by that task's
# query-graph path.
args_list_optimized = [
    bi_3_optimized_args_starting + [path] for path in optimized_query_paths
]

In [9]:
""" exec """

# Per-task result path, in `task_names` order.
result_path_list_optimized = [
    f"{BI_3_OPTIMIZED_L_PRE}/{name}.txt" for name in task_names
]

# Handed to the runner below and later used as the "optimized (ms)" column of
# the comparison frame; presumably the runner appends one timing per task --
# confirm against `run_multiple_veq_m_100k` in prelude.
time_table_optimized = []

run_multiple_veq_m_100k(
    result_path_list_optimized, task_names, args_list_optimized, time_table_optimized
)

File `./log/optimized/BI_3/Saint,India,post.txt` already exists
    last_line ~> Processing Time (ms): 9.53107

File `./log/optimized/BI_3/Saint,India,comment.txt` already exists
    last_line ~> Processing Time (ms): 875.86

File `./log/optimized/BI_3/President,India,post.txt` already exists
    last_line ~> Processing Time (ms): 9.28269

File `./log/optimized/BI_3/President,India,comment.txt` already exists
    last_line ~> Processing Time (ms): 913.997

File `./log/optimized/BI_3/Song,China,post.txt` already exists
    last_line ~> Processing Time (ms): 8.38721

File `./log/optimized/BI_3/Song,China,comment.txt` already exists
    last_line ~> Processing Time (ms): 1136.47



In [10]:
""" Show BI-3 `comparison data-frame` """

print("Comparison between: `original_match` & `optimized_match`")

# Column name -> values; insertion order fixes the column order of the frame.
comparison_columns = {
    "task": task_names,
    "original (ms)": time_table,
    "optimized (ms)": time_table_optimized,
}
df = pl.DataFrame(comparison_columns)
df  # last expression of the cell, so the notebook renders the frame

Comparison between: `original_match` & `optimized_match`


task,original (ms),optimized (ms)
str,f64,f64
"""Saint,India,po…",28.6267,9.53107
"""Saint,India,co…",26829.1,875.86
"""President,Indi…",24.6046,9.28269
"""President,Indi…",24232.4,913.997
"""Song,China,pos…",28.4468,8.38721
"""Song,China,com…",26986.0,1136.47
