# BI 3


In [1]:
from prelude import *

## Original


In [2]:
""" labels & edges """

# Query-graph topology: a simple chain in which vertex i is joined to vertex
# i + 1 (8 vertices, 7 edges).
edges = [(src, src + 1) for src in range(7)]

# One label per query vertex, listed in vertex-id order.  Indices 7, 0 and 5
# are the ones handed to `replace(...)` together with the task-specific labels,
# so those three entries act as placeholders (presumably overwritten per task;
# confirm against `replace` in the prelude).
labels_template = [
    "country",  # 0 - placeholder
    "city",  # 1
    "person",  # 2
    "forum",  # 3
    "post",  # 4
    "message",  # 5 - placeholder; could use "comment" / "post"
    "tag",  # 6
    "tagclass",  # 7 - placeholder
]


def wrapper(tagClass: str, country: str, msgType: str = "post") -> list[str]:
    """Build the three task-specific labels for one query graph.

    Args:
        tagClass: Tag-class name; appended to ``BaseLabel.TagClass``.
        country: Country name; appended to ``BaseLabel.Country``.
        msgType: Label used as-is for the message vertex (defaults to "post").

    Returns:
        ``[tag-class label, country label, msgType]`` in exactly that order,
        which is the order the callers pair with their index lists.
    """
    # NOTE(review): BaseLabel.* comes from the prelude; presumably string
    # prefixes -- confirm there.
    tag_class_label = BaseLabel.TagClass + tagClass
    country_label = BaseLabel.Country + country
    return [tag_class_label, country_label, msgType]


# Each task is encoded as "<tagClass>,<country>,<msgType>".  The same string is
# reused as the dictionary key here and as the file stem of the query-graph,
# argument and result paths built in the following cells.
task_names = [
    "Saint,India,post",
    "Saint,India,comment",
    "President,India,post",
    "President,India,comment",
    "Song,China,post",
    "Song,China,comment",
]

# task name -> metadata record (labels + edges) of its original query graph.
kwargs = {}
for task_name in task_names:
    tagClass, country, msgType = task_name.split(",")
    # Index list [7, 0, 5] lines up with wrapper()'s output order:
    # tagclass slot, country slot, message slot.
    task_labels = replace(
        labels_template, [7, 0, 5], wrapper(tagClass, country, msgType)
    )
    kwargs[task_name] = QGMetaRecord(task_labels, edges)

In [3]:
""" Init Original Query Graph """

# Write one query-graph file per task under the original-variant directory.
# The record's fields are passed straight to the emitter rather than being
# copied into notebook-level `labels` / `edges` variables: `edges` is already
# the name of the edge list defined alongside the label template, and rebinding
# it inside this loop makes later cells depend on execution order.
for qg_name, meta_record in kwargs.items():
    qg_emitter(
        f"{BI_3_ORIGINAL_Q_PRE}/{qg_name}.txt",
        meta_record.labels,
        meta_record.edges,
    )

In [4]:
""" args """

# One argument list per task: the shared leading arguments followed by the path
# of that task's query-graph file.  Order follows `task_names`.
args_list = []
for task_name in task_names:
    query_path = f"{BI_3_ORIGINAL_Q_PRE}/{task_name}.txt"
    args_list.append(bi_3_original_args_starting + [query_path])

In [5]:
""" exec """

# Result-file locations, one per task, in the same order as `task_names`.
result_path_list = [f"{BI_3_ORIGINAL_L_PRE}/{name}.txt" for name in task_names]

# Recreated empty on every execution of this cell and handed to the runner;
# the comparison table later uses it as the "original (ms)" column
# (presumably the runner appends one timing per task -- confirm in the prelude).
time_table = []

run_multiple_veq_m_100k(result_path_list, task_names, args_list, time_table)

>>> Running: Saint,India,post...
    Data file: ./out/original/data_graph.txt
    Query file: ./out/original/BI_3/Saint,India,post.txt
    Output file: 
    Total Recursive Call Count: 0
    Number of Matches: 0
    Filtering Time (ms): 29.4075
    Verification Time (ms): 0
    Processing Time (ms): 29.4075
<<< Done!
>>> Running: Saint,India,comment...
    Data file: ./out/original/data_graph.txt
    Query file: ./out/original/BI_3/Saint,India,comment.txt
    Output file: 
    Sum of |C(u)|: 36100
    Total Recursive Call Count: 178205
    Number of Matches: 100000
    Filtering Time (ms): 250.786
    Verification Time (ms): 31587
    Processing Time (ms): 31837.8
<<< Done!
>>> Running: President,India,post...
    Data file: ./out/original/data_graph.txt
    Query file: ./out/original/BI_3/President,India,post.txt
    Output file: 
    Total Recursive Call Count: 0
    Number of Matches: 0
    Filtering Time (ms): 32.7491
    Verification Time (ms): 0
    Processing Time (ms): 32.7491


## Optimized


In [6]:
""" labels & edges """

# Optimized query-graph topology: a chain in which vertex i is joined to vertex
# i + 1 (6 vertices, 5 edges).
edges_optimized = [(src, src + 1) for src in range(5)]

# One label per query vertex, listed in vertex-id order.  Indices 5, 0 and 3
# are the ones handed to `replace(...)` together with the task-specific labels,
# so those three entries act as placeholders (presumably overwritten per task;
# confirm against `replace` in the prelude).
labels_template_optimized = [
    "country",  # 0 - placeholder
    "forum",  # 1
    "post",  # 2
    "message",  # 3 - placeholder; could use "comment" / "post"
    "tag",  # 4
    "tagclass",  # 5 - placeholder
]

# task name -> metadata record (labels + edges) of its optimized query graph.
# Reuses `task_names` and `wrapper` from the original section.
kwargs_optimized = {}
for task_name in task_names:
    tagClass, country, msgType = task_name.split(",")
    # Index list [5, 0, 3] lines up with wrapper()'s output order:
    # tagclass slot, country slot, message slot.
    task_labels_optimized = replace(
        labels_template_optimized, [5, 0, 3], wrapper(tagClass, country, msgType)
    )
    kwargs_optimized[task_name] = QGMetaRecord(
        task_labels_optimized, edges_optimized
    )

In [7]:
""" Init Optimized Query Graph """

# Write one query-graph file per task under the optimized-variant directory.
# The record's fields are passed straight to the emitter rather than being
# copied into notebook-level `labels` / `edges` variables: doing so would leave
# `edges` (the original-variant edge list) pointing at the optimized edges
# after this cell runs, which silently changes what any later use of `edges`
# sees.
for qg_name, meta_record in kwargs_optimized.items():
    qg_emitter(
        f"{BI_3_OPTIMIZED_Q_PRE}/{qg_name}.txt",
        meta_record.labels,
        meta_record.edges,
    )

In [8]:
""" args """

# One argument list per task: the shared leading arguments followed by the path
# of that task's optimized query-graph file.  Order follows `task_names`.
args_list_optimized = []
for task_name in task_names:
    query_path_optimized = f"{BI_3_OPTIMIZED_Q_PRE}/{task_name}.txt"
    args_list_optimized.append(
        bi_3_optimized_args_starting + [query_path_optimized]
    )

In [9]:
""" exec """

# Result-file locations, one per task, in the same order as `task_names`.
result_path_list_optimized = [
    f"{BI_3_OPTIMIZED_L_PRE}/{name}.txt" for name in task_names
]

# Recreated empty on every execution of this cell and handed to the runner;
# the comparison table later uses it as the "optimized (ms)" column
# (presumably the runner appends one timing per task -- confirm in the prelude).
time_table_optimized = []

run_multiple_veq_m_100k(
    result_path_list_optimized, task_names, args_list_optimized, time_table_optimized
)

>>> Running: Saint,India,post...
    Data file: ./out/optimized/data_graph.txt
    Query file: ./out/optimized/BI_3/Saint,India,post.txt
    Output file: 
    Total Recursive Call Count: 0
    Number of Matches: 0
    Filtering Time (ms): 10.1837
    Verification Time (ms): 0
    Processing Time (ms): 10.1837
<<< Done!
>>> Running: Saint,India,comment...
    Data file: ./out/optimized/data_graph.txt
    Query file: ./out/optimized/BI_3/Saint,India,comment.txt
    Output file: 
    Sum of |C(u)|: 6219
    Total Recursive Call Count: 5740
    Number of Matches: 3077
    Filtering Time (ms): 76.7111
    Verification Time (ms): 877.163
    Processing Time (ms): 953.874
<<< Done!
>>> Running: President,India,post...
    Data file: ./out/optimized/data_graph.txt
    Query file: ./out/optimized/BI_3/President,India,post.txt
    Output file: 
    Total Recursive Call Count: 0
    Number of Matches: 0
    Filtering Time (ms): 10.9808
    Verification Time (ms): 0
    Processing Time (ms): 10.98

In [10]:
""" Show BI-3 `comparison data-frame` """

print("Comparison between: `original_match` & `optimized_match`")

# Columns of the side-by-side table: task name, then the timing list collected
# for the original run and for the optimized run.
comparison_columns = {
    "task": task_names,
    "original (ms)": time_table,
    "optimized (ms)": time_table_optimized,
}
df = pl.DataFrame(comparison_columns)

# Bare last expression so the notebook renders the frame.
df

Comparison between: `original_match` & `optimized_match`


task,original (ms),optimized (ms)
str,f64,f64
"""Saint,India,po…",29.4075,10.1837
"""Saint,India,co…",31837.8,953.874
"""President,Indi…",32.7491,10.9808
"""President,Indi…",27402.3,1033.01
"""Song,China,pos…",27.2211,11.9063
"""Song,China,com…",29396.1,1359.44
