# BI 10


In [None]:
from prelude import *

## Original


In [None]:
""" labels & edges """

from dataclasses import dataclass


@dataclass
class BI_10_Record:
    """Labels and edge list describing one BI-10 query graph."""

    # One label string per query-graph vertex.
    labels: list[str]
    # Pairs of integer vertex indices.
    edges: list[tuple[int, int]]


# Edge list of the short query graph (vertices 0-9). Read against the label
# templates below -- assuming vertex i carries labels[i] (TODO confirm against
# qg_emitter) -- the shape is country(0)-city(1)-person(6)-message(2), with
# message(2) linked to tag(3) and tag(4), tag(4)-tagclass(5), and a person
# chain 6-7-8-9.
short_edges = [
    (0, 1),
    (1, 6),
    (6, 2),
    (2, 3),
    (2, 4),
    (4, 5),
    (6, 7),
    (7, 8),
    (8, 9),
]
# The long variant extends the person chain by one more vertex (10).
long_edges = [*short_edges, (9, 10)]

# Label templates: six fixed vertices followed by the person chain
# (four persons in the short graph, five in the long one).
short_labels_template = [
    "country",
    "city",
    "message",  # could use "comment" / "post"
    "tag",
    "tag",
    "tagclass",
    *(["person"] * 4),
]
long_labels_template = [
    "country",
    "city",
    "message",  # could use "comment" / "post"
    "tag",
    "tag",
    "tagclass",
    *(["person"] * 5),
]


def wrapper(
    personId: str, country: str, tagclass: str, msgType: str = "post"
) -> list[str]:
    """Build the four task-specific labels of one BI-10 query graph.

    The person id, country and tag class are each appended to the matching
    ``BaseLabel`` attribute; ``msgType`` is passed through unchanged.

    Returns:
        ``[person label, country label, tag-class label, msgType]`` -- callers
        pair this order positionally with a list of template indices.
    """
    person_label = BaseLabel.PersonId + personId
    country_label = BaseLabel.Country + country
    tagclass_label = BaseLabel.TagClass + tagclass
    return [person_label, country_label, tagclass_label, msgType]


# Task names have the form "<personId>,<country>,<tagclass>,<length>,<msgType>".
# Every (person, country, tagclass) triple is combined with both lengths
# ("4" then "3") and both message types ("post" then "comment").
task_names = [
    f"{subject},{hops},{kind}"
    for subject in (
        "98214,Tunisia,NascarDriver",
        "4886,Cuba,Thing",
        "60769,France,Politician",
    )
    for hops in ("4", "3")
    for kind in ("post", "comment")
]

# Maps each task name to the record (labels + edges) of its query graph.
kwargs = {}
for task_name in task_names:
    personId, country, tagclass, length, msgType = task_name.split(",")
    # Length "4" selects the long graph (five-person chain); anything else
    # selects the short one. The slot list names the template positions of
    # the person, country, tagclass and message vertices, in wrapper() order.
    if length == "4":
        template, slots, edge_list = long_labels_template, [10, 0, 5, 2], long_edges
    else:
        template, slots, edge_list = short_labels_template, [9, 0, 5, 2], short_edges
    # `replace` comes from prelude; presumably it returns the template with
    # slots[i] set to the i-th wrapper label -- TODO confirm.
    kwargs[task_name] = BI_10_Record(
        replace(template, slots, wrapper(personId, country, tagclass, msgType)),
        edge_list,
    )

In [None]:
""" Init Original Query Graph """

# Hand every original record to qg_emitter, targeting
# "<BI_10_ORIGINAL_Q_PRE>/<task name>.txt".
for qg_name, meta_record in kwargs.items():
    qg_emitter(
        f"{BI_10_ORIGINAL_Q_PRE}/{qg_name}.txt",
        meta_record.labels,
        meta_record.edges,
    )

In [None]:
""" args """

# One argument vector per task: the shared leading arguments followed by the
# path of that task's original query-graph file.
original_query_paths = [
    f"{BI_10_ORIGINAL_Q_PRE}/{name}.txt" for name in task_names
]
args_list = [original_args_starting + [path] for path in original_query_paths]

In [None]:
""" exec """

# Collector passed to the runner; it is read back later as the
# "original (ms)" column, so the runner presumably fills it in place.
time_table = []

# One result file per task under BI_10_ORIGINAL_L_PRE.
result_path_list = [f"{BI_10_ORIGINAL_L_PRE}/{name}.txt" for name in task_names]

run_multiple_veq_m_100k(result_path_list, task_names, args_list, time_table)

## Optimized


In [None]:
""" labels & edges """

# Edge list of the short optimized query graph (vertices 0-8). Compared with
# the original templates there is no "city" vertex. Assuming vertex i carries
# labels[i] (TODO confirm against qg_emitter), the shape is
# country(0)-person(5)-message(1), with message(1) linked to tag(2) and
# tag(3), tag(3)-tagclass(4), and a person chain 5-6-7-8.
short_edges_optimized = [
    (0, 5),
    (5, 1),
    (1, 2),
    (1, 3),
    (3, 4),
    (5, 6),
    (6, 7),
    (7, 8),
]
# The long variant extends the person chain by one more vertex (9).
long_edges_optimized = [*short_edges_optimized, (8, 9)]

# Label templates: five fixed vertices followed by the person chain
# (four persons in the short graph, five in the long one).
short_labels_template_optimized = [
    "country",
    "message",  # could use "comment" / "post"
    "tag",
    "tag",
    "tagclass",
    *(["person"] * 4),
]
long_labels_template_optimized = [
    "country",
    "message",  # could use "comment" / "post"
    "tag",
    "tag",
    "tagclass",
    *(["person"] * 5),
]

# The optimized run covers exactly the same tasks as the original run.
task_names_optimized = task_names

# Maps each task name to the record (labels + edges) of its optimized query graph.
kwargs_optimized = {}
for task_name in task_names_optimized:
    personId, country, tagclass, length, msgType = task_name.split(",")
    kwargs_optimized[task_name] = BI_10_Record(
        replace(
            (
                long_labels_template_optimized
                if length == "4"
                else short_labels_template_optimized
            ),
            # Indices are paired positionally with wrapper()'s output
            # [person, country, tagclass, msgType]. In the optimized templates
            # "tagclass" sits at index 4 and "message" at index 1, so the
            # tag-class label must target 4 and the message type 1 (the
            # previous [.., 1, 4] order swapped the two). The person label
            # goes to the last vertex of the person chain (9 long / 8 short).
            [9, 0, 4, 1] if length == "4" else [8, 0, 4, 1],
            wrapper(personId, country, tagclass, msgType),
        ),
        long_edges_optimized if length == "4" else short_edges_optimized,
    )

In [None]:
""" Init Optimized Query Graph """

# Hand every optimized record to qg_emitter, targeting
# "<BI_10_OPTIMIZED_Q_PRE>/<task name>.txt".
# Iterate kwargs_optimized (not kwargs): looping over the original records
# here would emit the original graphs into the optimized directory.
for qg_name, meta_record in kwargs_optimized.items():
    qg_path = f"{BI_10_OPTIMIZED_Q_PRE}/{qg_name}.txt"
    labels = meta_record.labels
    edges = meta_record.edges
    qg_emitter(qg_path, labels, edges)

In [None]:
""" args """

# One argument vector per task: the shared leading arguments followed by the
# path of that task's optimized query-graph file.
optimized_query_paths = [
    f"{BI_10_OPTIMIZED_Q_PRE}/{name}.txt" for name in task_names_optimized
]
args_list_optimized = [
    optimized_args_starting + [path] for path in optimized_query_paths
]

In [None]:
""" exec """

# Collector passed to the runner; it is read back later as the
# "optimized (ms)" column, so the runner presumably fills it in place.
time_table_optimized = []

# One result file per task under BI_10_OPTIMIZED_L_PRE.
result_path_list_optimized = [
    f"{BI_10_OPTIMIZED_L_PRE}/{name}.txt" for name in task_names_optimized
]

run_multiple_veq_m_100k(
    result_path_list_optimized,
    task_names_optimized,
    args_list_optimized,
    time_table_optimized,
)

In [None]:
""" Show BI-10 `comparison data-frame` """

print("Comparison between: `original_match` & `optimized_match`")

# Side-by-side table: one row per task, one timing column per run.
comparison_columns = {
    "task": task_names,
    "original (ms)": time_table,
    "optimized (ms)": time_table_optimized,
}
df = pl.DataFrame(comparison_columns)
# Bare expression so the notebook renders the frame as the cell output.
df