# BI 10


In [1]:
from prelude import *

## Original


In [2]:
""" labels & edges """

short_edges = [(0, 1), (1, 6), (6, 2), (2, 3), (2, 4), (4, 5)] + [
    (6, 7),
    (7, 8),
    (8, 9),
]
long_edges = short_edges + [(9, 10)]

long_labels_template = [
    "country",
    "city",
    "message",  # could use "comment" / "post"
    "tag",
    "tag",
    "tagclass",
] + ["person"] * 5
short_labels_template = [
    "country",
    "city",
    "message",  # could use "comment" / "post"
    "tag",
    "tag",
    "tagclass",
] + ["person"] * 4


def wrapper(
    personId: str, country: str, tagclass: str, msgType: str = "post"
) -> list[str]:
    return [
        BaseLabel.PersonId + personId,
        BaseLabel.Country + country,
        BaseLabel.TagClass + tagclass,
        msgType,
    ]


task_names = [
    # p1
    "4758,Finland,Criminal,4,post",
    "4758,Finland,Criminal,4,comment",
    "4758,Finland,Criminal,3,post",
    "4758,Finland,Criminal,3,comment",
    # p2
    "7861,Israel,SnookerChamp,4,post",
    "7861,Israel,SnookerChamp,4,comment",
    "7861,Israel,SnookerChamp,3,post",
    "7861,Israel,SnookerChamp,3,comment",
    # p3
    "2199023259494,Singapore,SnookerChamp,4,post",
    "2199023259494,Singapore,SnookerChamp,4,comment",
    "2199023259494,Singapore,SnookerChamp,3,post",
    "2199023259494,Singapore,SnookerChamp,3,comment",
]

kwargs = {}
for task_name in task_names:
    personId, country, tagclass, length, msgType = task_name.split(",")
    kwargs[task_name] = QGMetaRecord(
        replace(
            (long_labels_template if length == "4" else short_labels_template),
            [10, 0, 5, 2] if length == "4" else [9, 0, 5, 2],
            wrapper(personId, country, tagclass, msgType),
        ),
        long_edges if length == "4" else short_edges,
    )

In [3]:
""" Init Original Query Graph """

for qg_name, meta_record in kwargs.items():
    qg_path = f"{BI_10_ORIGINAL_Q_PRE}/{qg_name}.txt"
    labels = meta_record.labels
    edges = meta_record.edges
    qg_emitter(qg_path, labels, edges)

In [4]:
""" args """

args_list = [
    bi_10_original_args_starting + [f"{BI_10_ORIGINAL_Q_PRE}/{task_name}.txt"]
    for task_name in task_names
]

In [5]:
""" exec """

time_table = []

result_path_list = [
    f"{BI_10_ORIGINAL_L_PRE}/{task_name}.txt" for task_name in task_names
]

run_multiple_veq_m_100k(
    result_path_list,
    task_names,
    args_list,
    time_table,
)

>>> Running: 98214,Tunisia,NascarDriver,4,post...
    Data file: ./out/original/data_graph.txt
    Query file: ./out/original/BI_10/98214,Tunisia,NascarDriver,4,post.txt
    Output file: 
    Sum of |C(u)|: 25839
    Total Recursive Call Count: 11
    Number of Matches: 100004
    Filtering Time (ms): 176.97
    Verification Time (ms): 498.863
    Processing Time (ms): 675.833
<<< Done!
>>> Running: 98214,Tunisia,NascarDriver,4,comment...
    Data file: ./out/original/data_graph.txt
    Query file: ./out/original/BI_10/98214,Tunisia,NascarDriver,4,comment.txt
    Output file: 
    Total Recursive Call Count: 0
    Number of Matches: 0
    Filtering Time (ms): 6.11001
    Verification Time (ms): 0
    Processing Time (ms): 6.11001
<<< Done!
>>> Running: 98214,Tunisia,NascarDriver,3,post...
    Data file: ./out/original/data_graph.txt
    Query file: ./out/original/BI_10/98214,Tunisia,NascarDriver,3,post.txt
    Output file: 
    Sum of |C(u)|: 17029
    Total Recursive Call Count: 18
  

## Optimized


In [6]:
""" labels & edges """

short_edges_optimized = [(0, 5), (5, 1), (1, 2), (1, 3), (3, 4)] + [
    (5, 6),
    (6, 7),
    (7, 8),
]
long_edges_optimized = short_edges_optimized + [(8, 9)]

long_labels_template_optimized = [
    "country",
    "message",  # could use "comment" / "post"
    "tag",
    "tag",
    "tagclass",
] + ["person"] * 5
short_labels_template_optimized = [
    "country",
    "message",  # could use "comment" / "post"
    "tag",
    "tag",
    "tagclass",
] + ["person"] * 4

task_names_optimized = task_names

kwargs_optimized = {}
for task_name in task_names_optimized:
    personId, country, tagclass, length, msgType = task_name.split(",")
    kwargs_optimized[task_name] = QGMetaRecord(
        replace(
            (
                long_labels_template_optimized
                if length == "4"
                else short_labels_template_optimized
            ),
            [9, 0, 1, 4] if length == "4" else [8, 0, 1, 4],
            wrapper(personId, country, tagclass, msgType),
        ),
        long_edges_optimized if length == "4" else short_edges_optimized,
    )

In [7]:
""" Init Optimized Query Graph """

for qg_name, meta_record in kwargs.items():
    qg_path = f"{BI_10_OPTIMIZED_Q_PRE}/{qg_name}.txt"
    labels = meta_record.labels
    edges = meta_record.edges
    qg_emitter(qg_path, labels, edges)

In [8]:
""" args """

args_list_optimized = [
    bi_10_optimized_args_starting + [f"{BI_10_OPTIMIZED_Q_PRE}/{task_name}.txt"]
    for task_name in task_names_optimized
]

In [9]:
""" exec """

time_table_optimized = []

result_path_list_optimized = [
    f"{BI_10_OPTIMIZED_L_PRE}/{task_name}.txt" for task_name in task_names_optimized
]

run_multiple_veq_m_100k(
    result_path_list_optimized,
    task_names_optimized,
    args_list_optimized,
    time_table_optimized,
)

>>> Running: 98214,Tunisia,NascarDriver,4,post...
    Data file: ./out/optimized/data_graph.txt
    Query file: ./out/optimized/BI_10/98214,Tunisia,NascarDriver,4,post.txt
    Output file: 
    Sum of |C(u)|: 25839
    Total Recursive Call Count: 10
    Number of Matches: 101052
    Filtering Time (ms): 142.314
    Verification Time (ms): 72.3893
    Processing Time (ms): 214.703
<<< Done!
>>> Running: 98214,Tunisia,NascarDriver,4,comment...
    Data file: ./out/optimized/data_graph.txt
    Query file: ./out/optimized/BI_10/98214,Tunisia,NascarDriver,4,comment.txt
    Output file: 
    Total Recursive Call Count: 0
    Number of Matches: 0
    Filtering Time (ms): 8.04338
    Verification Time (ms): 0
    Processing Time (ms): 8.04338
<<< Done!
>>> Running: 98214,Tunisia,NascarDriver,3,post...
    Data file: ./out/optimized/data_graph.txt
    Query file: ./out/optimized/BI_10/98214,Tunisia,NascarDriver,3,post.txt
    Output file: 
    Sum of |C(u)|: 17029
    Total Recursive Call Count

In [10]:
""" Show BI-10 `comparison data-frame` """

print("Comparison between: `original_match` & `optimized_match`")

df = pl.DataFrame(
    {
        "task": task_names,
        "original (ms)": time_table,
        "optimized (ms)": time_table_optimized,
    }
)
df

Comparison between: `original_match` & `optimized_match`


task,original (ms),optimized (ms)
str,f64,f64
"""98214,Tunisia,…",675.833,214.703
"""98214,Tunisia,…",6.11001,8.04338
"""98214,Tunisia,…",350.955,201.395
"""98214,Tunisia,…",6.56101,7.67404
"""4886,Cuba,Thin…",414.601,472.007
"""4886,Cuba,Thin…",247.138,249.824
"""4886,Cuba,Thin…",482.073,518.133
"""4886,Cuba,Thin…",326.622,351.691
"""60769,France,P…",376.415,309.841
"""60769,France,P…",326.788,280.011
