# BI 3


In [None]:
from prelude import *

## Original


In [None]:
from dataclasses import dataclass

# Directories that hold the BI-3 query-graph files for the original and the
# optimized variants. `exist_ok=True` replaces the separate exists-check, so
# there is no gap between "check" and "create" in which the directory could
# appear and make `makedirs` fail.
BI_3_ORIGINAL_Q_PRE = f"{ORIGINAL_QUERY_PREFIX}/BI_3"
os.makedirs(BI_3_ORIGINAL_Q_PRE, exist_ok=True)

BI_3_OPTIMIZED_Q_PRE = f"{OPTIMIZED_QUERY_PREFIX}/BI_3"
os.makedirs(BI_3_OPTIMIZED_Q_PRE, exist_ok=True)


@dataclass
class OriginalBI3:
    """Runner for the un-optimized BI-3 query (an 8-vertex path pattern).

    None of the attributes below carries a type annotation, so ``@dataclass``
    registers no fields: they are plain class attributes shared by every
    instance, including the mutable ``time_table`` list.
    """

    # Sub-directory name appended to the log prefix in `run_query`.
    dirname = "BI_3"
    # Labels written to the query-graph file, one per line, in this order.
    labels = [
        BaseLabel.Country + "China",
        "city",
        "person",
        "forum",
        "post",
        "comment",
        "tag",
        "tagclass",
    ]
    # (src, dst) pairs written to the query-graph file; presumably indices
    # into `labels`, forming a simple chain 0-1-...-7 -- TODO confirm format.
    edges = [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5), (5, 6), (6, 7)]
    task_name = "original_china_bi3"
    query_graph_name = "original_china_bi3_query_graph.txt"
    log_name = "original_china_bi3_result.txt"
    # Class-level list handed to `run_veq_m_100k` on every run; presumably the
    # helper records timings into it -- verify against `prelude`.
    time_table = list[float]()
    # Command line for the matcher: data graph via -dg, query graph via -qg.
    # NOTE(review): the data graph is BI_11_DG although this is BI 3 --
    # confirm the reuse is intentional.
    args = wsl_if_on_windows + [
        "./VEQ_M_100k",
        "-dg",
        BI_11_DG,
        "-qg",
        f"{BI_3_ORIGINAL_Q_PRE}/{query_graph_name}",
    ]

    def build_query_graph(self):
        """Write the query-graph file unless it already exists.

        File layout: a ``#0`` header line, the number of labels, one label
        per line, the number of edges, then one ``src dst`` pair per line.
        An existing file is left untouched, so after editing ``labels`` or
        ``edges`` the stale file has to be removed for the change to apply.
        """
        query_prefix = BI_3_ORIGINAL_Q_PRE
        query_path = f"{query_prefix}/{self.query_graph_name}"
        if os.path.exists(query_path):
            return
        # Create the directory here as well, so the method does not depend on
        # setup code having run beforehand.
        os.makedirs(query_prefix, exist_ok=True)
        with open(query_path, "w") as f:
            f.write("#0\n")
            f.write(f"{len(self.labels)}\n")
            # writelines consumes the generator directly; no throwaway list
            # of write() return values is built.
            f.writelines(f"{label}\n" for label in self.labels)
            f.write(f"{len(self.edges)}\n")
            f.writelines(f"{src} {dst}\n" for src, dst in self.edges)

    def run_query(self):
        """Run the query through ``run_veq_m_100k``.

        Creates the log directory when missing, then passes the log-file
        path, task name, command line and ``time_table`` to the helper.
        """
        log_prefix = f"{ORIGINAL_LOG_PREFIX}/{self.dirname}"
        # exist_ok avoids the check-then-create race of a separate exists().
        os.makedirs(log_prefix, exist_ok=True)
        run_veq_m_100k(
            f"{log_prefix}/{self.log_name}",
            self.task_name,
            self.args,
            self.time_table,
        )


# Instance whose build_query_graph / run_query methods are called in the cells below.
original_query_proc = OriginalBI3()

In [None]:
""" Build `query graph` """

# Writes the query-graph file only when it is not already on disk.
original_query_proc.build_query_graph()

In [None]:
""" Run query """

# Invokes run_veq_m_100k with this runner's log path, task name, args and time_table.
original_query_proc.run_query()

>>> Running: original_china_bi3...
    Data file: ./out/original/BI_11/data_graph.txt
    Query file: ./out/original/BI_3/original_china_bi3_query_graph.txt
    Output file: 
    Sum of |C(u)|: 488316
    Total Recursive Call Count: 40584
    Number of Matches: 100000
    Filtering Time (ms): 854.313
    Verification Time (ms): 25661.9
    Processing Time (ms): 26516.2
<<< Done!


## Optimized


In [None]:
@dataclass
class OptimizedBI3:
    """Runner for the optimized BI-3 query (a 6-vertex path pattern).

    None of the attributes below carries a type annotation, so ``@dataclass``
    registers no fields: they are plain class attributes shared by every
    instance, including the mutable ``time_table`` list.
    """

    # Sub-directory name appended to the log prefix in `run_query`.
    dirname = "BI_3"
    # Labels written to the query-graph file, one per line, in this order.
    labels = [
        BaseLabel.Country + "China",
        "forum",
        "post",
        "comment",
        "tag",
        "tagclass",
    ]
    # (src, dst) pairs written to the query-graph file; presumably indices
    # into `labels`, forming a simple chain 0-1-...-5 -- TODO confirm format.
    edges = [(0, 1), (1, 2), (2, 3), (3, 4), (4, 5)]
    task_name = "optimized_china_bi3"
    query_graph_name = "optimized_china_bi3_query_graph.txt"
    log_name = "optimized_china_bi3_result.txt"
    # Class-level list handed to `run_veq_m_100k` on every run; presumably the
    # helper records timings into it -- verify against `prelude`.
    time_table = list[float]()
    # Command line for the matcher: data graph via -dg, query graph via -qg.
    # NOTE(review): the data graph is BI_11_DG_OPTIMIZED although this is
    # BI 3 -- confirm the reuse is intentional.
    args = wsl_if_on_windows + [
        "./VEQ_M_100k",
        "-dg",
        BI_11_DG_OPTIMIZED,
        "-qg",
        f"{BI_3_OPTIMIZED_Q_PRE}/{query_graph_name}",
    ]

    def build_query_graph(self):
        """Write the query-graph file unless it already exists.

        File layout: a ``#0`` header line, the number of labels, one label
        per line, the number of edges, then one ``src dst`` pair per line.
        An existing file is left untouched, so after editing ``labels`` or
        ``edges`` the stale file has to be removed for the change to apply.
        """
        query_prefix = BI_3_OPTIMIZED_Q_PRE
        query_path = f"{query_prefix}/{self.query_graph_name}"
        if os.path.exists(query_path):
            return
        # Create the directory here as well, so the method does not depend on
        # setup code having run beforehand.
        os.makedirs(query_prefix, exist_ok=True)
        with open(query_path, "w") as f:
            f.write("#0\n")
            f.write(f"{len(self.labels)}\n")
            # writelines consumes the generator directly; no throwaway list
            # of write() return values is built.
            f.writelines(f"{label}\n" for label in self.labels)
            f.write(f"{len(self.edges)}\n")
            f.writelines(f"{src} {dst}\n" for src, dst in self.edges)

    def run_query(self):
        """Run the query through ``run_veq_m_100k``.

        Creates the log directory when missing, then passes the log-file
        path, task name, command line and ``time_table`` to the helper.
        """
        log_prefix = f"{OPTIMIZED_LOG_PREFIX}/{self.dirname}"
        # exist_ok avoids the check-then-create race of a separate exists().
        os.makedirs(log_prefix, exist_ok=True)
        run_veq_m_100k(
            f"{log_prefix}/{self.log_name}",
            self.task_name,
            self.args,
            self.time_table,
        )


# Instance whose build_query_graph / run_query methods are called in the cells below.
optimized_query_proc = OptimizedBI3()

In [None]:
""" Build `query graph` """

# Writes the query-graph file only when it is not already on disk.
optimized_query_proc.build_query_graph()

In [None]:
""" Run query """

# Invokes run_veq_m_100k with this runner's log path, task name, args and time_table.
optimized_query_proc.run_query()

>>> Running: optimized_china_bi3...
    Data file: ./out/optimized/BI_11/data_graph.txt
    Query file: ./out/optimized/BI_3/optimized_china_bi3_query_graph.txt
    Output file: 
    Sum of |C(u)|: 125935
    Total Recursive Call Count: 47581
    Number of Matches: 100000
    Filtering Time (ms): 309.348
    Verification Time (ms): 24971.3
    Processing Time (ms): 25280.6
<<< Done!


In [None]:
""" Show BI-3 `comparison data-frame` """

# Side-by-side timing table: one column per runner, fed from its time_table.
# NOTE(review): "task" has a single entry, so each time_table presumably has
# to hold exactly one value here -- confirm the run cells were executed once.
df = pl.DataFrame(
    dict(
        [
            ("task", ["china_bi3"]),
            ("original (ms)", original_query_proc.time_table),
            ("optimized (ms)", optimized_query_proc.time_table),
        ]
    )
)
df

task,original (ms),optimized (ms)
str,f64,f64
"""china_bi3""",26516.2,25280.6
