In [22]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import subprocess
import re


In [None]:
path = "midsummer-nights-dream.txt"
params = [16, 32, 64, 128, 256, 512]
num_executions = 10000

results = {param: {"REC": [], "HLL": [], "PCSA": []} for param in params}


for param in params:
    for seed in range(1, num_executions + 1):
        # Run the command with subprocess
        p = subprocess.run(["./Cardinality", "-f", path, "-p", str(param), "--seed", str(seed)], capture_output=True, text=True)

        if p.returncode == 0:
            for line in p.stdout.splitlines():
                if line.startswith("Recordinality"):
                    results[param]["REC"].append(float(line.split(": ")[1]))
                elif line.startswith("HyperLogLog"):
                    results[param]["HLL"].append(float(line.split(": ")[1]))
                elif line.startswith("Probabilistic Counting"):
                    results[param]["PCSA"].append(float(line.split(": ")[1]))
        else:
            print(f"Error with seed {seed} and param {param}: {p.stderr}")


In [27]:
averages = {}
for param in params:
    averages[param] = {
        algo: np.mean(scores) for algo, scores in results[param].items()
    }

latex_output = ""
with open("datasets/" + path.split('.')[0] + ".dat", 'r') as file:
    lines = file.readlines()
    line_count = len(lines)
    latex_output = ""

    for param, scores in averages.items():
        row = f"\\textbf{{{param}}}"
        for algo, avg in scores.items():
            error = np.std(results[param][algo], ddof=0) / line_count
            # error = abs(avg - line_count) / line_count * 100
            row += f" & {avg:.0f} & {error:.2f}\%"
        latex_output += row + " \\\\ \\hline\n"

print(latex_output)

\textbf{16} & 3146 & 0.55\% & 3141 & 0.27\% & 3207 & 0.20\% \\ \hline
\textbf{32} & 3132 & 0.34\% & 3135 & 0.19\% & 3179 & 0.14\% \\ \hline
\textbf{64} & 3162 & 0.22\% & 3146 & 0.13\% & 3161 & 0.10\% \\ \hline
\textbf{128} & 3163 & 0.13\% & 3152 & 0.09\% & 3151 & 0.07\% \\ \hline
\textbf{256} & 3140 & 0.08\% & 3145 & 0.06\% & 3143 & 0.04\% \\ \hline
\textbf{512} & 3138 & 0.04\% & 3141 & 0.04\% & 3158 & 0.03\% \\ \hline

