In [1]:
%pip install pandas


Collecting pandas
  Using cached pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (91 kB)
Collecting numpy>=1.26.0 (from pandas)
  Using cached numpy-2.3.5-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (62 kB)
Collecting pytz>=2020.1 (from pandas)
  Using cached pytz-2025.2-py2.py3-none-any.whl.metadata (22 kB)
Collecting tzdata>=2022.7 (from pandas)
  Using cached tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Using cached pandas-2.3.3-cp312-cp312-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (12.4 MB)
Using cached numpy-2.3.5-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl (16.6 MB)
Using cached pytz-2025.2-py2.py3-none-any.whl (509 kB)
Using cached tzdata-2025.2-py2.py3-none-any.whl (347 kB)
Installing collected packages: pytz, tzdata, numpy, pandas
Successfully installed numpy-2.3.5 pandas-2.3.3 pytz-2025.2 tzdata-2025.2
Note: you may need to restart the kernel to use updated packages.


In [None]:
import sys
sys.executable


In [3]:
import pandas as pd
import re
from pathlib import Path

base = Path("results")
custom_file = base / "producer_custom.txt"
native_file = base / "producer_native.txt"

def parse_custom(path):
    text = path.read_text()
    m = re.findall(r"Sent\s+(\d+)\s+messages\s+in\s+(\d+)s", text)
    if not m:
        raise ValueError("Não encontrei linha 'Sent ... messages in ...s' no custom")
    num, secs = map(int, m[-1])
    thr = num / secs if secs > 0 else float("nan")
    return num, secs, thr

def parse_native(path):
    text = path.read_text()
    m = re.findall(r"(\d+)\s+records sent.*?([\d\.]+)\s+records/sec", text)
    if not m:
        raise ValueError("Não encontrei linha 'records sent' no native")
    num = int(m[-1][0])
    thr = float(m[-1][1])
    secs = num / thr if thr > 0 else float("nan")
    return num, secs, thr

# parse dos arquivos
num_c, t_c, thr_c = parse_custom(custom_file)
num_n, t_n, thr_n = parse_native(native_file)

# monta DataFrame com os resultados
df = pd.DataFrame([
    {
        "Abordagem": "Script custom (console-producer)",
        "# Mensagens": num_c,
        "Tempo (s)": round(t_c, 2),
        "Throughput (msg/s)": round(thr_c, 2),
    },
    {
        "Abordagem": "Ferramenta nativa (perf-test)",
        "# Mensagens": num_n,
        "Tempo (s)": round(t_n, 2),
        "Throughput (msg/s)": round(thr_n, 2),
    },
])

display(df)

# gera tabela em LaTeX para colar no artigo
latex = df.to_latex(
    index=False,
    caption="Resultados dos testes de carga em Apache Kafka.",
    label="tab:resultados-kafka"
)

print(latex)


Unnamed: 0,Abordagem,# Mensagens,Tempo (s),Throughput (msg/s)
0,Script custom (console-producer),20000,87.0,229.89
1,Ferramenta nativa (perf-test),20000,0.72,27662.5


\begin{table}
\caption{Resultados dos testes de carga em Apache Kafka.}
\label{tab:resultados-kafka}
\begin{tabular}{lrrr}
\toprule
Abordagem & # Mensagens & Tempo (s) & Throughput (msg/s) \\
\midrule
Script custom (console-producer) & 20000 & 87.000000 & 229.890000 \\
Ferramenta nativa (perf-test) & 20000 & 0.720000 & 27662.500000 \\
\bottomrule
\end{tabular}
\end{table}

