## Descriptive Statistics by Topology

Table structure (ICSE-style):
- **Columns:** 6 topologies (Seq FO, Par FO, Chain, Hierarchical, Probabilistic, Mesh)
- **Blocks:** Throughput (RPS), Latency (s), Energy (kJ), CPU Utilization (%)
- **Rows per block:** Mean, Min, 50% (median), Max, Std (sample standard deviation), CV (Std / Mean, in %)

Data: `run_table.csv` (all sizes and repetitions aggregated per topology).

In [1]:
import pandas as pd
import numpy as np
from pathlib import Path

# Same data file as Boxplots.ipynb — run_table.csv inside a 5_results_data directory.
# Search order: next to the working directory's parent, inside the working directory,
# then every higher ancestor. The ancestor search lets the notebook find the data when
# the kernel is started from a nested sub-directory of the repository.
cwd = Path('.').resolve()
data_rel = Path('5_results_data') / 'run_table.csv'
candidates = [cwd.parent / data_rel, cwd / data_rel]
# cwd.parents[0] is cwd.parent (already listed), so only add the remaining ancestors.
candidates += [ancestor / data_rel for ancestor in list(cwd.parents)[1:]]
# When nothing exists, fall back to the first candidate so the diagnostic line below
# still reports the primary expected location together with "Exists: False".
RUN_TABLE_PATH = next((p for p in candidates if p.exists()), candidates[0])
print("Data:", RUN_TABLE_PATH, "| Exists:", RUN_TABLE_PATH.exists())

Data: /home/irena/Documents/Research Project/topology-scale-mubench-replication/5_results_analysis/5_results_data/run_table.csv | Exists: False


In [2]:
# Load the run table and keep only rows whose `__done` column equals 'DONE'.
df = pd.read_csv(RUN_TABLE_PATH)
df = df[df['__done'] == 'DONE']

# Topology identifiers in the column order used by the tables
TOPOLOGY_ORDER = [
    'sequential_fanout',
    'parallel_fanout',
    'chain_with_branching',
    'hierarchical_tree',
    'probabilistic_tree',
    'complex_mesh',
]
# Short column headers for each topology identifier
TOPOLOGY_LABELS = {
    'sequential_fanout': 'Seq FO',
    'parallel_fanout': 'Par FO',
    'chain_with_branching': 'Chain',
    'hierarchical_tree': 'Hierarchical',
    'probabilistic_tree': 'Probabilistic',
    'complex_mesh': 'Mesh',
}

# Keep only rows belonging to one of the known topologies. The explicit .copy() detaches
# the filtered frame from its parent, so the column assignments below operate on an
# independent frame and do not raise SettingWithCopyWarning.
df = df[df['topology'].isin(TOPOLOGY_ORDER)].copy()
# Ordered categorical so grouped output follows the table's column order.
df['topology'] = pd.Categorical(df['topology'], categories=TOPOLOGY_ORDER, ordered=True)

# If cpu_usage_avg is fractional (e.g. 0.5 = 50%), convert to percentage for display.
# Heuristic: a column maximum of at most 1.5 is treated as "fractional"
# (presumably the margin above 1.0 allows for slight overshoot — TODO confirm).
if 'cpu_usage_avg' in df.columns and df['cpu_usage_avg'].max() <= 1.5:
    df['cpu_usage_avg'] = df['cpu_usage_avg'] * 100
# Latency in seconds, energy in kJ for table
# (assumes `energy` is recorded in joules — TODO confirm against the run table).
df['avg_latency_s'] = df['avg_latency_ms'] / 1000
df['energy_kj'] = df['energy'] / 1000

print("Rows:", len(df), "| Topologies:", df['topology'].nunique(), "| Sizes:", sorted(df['system_size'].unique().tolist()))

FileNotFoundError: [Errno 2] No such file or directory: '/home/irena/Documents/Research Project/topology-scale-mubench-replication/5_results_analysis/5_results_data/run_table.csv'

In [None]:
def descriptive_block(series):
    """Summarise a numeric series as a dict of six statistics.

    Keys, in order: 'Mean', 'Min', '50%' (median), 'Max', 'Std' and 'CV'.
    'CV' is Std / Mean expressed in percent; it is NaN when the mean is exactly 0.
    """
    center = series.mean()
    spread = series.std()
    # Guard against division by zero; a NaN mean falls through and yields a NaN CV.
    if center == 0:
        rel_spread = np.nan
    else:
        rel_spread = spread / center * 100
    summary = {}
    summary['Mean'] = center
    summary['Min'] = series.min()
    summary['50%'] = series.median()
    summary['Max'] = series.max()
    summary['Std'] = spread
    summary['CV'] = rel_spread
    return summary

def build_block_df(df, metric_col, stat_order=None):
    """Build one table block: statistics as rows, topologies as columns.

    Parameters
    ----------
    df : pandas.DataFrame
        Run data containing a 'topology' column and, normally, `metric_col`.
    metric_col : str
        Name of the column to summarise per topology.
    stat_order : list of str, optional
        Row order of the result; defaults to
        ['Mean', 'Min', '50%', 'Max', 'Std', 'CV'].

    Returns
    -------
    pandas.DataFrame
        An empty frame when `metric_col` is not a column of `df`. Otherwise one
        row per entry of `stat_order` and one column per entry of TOPOLOGY_ORDER,
        labelled with the short names from TOPOLOGY_LABELS. Topologies without
        any rows in `df` appear as all-NaN columns.
    """
    if metric_col not in df.columns:
        return pd.DataFrame()
    stat_order = stat_order or ['Mean', 'Min', '50%', 'Max', 'Std', 'CV']
    # Build the frame from {topology: {stat: value}} so the orientation is explicit:
    # outer keys become columns, inner keys become the row index. The earlier
    # apply(...).unstack(level=1) put the statistics in the columns instead, so the
    # two reindex calls below matched nothing and the whole block came out as NaN.
    per_topology = {
        topo: descriptive_block(values)
        for topo, values in df.groupby('topology', observed=True)[metric_col]
    }
    by_topo = pd.DataFrame(per_topology)
    by_topo = by_topo.reindex(columns=TOPOLOGY_ORDER)
    by_topo.columns = [TOPOLOGY_LABELS[t] for t in TOPOLOGY_ORDER]
    by_topo = by_topo.reindex(stat_order)
    return by_topo

# Table blocks in display order, as (column name in the prepared frame, block title) pairs.
METRIC_BLOCKS = list({
    'throughput_rps': 'Throughput (RPS)',
    'avg_latency_s': 'Latency (s)',
    'energy_kj': 'Energy (kJ)',
    'cpu_usage_avg': 'CPU Utilization (%)',
}.items())

# Row order of the statistics inside every block.
STAT_ORDER = [
    'Mean',
    'Min',
    '50%',
    'Max',
    'Std',
    'CV',
]

In [None]:
# One statistics table per metric, keyed by the block's display title.
blocks = {title: build_block_df(df, col, STAT_ORDER) for col, title in METRIC_BLOCKS}

# Show every block under a banner, rounded to four decimals.
banner = "=" * 60
for title, tbl in blocks.items():
    print("\n" + banner)
    print("Block:", title)
    print(banner)
    display(tbl.round(4))

### Single combined table (all blocks stacked)

Rows carry a two-level MultiIndex (Metric, Stat); the columns are the 6 topologies.

In [None]:
# Prefix each block's statistic index with the block title, giving every table a
# two-level (Metric, Stat) row index, then stack the blocks vertically.
combined = [
    tbl.set_axis(
        pd.MultiIndex.from_product([[title], tbl.index], names=['Metric', 'Stat']),
        axis=0,
    )
    for title, tbl in blocks.items()
]
table_combined = pd.concat(combined, axis=0)
table_combined

In [None]:
# Write the stacked table (all blocks) to a single CSV file inside ./tables,
# creating the directory on first use.
OUT_DIR = Path('tables')
OUT_DIR.mkdir(exist_ok=True)
csv_path = OUT_DIR / 'descriptive_stats_by_topology.csv'
table_combined.to_csv(csv_path)
print("Saved:", csv_path)

In [None]:
# LaTeX (rounded for paper)
# Layout: one label column followed by one right-aligned column per topology.
n_topologies = len(TOPOLOGY_ORDER)
latex_parts = [
    r'\begin{table}[t]',
    r'\caption{Descriptive statistics by topology.}',
    r'\label{tab:descriptive-topology}',
    r'\begin{tabular}{l' + 'r' * n_topologies + '}',
    r'\toprule',
    ' & '.join([''] + [TOPOLOGY_LABELS[t] for t in TOPOLOGY_ORDER]) + r' \\',
    r'\midrule',
]

for position, (title, tbl) in enumerate(blocks.items()):
    # Separate consecutive blocks with a rule; none after the last block, so that
    # \bottomrule is not preceded by a redundant \midrule.
    if position:
        latex_parts.append(r'\midrule')
    # '%' starts a comment in LaTeX, so titles such as "CPU Utilization (%)" must be escaped.
    title_tex = title.replace('%', r'\%')
    # Every fragment is a raw string: the row must end with two backslashes (a non-raw
    # '\\' would emit only one and leave the row unterminated).
    latex_parts.append(
        r'\multicolumn{' + str(n_topologies + 1) + r'}{l}{\textbf{' + title_tex + r'}} \\'
    )
    latex_parts.append(r'\midrule')
    for stat in STAT_ORDER:
        if stat not in tbl.index:
            continue
        # Fixed two-decimal formatting keeps the columns uniform (e.g. "1.50", not "1.5").
        cells = ['{:.2f}'.format(value) for value in tbl.loc[stat]]
        stat_tex = r'\textbf{50\%}' if stat == '50%' else stat
        latex_parts.append(stat_tex + ' & ' + ' & '.join(cells) + r' \\')
latex_parts.append(r'\bottomrule')
latex_parts.append(r'\end{tabular}')
latex_parts.append(r'\end{table}')

latex_str = '\n'.join(latex_parts)
print(latex_str)

In [None]:
# Store the generated LaTeX source in OUT_DIR.
tex_path = OUT_DIR / 'descriptive_stats_by_topology.tex'
tex_path.write_text(latex_str)
print("Saved:", tex_path)