In [None]:
# analysis/compute_stats.py
import pandas as pd
import numpy as np
from pathlib import Path
from scipy import stats

folder = Path('sample-hardhat/repro-artifacts/local')  # adjust path if needed

# Columns that are converted to numeric and summarised.
numcols = ['transferGas','mintGas','ownershipGas','ownerBalance','user1Balance','user2Balance','totalSupplyBefore','totalSupplyAfter']


def format_mean_sd(mean, std, sig=6):
    """Return ``"<mean> ± <std>"`` with *sig* significant figures each.

    Significant figures are used instead of a fixed number of decimals so
    that small-magnitude columns (e.g. balances around 0.001) are not
    collapsed to ``0.0 ± 0.0`` while large gas values stay readable.
    NaN inputs are rendered as ``nan``.
    """
    return f"{mean:.{sig}g} ± {std:.{sig}g}"


def coerce_numeric(frame, columns):
    """Convert *columns* of *frame* to numeric dtype in place.

    Values that cannot be parsed become NaN. Columns that are absent from
    *frame* are skipped (with a printed notice) instead of raising
    ``KeyError``.

    Returns the list of columns that were actually present, in the order
    given.
    """
    present = [name for name in columns if name in frame.columns]
    missing = [name for name in columns if name not in frame.columns]
    if missing:
        print("Skipping columns not present in the data:", ", ".join(missing))
    for name in present:
        frame[name] = pd.to_numeric(frame[name], errors='coerce')
    return present


def summarize(frame, columns):
    """Build a per-column summary table for *columns* of *frame*.

    The result has one row per column and the fields ``count``, ``mean``,
    ``std``, ``min``, ``max`` plus a preformatted ``mean±sd`` string.
    An empty *columns* list yields an empty table with the same fields.
    """
    fields = ['count','mean','std','min','max']
    if not columns:
        return pd.DataFrame(columns=fields + ['mean±sd'])
    table = frame[columns].agg(fields).T
    table['mean±sd'] = [
        format_mean_sd(mean, std)
        for mean, std in zip(table['mean'], table['std'])
    ]
    return table


# ``__name__`` is "__main__" both when run as a script and inside a notebook
# cell, so the analysis still executes there and leaves ``files``, ``dfs``,
# ``full`` and ``summary`` available as globals.
if __name__ == "__main__":
    files = sorted(folder.glob('interaction-summary-local-*.csv'))
    if not files:
        print("No CSV files found in", folder)
        raise SystemExit(1)

    dfs = [pd.read_csv(f) for f in files]
    full = pd.concat(dfs, ignore_index=True)

    # Convert numeric columns (coerce errors -> NaN)
    present_cols = coerce_numeric(full, numcols)

    # Summary stats
    summary = summarize(full, present_cols)
    print(summary[['count','mean±sd','min','max']])

# Example paired t-test: if you have two conditions whose rows are aligned by
# index (row i of one is the same run as row i of the other), you can compare
# them with scipy.stats.ttest_rel.
# (This is only a template; adapt it once you have baseline vs. extension CSVs.)
# Suppose you have baseline_df and extension_df with the same index:
# tstat, pval = stats.ttest_rel(baseline_df['transferGas'], extension_df['transferGas'], nan_policy='omit')
# print("paired t-test transferGas: t=%.3f p=%.3g" % (tstat, pval))


                   count             mean±sd        min        max
transferGas         21.0  49155.14 ± 6131.52  34498.000  51598.000
mintGas             18.0       53572.0 ± 0.0  53572.000  53572.000
ownershipGas        18.0       28656.0 ± 0.0  28656.000  28656.000
ownerBalance        21.0         100.0 ± 0.0     99.998     99.999
user1Balance        21.0           0.0 ± 0.0      0.001      0.002
user2Balance        21.0          0.01 ± 0.0      0.010      0.010
totalSupplyBefore   21.0         100.0 ± 0.0    100.000    100.010
totalSupplyAfter    21.0        100.01 ± 0.0    100.010    100.010
