# §5.1 Concentration (HHI) — ETH & Cosmos

This notebook displays the **official HHI summary** (from `reports/metrics/hhi_summary.json`) and **recomputes** HHI directly from the processed CSVs for a sanity check.


In [None]:
import json, pandas as pd
from pathlib import Path
# Load the official HHI summary. Path.read_text opens and closes the file for us
# (the previous bare open() left the handle dangling), and the encoding is pinned
# to UTF-8 -- the JSON default -- instead of whatever the platform locale provides.
js = json.loads(Path('reports/metrics/hhi_summary.json').read_text(encoding='utf-8'))
df = pd.DataFrame(js)
# Show the summary columns with HHI rounded to 6 decimals and N_eff to 2.
display(df[['label','HHI','N_eff','file']].round({'HHI':6,'N_eff':2}))


## Cross-check from source files
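We recompute HHI from the processed CSVs to confirm the summary. The share column is auto-detected: a known column name (`raw_share`, `share`, `hhi_share`, `fraction`, `weight`) is preferred; otherwise the first numeric column whose values all lie in [0, 1] and sum to roughly 1 is used. Preference is operator-level for ETH.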

In [None]:
import pandas as pd, math, os
from src.metrics.hhi import hhi_from_shares

# Display label -> processed CSV to recompute HHI from.
# Insertion order is the order in which rows appear in the cross-check table.
paths = dict([
    ('ETH — Operator level', 'data/processed/ethereum/rated_operator_hhi_2025-10-21.csv'),
    ('ETH — Owner level', 'data/processed/ethereum/owner_hhi_2025-10-21.csv'),
    ('Cosmos Hub', 'data/processed/cosmos/cosmoshub_hhi_2025-10-21.csv'),
])

def find_share_col(df):
    """Return the name of the column in ``df`` to use as the share column.

    Lookup order:
      1. The first of ``raw_share``, ``share``, ``hhi_share``, ``fraction``,
         ``weight`` that exists as a column.
      2. Otherwise, the first numeric column whose minimum and maximum both lie
         in [0, 1] and whose sum lies in [0.9, 1.1].

    Raises:
        KeyError: if neither rule finds a column.
    """
    preferred_names = ('raw_share', 'share', 'hhi_share', 'fraction', 'weight')
    explicit = next((name for name in preferred_names if name in df.columns), None)
    if explicit is not None:
        return explicit

    # No well-known name: look for a numeric column that behaves like shares.
    numeric = df.select_dtypes('number')
    for name in numeric.columns:
        values = numeric[name]
        if not (0 <= values.min() <= 1):
            continue
        if not (0 <= values.max() <= 1):
            continue
        if 0.9 <= float(values.sum()) <= 1.1:
            return name

    raise KeyError('No share-like column found')

# Recompute HHI / N_eff for every configured file. A path that does not exist
# still gets a row, with HHI and N_eff left as None.
rows = []
for label, path in paths.items():
    if os.path.exists(path):
        # NOTE: this rebinds the notebook-level name `df` to the CSV just read.
        df = pd.read_csv(path)
        col = find_share_col(df)
        hhi, neff = hhi_from_shares(df[col])
    else:
        hhi = neff = None
    rows.append({'label': label, 'file': path, 'HHI': hhi, 'N_eff': neff})

df_check = pd.DataFrame(rows)
# Same column order and rounding as the official summary table.
display(
    df_check[['label', 'HHI', 'N_eff', 'file']].round({'HHI': 6, 'N_eff': 2})
)


## Write refreshed human summary
This mirrors `scripts/build_hhi_summary.py` so the README can cite a human-readable file.

In [None]:
from pathlib import Path
# Build the human-readable markdown summary from the recomputed table (`df_check`):
# one bullet per row, with a "(file not found)" marker when HHI is missing.
md = ['# Concentration Metrics — HHI Summary\n']
for _,r in df_check.iterrows():
    if pd.isna(r['HHI']):
        md.append(f"- {r['label']}: **(file not found)**  \n  _Source:_ `{r['file']}`")
    else:
        md.append(f"- {r['label']}: HHI ≈ **{r['HHI']:.6f}** → N_eff ≈ **{r['N_eff']:.2f}**  \n  _Source:_ `{r['file']}`")
Path('reports/metrics').mkdir(parents=True, exist_ok=True)
# Write explicitly as UTF-8: the text contains "≈" and "→", which legacy locale
# encodings (e.g. cp1252) cannot represent, so the platform default could raise
# UnicodeEncodeError.
Path('reports/metrics/hhi_summary.md').write_text('\n'.join(md)+'\n', encoding='utf-8')
print('Wrote reports/metrics/hhi_summary.md')
