# Player Feature EDA
Quick sanity checks on the feature extract before training models.

In [None]:
from pathlib import Path
import csv

# Location of the player feature extract; the error message below names
# analysis/ml_features.py as the script that produces it.
FEATURES = Path('analysis/features/players.csv')

# Raise explicitly rather than assert so the check still runs under ``python -O``.
if not FEATURES.exists():
    raise FileNotFoundError(f'{FEATURES} not found. Run analysis/ml_features.py first.')

# newline='' hands newline handling to the csv module, so quoted fields that
# contain line breaks are parsed as a single value (see the csv module docs).
with FEATURES.open('r', encoding='utf-8', newline='') as fh:
    # One dict per data row, keyed by the header line; all values are strings.
    rows = list(csv.DictReader(fh))

# Last expression of the cell: shows the number of loaded rows.
len(rows)

## Distribution Helpers
Use pandas `describe()` when pandas is installed; otherwise fall back to pure-Python summaries.

In [None]:
# Probe for pandas once; later cells branch on ``use_pandas`` instead of
# repeating the import guard.
try:
    import pandas as pd  # noqa: F401
except ImportError:
    # pandas is optional: the pure-Python summaries are used instead.
    use_pandas = False
else:
    use_pandas = True
# Last expression of the cell: shows which path will be taken.
use_pandas

In [None]:
# Columns to summarise; shared by both branches so they always report the same set.
SUMMARY_COLUMNS = ['bb_per_100', 'vpip_pct', 'three_bet_pct']

if use_pandas:
    import pandas as pd
    df = pd.DataFrame(rows)
    for col in SUMMARY_COLUMNS:
        # Skip columns the extract does not contain.
        if col in df:
            # CSV values arrive as strings, so cast before describing.
            display(df[col].astype(float).describe())
else:
    import math
    from statistics import mean

    def _quantile(ordered, q):
        """Return the ``q`` quantile (0 <= q <= 1) of a non-empty sorted list.

        Interpolates linearly between the two closest ranks, which is the
        default interpolation of pandas' ``quantile``/``describe``, so both
        branches of this cell report comparable quartiles.
        """
        pos = q * (len(ordered) - 1)
        lower = math.floor(pos)
        upper = math.ceil(pos)
        if lower == upper:
            # Exact rank hit: no interpolation needed.
            return ordered[lower]
        weight = pos - lower
        return ordered[lower] + (ordered[upper] - ordered[lower]) * weight

    def summary(key):
        """Print count, mean and a five-number summary for column ``key`` of ``rows``.

        Prints nothing when ``rows`` is empty or no row carries ``key``; the
        pandas branch likewise skips columns missing from the extract.
        """
        if not any(key in r for r in rows):
            return
        # NOTE(review): blank/None cells are coerced to 0.0, which pulls the
        # mean and quartiles toward zero - confirm this is intended.
        values = sorted(float(r.get(key, 0.0) or 0.0) for r in rows)
        n = len(values)
        q1 = _quantile(values, 0.25)
        median = _quantile(values, 0.5)
        q3 = _quantile(values, 0.75)
        print(key)
        print('  count', n)
        print('  mean', round(mean(values), 3))
        print('  min / q1 / median / q3 / max', values[0], q1, median, q3, values[-1])

    for column in SUMMARY_COLUMNS:
        summary(column)