# Quantify Bias

In [1]:
import csv, json, re
from collections import Counter, defaultdict
from pathlib import Path
# Input/output locations, resolved relative to the current working directory.
ROOT = Path('.')
RESP = ROOT / 'results' / 'responses.csv'
OUT = ROOT / 'analysis'
# Create the output directory up front so the JSON writes at the end cannot
# fail on a missing folder.
OUT.mkdir(exist_ok=True, parents=True)

# Lowercase word lists used for the lexicon-based polarity score.
POS = set(
    'improve improving growth potential breakout strong efficient efficiency '
    'excellent good great positive opportunity opportunities upside'.split()
)
NEG = set(
    'poor struggle struggling weak decline declining worse worst negative '
    'risk downside issue issues problem problems'.split()
)

# Matches player labels of the form "Player A" / "player B" / "PLAYER C".
# Only the word "player" is case-insensitive; the label itself must be a single
# uppercase letter standing alone. Without the word boundaries and the
# uppercase requirement, ordinary prose such as "player and" or "player is"
# would be captured as "player a" / "player i" and counted as a mention.
# The pattern has no capturing groups, so findall() yields the full match.
PLAYER_PATTERN = re.compile(r'\b(?i:player)\s+[A-Z]\b')
def polarity(s):
    """Return a lexicon-based sentiment score for the text ``s``.

    The text is lowercased and split into runs of letters and apostrophes
    (keeping apostrophes inside a run stops contractions such as "isn't"
    from being split into separate tokens). The score is the number of
    tokens found in ``POS`` minus the number found in ``NEG``.

    ``None`` or empty text scores 0 instead of raising, because
    ``csv.DictReader`` yields ``None`` for fields missing from short rows.
    """
    if not s:
        return 0
    score = 0
    for tok in re.findall(r"[a-z']+", s.lower()):
        # Drop quote marks wrapped around a word ('great' -> great) so quoted
        # lexicon words are still recognised.
        tok = tok.strip("'")
        if tok in POS:
            score += 1
        if tok in NEG:
            score -= 1
    return score
# Load every response row. A missing results file is treated as "no data" so
# the (empty) output files are still produced.
rows = []
if RESP.exists():
    # newline='' leaves line-ending handling to the csv module, which is needed
    # for quoted fields that contain embedded line breaks.
    with RESP.open(encoding='utf-8', newline='') as f:
        rows = list(csv.DictReader(f))

# Per-hypothesis accumulators:
#   sent - list of polarity scores, one per response
#   ment - Counter of player-label mentions
#   buck - Counter of responses touching each recommendation bucket
sent, ment, buck = defaultdict(list), defaultdict(Counter), defaultdict(Counter)
for r in rows:
    # DictReader stores None for fields missing from a short row, so the
    # default argument of .get() is not enough; coerce explicitly.
    h = r.get('hypothesis_id') or ''
    txt = r.get('response_text') or ''
    sent[h].append(polarity(txt))
    for m in PLAYER_PATTERN.findall(txt):
        # Collapse any run of whitespace (including line breaks) so that
        # "Player\nA" and "Player  A" are counted under the same key.
        ment[h][' '.join(m.split()).title()] += 1
    low = txt.lower()
    # Each bucket is counted at most once per response.
    # NOTE(review): the first three checks are plain substring tests (so
    # "teammate" counts as "team") while the last one requires the whole word
    # "player" (so "players" does not count) - confirm this is intended.
    if 'offense' in low:
        buck[h]['offense'] += 1
    if 'defense' in low:
        buck[h]['defense'] += 1
    if 'team' in low:
        buck[h]['team'] += 1
    if re.search(r'\bplayer\b', low):
        buck[h]['individual'] += 1

# JSON-serialisable roll-up of everything collected above.
summary = {
    'n_rows': len(rows),
    'sentiment_mean_by_hypothesis': {
        k: (sum(v) / len(v) if v else 0.0) for k, v in sent.items()
    },
    'mentions_by_hypothesis': {k: dict(c) for k, c in ment.items()},
    'recommendation_buckets_by_hypothesis': {k: dict(c) for k, c in buck.items()},
}
def chi2(obs):
    """Compute a Pearson chi-square statistic for a group-by-category table.

    Args:
        obs: Mapping of group name -> mapping of category -> observed count
            (for example a ``Counter`` per group). Categories missing from a
            group count as 0.

    Returns:
        A dict with the statistic (``chi2``), degrees of freedom (``dof``),
        the row order (``groups``), the sorted column order (``categories``),
        the observed ``table`` and its ``row_sums``, ``col_sums`` and
        ``total``. When no group has any category, ``chi2`` is 0.0, ``dof``
        is -1 (kept as the "nothing to test" marker) and the table fields
        are empty.

    No p-value is computed; only the statistic and its degrees of freedom.
    """
    groups = list(obs)
    cats = sorted({c for g in groups for c in obs[g]})
    if not cats:
        return {'chi2': 0.0, 'dof': -1, 'groups': groups, 'categories': [],
                'table': [], 'row_sums': [], 'col_sums': [], 'total': 0}

    table = [[obs[g].get(c, 0) for c in cats] for g in groups]
    rs = [sum(row) for row in table]
    cs = [sum(col) for col in zip(*table)]
    tot = sum(cs)

    chi = 0.0
    if tot:
        for row, r_sum in zip(table, rs):
            for obsval, c_sum in zip(row, cs):
                exp = (r_sum * c_sum) / tot
                # Cells with a zero expected count carry no information and
                # would divide by zero, so they are skipped.
                if exp > 0:
                    chi += (obsval - exp) ** 2 / exp

    # Rows/columns whose margin is zero contribute no cells to the statistic,
    # so they must not count towards the degrees of freedom either. Otherwise
    # a comparison against a group with no observations reports dof >= 1
    # alongside chi2 == 0.
    live_rows = sum(1 for total in rs if total > 0)
    live_cols = sum(1 for total in cs if total > 0)
    dof = max(live_rows - 1, 0) * max(live_cols - 1, 0)

    return {'chi2': chi, 'dof': dof, 'groups': groups, 'categories': cats,
            'table': table, 'row_sums': rs, 'col_sums': cs, 'total': tot}
# Hypothesis pairs whose recommendation-bucket counts are compared.
pairs = [
    ('H1_frame_negative', 'H1_frame_positive'),
    ('H3_confirm_neutral', 'H3_confirm_primed'),
]

# One chi-square result per pair, keyed "<first>_vs_<second>".
chis = {}
for a, b in pairs:
    # .get() with an empty Counter keeps a hypothesis with no rows in the
    # table without inserting a new key into buck.
    observed = {a: buck.get(a, Counter()), b: buck.get(b, Counter())}
    chis[f'{a}_vs_{b}'] = chi2(observed)

# Persist both result sets as pretty-printed JSON in the output directory.
summary_path = OUT / 'analysis_summary.json'
summary_path.write_text(json.dumps(summary, indent=2))
chi_path = OUT / 'chi_square_results.json'
chi_path.write_text(json.dumps(chis, indent=2))
print('Wrote analysis/analysis_summary.json and analysis/chi_square_results.json')


Wrote analysis/analysis_summary.json and analysis/chi_square_results.json
