# 03 — Feedback Effects
Simulate auditor feedback learned from the training split, re-score the test split with it, and compare the metrics against the baseline.

In [None]:

import yaml
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt

from crs.dataio import load_artifacts, load_controls
from crs.controls import build_index_text
from crs.recommenders.tfidf import TFIDFRecommender
from crs.metrics import top1_accuracy, precision_at_k, recall_at_k, jaccard
from crs.feedback import learn_from_train

# --- Configuration ---------------------------------------------------------
# Decode the YAML explicitly as UTF-8 so parsing does not depend on the
# platform's default locale encoding.
CFG_PATH = Path('../configs/defaults.yaml')
cfg = yaml.safe_load(CFG_PATH.read_text(encoding='utf-8'))

# --- Data ------------------------------------------------------------------
controls = load_controls(cfg['paths']['controls'])
artifacts = load_artifacts(cfg['paths']['artifacts'])
index_texts = build_index_text(controls)

# --- Recommender -----------------------------------------------------------
# `or {}` also covers a `tfidf:` key that is present but empty (YAML parses
# it as None); the defaults below apply in that case.
tfidf_cfg = cfg.get('tfidf') or {}
rec = TFIDFRecommender(
    # YAML yields a list; it is converted to a tuple before being passed on.
    ngram_range=tuple(tfidf_cfg.get('ngram_range', [1, 2])),
    min_df=tfidf_cfg.get('min_df', 1),
).fit(index_texts, controls['control_id'].tolist())

# --- Evaluation setup ------------------------------------------------------
# k is the number of recommendations requested per artifact (default 3).
k = cfg.get('k', 3)
# Independent copies of the rows labelled 'train' / 'test' in `split`.
train = artifacts[artifacts['split'] == 'train'].copy()
test = artifacts[artifacts['split'] == 'test'].copy()

# Column layout shared by both prediction tables; passing it explicitly keeps
# the header intact even when there are no test rows.
_PRED_COLUMNS = ['artifact_id', 'gold_controls', 'predicted_topk', 'scores_topk']


def _predict_rows(frame, **predict_kwargs):
    """Score every row of *frame* and return one result dict per row.

    Reads the module-level ``rec`` and ``k``. Any extra keyword arguments
    (e.g. ``boosts`` / ``negatives``) are forwarded unchanged to
    ``rec.predict_topk``.

    Each dict holds the integer ``artifact_id``, the row's ``gold_controls``
    value as-is, the predicted ids joined with ``;`` and the scores formatted
    to four decimals and joined with ``;``.
    """
    out = []
    columns = zip(frame['artifact_id'], frame['text'], frame['gold_controls'])
    for artifact_id, text, gold in columns:
        ids, scores = rec.predict_topk(text, k=k, **predict_kwargs)
        out.append({
            'artifact_id': int(artifact_id),
            'gold_controls': gold,
            # str() keeps the join working if control ids are not strings.
            'predicted_topk': ';'.join(map(str, ids)),
            'scores_topk': ';'.join(f"{s:.4f}" for s in scores),
        })
    return out


# Baseline
# Scored before learn_from_train runs, exactly as in the original ordering.
rows = _predict_rows(test)
preds_base = pd.DataFrame(rows, columns=_PRED_COLUMNS)

# Learn boosts/negatives
boosts, negatives = learn_from_train(train, rec, rounds=1, alpha=0.03, beta=0.01)

# Feedback variant
# Same test rows, with the learned boosts/negatives passed to predict_topk.
rows_fb = _predict_rows(test, boosts=boosts, negatives=negatives)
preds_fb = pd.DataFrame(rows_fb, columns=_PRED_COLUMNS)

def metrics_row(df, tag):
    """Build one summary row of evaluation metrics for a prediction table.

    Args:
        df: Prediction table handed unchanged to the ``crs.metrics`` functions.
        tag: Label stored under the ``'tag'`` key to identify the variant.

    Returns:
        A dict with ``'tag'``, ``'top1'`` and the ``P@k`` / ``R@k`` / ``J@k``
        entries, where ``k`` is the module-level cut-off. Every metric value
        is rounded to three decimals.
    """
    summary = {'tag': tag, 'top1': round(top1_accuracy(df), 3)}
    # The k-dependent metrics share one call shape, so fill them in a loop;
    # insertion order fixes the resulting column order (P, R, J).
    at_k_metrics = (('P', precision_at_k), ('R', recall_at_k), ('J', jaccard))
    for prefix, metric in at_k_metrics:
        summary[f'{prefix}@{k}'] = round(metric(df, k=k), 3)
    return summary

# Metrics table: one row per variant, baseline first.
variants = ((preds_base, 'baseline'), (preds_fb, 'feedback'))
m = pd.DataFrame([metrics_row(frame, label) for frame, label in variants])
display(m)

# Bar chart of the precision column for the two variants.
precision_col = f'P@{k}'
title_template = 'Precision@{}: Baseline vs Feedback'
plt.figure()
plt.bar(m['tag'], m[precision_col])
plt.title(title_template.format(k))
plt.xlabel('Model')
plt.ylabel('Precision')
plt.tight_layout()

# Persist both prediction tables as CSV, creating the folder if needed.
out_dir = Path('../outputs/predictions')
out_dir.mkdir(parents=True, exist_ok=True)

baseline_csv = out_dir / 'test_baseline.csv'
feedback_csv = out_dir / 'test_feedback.csv'

preds_base.to_csv(baseline_csv, index=False)
preds_fb.to_csv(feedback_csv, index=False)
print('Saved:', baseline_csv, 'and', feedback_csv)
