# Comparative Retrieval Analysis -- Affective-RAG

Compares five retrieval configurations on agreement and dissonance query sets:

| Method | Description |
|--------|-------------|
| BM25 | Lexical baseline |
| Vector-RAG | Semantic + emotion, no knowledge graph |
| KRAG ($\alpha$=1.0) | Relevance-dominated |
| KRAG ($\alpha$=0.5) | Balanced |
| KRAG ($\alpha$=0.3) | Affective-focused |

Evaluates AP@5, ADE, and Semantic Recall on 800 agreement + 800 dissonance queries.
Includes paired statistical tests (Cohen's $d_z$, 95% CIs).

### Prerequisites
| # | Requirement |
|---|-------------|
| 1 | Runtime set to **GPU** |
| 2 | GCP project with GCS bucket containing the dataset |
| 3 | Trained encoder checkpoint (`krag_encoder.pt`) to upload |

## Install & clone

In [None]:
import subprocess, sys

def _pip(*a):
    """Run ``pip install`` quietly with the interpreter executing this notebook.

    Every positional argument is forwarded to pip unchanged. A non-zero pip
    exit status surfaces as ``subprocess.CalledProcessError``.
    """
    command = [sys.executable, "-m", "pip", "install", *a, "-q"]
    subprocess.check_call(command)

# torch-geometric is installed on its own, before the repository requirements.
_pip('torch-geometric')

# Clone the project into ./ARAG; the import cell later adds ARAG/src to sys.path.
REPO_URL = "https://github.com/Prashant002-1/Affective-RAG-Recommender-Systems.git"
!git clone {REPO_URL} ARAG

# Install whatever the cloned repository lists in its requirements file.
_pip("-r", "ARAG/requirements.txt")
print("Done. Restart runtime.")

### Restart the runtime
Go to **Runtime > Restart runtime**, then continue from the next cell.

## Authenticate to GCS

In [None]:
# Interactive Colab sign-in (presumably supplies the credentials used below).
from google.colab import auth
auth.authenticate_user()

import os
PROJECT_ID = "YOUR_PROJECT_ID"          # <-- set this
# The project id is exported through the environment as well as passed explicitly.
os.environ["GOOGLE_CLOUD_PROJECT"] = PROJECT_ID

from google.cloud import storage
# The client is constructed and discarded; nothing in this cell keeps a reference.
storage.Client(project=PROJECT_ID)
print(f"Authenticated: {PROJECT_ID}")


## Upload trained encoder

In [None]:
from google.colab.files import upload

print("Upload trained encoder checkpoint (krag_encoder.pt)")
# The returned mapping is keyed by uploaded file name.
uploaded = upload()
# A cancelled dialog leaves the mapping empty; stop here with a clear message
# instead of failing on the lookup below.
if not uploaded:
    raise RuntimeError(
        "No file was uploaded; re-run this cell and select krag_encoder.pt."
    )
# Only the first uploaded file is treated as the checkpoint.
ENCODER_PATH = next(iter(uploaded))
print(f"Uploaded: {ENCODER_PATH}")

## Imports

In [None]:
import sys, torch, json, numpy as np
from scipy import stats as sp_stats
sys.path.insert(0, "ARAG/src")

from krag.system               import ARAGSystem, ARAGSystemConfig
from krag.training.gnn_trainer  import prepare_emotion_ground_truth
from krag.data.adapters         import get_adapter
from krag.evaluation.synthetic_testset import SyntheticTestSetGenerator
from krag.evaluation.metrics    import (
    compute_affective_precision_at_k,
    compute_affective_displacement_error,
    compute_semantic_recall_at_k,
)
from krag.retrieval.bm25_retriever  import create_bm25_retriever_from_content_items
from krag.retrieval.krag_retriever  import QueryContext
from krag.core.emotion_detection    import EmotionProfile

# Run on the GPU when torch can see one, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(f"Imports OK | device = {DEVICE}")

## Initialize system

In [None]:
# Start from the default configuration and override only the Vertex AI project.
config = ARAGSystemConfig()
config.vertex_ai_project = PROJECT_ID
system = ARAGSystem(config)
system.initialize()
# The value returned by load_and_index_data() is indexed by name for the report below.
stats  = system.load_and_index_data()
print(f"Content items : {stats['content_items']}")
print(f"KG nodes      : {stats['knowledge_graph_nodes']}")
print(f"KG edges      : {stats['knowledge_graph_edges']}")


## Load encoder and re-index subgraphs at k=2 hops

In [None]:
# Restore the uploaded weights into the system's encoder, move it to DEVICE and
# switch it to eval mode.
# NOTE(review): torch.load unpickles the file, so only load checkpoints from a
# trusted source; consider weights_only=True if the installed torch supports it.
system.krag_encoder.load_state_dict(
    torch.load(ENCODER_PATH, map_location=DEVICE)
)
system.krag_encoder.to(DEVICE)
system.krag_encoder.eval()

# Replace response generation with a stub that always returns an empty string.
system.response_generator.generate_response = lambda *a, **kw: ""

# Clear the stored subgraph embeddings and index every content item again with
# hops=2 (presumably so the embeddings come from the encoder loaded above).
content_ids = [str(item.id) for item in system.content_items]
system.subgraph_retriever.subgraph_embeddings = {}
system.subgraph_retriever.index_subgraphs(content_ids, hops=2)
print("Encoder loaded, subgraphs re-indexed at k=2.")

## Load evaluation data

In [None]:
# Affective ground truth is derived from the movie table the data adapter loads.
adapter = get_adapter()
movies_df = adapter.load_movies(vector_ready=True)
aff_sigs = prepare_emotion_ground_truth(movies_df)
print(f"Affective signatures : {len(aff_sigs)}")

# Fetch the stored semantic embeddings and arrange them in content_ids order.
_result = system.vector_store.semantic_collection.get(include=["embeddings"])
_emb_map = {
    stored_id: embedding
    for stored_id, embedding in zip(_result["ids"], _result["embeddings"])
}
content_embeddings = np.array(list(map(_emb_map.__getitem__, content_ids)))
print(f"Content embeddings   : {content_embeddings.shape}")

## Generate test sets

800 agreement queries and 800 dissonance queries.  Thresholds and seed
match the original comparative run (semantic = 0.5, affective = 0.6, seed = 42).


In [None]:
# One generator, seeded once, produces both query sets.
gen = SyntheticTestSetGenerator(
    content_embedder=system.content_embedder,
    semantic_threshold=0.5,
    affective_threshold=0.6,
    seed=42,
)

print("[1/2] Agreement queries …")
_agreement_kwargs = {
    "content_items": system.content_items,
    "content_embeddings": content_embeddings,
    "movie_affective_signatures": aff_sigs,
    "num_queries": 800,
    "min_relevant": 3,
}
agreement = gen.generate_test_set(**_agreement_kwargs)
print(f"      {len(agreement)} queries")

print("[2/2] Dissonance queries …")
_dissonance_kwargs = {
    "content_items": system.content_items,
    "content_embeddings": content_embeddings,
    "movie_affective_signatures": aff_sigs,
    "num_queries": 800,
}
dissonance = gen.generate_dissonance_queries(**_dissonance_kwargs)
print(f"      {len(dissonance)} queries")


## Create retrievers

BM25 is created directly from content items.  The KRAG and Vector-RAG
variants are switched in via the system so they share the same indexed
data and encoder.


In [None]:
# BM25 — created manually (needs content_items, not available via set_retriever_type)
bm25_retriever = create_bm25_retriever_from_content_items(system.content_items)

# Retriever configs for the system-based loop:
#   (label, retriever_type, alpha | None)
# The label keys the result dicts; alpha=None leaves system.config.alpha
# untouched when the configuration is evaluated.
SYSTEM_CONFIGS = [
    ("Vector-RAG",    "vector_only",  None),
    ("KRAG (a=1.0)",  "krag",        1.0),
    ("KRAG (a=0.5)",  "krag",        0.5),
    ("KRAG (a=0.3)",  "krag",        0.3),
]
print("Retrievers ready.")

## Evaluation

BM25 is evaluated via its own retrieve path.  The remaining four
configurations are run through `system.query` after
switching the active retriever.  Per-query AP@5 and ADE scores are
retained for the paired statistical tests.


In [None]:
# Fixed emotion ordering shared by the vector and slider encodings.
EMOTION_ORDER = ["happiness", "sadness", "anger", "fear", "surprise", "disgust"]

def to_vec(target_emotions):
    """Return the target emotions as a NumPy array.

    A dict is read out in ``EMOTION_ORDER`` order, with absent emotions
    counted as 0.0. Any other input is passed to ``np.asarray`` as is.
    """
    if not isinstance(target_emotions, dict):
        return np.asarray(target_emotions)
    intensities = [target_emotions.get(name, 0.0) for name in EMOTION_ORDER]
    return np.array(intensities)

def to_sliders(target_emotions):
    """Convert target emotions into integer slider settings keyed by emotion.

    Each intensity from ``to_vec`` is multiplied by 10 and rounded to an int;
    values are paired with ``EMOTION_ORDER`` positionally.
    """
    intensities = to_vec(target_emotions).tolist()
    sliders = {}
    for emotion, level in zip(EMOTION_ORDER, intensities):
        sliders[emotion] = int(round(level * 10))
    return sliders

def _make_query_context(query_text):
    """Wrap plain query text in a ``QueryContext``.

    Only the text carries information: the emotion profile is a
    default-constructed ``EmotionProfile`` and both embeddings are separate
    768-element zero vectors.
    """
    default_profile = EmotionProfile()
    zero_query_embedding = np.zeros(768)
    zero_emotion_embedding = np.zeros(768)
    return QueryContext(
        query_text=query_text,
        user_emotions=default_profile,
        query_embedding=zero_query_embedding,
        emotion_embedding=zero_emotion_embedding,
    )

# ── BM25 evaluation helper ─────────────────────────────────
def eval_bm25(test_cases):
    """Score the BM25 retriever on ``test_cases``.

    For every case the top 10 results are retrieved and scored for semantic
    recall and affective precision at 5 and 10, plus affective displacement
    error at 10. A case is skipped when retrieval raises (the error is
    printed) or returns nothing; an infinite ADE is left out of the ADE list.

    Returns:
        A ``(summary, per_query)`` tuple. ``summary`` maps each metric name
        to its mean over the scored cases (0.0 when none were scored,
        infinity for ADE). ``per_query`` holds the raw ``"AP@5"`` and
        ``"ADE"`` score lists.
    """
    recall5, recall10 = [], []
    prec5, prec10 = [], []
    displacement = []

    for case in test_cases:
        target = to_vec(case.target_emotions)
        try:
            context = _make_query_context(case.query_text)
            hits = bm25_retriever.retrieve(context, k=10)
            ranked_ids = [hit.content_id for hit in hits]
        except Exception as e:
            # Best effort: report the failure and move on to the next case.
            print(f"  [BM25] {e}")
            continue
        if len(ranked_ids) == 0:
            continue

        relevant = case.semantic_relevant or []
        recall5.append(compute_semantic_recall_at_k(ranked_ids, relevant, 5))
        recall10.append(compute_semantic_recall_at_k(ranked_ids, relevant, 10))
        prec5.append(compute_affective_precision_at_k(ranked_ids, aff_sigs, target, 5))
        prec10.append(compute_affective_precision_at_k(ranked_ids, aff_sigs, target, 10))
        error = compute_affective_displacement_error(ranked_ids, aff_sigs, target, 10)
        if error != float('inf'):
            displacement.append(error)

    def _mean_or(values, fallback):
        # Mean of the collected scores, or the fallback when nothing was scored.
        return float(np.mean(values)) if values else fallback

    summary = {
        "Semantic_Recall@5":  _mean_or(recall5, 0.0),
        "Semantic_Recall@10": _mean_or(recall10, 0.0),
        "AP@5":               _mean_or(prec5, 0.0),
        "AP@10":              _mean_or(prec10, 0.0),
        "ADE":                _mean_or(displacement, float('inf')),
    }
    per_query = {"AP@5": prec5, "ADE": displacement}
    return summary, per_query

# ── system-retriever evaluation helper ─────────────────────
def eval_system(test_cases, rtype, alpha):
    """Score one system-backed retriever configuration on ``test_cases``.

    Before any query is issued, ``system.config.alpha`` is set (only when
    ``alpha`` is not None) and the active retriever is switched to ``rtype``.
    Each case is then sent through ``system.query`` with up to 10 results
    requested and scored for semantic recall and affective precision at 5 and
    10, plus affective displacement error at 10. Cases with no
    recommendations are skipped; an infinite ADE is left out of the ADE list.

    Returns:
        A ``(summary, per_query)`` tuple. ``summary`` maps each metric name
        to its mean over the scored cases (0.0 when none were scored,
        infinity for ADE). ``per_query`` holds the raw ``"AP@5"`` and
        ``"ADE"`` score lists.
    """
    if alpha is not None:
        system.config.alpha = alpha
    system.set_retriever_type(rtype)

    recall5, recall10 = [], []
    prec5, prec10 = [], []
    displacement = []

    for case in test_cases:
        target = to_vec(case.target_emotions)
        slider_values = to_sliders(case.target_emotions)

        response = system.query(
            query_text=case.query_text,
            emotion_sliders=slider_values,
            max_results=10,
            include_explanation=False,
        )
        recommended = response.get("recommendations", [])
        if not recommended:
            continue
        ranked_ids = [entry["content_id"] for entry in recommended]

        relevant = case.semantic_relevant or []
        recall5.append(compute_semantic_recall_at_k(ranked_ids, relevant, 5))
        recall10.append(compute_semantic_recall_at_k(ranked_ids, relevant, 10))
        prec5.append(compute_affective_precision_at_k(ranked_ids, aff_sigs, target, 5))
        prec10.append(compute_affective_precision_at_k(ranked_ids, aff_sigs, target, 10))
        error = compute_affective_displacement_error(ranked_ids, aff_sigs, target, 10)
        if error != float('inf'):
            displacement.append(error)

    def _mean_or(values, fallback):
        # Mean of the collected scores, or the fallback when nothing was scored.
        return float(np.mean(values)) if values else fallback

    summary = {
        "Semantic_Recall@5":  _mean_or(recall5, 0.0),
        "Semantic_Recall@10": _mean_or(recall10, 0.0),
        "AP@5":               _mean_or(prec5, 0.0),
        "AP@10":              _mean_or(prec10, 0.0),
        "ADE":                _mean_or(displacement, float('inf')),
    }
    per_query = {"AP@5": prec5, "ADE": displacement}
    return summary, per_query

print("Evaluation helpers defined.")

## Run — Agreement set

In [None]:
# Install the empty-string response stub again (presumably so this cell can be
# run without the encoder-loading cell having set it).
system.response_generator.generate_response = lambda *a, **kw: ""

print("=== Agreement (n={}) ===\n".format(len(agreement)))

# Keyed by method label: aggregate metrics and raw per-query score lists.
agr_results  = {}
agr_per_query = {}

# BM25
print("BM25 …", flush=True)
agr_results["BM25"], agr_per_query["BM25"] = eval_bm25(agreement)
print(f"  AP@5={agr_results['BM25']['AP@5']:.4f}  ADE={agr_results['BM25']['ADE']:.4f}")

# System-based retrievers
# Each iteration reconfigures the shared `system` object inside eval_system.
for name, rtype, alpha in SYSTEM_CONFIGS:
    print(f"{name} …", flush=True)
    agr_results[name], agr_per_query[name] = eval_system(agreement, rtype, alpha)
    print(f"  AP@5={agr_results[name]['AP@5']:.4f}  ADE={agr_results[name]['ADE']:.4f}")

## Run — Dissonance set

In [None]:
# Install the empty-string response stub again (presumably so this cell can be
# run without the encoder-loading cell having set it).
system.response_generator.generate_response = lambda *a, **kw: ""

print("=== Dissonance (n={}) ===\n".format(len(dissonance)))

# Keyed by method label: aggregate metrics and raw per-query score lists.
dis_results  = {}
dis_per_query = {}

# BM25 baseline through its own retrieval path.
print("BM25 …", flush=True)
dis_results["BM25"], dis_per_query["BM25"] = eval_bm25(dissonance)
print(f"  AP@5={dis_results['BM25']['AP@5']:.4f}  ADE={dis_results['BM25']['ADE']:.4f}")

# System-based retrievers; each iteration reconfigures the shared `system` object.
for name, rtype, alpha in SYSTEM_CONFIGS:
    print(f"{name} …", flush=True)
    dis_results[name], dis_per_query[name] = eval_system(dissonance, rtype, alpha)
    print(f"  AP@5={dis_results[name]['AP@5']:.4f}  ADE={dis_results[name]['ADE']:.4f}")

## Statistical tests

Paired comparison between KRAG α=0.3 and KRAG α=1.0.
Reports mean paired difference, 95% CI, and Cohen's $d_z$.


In [None]:
def paired_stats(a_scores, b_scores, label_a, label_b, metric):
    """Summarise the paired difference ``a - b`` for one metric.

    Prints a one-line report and returns the same numbers.

    Args:
        a_scores: Per-query scores for the first condition.
        b_scores: Per-query scores for the second condition.
        label_a: Label of the first condition (accepted, not used in output).
        label_b: Label of the second condition (accepted, not used in output).
        metric: Metric name shown in the printed line.

    Returns:
        dict with ``"mean_diff"`` (mean of ``a - b``), ``"ci"`` (95%
        t-interval on the mean difference as ``[low, high]``), ``"dz"``
        (Cohen's d_z: mean difference divided by the standard deviation of
        the differences; 0.0 when that deviation is zero or undefined) and
        ``"n"`` (number of pairs used).
    """
    a = np.asarray(a_scores, dtype=float)
    b = np.asarray(b_scores, dtype=float)
    # Pairs are formed by position and the longer input is cut to the shorter.
    # NOTE(review): this assumes element i of both inputs belongs to the same
    # query; verify against the code that builds the score lists.
    n = min(len(a), len(b))
    diff = a[:n] - b[:n]

    nan = float("nan")
    mean_d = float(np.mean(diff)) if n else nan
    # A sample standard deviation needs at least two pairs.
    sd_diff = float(np.std(diff, ddof=1)) if n > 1 else nan

    # d_z standardises by the spread of the paired differences, not by the
    # pooled spread of the two samples.
    dz = mean_d / sd_diff if sd_diff > 0 else 0.0

    if n > 1:
        se = sd_diff / np.sqrt(n)
        t_crit = sp_stats.t.ppf(0.975, df=n - 1)
        ci = (float(mean_d - t_crit * se), float(mean_d + t_crit * se))
    else:
        ci = (nan, nan)

    print(f"  {metric:6s}  diff={mean_d:+.4f}  95% CI [{ci[0]:+.4f}, {ci[1]:+.4f}]  dz={dz:+.2f}  n={n}")
    return {"mean_diff": mean_d, "ci": list(ci), "dz": dz, "n": n}

# Paired comparison of KRAG a=0.3 against KRAG a=1.0, one entry per query set.
stat = {}

print("\nAgreement  (a=0.3 minus a=1.0)")
stat["agreement"] = {
    m: paired_stats(
        agr_per_query["KRAG (a=0.3)"][m],
        agr_per_query["KRAG (a=1.0)"][m],
        "a=0.3", "a=1.0", m,
    )
    for m in ["AP@5", "ADE"]
}

print("\nDissonance  (a=0.3 minus a=1.0)")
stat["dissonance"] = {
    m: paired_stats(
        dis_per_query["KRAG (a=0.3)"][m],
        dis_per_query["KRAG (a=1.0)"][m],
        "a=0.3", "a=1.0", m,
    )
    for m in ["AP@5", "ADE"]
}


## Plots

In [None]:
import matplotlib.pyplot as plt

# Bar order on every chart; these labels are also the keys of the result dicts.
NAMES  = ["BM25", "Vector-RAG", "KRAG (a=1.0)", "KRAG (a=0.5)", "KRAG (a=0.3)"]
x      = np.arange(len(NAMES))
width  = 0.6

# One entry per figure: (file stem, results dict, metric key, title, bar colour).
plots = [
    ("agr_ap5",  agr_results, "AP@5",                "Agreement — Affective Precision@5", "steelblue"),
    ("agr_ade",  agr_results, "ADE",                 "Agreement — Displacement Error",    "steelblue"),
    ("agr_sr10", agr_results, "Semantic_Recall@10",   "Agreement — Semantic Recall@10",    "steelblue"),
    ("dis_ap5",  dis_results, "AP@5",                "Dissonance — Affective Precision@5", "darkorange"),
    ("dis_ade",  dis_results, "ADE",                 "Dissonance — Displacement Error",    "darkorange"),
    ("dis_sr10", dis_results, "Semantic_Recall@10",   "Dissonance — Semantic Recall@10",    "darkorange"),
]

# Each figure is written to /tmp/comp_<file stem>.png and then displayed.
for fname, results, metric, title, color in plots:
    fig, ax = plt.subplots(figsize=(6, 3.5))
    vals = [results[n][metric] for n in NAMES]
    ax.bar(x, vals, width, color=color)
    ax.set_xticks(x)
    ax.set_xticklabels(NAMES, rotation=15, ha='right', fontsize=9)
    ax.set_ylabel(metric)
    ax.set_title(title)
    ax.grid(True, alpha=0.3, axis='y')
    plt.tight_layout()
    plt.savefig(f"/tmp/comp_{fname}.png", dpi=200, bbox_inches="tight")
    plt.show()

print("Saved 6 individual plots.")

## Save & download

In [None]:
# Description of the run that is stored next to the scores.
_metadata = {
    "experiment": "comparative_retrieval_analysis",
    "encoder": "krag_encoder_aw0.3",
    "encoder_config": {
        "contrastive_weight": 0.2,
        "alignment_weight": 0.3,
        "embedding_dim": 768,
        "num_layers": 3,
        "num_heads": 4,
    },
    "n_agreement": len(agreement),
    "n_dissonance": len(dissonance),
    "methods": ["BM25", "Vector-RAG", "KRAG (a=1.0)", "KRAG (a=0.5)", "KRAG (a=0.3)"],
    "device": DEVICE,
}

# Shallow per-method copies of the raw per-query score lists.
_per_query = {
    "agreement": {name: dict(pq) for name, pq in agr_per_query.items()},
    "dissonance": {name: dict(pq) for name, pq in dis_per_query.items()},
}

out = {
    "metadata": _metadata,
    "agreement": agr_results,
    "dissonance": dis_results,
    "statistical_tests": stat,
    "per_query": _per_query,
}

# NOTE(review): an ADE of float('inf') is written by json.dump as the
# non-standard token Infinity; confirm downstream readers accept it.
with open("/tmp/comparative_analysis.json", "w") as f:
    json.dump(out, f, indent=2)

from google.colab.files import download

# Send the JSON summary first, then each saved figure, to the browser.
download("/tmp/comparative_analysis.json")
for fname in ["agr_ap5", "agr_ade", "agr_sr10", "dis_ap5", "dis_ade", "dis_sr10"]:
    download(f"/tmp/comp_{fname}.png")
print("Downloads triggered.")