In [None]:
!pip -q uninstall -y numpy pandas faiss-cpu -y
!pip -q install --no-cache-dir \
  numpy==2.1.3 pandas==2.2.2 scikit-learn==1.5.2 matplotlib==3.9.2 \
  sentence-transformers==2.7.0

import os
print("✅ Reinstalled core packages. Forcing runtime restart to clear old C-extensions…")
os.kill(os.getpid(), 9)


[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
google-colab 1.0.0 requires requests==2.32.4, but you have requests 2.32.5 which is incompatible.
numba 0.60.0 requires numpy<2.1,>=1.22, but you have numpy 2.1.3 which is incompatible.
umap-learn 0.5.9.post2 requires scikit-learn>=1.6, but you have scikit-learn 1.5.2 which is incompatible.
datasets 4.0.0 requires fsspec[http]<=2025.3.0,>=2023.1.0, but you have fsspec 2025.9.0 which is incompatible.[0m[31m
[0m

In [1]:
import numpy, pandas, sklearn, matplotlib
print("numpy:", numpy.__version__)
print("pandas:", pandas.__version__)
print("sklearn:", sklearn.__version__)
print("matplotlib:", matplotlib.__version__)


numpy: 2.1.3
pandas: 2.2.2
sklearn: 1.5.2
matplotlib: 3.9.2


In [2]:
import os, time, json, random, textwrap
from pathlib import Path
from dataclasses import dataclass
from typing import List, Dict, Any, Optional, Tuple

import numpy as np
import pandas as pd
from sklearn.neighbors import NearestNeighbors
from sentence_transformers import SentenceTransformer, util as st_util
import matplotlib.pyplot as plt

# ---------------- Paths & detection ----------------
BASE = Path("/content")
OUT  = BASE / "trackC_outputs"
OUT.mkdir(parents=True, exist_ok=True)

CORPUS_CSV   = BASE / "corpus.csv"
RUN_CFG_PATH = BASE / "Week7_run_config.json"   # optional

print("Detected files:")
for p in [CORPUS_CSV, RUN_CFG_PATH]:
    print(" -", p, "EXISTS" if p.exists() else "missing")

# ---------------- Config ----------------
CFG = {
    "retrieval": {"embedder": "sentence-transformers/all-MiniLM-L6-v2", "top_k": 5},
    "orchestrator": {"max_hops": 2},
    "eval": {"max_items": 12},
    "plots": {"figsize": [6,4]},
    "random_seed": 42,
    "ablation": {"agent_counts": [1, 2, 3], "temperatures": [0.2, 0.6, 1.0]},
}
if RUN_CFG_PATH.exists():
    try:
        user_cfg = json.loads(RUN_CFG_PATH.read_text())
        for k, v in user_cfg.items():  # shallow merge
            CFG[k] = v
        print("[CFG] Loaded Week7_run_config.json")
    except Exception as e:
        print("[CFG] Failed to parse run config; using defaults.", e)

random.seed(CFG["random_seed"]); np.random.seed(CFG["random_seed"])

# ---------------- RAG Index (sklearn KNN) ----------------
@dataclass
class RAGIndex:
    model_name: str
    top_k: int = 5
    model: Any = None
    knn: Any = None
    df: Optional[pd.DataFrame] = None
    text_col: Optional[str] = None
    id_col: Optional[str] = None
    embs: Optional[np.ndarray] = None

    def load_model(self):
        if self.model is None:
            self.model = SentenceTransformer(self.model_name)

    def _pick_text_col(self, df: pd.DataFrame) -> str:
        for c in ["text","content","chunk","passage","body","abstract","desc"]:
            if c in df.columns: return c
        str_cols = [c for c in df.columns if df[c].dtype == object]
        if not str_cols:
            df["text"] = df.astype(str).agg(" ".join, axis=1)
            return "text"
        return max(str_cols, key=lambda c: df[c].astype(str).str.len().sum())

    def _pick_id_col(self, df: pd.DataFrame) -> Optional[str]:
        for c in ["id","doc_id","source","filename","title"]:
            if c in df.columns: return c
        return None

    def _encode(self, texts: List[str]) -> np.ndarray:
        return self.model.encode(
            texts, batch_size=64, show_progress_bar=False,
            convert_to_numpy=True, normalize_embeddings=True
        ).astype(np.float32)

    def build(self, df: pd.DataFrame):
        self.load_model()
        self.text_col = self._pick_text_col(df)
        self.id_col = self._pick_id_col(df)
        df = df[df[self.text_col].notna()].reset_index(drop=True)
        self.df = df
        self.embs = self._encode(df[self.text_col].astype(str).tolist())
        self.knn = NearestNeighbors(n_neighbors=min(self.top_k, len(df)), metric="cosine")
        self.knn.fit(self.embs)

    def search(self, query: str, k: Optional[int] = None, temp: float = 0.7, diversity: int = 0) -> List[Dict[str,Any]]:
        k = k or self.top_k
        q_emb = self._encode([query])
        n_nei = min(max(k*(1+diversity), k), len(self.df))
        distances, indices = self.knn.kneighbors(q_emb, n_neighbors=n_nei)
        # cosine distance -> similarity
        pool = [(float(1.0 - distances[0][i]), int(indices[0][i])) for i in range(len(indices[0]))]
        # soft sampling within pool for diversity at higher temperature
        if len(pool) > k and temp > 0.5:
            probs = np.array([max(s, 1e-6) for s,_ in pool], dtype=np.float64)
            probs /= probs.sum()
            pick = np.random.choice(len(pool), size=k, replace=False, p=probs)
            pool = [pool[i] for i in pick]
        top = sorted(pool, key=lambda x: -x[0])[:k]
        hits = []
        for score, i in top:
            row = self.df.iloc[i]
            hits.append({
                "score": float(score),
                "id": str(row[self.id_col]) if self.id_col else str(i),
                "text": str(row[self.text_col])
            })
        return hits

# ---------------- Corpus  ----------------
def load_corpus_or_demo() -> pd.DataFrame:
    if CORPUS_CSV.exists():
        try:
            df = pd.read_csv(CORPUS_CSV)
            print(f"[RAG] Loaded corpus.csv with shape {df.shape}")
            return df
        except Exception as e:
            print("[WARN] Failed to read corpus.csv:", e)
    data = [
        {"id":"p1","text":"Multi-agent systems can outperform solo agents by coordinating plans and sharing evidence."},
        {"id":"p2","text":"RAG pipelines combine retrieval with generation; temperature can influence exploration."},
        {"id":"p3","text":"Medical MDT frameworks leverage moderator and recruiter roles to improve decisions."},
        {"id":"p4","text":"Diplomacy-style agents negotiate over a map; trust and strategy affect outcomes."},
        {"id":"p5","text":"LoRA enables lightweight fine-tuning, which can specialize models to a domain quickly."},
    ]
    df = pd.DataFrame(data)
    print("[RAG] Using tiny demo corpus (5 rows). Upload /content/corpus.csv to use your data.")
    return df

corpus_df = load_corpus_or_demo()
index = RAGIndex(model_name=CFG["retrieval"]["embedder"], top_k=int(CFG["retrieval"]["top_k"]))
index.build(corpus_df)

# ---------------- Eval set ----------------
def make_eval_from_corpus(df: pd.DataFrame, max_items: int = 12) -> pd.DataFrame:
    if "question" in df.columns and "answer" in df.columns:
        eval_df = df[["question","answer"]].dropna().head(max_items).copy()
        if len(eval_df):
            print(f"[EVAL] Using provided QA pairs (n={len(eval_df)})")
            return eval_df.rename(columns={"answer":"reference"})
    text_col = index.text_col or "text"
    rows = []
    for _, row in df.head(max_items).iterrows():
        passage = str(row[text_col])
        q = "What does the text say about agents or retrieval or decisions?"
        rows.append({"question": q, "reference": passage})
    eval_df = pd.DataFrame(rows)
    print(f"[EVAL] Built synthetic eval set (n={len(eval_df)})")
    return eval_df

eval_df = make_eval_from_corpus(corpus_df, max_items=int(CFG["eval"]["max_items"]))

# ---------------- Agent logic + metrics ----------------
def summarize_hits(hits: List[Dict[str,Any]], max_chars: int = 240) -> str:
    if not hits: return ""
    text = " ".join(h["text"] for h in hits[:3])
    return textwrap.shorten(text, width=max_chars, placeholder="...")

def score_answer(pred: str, reference: str) -> Dict[str,float]:
    if not pred:
        return {"sim": 0.0, "kw_recall": 0.0}
    embeds = index.model.encode([pred, reference], normalize_embeddings=True)
    sim = float(st_util.cos_sim(embeds[0], embeds[1]).item())
    def kws(t): return set([w.lower() for w in t.split() if w.isalpha()])
    ref_k = kws(reference); pr_k = kws(pred)
    rec = len(ref_k & pr_k) / max(1, len(ref_k))
    return {"sim": sim, "kw_recall": rec}

def hit_at_k(hits: List[Dict[str,Any]], reference: str, k: int = 1) -> int:
    if not hits: return 0
    ref_emb = index.model.encode([reference], normalize_embeddings=True)[0]
    cand = [h["text"] for h in hits[:k]]
    if not cand: return 0
    c_embs = index.model.encode(cand, normalize_embeddings=True)
    sims = st_util.cos_sim(c_embs, ref_emb)
    return int(float(sims.max().item()) >= 0.4)

def run_agent_once(question: str, agent_count: int, temperature: float) -> Dict[str,Any]:
    t0 = time.perf_counter()
    if agent_count == 1:
        hits = index.search(question, k=index.top_k, temp=temperature, diversity=0)
    else:
        h1 = index.search(question, k=index.top_k, temp=temperature, diversity=1)
        bridge = h1[0]["id"] if h1 else ""
        q2 = f"{question} Focus on {bridge}" if bridge else question + " (detail)"
        h2 = index.search(q2, k=index.top_k, temp=temperature, diversity=1)
        merged = { (h["id"], h["text"]): h for h in (h1 + h2) }
        hits = sorted(merged.values(), key=lambda x: -x["score"])[:index.top_k]
    answer = summarize_hits(hits)
    latency = round(time.perf_counter() - t0, 3)
    return {"hits": hits, "answer": answer, "latency_s": latency}

# ---------------- Ablation----------------
rows = []
for agent_count in CFG["ablation"]["agent_counts"]:
    for temp in CFG["ablation"]["temperatures"]:
        h1_total = h3_total = 0
        sim_list, kw_list, lat_list = [], [], []
        for _, row in eval_df.iterrows():
            question = row["question"]; reference = row["reference"]
            out = run_agent_once(question, agent_count=agent_count, temperature=temp)
            h1_total += hit_at_k(out["hits"], reference, k=1)
            h3_total += hit_at_k(out["hits"], reference, k=3)
            sc = score_answer(out["answer"], reference)
            sim_list.append(sc["sim"]); kw_list.append(sc["kw_recall"]); lat_list.append(out["latency_s"])
        n = len(eval_df)
        rows.append({
            "agent_count": agent_count,
            "temperature": temp,
            "hit@1": round(h1_total / max(1,n), 3),
            "hit@3": round(h3_total / max(1,n), 3),
            "mean_sim": round(float(np.mean(sim_list)), 3),
            "kw_recall": round(float(np.mean(kw_list)), 3),
            "latency_s": round(float(np.mean(lat_list)), 3),
            "n_eval": n
        })

ablation_df = pd.DataFrame(rows)
ABL_CSV = OUT / "ablation_results.csv"
ablation_df.to_csv(ABL_CSV, index=False)
print(f"[ABL] Wrote ablation CSV → {ABL_CSV.resolve()}")
display(ablation_df)

# ---------------- Plot +report ----------------
fig = plt.figure(figsize=tuple(CFG["plots"]["figsize"]))
for temp, sub in ablation_df.groupby("temperature"):
    xs = sorted(sub["agent_count"].unique())
    ys = [sub[sub["agent_count"]==x]["mean_sim"].mean() for x in xs]
    plt.plot(xs, ys, marker="o", label=f"T={temp}")
plt.title("Ablation: mean similarity vs agent count (by temperature)")
plt.xlabel("Agent Count"); plt.ylabel("Mean Similarity")
plt.grid(True, linestyle="--", alpha=0.3); plt.legend()
PLOT_PATH = OUT / "ablation_plot.png"
fig.tight_layout(); fig.savefig(PLOT_PATH, dpi=150); plt.close(fig)
print("Plot saved:", PLOT_PATH.resolve())

report = {
    "title": "Week 7 — Track C Ablation (generated)",
    "config": CFG,
    "summary": (
        "We generated ablation results by running a small eval set through the agent pipeline. "
        "Team agents generally improve retrieval quality (Hit@k / similarity) at a modest latency increase. "
        "Temperature controls diversity; mid values (~0.6) balance quality and variety."
    ),
    "artifacts": {"csv": str(ABL_CSV), "plot": str(PLOT_PATH)}
}
(REPORT_PATH := OUT / "ablation_report.json").write_text(json.dumps(report, indent=2))
print("Report saved:", REPORT_PATH.resolve())


Detected files:
 - /content/corpus.csv EXISTS
 - /content/Week7_run_config.json EXISTS
[CFG] Loaded Week7_run_config.json
[RAG] Loaded corpus.csv with shape (3, 2)


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

[EVAL] Built synthetic eval set (n=3)
[ABL] Wrote ablation CSV → /content/trackC_outputs/ablation_results.csv


Unnamed: 0,agent_count,temperature,hit@1,hit@3,mean_sim,kw_recall,latency_s,n_eval
0,1,0.2,1.0,1.0,0.565,0.004,0.167,3
1,1,0.6,1.0,1.0,0.565,0.004,0.215,3
2,1,1.0,1.0,1.0,0.565,0.004,0.141,3
3,2,0.2,1.0,1.0,0.565,0.004,0.167,3
4,2,0.6,1.0,1.0,0.565,0.004,0.152,3
5,2,1.0,1.0,1.0,0.565,0.004,0.195,3
6,3,0.2,1.0,1.0,0.565,0.004,0.156,3
7,3,0.6,1.0,1.0,0.565,0.004,0.148,3
8,3,1.0,1.0,1.0,0.565,0.004,0.156,3


Plot saved: /content/trackC_outputs/ablation_plot.png
Report saved: /content/trackC_outputs/ablation_report.json


In [3]:
from pathlib import Path
import shutil

OUT = Path("/content/trackC_outputs")
print("Artifacts in", OUT.resolve())
for p in sorted(OUT.glob("*")):
    print(" -", p.name)

zip_path = shutil.make_archive("week7_trackC_outputs", "zip", str(OUT))
print("Zipped to:", zip_path)


Artifacts in /content/trackC_outputs
 - ablation_plot.png
 - ablation_report.json
 - ablation_results.csv
Zipped to: /content/week7_trackC_outputs.zip
