In [20]:
# Install necessary libraries
!pip install -q -U google-generativeai scikit-learn pandas

import os
from google.colab import userdata # If on Colab
from kaggle_secrets import UserSecretsClient # If on Kaggle

# Setup API Key
# 1. Get your key from aistudio.google.com
# 2. In Kaggle: Add-ons -> Secrets -> Add a new secret called 'GEMINI_API_KEY'
try:
    user_secrets = UserSecretsClient()
    os.environ["GEMINI_API_KEY"] = user_secrets.get_secret("GEMINI_API_KEY")
except:
    # Fallback for local/colab
    # os.environ["GEMINI_API_KEY"] = "YOUR_RAW_KEY_HERE" # Unsafe, better to use input()
    pass

print("Environment setup complete")

Environment setup complete


In [21]:
import google.generativeai as genai
import pandas as pd
import sklearn

print(f"GenAI Version: {genai.__version__}")
print(f"Pandas Version: {pd.__version__}")
print(f"Scikit-Learn Version: {sklearn.__version__}")
print("Environment is ready!")

GenAI Version: 0.8.5
Pandas Version: 2.3.3
Scikit-Learn Version: 1.7.2
Environment is ready!


## Data Generation

In [22]:
import os, random, csv, textwrap, math, shutil
from pathlib import Path

random.seed(7)

base = Path("/kaggle/working/qcb_evidence_agent/data")
app_dir = base / "appendices"
claims_dir = base / "claims"
base.mkdir(exist_ok=True, parents=True)
app_dir.mkdir(exist_ok=True, parents=True)
claims_dir.mkdir(exist_ok=True, parents=True)

themes = {
    "Summer School": {"prefix": "A"},
    "Mentoring": {"prefix": "E"},
    "Curriculum Modules": {"prefix": "C"},
    "Research Engagement": {"prefix": "R"},
    "Public Outreach": {"prefix": "P"},
    "Collaboration Analysis": {"prefix": "M"},
}

years = [2023, 2024, 2025]

appendix_catalog = []

def wrap(text, width=78):
    return "\n".join(textwrap.wrap(text, width=width))

def write_survey_appendix(idx, theme):
    """Quant-style appendix with multi-year table"""
    prefix = themes[theme]["prefix"]
    app_id = f"{prefix}{idx}"
    # choose 1‚Äì3 years
    num_years = random.choice([1, 2, 3])
    ys = sorted(random.sample(years, num_years))
    lines = []
    lines.append(f"APPENDIX {app_id}. {theme.upper()} SURVEY SUMMARY, {ys[0]}‚Äì{ys[-1]}")
    lines.append("")
    lines.append(f"Program Area: {theme}")
    lines.append(f"Years Covered: {', '.join(str(y) for y in ys)}")
    lines.append("This appendix summarizes synthetic survey results on satisfaction and confidence.")
    lines.append("")
    lines.append(f"Table {app_id}.1. Mean Satisfaction (1‚Äì5 scale) by Year")
    lines.append("")
    header = "+--------+-------+-------+------+-----------------+"
    lines.append(header)
    lines.append("| Year   |   N   | Mean  |  SD  |  % High (4‚Äì5)   |")
    lines.append(header)
    stats_for_years = []
    for y in ys:
        n = random.randint(40, 160)
        mean = round(random.uniform(3.3, 4.6), 2)
        sd = round(random.uniform(0.4, 0.9), 2)
        pct_high = round(random.uniform(55, 95), 1)
        lines.append(f"| {y:<6}|{n:6d} | {mean:4.2f} | {sd:4.2f} | {pct_high:7.1f}%        |")
        stats_for_years.append({"year": y, "n": n, "mean": mean, "pct_high": pct_high})
    lines.append(header)
    lines.append("")
    trend_note = "increased" if stats_for_years[-1]["mean"] >= stats_for_years[0]["mean"] else "decreased"
    lines.append("Interpretive Notes:")
    lines.append(
        wrap(
            f"Across {len(ys)} synthetic cohorts, average satisfaction {trend_note} from "
            f"{stats_for_years[0]['mean']:.2f} in {stats_for_years[0]['year']} to "
            f"{stats_for_years[-1]['mean']:.2f} in {stats_for_years[-1]['year']}. "
            "Values are internally consistent but represent illustrative data only."
        )
    )
    content = "\n".join(lines)
    (app_dir / f"Appendix_{app_id}_{theme.replace(' ', '')}_Survey.txt").write_text(content)
    appendix_catalog.append({
        "app_id": app_id,
        "theme": theme,
        "type": "survey",
        "years": ys,
        "stats": stats_for_years
    })

def write_transcript_appendix(idx, theme):
    prefix = themes[theme]["prefix"]
    app_id = f"{prefix}{idx}"
    year = random.choice(years)
    participants = ["PhD1", "PhD2", "UG1", "UG2", "PI1", "Postdoc1"]
    chosen = random.sample(participants, k=random.randint(3, 5))
    all_themes = [
        "Interdisciplinarity", "Instructor Clarity", "Pacing",
        "Lab‚ÄìComputation Connection", "Mentoring Support",
        "Collaboration Barriers", "Belonging", "Cognitive Overload"
    ]
    used_themes = random.sample(all_themes, k=random.randint(3, 5))
    lines = []
    lines.append(f"APPENDIX {app_id}. {theme} Focus Group Transcript, {year}")
    lines.append("")
    lines.append(f"Participants: {', '.join(chosen)}")
    lines.append("Length: synthetic 58 minutes")
    lines.append("")
    t = 0
    for _ in range(20):
        speaker = random.choice(chosen)
        t += random.randint(1, 4)
        mm = t // 60
        ss = t % 60
        ts = f"{mm:02d}:{ss:02d}"
        comment_theme = random.choice(used_themes)
        snippet = (f"{speaker}: I felt that the {theme.lower()} experience in {year} "
                   f"really shaped how I think about {comment_theme.lower()}.")
        lines.append(f"{ts} {snippet}")
    lines.append("")
    lines.append("Thematic Codes:")
    for th in used_themes:
        subcodes = [f"{th} ‚Äì Depth", f"{th} ‚Äì Barriers"]
        lines.append(f"[Theme: {th}]")
        for sc in subcodes:
            lines.append(f"[Code: {sc}]")
    content = "\n".join(lines)
    (app_dir / f"Appendix_{app_id}_{theme.replace(' ', '')}_Transcript.txt").write_text(content)
    appendix_catalog.append({
        "app_id": app_id,
        "theme": theme,
        "type": "transcript",
        "years": [year],
        "themes": used_themes
    })

def write_observation_appendix(idx, theme):
    prefix = themes[theme]["prefix"]
    app_id = f"{prefix}{idx}"
    year = random.choice(years)
    session_title = random.choice([
        "Mechanistic Modeling", "Network Dynamics", "Microscopy Lab",
        "Stochastic Processes", "Mentoring Roundtable"
    ])
    lines = []
    lines.append(f"APPENDIX {app_id}. Observation Notes ‚Äî {session_title}, {year}")
    lines.append("")
    lines.append("Observer: Synthetic Evaluator")
    lines.append("Mode: In-person, structured observation")
    lines.append("")
    t = 0
    for _ in range(12):
        t += random.randint(2, 5)
        mm = t // 60
        ss = t % 60
        ts = f"{mm:02d}:{ss:02d}"
        note = random.choice([
            "Instructor checks in with groups; several participants ask for clarification.",
            "Participants appear deeply engaged in small-group discussion.",
            "A subset of the room seems confused by the notation on the board.",
            "The instructor explicitly connects the computation to the experimental context.",
            "Multiple students volunteer to share their reasoning at the board."
        ])
        lines.append(f"{ts} {note}")
    lines.append("")
    clarity = random.randint(3, 5)
    pacing = random.randint(2, 5)
    engagement = random.randint(3, 5)
    lines.append("Rubric Scores (1‚Äì5):")
    lines.append(f"  Instructor Clarity: {clarity}")
    lines.append(f"  Pacing: {pacing}")
    lines.append(f"  Interactive Engagement: {engagement}")
    content = "\n".join(lines)
    (app_dir / f"Appendix_{app_id}_{theme.replace(' ', '')}_Observation.txt").write_text(content)
    appendix_catalog.append({
        "app_id": app_id,
        "theme": theme,
        "type": "observation",
        "years": [year],
        "rubric": {"clarity": clarity, "pacing": pacing, "engagement": engagement}
    })

def write_curriculum_appendix(idx):
    theme = "Curriculum Modules"
    prefix = themes[theme]["prefix"]
    app_id = f"{prefix}{idx}"
    year = random.choice(years)
    module_name = random.choice(["Modeling Module A", "Analysis Module B", "Imaging Module C"])
    lines = []
    lines.append(f"APPENDIX {app_id}. Curriculum Module Summary ‚Äî {module_name}, {year}")
    lines.append("")
    los = [
        "Interpret basic mechanistic models.",
        "Relate computational outputs to biological phenomena.",
        "Collaborate across disciplinary backgrounds."
    ]
    lines.append("Learning Outcomes:")
    for lo in los:
        lines.append(f"  - {lo}")
    lines.append("")
    header = "+--------+-------+-------+------+-----------------+"
    lines.append(f"Table {app_id}.1. Student Ratings of the Module (1‚Äì5 scale)")
    lines.append("")
    lines.append(header)
    lines.append("| Cohort |   N   | Mean  |  SD  |  % High (4‚Äì5)   |")
    lines.append(header)
    cohorts = ["2024 Pilot", "2025 Main"]
    stats = []
    for c in cohorts:
        n = random.randint(18, 60)
        mean = round(random.uniform(3.4, 4.7), 2)
        sd = round(random.uniform(0.4, 0.9), 2)
        pct_high = round(random.uniform(60, 96), 1)
        lines.append(f"| {c:<8}|{n:6d} | {mean:4.2f} | {sd:4.2f} | {pct_high:7.1f}%        |")
        stats.append({"cohort": c, "n": n, "mean": mean, "pct_high": pct_high})
    lines.append(header)
    content = "\n".join(lines)
    (app_dir / f"Appendix_{app_id}_CurriculumModule.txt").write_text(content)
    appendix_catalog.append({
        "app_id": app_id,
        "theme": theme,
        "type": "curriculum",
        "years": [year],
        "stats": stats
    })

def write_research_engagement_appendix(idx):
    theme = "Research Engagement"
    prefix = themes[theme]["prefix"]
    app_id = f"{prefix}{idx}"
    ys = sorted(random.sample(years, k=random.choice([2, 3])))
    lines = []
    lines.append(f"APPENDIX {app_id}. Research Engagement Hours, {ys[0]}‚Äì{ys[-1]}")
    lines.append("")
    header = "+--------+----------------------+--------+-----------+"
    lines.append(header)
    lines.append("| Year   | Activity Type        |  N     | Mean Hrs  |")
    lines.append(header)
    activity_types = ["Lab Work", "Mentor Meetings", "Group Analysis", "Workshops"]
    stats = []
    for y in ys:
        for a in activity_types:
            n = random.randint(15, 70)
            mean_hrs = round(random.uniform(4, 25), 1)
            lines.append(f"| {y:<6}| {a:<20} |{n:7d}| {mean_hrs:7.1f} |")
            stats.append({"year": y, "activity": a, "n": n, "mean_hrs": mean_hrs})
    lines.append(header)
    content = "\n".join(lines)
    (app_dir / f"Appendix_{app_id}_ResearchEngagement.txt").write_text(content)
    appendix_catalog.append({
        "app_id": app_id,
        "theme": theme,
        "type": "research",
        "years": ys,
        "stats": stats
    })

def write_collaboration_appendix(idx):
    theme = "Collaboration Analysis"
    prefix = themes[theme]["prefix"]
    app_id = f"{prefix}{idx}"
    year = random.choice(years)
    nodes = random.randint(25, 60)
    edges = random.randint(nodes, nodes * 4)
    density = round((2*edges)/(nodes*(nodes-1)), 3)
    avg_degree = round(2*edges/nodes, 2)
    modularity = round(random.uniform(0.2, 0.6), 2)
    communities = random.randint(3, 6)
    lines = []
    lines.append(f"APPENDIX {app_id}. Collaboration Network Analysis, {year}")
    lines.append("")
    lines.append(f"Network Size (nodes): {nodes}")
    lines.append(f"Number of Edges: {edges}")
    lines.append(f"Density: {density}")
    lines.append(f"Average Degree: {avg_degree}")
    lines.append(f"Modularity (Louvain): {modularity}")
    lines.append(f"Communities Detected: {communities}")
    lines.append("")
    lines.append("Interpretive Summary:")
    lines.append(wrap(
        "This synthetic collaboration network shows moderate modularity, indicating several "
        "tightly connected sub-communities alongside cross-cutting ties. Values are illustrative."
    ))
    lines.append("")
    # small adjacency matrix snippet
    k = min(8, nodes)
    lines.append("Adjacency Matrix Snippet (first few nodes):")
    header = "    " + " ".join(f"{i:02d}" for i in range(1, k+1))
    lines.append(header)
    for i in range(1, k+1):
        row = [str(random.choice([0, 1])) for _ in range(k)]
        lines.append(f"{i:02d}  " + " ".join(row))
    content = "\n".join(lines)
    (app_dir / f"Appendix_{app_id}_CollaborationNetwork.txt").write_text(content)
    appendix_catalog.append({
        "app_id": app_id,
        "theme": theme,
        "type": "collaboration",
        "years": [year],
        "network": {
            "nodes": nodes,
            "edges": edges,
            "density": density,
            "avg_degree": avg_degree,
            "modularity": modularity,
            "communities": communities
        }
    })

# Generate appendices according to a richer distribution
# approx counts per domain
counts_plan = {
    "survey": 18,
    "transcript": 15,
    "observation": 10,
    "curriculum": 10,
    "research": 10,
    "collaboration": 12
}

# For surveys: spread across core themes
survey_themes = ["Summer School", "Mentoring", "Curriculum Modules", "Research Engagement", "Public Outreach"]
si = {t: 0 for t in survey_themes}
for _ in range(counts_plan["survey"]):
    t = random.choice(survey_themes)
    si[t] += 1
    write_survey_appendix(si[t], t)

# Transcripts: mostly Summer, Mentoring, Curriculum
transcript_themes = ["Summer School", "Mentoring", "Curriculum Modules"]
ti = {t: 0 for t in transcript_themes}
for _ in range(counts_plan["transcript"]):
    t = random.choice(transcript_themes)
    ti[t] += 1
    write_transcript_appendix(ti[t], t)

# Observations: mix of Summer, Curriculum, Public Outreach
obs_themes = ["Summer School", "Curriculum Modules", "Public Outreach"]
oi = {t: 0 for t in obs_themes}
for _ in range(counts_plan["observation"]):
    t = random.choice(obs_themes)
    oi[t] += 1
    write_observation_appendix(oi[t], t)

# Curriculum module summaries
for i in range(1, counts_plan["curriculum"] + 1):
    write_curriculum_appendix(i)

# Research engagement
for i in range(1, counts_plan["research"] + 1):
    write_research_engagement_appendix(i)

# Collaboration analysis
for i in range(1, counts_plan["collaboration"] + 1):
    write_collaboration_appendix(i)

# Ensure we have about 75 appendices
len_appendices = len(appendix_catalog)

# Generate claims
num_claims = 600
incorrect_ratio = 0.20
num_incorrect = int(num_claims * incorrect_ratio)

claim_rows = []
gt_rows = []

def make_claim(app_meta, claim_id, correct: bool):
    theme = app_meta["theme"]
    app_id = app_meta["app_id"]
    section_label = f"Section 4.{random.randint(1,5)}"
    error_type = ""
    claim_text = ""
    detail = ""

    if app_meta["type"] == "survey":
        ystat = random.choice(app_meta["stats"])
        y = ystat["year"]
        if correct:
            claim_text = (
                f"In {y}, participants in the {theme.lower()} program reported an average "
                f"satisfaction rating of approximately {ystat['mean']:.2f} on a 1‚Äì5 scale, "
                f"with about {ystat['pct_high']:.1f}% selecting high values (4‚Äì5), as shown in Appendix {app_id}."
            )
        else:
            wrong_mean = round(ystat["mean"] + random.choice([-0.8, -0.5, 0.5, 0.8]), 2)
            claim_text = (
                f"In {y}, satisfaction in the {theme.lower()} program dropped to around "
                f"{wrong_mean:.2f} with fewer than 40% of participants choosing high values, "
                f"according to Appendix {app_id}."
            )
            error_type = "misreported_survey_stat"
        detail = f"Table {app_id}.1"

    elif app_meta["type"] == "transcript":
        year = app_meta["years"][0]
        t_used = app_meta["themes"]
        all_possible = [
            "Interdisciplinarity", "Instructor Clarity", "Pacing",
            "Lab‚ÄìComputation Connection", "Mentoring Support",
            "Collaboration Barriers", "Belonging", "Cognitive Overload"
        ]
        if correct:
            th = random.choice(t_used)
            claim_text = (
                f"Focus group comments in Appendix {app_id} (from {year}) repeatedly highlight "
                f"{th.lower()} as a salient theme in participants' experiences of the "
                f"{theme.lower()} activities."
            )
        else:
            th = random.choice([x for x in all_possible if x not in t_used] or all_possible)
            claim_text = (
                f"Appendix {app_id} shows that {th.lower()} was the dominant recurring theme in "
                f"participant discussions, with little attention paid to other issues."
            )
            error_type = "qualitative_theme_mismatch"
        detail = "Thematic codes section"

    elif app_meta["type"] == "observation":
        year = app_meta["years"][0]
        r = app_meta["rubric"]
        if correct:
            claim_text = (
                f"Observation notes in Appendix {app_id} for {year} indicate relatively strong "
                f"instructor clarity (rated {r['clarity']}/5) and high interactive engagement "
                f"({r['engagement']}/5), with pacing rated at {r['pacing']}/5."
            )
        else:
            claim_text = (
                f"Appendix {app_id} documents very low engagement scores (1/5) and uniformly "
                f"poor clarity ratings for the observed {theme.lower()} session in {year}."
            )
            error_type = "reversed_observation_ratings"
        detail = "Rubric scores"

    elif app_meta["type"] == "curriculum":
        year = app_meta["years"][0]
        s = random.choice(app_meta["stats"])
        if correct:
            claim_text = (
                f"For the {s['cohort']} cohort of the curriculum module documented in Appendix {app_id}, "
                f"mean ratings were approximately {s['mean']:.2f} with about {s['pct_high']:.1f}% of students "
                f"selecting high satisfaction (4‚Äì5)."
            )
        else:
            claim_text = (
                f"Appendix {app_id} shows that fewer than 30% of students in the {s['cohort']} cohort "
                f"rated the module positively, with mean scores below 3.0."
            )
            error_type = "fabricated_low_module_rating"
        detail = f"Table {app_id}.1"

    elif app_meta["type"] == "research":
        s = random.choice(app_meta["stats"])
        if correct:
            claim_text = (
                f"In {s['year']}, participants in the {theme.lower()} logs spent on average "
                f"{s['mean_hrs']:.1f} hours on {s['activity'].lower()}, based on the synthetic data in Appendix {app_id}."
            )
        else:
            claim_text = (
                f"Appendix {app_id} indicates that participants devoted less than 2 hours on average "
                f"to {s['activity'].lower()} across all recorded years."
            )
            error_type = "understated_research_hours"
        detail = f"{s['year']} ‚Äì {s['activity']}"

    elif app_meta["type"] == "collaboration":
        net = app_meta["network"]
        year = app_meta["years"][0]
        if correct:
            claim_text = (
                f"The {year} collaboration network in Appendix {app_id} includes {net['nodes']} participants "
                f"and {net['edges']} edges, with an average degree of about {net['avg_degree']:.2f} and "
                f"modularity near {net['modularity']:.2f}, indicating several semi-independent communities."
            )
        else:
            claim_text = (
                f"Appendix {app_id} describes a very sparse {year} collaboration network with fewer than "
                f"10 edges and no discernible community structure (modularity near 0.0)."
            )
            error_type = "mischaracterized_network_structure"
        detail = "Network summary"

    claim_rows.append({
        "claim_id": claim_id,
        "section_label": section_label,
        "theme": theme,
        "claim_text": claim_text,
        "is_correct": str(correct),
        "source_appendices": app_id,
        "source_details": detail,
        "error_type": error_type,
    })
    gt_rows.append({
        "claim_id": claim_id,
        "source_appendices": app_id,
        "source_tables": detail,
        "source_lines": app_meta["type"],
        "is_correct": str(correct),
        "error_type": error_type,
    })

for i in range(num_claims):
    cid = f"C{i+1:03d}"
    correct = i >= num_incorrect  # first 20% incorrect
    meta = random.choice(appendix_catalog)
    make_claim(meta, cid, correct)

# Write claims CSVs
with open(claims_dir / "claims_catalog.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=claim_rows[0].keys())
    writer.writeheader()
    writer.writerows(claim_rows)

with open(claims_dir / "claims_ground_truth.csv", "w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=gt_rows[0].keys())
    writer.writeheader()
    writer.writerows(gt_rows)

# Create a README description
readme = """
QCB-Style Synthetic Evaluation Corpus
=====================================

This folder contains a synthetic, high-fidelity corpus designed to mimic a mixed-methods
STEM center evaluation environment (similar in spirit to QCB), without using any real data.

Structure
---------
- appendices/: 75 text-based appendices that look like exported PDF content.
  These include:
  * Survey-style summaries with multi-year tables and satisfaction metrics.
  * Focus group and interview transcripts with full conversational turns, coded themes,
    and subcodes.
  * Lecture and workshop observation notes with timestamps and rubric scores.
  * Curriculum module summaries with learning outcomes and module ratings.
  * Research engagement logs with hours by activity type and year.
  * Collaboration network summaries with graph statistics (nodes, edges, density,
    modularity, communities) and small adjacency matrix snippets.

- claims/:
  * claims_catalog.csv ‚Äì 600 synthetic claims that might appear in an evaluation
    report. Around 20% are intentionally incorrect or misleading.
  * claims_ground_truth.csv ‚Äì a private mapping file indicating, for each claim:
    - whether it is correct,
    - which appendix it should be traced to,
    - what kind of error is present if incorrect.

Usage
-----
This corpus is intended for building and evaluating an "Evidence Tracing & Explanation Agent"
capstone project. An agent can:
- Take a claim from claims_catalog.csv.
- Retrieve and read the relevant appendix (and possibly others).
- Decide whether the claim is supported by the evidence.
- Produce a natural-language explanation pointing to the appropriate appendix and section.
- Optionally, flag inconsistencies or missing evidence.

All numbers, participants, and narratives are fabricated, but internally consistent
within each appendix. Any resemblance to real programs or data is coincidental.
"""
(base / "README_SYNTHETIC_DATA.txt").write_text(readme.strip() + "\n")

# Zip it
zip_path = shutil.make_archive("qcb_capstone_data", "zip", base_dir=base)
zip_path

'qcb_capstone_data.zip'

### Project Structure

In [23]:
import os

base_dir = "/kaggle/working/qcb_evidence_agent"
dirs = [
    f"{base_dir}/src",
    f"{base_dir}/data/appendices",
    f"{base_dir}/data/claims"
]

for d in dirs:
    os.makedirs(d, exist_ok=True)
    print(f"Created: {d}")

# Create __init__.py so python can import from src
with open(f"{base_dir}/src/__init__.py", "w") as f:
    f.write("")

Created: /kaggle/working/qcb_evidence_agent/src
Created: /kaggle/working/qcb_evidence_agent/data/appendices
Created: /kaggle/working/qcb_evidence_agent/data/claims


### Implementing the RAG Layer

In [24]:
%%writefile /kaggle/working/qcb_evidence_agent/src/data_index.py
import os
from pathlib import Path
from typing import List, Dict, Any
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

class AppendixIndex:
    def __init__(self, appendices_dir: str):
        self.appendices_dir = Path(appendices_dir)
        self.filepaths: List[Path] = []
        self.texts: List[str] = []
        self.appendix_ids: List[str] = []
        self.vectorizer: TfidfVectorizer | None = None
        self.tfidf_matrix = None

    def load_appendices(self) -> None:
        self.filepaths = sorted(self.appendices_dir.glob("*.txt"))
        self.texts = []
        self.appendix_ids = []

        for fp in self.filepaths:
            text = fp.read_text(encoding="utf-8", errors="ignore")
            self.texts.append(text)
            stem = fp.stem
            parts = stem.split("_")
            app_id = parts[1] if len(parts) > 1 else stem
            self.appendix_ids.append(app_id)

    def build_index(self) -> None:
        self.vectorizer = TfidfVectorizer(stop_words="english", max_features=5000)
        self.tfidf_matrix = self.vectorizer.fit_transform(self.texts)

    def search(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
        if self.vectorizer is None:
            raise RuntimeError("Index not built.")

        q_vec = self.vectorizer.transform([query])
        scores = (self.tfidf_matrix @ q_vec.T).toarray().ravel()
        top_idx = np.argsort(scores)[::-1][:top_k]

        results = []
        for idx in top_idx:
            if scores[idx] <= 0: continue
            full_text = self.texts[idx]
            results.append({
                "appendix_id": self.appendix_ids[idx],
                "filename": str(self.filepaths[idx].name),
                "score": float(scores[idx]),
                "snippet": full_text[:800], # Increased snippet size for context
                "full_text": full_text
            })
        return results

Overwriting /kaggle/working/qcb_evidence_agent/src/data_index.py


In [25]:
%%writefile /kaggle/working/qcb_evidence_agent/src/tools.py
from typing import List, Dict, Any
from .data_index import AppendixIndex

class AppendixSearchTool:
    def __init__(self, appendices_dir: str):
        self.index = AppendixIndex(appendices_dir)
        self.index.load_appendices()
        self.index.build_index()

    def search_appendices(self, query_text: str, top_k: int = 5) -> List[Dict[str, Any]]:
        return self.index.search(query_text, top_k=top_k)

Overwriting /kaggle/working/qcb_evidence_agent/src/tools.py


### Implement the Agents

In [26]:
%%writefile /kaggle/working/qcb_evidence_agent/src/agents.py
import os
import json
import re
import time
from typing import Any, Dict, List
import google.generativeai as genai
from .tools import AppendixSearchTool

def get_gemini_model(model_name: str = "gemini-2.0-flash") -> genai.GenerativeModel:
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        print("üö® CRITICAL WARNING: GEMINI_API_KEY is missing from environment variables!")
    genai.configure(api_key=api_key)
    return genai.GenerativeModel(
        model_name, 
        generation_config={"response_mime_type": "application/json"}
    )

class ClaimUnderstandingAgent:
    def __init__(self, model):
        self.model = model

    def understand_claim(self, claim_id: str, claim_text: str) -> Dict[str, Any]:
        prompt = f'''Analyze this claim. Extract JSON. Claim: "{claim_text}"
        Output: {{ "claim_id": "{claim_id}", "claim_text": "{claim_text}", "key_concepts": [list of strings] }}'''
        
        try:
            # Retry logic for rate limits
            try:
                resp = self.model.generate_content(prompt)
            except Exception as e:
                if "429" in str(e):
                    print("‚ö†Ô∏è Rate limit hit. Waiting 5 seconds...")
                    time.sleep(5)
                    resp = self.model.generate_content(prompt)
                else:
                    raise e

            parsed = json.loads(resp.text)
            if isinstance(parsed, list): parsed = parsed[0]
            return parsed
        except Exception as e:
            print(f"‚ùå ClaimUnderstandingAgent Error: {e}")
            return {"claim_id": claim_id, "claim_text": claim_text, "key_concepts": []}

class EvidenceRetrievalAgent:
    def __init__(self, search_tool):
        self.search_tool = search_tool

    def retrieve_evidence(self, parsed_claim: Dict[str, Any]) -> Dict[str, Any]:
        text = parsed_claim.get("claim_text", "")
        concepts = parsed_claim.get("key_concepts", [])
        if isinstance(concepts, list): concepts = " ".join(str(c) for c in concepts)
        query = f"{text} {concepts}"
        
        results = self.search_tool.search_appendices(query, top_k=4)
        if not results:
            print(f"‚ö†Ô∏è Warning: No evidence found for query: '{query[:50]}...'")
            
        return {"parsed_claim": parsed_claim, "candidate_evidence": results}

class EvidenceAnalysisAgent:
    def __init__(self, model):
        self.model = model

    def analyze(self, retrieval_bundle: Dict[str, Any]) -> Dict[str, Any]:
        claim = retrieval_bundle["parsed_claim"]
        evidence = retrieval_bundle["candidate_evidence"]
        
        # Check if we actually have evidence
        if not evidence:
            return {"verdict": "unsupported", "explanation": "No relevant evidence found in the appendices.", "evidence_appendix_ids": []}

        ev_text = "".join([f"\n[Appendix {e['appendix_id']}]: {e['snippet']}..." for e in evidence])
        
        prompt = f'''Verify if claim is supported by evidence.
        Claim: "{claim.get('claim_text','')}"
        Evidence: {ev_text}
        Output JSON: {{ "verdict": "supported"|"unsupported"|"partially_supported", "evidence_appendix_ids": [], "explanation": "" }}'''
        
        try:
            try:
                resp = self.model.generate_content(prompt)
            except Exception as e:
                if "429" in str(e):
                    print("‚ö†Ô∏è Rate limit hit (Analysis). Waiting 5 seconds...")
                    time.sleep(5)
                    resp = self.model.generate_content(prompt)
                else:
                    raise e
                    
            parsed = json.loads(resp.text)
            if isinstance(parsed, list): parsed = parsed[0]
            return parsed
        except Exception as e:
            print(f"‚ùå EvidenceAnalysisAgent Error: {e}")
            return {"verdict": "error", "explanation": f"System Error: {str(e)}", "evidence_appendix_ids": []}

Overwriting /kaggle/working/qcb_evidence_agent/src/agents.py


### Orchestrator

In [27]:
%%writefile /kaggle/working/qcb_evidence_agent/src/orchestrator.py
import pandas as pd
from .agents import get_gemini_model, ClaimUnderstandingAgent, EvidenceRetrievalAgent, EvidenceAnalysisAgent
from .tools import AppendixSearchTool

class EvidenceTracingOrchestrator:
    def __init__(self, data_dir: str):
        self.data_dir = data_dir
        self.model = get_gemini_model()
        
        # Initialize Tools
        self.search_tool = AppendixSearchTool(f"{data_dir}/data/appendices")
        
        # Initialize Agents
        self.understander = ClaimUnderstandingAgent(self.model)
        self.retriever = EvidenceRetrievalAgent(self.search_tool)
        self.analyzer = EvidenceAnalysisAgent(self.model)
        
        # Load claims
        self.claims_df = pd.read_csv(f"{data_dir}/data/claims/claims_catalog.csv")

    def review_claim(self, claim_id: str):
        # 0. Get Data
        row = self.claims_df[self.claims_df["claim_id"] == claim_id].iloc[0]
        
        # 1. Understand
        parsed = self.understander.understand_claim(claim_id, row["claim_text"])
        
        # 2. Retrieve
        context = self.retriever.retrieve_evidence(parsed)
        
        # 3. Analyze
        result = self.analyzer.analyze(context)
        
        return {
            "claim_id": claim_id,
            "original_text": row["claim_text"],
            "verdict": result["verdict"],
            "explanation": result["explanation"],
            "cited_appendices": result["evidence_appendix_ids"]
        }

Overwriting /kaggle/working/qcb_evidence_agent/src/orchestrator.py


### Final Execution

In [28]:
import os
from kaggle_secrets import UserSecretsClient

# Try to load from Kaggle Secrets first
try:
    user_secrets = UserSecretsClient()
    # Ensure you added the secret named 'GEMINI_API_KEY' in the Add-ons menu!
    api_key = user_secrets.get_secret("GEMINI_API_KEY") 
    os.environ["GEMINI_API_KEY"] = api_key
    print("‚úÖ API Key successfully loaded from Kaggle Secrets.")
except Exception as e:
    # Fallback: If secrets aren't set up, paste it manually
    print("‚ö†Ô∏è Could not find Kaggle Secret 'GEMINI_API_KEY'.")
    print("Please paste your API Key below:")
    os.environ["GEMINI_API_KEY"] = input("Enter your Gemini API Key: ").strip()

‚úÖ API Key successfully loaded from Kaggle Secrets.


In [29]:
import sys
# Add our source code to python path
sys.path.append("/kaggle/working/qcb_evidence_agent")

from src.orchestrator import EvidenceTracingOrchestrator

# Initialize the system
print("Initializing Orchestrator (Building Index)...")
orchestrator = EvidenceTracingOrchestrator("/kaggle/working/qcb_evidence_agent")

# Pick a claim to test
target_claim = "C075"

print(f"\n--- Processing Claim {target_claim} ---\n")
result = orchestrator.review_claim(target_claim)

# Display Results
print(f"Claim: {result['original_text']}")
print(f"Verdict: {result['verdict'].upper()}")
print(f"Explanation: {result['explanation']}")
print(f"Sources: {result['cited_appendices']}")

Initializing Orchestrator (Building Index)...

--- Processing Claim C075 ---

Claim: In 2023, satisfaction in the research engagement program dropped to around 3.95 with fewer than 40% of participants choosing high values, according to Appendix R3.
Verdict: UNSUPPORTED
Explanation: Appendix R3 shows the mean satisfaction in 2023 was 4.45 and 61.0% of participants chose high values (4-5). This contradicts the claim that satisfaction dropped to 3.95 and fewer than 40% chose high values.
Sources: ['R3']


### Evaluation 

In [30]:
%%writefile /kaggle/working/qcb_evidence_agent/src/evaluation.py
import pandas as pd
import time
from typing import Dict, Any, List
from sklearn.metrics import classification_report, accuracy_score
from .orchestrator import EvidenceTracingOrchestrator

def normalize_verdict(v: str) -> str:
    v = str(v).lower().strip()
    if v in ["supported", "true", "yes"]:
        return "supported"
    if v in ["unsupported", "false", "no"]:
        return "unsupported"
    return "partially_supported"

def normalize_gt(gt: str) -> str:
    return "supported" if str(gt) == "True" else "unsupported"

def run_batch_eval(data_dir: str, max_claims: int = 20) -> pd.DataFrame:
    print(f"Starting evaluation on {max_claims} claims...")
    
    orch = EvidenceTracingOrchestrator(data_dir)
    
    claims_path = f"{data_dir}/data/claims/claims_catalog.csv"
    gt_path = f"{data_dir}/data/claims/claims_ground_truth.csv"
    
    df_claims = pd.read_csv(claims_path)
    df_gt = pd.read_csv(gt_path)
    
    # 1. Prepare Ground Truth
    gt_subset = df_gt[['claim_id', 'is_correct']].copy()
    gt_subset = gt_subset.rename(columns={'is_correct': 'gt_is_correct'})
    
    # 2. Merge
    df_merged = pd.merge(df_claims, gt_subset, on='claim_id', how='inner')
    
    # 3. Sample
    df_sample = df_merged.sample(n=min(max_claims, len(df_merged)), random_state=42)
    
    results = []
    
    for idx, row in df_sample.iterrows():
        cid = row['claim_id']
        text = row['claim_text']
        gt_raw = row['gt_is_correct']
        
        try:
            # Run Agent
            prediction = orch.review_claim(cid)
            pred_verdict = prediction.get('verdict', 'error')
            
            y_pred = normalize_verdict(pred_verdict)
            y_true = normalize_gt(gt_raw)
            
            results.append({
                "claim_id": cid,
                "claim_text": text,
                "ground_truth_raw": gt_raw,
                "agent_verdict_raw": pred_verdict,
                "y_true": y_true,
                "y_pred": y_pred,
                "explanation": prediction.get('explanation', '')
            })
            print(f"Processed {cid}: GT={y_true} | Pred={y_pred}")
            
            # --- FIX: SLEEP TO RESPECT RATE LIMITS ---
            # 15 requests/min = 1 request every 4 seconds. 
            # We do ~2 requests per claim, so we need to wait ~8 seconds.
            time.sleep(10) 
            
        except Exception as e:
            print(f"Error processing {cid}: {e}")
            time.sleep(10) # Wait even on error to let quota reset
            
    return pd.DataFrame(results)

def print_report(df_results: pd.DataFrame):
    print("\n" + "="*40)
    print("EVALUATION REPORT")
    print("="*40)
    
    if df_results.empty:
        print("No results to show.")
        return

    y_true = df_results['y_true']
    y_pred = df_results['y_pred']
    
    valid_mask = y_pred != 'error'
    
    if valid_mask.sum() == 0:
        print("No valid predictions found.")
        return

    acc = accuracy_score(y_true[valid_mask], y_pred[valid_mask])
    print(f"Accuracy: {acc:.2%}")
    print("\nClassification Report:")
    print(classification_report(y_true[valid_mask], y_pred[valid_mask], zero_division=0))

Overwriting /kaggle/working/qcb_evidence_agent/src/evaluation.py


In [31]:
import sys
import importlib
sys.path.append("/kaggle/working/qcb_evidence_agent")

# Force Reload
import src.evaluation
importlib.reload(src.evaluation)

from src.evaluation import run_batch_eval, print_report

# Run again
df_results = run_batch_eval("/kaggle/working/qcb_evidence_agent", max_claims=20)
print_report(df_results)

Starting evaluation on 20 claims...
Processed C111: GT=unsupported | Pred=unsupported
Processed C420: GT=supported | Pred=supported
Processed C566: GT=supported | Pred=supported
Processed C078: GT=unsupported | Pred=unsupported
Processed C182: GT=supported | Pred=supported
Processed C285: GT=supported | Pred=supported
Processed C011: GT=unsupported | Pred=unsupported
Processed C470: GT=supported | Pred=supported
Processed C079: GT=unsupported | Pred=unsupported
Processed C350: GT=supported | Pred=supported
Processed C056: GT=unsupported | Pred=unsupported
Processed C119: GT=unsupported | Pred=unsupported
Processed C110: GT=unsupported | Pred=unsupported
Processed C589: GT=supported | Pred=supported
Processed C370: GT=supported | Pred=supported
Processed C235: GT=supported | Pred=unsupported
Processed C031: GT=unsupported | Pred=unsupported
Processed C213: GT=supported | Pred=supported
Processed C185: GT=supported | Pred=supported
Processed C087: GT=unsupported | Pred=unsupported

EVALU

### Refining?

### Personalized Claims Chatbot UI

In [32]:
%%writefile /kaggle/working/qcb_evidence_agent/src/orchestrator_2.py
from typing import Dict, Any
from .agents import get_gemini_model, ClaimUnderstandingAgent, EvidenceRetrievalAgent, EvidenceAnalysisAgent
from .tools import AppendixSearchTool

class ChatOrchestrator:
    def __init__(self, data_dir: str):
        self.data_dir = data_dir
        self.model = get_gemini_model()
        
        # Initialize Tools (Shared with the other orchestrator)
        self.search_tool = AppendixSearchTool(f"{data_dir}/data/appendices")
        
        # Initialize Agents
        self.understander = ClaimUnderstandingAgent(self.model)
        self.retriever = EvidenceRetrievalAgent(self.search_tool)
        self.analyzer = EvidenceAnalysisAgent(self.model)

    def review_freeform_claim(self, claim_text: str) -> Dict[str, Any]:
        """
        Takes raw user input, runs the agent pipeline, and returns the result.
        No CSV lookup required.
        """
        # 1. Understand (Use 'user_query' as a dummy ID)
        parsed = self.understander.understand_claim("user_query", claim_text)
        
        # 2. Retrieve
        context = self.retriever.retrieve_evidence(parsed)
        
        # 3. Analyze
        result = self.analyzer.analyze(context)
        
        # 4. Return structured response for the UI
        return {
            "original_text": claim_text,
            "verdict": result.get("verdict", "error"),
            "explanation": result.get("explanation", "No explanation provided."),
            "cited_appendices": result.get("evidence_appendix_ids", []),
            "evidence_snippets": context.get("candidate_evidence", []) 
        }

Overwriting /kaggle/working/qcb_evidence_agent/src/orchestrator_2.py


In [33]:
%%writefile /kaggle/working/qcb_evidence_agent/app.py
import streamlit as st
import os
import sys

# Ensure src is in pythonpath
sys.path.append(os.getcwd())

# Import the new Chat Orchestrator
from src.orchestrator_2 import ChatOrchestrator

# Config
DATA_DIR = "./qcb_evidence_agent" 
st.set_page_config(page_title="Evidence Agent Chat", layout="wide")

@st.cache_resource
def get_agent():
    # Load the chat-specific agent
    return ChatOrchestrator(DATA_DIR)

st.title("üïµÔ∏è‚Äç‚ôÄÔ∏è Evidence Tracing Chatbot")
st.markdown("Type any claim below, and I will check the synthetic evidence base for you.")

# Input Area
claim_text = st.text_area("Enter your claim:", height=100, placeholder="e.g., Student confidence in coding tasks improved significantly in 2024.")

if st.button("Verify Claim"):
    if not claim_text:
        st.warning("Please enter some text first.")
    else:
        agent = get_agent()
        
        with st.spinner("Analyzing evidence..."):
            # Call the freeform method
            result = agent.review_freeform_claim(claim_text)
        
        # 1. Show Verdict
        verdict = result['verdict'].upper()
        
        # Dynamic color coding
        if "SUPPORTED" in verdict and "PARTIALLY" not in verdict:
            color = "green"
        elif "UNSUPPORTED" in verdict:
            color = "red"
        else:
            color = "orange" # Partial or Error
            
        st.markdown(f"### Verdict: :{color}[{verdict}]")
        
        # 2. Show Explanation
        st.markdown(f"**Analysis:** {result['explanation']}")
        
        # 3. Show Cited Evidence
        if result['evidence_snippets']:
            st.divider()
            st.subheader("üìö Evidence Retrieved")
            for ev in result['evidence_snippets']:
                with st.expander(f"Appendix {ev['appendix_id']} (Relevance: {ev['score']:.2f})"):
                    st.info(f"Source File: {ev['filename']}")
                    st.text(ev['snippet'])
        else:
            st.info("No relevant evidence found in the appendices.")

Overwriting /kaggle/working/qcb_evidence_agent/app.py


In [34]:
import google.generativeai as genai
import os

# Ensure key is loaded
if "GEMINI_API_KEY" not in os.environ:
    from kaggle_secrets import UserSecretsClient
    os.environ["GEMINI_API_KEY"] = UserSecretsClient().get_secret("GEMINI_API_KEY")

genai.configure(api_key=os.environ["GEMINI_API_KEY"])

print("Checking available models...")
try:
    for m in genai.list_models():
        if 'generateContent' in m.supported_generation_methods:
            print(f"- {m.name}")
except Exception as e:
    print(f"Error listing models: {e}")

Checking available models...
- models/gemini-2.5-pro-preview-03-25
- models/gemini-2.5-flash
- models/gemini-2.5-pro-preview-05-06
- models/gemini-2.5-pro-preview-06-05
- models/gemini-2.5-pro
- models/gemini-2.0-flash-exp
- models/gemini-2.0-flash
- models/gemini-2.0-flash-001
- models/gemini-2.0-flash-exp-image-generation
- models/gemini-2.0-flash-lite-001
- models/gemini-2.0-flash-lite
- models/gemini-2.0-flash-lite-preview-02-05
- models/gemini-2.0-flash-lite-preview
- models/gemini-2.0-pro-exp
- models/gemini-2.0-pro-exp-02-05
- models/gemini-exp-1206
- models/gemini-2.0-flash-thinking-exp-01-21
- models/gemini-2.0-flash-thinking-exp
- models/gemini-2.0-flash-thinking-exp-1219
- models/gemini-2.5-flash-preview-tts
- models/gemini-2.5-pro-preview-tts
- models/learnlm-2.0-flash-experimental
- models/gemma-3-1b-it
- models/gemma-3-4b-it
- models/gemma-3-12b-it
- models/gemma-3-27b-it
- models/gemma-3n-e4b-it
- models/gemma-3n-e2b-it
- models/gemini-flash-latest
- models/gemini-flash-

In [35]:
### Text Only Checking (Initial Pass)
import sys
sys.path.append("/kaggle/working/qcb_evidence_agent")

from src.orchestrator_2 import ChatOrchestrator

print("Initializing Chat Agent...")
chat_agent = ChatOrchestrator("/kaggle/working/qcb_evidence_agent")

# Test a custom query
user_query = "In the 2023 mechanistic modeling session, the instructor frequently checked in with students and explicitly connected the computation to the experimental context, demonstrating consistently high instructor clarity"
print(f"\nUser Query: {user_query}")
print("-" * 40)
result = chat_agent.review_freeform_claim(user_query)

print(f"VERDICT: {result['verdict'].upper()}")
print(f"REASONING: {result['explanation']}")

user_query_2 = "In the 2023 summer school, participants consistently described a strong sense of improved belonging throughout the program"
print(f"\nUser Query: {user_query_2}")
print("-" * 40)
result_2 = chat_agent.review_freeform_claim(user_query_2)

print(f"VERDICT: {result_2['verdict'].upper()}")
print(f"REASONING: {result_2['explanation']}")

Initializing Chat Agent...

User Query: In the 2023 mechanistic modeling session, the instructor frequently checked in with students and explicitly connected the computation to the experimental context, demonstrating consistently high instructor clarity
----------------------------------------
VERDICT: SUPPORTED
REASONING: The claim states that the instructor frequently checked in with students and explicitly connected the computation to the experimental context. The provided evidence from Appendices A1, A3, A5, and P2 all contain observations noting that the instructor checked in with groups, and participants asked for clarification, and that the instructor explicitly connected the computation to the experimental context. This supports the claim.

User Query: In the 2023 summer school, participants consistently described a strong sense of improved belonging throughout the program
----------------------------------------
VERDICT: UNSUPPORTED
REASONING: The provided transcripts do not c

In [36]:
import os
import shutil
import zipfile

# 1. Create the project skeleton
base_dir = "/kaggle/working/qcb_evidence_agent"
src_dir = f"{base_dir}/src"
data_dir = f"{base_dir}/data"

os.makedirs(src_dir, exist_ok=True)
os.makedirs(data_dir, exist_ok=True)

# 2. Unzip your data (Assuming your zip is named 'qcb_capstone_data.zip' and in /kaggle/working/)
# If you uploaded it via the "Add Input" menu, it might be in /kaggle/input/
zip_path = "/kaggle/working/qcb_capstone_data.zip" 

if os.path.exists(zip_path):
    print("Unzipping data...")
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(data_dir)
    print("Data unzipped successfully.")
else:
    print(f"Warning: Could not find {zip_path}. Please ensure the file is uploaded.")
    # Check if we need to move files if the zip created a subfolder
    # (Adjust this logic if your zip structure is different)

Unzipping data...
Data unzipped successfully.


In [37]:
import os

# --- 1. Create __init__.py ---
with open("/kaggle/working/qcb_evidence_agent/src/__init__.py", "w") as f:
    f.write("")

# --- 2. Create data_index.py (Search Engine) ---
data_index_code = """
import os
from pathlib import Path
from typing import List, Dict, Any
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

class AppendixIndex:
    def __init__(self, appendices_dir: str):
        self.appendices_dir = Path(appendices_dir)
        self.filepaths: List[Path] = []
        self.texts: List[str] = []
        self.appendix_ids: List[str] = []
        self.vectorizer: TfidfVectorizer | None = None
        self.tfidf_matrix = None

    def load_appendices(self) -> None:
        # Search recursively in case of nested folders
        self.filepaths = sorted(list(self.appendices_dir.glob("**/*.txt")))
        self.texts = []
        self.appendix_ids = []

        for fp in self.filepaths:
            text = fp.read_text(encoding="utf-8", errors="ignore")
            self.texts.append(text)
            stem = fp.stem
            parts = stem.split("_")
            app_id = parts[1] if len(parts) > 1 else stem
            self.appendix_ids.append(app_id)
            
        if not self.texts:
            print(f"Warning: No .txt files found in {self.appendices_dir}")

    def build_index(self) -> None:
        if not self.texts:
             self.vectorizer = TfidfVectorizer(stop_words="english", max_features=5000)
             self.tfidf_matrix = self.vectorizer.fit_transform(["dummy"])
             return

        self.vectorizer = TfidfVectorizer(stop_words="english", max_features=5000)
        self.tfidf_matrix = self.vectorizer.fit_transform(self.texts)

    def search(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
        if self.vectorizer is None:
            raise RuntimeError("Index not built.")

        q_vec = self.vectorizer.transform([query])
        scores = (self.tfidf_matrix @ q_vec.T).toarray().ravel()
        top_idx = np.argsort(scores)[::-1][:top_k]

        results = []
        for idx in top_idx:
            if scores[idx] <= 0 or idx >= len(self.texts): continue
            
            full_text = self.texts[idx]
            results.append({
                "appendix_id": self.appendix_ids[idx],
                "filename": str(self.filepaths[idx].name),
                "score": float(scores[idx]),
                "snippet": full_text[:800], 
                "full_text": full_text
            })
        return results
"""
with open("/kaggle/working/qcb_evidence_agent/src/data_index.py", "w") as f:
    f.write(data_index_code)

# --- 3. Create tools.py ---
tools_code = """
from typing import List, Dict, Any
from .data_index import AppendixIndex

class AppendixSearchTool:
    def __init__(self, appendices_dir: str):
        self.index = AppendixIndex(appendices_dir)
        self.index.load_appendices()
        self.index.build_index()

    def search_appendices(self, query_text: str, top_k: int = 5) -> List[Dict[str, Any]]:
        return self.index.search(query_text, top_k=top_k)
"""
with open("/kaggle/working/qcb_evidence_agent/src/tools.py", "w") as f:
    f.write(tools_code)

# --- 4. Create agents.py (Robust Version) ---
agents_code = """
import os
import json
import re
from typing import Any, Dict, List
import google.generativeai as genai
from .tools import AppendixSearchTool

def get_gemini_model(model_name: str = "gemini-2.0-flash") -> genai.GenerativeModel:
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        print("Warning: GEMINI_API_KEY not found.")
    genai.configure(api_key=api_key)
    return genai.GenerativeModel(model_name, generation_config={"response_mime_type": "application/json"})

class ClaimUnderstandingAgent:
    def __init__(self, model):
        self.model = model

    def understand_claim(self, claim_id: str, claim_text: str) -> Dict[str, Any]:
        prompt = f'''Analyze this claim. Extract JSON. Claim: "{claim_text}"
        Output: {{ "claim_id": "{claim_id}", "claim_text": "{claim_text}", "key_concepts": [list of strings] }}'''
        
        try:
            resp = self.model.generate_content(prompt)
            parsed = json.loads(resp.text)
            if isinstance(parsed, list): parsed = parsed[0]
            return parsed
        except:
            return {"claim_id": claim_id, "claim_text": claim_text, "key_concepts": []}

class EvidenceRetrievalAgent:
    def __init__(self, search_tool):
        self.search_tool = search_tool

    def retrieve_evidence(self, parsed_claim: Dict[str, Any]) -> Dict[str, Any]:
        text = parsed_claim.get("claim_text", "")
        concepts = parsed_claim.get("key_concepts", [])
        if isinstance(concepts, list): concepts = " ".join(str(c) for c in concepts)
        query = f"{text} {concepts}"
        return {"parsed_claim": parsed_claim, "candidate_evidence": self.search_tool.search_appendices(query, top_k=4)}

class EvidenceAnalysisAgent:
    def __init__(self, model):
        self.model = model

    def analyze(self, retrieval_bundle: Dict[str, Any]) -> Dict[str, Any]:
        claim = retrieval_bundle["parsed_claim"]
        evidence = retrieval_bundle["candidate_evidence"]
        ev_text = "".join([f"\\n[Appendix {e['appendix_id']}]: {e['snippet']}..." for e in evidence])
        
        prompt = f'''Verify if claim is supported by evidence.
        Claim: "{claim.get('claim_text','')}"
        Evidence: {ev_text}
        Output JSON: {{ "verdict": "supported"|"unsupported"|"partially_supported", "evidence_appendix_ids": [], "explanation": "" }}'''
        
        try:
            resp = self.model.generate_content(prompt)
            parsed = json.loads(resp.text)
            if isinstance(parsed, list): parsed = parsed[0]
            return parsed
        except:
            return {"verdict": "error", "explanation": "Analysis failed", "evidence_appendix_ids": []}
"""
with open("/kaggle/working/qcb_evidence_agent/src/agents.py", "w") as f:
    f.write(agents_code)

# --- 5. Create orchestrator_2.py (Chat Logic) ---
orch_code = """
from typing import Dict, Any
from .agents import get_gemini_model, ClaimUnderstandingAgent, EvidenceRetrievalAgent, EvidenceAnalysisAgent
from .tools import AppendixSearchTool

class ChatOrchestrator:
    def __init__(self, data_dir: str):
        self.data_dir = data_dir
        self.model = get_gemini_model()
        self.search_tool = AppendixSearchTool(f"{data_dir}/data/appendices")
        self.understander = ClaimUnderstandingAgent(self.model)
        self.retriever = EvidenceRetrievalAgent(self.search_tool)
        self.analyzer = EvidenceAnalysisAgent(self.model)

    def review_freeform_claim(self, claim_text: str) -> Dict[str, Any]:
        parsed = self.understander.understand_claim("user_query", claim_text)
        context = self.retriever.retrieve_evidence(parsed)
        result = self.analyzer.analyze(context)
        return {
            "original_text": claim_text,
            "verdict": result.get("verdict", "error"),
            "explanation": result.get("explanation", "No explanation provided."),
            "evidence_snippets": context.get("candidate_evidence", [])
        }
"""
with open("/kaggle/working/qcb_evidence_agent/src/orchestrator_2.py", "w") as f:
    f.write(orch_code)

# --- 6. Create app.py (Streamlit UI) ---
app_code = """
import streamlit as st
import os, sys
sys.path.append(os.getcwd())
from src.orchestrator_2 import ChatOrchestrator

DATA_DIR = "./qcb_evidence_agent" 
st.set_page_config(page_title="Evidence Chat", layout="wide")

@st.cache_resource
def get_agent():
    return ChatOrchestrator(DATA_DIR)

st.title("üïµÔ∏è‚Äç‚ôÄÔ∏è Evidence Tracing Chatbot")
claim = st.text_area("Enter claim:")
if st.button("Verify") and claim:
    with st.spinner("Analyzing..."):
        res = get_agent().review_freeform_claim(claim)
    st.markdown(f"### Verdict: {res['verdict'].upper()}")
    st.write(res['explanation'])
    if res['evidence_snippets']:
        st.divider()
        for e in res['evidence_snippets']:
            with st.expander(f"Appendix {e['appendix_id']}"):
                st.text(e['snippet'])
"""
with open("/kaggle/working/qcb_evidence_agent/app.py", "w") as f:
    f.write(app_code)

print("‚úÖ All system files have been restored successfully!")

‚úÖ All system files have been restored successfully!


In [39]:
import shutil
from IPython.display import FileLink

# Zip the entire project folder
shutil.make_archive("/kaggle/working/evidence_agent_full_project", 'zip', "/kaggle/working/qcb_evidence_agent")

print("Click below to download your full project:")
display(FileLink('evidence_agent_full_project.zip'))

Click below to download your full project:


# FINAL SHOWDOWN

In [40]:
import os

# 1. Create Directories
os.makedirs("/kaggle/working/qcb_evidence_agent/src", exist_ok=True)
os.makedirs("/kaggle/working/qcb_evidence_agent/data", exist_ok=True)

# 2. Create __init__.py
with open("/kaggle/working/qcb_evidence_agent/src/__init__.py", "w") as f:
    f.write("")

# 3. Create agents.py (With Error Printing Logic)
agents_code = """
import os
import json
import re
import time
from typing import Any, Dict, List
import google.generativeai as genai
from .tools import AppendixSearchTool

def get_gemini_model(model_name: str = "gemini-2.0-flash") -> genai.GenerativeModel:
    api_key = os.environ.get("GEMINI_API_KEY")
    if not api_key:
        print("üö® CRITICAL WARNING: GEMINI_API_KEY is missing!")
    genai.configure(api_key=api_key)
    return genai.GenerativeModel(model_name, generation_config={"response_mime_type": "application/json"})

class ClaimUnderstandingAgent:
    def __init__(self, model):
        self.model = model

    def understand_claim(self, claim_id: str, claim_text: str) -> Dict[str, Any]:
        prompt = f'''Analyze this claim. Extract JSON. Claim: "{claim_text}"
        Output: {{ "claim_id": "{claim_id}", "claim_text": "{claim_text}", "key_concepts": [list of strings] }}'''
        try:
            resp = self.model.generate_content(prompt)
            parsed = json.loads(resp.text)
            if isinstance(parsed, list): parsed = parsed[0]
            return parsed
        except Exception as e:
            print(f"‚ùå ClaimUnderstanding Error: {e}")
            return {"claim_id": claim_id, "claim_text": claim_text, "key_concepts": []}

class EvidenceRetrievalAgent:
    def __init__(self, search_tool):
        self.search_tool = search_tool

    def retrieve_evidence(self, parsed_claim: Dict[str, Any]) -> Dict[str, Any]:
        text = parsed_claim.get("claim_text", "")
        concepts = parsed_claim.get("key_concepts", [])
        if isinstance(concepts, list): concepts = " ".join(str(c) for c in concepts)
        query = f"{text} {concepts}"
        return {"parsed_claim": parsed_claim, "candidate_evidence": self.search_tool.search_appendices(query, top_k=4)}

class EvidenceAnalysisAgent:
    def __init__(self, model):
        self.model = model

    def analyze(self, retrieval_bundle: Dict[str, Any]) -> Dict[str, Any]:
        evidence = retrieval_bundle.get("candidate_evidence", [])
        if not evidence:
            return {"verdict": "unsupported", "explanation": "No evidence found.", "evidence_appendix_ids": []}

        ev_text = "".join([f"\\n[Appendix {e['appendix_id']}]: {e['snippet']}..." for e in evidence])
        prompt = f'''Verify if claim is supported by evidence.
        Claim: "{retrieval_bundle['parsed_claim'].get('claim_text','')}"
        Evidence: {ev_text}
        Output JSON: {{ "verdict": "supported"|"unsupported"|"partially_supported", "evidence_appendix_ids": [], "explanation": "" }}'''
        
        try:
            resp = self.model.generate_content(prompt)
            parsed = json.loads(resp.text)
            if isinstance(parsed, list): parsed = parsed[0]
            return parsed
        except Exception as e:
            print(f"‚ùå Analysis Error: {e}")
            return {"verdict": "error", "explanation": f"Error: {e}", "evidence_appendix_ids": []}
"""
with open("/kaggle/working/qcb_evidence_agent/src/agents.py", "w") as f:
    f.write(agents_code)

# 4. Create orchestrator_2.py
orch_code = """
from typing import Dict, Any
from .agents import get_gemini_model, ClaimUnderstandingAgent, EvidenceRetrievalAgent, EvidenceAnalysisAgent
from .tools import AppendixSearchTool

class ChatOrchestrator:
    def __init__(self, data_dir: str):
        self.data_dir = data_dir
        self.model = get_gemini_model()
        self.search_tool = AppendixSearchTool(f"{data_dir}/data/appendices")
        self.understander = ClaimUnderstandingAgent(self.model)
        self.retriever = EvidenceRetrievalAgent(self.search_tool)
        self.analyzer = EvidenceAnalysisAgent(self.model)

    def review_freeform_claim(self, claim_text: str) -> Dict[str, Any]:
        parsed = self.understander.understand_claim("user_query", claim_text)
        context = self.retriever.retrieve_evidence(parsed)
        result = self.analyzer.analyze(context)
        return {
            "original_text": claim_text,
            "verdict": result.get("verdict", "error"),
            "explanation": result.get("explanation", "Analysis failed."),
            "evidence_snippets": context.get("candidate_evidence", [])
        }
"""
with open("/kaggle/working/qcb_evidence_agent/src/orchestrator_2.py", "w") as f:
    f.write(orch_code)

# 5. Create tools.py & data_index.py
data_index_code = """
import os
from pathlib import Path
from typing import List, Dict, Any
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer

class AppendixIndex:
    def __init__(self, appendices_dir: str):
        self.appendices_dir = Path(appendices_dir)
        self.filepaths: List[Path] = []
        self.texts: List[str] = []
        self.appendix_ids: List[str] = []
        self.vectorizer: TfidfVectorizer | None = None
        self.tfidf_matrix = None

    def load_appendices(self) -> None:
        self.filepaths = sorted(list(self.appendices_dir.glob("**/*.txt")))
        for fp in self.filepaths:
            self.texts.append(fp.read_text(encoding="utf-8", errors="ignore"))
            self.appendix_ids.append(fp.stem.split("_")[1] if "_" in fp.stem else fp.stem)

    def build_index(self) -> None:
        if not self.texts:
             self.vectorizer = TfidfVectorizer(max_features=100)
             self.tfidf_matrix = self.vectorizer.fit_transform(["dummy"])
             return
        self.vectorizer = TfidfVectorizer(stop_words="english", max_features=5000)
        self.tfidf_matrix = self.vectorizer.fit_transform(self.texts)

    def search(self, query: str, top_k: int = 5) -> List[Dict[str, Any]]:
        if self.vectorizer is None or not self.texts: return []
        q_vec = self.vectorizer.transform([query])
        scores = (self.tfidf_matrix @ q_vec.T).toarray().ravel()
        top_idx = np.argsort(scores)[::-1][:top_k]
        results = []
        for idx in top_idx:
            if scores[idx] > 0:
                results.append({
                    "appendix_id": self.appendix_ids[idx],
                    "filename": self.filepaths[idx].name,
                    "score": float(scores[idx]),
                    "snippet": self.texts[idx][:800]
                })
        return results
"""
with open("/kaggle/working/qcb_evidence_agent/src/data_index.py", "w") as f:
    f.write(data_index_code)

tools_code = """
from typing import List, Dict, Any
from .data_index import AppendixIndex

class AppendixSearchTool:
    def __init__(self, appendices_dir: str):
        self.index = AppendixIndex(appendices_dir)
        self.index.load_appendices()
        self.index.build_index()

    def search_appendices(self, query_text: str, top_k: int = 5) -> List[Dict[str, Any]]:
        return self.index.search(query_text, top_k=top_k)
"""
with open("/kaggle/working/qcb_evidence_agent/src/tools.py", "w") as f:
    f.write(tools_code)

print("‚úÖ Codebase fully restored.")

‚úÖ Codebase fully restored.


In [41]:
import zipfile
import os

zip_path = "/kaggle/working/qcb_capstone_data.zip"
data_dir = "/kaggle/working/qcb_evidence_agent/data"

if os.path.exists(zip_path):
    with zipfile.ZipFile(zip_path, 'r') as zip_ref:
        zip_ref.extractall(data_dir)
    print("‚úÖ Data restored.")
else:
    print("‚ö†Ô∏è ZIP file not found! Please re-upload 'qcb_capstone_data.zip' or check /kaggle/input/.")

‚úÖ Data restored.


In [42]:
import sys
sys.path.append("/kaggle/working/qcb_evidence_agent")
from src.orchestrator_2 import ChatOrchestrator

try:
    agent = ChatOrchestrator("/kaggle/working/qcb_evidence_agent")
    res = agent.review_freeform_claim("Participants liked the summer school.")
    print(f"Verdict: {res['verdict']}")
    print(f"Reasoning: {res['explanation']}")
except Exception as e:
    print(f"Test Failed: {e}")

Verdict: unsupported
Reasoning: The evidence provided indicates that participants felt the summer school experience shaped how they think about various aspects like cognitive overload, pacing, lab-computation connection, belonging, collaboration barriers, instructor clarity, mentoring support and interdisciplinarity. While these could be interpreted as related to liking the summer school, it does not directly support the claim that participants 'liked' it. The evidence focuses on how the summer school shaped their thinking, not necessarily their overall sentiment towards it.


In [43]:
import shutil
import os
from IPython.display import FileLink

# Define the source folder and the output zip name
folder_path = "/kaggle/working/qcb_evidence_agent"
output_filename = "evidence_agent_full_project"
zip_path = f"/kaggle/working/{output_filename}"

# Check if the folder exists before zipping
if os.path.exists(folder_path):
    print("Zipping project files...")
    shutil.make_archive(zip_path, 'zip', folder_path)
    print(f"‚úÖ Project successfully zipped: {output_filename}.zip")
    
    # Generate the clickable link
    print("\nüëá CLICK HERE TO DOWNLOAD üëá")
    display(FileLink(f'{output_filename}.zip'))
else:
    print("‚ùå Error: The 'qcb_evidence_agent' folder does not exist. Please run the restoration scripts first.")

Zipping project files...
‚úÖ Project successfully zipped: evidence_agent_full_project.zip

üëá CLICK HERE TO DOWNLOAD üëá
