# RAG Research Summarizer with Claude (Proof of Concept)
This script uses Anthropic's Claude to answer queries using relevant research summaries.

## Setup:
1. Add your API key to a file called ignore.py at the same directory level as this script:

    KEY = "your_claude_api_key_here"


In [3]:
%pip install torch sentence-transformers anthropic

Defaulting to user installation because normal site-packages is not writeable
You should consider upgrading via the '/Library/Developer/CommandLineTools/usr/bin/python3 -m pip install --upgrade pip' command.
Note: you may need to restart the kernel to use updated packages.


# imports

In [None]:
import json 
import ignore

import torch
from sentence_transformers import SentenceTransformer, util
import anthropic


  from .autonotebook import tqdm as notebook_tqdm


# extract documents

In [6]:
# Load the {doc_id: summary_text} mapping. The context manager guarantees the
# file handle is closed, and the explicit encoding avoids depending on the
# platform default when the summaries contain non-ASCII characters.
with open('./documents.json', 'r', encoding='utf-8') as f:
    docs = json.load(f)

# Fuse each id with its text as "id||text" so the id travels with the document
# through retrieval and can be split back out when building the prompt.
docs = [f'{k}||{v}' for k, v in docs.items()]

for doc in docs[:5]:  # print a few docs as an example
    print(doc)

doc_001||A longitudinal study of 1,200 middle school students found that incorporating spaced repetition into mathematics curriculum improved long-term retention by 42% compared to massed practice. Students who reviewed concepts at intervals of 1, 3, and 7 days showed significantly better performance on assessments administered three months later.
doc_002||Research examining 500 undergraduate students revealed that handwritten notes led to 23% better conceptual understanding compared to laptop note-taking. The constraint of slower handwriting appeared to force students to process and synthesize information more deeply during lectures.
doc_003||A meta-analysis of 74 studies found that peer tutoring programs increased academic achievement by an average effect size of 0.59 standard deviations. Benefits were particularly pronounced when tutors were trained in questioning techniques and given structured materials.
doc_004||Investigation of 15 elementary schools implementing project-based le

# cosine similarity function definition

In [None]:
def search_top_k(model: SentenceTransformer, query: str, doc_embs: torch.Tensor, docs: list[str], k: int = 3) -> list[tuple[float, str]]:
    """
    Perform a cosine similarity search for a query against precomputed document embeddings.

    Args:
        model (SentenceTransformer): Preloaded embedding model; must be the same
            model that produced ``doc_embs``.
        query (str): Query string.
        doc_embs (torch.Tensor): Precomputed document embeddings, one row per
            entry in ``docs`` and in the same order.
        docs (list[str]): Original documents corresponding to embeddings.
        k (int, optional): Maximum number of results to return. Defaults to 3.
            Values larger than ``len(docs)`` are capped to ``len(docs)``.

    Returns:
        list[tuple[float, str]]: (similarity_score, document) tuples, best match
        first. Empty when ``k`` is not positive or there are no documents.
    """
    # torch.topk raises if k is larger than the number of candidates, so cap it
    # at the corpus size and short-circuit when nothing can be returned.
    k = min(k, len(docs))
    if k <= 0:
        return []

    query_emb = model.encode([query], convert_to_tensor=True, normalize_embeddings=True)
    sims = util.cos_sim(query_emb, doc_embs)[0]  # shape: [num_docs]
    top_k = torch.topk(sims, k=k)

    # .tolist() yields plain Python floats/ints, so the result carries no tensors
    # and the indices can be used directly on the Python list.
    return [
        (score, docs[idx])
        for score, idx in zip(top_k.values.tolist(), top_k.indices.tolist())
    ]



# build model 

In [None]:
# Load the MiniLM sentence-embedding model and embed the whole corpus once up
# front, so that each search only has to encode the incoming query.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
doc_embs = model.encode(
    docs,
    normalize_embeddings=True,
    convert_to_tensor=True,
)

### example search usage

In [47]:
# Sample retrieval: fetch the n summaries closest to a question and show each
# one alongside its cosine-similarity score (highest first).
query, n = 'what do we know about outdoor education', 3

results = search_top_k(model, query, doc_embs, docs, k=n)

for score, doc in results:
    print(f"{score:.4f} | {doc}")

0.5860 | doc_012||Research on outdoor education programs found that students who spent at least 2 hours per week in nature-based learning showed 26% reduction in stress markers and 18% improvement in creative problem-solving tasks compared to indoor-only control groups.
0.3282 | doc_004||Investigation of 15 elementary schools implementing project-based learning showed mixed results: while student engagement increased by 67%, standardized test scores showed no significant improvement in the first two years of implementation. Teachers reported needing 18-24 months to effectively adapt their practice.
0.2837 | doc_005||Analysis of online learning outcomes during 2020-2021 revealed that synchronous video classes with breakout rooms achieved 89% of the learning gains of in-person instruction, while asynchronous-only formats achieved only 64%. Student isolation was the primary predictor of poor outcomes.


# prompt building and generation

In [None]:
def generate_prompt(query: str, sources: int = 3, print_flag: bool = False) -> str:
    """
    Build a RAG prompt containing the research summaries most similar to the query.

    Retrieval uses the notebook-level ``model``, ``doc_embs`` and ``docs`` via
    ``search_top_k``.

    Args:
        query (str): The user question to answer.
        sources (int, optional): Number of summaries to retrieve and embed in the
            prompt. Defaults to 3.
        print_flag (bool, optional): When True, print each retrieved document with
            its similarity score before building the prompt. Defaults to False.

    Returns:
        str: The full prompt text, with the query and retrieved summaries embedded
        as a JSON object under the ``==INPUT==`` section.
    """
    results = search_top_k(model, query, doc_embs, docs, k=sources)

    if print_flag:
        for score, doc in results:
            print(f"{score:.4f} | {doc}")

    # The number of summaries is governed solely by `sources` (already applied by
    # the search); it must not depend on unrelated notebook globals.
    summaries = []
    for score, text in results:
        # Split on the first "||" only, so summary text that itself contains
        # "||" is kept whole instead of being cut at the second separator.
        doc_id, _, body = text.partition('||')
        summaries.append({"score": score, "id": doc_id, "text": body})

    rag_input = {
        "query": query,
        "research_summaries": summaries,
    }

    prompt = f"""
    You are an AI assistant that uses retrieval augmented generation to answer questions about educational best practices

    == Relevant Information ==
    Reference Summaries: You will be provided with structured summaries of research papers.
    Relevance Filtering: Only use information from the summaries if it is directly relevant to the query.
    Answer Generation: Generate concise, accurate, and clear answers to the user query.
    Citation: When using information from a summary, include a reference to the summary’s ID.

    ==INPUT==
    {json.dumps(rag_input, indent=2)}

    ==EXAMPLE OUTPUT== 
    {{
    "answer": <"Answer based on relevant summaries.">,
    "used_summaries": <["id1", ..., "idn"]>
    }}

    ==IMPORTANT==
    - Only respond with the output JSON, nothing before or after; DO NOT include "```json" or other markdown in your response.
    - Maintain a professional and friendly tone.
    - Respond only by referencing the given input. If none of the input is relevant to the user query, then respond that you have nothing useful to say.
    - Do not elaborate at all in your response outside of the input data.
    - Be concise
    """

    return prompt


### putting it all together with Claude

In [None]:

# End-to-end example: retrieve summaries for a sample question, send the
# resulting prompt to Claude, and decode the JSON answer.
prompt = generate_prompt(query='Tell me about optimal class size?')

client = anthropic.Anthropic(api_key=ignore.KEY)

response = client.messages.create(
    model="claude-sonnet-4-5-20250929",
    max_tokens=1024,
    messages=[
        {"role": "user", "content": prompt}
    ]
)

raw_text = response.content[0].text
try:
    response_obj = json.loads(raw_text)
except json.JSONDecodeError:
    # The model is only *asked* to reply with bare JSON; if it does not, keep
    # the raw reply for inspection instead of crashing the cell. Same fallback
    # shape as RAGPromptGenerator.query_llm.
    response_obj = {"error": "Failed to parse response JSON", "raw_text": raw_text}

print(response_obj)

{'answer': "Based on the available research, there is evidence that reducing class size from 25 to 15 students can have positive effects on student achievement, particularly in elementary reading where a 12% improvement was observed. However, the same study found no significant effect on mathematics achievement. It's worth noting that the research also indicated that cost-benefit analysis suggested targeted tutoring might be more efficient than class size reduction, suggesting that smaller classes alone may not be the most cost-effective intervention.", 'used_summaries': ['doc_016']}


# A more production-style OOP example

In [None]:
class RAGPromptGenerator:
    """
    Retrieval-augmented question answering over a fixed set of research summaries.

    Documents are embedded once at construction time. Each query is embedded on
    demand, ranked against the documents by cosine similarity, and the best
    matches are placed into a prompt that is sent to Claude.
    """

    def __init__(self, docs: list[str], api_key: str, embedding_model: str = "all-MiniLM-L6-v2", claude_model: str = "claude-sonnet-4-5-20250929"):
        """
        Initialize the RAG prompt generator and embed the documents.

        Args:
            docs: List of documents with format "id||text".
            api_key: Anthropic API key used to create the client.
            embedding_model: Name of the SentenceTransformer model to use for embeddings.
            claude_model: Which Claude model to use.
        """
        self.docs = docs
        self.model = SentenceTransformer(embedding_model)
        self.doc_embs = self.model.encode(docs, convert_to_tensor=True, normalize_embeddings=True)
        self.claude_model = claude_model
        self.client = anthropic.Anthropic(api_key=api_key)

    def search_top_k(self, query: str, k: int = 3) -> list[tuple[float, str]]:
        """
        Perform a cosine similarity search for a query against precomputed document embeddings.

        Args:
            query: Query string.
            k: Maximum number of results; capped at the number of documents.

        Returns:
            (similarity_score, document) tuples, best match first. Empty when
            ``k`` is not positive or there are no documents.
        """
        # torch.topk raises if k exceeds the number of candidates, so cap it at
        # the corpus size and bail out before encoding when nothing can match.
        k = min(k, len(self.docs))
        if k <= 0:
            return []

        query_emb = self.model.encode([query], convert_to_tensor=True, normalize_embeddings=True)
        sims = util.cos_sim(query_emb, self.doc_embs)[0]  # one score per document
        top_k = torch.topk(sims, k=k)

        # .tolist() yields plain Python floats/ints rather than 0-d tensors.
        return [
            (score, self.docs[idx])
            for score, idx in zip(top_k.values.tolist(), top_k.indices.tolist())
        ]

    def generate_prompt(self, query: str, sources: int = 3, print_flag: bool = False) -> str:
        """
        Generate a RAG-style prompt with top-k relevant research summaries.

        Args:
            query: The user question to answer.
            sources: Number of summaries to retrieve and embed in the prompt.
            print_flag: When True, print each retrieved document with its score.

        Returns:
            The full prompt text, with the query and retrieved summaries embedded
            as a JSON object under the ``==INPUT==`` section.
        """
        results = self.search_top_k(query, k=sources)

        if print_flag:
            for score, doc in results:
                print(f"{score:.4f} | {doc}")

        summaries = []
        for score, text in results:
            # Split on the first "||" only, so summary text that itself contains
            # "||" is kept whole instead of being cut at the second separator.
            doc_id, _, body = text.partition('||')
            summaries.append({"score": score, "id": doc_id, "text": body})

        rag_input = {
            "query": query,
            "research_summaries": summaries,
        }

        prompt = f"""
        You are an AI assistant that uses retrieval-augmented generation to answer questions about educational best practices.

        == Relevant Information ==
        Reference Summaries: You will be provided with structured summaries of research papers.
        Relevance Filtering: Only use information from the summaries if it is directly relevant to the query.
        Answer Generation: Generate concise, accurate, and clear answers to the user query.
        Citation: When using information from a summary, include a reference to the summary’s ID.

        ==INPUT==
        {json.dumps(rag_input, indent=2)}

        ==EXAMPLE OUTPUT==
        {{
        "answer": <"Answer based on relevant summaries.">,
        "used_summaries": <["id1", ..., "idn"]>
        }}

        ==IMPORTANT==
        - Only respond with the output JSON, nothing before or after; DO NOT include "```json" or other markdown in your response.
        - Maintain a professional and friendly tone.
        - Respond only by referencing the given input. If none of the input is relevant to the user query, then respond that you have nothing useful to say.
        - Do not elaborate at all in your response outside of the input data.
        - Be concise
        """
        return prompt

    def query_llm(self, query: str, sources: int = 3, print_flag: bool = False) -> dict:
        """
        Full pipeline: query -> retrieve top summaries -> generate prompt -> call Claude -> return JSON.

        Args:
            query: The user question to answer.
            sources: Number of summaries to retrieve for the prompt.
            print_flag: When True, print the retrieved documents and scores.

        Returns:
            The decoded JSON reply. If the reply is not valid JSON, a dict with an
            "error" message and the unparsed reply under "raw_text".
        """
        prompt = self.generate_prompt(query, sources=sources, print_flag=print_flag)

        response = self.client.messages.create(
            model=self.claude_model,
            max_tokens=1024,
            messages=[{"role": "user", "content": prompt}]
        )

        raw_text = response.content[0].text
        try:
            return json.loads(raw_text)
        except json.JSONDecodeError:
            # The model is only asked to reply with bare JSON; keep the raw reply
            # so the caller can inspect what came back instead.
            return {"error": "Failed to parse response JSON", "raw_text": raw_text}


In [None]:
rag_generator = RAGPromptGenerator(docs, 
                                   api_key=ignore.KEY,
                                   embedding_model='all-MiniLM-L6-v2', 
                                   claude_model='claude-sonnet-4-5-20250929')

query = "What do we know about classes that are outdoors?"
response = rag_generator.query_llm(query, sources=3)

print(json.dumps(response, indent=2))

{
  "answer": "Research shows that outdoor classes can have significant benefits for students. Students who participated in nature-based learning for at least 2 hours per week demonstrated a 26% reduction in stress markers and an 18% improvement in creative problem-solving tasks compared to students in indoor-only settings (doc_012).",
  "used_summaries": [
    "doc_012"
  ]
}
