In [1]:
import os
from sentence_transformers import SentenceTransformer
import anthropic
from context_retrieval import *

In [2]:
# Load the fact corpus from openbook.txt: one document per line.
# The path is assembled with os.path.join so the cell also works on POSIX
# systems, where a literal "data\\Main\\openbook.txt" would be treated as a
# single file name containing backslashes.
openbook_path = os.path.join("data", "Main", "openbook.txt")
with open(openbook_path, "r", encoding="utf-8") as file:
    lines = file.readlines()

# Strip surrounding whitespace and skip blank lines so that no empty string
# ends up in the corpus (it would be embedded and could be returned as context).
documents = [line.strip() for line in lines if line.strip()]

# Sentence-embedding model; it is also handed to retrieve_context in later cells.
embedder = SentenceTransformer('all-MiniLM-L6-v2')

# Embeddings for every entry of `documents`, requested as a NumPy array.
doc_embeddings = embedder.encode(documents, convert_to_numpy=True)

In [3]:
# Smoke test of the retrieval helper on a sample query with k=1.
# NOTE(review): retrieve_context is presumably provided by the
# `from context_retrieval import *` line — confirm where it is defined.
retrieve_context("The sun is responsible for", embedder, documents, doc_embeddings, k=1)

['"the sun is a source of heat called sunlight"']

### Testing on training set:

In [4]:
# Build the path with os.path.join instead of hard-coded backslashes so the
# notebook is not tied to Windows path separators.
file_path = os.path.join("data", "Main", "train.jsonl")
# NOTE(review): read_jsonl is presumably provided by the context_retrieval
# star import; the cells below index its result as a sequence of dicts.
data = read_jsonl(file_path)

In [5]:
# Pull the question stem and the answer choices out of every record.
# Both lists follow the order of `data`, so they are index-aligned.
questions = [
    record['question']['stem']
    for record in data
]
choices = [
    record['question']['choices']
    for record in data
]

In [14]:
def _format_choices(options):
    """Join one question's choices into a single '<label>. <text>' string, space-separated."""
    return " ".join(f"{option['label']}. {option['text']}" for option in options)


# One formatted answer string per question, in the same order as `choices`.
formatted_list = [_format_choices(options) for options in choices]
# Preview the first five entries.
formatted_list[:5]

['A. puppies learning new tricks B. children growing up and getting old C. flowers wilting in a vase D. plants sprouting, blooming and wilting',
 'A. the mountains seem very close B. the mountains are boring C. the mountains look the same as from up close D. the mountains seem smaller than in photographs',
 "A. the mind needs time to digest B. take a second to digest what I said C. nutrients are being deconstructed D. reader's digest is a body of works",
 'A. warm lights that float B. made out of nitrate C. great balls of gas burning billions of miles away D. lights in the sky',
 'A. straw B. Glass C. Candle D. mailing tube']

In [21]:
# Build one example prompt end to end.
# The question and its answer choices are both taken from the same index so
# they cannot drift apart when `index` is changed (previously the question was
# a hard-coded literal while the answers came from formatted_list[index]).
# NOTE(review): the recorded output pairs "The sun is responsible for" with the
# choices at index 0, so questions[0] is expected to be that stem — confirm.
index = 0
question = questions[index]
answers = formatted_list[index]

# Retrieved context for the question, requested with k=1.
context = retrieve_context(question, embedder, documents, doc_embeddings, k=1)

prompt = f"Based purely on the context here: {context}, answer this question with just A, B, C, or D: {question} The possible answers are: {answers}"

print(prompt)

Based purely on the context here: ['"the sun is a source of heat called sunlight"'], answer this question with just A, B, C, or D: The sun is responsible for The possible answers are: A. puppies learning new tricks B. children growing up and getting old C. flowers wilting in a vase D. plants sprouting, blooming and wilting


### Using Claude to pick answer based on context:

In [16]:
# The API key is read from the CLAUDE_API_KEY environment variable rather than
# being written into the notebook; the lookup yields None when it is unset.
claude_api_key = os.environ.get("CLAUDE_API_KEY")
client = anthropic.Anthropic(
    api_key=claude_api_key,
)

In [22]:
# Pair every question with its formatted answer choices.
# The pairs are materialized as a list: a bare zip object is a one-shot
# iterator, so after the comprehension below it would be left exhausted in the
# kernel namespace and any later use of `qa` would silently see no items.
qa = list(zip(questions, formatted_list))

# One prompt per question; the context for each prompt is retrieved with k=1.
prompt_batch = [
    f"""Based purely on the context here: {retrieve_context(question, embedder, documents, doc_embeddings, k=1)}, answer this question with just A, B, C, or D: {question} The possible answers are: {answers}""" for question, answers in qa
]

In [None]:
# Send every prompt to the model in order, so responses[i] is the reply to
# prompt_batch[i].
responses = []

for prompt in prompt_batch:
    response = client.messages.create(
        model="claude-3-opus-20240229",
        max_tokens=100,
        # The prompt asks for a single answer letter (A-D). A temperature of
        # 0.0 keeps the run as repeatable as the API allows; the previous 0.7
        # added sampling noise to a multiple-choice evaluation.
        temperature=0.0,
        messages=[{"role": "user", "content": prompt}]
    )

    # Keep only the text of the first content block of the reply.
    responses.append(response.content[0].text)

In [None]:
# Number of replies collected so far; compare against len(prompt_batch) to
# check that the request loop ran to completion.
len(responses)

In [None]:
# Preview only the first few replies instead of dumping the whole list, which
# has one entry per prompt and would flood the cell output.
responses[:5]