In [None]:
pip install langchain langchain_community langchain_groq sentence-transformers

In [None]:
pip install chromadb ragas

In [36]:
import os
from google.colab import userdata

GROQ_API_KEY = userdata.get('GROQ_API_KEY')
os.environ['GROQ_API_KEY'] = GROQ_API_KEY

In [37]:
from langchain.document_loaders import TextLoader

docs = TextLoader('/content/state_of_the_union.txt').load()
len(docs)

1

In [38]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
chunks = splitter.split_documents(docs)
len(chunks)

96

In [39]:
from langchain.embeddings import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

In [40]:
from langchain_groq import ChatGroq
llm = ChatGroq(model='qwen/qwen3-32b')

In [41]:
from langchain.vectorstores import Chroma

vector_store = Chroma.from_documents(chunks, embeddings)

In [42]:
retriever = vector_store.as_retriever(search_kwargs={"k": 3})

In [43]:
from langchain.prompts import ChatPromptTemplate
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
template = """
ou are an assistant for question-answering tasks.
Use the following pieces of retrieved context to answer the question.
If you don't know the answer, just say that you don't know.
Use two sentences maximum and keep the answer concise.

Question: {question}
Context: {context}
Answer:
"""
prompt = ChatPromptTemplate.from_template(template)

In [44]:
chain = (
    {"context": retriever, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

chain.invoke("What did president say about Ketanji Brown Jackson?")

'<think>\nOkay, the user is asking what the president said about Ketanji Brown Jackson. Let me check the provided context. There are three documents, all from the same source, the State of the Union address.\n\nLooking at the first two documents, they have identical content. The president mentions that nominating someone to the Supreme Court is a serious responsibility and that he nominated Judge Ketanji Brown Jackson four days ago, calling her one of the nation\'s top legal minds who will continue Justice Breyer\'s legacy. The third document talks about Justice Breyer and voting rights acts but doesn\'t mention Jackson. \n\nSo the answer should be that the president nominated her to the Supreme Court, praising her legal expertise and linking her to Justice Breyer\'s legacy. I need to make sure the answer is concise, two sentences max, and only uses the given context. Since the first two documents are duplicates, I can just reference one. The key points are the nomination, her role as 

In [45]:
questions = [
    "What did president say about Justice Breyer?",
    "What did president say about Intel's CEO?",
    "What did president say about the gun violence?"
]

In [46]:
ground_truth = [
    "The president said that Justice Breyer has dedicated his life to serve the country and thanked him for his service.",
    "The president said that Pat Gelsinger is ready to increase Intel’s investment to $100 billion.",
    "The president asked Congress to pass proven measures to reduce gun violence."
    ]

In [47]:
answer = []
content = []

In [48]:
for query in questions:
  answer.append(chain.invoke(query))
  content.append([doc.page_content for doc in retriever.get_relevant_documents(query)])

In [49]:
content

[['One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.',
  'One of the most serious constitutional responsibilities a President has is nominating someone to serve on the United States Supreme Court. \n\nAnd I did that 4 days ago, when I nominated Circuit Court of Appeals Judge Ketanji Brown Jackson. One of our nation’s top legal minds, who will continue Justice Breyer’s legacy of excellence.',
  'We cannot let this happen. \n\nTonight. I call on the Senate to: Pass the Freedom to Vote Act. Pass the John Lewis Voting Rights Act. And while you’re at it, pass the Disclose Act so Americans can know who is funding our elections. \n\nTonight, I’d like to honor someone who has dedicated his life to serve this count

In [50]:
answer

['<think>\nOkay, the user is asking what the president said about Justice Breyer. Let me check the provided context.\n\nLooking at the documents, there\'s a part where the president mentions Justice Breyer. The key sentence is: "Justice Stephen Breyer—an Army veteran, Constitutional scholar, and retiring Justice of the United States Supreme Court. Justice Breyer, thank you for your service." Also, earlier the president talks about nominating Ketanji Brown Jackson to continue Breyer\'s legacy. So the president acknowledged Breyer\'s service and highlighted his background as a veteran and scholar. The answer should combine these points concisely.\n</think>\n\nThe President honored Justice Stephen Breyer as an Army veteran, Constitutional scholar, and retiring Supreme Court Justice, thanking him for his service. He also noted that Judge Ketanji Brown Jackson would continue Breyer’s legacy of excellence.',
 '<think>\nOkay, let\'s see. The user is asking what the president said about Intel\

In [51]:
data = {
    "question":questions,
    "ground_truth": ground_truth,
    "answer": answer,
    "context": content
}

In [62]:
data = {
    "question":questions,
    "ground_truth": ground_truth,
    "answer": answer,
    "retrieved_contexts": content # Renamed 'context' to 'retrieved_contexts'
}
from datasets import Dataset

dataset = Dataset.from_dict(data)

In [63]:
dataset

Dataset({
    features: ['question', 'ground_truth', 'answer', 'retrieved_contexts'],
    num_rows: 3
})

In [64]:
from ragas.metrics import (
        context_precision,
        faithfulness,
        answer_relevancy,
        context_recall,
)
from ragas import evaluate

ragas_metrics = [
    context_precision,
    faithfulness,
    answer_relevancy,
    context_recall,
]

In [65]:
result = evaluate(dataset,ragas_metrics, llm, embeddings)

Evaluating:   0%|          | 0/12 [00:00<?, ?it/s]

ERROR:ragas.executor:Exception raised in Job[0]: TimeoutError()
ERROR:ragas.executor:Exception raised in Job[1]: TimeoutError()
ERROR:ragas.executor:Exception raised in Job[4]: TimeoutError()
ERROR:ragas.executor:Exception raised in Job[5]: TimeoutError()
ERROR:ragas.executor:Exception raised in Job[8]: TimeoutError()
ERROR:ragas.executor:Exception raised in Job[9]: TimeoutError()


In [66]:
result

{'context_precision': nan, 'faithfulness': nan, 'answer_relevancy': 0.9292, 'context_recall': 0.6667}

In [68]:
df = result.to_pandas()
df

Unnamed: 0,user_input,retrieved_contexts,response,reference,context_precision,faithfulness,answer_relevancy,context_recall
0,What did president say about Justice Breyer?,[One of the most serious constitutional respon...,"<think>\nOkay, the user is asking what the pre...",The president said that Justice Breyer has ded...,,,0.94432,1.0
1,What did president say about Intel's CEO?,[So let’s not wait any longer. Send it to my d...,"<think>\nOkay, let's see. The user is asking w...",The president said that Pat Gelsinger is ready...,,,0.982107,0.0
2,What did president say about the gun violence?,[Ban assault weapons and high-capacity magazin...,"<think>\nOkay, let's see. The user is asking w...",The president asked Congress to pass proven me...,,,0.861184,1.0
