In [1]:
! pip install -U langchain-community sentence-transformers huggingface_hub accelerate transformers faiss-cpu datasets ragas

Collecting langchain-community
  Downloading langchain_community-0.3.30-py3-none-any.whl.metadata (3.0 kB)
Collecting sentence-transformers
  Downloading sentence_transformers-5.1.1-py3-none-any.whl.metadata (16 kB)
Collecting huggingface_hub
  Downloading huggingface_hub-0.35.3-py3-none-any.whl.metadata (14 kB)
[0mCollecting transformers
  Downloading transformers-4.56.2-py3-none-any.whl.metadata (40 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m40.1/40.1 kB[0m [31m1.6 MB/s[0m eta [36m0:00:00[0m
[?25hCollecting faiss-cpu
  Downloading faiss_cpu-1.12.0-cp312-cp312-manylinux_2_27_x86_64.manylinux_2_28_x86_64.whl.metadata (5.1 kB)
Collecting datasets
  Downloading datasets-4.1.1-py3-none-any.whl.metadata (18 kB)
Collecting ragas
  Downloading ragas-0.3.5-py3-none-any.whl.metadata (21 kB)
Collecting requests<3.0.0,>=2.32.5 (from langchain-community)
  Downloading requests-2.32.5-py3-none-any.whl.metadata (4.9 kB)
Collecting dataclasses-json<0.7.0,>=0.6.7 (from

In [13]:
from transformers import pipeline
from langchain.vectorstores import FAISS
from langchain.embeddings.base import Embeddings
from langchain.llms import HuggingFacePipeline
from sentence_transformers import SentenceTransformer
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI
from langchain_openai import ChatOpenAI
from langchain.schema.runnable import RunnablePassthrough
from langchain.schema.output_parser import StrOutputParser
import os
from getpass import getpass

from datasets import Dataset
from ragas import evaluate, RunConfig
from ragas.metrics import (
    faithfulness,
    answer_relevancy,
    context_precision,
    context_recall
)


from huggingface_hub import login
import pandas as pd

login()

VBox(children=(HTML(value='<center> <img\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…

In [14]:
# https://huggingface.co/datasets/virattt/financial-qa-10K/viewer?row=0&views%5B%5D=train
df = pd.read_parquet("hf://datasets/virattt/financial-qa-10K/data/train-00000-of-00001.parquet")
df = df.loc[df.ticker == "NVDA"]
df = df[["question", "answer", "context"]]
print(df.shape)
df.head()

(100, 3)


Unnamed: 0,question,answer,context
0,What area did NVIDIA initially focus on before...,NVIDIA initially focused on PC graphics.,"Since our original focus on PC graphics, we ha..."
1,What are some of the recent applications of GP...,Recent applications of GPU-powered deep learni...,Some of the most recent applications of GPU-po...
2,What significant invention did NVIDIA create i...,NVIDIA invented the GPU in 1999.,Our invention of the GPU in 1999 defined moder...
3,How does NVIDIA's platform strategy contribute...,NVIDIA's platform strategy brings together har...,"NVIDIA has a platform strategy, bringing toget..."
4,What does NVIDIA's CUDA programming model enable?,NVIDIA's CUDA programming model opened the par...,With our introduction of the CUDA programming ...


In [15]:
class CustomEmbeddings(Embeddings):
    def __init__(self, model_name="sentence-transformers/all-MiniLM-L6-v2"):
        self.model = SentenceTransformer(model_name)

    def embed_documents(self, texts):
        return self.model.encode(texts, normalize_embeddings=True).tolist()

    def embed_query(self, query):
        return self.embed_documents([query])[0]

# Sample documents
docs = df.context.apply(lambda x: {"page_content": x}).to_list()

# Create vector store
embedding_model = CustomEmbeddings()
texts = [d["page_content"] for d in docs]
doc_embeddings = embedding_model.embed_documents(texts)
vector_store = FAISS.from_texts(texts, embedding_model)

In [16]:
llm_pipeline = pipeline("text2text-generation", model="meta-llama/Llama-3.2-1B-Instruct", model_kwargs={"temperature": 0.2})
llm = HuggingFacePipeline(pipeline=llm_pipeline)

Device set to use cpu
The model 'LlamaForCausalLM' is not supported for text2text-generation. Supported models are ['PeftModelForSeq2SeqLM', 'BartForConditionalGeneration', 'BigBirdPegasusForConditionalGeneration', 'BlenderbotForConditionalGeneration', 'BlenderbotSmallForConditionalGeneration', 'EncoderDecoderModel', 'FSMTForConditionalGeneration', 'GPTSanJapaneseForConditionalGeneration', 'GraniteSpeechForConditionalGeneration', 'LEDForConditionalGeneration', 'LongT5ForConditionalGeneration', 'M2M100ForConditionalGeneration', 'MarianMTModel', 'MBartForConditionalGeneration', 'MT5ForConditionalGeneration', 'MvpForConditionalGeneration', 'NllbMoeForConditionalGeneration', 'PegasusForConditionalGeneration', 'PegasusXForConditionalGeneration', 'PLBartForConditionalGeneration', 'ProphetNetForConditionalGeneration', 'Qwen2AudioForConditionalGeneration', 'SeamlessM4TForTextToText', 'SeamlessM4Tv2ForTextToText', 'SwitchTransformersForConditionalGeneration', 'T5ForConditionalGeneration', 'T5Ge

In [8]:
# Define a plain string prompt for text2text generation
prompt = PromptTemplate.from_template(
    "Answer the question using only the context provided.\n"
    "If the answer is not in the context, say 'I don't know'.\n\n"
    "Context:\n{context}\n\nQuestion:\n{input}\n\nAnswer:"
)

question_answer_chain = create_stuff_documents_chain(llm, prompt)

In [9]:
chain = create_retrieval_chain(vector_store.as_retriever(), question_answer_chain)
query = "What are some of the recent applications of GPU-powered deep learning as mentioned by NVIDIA?"
answer = chain.invoke({"input": query})
print("Respuesta:", answer["answer"].split("Answer:")[1])
print("Contexto:", answer["context"])

Setting `pad_token_id` to `eos_token_id`:128001 for open-end generation.


Respuesta:  
- Recommendation systems
- Large language models
- Generative AI
Contexto: [Document(id='b49dee5b-7906-4902-b221-9e720e0dcf96', metadata={}, page_content='Some of the most recent applications of GPU-powered deep learning include recommendation systems, which are AI algorithms trained to understand the preferences, previous decisions, and characteristics of people and products using data gathered about their interactions, large language models, which can recognize, summarize, translate, predict and generate text and other content based on knowledge gained from massive datasets, and generative AI, which uses algorithms that create new content, including audio, code, images, text, simulations, and videos, based on the data they have been trained on.'), Document(id='440d2831-b7ab-4166-958e-1e0134a4c84f', metadata={}, page_content='We provide a complete, end-to-end accelerated computing platform for deep learning and machine learning, addressing both training and inferencing. T

# LangChain chains

In [18]:
import os
from getpass import getpass

# Use getpass to securely prompt for the API key
os.environ["OPENAI_API_KEY"] = getpass("Enter your OpenAI API key: ")

Enter your OpenAI API key: ··········


In [20]:
!pip install -U langchain-openai



In [21]:
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.2)

new_chain = (
    {"context": vector_store.as_retriever(),  "input": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

print(new_chain.invoke(query))

Recommendation systems, large language models, and generative AI.


# RAGAS

In [22]:
question_answer_chain = create_stuff_documents_chain(llm, prompt)
chain = create_retrieval_chain(vector_store.as_retriever(), question_answer_chain)
retriever = vector_store.as_retriever()


samples = []
gt_contexts = []
retrieved_context_lists = []
for i, row in df.iterrows():
    question = row["question"]
    gt_answer = row["answer"]
    gt_context = row["context"]
    result = chain.invoke({"input": question})

    retrieved_docs = retriever.get_relevant_documents(question)
    contexts = [doc.page_content for doc in retrieved_docs]

    # Save for evaluation
    gt_contexts.append(gt_context.strip())
    retrieved_context_lists.append(contexts)


    samples.append({
        "question": question,
        "answer": result["answer"] if isinstance(result, dict) else result,
        "contexts": contexts,
        "ground_truth": gt_answer,
    })

    if i >= 2:  # Evaluate on a subset for speed
        break

# Convert to HuggingFace Dataset
dataset = Dataset.from_list(samples)

# Evaluate with RAGAS
ragas_results = evaluate(dataset, metrics=[faithfulness, context_precision, answer_relevancy])
print(ragas_results)

  retrieved_docs = retriever.get_relevant_documents(question)


Evaluating:   0%|          | 0/9 [00:00<?, ?it/s]

{'faithfulness': 1.0000, 'context_precision': 0.8056, 'answer_relevancy': 0.8251}


In [23]:
def has_overlap(gt, retrieved):
    return any(gt in ctx for ctx in retrieved)

retriever_hits = [has_overlap(gt, retrieved) for gt, retrieved in zip(gt_contexts, retrieved_context_lists)]
retriever_recall_at_k = sum(retriever_hits) / len(retriever_hits)

print(f"Retriever Recall@k: {retriever_recall_at_k:.2%}")

Retriever Recall@k: 100.00%


# Resources

In [None]:
# https://www.youtube.com/watch?v=sVcwVQRHIc8
# https://arxiv.org/pdf/2409.13731
# https://arxiv.org/html/2412.15605v1
# https://arxiv.org/pdf/2410.05779
# https://arxiv.org/pdf/2502.14902
# https://arxiv.org/pdf/2312.10997