In [1]:
import os
from dotenv import load_dotenv

In [2]:
# Load variables from a local .env file (if present) into os.environ.
load_dotenv()

# Settings the rest of the notebook expects to find in the environment.
# Re-assigning os.environ[name] = os.getenv(name) is a no-op when the variable
# is set and fails with an opaque "TypeError: str expected, not NoneType" when
# it is not, so check them up front and report every missing name at once.
required_env_vars = (
    "GOOGLE_API_KEY",
    "OPENAI_API_KEY",
    "LANGCHAIN_API_KEY",
    "LANGCHAIN_PROJECT",
)
missing_env_vars = [name for name in required_env_vars if name not in os.environ]
if missing_env_vars:
    raise RuntimeError(
        "Missing required environment variables: "
        + ", ".join(missing_env_vars)
        + ". Define them in your .env file or shell before running this notebook."
    )

# Set the LANGCHAIN_TRACING_V2 flag for this session.
os.environ["LANGCHAIN_TRACING_V2"] = "true"

In [3]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader('../llama2.pdf')

pages = []
async for page in loader.alazy_load():
    pages.append(page)

In [4]:
# Number of page Documents collected from the PDF loader.
len(pages)

77

In [5]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

# Break the loaded pages into smaller chunks (chunk_size=500, chunk_overlap=50)
# and show how many chunks that yields.
splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
chunks = splitter.split_documents(pages)

len(chunks)

615

In [6]:
# Peek at the first three chunks (page_content plus the PDF metadata carried along).
chunks[:3]

[Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': '../llama2.pdf', 'total_pages': 77, 'page': 0, 'page_label': '1'}, page_content='Llama 2: Open Foundation and Fine-Tuned Chat Models\nHugo Touvron∗ Louis Martin† Kevin Stone†\nPeter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra\nPrajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton Ferrer Moya Chen\nGuillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller\nCynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou\nHakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev'),
 Document(metadata={'producer': 'pdfTeX-1.

In [7]:
from langchain_openai import OpenAIEmbeddings

# Embedding client used both for indexing the chunks and for embedding queries.
embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
# Embed a throwaway string so the vector dimensionality can be inspected.
vec_hello_world = embeddings.embed_query("Hello, world!")

# Length of the probe vector, i.e. the embedding dimension.
len(vec_hello_world)

1536

In [8]:
import faiss
from langchain_community.vectorstores import FAISS
from langchain_community.docstore.in_memory import InMemoryDocstore

In [9]:
# Imported in this cell so it stays runnable on its own; DistanceStrategy ships
# in the same langchain_community package the notebook already uses.
from langchain_community.vectorstores.utils import DistanceStrategy

# Size the index from the probe embedding computed earlier rather than
# hard-coding 1536, so switching the embedding model cannot silently produce
# a dimension mismatch.
embedding_dim = len(vec_hello_world)

# Flat (exact) inner-product index: larger score means more similar.
index = faiss.IndexFlatIP(embedding_dim)

vector_store = FAISS(
    embedding_function=embeddings,
    index=index,
    docstore=InMemoryDocstore(),
    index_to_docstore_id={},
    # The wrapper defaults to a Euclidean strategy (smaller = better). Declare
    # the inner-product strategy so score thresholds and relevance scores are
    # interpreted consistently with the IndexFlatIP metric.
    distance_strategy=DistanceStrategy.MAX_INNER_PRODUCT,
)

In [10]:
# Embed the chunks and add them to the vector store; the cell output is the
# list of ids returned for the added documents.
# NOTE(review): re-running this cell presumably appends the same chunks a
# second time — confirm, and rebuild the store before re-running if so.
vector_store.add_documents(documents=chunks)

['415bb344-fd28-4825-9cc4-cf2d3e56aed0',
 '703c89f7-62c8-4a20-8c53-d74c01f4c0ea',
 'ff10c0e1-d832-4a72-8b66-8d160d2979d7',
 '5edeb411-1136-4d1a-85b1-793bfc014797',
 '1073a61d-64b5-4fcf-a0de-baf3e526176b',
 'e157d830-88a1-4980-b4ed-5556ec0e56f5',
 '7b7c9363-847a-4d30-b3ba-f80ca95800a3',
 '2c486e18-a620-400b-8484-daa3c63e9806',
 'a297e746-082d-4bda-8c0f-b223a11d7e65',
 '158b0f5a-25a4-4f72-983b-8be125b31b52',
 'c412d3ee-acc6-416f-918f-9ae2b7647973',
 '93721ffb-d642-4a1b-8097-c0e312cbf016',
 '3f4dd04d-415a-48c6-90ec-e8e7a7ad75aa',
 'ffc5e140-39f9-4d86-8583-94fd49756307',
 '405cdaf4-8381-4f18-8100-965c25e1c1fb',
 'c52dfd57-55a5-4566-848c-ac6fef0b8369',
 '47cd5501-193d-456a-a432-3674d14c9074',
 '572cdb8d-fb49-480c-8179-ffa05d8b17b3',
 '1b3e037c-b465-461d-8f9d-287fb05f0fea',
 'cea72f36-416d-40ec-8f7b-fa89b4152e25',
 '51c782cd-3a57-4711-9e37-d0d54d708fe6',
 '11022b62-0140-4be1-b662-1244b348f2d9',
 'aadb94cd-3611-4b0d-aff2-337cbe81f705',
 'e64c802b-8842-4e29-a04c-8b9587c78e74',
 '4a814425-1e9e-

In [11]:
# Expose the vector store as a retriever; search_kwargs requests k=4 results per query.
retriever = vector_store.as_retriever(search_kwargs={"k": 4})

In [12]:
# Sanity-check retrieval on a sample question before wiring up the full chain.
retriever.invoke("what is llama model?")

[Document(id='47076a17-d2c0-4d12-a04b-afd660659175', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': '../llama2.pdf', 'total_pages': 77, 'page': 76, 'page_label': '77'}, page_content='specific applications of the model. Please see the Responsible Use Guide available available at\nhttps://ai.meta.com/llama/responsible-user-guide\nTable 52: Model card forLlama 2.\n77'),
 Document(id='3399bef2-ab17-4171-ae06-81640146cb35', metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 

In [13]:
# Write the vector store to ../faiss_index_llama so it can be reused later.
vector_store.save_local("../faiss_index_llama")

In [14]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model that generates the final answer in the RAG chain.
# NOTE(review): presumably authenticates via GOOGLE_API_KEY from the environment — confirm.
llm = ChatGoogleGenerativeAI(model='gemini-1.5-flash')
# Display the configured client (the API key is masked in the repr).
llm

ChatGoogleGenerativeAI(model='models/gemini-1.5-flash', google_api_key=SecretStr('**********'), client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x00000216EDBE78C0>, default_metadata=())

In [15]:
from langchain import hub

# Fetch the "rlm/rag-prompt" prompt template from the LangChain hub.
# NOTE(review): presumably requires network access at run time — confirm.
prompt = hub.pull("rlm/rag-prompt")

In [16]:
import pprint

# Pretty-print the prompt's message templates to see which input variables it expects.
pprint.pprint(prompt.messages)

[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]


In [17]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [18]:
def format_docs(docs):
    """Combine the text of several documents into a single context string.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string attribute.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line. An empty iterable yields an empty string.
    """
    texts = [document.page_content for document in docs]
    separator = "\n\n"
    return separator.join(texts)

In [19]:
# Map the incoming question onto both prompt variables: "context" receives the
# retrieved documents rendered by format_docs, "question" is passed through unchanged.
prompt_inputs = {
    "context": retriever | format_docs,
    "question": RunnablePassthrough(),
}

# inputs -> prompt -> chat model -> plain-string output
rag_chain = prompt_inputs | prompt | llm | StrOutputParser()

rag_chain

{
  context: VectorStoreRetriever(tags=['FAISS', 'OpenAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x00000216918D79E0>, search_kwargs={'k': 4})
           | RunnableLambda(format_docs),
  question: RunnablePassthrough()
}
| ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, metadata={'lc_hub_owner': 'rlm', 'lc_hub_repo': 'rag-prompt', 'lc_hub_commit_hash': '50442af133e61576e74536c6556cefe1fac147cad032f4377b60c436e6cdcb6e'}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})])
| ChatGoogleGenerati

In [20]:
# Run the full RAG chain on a sample question and print the string answer.
response = rag_chain.invoke("what is llama model?")
print(response)

Llama 2 is a large language model from Meta, intended for commercial and research use in English.  It comes in pretrained and tuned versions; the tuned models are designed for chat, while pretrained models are adaptable to various natural language tasks.  More information, including a responsible use guide, is available on Meta's website.


In [21]:
# Second sample question; note that this overwrites the previous `response`.
response = rag_chain.invoke("How was the LLaMA model trained?")
print(response)

LLaMA 2 was initially pre-trained using publicly available online sources.  Then, supervised fine-tuning created an initial version of LLaMA 2-Chat, which was further refined using Reinforcement Learning with Human Feedback (RLHF).  This involved iterative refinement through rejection sampling and Proximal Policy Optimization.
