In [1]:
import os
from dotenv import load_dotenv

In [2]:
load_dotenv()

os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")
os.environ["OPENAI_API_KEY"] = os.getenv("OPENAI_API_KEY")
os.environ["ANTHROPIC_API_KEY"] = os.getenv("ANTHROPIC_API_KEY")
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")
os.environ["PINECONE_API_KEY"] = os.getenv("PINECONE_API_KEY")

os.environ["LANGCHAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_PROJECT"] = os.getenv("LANGCHAIN_PROJECT")
os.environ["LANGCHAIN_TRACING_V2"] = "true"

In [3]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings

embeddings = GoogleGenerativeAIEmbeddings(
    model='models/embedding-001'
)

In [4]:
vec = embeddings.embed_query("Hello, world!")
len(vec)

768

In [5]:
from pinecone import Pinecone

pc = Pinecone()
pc

<pinecone.pinecone.Pinecone at 0x1f1210d4800>

In [6]:
from pinecone import ServerlessSpec
# Serverless: Server will be Managed by the cloud provider

In [10]:
index_name = "rag-llama"

In [11]:
pc.has_index(index_name)

False

In [12]:
# creating an index

if not pc.has_index(index_name):
    pc.create_index(
    name=index_name,
    dimension=768,
    metric="cosine",
    spec=ServerlessSpec(cloud="aws",region="us-east-1")    
)

In [13]:
# loading the index
index = pc.Index(index_name)
index

<pinecone.db_data.index.Index at 0x1f121655e20>

In [14]:
from langchain_pinecone import PineconeVectorStore

In [16]:
vector_store = PineconeVectorStore(
    index = index,
    embedding = embeddings
)

vector_store

<langchain_pinecone.vectorstores.PineconeVectorStore at 0x1f1216556a0>

In [17]:
vector_store.similarity_search('what is a langchain?')

[]

In [18]:
from langchain_community.document_loaders import PyPDFLoader

loader = PyPDFLoader('../llama2.pdf')

pages = []
async for page in loader.alazy_load():
    pages.append(page)

In [19]:
from langchain_text_splitters import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(
    chunk_size = 500,
    chunk_overlap = 50
)

chunks = splitter.split_documents(pages)
len(chunks)

615

In [20]:
chunks[:3]

[Document(metadata={'producer': 'pdfTeX-1.40.25', 'creator': 'LaTeX with hyperref', 'creationdate': '2023-07-20T00:30:36+00:00', 'author': '', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'subject': '', 'title': '', 'trapped': '/False', 'source': '../llama2.pdf', 'total_pages': 77, 'page': 0, 'page_label': '1'}, page_content='Llama 2: Open Foundation and Fine-Tuned Chat Models\nHugo Touvron∗ Louis Martin† Kevin Stone†\nPeter Albert Amjad Almahairi Yasmine Babaei Nikolay Bashlykov Soumya Batra\nPrajjwal Bhargava Shruti Bhosale Dan Bikel Lukas Blecher Cristian Canton Ferrer Moya Chen\nGuillem Cucurull David Esiobu Jude Fernandes Jeremy Fu Wenyin Fu Brian Fuller\nCynthia Gao Vedanuj Goswami Naman Goyal Anthony Hartshorn Saghar Hosseini Rui Hou\nHakan Inan Marcin Kardas Viktor Kerkez Madian Khabsa Isabel Kloumann Artem Korenev'),
 Document(metadata={'producer': 'pdfTeX-1.

In [21]:
vector_store.add_documents(documents=chunks)

['fd877dd5-5482-45c9-afea-569cc355a201',
 '8e376529-7c3c-4e05-87b8-c9ab3ac56d1f',
 '477bb699-d69b-41f4-ad70-430fa9b23667',
 '9c40fcff-4df3-4a72-9b29-0f95c2d4b3c3',
 '1081a383-aa76-4c60-ab4c-d7fc5313b283',
 '158bea07-bb0f-4451-b340-9943b0b45f24',
 'dfebe400-edae-4512-91a3-e17119449b38',
 '38850707-1242-4905-b676-29d4416b0240',
 'c36be177-1df1-4f04-b866-e6d6f3db166d',
 '5d691b65-e1e5-4db7-b8a0-10f4401ee105',
 '125e24d3-298c-4d42-ad80-9044f3d47b69',
 'fa6750f0-e2d8-479b-9b1c-06ea391f9bd4',
 'bbba1136-5261-42bf-b48f-2424857d16d0',
 '245b8d20-b202-4f46-9b76-f6e88f273b1f',
 '2c70392a-ad16-4879-abdf-ba1561fd0013',
 '779270e7-39d2-4324-9fcf-cfe7fc54f20e',
 '22fa4ba5-c8ca-4869-b4fc-b5387ee94eba',
 'b628c5a3-e8f3-43e4-9354-ef539a5738fc',
 'a12e7330-d31a-4b90-ba88-3c6ed345f686',
 'ac688f30-9a70-48a9-9afe-92493bd2c5a8',
 'e1a5275d-1d73-4586-adc4-4164d58f10f1',
 'edfc8aad-6270-44f6-b007-012ef9c33ba7',
 '65755aee-1251-4343-a45a-53c81648dfbc',
 '03bf4b59-f370-44b0-a14a-59eacc860ee3',
 '79867908-d759-

In [22]:
vector_store.similarity_search(
    "what langchain provides to us?",
    k=4
)

[Document(id='c3aff96a-b098-41e0-8285-52aec2764c33', metadata={'author': '', 'creationdate': '2023-07-20T00:30:36+00:00', 'creator': 'LaTeX with hyperref', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'page': 23.0, 'page_label': '24', 'producer': 'pdfTeX-1.40.25', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'source': '../llama2.pdf', 'subject': '', 'title': '', 'total_pages': 77.0, 'trapped': '/False'}, page_content='preference data to train a safety reward model (see Section 3.2.2), and also reuse the adversarial prompts to\nsample from the model during the RLHF stage.\nBetter Long-Tail Safety Robustness without Hurting HelpfulnessSafety is inherently a long-tail problem,\nwhere the challenge comes from a small number of very specific cases. We investigate the impact of Safety\nRLHFbytakingtwointermediate Llama 2-Chatcheckpoints—onewithoutadversarialpromptsintheRLHF'),
 Document(id='82f396fa-0132-4e21-a0fd-9c0

In [23]:
vector_store.similarity_search(
    "what langchain provides to us?",
    filter={"source": "tweet"}
)

[]

In [24]:
retriever = vector_store.as_retriever(
    search_type="similarity_score_threshold",
    search_kwargs={"score_threshold": 0.8} 
)

In [25]:
retriever.invoke("langchain")

[Document(id='c3aff96a-b098-41e0-8285-52aec2764c33', metadata={'author': '', 'creationdate': '2023-07-20T00:30:36+00:00', 'creator': 'LaTeX with hyperref', 'keywords': '', 'moddate': '2023-07-20T00:30:36+00:00', 'page': 23.0, 'page_label': '24', 'producer': 'pdfTeX-1.40.25', 'ptex.fullbanner': 'This is pdfTeX, Version 3.141592653-2.6-1.40.25 (TeX Live 2023) kpathsea version 6.3.5', 'source': '../llama2.pdf', 'subject': '', 'title': '', 'total_pages': 77.0, 'trapped': '/False'}, page_content='preference data to train a safety reward model (see Section 3.2.2), and also reuse the adversarial prompts to\nsample from the model during the RLHF stage.\nBetter Long-Tail Safety Robustness without Hurting HelpfulnessSafety is inherently a long-tail problem,\nwhere the challenge comes from a small number of very specific cases. We investigate the impact of Safety\nRLHFbytakingtwointermediate Llama 2-Chatcheckpoints—onewithoutadversarialpromptsintheRLHF'),
 Document(id='a825a973-1051-4e44-81cc-f1d

In [26]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(model='gemini-1.5-flash')
llm

ChatGoogleGenerativeAI(model='models/gemini-1.5-flash', google_api_key=SecretStr('**********'), client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x000001F1393F3E30>, default_metadata=())

In [27]:
from langchain import hub
prompt = hub.pull("rlm/rag-prompt")

In [28]:
import pprint
pprint.pprint(prompt.messages)

[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]


[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:"), additional_kwargs={})]

In [29]:
from langchain_core.prompts import PromptTemplate

prompt = PromptTemplate(
    template="""You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:""",
    input_variables=['context', 'question']
)

prompt

PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:")

In [30]:
prompt.invoke({"question":"what is a langchain?","context":"langchain is very super framework for LLM."})

StringPromptValue(text="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: what is a langchain? \nContext: langchain is very super framework for LLM. \nAnswer:")

StringPromptValue(text="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: what is a langchain? \nContext: langchain is very super framework for LLM. \nAnswer:")

In [31]:
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

In [32]:
def format_docs(docs):
    return "\n\n".join(doc.page_content for doc in docs)

In [33]:
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
    | StrOutputParser()
)

rag_chain

{
  context: VectorStoreRetriever(tags=['PineconeVectorStore', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_pinecone.vectorstores.PineconeVectorStore object at 0x000001F1216556A0>, search_type='similarity_score_threshold', search_kwargs={'score_threshold': 0.8})
           | RunnableLambda(format_docs),
  question: RunnablePassthrough()
}
| PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:")
| ChatGoogleGenerativeAI(model='models/gemini-1.5-flash', google_api_key=SecretStr('**********'), client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x000001F1393F3E30>, default_metadata=

{
  context: VectorStoreRetriever(tags=['PineconeVectorStore', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_pinecone.vectorstores.PineconeVectorStore object at 0x000001F1216556A0>, search_type='similarity_score_threshold', search_kwargs={'score_threshold': 0.8})
           | RunnableLambda(format_docs),
  question: RunnablePassthrough()
}
| PromptTemplate(input_variables=['context', 'question'], input_types={}, partial_variables={}, template="You are an assistant for question-answering tasks. Use the following pieces of retrieved context to answer the question. If you don't know the answer, just say that you don't know. Use three sentences maximum and keep the answer concise.\nQuestion: {question} \nContext: {context} \nAnswer:")
| ChatGoogleGenerativeAI(model='models/gemini-1.5-flash', google_api_key=SecretStr('**********'), client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x000001F1393F3E30>, default_metadata=())
| StrOutputParser()

In [34]:
rag_chain.invoke("what is llama model?")

"Llama 2 is a large language model developed by Meta AI.  It comes in various sizes (7B, 13B, and 70B parameters) and is available in both pretrained and fine-tuned versions.  It's an autoregressive language model using an optimized transformer architecture."

"Llama 2 is a large language model developed by Meta AI.  It comes in various sizes (7B, 13B, and 70B parameters) and is available in both pretrained and fine-tuned versions.  It's an autoregressive language model using an optimized transformer architecture."

In [35]:
rag_chain.invoke(
    """
        How do sequence truncation artifacts at the end of fixed-length contexts affect token-level loss distribution during LLM pretraining, 
        and what mitigation strategies can be employed to reduce the 'final token loss spike' bias?
    """
    )

'The provided text does not describe how sequence truncation artifacts affect token-level loss distribution during LLM pretraining or mitigation strategies for a "final token loss spike".  Therefore, I cannot answer your question.'

'The provided text does not describe how sequence truncation artifacts affect token-level loss distribution during LLM pretraining or mitigation strategies for a "final token loss spike".  Therefore, I cannot answer your question.'

# Assignment:

# first complete the remaining part of this notebook(create a proper rag)



# second assisgnment is: take a multiple pdf with text,image,table
1. fetch the data from pdf
2. at lesat there should be 200 pages
3. if chunking(use the sementic chunking technique) required do chunking and then embedding
4. store it inside the vector database(use any of them 1. mongodb 2. astradb 3. opensearch 4.milvus) ## i have not discuss then you need to explore
5. create a index with all three index machnism(Flat, HNSW, IVF) ## i have not discuss then you need to explore
6. create a retriever pipeline
7. check the retriever time(which one is fastet)
8. print the accuray score of every similarity search
9. perform the reranking either using BM25 or MMR ## i have not discuss then you need to explore
10. then write a prompt template
11. generte a oputput through llm
12. render that output over the DOCx ## i have not discuss then you need to explore
as a additional tip: you can follow rag playlist from my youtube

after completing it keep it on your github and share that link on my  mail id:
snshrivas3365@gmail.com

and share the assignment in your community chat as well by tagging krish and sunny

deadline is: till friday 9PM
   