In [2]:
from langchain_core.documents import Document

# Build the toy corpus: one Document per (text, source) pair below.
documents = [
    Document(page_content=text, metadata={"source": source})
    for text, source in (
        ("Dogs are great champions.", "mammal-pet-doc"),
        ("Cats are curious creatures.", "mammal-pet-doc"),
        ("Elephants are the largest land animals.", "wildlife-doc"),
        ("Whales are the giants of the ocean.", "marine-life-doc"),
        ("Birds have feathers and can often fly.", "avian-doc"),
    )
]

# Sanity check: show each document's content and metadata.
for doc in documents:
    print(doc)


page_content='Dogs are great champions.' metadata={'source': 'mammal-pet-doc'}
page_content='Cats are curious creatures.' metadata={'source': 'mammal-pet-doc'}
page_content='Elephants are the largest land animals.' metadata={'source': 'wildlife-doc'}
page_content='Whales are the giants of the ocean.' metadata={'source': 'marine-life-doc'}
page_content='Birds have feathers and can often fly.' metadata={'source': 'avian-doc'}


In [3]:
#Vector stores
from langchain_chroma import Chroma
import os
from langchain_groq import ChatGroq
from dotenv import load_dotenv
# Load variables from a .env file (if one is found) into the process environment.
load_dotenv()

# Read the Groq key from the environment; this is None when the variable is unset.
groq_api_key = os.environ.get("GROQ_API_KEY")


In [4]:
# Chat model client for Groq, authenticated with the key read from the environment.
llm = ChatGroq(
    groq_api_key=groq_api_key,
    model="Llama3-8b-8192",
)
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x00000236BE6D5090>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x00000236BE6D5D20>, model_name='Llama3-8b-8192', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [5]:
# Documents must be converted to embedding vectors before they can be stored
# in a vector store; this embedding model performs that conversion.
# NOTE(review): the cell output looks like the tail of a warning -- this class is
# presumably deprecated in favour of the separate langchain-ollama package; confirm.
from langchain_community.embeddings import OllamaEmbeddings

embedding = OllamaEmbeddings(model="gemma:2b")


  embedding=OllamaEmbeddings(model="gemma:2b")


In [6]:
from langchain_chroma import Chroma
from langchain_community.vectorstores import FAISS
# Embed every document with `embedding` and index the vectors in a FAISS store.
db = FAISS.from_documents(documents, embedding)
db
# Alternative backend (left disabled): build a Chroma store from the same inputs.
# vectorstore = Chroma.from_documents(documents, embedding=embedding)

<langchain_community.vectorstores.faiss.FAISS at 0x236fe9a65f0>

In [7]:
# Fetch the stored documents most similar to the query text.
db.similarity_search(query="cat")

[Document(id='20ec3f40-50f2-4f7f-ab14-8d4850363bad', metadata={'source': 'mammal-pet-doc'}, page_content='Cats are curious creatures.'),
 Document(id='3ac4c7a5-5b86-4f97-be75-f0f8f0f71ef0', metadata={'source': 'mammal-pet-doc'}, page_content='Dogs are great champions.'),
 Document(id='4d60b5a0-65b1-4f0e-8647-0090ae806a77', metadata={'source': 'avian-doc'}, page_content='Birds have feathers and can often fly.'),
 Document(id='1ffcf363-7736-4f9b-b822-15c73da9d66e', metadata={'source': 'wildlife-doc'}, page_content='Elephants are the largest land animals.')]

In [8]:
# Same lookup, but each document is paired with its score.
# NOTE(review): the best match shows the lowest value in the output, so the score
# is presumably a distance (lower = closer) -- confirm against the FAISS docs.
db.similarity_search_with_score(query="cat")

[(Document(id='20ec3f40-50f2-4f7f-ab14-8d4850363bad', metadata={'source': 'mammal-pet-doc'}, page_content='Cats are curious creatures.'),
  4927.949),
 (Document(id='3ac4c7a5-5b86-4f97-be75-f0f8f0f71ef0', metadata={'source': 'mammal-pet-doc'}, page_content='Dogs are great champions.'),
  4935.481),
 (Document(id='4d60b5a0-65b1-4f0e-8647-0090ae806a77', metadata={'source': 'avian-doc'}, page_content='Birds have feathers and can often fly.'),
  5321.375),
 (Document(id='1ffcf363-7736-4f9b-b822-15c73da9d66e', metadata={'source': 'wildlife-doc'}, page_content='Elephants are the largest land animals.'),
  5337.596)]

In [9]:
# Retriever
# A vector store cannot be plugged directly into chain operations, so it is
# wrapped in a retriever first.
retriever = db.as_retriever(search_type="similarity", search_kwargs={"k": 1})

# Run both queries in one call; with k=1 each query yields its single best match.
retriever.batch(["cat", "dog"])

[[Document(id='20ec3f40-50f2-4f7f-ab14-8d4850363bad', metadata={'source': 'mammal-pet-doc'}, page_content='Cats are curious creatures.')],
 [Document(id='3ac4c7a5-5b86-4f97-be75-f0f8f0f71ef0', metadata={'source': 'mammal-pet-doc'}, page_content='Dogs are great champions.')]]

In [10]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
    """Join the text of the retrieved documents into one plain-text context string.

    Args:
        docs: Iterable of retrieved documents, each exposing ``page_content``.

    Returns:
        The ``page_content`` of every document, separated by blank lines.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Prompt template: {question} receives the user input, {context} the retrieved text.
message = """
Answer this question using provded context only 
{question}

Context:
{context}
"""

prompt = ChatPromptTemplate.from_messages([("human", message)])

# Using the retriever in a chain.
# The retriever output is piped through format_docs so the prompt receives only the
# document text; without it, {context} is filled with the repr of a list of Document
# objects (ids, metadata, brackets), which adds noise and tokens to the prompt.
rag_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | llm
)

rag_chain.invoke("tell me about dogs")

AIMessage(content='According to the provided context, dogs are great champions.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 12, 'prompt_tokens': 80, 'total_tokens': 92, 'completion_time': 0.01, 'prompt_time': 0.015236715, 'queue_time': 0.05841866800000001, 'total_time': 0.025236715}, 'model_name': 'Llama3-8b-8192', 'system_fingerprint': 'fp_6a6771ae9c', 'finish_reason': 'stop', 'logprobs': None}, id='run-96272faa-2793-4c58-8ee9-34e636e0222c-0', usage_metadata={'input_tokens': 80, 'output_tokens': 12, 'total_tokens': 92})