Documents

LangChain implements a document abstraction, which is intended to representa unit of text and associated metadata.
* page_content: a string representing a content.
* metadata: a dict containing arbitrary metadata. The metadata attribute can capture information about the source of document, it's relationship to other documents, and other information.

In [5]:
import os
from dotenv import load_dotenv
load_dotenv()

groq_api_key= os.getenv("GROQ_API_KEY")
os.environ['HF_TOKEN'] = os.getenv("HF_TOKEN")

In [6]:
from langchain_groq import ChatGroq
model = ChatGroq(model="llama-3.1-8b-instant", api_key=groq_api_key)
model

  from .autonotebook import tqdm as notebook_tqdm


ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x0000029083784490>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x00000290837843A0>, model_name='llama-3.1-8b-instant', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [15]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
embeddings

HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, query_encode_kwargs={}, multi_process=False, show_progress=False)

In [16]:
from langchain_core.documents import Document

documents = [
    Document(page_content="Dogs are great companions, known for their loyalty and friendliness",
    metadata={"source": "mammal-pets-doc"}),
    Document(page_content="Cats are independent pets that often enjoy their own space.",
    metadata={"source": "mammal-pets-doc"}),
    Document(page_content="Goldfish are popular pets for beginners, requiring relatively simple care",
    metadata={"source":"fish-pets-doc"}),
    Document(page_content="Parrots are intelligent birds capable of mimicking human speech.",
    metadata={"source": "bird-pets-doc"}),
    Document(page_content="Rabbits are social animals that need plenty of space to hop around.",
    metadata={"source": "mammal-pets-doc"})
    ]

In [17]:
#Vectorstore

from langchain_chroma import Chroma
vectorstore= Chroma.from_documents(documents, embeddings)
vectorstore


<langchain_chroma.vectorstores.Chroma at 0x290a9567eb0>

In [18]:
vectorstore.similarity_search(query="dog")

[Document(id='eacf7ad7-03fe-4a53-91ee-df58e4772791', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness'),
 Document(id='cc777efb-e913-40b9-ae77-8d0a158df775', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(id='c9d7b294-c97d-48d9-b71b-a8ff34c7ce7f', metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care'),
 Document(id='a49c028c-f27c-43d1-a56c-1579d9d1ebb7', metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.')]

In [19]:
vectorstore.similarity_search_with_score(query="dog")

[(Document(id='eacf7ad7-03fe-4a53-91ee-df58e4772791', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness'),
  1.1108397245407104),
 (Document(id='cc777efb-e913-40b9-ae77-8d0a158df775', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
  1.5269932746887207),
 (Document(id='c9d7b294-c97d-48d9-b71b-a8ff34c7ce7f', metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care'),
  1.6085363626480103),
 (Document(id='a49c028c-f27c-43d1-a56c-1579d9d1ebb7', metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.'),
  1.6842743158340454)]

In [21]:
#async function-parallel processing, sync function-one step after another

await vectorstore.asimilarity_search(query="dog")

[Document(id='eacf7ad7-03fe-4a53-91ee-df58e4772791', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness'),
 Document(id='cc777efb-e913-40b9-ae77-8d0a158df775', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(id='c9d7b294-c97d-48d9-b71b-a8ff34c7ce7f', metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care'),
 Document(id='a49c028c-f27c-43d1-a56c-1579d9d1ebb7', metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.')]

Retrievers

LangChain vectorstore objects do not subclass Runnable, and so cannot immediately be integrated into LCEL.

LangChain Retrievers are Runnables, so they implement a standard set of methods(e.g., synchronous and asynchronous invoke and batch operations) and are designed to be incorportaed in LCEL chains.


In [25]:
#runnable around function without subclassing retrievers

from langchain_core.runnables import RunnableLambda

runnable = RunnableLambda(vectorstore.similarity_search).bind(k=1)
runnable.batch(['dog', 'cat'])

[[Document(id='eacf7ad7-03fe-4a53-91ee-df58e4772791', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness')],
 [Document(id='cc777efb-e913-40b9-ae77-8d0a158df775', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.')]]

Vectorstore implements as_retriever method will generate a retriever. These retrievers include specific search_type and search_kwargs attributes that identify what methods of underlying vectorstore to call, and how to parameterize them.

In [26]:
retriever = vectorstore.as_retriever(search_type="similarity", serach_kwargs={"k":1})

retriever.batch(["cat", "dog"])


[[Document(id='cc777efb-e913-40b9-ae77-8d0a158df775', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
  Document(id='eacf7ad7-03fe-4a53-91ee-df58e4772791', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness'),
  Document(id='a49c028c-f27c-43d1-a56c-1579d9d1ebb7', metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.'),
  Document(id='c5a14104-839c-47a8-bd53-c33d40da5573', metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.')],
 [Document(id='eacf7ad7-03fe-4a53-91ee-df58e4772791', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness'),
  Document(id='cc777efb-e913-40b9-ae77-8d0a158df775', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are 

In [30]:
#Simple RAG Model

from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

prompt = ChatPromptTemplate.from_template('''
Answer this question {question} using provided context information.
<context>
{context}
</context>
''')



In [31]:
rag_chain = {"context":retriever, "question": RunnablePassthrough()}| prompt | model
rag_chain.invoke("tell me something about rabbits")

AIMessage(content='Based on the provided context information, we can deduce that rabbits are social animals that require a lot of space to hop around. This suggests that rabbits are active and need room to move around freely, making them a good fit for owners who have a large area available for their pets.', additional_kwargs={}, response_metadata={'token_usage': {'completion_tokens': 58, 'prompt_tokens': 276, 'total_tokens': 334, 'completion_time': 0.119250257, 'prompt_time': 0.019212119, 'queue_time': 0.051085111, 'total_time': 0.138462376}, 'model_name': 'llama-3.1-8b-instant', 'system_fingerprint': 'fp_e750f72ec9', 'service_tier': 'on_demand', 'finish_reason': 'stop', 'logprobs': None, 'model_provider': 'groq'}, id='lc_run--9986ddc2-278d-400a-ae77-898f21a87233-0', usage_metadata={'input_tokens': 276, 'output_tokens': 58, 'total_tokens': 334})