In [1]:
import os
from IPython.display import Markdown
from dotenv import load_dotenv
load_dotenv(r'D:\AI ML\aimlvenv\Scripts\active')
groq_api_key = os.getenv('GROQ_API_KEY')

In [2]:
from langchain_core.documents import Document

documents = [
    Document(
        page_content="Dogs are great companions, known for their loyalty and friendliness.",
        metadata={"source": "mammal-pets-doc"},
    ),
    Document(
        page_content="Cats are independent pets that often enjoy their own space.",
        metadata={"source": "mammal-pets-doc"},
    ),
    Document(
        page_content="Goldfish are popular pets for beginners, requiring relatively simple care.",
        metadata={"source": "fish-pets-doc"},
    ),
    Document(
        page_content="Parrots are intelligent birds capable of mimicking human speech.",
        metadata={"source": "bird-pets-doc"},
    ),
    Document(
        page_content="Rabbits are social animals that need plenty of space to hop around.",
        metadata={"source": "mammal-pets-doc"},
    ),
]

In [3]:
from langchain_groq import ChatGroq

llm = ChatGroq(
    model='openai/gpt-oss-120b',
    api_key=groq_api_key
)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
from langchain_huggingface.embeddings import HuggingFaceEmbeddings

embedding = HuggingFaceEmbeddings(
    model_name = "sentence-transformers/all-MiniLM-L6-v2"
)




In [5]:
from langchain_chroma import Chroma

vectorstore = Chroma.from_documents(documents,embedding)
vectorstore

<langchain_chroma.vectorstores.Chroma at 0x2ae9b7692b0>

In [6]:
vectorstore.similarity_search('cat')

[Document(id='91e8fea4-fd4f-432d-b6f5-41c68b113c5a', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(id='9f93b80d-31b2-443f-9e99-2522e7f32254', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(id='baedbeff-5231-461c-96f5-d33b3348a2a4', metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.'),
 Document(id='b43c6824-259f-41b0-a5fe-3ba4ddf6d356', metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.')]

In [7]:
## Async query

await vectorstore.asimilarity_search('Cat')

[Document(id='91e8fea4-fd4f-432d-b6f5-41c68b113c5a', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(id='9f93b80d-31b2-443f-9e99-2522e7f32254', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(id='baedbeff-5231-461c-96f5-d33b3348a2a4', metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.'),
 Document(id='b43c6824-259f-41b0-a5fe-3ba4ddf6d356', metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.')]

In [8]:
vectorstore.similarity_search_with_score('Cat')

[(Document(id='91e8fea4-fd4f-432d-b6f5-41c68b113c5a', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
  0.9351053833961487),
 (Document(id='9f93b80d-31b2-443f-9e99-2522e7f32254', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
  1.5740896463394165),
 (Document(id='baedbeff-5231-461c-96f5-d33b3348a2a4', metadata={'source': 'mammal-pets-doc'}, page_content='Rabbits are social animals that need plenty of space to hop around.'),
  1.5956898927688599),
 (Document(id='b43c6824-259f-41b0-a5fe-3ba4ddf6d356', metadata={'source': 'bird-pets-doc'}, page_content='Parrots are intelligent birds capable of mimicking human speech.'),
  1.6657921075820923)]

### Retrievers
LangChain VectorStore objects do not subclass Runnable, and so cannot immediately be integrated into LangChain Expression Language chains.

LangChain Retrievers are Runnables, so they implement a standard set of methods (e.g., synchronous and asynchronous invoke and batch operations) and are designed to be incorporated in LCEL chains.

We can create a simple version of this ourselves, without subclassing Retriever. If we choose what method we wish to use to retrieve documents, we can create a runnable easily. Below we will build one around the similarity_search method:

In [9]:
from typing import List
from langchain_core.documents import Document

from langchain_core.runnables import RunnableLambda

retriever = RunnableLambda(vectorstore.similarity_search).bind(k=1)

retriever.batch(['Dog','Cat'])

[[Document(id='9f93b80d-31b2-443f-9e99-2522e7f32254', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.')],
 [Document(id='91e8fea4-fd4f-432d-b6f5-41c68b113c5a', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.')]]

Vectorstores implement an as_retriever method that will generate a Retriever, specifically a VectorStoreRetriever. These retrievers include specific search_type and search_kwargs attributes that identify what methods of the underlying vector store to call, and how to parameterize them. For instance, we can replicate the above with the following:

In [10]:
retriever = vectorstore.as_retriever(
    search_type = 'similarity',
    search_kwargs = {'k':1}
)

In [11]:
retriever.batch(['Cat','Dog'])

[[Document(id='91e8fea4-fd4f-432d-b6f5-41c68b113c5a', metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.')],
 [Document(id='9f93b80d-31b2-443f-9e99-2522e7f32254', metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions, known for their loyalty and friendliness.')]]

In [13]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

message = '''
Answer this question using the provided context only

{question}

Context : 
{context}
'''


prompt = ChatPromptTemplate.from_messages([('human',message)])

rag_chain = {'context': retriever,'question': RunnablePassthrough()} | prompt | llm

rag_chain.invoke('Tell me about doges')

AIMessage(content='Based on the provided context, dogs are great companions, known for their loyalty and friendliness.', additional_kwargs={'reasoning_content': 'The user asks: "Answer this question using the provided context only. Tell me about doges". The context is a document about dogs (not doges). There\'s a typo: "doges" maybe means "dogs". We must answer using only the provided context. The context says: "Dogs are great companions, known for their loyalty and friendliness." So we can answer about dogs (or doges) using that sentence. Must not add external info. So answer: Dogs are great companions, known for loyalty and friendliness. Probably mention that\'s all we know from the context.'}, response_metadata={'token_usage': {'completion_tokens': 144, 'prompt_tokens': 148, 'total_tokens': 292, 'completion_time': 0.33761344, 'prompt_time': 0.009077922, 'queue_time': 0.099397827, 'total_time': 0.346691362, 'completion_tokens_details': {'reasoning_tokens': 117}}, 'model_name': 'opena