### Vector Stores and Retrievers

In [1]:
# Load env data: read key/value pairs from a local .env file (if present)
import os
from dotenv import load_dotenv
load_dotenv()

# Access API key (None when GROQ_API_KEY is not configured)
groq_api_key = os.getenv("GROQ_API_KEY")

# os.environ only accepts string values, so assigning the result of
# os.getenv() directly raises TypeError whenever HF_TOKEN is missing.
# Export the token only when one is actually configured.
hf_token = os.getenv("HF_TOKEN")
if hf_token is not None:
    os.environ["HF_TOKEN"] = hf_token

In [2]:
# Create model
from langchain_groq import ChatGroq

# Keep the model identifier in one named place so it is easy to spot and swap.
GROQ_MODEL_NAME = "llama3-8b-8192"

model = ChatGroq(groq_api_key=groq_api_key, model=GROQ_MODEL_NAME)
model

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x7d3aa4ff8320>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x7d3aa4ff8e30>, model_name='llama3-8b-8192', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [4]:
# Create embedding
from langchain_huggingface import HuggingFaceEmbeddings

# Name of the embedding model passed to HuggingFaceEmbeddings.
EMBEDDING_MODEL_NAME = "all-MiniLM-L6-v2"

embeddings = HuggingFaceEmbeddings(model_name=EMBEDDING_MODEL_NAME)
embeddings

HuggingFaceEmbeddings(model_name='all-MiniLM-L6-v2', cache_folder=None, model_kwargs={}, encode_kwargs={}, multi_process=False, show_progress=False)

In [6]:
# Documents
from langchain_core.documents import Document

# One row per pet fact: (page_content, metadata "type", metadata "trait").
pet_facts = [
    ("Dogs are great companions, known for their loyalty and friendliness.", "dog", "loyalty"),
    ("Cats are independent pets that often enjoy their own space.", "cat", "independence"),
    ("Goldfish are popular pets for beginners, requiring relatively simple care.", "fish", "low maintenance"),
    ("Parrots are intelligent birds capable of mimicking human speech.", "bird", "intelligence"),
    ("Rabbits are social animals that need plenty of space to hop around.", "rabbit", "social"),
]

# Turn each row into a Document, preserving the row order above.
documents = [
    Document(page_content=text, metadata={"type": kind, "trait": trait})
    for text, kind, trait in pet_facts
]
documents

[Document(metadata={'type': 'dog', 'trait': 'loyalty'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(metadata={'type': 'cat', 'trait': 'independence'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(metadata={'type': 'fish', 'trait': 'low maintenance'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.'),
 Document(metadata={'type': 'bird', 'trait': 'intelligence'}, page_content='Parrots are intelligent birds capable of mimicking human speech.'),
 Document(metadata={'type': 'rabbit', 'trait': 'social'}, page_content='Rabbits are social animals that need plenty of space to hop around.')]

In [7]:
# Create vector store
from langchain_chroma import Chroma

# Give every document a deterministic id. Without explicit ids each run of
# this cell inserts the documents under freshly generated ids, so re-running
# the cell in the same kernel can leave duplicate entries in the collection
# (and duplicate hits in later searches). With fixed ids a re-run writes to
# the same five entries instead.
document_ids = [f"pet-doc-{index}" for index in range(len(documents))]

chromaDB = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
    ids=document_ids,
)
chromaDB

<langchain_chroma.vectorstores.Chroma at 0x7d397a882420>

In [9]:
# Similarity search - Query
# Synchronous lookup: returns the stored documents closest to the query text.
sync_query = "Cats are independent pets"
chromaDB.similarity_search(sync_query)

[Document(metadata={'trait': 'independence', 'type': 'cat'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(metadata={'trait': 'loyalty', 'type': 'dog'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(metadata={'trait': 'social', 'type': 'rabbit'}, page_content='Rabbits are social animals that need plenty of space to hop around.'),
 Document(metadata={'trait': 'intelligence', 'type': 'bird'}, page_content='Parrots are intelligent birds capable of mimicking human speech.')]

In [10]:
# Async query
await chromaDB.asimilarity_search("Cats are independent pets")

[Document(metadata={'trait': 'independence', 'type': 'cat'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(metadata={'trait': 'loyalty', 'type': 'dog'}, page_content='Dogs are great companions, known for their loyalty and friendliness.'),
 Document(metadata={'trait': 'social', 'type': 'rabbit'}, page_content='Rabbits are social animals that need plenty of space to hop around.'),
 Document(metadata={'trait': 'intelligence', 'type': 'bird'}, page_content='Parrots are intelligent birds capable of mimicking human speech.')]

### Retrievers

In [14]:
from langchain_core.runnables import RunnableLambda

# Wrap the vector store's search method as a Runnable, then pin k=1 so each
# query yields a single document.
search_runnable = RunnableLambda(chromaDB.similarity_search)
retriever = search_runnable.bind(k=1)

# batch() runs the retriever once per query and returns one result list each.
retriever.batch(["dogs", "cats"])

[[Document(metadata={'trait': 'loyalty', 'type': 'dog'}, page_content='Dogs are great companions, known for their loyalty and friendliness.')],
 [Document(metadata={'trait': 'independence', 'type': 'cat'}, page_content='Cats are independent pets that often enjoy their own space.')]]

In [15]:
# Build the retriever directly from the vector store: similarity search,
# one document per query.
retriever = chromaDB.as_retriever(search_type="similarity", search_kwargs=dict(k=1))

retriever.batch(["dogs", "cats"])

[[Document(metadata={'trait': 'loyalty', 'type': 'dog'}, page_content='Dogs are great companions, known for their loyalty and friendliness.')],
 [Document(metadata={'trait': 'independence', 'type': 'cat'}, page_content='Cats are independent pets that often enjoy their own space.')]]

In [17]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough


def format_docs(docs):
    """Join the text of retrieved documents into one plain-text context block.

    Args:
        docs: Iterable of objects exposing a ``page_content`` string, such as
            the list of documents returned by the retriever.

    Returns:
        The ``page_content`` values separated by blank lines, or an empty
        string when ``docs`` is empty.
    """
    return "\n\n".join(doc.page_content for doc in docs)


message = """
    Answer this question using the provided context only.
    {question}
    
    Context:
    {context}

"""

prompt = ChatPromptTemplate.from_messages([("human", message)])

# The retriever returns a list of Document objects. Interpolating that list
# straight into the prompt would insert its Python repr (metadata included),
# so pipe it through format_docs to hand the model only the document text.
reg_chain = (
    {"context": retriever | format_docs, "question": RunnablePassthrough()}
    | prompt
    | model
)

response = reg_chain.invoke("Tell me about dogs")
response.content

'According to the provided context, dogs are great companions, known for their loyalty and friendliness.'

In [19]:
# Ask the same retrieval chain a second question; the answer text is shown
# as the cell output.
response = reg_chain.invoke(input="Tell me about Parrots")
response.content

'According to the provided context, Parrots are intelligent birds capable of mimicking human speech.'