In [1]:
from langchain_core.documents import Document

In [2]:
documents = [
    Document(
        page_content="Dogs are great companions known for their loyalty and friendliness.",
        metadata={'source' : 'mammal-pets-doc'},
    ),
    Document(
        page_content="Cats are independent pets that often enjoy their own space.",
        metadata={'source' : 'mammal-pets-doc'},
    ),
    Document(
        page_content="Goldfish are popular pets for beginners, requiring relatively simple care.",
        metadata={'source': 'fish-pets-doc'},
    ),
    Document(
        page_content="Parrots arw intelligent birds capable of mimicking human speech.",
        metadata={'source': 'birds-pets-docs'},
    ),
    Document(
        page_content="Rabits are social animals that need plenty of space to hop around.",
        metadata={'source' : 'mammal-pets-docs'}
    ),
]

In [3]:
documents

[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions known for their loyalty and friendliness.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(metadata={'source': 'fish-pets-doc'}, page_content='Goldfish are popular pets for beginners, requiring relatively simple care.'),
 Document(metadata={'source': 'birds-pets-docs'}, page_content='Parrots arw intelligent birds capable of mimicking human speech.'),
 Document(metadata={'source': 'mammal-pets-docs'}, page_content='Rabits are social animals that need plenty of space to hop around.')]

In [4]:
import os
from dotenv import load_dotenv
load_dotenv()
from langchain_groq import ChatGroq
groq_api_key = os.getenv("GROQ_API_KEY")
os.environ['HF_TOKEN'] = os.getenv('HF_TOKEN')

llm = ChatGroq(groq_api_key=groq_api_key, model='Llama3-8b-8192')
llm

ChatGroq(client=<groq.resources.chat.completions.Completions object at 0x00000114BD4717E0>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x00000114BD24CB50>, model_name='Llama3-8b-8192', model_kwargs={}, groq_api_key=SecretStr('**********'))

In [5]:
from langchain_huggingface import HuggingFaceEmbeddings
embeddings = HuggingFaceEmbeddings(model_name = "all-MiniLM-L6-v2")

  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Vectorstores
from langchain_chroma import Chroma

vectorstore = Chroma.from_documents(documents, embedding=embeddings)
vectorstore

<langchain_chroma.vectorstores.Chroma at 0x114be4e5c90>

In [7]:
vectorstore.similarity_search("cat")

[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(metadata={'source': 'birds-pets-docs'}, page_content='Parrots arw intelligent birds capable of mimicking human speech.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions known for their loyalty and friendliness.'),
 Document(metadata={'source': 'mammal-pets-docs'}, page_content='Rabits are social animals that need plenty of space to hop around.')]

In [9]:
# Async query
await vectorstore.asimilarity_search("cat")

[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
 Document(metadata={'source': 'birds-pets-docs'}, page_content='Parrots arw intelligent birds capable of mimicking human speech.'),
 Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions known for their loyalty and friendliness.'),
 Document(metadata={'source': 'mammal-pets-docs'}, page_content='Rabits are social animals that need plenty of space to hop around.')]

In [10]:
vectorstore.similarity_search_with_score("cat")

[(Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.'),
  0.9351057410240173),
 (Document(metadata={'source': 'birds-pets-docs'}, page_content='Parrots arw intelligent birds capable of mimicking human speech.'),
  1.5786668062210083),
 (Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions known for their loyalty and friendliness.'),
  1.5856294631958008),
 (Document(metadata={'source': 'mammal-pets-docs'}, page_content='Rabits are social animals that need plenty of space to hop around.'),
  1.6589118242263794)]

#### Retrievers

In [11]:
from typing import List

from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda

retriever = RunnableLambda(vectorstore.similarity_search).bind(k=1)
retriever.batch(["cat", "dog"])

[[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.')],
 [Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions known for their loyalty and friendliness.')]]

In [12]:
retriever = vectorstore.as_retriever(
    search_type = "similarity",
    search_kwargs = {"k" : 1}
)
retriever.batch(["cat", "dog"])

[[Document(metadata={'source': 'mammal-pets-doc'}, page_content='Cats are independent pets that often enjoy their own space.')],
 [Document(metadata={'source': 'mammal-pets-doc'}, page_content='Dogs are great companions known for their loyalty and friendliness.')]]

In [18]:
# RAG
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

message = """
        Answer this question using the provided context only.
        {question}
        Context:
        {context}
        """
prompt = ChatPromptTemplate.from_messages([("human", message)])
rag_chain = {"context" : retriever, "question" : RunnablePassthrough()} | prompt | llm
response = rag_chain.invoke("Tell me about dogs")
print(response.content)

According to the provided context, dogs are great companions known for their loyalty and friendliness.
