In [2]:
import os
from dotenv import load_dotenv
load_dotenv()
groq_api_key = os.getenv("GROQ_API_KEY")
ollama_api_key = os.getenv("OLLAMA_API_KEY")
from langchain_groq import ChatGroq
llm = ChatGroq(model_name = "gemma2-9b-It",groq_api_key = groq_api_key)

In [3]:
from langchain_core.documents import Document
document = [
    Document(
        page_content="The quick brown fox jumps over the lazy dog.",
        metadata={"source": "example.txt", "author": "John Doe"}
    ),
    Document(
        page_content="The rain in Spain stays mainly in the plain.",
        metadata={"source": "example2.txt", "author": "Jane Smith"}
    ),
    Document(
        page_content="A journey of a thousand miles begins with a single step.",
        metadata={"source": "example3.txt", "author": "Lao Tzu"}
    ),
    Document(
        page_content="To be or not to be, that is the question.",
        metadata={"source": "example4.txt", "author": "William Shakespeare"}
    ),
    Document(
        page_content="All that glitters is not gold.",
        metadata={"source": "example5.txt", "author": "William Shakespeare"}
    ),
    Document(
        page_content="The only thing we have to fear is fear itself.",
        metadata={"source": "example6.txt", "author": "Franklin D. Roosevelt"}
    ),

]


In [4]:
from langchain_chroma import Chroma
from langchain_ollama.embeddings import OllamaEmbeddings
embeddings = OllamaEmbeddings(model="mxbai-embed-large")
vectorstore = Chroma.from_documents(document,
    embedding= embeddings,persist_directory = "chroma_db")

In [5]:
vectorstore.similarity_search("glitters")

[Document(id='47ced894-affb-4ca5-a8dc-0c2718c72ae8', metadata={'author': 'William Shakespeare', 'source': 'example5.txt'}, page_content='All that glitters is not gold.'),
 Document(id='56dc260b-beee-4d15-9dad-561aceb63998', metadata={'author': 'William Shakespeare', 'source': 'example5.txt'}, page_content='All that glitters is not gold.'),
 Document(id='baa4c8e1-ab18-4e36-8a61-d03aaa5ea111', metadata={'author': 'William Shakespeare', 'source': 'example5.txt'}, page_content='All that glitters is not gold.'),
 Document(id='026ebb94-b572-4356-a94c-0f8f84204471', metadata={'author': 'William Shakespeare', 'source': 'example4.txt'}, page_content='To be or not to be, that is the question.')]

In [6]:
# async query
await vectorstore.asimilarity_search("glitters")

[Document(id='47ced894-affb-4ca5-a8dc-0c2718c72ae8', metadata={'author': 'William Shakespeare', 'source': 'example5.txt'}, page_content='All that glitters is not gold.'),
 Document(id='56dc260b-beee-4d15-9dad-561aceb63998', metadata={'author': 'William Shakespeare', 'source': 'example5.txt'}, page_content='All that glitters is not gold.'),
 Document(id='baa4c8e1-ab18-4e36-8a61-d03aaa5ea111', metadata={'author': 'William Shakespeare', 'source': 'example5.txt'}, page_content='All that glitters is not gold.'),
 Document(id='026ebb94-b572-4356-a94c-0f8f84204471', metadata={'author': 'William Shakespeare', 'source': 'example4.txt'}, page_content='To be or not to be, that is the question.')]

In [7]:
vectorstore.similarity_search_with_score("glitters")

[(Document(id='47ced894-affb-4ca5-a8dc-0c2718c72ae8', metadata={'author': 'William Shakespeare', 'source': 'example5.txt'}, page_content='All that glitters is not gold.'),
  0.5230611012463176),
 (Document(id='56dc260b-beee-4d15-9dad-561aceb63998', metadata={'author': 'William Shakespeare', 'source': 'example5.txt'}, page_content='All that glitters is not gold.'),
  0.5230611012463176),
 (Document(id='baa4c8e1-ab18-4e36-8a61-d03aaa5ea111', metadata={'author': 'William Shakespeare', 'source': 'example5.txt'}, page_content='All that glitters is not gold.'),
  0.5230611012463176),
 (Document(id='026ebb94-b572-4356-a94c-0f8f84204471', metadata={'author': 'William Shakespeare', 'source': 'example4.txt'}, page_content='To be or not to be, that is the question.'),
  1.1452874034660148)]

In [8]:
retriever = vectorstore.as_retriever(search_type="similarity",
        search_kwargs={'k': 1})

retriever.batch({"cat","dogs"})

[[Document(id='43654bc5-81c5-4522-9f6f-0925b80e18f1', metadata={'author': 'William Shakespeare', 'source': 'example4.txt'}, page_content='To be or not to be, that is the question.')],
 [Document(id='173c1d13-dcd5-4a20-a26c-785205c0ad39', metadata={'author': 'John Doe', 'source': 'example.txt'}, page_content='The quick brown fox jumps over the lazy dog.')]]

In [11]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough

message = """

Answer this question using the provided context only.
{question}

Context:
{context}

"""

prompt = ChatPromptTemplate.from_messages([("human",message)])

rag_chain = {"context" : retriever, "question" : RunnablePassthrough()}|prompt|llm

response = rag_chain.invoke("Tell me about dogs")
print(response)

content='The provided text only states that a lazy dog was jumped over by a quick brown fox.  \n' response_metadata={'token_usage': {'completion_tokens': 22, 'prompt_tokens': 97, 'total_tokens': 119, 'completion_time': 0.04, 'prompt_time': 0.002668807, 'queue_time': 0.250150987, 'total_time': 0.042668807}, 'model_name': 'gemma2-9b-It', 'system_fingerprint': 'fp_10c08bf97d', 'finish_reason': 'stop', 'logprobs': None} id='run-1f0f93c6-76b1-4717-adcf-b3409cbf4c6d-0' usage_metadata={'input_tokens': 97, 'output_tokens': 22, 'total_tokens': 119}
