## Vector store and retrievers


In [1]:
from langchain_core.documents import Document

documents=[
    Document(page_content="Python is a programming language that lets you work quickly and integrate systems more effectively.",metadata={"source":"programming_language.txt"}),
    Document(page_content="The sky is blue and beautiful.",metadata={"source":"sky.txt"}),
    Document(page_content="The grass is green",metadata={"source":"grass.txt"}),
    Document(page_content="I love programming in Python",metadata={"source":"love.txt"}),
    Document(page_content="I hate programming in Python",metadata={"source":"hate.txt"}),
]

documents

[Document(metadata={'source': 'programming_language.txt'}, page_content='Python is a programming language that lets you work quickly and integrate systems more effectively.'),
 Document(metadata={'source': 'sky.txt'}, page_content='The sky is blue and beautiful.'),
 Document(metadata={'source': 'grass.txt'}, page_content='The grass is green'),
 Document(metadata={'source': 'love.txt'}, page_content='I love programming in Python'),
 Document(metadata={'source': 'hate.txt'}, page_content='I hate programming in Python')]

In [2]:
import os
from dotenv import load_dotenv
load_dotenv()
from langchain_groq import ChatGroq

groq_api_key=os.getenv("GROQ_API_KEY")
os.environ["HF_TOKEN"]=os.getenv("HF_TOKEN")
llm=ChatGroq(
    model_name="openai/gpt-oss-20b",
    api_key=groq_api_key,
    temperature=0
)

llm

  from .autonotebook import tqdm as notebook_tqdm


ChatGroq(profile={'max_input_tokens': 131072, 'max_output_tokens': 32768, 'image_inputs': False, 'audio_inputs': False, 'video_inputs': False, 'image_outputs': False, 'audio_outputs': False, 'video_outputs': False, 'reasoning_output': True, 'tool_calling': True}, client=<groq.resources.chat.completions.Completions object at 0x000002CA48D8C220>, async_client=<groq.resources.chat.completions.AsyncCompletions object at 0x000002CA48D8C130>, model_name='openai/gpt-oss-20b', temperature=1e-08, model_kwargs={}, groq_api_key=SecretStr('**********'))

In [4]:
## Vector store
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings
embeddings=HuggingFaceEmbeddings(
    model_name="sentence-transformers/all-MiniLM-L6-v2",
    model_kwargs={"device":"cpu"}
)


In [5]:
vectorstore=Chroma.from_documents(documents,embeddings)
vectorstore

<langchain_chroma.vectorstores.Chroma at 0x2ca5a3c8490>

In [6]:
vectorstore.similarity_search("I love programming in Python")

[Document(id='03c71d36-410f-4d7f-9468-43ad1760f73b', metadata={'source': 'love.txt'}, page_content='I love programming in Python'),
 Document(id='41ed0b30-1c8f-4e0b-9b74-03d32285d10c', metadata={'source': 'hate.txt'}, page_content='I hate programming in Python'),
 Document(id='b537bf57-9821-4b5d-a308-d8830b38e941', metadata={'source': 'programming_language.txt'}, page_content='Python is a programming language that lets you work quickly and integrate systems more effectively.'),
 Document(id='2238b5cc-200d-4f40-a16e-d621f045fa12', metadata={'source': 'sky.txt'}, page_content='The sky is blue and beautiful.')]

In [7]:
## Async query
await vectorstore.asimilarity_search("I love programming in Python")

[Document(id='03c71d36-410f-4d7f-9468-43ad1760f73b', metadata={'source': 'love.txt'}, page_content='I love programming in Python'),
 Document(id='41ed0b30-1c8f-4e0b-9b74-03d32285d10c', metadata={'source': 'hate.txt'}, page_content='I hate programming in Python'),
 Document(id='b537bf57-9821-4b5d-a308-d8830b38e941', metadata={'source': 'programming_language.txt'}, page_content='Python is a programming language that lets you work quickly and integrate systems more effectively.'),
 Document(id='2238b5cc-200d-4f40-a16e-d621f045fa12', metadata={'source': 'sky.txt'}, page_content='The sky is blue and beautiful.')]

In [8]:
vectorstore.similarity_search_with_score("I love programming in Python")

[(Document(id='03c71d36-410f-4d7f-9468-43ad1760f73b', metadata={'source': 'love.txt'}, page_content='I love programming in Python'),
  2.1934026589367356e-13),
 (Document(id='41ed0b30-1c8f-4e0b-9b74-03d32285d10c', metadata={'source': 'hate.txt'}, page_content='I hate programming in Python'),
  0.2075600028038025),
 (Document(id='b537bf57-9821-4b5d-a308-d8830b38e941', metadata={'source': 'programming_language.txt'}, page_content='Python is a programming language that lets you work quickly and integrate systems more effectively.'),
  0.45398521423339844),
 (Document(id='2238b5cc-200d-4f40-a16e-d621f045fa12', metadata={'source': 'sky.txt'}, page_content='The sky is blue and beautiful.'),
  1.796344518661499)]

In [11]:
from typing import List
from langchain_core.documents import Document
from langchain_core.runnables import RunnableLambda
retriever=RunnableLambda(vectorstore.similarity_search).bind(k=1)
retriever.batch(["I love","I hate"])

[[Document(id='03c71d36-410f-4d7f-9468-43ad1760f73b', metadata={'source': 'love.txt'}, page_content='I love programming in Python')],
 [Document(id='41ed0b30-1c8f-4e0b-9b74-03d32285d10c', metadata={'source': 'hate.txt'}, page_content='I hate programming in Python')]]

In [12]:
retriever=vectorstore.as_retriever(
    search_type="similarity",
    search_kwargs={"k":1}
)

retriever.batch(["I love","I hate"])

[[Document(id='03c71d36-410f-4d7f-9468-43ad1760f73b', metadata={'source': 'love.txt'}, page_content='I love programming in Python')],
 [Document(id='41ed0b30-1c8f-4e0b-9b74-03d32285d10c', metadata={'source': 'hate.txt'}, page_content='I hate programming in Python')]]

In [16]:
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.runnables import RunnablePassthrough
from urllib3 import response

message="""
Answer the following question based on the context provided.
{question}

Context:
{context}
"""

prompt=ChatPromptTemplate.from_messages([("human",message)])
rag_chain={
    "context":retriever,
    "question":RunnablePassthrough()
} | prompt | llm 

response=rag_chain.invoke("Tell me about the weather in Tokyo")
print(response.content)


I’m sorry, but the information you provided only mentions that “the sky is blue and beautiful.” It doesn’t contain any details about Tokyo’s weather, so I can’t give you a specific update on that. If you have a source that includes Tokyo’s weather data, feel free to share it and I’ll gladly help interpret it.
