In [1]:
!pip install langchain langchain-community chromadb




In [2]:
from langchain_core.documents import Document
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma


In [3]:
# Toy corpus: one short fact per river. Each (river name, fact) pair becomes
# a Document whose metadata records the river the sentence is about.
river_facts = [
    ("Ganga", "The Ganga is the longest river in India and flows into the Bay of Bengal."),
    ("Yamuna", "The Yamuna is a major tributary of the Ganga and flows through Delhi."),
    ("Brahmaputra", "The Brahmaputra flows through Assam and causes frequent floods."),
    ("Godavari", "The Godavari is the second longest river in India and flows in the south."),
    ("Narmada", "The Narmada is a west-flowing river that empties into the Arabian Sea."),
]

documents = [
    Document(page_content=fact, metadata={"river": river_name})
    for river_name, fact in river_facts
]


In [7]:
# Embedding client served by Ollama; the model tag is kept in one named
# constant so it is easy to swap.
# NOTE(review): "gemma:2b" looks like a general-purpose generative model rather
# than a dedicated embedding model -- confirm it is the intended choice, since
# embedding quality directly drives the retrieval ranking below.
EMBEDDING_MODEL = "gemma:2b"

embeddings = OllamaEmbeddings(model=EMBEDDING_MODEL)
embeddings

OllamaEmbeddings(base_url='http://localhost:11434', model='gemma:2b', embed_instruction='passage: ', query_instruction='query: ', mirostat=None, mirostat_eta=None, mirostat_tau=None, num_ctx=None, num_gpu=None, num_thread=None, repeat_last_n=None, repeat_penalty=None, temperature=None, stop=None, tfs_z=None, top_k=None, top_p=None, show_progress=False, headers=None, model_kwargs=None)

In [8]:
# Embed every document and index it in a Chroma vector store.
#
# Each entry is given a stable id taken from its "river" metadata (the river
# names are unique in this corpus). Without explicit ids a fresh random id is
# generated per document on every call, so re-running this cell in the same
# kernel can add the same five documents to the collection again; with stable
# ids a re-run is expected to overwrite the existing entries instead.
vectorstore = Chroma.from_documents(
    documents=documents,
    embedding=embeddings,
    ids=[doc.metadata["river"] for doc in documents],
)

vectorstore

<langchain_community.vectorstores.chroma.Chroma at 0x229bbf06ad0>

In [9]:
# Build a retriever that uses Maximal Marginal Relevance (MMR) search.
mmr_search_kwargs = dict(
    k=3,              # number of documents in the final result
    fetch_k=5,        # number of candidates fetched before MMR is applied
    lambda_mult=0.5,  # trade-off between diversity and relevance
)

retriever = vectorstore.as_retriever(
    search_type="mmr",
    search_kwargs=mmr_search_kwargs,
)


In [10]:
# Run one query through the MMR retriever and list the hits in rank order.
query = "Important rivers in North India"
results = retriever.invoke(query)

# One print call per hit: a blank line, the "Result N:" header, then the text.
for rank, doc in enumerate(results, start=1):
    print(f"\nResult {rank}:", doc.page_content, sep="\n")



Result 1:
The Godavari is the second longest river in India and flows in the south.

Result 2:
The Yamuna is a major tributary of the Ganga and flows through Delhi.

Result 3:
The Brahmaputra flows through Assam and causes frequent floods.
