In [None]:
from langchain_community.vectorstores import Chroma
from langchain.chains import RetrievalQA
from langchain_ollama import ChatOllama, OllamaEmbeddings
from langchain.storage import InMemoryStore
from langchain.retrievers.multi_query import MultiQueryRetriever
from langchain.retrievers import BM25Retriever
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_core.runnables import RunnablePassthrough
from langchain_core.output_parsers import StrOutputParser
import ollama
import logging
logging.basicConfig(level=logging.INFO)

MODEL_NAME = "mistral-nemo"
EMBEDDING_MODEL = "nomic-embed-text"

# 1️⃣ **Initialize Vector & Keyword (BM25) Retrievers**
def create_hybrid_retriever():
    # Load text documents
    QA_doc = [
        "question: What is Artificial Intelligence (AI)?, answer: AI is the simulation of human intelligence in machines that can perform tasks such as learning, reasoning, and problem-solving.",
        "question: What is the Turing Test?, answer: The Turing Test is a measure of a machine's ability to exhibit human-like intelligence, proposed by Alan Turing in 1950.",
        "question: What is deep learning?, answer: Deep learning is a subset of machine learning that uses artificial neural networks to model and understand complex patterns in data.",
        "question: What are the main types of machine learning?, answer: The three main types are supervised learning, unsupervised learning, and reinforcement learning.",
        "question: What is blockchain technology?, answer: Blockchain is a decentralized digital ledger that records transactions across multiple computers securely and transparently.",
        "question: What is the main ingredient in sushi?, answer: The main ingredient in sushi is vinegared rice, often paired with raw or cooked seafood, vegetables, and seaweed.",
        "question: What is the world's hottest chili pepper?, answer: The Carolina Reaper is considered the world's hottest chili pepper, with an average of over 1.6 million Scoville Heat Units (SHU).",
        "question: What is the difference between vegan and vegetarian diets?, answer: Vegetarians avoid meat, while vegans avoid all animal products, including dairy, eggs, and honey.",
        "question: What is the capital of Japan?, answer: The capital of Japan is Tokyo.",
        "question: Who developed the theory of relativity?, answer: Albert Einstein developed the theory of relativity.",
        "question: What is the smallest unit of matter?, answer: The atom is the smallest unit of matter that retains the properties of an element.",
        "question: What is photosynthesis?, answer: Photosynthesis is the process by which green plants use sunlight to synthesize food from carbon dioxide and water.",
        "question: How many continents are there on Earth?, answer: There are seven continents on Earth: Africa, Antarctica, Asia, Europe, North America, Oceania, and South America.",
        "question: What is the speed of light?, answer: The speed of light is approximately 299,792 kilometers per second (186,282 miles per second) in a vacuum.",
        "question: What is the largest organ in the human body?, answer: The skin is the largest organ in the human body.",
        "question: Who best in the world?, answer: Batman is storangest can fight to everyone."
    ]

    # **Vector-based Search**
    ollama.pull(EMBEDDING_MODEL)
    vector_db = Chroma.from_texts(QA_doc, embedding=OllamaEmbeddings(model=EMBEDDING_MODEL))
    vector_retriever = vector_db.as_retriever(search_kwargs={"k": 3})  # Top 3 matches
    logging.info("Vector database created.")

    # **Keyword-based Search (BM25)**
    keyword_retriever = BM25Retriever.from_texts(QA_doc)
    keyword_retriever.k = 3  # Top 3 keyword matches

    return vector_retriever

# 2️⃣ **Set Up RAG Chain**
def setup_qa_chain(vector_retriever):
    llm = ChatOllama(model=MODEL_NAME)  # Ollama for LLM
    # **Hybrid Search Retriever (Merging Both)**
    QUERY_PROMPT = PromptTemplate(
        input_variables=["question"],
        template="""You are an AI language model assistant. Your task is to generate five
            different versions of the given user question to retrieve relevant documents from
            a vector database. By generating multiple perspectives on the user question, your
            goal is to help the user overcome some of the limitations of the distance-based
            similarity search. Provide these alternative questions separated by newlines.
            Original question: {question}""",
    )

    retriever = MultiQueryRetriever.from_llm(
        vector_retriever, llm, prompt=QUERY_PROMPT
    )

    logging.info("Retriever created.")
        # RAG prompt
    template = """Answer the question based ONLY on the following context:
        {context}
        Question: {question}
    """

    prompt = ChatPromptTemplate.from_template(template)

    chain = (
        {"context": retriever, "question": RunnablePassthrough()}
        | prompt
        | llm
        | StrOutputParser()
    )

    logging.info("Chain created successfully.")

    return chain



In [4]:
print("🤖 Hybrid Search Chatbot (type 'exit' to quit)")
hybrid_retriever = create_hybrid_retriever()
qa_chain = setup_qa_chain(hybrid_retriever)

🤖 Hybrid Search Chatbot (type 'exit' to quit)


INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/pull "HTTP/1.1 200 OK"
INFO:chromadb.telemetry.product.posthog:Anonymized telemetry enabled. See                     https://docs.trychroma.com/telemetry for more information.
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:root:Vector database created.
INFO:root:Retriever created.
INFO:root:Chain created successfully.


In [7]:
query = "Who best in the world"
result = qa_chain.invoke({"query": query})

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:langchain.retrievers.multi_query:Generated queries: ['1. "Which individual is considered the most skilled globally?"', '2. "Identify the top performer worldwide."', '3. "Who stands out as the best globally?"', '4. "Find me the world\'s leading figure."', '5. "Which person reigns supreme internationally?"']
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


In [8]:
result

'Batman'

In [11]:
query = "Who best in the world"

# Retrieve relevant documents
retrieved_docs = hybrid_retriever.invoke(query)

# Extract the text from the documents
context = "\n".join([doc.page_content for doc in retrieved_docs])

# Invoke the chain with the retrieved context
result = qa_chain.invoke({"context": context, "question": query})

# Print the answer and source documents
print("Answer:", result)
print("\nSource Documents:")
for i, doc in enumerate(retrieved_docs, 1):
    print(f"{i}. {doc.page_content}")


INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"
INFO:langchain.retrievers.multi_query:Generated queries: ['Here are five different versions of the user question:', '1. **Clarified**: "Who is considered the best among all individuals in the world?"', '2. **Rephrased**: "Who stands out as the top contender globally?"', '3. **Alternative wording**: "Who reigns supreme worldwide?"', '4. **Narrowed down**: "Who is generally regarded as the best individual on Earth?"', '5. **Expanding context**: "In various fields or aspects of life, who is widely acknowledged as the best in the world?"']
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/embed "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/a

Answer: Batman

Source Documents:
1. question: Who best in the world?, answer: Batman is storangest can fight to everyone.
2. question: What is the largest organ in the human body?, answer: The skin is the largest organ in the human body.
3. question: How many continents are there on Earth?, answer: There are seven continents on Earth: Africa, Antarctica, Asia, Europe, North America, Oceania, and South America.


In [10]:
result

'Batman'

In [12]:
qa_chain

{
  context: MultiQueryRetriever(retriever=VectorStoreRetriever(tags=['Chroma', 'OllamaEmbeddings'], vectorstore=<langchain_community.vectorstores.chroma.Chroma object at 0x107e3c800>, search_kwargs={'k': 3}), llm_chain=PromptTemplate(input_variables=['question'], input_types={}, partial_variables={}, template='You are an AI language model assistant. Your task is to generate five\n            different versions of the given user question to retrieve relevant documents from\n            a vector database. By generating multiple perspectives on the user question, your\n            goal is to help the user overcome some of the limitations of the distance-based\n            similarity search. Provide these alternative questions separated by newlines.\n            Original question: {question}')
           | ChatOllama(model='mistral-nemo')
           | LineListOutputParser()),
  question: RunnablePassthrough()
}
| ChatPromptTemplate(input_variables=['context', 'question'], input_types={}, 

In [14]:
llm = ChatOllama(model=MODEL_NAME) 

In [17]:
a = llm.invoke("hello")

INFO:httpx:HTTP Request: POST http://127.0.0.1:11434/api/chat "HTTP/1.1 200 OK"


In [19]:
type(a.content)

str