In [4]:
from langchain_community.embeddings import OllamaEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.prompts import ChatPromptTemplate, PromptTemplate
from langchain_community.chat_models import ChatOllama
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from langchain.retrievers.multi_query import MultiQueryRetriever

In [13]:
# Where the Chroma store lives on disk, and the Ollama embedding model that is
# handed to the store as its embedding function.
persist_directory = "models"
embedding_function = OllamaEmbeddings(show_progress=True, model="nomic-embed-text")

# Open the "local-rag" collection from the persistence directory.
# NOTE(review): presumably this collection was populated by an earlier indexing
# step outside this notebook section — confirm it exists before relying on it.
vector_db = Chroma(
    collection_name="local-rag",
    embedding_function=embedding_function,
    persist_directory=persist_directory,
)

print(f"Vector database loaded from {persist_directory}")


Vector database loaded from models


In [14]:
# Chat model served by Ollama; `local_model` names the model to use.
local_model = "mistral"
llm = ChatOllama(model=local_model)

# Prompt that asks the LLM for five rephrasings of the user's question, one per
# line. The only placeholder is {question}, which `from_template` picks up as
# the prompt's single input variable.
QUERY_PROMPT = PromptTemplate.from_template(
    """You are an AI language model assistant. Your task is to generate five
    different versions of the given user question to retrieve relevant documents from
    a vector database. By generating multiple perspectives on the user question, your
    goal is to help the user overcome some of the limitations of the distance-based
    similarity search. Provide these alternative questions separated by newlines.
    Original question: {question}"""
)

In [15]:

# Expose the vector store as a similarity-search retriever.
# Queries are embedded by the embedding function the store was constructed
# with, so no embedding model is passed here: `as_retriever` only takes
# retriever options such as `search_type` and `search_kwargs`, and building a
# second OllamaEmbeddings client at this point would have no effect.
retriever = vector_db.as_retriever(
    search_type="similarity",  # You can also use other search types such as "mmr"
)

# Wrap the base retriever so that the LLM, driven by QUERY_PROMPT, produces
# several variations of each question for retrieval.
multi_retriever = MultiQueryRetriever.from_llm(
    llm=llm,
    prompt=QUERY_PROMPT,
    retriever=retriever,
)

# Answer-generation prompt: the model is told to rely solely on the retrieved
# context. Placeholders: {context} (retrieved text) and {question} (user query).
template = (
    "Answer the question based ONLY on the following context:\n"
    "{context}\n"
    "Question: {question}\n"
)

In [16]:
def _format_docs(docs):
    """Join retrieved documents into one plain-text context string.

    Args:
        docs: Iterable of retrieved documents, each exposing ``page_content``.

    Returns:
        The documents' text separated by blank lines; an empty string when
        nothing was retrieved.
    """
    return "\n\n".join(doc.page_content for doc in docs)


# Create the final prompt and chain for the RAG system
prompt = ChatPromptTemplate.from_template(template)
chain = (
    {
        # Flatten the retrieved documents to their text before they reach the
        # prompt; otherwise {context} is filled with the repr of a list of
        # Document objects (metadata and escaped newlines included).
        "context": multi_retriever | _format_docs,
        # The raw user question is forwarded unchanged.
        "question": RunnablePassthrough(),
    }
    | prompt
    | llm
    | StrOutputParser()
)


In [17]:
# Run the chain on a sample question; the returned answer string is the cell's
# displayed output.
chain.invoke(
    "What safety precautions should i keep in mind?"
)

OllamaEmbeddings: 100%|██████████| 1/1 [00:04<00:00,  4.72s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.08s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.10s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.19s/it]
OllamaEmbeddings: 100%|██████████| 1/1 [00:02<00:00,  2.33s/it]


'1. For body-worn operation, the device holder should not contain metal and should provide at least the stated separation distance from the body. The mobile device may be transmitting even when you are not making a voice call.\n\n2. Always obey all local laws. Your first consideration while driving should be road safety; keep your hands free to operate the vehicle when driving.\n\n3. All wireless devices may be susceptible to interference, which could affect performance.\n\n4. When using the device, avoid touching electronic components while changing any covers. Store and use the device with any covers attached.\n\n5. The device may contain parts that are magnetic. Metallic materials may be attracted to the device, so do not place credit cards or other magnetic stripe cards near the device for extended periods of time, as the cards may be damaged.\n\n6. Switch the device off when mobile phone use is not allowed or when it may cause interference or danger, such as in aircraft, hospitals