In [None]:
import pickle
# Location of the pickled chunks file.
pickle_file_path = "/content/drive/MyDrive/chunks.pkl"

# NOTE: unpickling can execute arbitrary code, so only load files you trust.
with open(pickle_file_path, mode="rb") as chunk_file:
    loaded_chunks = pickle.load(chunk_file)

print("Chunks loaded successfully.")


In [None]:
from langchain_huggingface import HuggingFaceEmbeddings

# Hugging Face model identifier handed to the embeddings wrapper below.
model_name = "sentence-transformers/all-mpnet-base-v2"

# Embeddings object; later cells refer to it by the name `embeddings_model`.
embeddings_model = HuggingFaceEmbeddings(
    model_name=model_name,
)

In [None]:
!pip install chromadb
!pip install langchain-chroma

In [None]:
from langchain_chroma import Chroma

# Initialize the database connection.
# If a database already exists in persist_directory, this connects to the
# collection named collection_name; otherwise a new collection is created.
db = Chroma(
    collection_name="vector_database",
    # `embeddings_model` is the HuggingFaceEmbeddings instance created in an
    # earlier cell (the name must match that definition exactly).
    embedding_function=embeddings_model,
    persist_directory="/content/drive/MyDrive/chroma_db_",
)

# Show how many entries are already stored in the collection.
print(len(db.get()["ids"]))

In [None]:
# Expose the vector store as a retriever: similarity search with k=3.
retriever = db.as_retriever(
    search_type="similarity",
    search_kwargs={"k": 3},
)

In [None]:
# Step 3: Initialize a Chat Prompt Template

from langchain_core.prompts import ChatPromptTemplate

# Prompt text sent to the chat model. It has two placeholders:
#   {context}  - filled with the text the answer must be based on
#   {question} - filled with the user's question
# The remaining lines are instructions to the model about answer style.
PROMPT_TEMPLATE = """
Answer the question based only on the following context:
{context}
Answer the question based on the above context: {question}.
Provide a detailed answer.
Don’t justify your answers.
Don’t give information not mentioned in the CONTEXT INFORMATION.
Do not say "according to the context" or "mentioned in the context" or similar.
"""

# Wrap the raw template string in a ChatPromptTemplate so it can be piped
# into the chain.
prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)

In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model used as the generation step of the RAG chain.
llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-pro",
    temperature=0,     # sampling temperature set to zero
    max_retries=2,     # retry count handed to the client
    max_tokens=None,   # no explicit output-token limit set here
    timeout=None,      # no explicit request timeout set here
    # Further constructor parameters can be added here as needed.
)

In [None]:
from langchain_core.output_parsers import StrOutputParser

# String output parser; used as the final stage of the RAG chain.
parser = StrOutputParser()

In [None]:
# Step 6: Define a RAG Chain
from langchain_core.runnables import RunnablePassthrough

def format_docs(docs):
    """Join the text of several documents into a single string.

    Args:
        docs: Iterable of objects that each expose a ``page_content`` string.

    Returns:
        The ``page_content`` values in their original order, separated by a
        blank line. An empty iterable yields an empty string.
    """
    separator = "\n\n"
    page_texts = [document.page_content for document in docs]
    return separator.join(page_texts)

# Assemble the RAG pipeline with the pipe operator:
#   1. build the prompt inputs: "context" is the retriever output joined into
#      one string by format_docs, "question" is the incoming query passed
#      through unchanged;
#   2. fill the chat prompt template;
#   3. call the chat model (llm);
#   4. run the output parser on the model's reply.
rag_chain = (
    {
        "context": retriever | format_docs,
        "question": RunnablePassthrough(),
    }
    | prompt_template
    | llm
    | parser
)

In [None]:
# Example question to send through the RAG chain.
query = "Who is Rachem?"

# Kept as the cell's last expression so the notebook displays the answer.
rag_chain.invoke(query)