In [9]:
cd /content/drive/MyDrive/Hybrid_RAG/Hybrid_RAG

/content/drive/MyDrive/Hybrid_RAG/Hybrid_RAG


In [18]:
!pip install -r requirements.txt

Collecting transformers==4.50.1 (from -r requirements.txt (line 12))
  Using cached transformers-4.50.1-py3-none-any.whl.metadata (39 kB)
Using cached transformers-4.50.1-py3-none-any.whl (10.2 MB)
Installing collected packages: transformers
  Attempting uninstall: transformers
    Found existing installation: transformers 4.52.4
    Uninstalling transformers-4.52.4:
      Successfully uninstalled transformers-4.52.4
Successfully installed transformers-4.50.1


In [11]:
import getpass
import os
from typing import List

from dotenv import load_dotenv
from langchain.chains import LLMChain
from langchain.chains import create_history_aware_retriever
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import PromptTemplate
from langchain.retrievers import BM25Retriever
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.retrievers import ContextualCompressionRetriever, EnsembleRetriever
from langchain.retrievers.document_compressors import CrossEncoderReranker
from langchain_community.cross_encoders import HuggingFaceCrossEncoder
from langchain_community.document_loaders import Docx2txtLoader
from langchain_community.document_loaders import PyPDFLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_core.documents import Document
from langchain_core.messages import HumanMessage, AIMessage
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import ChatPromptTemplate
from langchain_core.prompts import MessagesPlaceholder
from langchain_groq import ChatGroq
from langchain_text_splitters import RecursiveCharacterTextSplitter
from sentence_transformers import CrossEncoder


ModuleNotFoundError: No module named 'dotenv'

In [None]:
# Do not paste the Groq API key into the notebook source. Read it from the
# process environment (or a local .env file picked up by load_dotenv) and only
# prompt for it, without echoing, when it is still missing.
load_dotenv()
if not os.environ.get("GROQ_API_KEY"):
    os.environ["GROQ_API_KEY"] = getpass.getpass("Enter your GROQ_API_KEY: ")

In [None]:
# Groq-hosted chat model shared by the rest of the notebook
# (create_final_chain reads this module-level `llm`).
llm = ChatGroq(model="llama3-70b-8192", temperature=0)

In [None]:
# Function to load documents from a folder
def load_documents(folder_path: str) -> List[Document]:
    """Load every supported document found directly inside ``folder_path``.

    Files whose name ends in ``.pdf`` are read with ``PyPDFLoader`` and files
    ending in ``.docx`` with ``Docx2txtLoader``; the extension check is
    case-insensitive. Any other entry is reported on stdout and skipped.
    Entries are visited in sorted name order so the result does not depend on
    the order in which ``os.listdir`` happens to return them.

    Args:
        folder_path: Directory to scan. Sub-directories are not descended into.

    Returns:
        The concatenated results of ``loader.load()`` for each supported file.

    Raises:
        OSError: Propagated from ``os.listdir`` when the folder cannot be read.
    """
    documents = []
    for filename in sorted(os.listdir(folder_path)):
        file_path = os.path.join(folder_path, filename)
        lowered = filename.lower()
        if lowered.endswith('.pdf'):
            loader = PyPDFLoader(file_path)
        elif lowered.endswith('.docx'):
            loader = Docx2txtLoader(file_path)
        else:
            print(f"Unsupported file type: {filename}")
            continue
        documents.extend(loader.load())
    return documents

In [None]:
# Read every supported file from the books folder.
folder_path = "/content/drive/MyDrive/Hybrid_RAG/books"
documents = load_documents(folder_path=folder_path)
print(f"Loaded {len(documents)} documents from the folder.")

In [None]:
# Chunking configuration: size 1000 with an overlap of 200, both measured
# with `len` (i.e. in characters).
splitter_config = {
    "chunk_size": 1000,
    "chunk_overlap": 200,
    "length_function": len,
}
text_splitter = RecursiveCharacterTextSplitter(**splitter_config)

# Break the loaded documents into chunks for indexing.
splits = text_splitter.split_documents(documents)
print(f"Split the documents into {len(splits)} chunks.")

In [None]:
# Vector side of the hybrid search: embed each chunk with a
# sentence-transformers model and index the vectors in FAISS.
embedding_model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=embedding_model_name)
vectorstore = FAISS.from_documents(documents=splits, embedding=embeddings)

print(f"Number of documents loaded and chunked: {len(splits)}")

In [None]:
# Both retrievers return the same number of candidates.
retrieval_k = 10

# Term-based retriever (BM25) built over the chunks.
bm25_retriever = BM25Retriever.from_documents(splits)
bm25_retriever.k = retrieval_k

# Vector-based retriever backed by the FAISS store.
vectorstore_retriever = vectorstore.as_retriever(search_kwargs={"k": retrieval_k})

# Combine both retrievers, giving each the same weight.
hybrid_retriever = EnsembleRetriever(
    weights=[0.5, 0.5],
    retrievers=[bm25_retriever, vectorstore_retriever],
)

print("Hybrid retriever initialized.")

In [None]:
# Cross-encoder model used to rerank the hybrid retriever's candidates.
cross_encoder_model_name = "cross-encoder/ms-marco-MiniLM-L-6-v2"
cross_encoder = HuggingFaceCrossEncoder(
    model_name=cross_encoder_model_name,
    model_kwargs={'device': 'cpu'},  # switch to 'cuda' when a GPU is available
)

# Reranker that keeps the top 3 documents.
reranker = CrossEncoderReranker(top_n=3, model=cross_encoder)

# Retriever = hybrid retrieval followed by cross-encoder reranking.
compression_retriever = ContextualCompressionRetriever(
    base_retriever=hybrid_retriever,
    base_compressor=reranker,
)

print("Reranker and compression retriever initialized.")


In [None]:
# Single-turn QA prompt with {context} and {question} slots.
# NOTE(review): no other visible cell references `prompt`; the conversational
# chain builds its own prompts inside create_final_chain.
qa_template = """
Answer the question based on the following context.
If the answer is not in the context, say "I don't have enough information to answer this question."

Context:
{context}

Question:
{question}
"""
prompt = ChatPromptTemplate.from_template(qa_template)


def pretty_print_docs(docs, max_chars=None):
    """Print retrieved documents in a human-readable form.

    For each document this prints its 1-based position, its content, the
    ``relevance_score`` metadata entry (when present, with 4 decimals) and a
    label taken from the metadata.

    Args:
        docs: Iterable of objects exposing ``page_content`` (str) and
            ``metadata`` (dict).
        max_chars: When given, only the first ``max_chars`` characters of each
            document's content are printed. The default ``None`` prints the
            full content.
    """
    print("\n--- Retrieved Documents (Reranked) ---")
    for position, doc in enumerate(docs, start=1):
        content = doc.page_content
        if max_chars is not None:
            content = content[:max_chars]
        print(f"\nDocument {position}:")
        print(f"Content: {content}")
        if "relevance_score" in doc.metadata:
            print(f"Relevance Score: {doc.metadata['relevance_score']:.4f}")
        # A 'title' entry is not guaranteed to exist; fall back to 'source'
        # and then to a placeholder instead of raising KeyError.
        if "title" in doc.metadata:
            label = doc.metadata["title"]
        else:
            label = doc.metadata.get("source", "N/A")
        print(f"Metadata: {label}")
    print("\n-------------------------------------")


In [None]:
# Sample query for a one-off check of the retriever. The chat loop further
# down reassigns the same name `query` with user input.
query = "chunking strategies"

In [None]:
# Run the reranking retriever on the sample query and show what came back.
retrieved_docs = compression_retriever.invoke(input=query)
pretty_print_docs(docs=retrieved_docs)

In [None]:
def create_final_chain(reranking_retriever):
    """Assemble the history-aware RAG chain.

    Three pieces are wired together, all using the module-level ``llm``:

    1. A history-aware retriever: its prompt asks the model to turn the latest
       user question plus the chat history into a standalone question, which
       is then sent to ``reranking_retriever``.
    2. A "stuff" answering chain: the retrieved documents are placed into the
       ``{context}`` slot of the system prompt and the model answers.
    3. A retrieval chain that connects (1) and (2).

    Both prompts expect the variables ``chat_history`` and ``input``.

    Args:
        reranking_retriever: Retriever handed to
            ``create_history_aware_retriever``.

    Returns:
        The chain produced by ``create_retrieval_chain``.
    """
    # --- Step 1: question reformulation --------------------------------
    reformulation_instructions = (
        "Given a chat history and the latest user question "
        "which might reference context in the chat history, "
        "formulate a standalone question which can be understood "
        "without the chat history. Do NOT answer the question, "
        "just reformulate it if needed and otherwise return it as is."
    )
    reformulation_prompt = ChatPromptTemplate.from_messages([
        ("system", reformulation_instructions),
        ("placeholder", "{chat_history}"),
        ("human", "{input}"),
    ])
    retriever_with_history = create_history_aware_retriever(
        llm, reranking_retriever, reformulation_prompt
    )

    # --- Step 2: answer generation -------------------------------------
    answering_instructions = (
        "You are an assistant for question-answering tasks. "
        "Use the following pieces of retrieved context to answer the question. "
        "If you don't know the answer, just say that you don't know. "
        "Use three sentences maximum and keep the answer concise."
        "\n\n"
        "{context}"
    )
    answering_prompt = ChatPromptTemplate.from_messages([
        ("system", answering_instructions),
        ("placeholder", "{chat_history}"),
        ("human", "{input}"),
    ])
    # "Stuffing": all retrieved documents are combined into one context string.
    answer_chain = create_stuff_documents_chain(llm, answering_prompt)

    # --- Step 3: retrieval + answering ---------------------------------
    full_chain = create_retrieval_chain(retriever_with_history, answer_chain)

    print("Full RAG chain created successfully.")
    return full_chain

In [None]:
rag_chain = create_final_chain(compression_retriever)

# Conversation state: HumanMessage / AIMessage pairs, oldest first.
chat_history = []

print("\n--- Chatbot is ready! Type 'exit' to end the conversation. ---")
while True:
    try:
        # Strip surrounding whitespace so inputs like " exit " are recognised.
        query = input("\nYou: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Input stream closed or the cell was interrupted: stop without a traceback.
        break

    if query.lower() == 'exit':
        break
    if not query:
        # Nothing was typed: do not spend an LLM call on an empty question.
        continue

    # Invoke the chain with the new question and the history so far.
    result = rag_chain.invoke({"input": query, "chat_history": chat_history})
    answer = result['answer']

    print(f"AI: {answer}")

    # Record this turn so follow-up questions can refer back to it.
    chat_history.extend([HumanMessage(content=query), AIMessage(content=answer)])

In [13]:
print("Tunnel Password:")
!wget -q -O - ipv4.icanhazip.com

Tunnel Password:
34.16.153.187


In [14]:
# Install the localtunnel npm package; the launch cell runs it via `npx localtunnel`.
!npm install localtunnel

[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K⠇[1G[0K⠏[1G[0K⠋[1G[0K⠙[1G[0K⠹[1G[0K⠸[1G[0K⠼[1G[0K⠴[1G[0K⠦[1G[0K⠧[1G[0K
added 22 packages in 4s
[1G[0K⠧[1G[0K
[1G[0K⠧[1G[0K3 packages are looking for funding
[1G[0K⠧[1G[0K  run `npm fund` for details
[1G[0K⠧[1G[0K

In [None]:
# Start the Streamlit app (app.py) in the background (`&`) and expose local
# port 8501 through localtunnel.
# NOTE(review): the tunnel port is hard-coded to 8501 while Streamlit is not
# given an explicit port — presumably this relies on Streamlit's default; confirm.
! streamlit run app.py & npx localtunnel --port 8501

[1G[0K⠙
Collecting usage statistics. To deactivate, set browser.gatherUsageStats to false.
[0m
[1G[0K[0m
[34m[1m  You can now view your Streamlit app in your browser.[0m
[0m
[34m  Local URL: [0m[1mhttp://localhost:8501[0m
[34m  Network URL: [0m[1mhttp://172.28.0.12:8501[0m
[34m  External URL: [0m[1mhttp://34.16.153.187:8501[0m
[0m
your url is: https://sour-parts-repair.loca.lt
  embedding_model = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
2025-06-28 18:03:55.008695: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:477] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
E0000 00:00:1751133835.032139    4638 cuda_dnn.cc:8310] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
E0000 00:00:1751133835.039209    4638 cuda_blas.cc:1418] Unable to register cuBLAS factory: Attempting to register fa