In [1]:
# Mount Google Drive at /content/drive; later cells read and write the project
# folder ("My Drive/Agentic-RAG project/") underneath this mount point.
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [2]:
# Step 1: Force a clean re-installation of all required libraries
# The packages are removed first so that the pinned versions below are installed
# from scratch instead of being layered on top of whatever the runtime ships with.
print("Uninstalling existing versions to avoid conflicts...")
!pip uninstall -y langchain langchain-community langchain-huggingface sentence-transformers faiss-cpu pydantic

# Every package is pinned to an exact version so the install is repeatable.
print("\nInstalling fresh, compatible versions...")
!pip install --quiet langchain==0.2.5 langchain-community==0.2.5 langchain-huggingface==0.0.3 sentence-transformers==2.7.0 faiss-cpu==1.8.0 pydantic==2.7.4

# The message asks for a manual runtime restart; nothing in this cell restarts it.
print("\nInstallation complete. Please RESTART THE RUNTIME NOW.")

Uninstalling existing versions to avoid conflicts...
Found existing installation: langchain 0.2.5
Uninstalling langchain-0.2.5:
  Successfully uninstalled langchain-0.2.5
Found existing installation: langchain-community 0.2.5
Uninstalling langchain-community-0.2.5:
  Successfully uninstalled langchain-community-0.2.5
Found existing installation: langchain-huggingface 0.0.3
Uninstalling langchain-huggingface-0.0.3:
  Successfully uninstalled langchain-huggingface-0.0.3
Found existing installation: sentence-transformers 2.7.0
Uninstalling sentence-transformers-2.7.0:
  Successfully uninstalled sentence-transformers-2.7.0
Found existing installation: faiss-cpu 1.8.0
Uninstalling faiss-cpu-1.8.0:
  Successfully uninstalled faiss-cpu-1.8.0
Found existing installation: pydantic 2.7.4
Uninstalling pydantic-2.7.4:
  Successfully uninstalled pydantic-2.7.4

Installing fresh, compatible versions...
[31mERROR: pip's dependency resolver does not currently take into account all the packages that a

In [3]:
import json
import os
from google.colab import drive # Make sure to re-run your mount cell first!

# Define the path to your project folder in Google Drive
PROJECT_DIR = "/content/drive/My Drive/Agentic-RAG project/"

# Make sure the project directory exists; everything below depends on it
if not os.path.exists(PROJECT_DIR):
    print(f"ERROR: Project directory not found at {PROJECT_DIR}")
    print("Please make sure you have mounted your Google Drive and the path is correct.")
else:
    # Now, import the rest of the libraries
    from langchain_huggingface import HuggingFaceEmbeddings
    from langchain_community.vectorstores import FAISS

    # --- Load the processed data from Google Drive ---
    # Each non-empty line of the JSONL file is one JSON object with
    # a 'text' field and a 'metadata' field (both are read below).
    jsonl_path = os.path.join(PROJECT_DIR, "processed_chunks.jsonl")

    print(f"Loading processed chunks from: {jsonl_path}")
    chunks_with_metadata = []
    with open(jsonl_path, 'r', encoding='utf-8') as f:
        for line in f:
            # json.loads raises on empty input, so skip blank lines
            # (e.g. a trailing newline at the end of the file).
            if not line.strip():
                continue
            chunks_with_metadata.append(json.loads(line))

    # Fail early with a clear message instead of an obscure error inside FAISS
    if not chunks_with_metadata:
        raise ValueError(f"No chunks found in {jsonl_path}; nothing to index.")

    texts = [chunk['text'] for chunk in chunks_with_metadata]
    metadatas = [chunk['metadata'] for chunk in chunks_with_metadata]
    print(f"Loaded {len(texts)} text chunks.")

    # --- Initialize the Embedding Model ---
    # normalize_embeddings=True is part of how this index is built; code that
    # queries the saved index should embed queries the same way.
    model_name = "BAAI/bge-large-en-v1.5"
    print(f"Loading embedding model: {model_name}...")
    embeddings = HuggingFaceEmbeddings(
        model_name=model_name,
        model_kwargs={'device': 'cuda'},
        encode_kwargs={'normalize_embeddings': True}
    )
    print("Embedding model loaded successfully.")

    # --- Create the FAISS Vector Store ---
    print("Creating FAISS vector store... This may take a few minutes...")
    vectorstore = FAISS.from_texts(texts, embeddings, metadatas=metadatas)
    print("Vector store created successfully!")

    # --- Save the Vector Store to your Google Drive ---
    index_path = os.path.join(PROJECT_DIR, "faiss_index")
    vectorstore.save_local(index_path)
    print(f"Vector store saved to your Google Drive at: '{index_path}'")

    # --- Test the Retrieval System ---
    print("\n--- Running a sample retrieval test ---")
    query = "What are a consumer's rights to delete personal information under CCPA?"
    results = vectorstore.similarity_search_with_score(query, k=3)

    print(f"\nQuery: '{query}'\n")
    for i, (doc, score) in enumerate(results):
        print(f"--- Result {i+1} (Score: {score:.4f}) ---")
        print(f"Source: {doc.metadata.get('source_document', 'N/A')}")
        print(f"Heading: {doc.metadata.get('heading', 'N/A')}")
        print(f"Content: \n{doc.page_content}")
        print("-" * 20)

Loading processed chunks from: /content/drive/My Drive/Agentic-RAG project/processed_chunks.jsonl
Loaded 1376 text chunks.
Loading embedding model: BAAI/bge-large-en-v1.5...


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/52.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/779 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.34G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/366 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/125 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]

Embedding model loaded successfully.
Creating FAISS vector store... This may take a few minutes...
Vector store created successfully!
Vector store saved to your Google Drive at: '/content/drive/My Drive/Agentic-RAG project/faiss_index'

--- Running a sample retrieval test ---

Query: 'What are a consumer's rights to delete personal information under CCPA?'

--- Result 1 (Score: 0.4078) ---
Source: ccpa_statute.pdf
Heading: 1798.105. Consumers’ Right to Delete Personal Information
Content: 
(a) 
A consumer shall have the right to request that a business delete any personal 
information about the consumer which the business has collected from the consumer. 
(b) 
A business that collects personal information about consumers shall disclose, pursuant to 
Section 1798.130, the consumer’s rights to request the deletion of the consumer’s 
personal information. 
(c) 
(1) 
A business that receives a verifiable consumer request from a consumer to delete 
the consumer’s personal information pursua

In [4]:
# Install the library for BM25 keyword search
# NOTE(review): unlike the other install cells, this one does not pin a version.
!pip install --quiet rank_bm25

In [5]:
import json
import os
# Imports from the last step
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
# New imports for Hybrid Search
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.docstore.document import Document

# --- Step 1: Define the path to your project folder ---
PROJECT_DIR = "/content/drive/My Drive/Agentic-RAG project/"
INDEX_PATH = os.path.join(PROJECT_DIR, "faiss_index")
JSONL_PATH = os.path.join(PROJECT_DIR, "processed_chunks.jsonl")

# --- Step 2: Load the saved FAISS index and the documents ---
print("Loading FAISS index and documents...")

# Load the embeddings model (same model and normalization used to build the index)
model_name = "BAAI/bge-large-en-v1.5"
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs={'device': 'cuda'},
    encode_kwargs={'normalize_embeddings': True}
)

# Load the FAISS index from your Google Drive
# SECURITY: allow_dangerous_deserialization=True lets load_local unpickle data
# from the index folder. Only point this at an index this project saved itself.
vectorstore = FAISS.load_local(INDEX_PATH, embeddings, allow_dangerous_deserialization=True)

# Load the raw documents from the JSONL file
chunks_with_metadata = []
with open(JSONL_PATH, 'r', encoding='utf-8') as f:
    for line in f:
        # json.loads raises on empty input, so skip blank lines
        if not line.strip():
            continue
        chunks_with_metadata.append(json.loads(line))

# We need to create LangChain Document objects for the BM25 retriever
# The FAISS retriever already has them, but BM25 needs them explicitly
documents = [
    Document(page_content=chunk['text'], metadata=chunk['metadata'])
    for chunk in chunks_with_metadata
]

# BM25 cannot be built over an empty corpus; fail with a clear message instead
if not documents:
    raise ValueError(f"No chunks found in {JSONL_PATH}; cannot build the BM25 retriever.")
print("Loading complete.")

# --- Step 3: Initialize the Retrievers ---
print("Initializing retrievers...")

# Initialize FAISS retriever
faiss_retriever = vectorstore.as_retriever(search_kwargs={"k": 5}) # Get top 5 semantic results

# Initialize BM25 retriever (keyword search)
bm25_retriever = BM25Retriever.from_documents(documents)
bm25_retriever.k = 5 # Get top 5 keyword results
print("Retrievers initialized.")

# --- Step 4: Initialize the Ensemble Retriever ---
# The EnsembleRetriever will combine results from both retrievers and re-rank them.
# We can adjust the weights to prioritize one retriever over the other.
ensemble_retriever = EnsembleRetriever(
    retrievers=[bm25_retriever, faiss_retriever],
    weights=[0.5, 0.5] # Give equal weight to both keyword and semantic search
)
print("Ensemble retriever created.")

# --- Step 5: Test the Hybrid Search ---
print("\n--- Running a HYBRID SEARCH retrieval test ---")
query = "What are a consumer's rights to delete personal information under CCPA?"

# The ensemble retriever automatically handles querying both and combining results
hybrid_results = ensemble_retriever.invoke(query)

print(f"\nQuery: '{query}'\n")
for i, doc in enumerate(hybrid_results):
    print(f"--- Result {i+1} ---")
    print(f"Source: {doc.metadata.get('source_document', 'N/A')}")
    print(f"Heading: {doc.metadata.get('heading', 'N/A')}")
    print(f"Content: \n{doc.page_content}")
    print("-" * 20)

Loading FAISS index and documents...
Loading complete.
Initializing retrievers...
Retrievers initialized.
Ensemble retriever created.

--- Running a HYBRID SEARCH retrieval test ---

Query: 'What are a consumer's rights to delete personal information under CCPA?'

--- Result 1 ---
Source: ccpa_statute.pdf
Heading: 1798.105. Consumers’ Right to Delete Personal Information
Content: 
(a) 
A consumer shall have the right to request that a business delete any personal 
information about the consumer which the business has collected from the consumer. 
(b) 
A business that collects personal information about consumers shall disclose, pursuant to 
Section 1798.130, the consumer’s rights to request the deletion of the consumer’s 
personal information. 
(c) 
(1) 
A business that receives a verifiable consumer request from a consumer to delete 
the consumer’s personal information pursuant to subdivision (a) of this section shall 
delete the consumer’s personal information from its records, notif

In [6]:
# The cross-encoder models are part of the sentence-transformers library
# NOTE(review): no version is given here, while the first install cell pins
# sentence-transformers==2.7.0 — confirm this cell is still needed.
!pip install --quiet sentence-transformers

In [7]:
!pip install --quiet ragas datasets

[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m68.0/68.0 kB[0m [31m4.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m279.1/279.1 kB[0m [31m15.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m45.5/45.5 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m155.5/155.5 kB[0m [31m15.7 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m444.8/444.8 kB[0m [31m25.6 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m2.0/2.0 MB[0m [31m75.0 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m51.5/51.5 kB[0m [31m4.5 MB/s[0m eta [36m0:00:00[0m
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m8.0/8.0 MB[0m [31m53.3 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver doe

In [8]:
from sentence_transformers import CrossEncoder
import operator

# --- Step 1: Initialize the Cross-Encoder Model ---
# A compact ranking model, fetched from Hugging Face on first use.
print("Loading Cross-Encoder model...")
cross_encoder = CrossEncoder('cross-encoder/ms-marco-MiniLM-L-6-v2')
print("Model loaded.")

# --- Step 2: Prepare Document Pairs for Re-ranking ---
# The model scores [query, document_text] pairs. The candidates are the
# `hybrid_results` produced by the hybrid-search cell, which must have run first.
query = "What are a consumer's rights to delete personal information under CCPA?"
doc_texts = [candidate.page_content for candidate in hybrid_results]
query_doc_pairs = [[query, text] for text in doc_texts]

# --- Step 3: Get New Relevance Scores ---
print("Calculating new relevance scores with the Cross-Encoder...")
new_scores = cross_encoder.predict(query_doc_pairs)

# --- Step 4: Combine Documents with New Scores and Sort ---
# Pair each score with its document, highest score first.
scored_docs = sorted(
    zip(new_scores, hybrid_results),
    key=lambda scored_pair: scored_pair[0],
    reverse=True,
)
print("Re-ranking complete!")

# --- Step 5: Display and Compare Results ---
print("\n--- ORIGINAL Hybrid Search Results (Top 4) ---")
for rank, doc in enumerate(hybrid_results[:4], start=1):
    print(f"{rank}. Heading: {doc.metadata.get('heading', 'N/A')}")

print("\n--- RE-RANKED Cross-Encoder Results (Top 4) ---")
for rank, (score, doc) in enumerate(scored_docs[:4], start=1):
    print(f"{rank}. [Score: {score:.4f}] Heading: {doc.metadata.get('heading', 'N/A')}")

print("\n--- Top Re-Ranked Document ---")
score, doc = scored_docs[0]
top_metadata = doc.metadata
print(f"Source: {top_metadata.get('source_document', 'N/A')}")
print(f"Heading: {top_metadata.get('heading', 'N/A')}")
print(f"Content: \n{doc.page_content}")

Loading Cross-Encoder model...


config.json:   0%|          | 0.00/794 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/132 [00:00<?, ?B/s]

Model loaded.
Calculating new relevance scores with the Cross-Encoder...
Re-ranking complete!

--- ORIGINAL Hybrid Search Results (Top 4) ---
1. Heading: 1798.105. Consumers’ Right to Delete Personal Information
2. Heading: 1798.105. Consumers’ Right to Delete Personal Information
3. Heading: 1798.145. Exemptions
4. Heading: 1798.105. Consumers’ Right to Delete Personal Information

--- RE-RANKED Cross-Encoder Results (Top 4) ---
1. [Score: 7.2623] Heading: 1798.105. Consumers’ Right to Delete Personal Information
2. [Score: 5.2440] Heading: 1798.105. Consumers’ Right to Delete Personal Information
3. [Score: 5.0098] Heading: 1798.145. Exemptions
4. [Score: 4.5393] Heading: 1798.145. Exemptions

--- Top Re-Ranked Document ---
Source: ccpa_statute.pdf
Heading: 1798.105. Consumers’ Right to Delete Personal Information
Content: 
(a) 
A consumer shall have the right to request that a business delete any personal 
information about the consumer which the business has collected from the cons

In [10]:
# The full, correct installation command including the missing langchain-groq package
# Same pins as the first install cell, plus langchain-groq (imported by the next cell).
# NOTE(review): pip's output for this cell lists preinstalled packages that want a
# newer pydantic than the 2.7.4 pinned here — confirm those packages are not needed.
!pip install --quiet langchain==0.2.5 langchain-community==0.2.5 langchain-huggingface==0.0.3 sentence-transformers==2.7.0 faiss-cpu==1.8.0 pydantic==2.7.4 langchain-groq==0.1.5

[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/134.9 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m134.9/134.9 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25h[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
instructor 1.11.3 requires pydantic<3.0.0,>=2.8.0, but you have pydantic 2.7.4 which is incompatible.
thinc 8.3.6 requires numpy<3.0.0,>=2.0.0, but you have numpy 1.26.4 which is incompatible.
albumentations 2.0.8 requires pydantic>=2.9.2, but you have pydantic 2.7.4 which is incompatible.
mcp 1.13.1 requires pydantic<3.0.0,>=2.11.0, but you have pydantic 2.7.4 which is incompatible.[0m[31m
[0m

In [11]:
import os
import json
from google.colab import userdata

# LangChain imports
from langchain_groq import ChatGroq
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# Imports from our previous steps
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.docstore.document import Document

# --- Step 1: Set up API Key ---
# Best-effort: a failure is reported but does not stop the cell, so the
# underlying cause is printed as well to make it diagnosable.
try:
    GROQ_API_KEY = userdata.get('GROQ_API_KEY')
    os.environ["GROQ_API_KEY"] = GROQ_API_KEY
    print("GROQ_API_KEY loaded successfully.")
except Exception as e:
    print("Could not load GROQ_API_KEY. Please add it to Colab's secrets (key icon on the left).")
    print(f"Details: {e!r}")

# --- Step 2: Re-create our Advanced Retriever from Phase 2 ---
print("Re-creating the advanced retriever...")

PROJECT_DIR = "/content/drive/My Drive/Agentic-RAG project/"
INDEX_PATH = os.path.join(PROJECT_DIR, "faiss_index")
JSONL_PATH = os.path.join(PROJECT_DIR, "processed_chunks.jsonl")

# Load documents
chunks_with_metadata = []
with open(JSONL_PATH, 'r', encoding='utf-8') as f:
    for line in f:
        # json.loads raises on empty input, so skip blank lines
        if not line.strip():
            continue
        chunks_with_metadata.append(json.loads(line))
documents = [Document(page_content=chunk['text'], metadata=chunk['metadata']) for chunk in chunks_with_metadata]

# Load embeddings model
embeddings = HuggingFaceEmbeddings(model_name="BAAI/bge-large-en-v1.5", model_kwargs={'device': 'cuda'}, encode_kwargs={'normalize_embeddings': True})

# Load FAISS index
# SECURITY: allow_dangerous_deserialization=True lets load_local unpickle data
# from the index folder. Only point this at an index this project saved itself.
vectorstore = FAISS.load_local(INDEX_PATH, embeddings, allow_dangerous_deserialization=True)

# Create retrievers
faiss_retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
bm25_retriever = BM25Retriever.from_documents(documents)
bm25_retriever.k = 5
ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, faiss_retriever], weights=[0.5, 0.5])
print("Retriever re-created.")

# --- Step 3: Define the RAG Prompt Template ---
rag_template = """
Use the following context to answer the user's question. If you cannot answer the question from the context, say that you don't know. Do not make up information.

Context:
{context}

Question:
{question}

Answer:
"""

rag_prompt = PromptTemplate.from_template(rag_template)

# --- Step 4: Initialize the Groq LLM ---
llm = ChatGroq(
    # UPDATED: Changed to a current, active model on Groq
    model="llama-3.1-8b-instant",
    temperature=0  # Set to 0 for deterministic, fact-based answers
)

# --- Step 5: Create the Full RAG Chain using LCEL ---
def format_docs(docs):
    """Render retrieved documents as one context string.

    Each document becomes a "Source: <file name> - Heading: <heading>" line
    followed by its content; documents are separated by a blank line.
    """
    return "\n\n".join(f"Source: {os.path.basename(doc.metadata.get('source_document', ''))} - Heading: {doc.metadata.get('heading', '')}\nContent: {doc.page_content}" for doc in docs)

# The question is sent to the retriever (whose documents are formatted into the
# context) and is also passed through unchanged to the prompt.
rag_chain = (
    {"context": ensemble_retriever | format_docs, "question": RunnablePassthrough()}
    | rag_prompt
    | llm
    | StrOutputParser()
)

# --- Step 6: Ask a Question! ---
print("\n--- Asking the RAG chain a question ---")
query = "What are a consumer's rights to delete personal information under CCPA?"
response = rag_chain.invoke(query)

print(f"Query: {query}\n")
print("Response:")
print(response)

GROQ_API_KEY loaded successfully.
Re-creating the advanced retriever...
Retriever re-created.

--- Asking the RAG chain a question ---
Query: What are a consumer's rights to delete personal information under CCPA?

Response:
A consumer shall have the right to request that a business delete any personal information about the consumer which the business has collected from the consumer. 

This right is specified in Section 1798.105 of the CCPA statute, which states that a consumer shall have the right to request that a business delete any personal information about the consumer which the business has collected from the consumer.


In [13]:
# Cell 1: Install all application dependencies
!pip install --quiet streamlit==1.36.0 \
langchain==0.2.5 \
langchain-community==0.2.5 \
langchain-huggingface==0.0.3 \
sentence-transformers==2.7.0 \
faiss-cpu==1.8.0 \
pydantic==2.7.4 \
langchain-groq==0.1.5 \
langgraph==0.0.69 \
openrouter-py==0.5.2 \
rank_bm25 \
python-dotenv

[31mERROR: Ignored the following versions that require a different python version: 0.55.2 Requires-Python <3.5[0m[31m
[0m[31mERROR: Could not find a version that satisfies the requirement openrouter-py==0.5.2 (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for openrouter-py==0.5.2[0m[31m
[0m

In [14]:
%%writefile app.py

import streamlit as st
import os
import json
from typing import List, TypedDict

# --- API Key Setup ---
# Keys already present in the environment (e.g. exported before launching the
# app) are used as-is; Colab secrets are consulted only for keys still missing.
# The app stops with a visible error when a required key cannot be found.
REQUIRED_API_KEYS = ("GROQ_API_KEY", "OPENROUTER_API_KEY")


def _read_colab_secret(name):
    """Return the Colab secret called `name`, or None when it cannot be read.

    Any failure (not running in Colab, secret missing, access denied, no
    notebook kernel available) is treated as "secret unavailable" so the caller
    can report every missing key in a single message.
    """
    try:
        from google.colab import userdata
        return userdata.get(name)
    except Exception:
        return None


for _key_name in REQUIRED_API_KEYS:
    if not os.environ.get(_key_name):
        _secret_value = _read_colab_secret(_key_name)
        # Only assign real values: os.environ rejects None with a TypeError
        if _secret_value:
            os.environ[_key_name] = _secret_value

# Kept as module-level names for backward compatibility
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
OPENROUTER_API_KEY = os.environ.get("OPENROUTER_API_KEY")

_missing_keys = [name for name in REQUIRED_API_KEYS if not os.environ.get(name)]
if _missing_keys:
    st.error(
        "API keys not found: " + ", ".join(_missing_keys)
        + ". Please add them to Colab secrets or set them as environment variables."
    )
    st.stop()


# --- LangChain & LangGraph Imports ---
from langchain_groq import ChatGroq
from langchain_community.chat_models import ChatOpenRouter
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.docstore.document import Document
from langgraph.graph import END, StateGraph

# --- Caching the Retriever ---
# This is a key step for performance. Streamlit's cache decorator
# ensures that this complex object is only loaded once.
@st.cache_resource(show_spinner="Loading Retriever and Embedding Model...")
def load_retriever():
    """
    Loads the retriever components and assembles the ensemble retriever.
    This function is cached to avoid reloading on every user interaction.

    Returns:
        An EnsembleRetriever that combines a BM25 keyword retriever and a FAISS
        vector retriever with equal weights, each returning its top 5 results.
    """
    # Define paths - it will look inside the Colab environment for your mounted Drive
    PROJECT_DIR = "/content/drive/My Drive/Agentic-RAG project/"
    INDEX_PATH = os.path.join(PROJECT_DIR, "faiss_index")
    JSONL_PATH = os.path.join(PROJECT_DIR, "processed_chunks.jsonl")

    # Use a context manager so the file handle is closed, and skip blank lines
    # because json.loads raises on empty input.
    with open(JSONL_PATH, 'r', encoding='utf-8') as jsonl_file:
        chunks = [json.loads(line) for line in jsonl_file if line.strip()]
    documents = [Document(page_content=chunk['text'], metadata=chunk['metadata']) for chunk in chunks]

    # Use CPU for embeddings in the deployed app to save GPU memory.
    # normalize_embeddings=True matches how the notebook embeds texts when it
    # builds the index, so query vectors are produced the same way.
    embeddings = HuggingFaceEmbeddings(
        model_name="BAAI/bge-large-en-v1.5",
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': True},
    )
    # SECURITY: allow_dangerous_deserialization=True lets load_local unpickle data
    # from the index folder. Only point this at an index this project saved itself.
    vectorstore = FAISS.load_local(INDEX_PATH, embeddings, allow_dangerous_deserialization=True)

    faiss_retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
    bm25_retriever = BM25Retriever.from_documents(documents)
    bm25_retriever.k = 5

    return EnsembleRetriever(retrievers=[bm25_retriever, faiss_retriever], weights=[0.5, 0.5])

# --- LangGraph Agent Definition with TWO Models ---
class GraphState(TypedDict):
    """State dictionary passed to StateGraph as the schema for the agent graph."""
    # The user's question
    question: str
    # Documents associated with the question (retrieved, then filtered by grading)
    documents: List[Document]
    # The generated answer text
    generation: str

def build_agentic_rag_app(retriever):
    """
    Builds the LangGraph agent application.

    The graph runs retrieve -> grade_documents -> generate. When grading leaves
    no documents, the graph ends without producing a "generation".

    Args:
        retriever: Object whose ``invoke(question)`` returns the candidate
            documents for a question.

    Returns:
        The compiled LangGraph workflow.
    """
    grader_llm = ChatGroq(model="llama-3.1-8b-instant", temperature=0)
    # NOTE(review): the notebook's install cells could not find an OpenRouter
    # package on PyPI — confirm that ChatOpenRouter is importable here.
    generator_llm = ChatOpenRouter(model_name="mistralai/mixtral-8x7b-instruct", temperature=0)

    # The prompts and chains do not depend on the question, so build them once
    # here instead of on every node invocation.
    grading_prompt = PromptTemplate.from_template("Grade relevance of document to question. Score 'yes' or 'no'. JSON out: {{\"score\": \"yes\"}}. Document: {document}\nQuestion: {question}")
    grader = grading_prompt | grader_llm | JsonOutputParser()

    rag_template = "Answer the question based only on the context provided:\n\nContext:\n{context}\n\nQuestion:\n{question}"
    rag_prompt = PromptTemplate.from_template(rag_template)
    rag_chain = rag_prompt | generator_llm | StrOutputParser()

    def format_docs(docs):
        """Render documents as "Source: <file name>" / "Content: ..." blocks."""
        return "\n\n".join(f"Source: {os.path.basename(doc.metadata.get('source_document', ''))}\nContent: {doc.page_content}" for doc in docs)

    def retrieve_documents(state):
        """Fetch candidate documents for the question in the state."""
        question = state["question"]
        documents = retriever.invoke(question)
        return {"documents": documents, "question": question}

    def grade_documents(state):
        """Keep only the documents the grader model marks as relevant ("yes")."""
        question = state["question"]
        documents = state["documents"]
        filtered_docs = []
        for d in documents:
            try:
                score = grader.invoke({"question": question, "document": d.page_content})
            except Exception:
                # Best-effort grading: if the call or the JSON parsing fails,
                # the document is treated as not relevant instead of aborting.
                continue
            # Anything that is not a {"score": "yes"}-style dict counts as "no"
            if isinstance(score, dict) and str(score.get('score', 'no')).strip().lower() == "yes":
                filtered_docs.append(d)
        return {"documents": filtered_docs}

    def generate_answer(state):
        """Generate the answer from the graded documents."""
        question = state["question"]
        documents = state["documents"]
        # Format the context before invoking the chain. Mapping the input through
        # a dict of callables would hand each callable the WHOLE input dict, so
        # format_docs would iterate over dict keys instead of documents.
        generation = rag_chain.invoke({"context": format_docs(documents), "question": question})
        return {"generation": generation}

    def decide_to_generate(state):
        """Route to "generate" when any document survived grading, else "end"."""
        return "generate" if state["documents"] else "end"

    workflow = StateGraph(GraphState)
    workflow.add_node("retrieve", retrieve_documents)
    workflow.add_node("grade_documents", grade_documents)
    workflow.add_node("generate", generate_answer)
    workflow.set_entry_point("retrieve")
    workflow.add_edge("retrieve", "grade_documents")
    workflow.add_conditional_edges("grade_documents", decide_to_generate, {"generate": "generate", "end": END})
    workflow.add_edge("generate", END)
    return workflow.compile()

# --- Streamlit UI ---

def _extract_generation(chunk):
    """Return the generated answer carried by one streamed graph chunk, or None.

    Streamed chunks are dictionaries keyed by node name whose values are that
    node's state update, so the answer is nested one level down
    (e.g. {"generate": {"generation": "..."}}). A chunk that carries
    "generation" at the top level is accepted as well.
    """
    if not isinstance(chunk, dict):
        return None
    if chunk.get("generation"):
        return chunk["generation"]
    for node_update in chunk.values():
        if isinstance(node_update, dict) and node_update.get("generation"):
            return node_update["generation"]
    return None


def main():
    """Render the chat UI and answer each submitted question with the agent graph."""
    st.title("⚖️ Cogni-Compliance: Agentic RAG Chatbot")
    st.caption("Ask me questions about GDPR, HIPAA, or CCPA.")

    retriever = load_retriever()
    app = build_agentic_rag_app(retriever)

    # The chat history lives in the session state so it survives script reruns
    if "messages" not in st.session_state:
        st.session_state.messages = []

    # Replay the conversation so far
    for message in st.session_state.messages:
        with st.chat_message(message["role"]):
            st.markdown(message["content"])

    if prompt := st.chat_input("What is your question?"):
        st.session_state.messages.append({"role": "user", "content": prompt})
        with st.chat_message("user"):
            st.markdown(prompt)

        with st.chat_message("assistant"):
            with st.spinner("Thinking..."):
                inputs = {"question": prompt}
                # Shown when the graph ends without generating an answer
                final_response = "I don't have enough information to answer that."
                for output in app.stream(inputs):
                    # Look inside the per-node update; checking only the
                    # top-level keys would never find the answer.
                    generation = _extract_generation(output)
                    if generation:
                        final_response = generation
                st.markdown(final_response)
        st.session_state.messages.append({"role": "assistant", "content": final_response})

if __name__ == "__main__":
    main()

Writing app.py


In [22]:
# Master Installation Cell for the Entire Project (Corrected)
# All requirements are passed to one pip command, so a single requirement that
# cannot be resolved makes the whole command fail.
# NOTE(review): pip's output for this cell reports that `langchain-openrouter`
# only offers version 0.0.1, so the `==0.2.2` pin below cannot be satisfied and
# the evaluation cell then fails on `import langchain_openrouter` — confirm the
# intended package and version.
!pip install --quiet streamlit==1.36.0 \
langchain==0.2.5 \
langchain-community==0.2.5 \
langchain-huggingface==0.0.3 \
sentence-transformers==2.7.0 \
faiss-cpu==1.8.0 \
pydantic==2.7.4 \
langchain-groq==0.1.5 \
langgraph==0.0.69 \
langchain-openrouter==0.2.2 \
rank_bm25 \
python-dotenv \
ragas==0.1.9 \
datasets==2.19.2

[31mERROR: Ignored the following versions that require a different python version: 0.55.2 Requires-Python <3.5[0m[31m
[0m[31mERROR: Could not find a version that satisfies the requirement langchain-openrouter==0.2.2 (from versions: 0.0.1)[0m[31m
[0m[31mERROR: No matching distribution found for langchain-openrouter==0.2.2[0m[31m
[0m

In [23]:
import os
import json
from google.colab import userdata
from datasets import Dataset
from ragas import evaluate
from ragas.metrics import faithfulness, answer_relevancy, context_recall, context_precision
import pandas as pd

# LangChain Imports
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS
from langchain.retrievers import BM25Retriever, EnsembleRetriever
from langchain.docstore.document import Document
from langchain_openrouter import ChatOpenRouter
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough

# --- Step 1: Set up API Keys from Colab Secrets ---
# Best-effort: a failure is reported but does not stop the cell, so the
# underlying cause is printed as well to make it diagnosable.
try:
    OPENROUTER_API_KEY = userdata.get('OPENROUTER_API_KEY')
    os.environ["OPENROUTER_API_KEY"] = OPENROUTER_API_KEY
    print("API keys loaded successfully.")
except Exception as e:
    print("Could not load API keys. Please ensure OPENROUTER_API_KEY is in Colab's secrets.")
    print(f"Details: {e!r}")


# --- Step 2: Define the Golden Dataset ---
# Hand-written evaluation set: each entry pairs a "question" with the reference
# answer ("ground_truth") that the generated answers are evaluated against.
golden_dataset = [
    {
        "question": "What is the timeframe for notifying users of a data breach under GDPR?",
        "ground_truth": "Under GDPR Article 33, a controller must notify the supervisory authority of a personal data breach without undue delay and, where feasible, not later than 72 hours after having become aware of it. If the breach is likely to result in a high risk to the rights and freedoms of individuals, the controller must also communicate the breach to the data subject without undue delay, as stated in Article 34."
    },
    {
        "question": "Does CCPA apply to employee data?",
        "ground_truth": "The CCPA has specific exemptions for personal information collected by a business about a natural person in the course of the natural person acting as a job applicant or employee. As stated in section 1798.145(m), this information is exempt to the extent it is collected and used by the business solely within the context of the person's role with the business."
    },
    {
        "question": "What are the rules for marketing under HIPAA?",
        "ground_truth": "Under HIPAA, marketing is a communication about a product or service that encourages recipients to purchase or use it. A covered entity must obtain an individual's authorization for any use or disclosure of protected health information for marketing purposes, with a few exceptions such as face-to-face communications or promotional gifts of nominal value."
    }
]

# --- Step 3: Build the RAG chain for evaluation ---
def get_rag_chain():
    """Build the RAG chain used for evaluation.

    Returns:
        A runnable that takes a question string and returns a dict with:
          - "answer": the generated answer text
          - "contexts": the list of retrieved documents the answer was based on
    """
    # Local import: the pipeline stages must be explicit Runnables (see below)
    from langchain_core.runnables import RunnableParallel

    PROJECT_DIR = "/content/drive/My Drive/Agentic-RAG project/"
    INDEX_PATH = os.path.join(PROJECT_DIR, "faiss_index")
    JSONL_PATH = os.path.join(PROJECT_DIR, "processed_chunks.jsonl")

    # Use a context manager so the file handle is closed, and skip blank lines
    # because json.loads raises on empty input.
    with open(JSONL_PATH, 'r', encoding='utf-8') as jsonl_file:
        chunks = [json.loads(line) for line in jsonl_file if line.strip()]
    documents = [Document(page_content=chunk['text'], metadata=chunk['metadata']) for chunk in chunks]

    # normalize_embeddings=True matches how the notebook embeds texts when it
    # builds the index, so query vectors are produced the same way.
    embeddings = HuggingFaceEmbeddings(
        model_name="BAAI/bge-large-en-v1.5",
        encode_kwargs={'normalize_embeddings': True},
    )
    # SECURITY: allow_dangerous_deserialization=True lets load_local unpickle data
    # from the index folder. Only point this at an index this project saved itself.
    vectorstore = FAISS.load_local(INDEX_PATH, embeddings, allow_dangerous_deserialization=True)

    faiss_retriever = vectorstore.as_retriever(search_kwargs={"k": 5})
    bm25_retriever = BM25Retriever.from_documents(documents)
    bm25_retriever.k = 5
    retriever = EnsembleRetriever(retrievers=[bm25_retriever, faiss_retriever], weights=[0.5, 0.5])

    llm = ChatOpenRouter(model_name="mistralai/mixtral-8x7b-instruct")
    prompt = PromptTemplate.from_template("Answer the question based only on the context provided:\nContext: {context}\nQuestion: {question}")

    def format_context(docs):
        """Join the text of the retrieved documents, separated by blank lines."""
        return "\n\n".join(doc.page_content for doc in docs)

    # The prompt expects "context" and "question", so derive both from the
    # retrieval stage's output before calling the model.
    answer_chain = (
        RunnableParallel({
            "context": lambda x: format_context(x["documents"]),
            "question": lambda x: x["question"],
        })
        | prompt
        | llm
        | StrOutputParser()
    )

    # RAGAS needs the 'answer' and 'contexts' keys in the output.
    # The stages are wrapped in RunnableParallel because piping plain dicts
    # together (`{...} | {...}`) is an ordinary dict merge on Python 3.9+ and
    # would yield a dict, not a runnable chain.
    chain = (
        RunnableParallel({"documents": retriever, "question": RunnablePassthrough()})
        | RunnableParallel({"answer": answer_chain, "contexts": lambda x: x["documents"]})
    )
    return chain

# --- Step 4: Run the Evaluation ---
rag_chain = get_rag_chain()

# Answer every golden question and keep the retrieved context text with each answer.
print("Generating answers for the golden dataset...")
generated_data = []
for item in golden_dataset:
    question = item['question']
    print(f"  - Answering question: '{question}'")
    result = rag_chain.invoke(question)
    context_texts = [doc.page_content for doc in result['contexts']]
    generated_data.append(
        dict(
            question=question,
            ground_truth=item['ground_truth'],
            answer=result['answer'],
            contexts=context_texts,
        )
    )
print("Answer generation complete.")

# RAGAS consumes a Hugging Face Dataset
dataset = Dataset.from_list(generated_data)

# Metrics to score
metrics = [faithfulness, answer_relevancy, context_precision, context_recall]

print("\nRunning RAGAS evaluation... This may take a moment...")
# is_async=False is often more stable in Colab environments
# NOTE(review): no llm/embeddings are passed to evaluate() — confirm that the
# defaults RAGAS falls back to are available with the keys configured here.
result = evaluate(dataset, metrics, is_async=False)

print("\n--- RAGAS Evaluation Results ---")
# Show the results as a pandas DataFrame
df_results = result.to_pandas()
print(df_results)

ModuleNotFoundError: No module named 'langchain_openrouter'