In [None]:
!pip install google-generativeai langchain langchain_google_genai faiss-cpu langchain-text-splitters

Collecting langchain_google_genai
  Downloading langchain_google_genai-2.1.4-py3-none-any.whl.metadata (5.2 kB)
Collecting faiss-cpu
  Downloading faiss_cpu-1.11.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.8 kB)
Collecting filetype<2.0.0,>=1.2.0 (from langchain_google_genai)
  Downloading filetype-1.2.0-py2.py3-none-any.whl.metadata (6.5 kB)
INFO: pip is looking at multiple versions of langchain-google-genai to determine which version is compatible with other requirements. This could take a while.
Collecting langchain_google_genai
  Downloading langchain_google_genai-2.1.3-py3-none-any.whl.metadata (4.7 kB)
  Downloading langchain_google_genai-2.1.2-py3-none-any.whl.metadata (4.7 kB)
  Downloading langchain_google_genai-2.1.1-py3-none-any.whl.metadata (4.7 kB)
  Downloading langchain_google_genai-2.1.0-py3-none-any.whl.metadata (3.6 kB)
  Downloading langchain_google_genai-2.0.11-py3-none-any.whl.metadata (3.6 kB)
  Downloading langchain_google_genai-2.0.10-py3-none-any.whl.met

In [None]:
!pip install sentence-transformers langchain-community

Collecting langchain-community
  Downloading langchain_community-0.3.24-py3-none-any.whl.metadata (2.5 kB)
Collecting dataclasses-json<0.7,>=0.5.7 (from langchain-community)
  Downloading dataclasses_json-0.6.7-py3-none-any.whl.metadata (25 kB)
Collecting pydantic-settings<3.0.0,>=2.4.0 (from langchain-community)
  Downloading pydantic_settings-2.9.1-py3-none-any.whl.metadata (3.8 kB)
Collecting httpx-sse<1.0.0,>=0.4.0 (from langchain-community)
  Downloading httpx_sse-0.4.0-py3-none-any.whl.metadata (9.0 kB)
Collecting marshmallow<4.0.0,>=3.18.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading marshmallow-3.26.1-py3-none-any.whl.metadata (7.3 kB)
Collecting typing-inspect<1,>=0.4.0 (from dataclasses-json<0.7,>=0.5.7->langchain-community)
  Downloading typing_inspect-0.9.0-py3-none-any.whl.metadata (1.5 kB)
Collecting python-dotenv>=0.21.0 (from pydantic-settings<3.0.0,>=2.4.0->langchain-community)
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB

In [2]:
# from google.colab import drive
# drive.mount('/content/drive')
# Optional: uncomment the two lines above when running in Google Colab with the knowledge base stored on Google Drive.

In [None]:
import os
from getpass import getpass

# Never store an API key in the notebook itself: anyone who can read the file
# (or its version history) can use the key. Take it from the environment and
# fall back to a hidden interactive prompt when it is not already set.
# NOTE(review): the key that used to be hardcoded here must be treated as
# leaked and should be revoked/rotated.
if not os.environ.get("GOOGLE_API_KEY"):
    os.environ["GOOGLE_API_KEY"] = getpass("Enter your Google API key: ")

In [None]:
import json
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter # Good general purpose splitter

# --- 1. Load Knowledge Base ---
def load_knowledge_base(file_path="/content/drive/MyDrive/Trinity_AI/knowledge.json"):
    """Load the knowledge-base JSON file and turn every entry into a Document.

    The file must hold a top-level "knowledge_base" object that maps category
    keys to lists of item dicts. Each item is rendered as labelled text lines
    (Title, Category, Content, then any optional fields that are present) and
    wrapped in a Document whose metadata carries the item's id, title and
    category.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        A list with one Document per item, in file order.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file is not valid JSON.
        KeyError: If the top-level "knowledge_base" key is missing.
    """
    # Optional item keys and the label each one gets in the rendered text,
    # listed in the order they are appended.
    optional_fields = (
        ("contact", "Contact"),
        ("office_hours", "Office Hours"),
        ("location", "Location"),
        ("address", "Address"),
        ("hours", "Hours"),
        ("phone", "Phone"),
        ("website", "Website"),
        ("emergency", "Emergency"),
    )

    # JSON text is UTF-8; be explicit so the result does not depend on the
    # platform's default encoding.
    with open(file_path, 'r', encoding='utf-8') as f:
        data = json.load(f)

    all_docs = []
    for items in data["knowledge_base"].values():
        for item in items:
            # Title, category and content are always rendered (empty when absent).
            lines = [
                f"Title: {item.get('title', '')}",
                f"Category: {item.get('category', '')}",
                f"Content: {item.get('content', '')}",
            ]
            # Optional fields are included whenever the key is present,
            # regardless of whether the value is truthy.
            lines.extend(
                f"{label}: {item[key]}"
                for key, label in optional_fields
                if key in item
            )

            metadata = {
                "id": item.get("id"),
                "title": item.get("title"),
                "category": item.get("category"),
                # Add any other metadata you want to preserve and potentially filter on
            }
            all_docs.append(
                Document(page_content="\n".join(lines).strip(), metadata=metadata)
            )
    return all_docs

# Build the document list, then preview the first entry as a quick sanity check.
print("Loading and processing knowledge base...")
documents = load_knowledge_base()
print(f"Loaded {len(documents)} documents.")
if not documents:
    print("No documents loaded. Check your knowledge.json structure or path.")
else:
    print(
        "\nSample document:",
        f"Content: {documents[0].page_content[:300]}...",  # first 300 characters only
        f"Metadata: {documents[0].metadata}",
        sep="\n",
    )

# --- 2. Chunking (Optional but recommended for longer documents) ---
# Chunks hold at most 1000 characters and share 150 characters with their neighbour.
text_splitter = RecursiveCharacterTextSplitter(chunk_overlap=150, chunk_size=1000)
chunked_documents = text_splitter.split_documents(documents)
print(f"\nSplit into {len(chunked_documents)} chunks.")
if chunked_documents:
    # Preview the first chunk (content truncated to 300 characters).
    print(
        "\nSample chunk:",
        f"Content: {chunked_documents[0].page_content[:300]}...",
        f"Metadata: {chunked_documents[0].metadata}",
        sep="\n",
    )


# --- 3. Embedding and Storing in FAISS ---
print("\nInitializing Hugging Face Embeddings...")

# Choose a model from Hugging Face Model Hub (https://huggingface.co/models?library=sentence-transformers)
# Some popular choices:
# - 'all-MiniLM-L6-v2': Very fast, good quality for its size.
# - 'all-mpnet-base-v2': Higher quality, larger model.
# - 'multi-qa-MiniLM-L6-cos-v1': Good for QA tasks.
model_name = "sentence-transformers/all-MiniLM-L6-v2" # A good default
# model_name = "sentence-transformers/all-mpnet-base-v2" # A larger, potentially better model

# The embedding model is pinned to the CPU here; switch to 'cuda' if you have
# a GPU and PyTorch with CUDA installed.
model_kwargs = {'device': 'cpu'}
# Normalize embeddings for better similarity search with cosine.
encode_kwargs = {'normalize_embeddings': True}

hf_embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs
)
print(f"Hugging Face embedding model '{model_name}' loaded.")

print("Creating FAISS vector store with Hugging Face embeddings (this might take a moment)...")
if not chunked_documents:
    # Do not call exit() in a notebook: it shuts down the kernel (and the
    # Colab runtime). Raising stops this cell and any "Run All" instead,
    # while keeping the session alive for debugging.
    raise ValueError("No documents to create vector store from.")

vector_store = FAISS.from_documents(chunked_documents, hf_embeddings) # Use hf_embeddings here
print("FAISS vector store created successfully.")
# To persist the index for later sessions:
# vector_store.save_local("faiss_index_city_info_hf")


Loading and processing knowledge base...
Loaded 27 documents.

Sample document:
Content: Title: Building Permit Application Process
Category: permits
Content: To apply for a building permit in Metro City, you need to submit Form BP-101 along with architectural drawings, site plans, and structural calculations. The process takes 15-30 business days for review. Required documents include:...
Metadata: {'id': 'CS001', 'title': 'Building Permit Application Process', 'category': 'permits'}

Split into 27 chunks.

Sample chunk:
Content: Title: Building Permit Application Process
Category: permits
Content: To apply for a building permit in Metro City, you need to submit Form BP-101 along with architectural drawings, site plans, and structural calculations. The process takes 15-30 business days for review. Required documents include:...
Metadata: {'id': 'CS001', 'title': 'Building Permit Application Process', 'category': 'permits'}

Initializing Hugging Face Embeddings...


  hf_embeddings = HuggingFaceEmbeddings(
The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.5k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Hugging Face embedding model 'sentence-transformers/all-MiniLM-L6-v2' loaded.
Creating FAISS vector store with Hugging Face embeddings (this might take a moment)...
FAISS vector store created successfully.


In [None]:
from langchain_google_genai import ChatGoogleGenerativeAI
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# --- 4. Setup Retriever ---
# The retriever pulls the most relevant chunks out of the vector store.
# search_type may also be "mmr" or "similarity_score_threshold"; k=3 returns
# the three most similar documents.
retriever = vector_store.as_retriever(search_kwargs={"k": 3}, search_type="similarity")
print("\nRetriever configured.")

# --- 5. Setup LLM for Generation ---
# Answers are generated by a Gemini chat model (gemini-1.5-flash-latest).
# A low temperature keeps the output less creative and more factual.
llm = ChatGoogleGenerativeAI(temperature=0.3, model="gemini-1.5-flash-latest")
print("Gemini LLM (gemini-1.5-flash-latest) initialized.")

# --- 6. Create a Prompt Template ---
# Tells the LLM how to answer: use only the retrieved context that is filled
# into {context}, and answer the user's {question}.
prompt_template = """You are a Smart City Information Assistant for Metro City.
Your goal is to provide helpful, accurate, and concise answers to citizen's questions based ONLY on the provided context.
If the information is not in the context, say "I don't have information on that topic based on the provided documents." Do not make up answers.

Context:
{context}

Question: {question}

Helpful Answer:"""

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# --- 7. Create the RAG Chain (RetrievalQA) ---
# Wires retriever, LLM and prompt together.
# chain_type "stuff" places every retrieved document into the prompt context;
# "map_reduce", "refine" and "map_rerank" are the alternatives for longer contexts.
# return_source_documents=True also returns the retrieved documents.
rag_chain = RetrievalQA.from_chain_type(
    retriever=retriever,
    llm=llm,
    chain_type="stuff",
    return_source_documents=True,
    chain_type_kwargs={"prompt": PROMPT},
)
print("RAG chain created.")

# --- 8. Test the RAG Pipeline ---
def ask_assistant(query):
    """Run one question through the RAG chain and print the outcome.

    Prints the query, the assistant's answer and a snippet (up to 350
    characters) of every retrieved source document.

    Args:
        query: The question to send to the chain.

    Returns:
        The chain's result mapping, or None when any exception was raised
        while invoking the chain or printing its result (the error is printed
        rather than propagated).
    """
    print(f"\nQuery: {query}")
    try:
        result = rag_chain.invoke({"query": query})
        print(f"\nAssistant: {result['result']}")

        print("\nRetrieved Source Documents:")
        for position, source in enumerate(result['source_documents'], start=1):
            meta = source.metadata
            doc_id = meta.get('id', 'N/A')
            title = meta.get('title', 'N/A')
            print(f"--- Document {position} (ID: {doc_id}, Title: {title}) ---")
            snippet = source.page_content[:350]
            print(f"{snippet}...\n")
    except Exception as e:
        # Best-effort helper: report the failure and keep the notebook running.
        print(f"Error during RAG chain execution: {e}")
        return None
    else:
        return result

# Example Test Cases
print("\n--- TESTING RAG PIPELINE ---")
test_queries = [
    "How do I apply for a building permit?",
    "What are the library hours?",
    "When is garbage collection in Zone A?",
    "What's the process for starting a small business?",
    "What is the phone number for Animal Control?",
    "Are dogs allowed in Riverside Park?",
    "My power is out, who do I call?",
]

# Run the listed queries, followed by one more query where info might be missing.
for test_query in [*test_queries, "What's the best pizza place in Metro City?"]:
    ask_assistant(test_query)
    print("------------------------------------")


Retriever configured.
Gemini LLM (gemini-1.5-flash-latest) initialized.
RAG chain created.

--- TESTING RAG PIPELINE ---

Query: How do I apply for a building permit?

Assistant: To apply for a building permit, submit Form BP-101, architectural drawings, site plans, and structural calculations.  You will also need a property deed, contractor license, engineering reports (for structures over 2 stories), and an environmental clearance certificate.  Fees range from $500-$5000 depending on project size.  Submit your application online through the city portal or in person at City Hall, Room 205.  A simplified fast-track process is available for residential projects under 1000 sq ft (7-10 days).  The standard review time is 15-30 business days.

Retrieved Source Documents:
--- Document 1 (ID: CS001, Title: Building Permit Application Process) ---
Title: Building Permit Application Process
Category: permits
Content: To apply for a building permit in Metro City, you need to submit Form BP-101