# Persistent RAG System on Google Colab

This notebook implements a RAG system that **saves its knowledge base to Google Drive**.
This means you only have to process your PDF and text files once; subsequent runs load the existing database instead of rebuilding it.

### **Features:**
1. **Google Drive Integration**: Mounts your drive to save data.
2. **Smart Loading**: Checks if a vector DB exists before creating a new one.
3. **API Server**: Exposes the chat API via ngrok for your Mobile App.

In [None]:
# --- 1. INSTALL DEPENDENCIES ---
# Colab shell magic: installs/upgrades (-U) the notebook's dependencies quietly (-q).
# requests is pinned to 2.32.4 and opentelemetry-sdk is capped below 1.39.0;
# NOTE(review): presumably to stay compatible with Colab's preinstalled packages — confirm before changing.
!pip install -q -U requests==2.32.4 "opentelemetry-sdk<1.39.0" transformers accelerate bitsandbytes langchain langchain-community sentence-transformers chromadb pypdf unstructured networkx flask flask-cors pyngrok

In [None]:
# --- 2. IMPORTS ---
import os
import torch
from google.colab import drive
from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig, pipeline
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.document_loaders import DirectoryLoader, PyPDFLoader, TextLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain_community.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from flask import Flask, request, jsonify
from flask_cors import CORS
from pyngrok import ngrok
import threading

In [None]:
# --- 3. MOUNT DRIVE & CONFIG ---
# Mount first: every path below lives under /content/drive.
drive.mount('/content/drive')

# One project folder on Drive holds both the source documents and the vector DB.
DRIVE_ROOT = "/content/drive/MyDrive/rag_smart_parking"
DATA_PATH, DB_PATH = (
    os.path.join(DRIVE_ROOT, subfolder) for subfolder in ("documents", "chroma_db")
)

# Create the documents folder (and any missing parents); a no-op if it is already there.
os.makedirs(DATA_PATH, exist_ok=True)

# Tell the user where things live and where to drop their files.
print(
    f"📂 Working Directory: {DRIVE_ROOT}",
    f"📄 Put your .txt/.pdf files here: {DATA_PATH}",
    sep="\n",
)

In [None]:
# --- 4. INITIALIZE EMBEDDINGS ---
# Builds the sentence-embedding model that is used both to index documents
# and to embed incoming questions.
EMBEDDING_MODEL_NAME = "sentence-transformers/all-MiniLM-L6-v2"

# Use the GPU when one is attached, otherwise fall back to CPU so this cell
# does not crash on a runtime without CUDA.
EMBEDDING_DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

print("Loading Embedding Model...")
print(f"Embedding device: {EMBEDDING_DEVICE}")
embeddings = HuggingFaceEmbeddings(
    model_name=EMBEDDING_MODEL_NAME,
    model_kwargs={'device': EMBEDDING_DEVICE}
)

In [None]:
# --- 5. LOAD OR CREATE DATABASE ---
# Reuse the vector database stored at DB_PATH when it is there; otherwise build
# it from the files under DATA_PATH and persist it to DB_PATH.
#
# The database counts as existing when DB_PATH is a directory with at least one
# entry. isdir (rather than exists) keeps listdir from raising if a plain file
# happens to sit at DB_PATH.
db_exists = os.path.isdir(DB_PATH) and bool(os.listdir(DB_PATH))

if db_exists:
    print("✅ Found existing Vector Database on Drive. Loading...")
    vector_store = Chroma(persist_directory=DB_PATH, embedding_function=embeddings)
    print("Database Loaded!")
else:
    print("⚠️ No existing database found. Processing new data...")

    # Seed a demo file when the top level of DATA_PATH holds no .txt/.pdf files,
    # so there is something to index on a first run.
    if not any(fname.endswith(('.txt', '.pdf')) for fname in os.listdir(DATA_PATH)):
        print("Creating demo file since folder is empty...")
        with open(os.path.join(DATA_PATH, "demo_info.txt"), "w", encoding="utf-8") as f:
            f.write("Smart Parking System Knowledge Base. Users can book spots via the app. Payment is handled via ABA PayWay.")

    # Load Docs (text files first, then PDFs; both globs search subfolders too)
    documents = []
    documents.extend(DirectoryLoader(DATA_PATH, glob="**/*.txt", loader_cls=TextLoader).load())
    try:
        documents.extend(DirectoryLoader(DATA_PATH, glob="**/*.pdf", loader_cls=PyPDFLoader).load())
    except Exception as pdf_error:
        # PDF ingestion stays best-effort, but say so instead of failing silently:
        # otherwise a knowledge base that is missing its PDFs looks like a success.
        print(f"⚠️ PDF loading failed, continuing without PDFs: {pdf_error!r}")

    print(f"Loaded {len(documents)} documents.")

    # Split into 500-character chunks with a 50-character overlap
    text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=50)
    chunks = text_splitter.split_documents(documents)

    # Stop before Chroma gets a chance to write anything to DB_PATH: any file
    # there makes the next run treat the database as already built.
    if not chunks:
        raise RuntimeError(
            f"No text could be extracted from the files in {DATA_PATH}; "
            "add non-empty .txt/.pdf files and run this cell again."
        )

    # Create DB
    print(f"Creating Embeddings for {len(chunks)} chunks...")
    vector_store = Chroma.from_documents(
        documents=chunks,
        embedding=embeddings,
        persist_directory=DB_PATH
    )
    print("✅ Database Created and Saved to Drive!")

In [None]:
# --- 6. LOAD LLM (Mistral/Zephyr) ---
MODEL_NAME = "HuggingFaceH4/zephyr-7b-beta"

# Model files are cached inside the project folder on Drive.
MODEL_CACHE_PATH = os.path.join(DRIVE_ROOT, "models")
os.makedirs(MODEL_CACHE_PATH, exist_ok=True)

# 4-bit NF4 weights with double quantization; computation runs in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=True,
    bnb_4bit_compute_dtype=torch.bfloat16,
)

print(f"Loading {MODEL_NAME}...")
print(f"(First run will download to {MODEL_CACHE_PATH}, subsequent runs will be fast)")

# Tokenizer and model share the same cache directory.
tokenizer = AutoTokenizer.from_pretrained(MODEL_NAME, cache_dir=MODEL_CACHE_PATH)
model = AutoModelForCausalLM.from_pretrained(
    MODEL_NAME,
    cache_dir=MODEL_CACHE_PATH,
    device_map="auto",
    quantization_config=bnb_config,
)

# Generation settings: low-temperature sampling, a mild repetition penalty,
# at most 256 new tokens, and only the completion (not the prompt) returned.
generation_options = {
    "do_sample": True,
    "temperature": 0.2,
    "repetition_penalty": 1.1,
    "max_new_tokens": 256,
    "return_full_text": False,
}
text_generation_pipeline = pipeline(
    task="text-generation",
    model=model,
    tokenizer=tokenizer,
    **generation_options,
)

# Wrap the transformers pipeline so LangChain chains can call it as an LLM.
llm = HuggingFacePipeline(pipeline=text_generation_pipeline)

In [None]:
# --- 7. SETUP RETRIEVAL CHAIN ---
# Each question retrieves 3 chunks from the vector store.
retriever = vector_store.as_retriever(search_kwargs=dict(k=3))

# Chat-style prompt with <|system|>, <|user|> and <|assistant|> sections;
# {context} and {question} are filled in by the chain at query time.
prompt_template = (
    "\n"
    "<|system|>\n"
    "You are a helpful AI assistant for the Smart Parking App. Answer strictly based on the context provided.\n"
    "Context: {context}</s>\n"
    "<|user|>\n"
    "{question}</s>\n"
    "<|assistant|>\n"
)

PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=prompt_template,
)

# "stuff" chain: retrieved chunks are passed to the prompt as {context}.
# Source documents are returned alongside the answer.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    retriever=retriever,
    chain_type="stuff",
    chain_type_kwargs=dict(prompt=PROMPT),
    return_source_documents=True,
)

In [None]:
# --- 8. START API SERVER ---
app = Flask(__name__)
CORS(app)  # enable cross-origin requests to the API

@app.route('/api/chat', methods=['POST'])
def chat_endpoint():
    """Answer a chat question using the retrieval QA chain.

    Expects a JSON object body with a non-empty string under ``"query"``.

    Returns:
        ``{"answer": ...}`` with status 200 on success,
        ``{"error": ...}`` with status 400 for a missing/invalid body or query,
        ``{"error": ...}`` with status 500 if the chain raises.
    """
    # silent=True yields None for a missing or malformed JSON body instead of
    # raising, so bad requests get the same JSON error shape as everything else.
    data = request.get_json(silent=True)
    if not isinstance(data, dict):
        return jsonify({"error": "Request body must be a JSON object"}), 400

    query = data.get('query', '')
    if not isinstance(query, str) or not query.strip():
        return jsonify({"error": "No query"}), 400

    try:
        result = qa_chain.invoke({"query": query})
        return jsonify({"answer": result['result']})
    except Exception as e:
        # Request boundary: keep the server alive, record the traceback in the
        # server log, and hand the client a JSON error.
        app.logger.exception("Chat request failed")
        return jsonify({"error": str(e)}), 500

# NOTE(review): if ngrok asks for authentication, run the line below once with your own token.
# !ngrok config add-authtoken <YOUR_TOKEN_HERE>

# The tunnel and the Flask server must agree on the port, so name it once.
SERVER_PORT = 5000

tunnel = ngrok.connect(SERVER_PORT)
public_url = tunnel.public_url
print(f"\n\n🌟 YOUR PUBLIC URL: {public_url} 🌟\n\n")

# Starts the Flask development server; this call keeps the cell busy while it runs.
app.run(port=SERVER_PORT)