Load the Vector Database

In [1]:
# @title STEP 1: Install & Import (Run First)
!pip install chromadb
import chromadb



In [2]:
# @title STEP 2: Load Saved Vector Database
# This SKIPS all processing - loads your ready-to-use database
# Directory (relative to the notebook's working directory) passed to the Chroma client.
persistent_path = "./financial_policy_chromadb"
# Client constructed over that on-disk path.
client = chromadb.PersistentClient(path=persistent_path)
# Look up the collection by name. `collection` is the module-level handle that
# get_reliable_response() queries; the later "Load Your Existing Vector Database"
# cell rebinds it the same way.
# NOTE(review): presumably this raises if the collection does not exist -- confirm.
collection = client.get_collection("financial_policy")

print(f"✅ Database loaded! {collection.count()} chunks ready")
print("🚀 Skip to chatbot phase directly!")

✅ Database loaded! 19 chunks ready
🚀 Skip to chatbot phase directly!


In [3]:
# @title Install LLM Libraries
!pip install transformers torch accelerate bitsandbytes



In [4]:
# @title Install Required Libraries (Run This First)
!pip install sentence-transformers transformers torch accelerate bitsandbytes



In [5]:
# @title Verify Installations
# Smoke-test the installs by importing one entry point from each library.
# An ImportError is reported rather than re-raised, so the notebook keeps running.
try:
    from sentence_transformers import SentenceTransformer
    from transformers import AutoTokenizer, AutoModelForCausalLM
    import torch
except ImportError as import_error:
    print(f"❌ Installation error: {import_error}")
    print("Please run the install cell again.")
else:
    # Reached only when every import above succeeded.
    print("✅ All libraries installed successfully!")

✅ All libraries installed successfully!


In [6]:
# @title Load Your Existing Vector Database
import chromadb

# Load your pre-built vector database
persistent_path = "./financial_policy_chromadb"
client = chromadb.PersistentClient(path=persistent_path)
# `collection` is the module-level handle that get_reliable_response() queries.
collection = client.get_collection("financial_policy")

# Constant message: nothing is interpolated, so a plain string is used.
print("✅ Vector database loaded!")
print(f"📊 Ready with {collection.count()} chunks")

✅ Vector database loaded!
📊 Ready with 19 chunks


In [7]:
# @title Use Simpler, Faster Model
from transformers import pipeline

# distilgpt2 is picked because it is much smaller and faster to load
print("🧠 Loading faster model...")

try:
    # Build the text-generation pipeline around the small model
    text_generator = pipeline(
        task="text-generation",
        model="distilgpt2",
        device=-1,  # -1 keeps the model on CPU to avoid GPU memory issues
        truncation=True,
        max_length=200,
    )
    print("✅ Model loaded successfully!")
except Exception as load_error:
    # Leave a None sentinel; reliable_llm_answer() checks for it and falls
    # back to quoting the retrieved context.
    print(f"❌ Model loading failed: {load_error}")
    text_generator = None

🧠 Loading faster model...


Device set to use cpu


✅ Model loaded successfully!


In [9]:
# @title Reliable Answer Generator
def reliable_llm_answer(question, context):
    """Answer ``question`` from ``context`` using the module-level ``text_generator``.

    Falls back to quoting the beginning of ``context`` when no generator is
    loaded, when generation raises, or when the model produces no text.

    Args:
        question: The user's question, inserted verbatim into the prompt.
        context: Retrieved passage text the model is told to rely on.

    Returns:
        str: The generated continuation with the prompt removed, or a fallback
        excerpt of ``context``.
    """
    if text_generator is None:
        # Fallback to rule-based answers if LLM fails
        return f"Based on the financial policy document: {context[:250]}..."

    prompt = f"""You are an expert in finance. Answer this financial question using only the context below.

Context: {context}

Question: {question}

Answer:"""

    try:
        response = text_generator(
            prompt,
            max_new_tokens=100,
            temperature=0.7,
            do_sample=True,
            truncation=True,
            # 50256 is the GPT-2 family end-of-text token id, reused as padding.
            pad_token_id=50256,
            # Ask for the continuation only. Splitting the full text on
            # "Answer:" loses part of the answer when the model emits that
            # marker again, and returns the whole prompt when it is missing.
            return_full_text=False,
        )

        generated = response[0]['generated_text']
        # Defensive: if the generator still echoed the prompt, cut it off.
        if generated.startswith(prompt):
            generated = generated[len(prompt):]
        answer = generated.strip()

    except Exception as e:
        print(f"LLM error: {e}")
        answer = ""

    if not answer:
        # Generation failed or produced only whitespace: quote the context.
        return f"According to the document: {context[:200]}..."
    return answer

def get_reliable_response(question):
    """Retrieve the top passages for ``question`` and generate an answer.

    Queries the module-level ``collection`` for the two nearest chunks, labels
    each with its page number, and passes the combined text to
    ``reliable_llm_answer``.

    Args:
        question: The user's question; used both as the search query and in
            the prompt.

    Returns:
        str: The generated (or fallback) answer, or a fixed "couldn't find"
        message when the search returns no documents.
    """
    # Search vector database
    results = collection.query(
        query_texts=[question],
        n_results=2,
        include=['metadatas', 'documents']
    )

    # Results hold one inner list per query text, so "no hits" arrives as
    # [[]], which is truthy. Inspect the inner list for our single query.
    documents = ((results or {}).get('documents') or [[]])[0] or []
    metadatas = ((results or {}).get('metadatas') or [[]])[0] or []

    if not documents:
        return "I couldn't find information about that topic."

    passages = []
    for index, doc in enumerate(documents):
        # Metadata can be absent or None for a chunk; label the page "?"
        # rather than failing the whole request.
        metadata = metadatas[index] if index < len(metadatas) else None
        page = (metadata or {}).get('page_number', '?')
        passages.append(f"[Page {page}]: {doc}\n")

    return reliable_llm_answer(question, "".join(passages))

In [14]:
# @title Test Reliable Version
# End-to-end smoke test: retrieve context for one question and print the answer.
test_question = "What is budget?"

print(f"❓ Question: {test_question}")
print("=" * 50)

response = get_reliable_response(test_question)
# Constant label: nothing is interpolated, so a plain string is used.
print("💡 Answer:")
print(response)
print("=" * 50)

❓ Question: What is budget?
💡 Answer:
Budget is a fiscal instrument that is used to monitor the economic performance of the State and the Territory. In short, the Budget is a budget instrument that is used to monitor the economic performance of the State and the Territory. The Budget provides an aggregate surplus over four years. Short and long-term financial indicators are sound.
Figure 1: Fiscal instrument
Figure 2: Budget: budgetary instrument
Figure 3: Budget: budget
Figure 4: Budget: budget
Figure 5: Budget: budget
