<a href="https://colab.research.google.com/github/Bandinaresh01/sepecific_domain_chat_bot/blob/main/projectmini.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# Install the notebook's third-party dependencies into the current runtime.
# NOTE(review): flask and python-dotenv are not imported by any cell visible in
# this notebook — confirm they are still needed.
!pip install flask google-generativeai faiss-cpu sentence-transformers python-dotenv

Collecting faiss-cpu
  Downloading faiss_cpu-1.10.0-cp311-cp311-manylinux_2_28_x86_64.whl.metadata (4.4 kB)
Collecting python-dotenv
  Downloading python_dotenv-1.1.0-py3-none-any.whl.metadata (24 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch>=1.11.0->sentence-transformers)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch>=1.11.0->

In [4]:
# ======================
# 📚 ENHANCED DOCUMENT Q&A SYSTEM
# ======================

# @title 🛠️ SETUP (Run this first!)
# Quiet (-q) install of the packages imported below. The print runs as soon as
# the install line returns; it does not check pip's exit status.
!pip install -q google-generativeai faiss-cpu sentence-transformers ipywidgets
print("✅ Packages installed successfully!")

# @title 🔑 STEP 1: Configure API
import google.generativeai as genai
import ipywidgets as widgets
from IPython.display import display, clear_output, HTML
from sentence_transformers import SentenceTransformer
import faiss
import pickle
import numpy as np
import os

# Your configuration
# The Gemini API key is taken from the GOOGLE_API_KEY environment variable, or
# prompted for without echo, instead of being stored in the notebook where every
# reader of the file could reuse it. Any key that was previously committed in
# this cell should be treated as leaked and rotated.
from getpass import getpass

genai.configure(api_key=os.environ.get("GOOGLE_API_KEY") or getpass("Gemini API key: "))
VECTOR_DB_PATH = "/content/drive/MyDrive/Educational_PDF/vectorstore"

# @title 🖥️ STEP 2: Create Interactive Interface
# Initialize models
# embedding_model turns questions into vectors for the FAISS search;
# gemini_model generates the final answer from the retrieved context.
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
gemini_model = genai.GenerativeModel('gemini-1.5-flash')

# UI Elements
# Subjects are the index files (*.faiss / *.index) in VECTOR_DB_PATH that have a
# sibling "<name before the first '.'>_texts.pkl". The directory can be missing
# (for example when Drive is not mounted yet); the dropdown then starts empty
# instead of the whole cell failing inside os.listdir.
if os.path.isdir(VECTOR_DB_PATH):
    # os.listdir returns entries in arbitrary order; sort for a stable dropdown.
    _vector_db_entries = sorted(os.listdir(VECTOR_DB_PATH))
else:
    _vector_db_entries = []
    print(f"⚠️ Vector store directory not found: {VECTOR_DB_PATH}")

subject_dropdown = widgets.Dropdown(
    options=[f for f in _vector_db_entries
             if f.endswith(('.faiss', '.index')) and
             os.path.exists(f"{VECTOR_DB_PATH}/{f.split('.')[0]}_texts.pkl")],
    description='Subject:',
    style={'description_width': 'initial'},
    layout=widgets.Layout(width='500px')
)

# Free-text box for the user's question; it spans the full width of the form.
question_input = widgets.Textarea(
    layout=widgets.Layout(height='100px', width='100%'),
    placeholder='Type your question here...',
)

# Button that starts a search; its click handler is attached further down.
submit_button = widgets.Button(
    layout=widgets.Layout(width='150px'),
    button_style='primary',
    description='Ask Question',
)

# Output widget that the click handler renders status messages and answers into.
output_area = widgets.Output()

# Style for UI
# Stylesheet for the answer card rendered after each question: .qa-card is the
# outer container, .question and .answer style the two text rows, and .context
# styles the collapsible supporting-text excerpt.
style = """
<style>
.qa-card {
    border: 1px solid #e0e0e0;
    border-radius: 10px;
    padding: 15px;
    margin: 10px 0;
    background: white;
    box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.question {
    font-weight: bold;
    color: #202124;
    font-size: 1.1em;
    margin-bottom: 8px;
}
.answer {
    color: #1a73e8;
    margin: 10px 0;
}
.context {
    color: #5f6368;
    font-size: 0.9em;
    border-top: 1px dashed #ddd;
    padding-top: 10px;
    margin-top: 10px;
}
</style>
"""

# Display the UI
# The stylesheet is emitted first so its classes exist before any card is drawn.
display(HTML(style))

# Heading, subject picker, question box, button and output area, stacked vertically.
display(
    widgets.VBox(
        children=(
            widgets.HTML("<h2 style='color:#202124;'>📚 Document Q&A System</h2>"),
            subject_dropdown,
            widgets.HTML("<h3 style='margin-top:20px;'>Ask a Question</h3>"),
            question_input,
            submit_button,
            output_area,
        )
    )
)

# @title 🚀 STEP 3: Question Answering Logic
def load_database(subject):
    """Load the FAISS index and the pickled text chunks for one subject.

    Args:
        subject: File name of the index inside VECTOR_DB_PATH (the value chosen
            in the subject dropdown). The text chunks are read from
            "<part of the name before the first '.'>_texts.pkl" in the same
            directory.

    Returns:
        ``(faiss_index, text_chunks)`` on success, or ``(None, None)`` when no
        subject was given or loading failed. The reason is printed in both
        failure cases.
    """
    # An empty dropdown yields None; bail out before calling str methods on it.
    if not subject:
        print("⚠️ No subject selected")
        return None, None

    base_name = subject.split('.')[0]
    try:
        faiss_index = faiss.read_index(f"{VECTOR_DB_PATH}/{subject}")
        # NOTE: unpickling can execute arbitrary code; only point VECTOR_DB_PATH
        # at files you created yourself.
        with open(f"{VECTOR_DB_PATH}/{base_name}_texts.pkl", "rb") as f:
            text_chunks = pickle.load(f)
        return faiss_index, text_chunks
    except Exception as e:
        print(f"❌ Error loading {subject}: {str(e)}")
        return None, None

def on_submit_button_clicked(b):
    """Handle a click on the submit button: retrieve context and show the answer.

    Reads the selected subject and the typed question from the widgets, looks up
    the 3 nearest text chunks in the subject's FAISS index, asks Gemini to answer
    from that context only, and renders the result inside ``output_area``.
    The question box is cleared after a search attempt.

    Args:
        b: The button instance passed in by ipywidgets; unused.
    """
    # Imported locally because another cell binds the global name `html` to a string.
    from html import escape

    with output_area:
        clear_output()
        subject = subject_dropdown.value
        question = question_input.value.strip()

        if not question:
            print("⚠️ Please enter a question")
            return

        print("🔍 Searching for answer...")

        # Load database
        faiss_index, text_chunks = load_database(subject)
        # load_database signals failure with None; compare explicitly instead of
        # relying on the truthiness of the index object.
        if faiss_index is None:
            return

        try:
            # Search documents
            query_vec = embedding_model.encode(question)
            _, indices = faiss_index.search(np.array([query_vec]), k=3)
            # FAISS pads the labels with -1 when the index holds fewer than k
            # vectors; without the lower bound, -1 would pick the last chunk.
            context = "\n\n".join(
                text_chunks[i] for i in indices[0] if 0 <= i < len(text_chunks)
            )

            # Generate answer
            prompt = f"""Answer using ONLY this context:
            {context}

            Question: {question}

            Rules:
            1. Be concise (1-2 sentences)
            2. If unsure, say "Not covered in materials"
            3. Never invent information

            Answer:"""

            answer = gemini_model.generate_content(prompt).text

            # Display results
            # All dynamic text is HTML-escaped so characters such as "<" or "&"
            # in the question, answer or documents cannot break or inject markup.
            excerpt = context[:300] + ('...' if len(context) > 300 else '')
            display(HTML(f"""
            <div class='qa-card'>
                <div class='question'>❓ {escape(question)}</div>
                <div class='answer'>🔍 {escape(answer)}</div>
                <details>
                    <summary>View supporting text</summary>
                    <div class='context'>{escape(excerpt)}</div>
                </details>
            </div>
            """))

        except Exception as e:
            display(HTML(f"<div style='color:red;'>⚠️ Error: {escape(str(e))}</div>"))

        question_input.value = ''  # Clear question box

# Attach the handler so each click on the button runs one question/answer cycle.
submit_button.on_click(on_submit_button_clicked)
print("✅ System ready! Select a subject and ask questions.")

✅ Packages installed successfully!


VBox(children=(HTML(value="<h2 style='color:#202124;'>📚 Document Q&A System</h2>"), Dropdown(description='Subj…

✅ System ready! Select a subject and ask questions.


In [5]:
# @title 📦 Install Packages
# Same package list as the quiet install line in the setup cell. The print runs
# when the install line returns; it does not check pip's exit status.
!pip install -q google-generativeai faiss-cpu sentence-transformers ipywidgets
print("✅ Packages installed!")

✅ Packages installed!


In [11]:
from google.colab import drive
# Mount Google Drive at /content/drive so paths under MyDrive become available.
# NOTE(review): the Q&A cell that lists the vectorstore directory has a lower
# execution count than this cell — on a fresh kernel the mount must run first.
drive.mount('/content/drive')

# Create the directory if it doesn't exist
!mkdir -p "/content/drive/MyDrive/Educational_PDF/vectorstore"

# Now upload your files to this folder using Colab's file browser

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [15]:
from google.colab import drive
# Mount Google Drive at /content/drive (the recorded output shows it was already mounted).
drive.mount('/content/drive')

# Create directory (if it doesn't exist)
# This is the directory the database-creation cells write their index and pickle files to.
!mkdir -p "/content/drive/MyDrive/Educational_PDF1"

# Now upload files using Colab's file browser

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


In [16]:
# @title 🔧 Create New FAISS Databases
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pickle
import os

# Create directory if needed
os.makedirs("/content/drive/MyDrive/Educational_PDF1", exist_ok=True)

# Sample documents - REPLACE THESE WITH YOUR ACTUAL CONTENT
# Each list entry is embedded as one vector and pickled as one text chunk by
# create_database below.
big_data_docs = [
    "Big data refers to extremely large datasets...",
    "Hadoop is a framework for distributed processing...",
    "Spark provides faster cluster computing...",
]

computer_vision_docs = [
    "Computer vision enables computers to interpret images...",
    "Convolutional Neural Networks (CNNs) are commonly used...",
    "OpenCV is a popular computer vision library...",
]

# Initialize model
# Same model name that the Q&A cells use to embed questions.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

def create_database(docs, subject_name, output_dir="/content/drive/MyDrive/Educational_PDF1"):
    """Create and save FAISS database for a subject.

    Embeds every document with the module-level ``model``, stores the vectors in
    a flat L2 index (``faiss.IndexFlatL2``) and writes two files into
    ``output_dir``: ``<subject_name>_faiss.index`` and ``<subject_name>_texts.pkl``.

    Args:
        docs: Non-empty sequence of text chunks; position i in the pickle
            corresponds to vector i in the index.
        subject_name: Prefix used for both output file names.
        output_dir: Directory to write into; created if missing. Defaults to the
            directory this notebook has always used.

    Raises:
        ValueError: If ``docs`` is empty (there would be no embedding dimension
            to build the index from).
    """
    # Validate before touching the filesystem or the embedding model.
    if len(docs) == 0:
        raise ValueError("docs must contain at least one document")

    os.makedirs(output_dir, exist_ok=True)
    index_path = os.path.join(output_dir, f"{subject_name}_faiss.index")
    texts_path = os.path.join(output_dir, f"{subject_name}_texts.pkl")

    # Generate embeddings
    embeddings = model.encode(docs)

    # Create FAISS index
    dimension = embeddings.shape[1]
    index = faiss.IndexFlatL2(dimension)
    # FAISS expects a C-contiguous float32 matrix.
    index.add(np.ascontiguousarray(embeddings, dtype=np.float32))

    # Save files
    faiss.write_index(index, index_path)
    with open(texts_path, "wb") as f:
        pickle.dump(docs, f)
    print(f"✅ Created {subject_name} database at:")
    print(index_path)
    print(texts_path)

# Create databases
# One database per subject; the second argument becomes the file-name prefix.
create_database(big_data_docs, "big_data")
create_database(computer_vision_docs, "computer_vision")

✅ Created big_data database at:
/content/drive/MyDrive/Educational_PDF1/big_data_faiss.index
/content/drive/MyDrive/Educational_PDF1/big_data_texts.pkl
✅ Created computer_vision database at:
/content/drive/MyDrive/Educational_PDF1/computer_vision_faiss.index
/content/drive/MyDrive/Educational_PDF1/computer_vision_texts.pkl


In [17]:
# @title 🔍 Verify Files Now Exist
import os

# File names that the database-creation step is expected to have written.
required_files = [
    "big_data_faiss.index",
    "big_data_texts.pkl",
    "computer_vision_faiss.index",
    "computer_vision_texts.pkl"
]

print("Final verification:")
missing = []
for name in required_files:
    present = os.path.exists(f"/content/drive/MyDrive/Educational_PDF1/{name}")
    marker = '✅' if present else '❌'
    print(f"  {marker} {name.ljust(25)}")
    if not present:
        missing.append(name)

# True only when every expected file was found.
all_exist = not missing

print(
    "\n🎉 All files are ready! You can now run your Q&A system."
    if all_exist
    else "\n❌ Still missing files. Please try uploading again."
)

Final verification:
  ✅ big_data_faiss.index     
  ✅ big_data_texts.pkl       
  ✅ computer_vision_faiss.index
  ✅ computer_vision_texts.pkl

🎉 All files are ready! You can now run your Q&A system.


In [18]:
# @title 🛠️ Create New FAISS Databases
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np
import pickle
import os

# 1. Create directory if needed
os.makedirs("/content/drive/MyDrive/Educational_PDF1", exist_ok=True)

# 2. Add YOUR documents here (replace these examples)
# Each list entry is embedded as one vector and pickled as one text chunk by
# create_database below.
BIG_DATA_DOCS = [
    "Big data involves analyzing large datasets that are too complex for traditional systems",
    "Hadoop is an open-source framework for distributed storage and processing of big data",
    "Spark provides faster big data processing through in-memory computation",
    "MapReduce is a programming model for processing large datasets in parallel"
]

COMPUTER_VISION_DOCS = [
    "Computer vision enables computers to interpret and understand visual information",
    "Convolutional Neural Networks (CNNs) are deep learning models for image processing",
    "OpenCV is a popular library for real-time computer vision applications",
    "Image segmentation partitions an image into multiple meaningful regions"
]

# 3. Initialize embedding model
# Same model name that the Q&A cells use to embed questions.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")

def create_database(documents, subject_name):
    """Embed ``documents`` and persist them as a FAISS index plus a pickle.

    Writes ``<subject_name>_faiss.index`` and ``<subject_name>_texts.pkl`` into
    /content/drive/MyDrive/Educational_PDF1 and prints progress messages.

    Args:
        documents: Text chunks to embed with the module-level ``model``.
        subject_name: Prefix used for both output file names.
    """
    print(f"\n🔧 Creating {subject_name} database...")

    # Embed the chunks, then load the vectors into a flat L2 index.
    doc_vectors = model.encode(documents)
    vector_dim = doc_vectors.shape[1]
    flat_index = faiss.IndexFlatL2(vector_dim)
    flat_index.add(np.array(doc_vectors))

    # Persist the index first, then the raw chunks it was built from.
    index_file = f"/content/drive/MyDrive/Educational_PDF1/{subject_name}_faiss.index"
    faiss.write_index(flat_index, index_file)
    texts_file = f"/content/drive/MyDrive/Educational_PDF1/{subject_name}_texts.pkl"
    with open(texts_file, "wb") as handle:
        pickle.dump(documents, handle)

    print(f"✅ Saved {subject_name}_faiss.index")
    print(f"✅ Saved {subject_name}_texts.pkl")

# 4. Create both databases
# One database per subject; the second argument becomes the file-name prefix.
create_database(BIG_DATA_DOCS, "big_data")
create_database(COMPUTER_VISION_DOCS, "computer_vision")

# Reached only if neither call above raised.
print("\n🎉 Both databases created successfully!")


🔧 Creating big_data database...
✅ Saved big_data_faiss.index
✅ Saved big_data_texts.pkl

🔧 Creating computer_vision database...
✅ Saved computer_vision_faiss.index
✅ Saved computer_vision_texts.pkl

🎉 Both databases created successfully!


In [19]:
# @title ✅ Verify Database Creation
import os

# Each entry pairs a file name in the database directory with a readable label.
files_to_check = [
    ("big_data_faiss.index", "Big Data Vector Index"),
    ("big_data_texts.pkl", "Big Data Texts"),
    ("computer_vision_faiss.index", "Computer Vision Vector Index"),
    ("computer_vision_texts.pkl", "Computer Vision Texts")
]

_db_dir = "/content/drive/MyDrive/Educational_PDF1"
# Pad every label to the longest one so the "- file" column lines up; a fixed
# width of 25 is shorter than "Computer Vision Vector Index".
_label_width = max(len(description) for _, description in files_to_check)

print(f"Verifying files in {_db_dir}/:")
all_ok = True

for file, description in files_to_check:
    path = f"{_db_dir}/{file}"
    exists = os.path.exists(path)
    print(f"{'✅' if exists else '❌'} {description.ljust(_label_width)} - {file}")
    if not exists:
        all_ok = False

if all_ok:
    print("\n🌟 All files are ready! Proceed to Q&A system.")
else:
    print("\n❌ Some files are missing. Re-run the creation cell.")

Verifying files in /content/drive/MyDrive/Educational_PDF1/:
✅ Big Data Vector Index     - big_data_faiss.index
✅ Big Data Texts            - big_data_texts.pkl
✅ Computer Vision Vector Index - computer_vision_faiss.index
✅ Computer Vision Texts     - computer_vision_texts.pkl

🌟 All files are ready! Proceed to Q&A system.


In [22]:
# @title 🌐 Interactive Q&A Interface
from IPython.display import display, HTML
import ipywidgets as widgets

# Custom CSS styling
# Stylesheet for the HTML form defined below (.qa-container, .header,
# .subject-selector, .question-box, .submit-btn) and for the answer markup that
# the backend callback renders (.answer-card, .context-toggle, .context-text).
css = """
<style>
.qa-container {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    max-width: 800px;
    margin: 20px auto;
    padding: 20px;
    border-radius: 10px;
    background: #f9f9f9;
    box-shadow: 0 4px 8px rgba(0,0,0,0.1);
}

.header {
    color: #2c3e50;
    text-align: center;
    margin-bottom: 25px;
    border-bottom: 2px solid #3498db;
    padding-bottom: 10px;
}

.subject-selector {
    background: #ecf0f1;
    padding: 15px;
    border-radius: 8px;
    margin-bottom: 20px;
}

.question-box {
    width: 100%;
    padding: 12px;
    border: 2px solid #bdc3c7;
    border-radius: 6px;
    font-size: 16px;
    margin-bottom: 15px;
    transition: border 0.3s;
}

.question-box:focus {
    border-color: #3498db;
    outline: none;
}

.submit-btn {
    background: #3498db;
    color: white;
    border: none;
    padding: 12px 25px;
    border-radius: 6px;
    cursor: pointer;
    font-size: 16px;
    transition: background 0.3s;
}

.submit-btn:hover {
    background: #2980b9;
}

.answer-card {
    background: white;
    border-left: 4px solid #3498db;
    padding: 15px;
    margin-top: 20px;
    border-radius: 6px;
    box-shadow: 0 2px 4px rgba(0,0,0,0.05);
}

.context-toggle {
    color: #3498db;
    cursor: pointer;
    margin-top: 10px;
    display: inline-block;
}

.context-text {
    background: #f8f9fa;
    padding: 10px;
    border-radius: 5px;
    margin-top: 10px;
    font-size: 14px;
    border-left: 3px solid #95a5a6;
}
</style>
"""

# HTML Structure
# Static form markup plus a click handler. The handler reads the question and
# the subject from the DOM, rejects an empty question with an alert, and
# otherwise invokes the kernel callback named 'notebook.get_answer' with
# [question, subject].
# NOTE(review): #answerContainer is not referenced by the script in this
# string — confirm where the answer output is expected to appear.
# NOTE(review): this cell's execution count (22) is higher than the backend
# cell's (21); the callback must be registered before the button is clicked.
html = """
<div class="qa-container">
    <div class="header">
        <h1>📚 Document Q&A System</h1>
        <p>Get answers from your uploaded documents</p>
    </div>

    <div class="subject-selector">
        <h3>Step 1: Select Subject</h3>
        <select id="subjectSelect" class="question-box">
            <option value="big_data">Big Data</option>
            <option value="computer_vision">Computer Vision</option>
        </select>
    </div>

    <div>
        <h3>Step 2: Ask a Question</h3>
        <textarea id="questionInput" class="question-box"
                  placeholder="Type your question here..."></textarea>
        <button id="submitBtn" class="submit-btn">Get Answer</button>
    </div>

    <div id="answerContainer"></div>
</div>

<script>
document.getElementById("submitBtn").onclick = function() {
    const question = document.getElementById("questionInput").value;
    const subject = document.getElementById("subjectSelect").value;

    if(!question) {
        alert("Please enter a question first!");
        return;
    }

    google.colab.kernel.invokeFunction('notebook.get_answer',
        [question, subject], {});
};
</script>
"""

# Render the stylesheet and the markup together as one HTML output.
display(HTML(css + html))

In [21]:
# @title 🧠 Q&A Backend Logic
import google.generativeai as genai
from sentence_transformers import SentenceTransformer
import faiss
import pickle
import numpy as np
import os

# Initialize models
# NOTE(review): genai.configure(...) is not called in this cell; the only
# visible call is in the widget-based Q&A cell — confirm that cell runs first on
# a fresh kernel.
embedding_model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
gemini_model = genai.GenerativeModel('gemini-1.5-flash')

# Database paths
# Directory the create_database cells write to.
# NOTE(review): this rebinds the same global name that load_database in the
# widget-based Q&A cell reads at call time — confirm that is intended.
VECTOR_DB_PATH = "/content/drive/MyDrive/Educational_PDF1"

def get_answer(question, subject):
    """Answer ``question`` from the stored documents of ``subject`` and display it.

    Loads ``<subject>_faiss.index`` and ``<subject>_texts.pkl`` from
    VECTOR_DB_PATH, retrieves the 3 nearest text chunks, asks Gemini to answer
    from that context only, and renders an answer card. Any exception is
    rendered as a red error box instead of being raised.

    Args:
        question: Question text as entered in the HTML form.
        subject: File-name prefix of the database to query.
    """
    # Imported locally because the UI cell binds the global name `html` to a string.
    from html import escape

    try:
        # Load database
        faiss_index = faiss.read_index(f"{VECTOR_DB_PATH}/{subject}_faiss.index")
        # NOTE: unpickling can execute arbitrary code; only load files you created.
        with open(f"{VECTOR_DB_PATH}/{subject}_texts.pkl", "rb") as f:
            text_chunks = pickle.load(f)

        # Search documents
        query_vec = embedding_model.encode(question)
        _, indices = faiss_index.search(np.array([query_vec]), k=3)
        # FAISS pads the labels with -1 when the index holds fewer than k
        # vectors; without the lower bound, -1 would pick the last chunk.
        context = "\n\n".join(
            text_chunks[i] for i in indices[0] if 0 <= i < len(text_chunks)
        )

        # Generate answer
        prompt = f"""Answer using ONLY this context:
        {context}

        Question: {question}

        Rules:
        1. Be concise (1-2 sentences)
        2. If unsure, say "Not covered in materials"
        3. Never invent information

        Answer:"""

        answer = gemini_model.generate_content(prompt).text

        # Format output
        # All dynamic text is HTML-escaped so characters such as "<" or "&" in
        # the question, answer or documents cannot break or inject markup.
        excerpt = context[:500] + ('...' if len(context) > 500 else '')
        display(HTML(f"""
        <div class="answer-card">
            <div style="font-weight:bold;">❓ Question: {escape(question)}</div>
            <div style="color:#2c3e50; margin:10px 0;">🔍 Answer: {escape(answer)}</div>
            <div class="context-toggle" onclick="this.nextElementSibling.style.display =
                this.nextElementSibling.style.display === 'none' ? 'block' : 'none'">
                ▼ Show supporting context
            </div>
            <div class="context-text" style="display:none;">
                {escape(excerpt)}
            </div>
        </div>
        """))

    except Exception as e:
        display(HTML(f"""
        <div style="color:red; padding:10px; background:#ffeeee; border-radius:5px;">
            ⚠️ Error: {escape(str(e))}
        </div>
        """))

# Register the function
# Makes get_answer callable from browser-side JavaScript under the name
# 'notebook.get_answer', which is the name the form's click handler invokes.
from google.colab import output
output.register_callback('notebook.get_answer', get_answer)