In [1]:
import os
import google.generativeai as genai

# Configure Gemini API
genai.configure(api_key="AIzaSyChUzmOrRlZRCtmY7nv90suM86bcUj1z58")

# Function to chunk text into smaller pieces
def chunk_text(text, chunk_size=1000):
    words = text.split()
    return [" ".join(words[i:i+chunk_size]) for i in range(0, len(words), chunk_size)]

# Read and process each file in the dataset folder
dataset_folder = "dataset"
chunks = {}

for file in os.listdir(dataset_folder):
    if file.endswith(".txt"):
        with open(os.path.join(dataset_folder, file), "r", encoding="utf-8") as f:
            text = f.read()
        
        chunks[file] = chunk_text(text)

print("✅ Preprocessing Complete! Text chunks created successfully.")

# Save preprocessed chunks (optional)
import json

with open("dataset/preprocessed_chunks.json", "w", encoding="utf-8") as f:
    json.dump(chunks, f, ensure_ascii=False, indent=4)

print("✅ Preprocessed data saved as 'preprocessed_chunks.json'")


✅ Preprocessing Complete! Text chunks created successfully.
✅ Preprocessed data saved as 'preprocessed_chunks.json'


  from .autonotebook import tqdm as notebook_tqdm


In [2]:
import json

# Load preprocessed data
with open("dataset/preprocessed_chunks.json", "r", encoding="utf-8") as f:
    chunks = json.load(f)

# Clean data
cleaned_chunks = {}
for file, text_chunks in chunks.items():
    unique_chunks = list(set(chunk.strip() for chunk in text_chunks if len(chunk.strip()) > 10))  # Remove empty/short
    cleaned_chunks[file] = unique_chunks

# Save cleaned data
with open("dataset/cleaned_chunks.json", "w", encoding="utf-8") as f:
    json.dump(cleaned_chunks, f, ensure_ascii=False, indent=4)

print("✅ Preprocessed data cleaned and saved as 'cleaned_chunks.json'")


✅ Preprocessed data cleaned and saved as 'cleaned_chunks.json'


In [3]:
pip install faiss-cpu sentence-transformers google-generativeai


Note: you may need to restart the kernel to use updated packages.


In [4]:
import os
import json
import faiss
import numpy as np
from sentence_transformers import SentenceTransformer

# Load cleaned data
with open("dataset/cleaned_chunks.json", "r", encoding="utf-8") as f:
    cleaned_chunks = json.load(f)

# Load embedding model
model = SentenceTransformer("all-MiniLM-L6-v2")

# Prepare data for FAISS
text_list = []
vector_list = []

for file, text_chunks in cleaned_chunks.items():
    for chunk in text_chunks:
        text_list.append(chunk)
        vector_list.append(model.encode(chunk))  # Convert text to embedding

# Convert to numpy array
vector_array = np.array(vector_list, dtype=np.float32)

# Initialize FAISS index
index = faiss.IndexFlatL2(vector_array.shape[1])
index.add(vector_array)  # Add vectors to FAISS

# Save FAISS index and text mappings
faiss.write_index(index, "dataset/faiss_index_cleaned.bin")

with open("dataset/text_mappings_cleaned.json", "w", encoding="utf-8") as f:
    json.dump(text_list, f, ensure_ascii=False, indent=4)

print("✅ FAISS index and cleaned text mappings saved!")


✅ FAISS index and cleaned text mappings saved!


In [6]:
import faiss
import numpy as np
import json
import os
from sentence_transformers import SentenceTransformer
import google.generativeai as genai

# ✅ Configure Gemini API with error handling
API_KEY = "AIzaSyChUzmOrRlZRCtmY7nv90suM86bcUj1z58"  # Replace with your actual API key
if not API_KEY or "AIza" not in API_KEY:
    raise ValueError("❌ Invalid API Key! Please provide a valid Gemini API key.")

genai.configure(api_key=API_KEY)

# ✅ Load FAISS index & text mappings
FAISS_INDEX_PATH = "dataset/faiss_index_cleaned.bin"
TEXT_MAPPING_PATH = "dataset/text_mappings_cleaned.json"

if not os.path.exists(FAISS_INDEX_PATH):
    raise FileNotFoundError(f"❌ FAISS index not found at {FAISS_INDEX_PATH}")

if not os.path.exists(TEXT_MAPPING_PATH):
    raise FileNotFoundError(f"❌ Text mapping file not found at {TEXT_MAPPING_PATH}")

index = faiss.read_index(FAISS_INDEX_PATH)

with open(TEXT_MAPPING_PATH, "r", encoding="utf-8") as f:
    text_list = json.load(f)

# ✅ Load embedding model
model = SentenceTransformer("all-MiniLM-L6-v2")

# ✅ Function to retrieve relevant text using FAISS
def retrieve_relevant_chunks(query, top_k=3):
    query_vector = np.array([model.encode(query)], dtype=np.float32)
    distances, indices = index.search(query_vector, top_k)
    
    return [text_list[i] for i in indices[0] if i < len(text_list)]  # Ensure valid indices

# ✅ Function to query Gemini API
def query_gemini(user_query):
    context = "\n\n".join(retrieve_relevant_chunks(user_query))  # Get relevant text
    
    prompt = f"Use the following context to answer:\n\n{context}\n\nUser Question: {user_query}"
    
    model = genai.GenerativeModel("gemini-1.5-pro-latest")
    response = model.generate_content(prompt)
    
    return response.text if response and hasattr(response, "text") else "❌ No response from Gemini API."

# ✅ Example query
user_question = "What are the courses offered at KSSEM?"
answer = query_gemini(user_question)

print("\nChatbot Response:", answer)



Chatbot Response: KSSEM offers the following courses:

* **Undergraduate:**
    * Mechanical Engineering
    * Civil Engineering
    * Electrical and Electronics Engineering
    * Computer Science and Engineering (CSE)
    * Computer Science and Business Systems

* **Postgraduate:**
    * Master of Technology (M.Tech.) in Computer Science and Engineering
    * Master of Technology (M.Tech.) in Structural Engineering
    * Master of Business Administration (MBA)



In [7]:
import google.generativeai as genai

genai.configure(api_key="AIzaSyChUzmOrRlZRCtmY7nv90suM86bcUj1z58")

models = genai.list_models()
for model in models:
    print(model.name)


models/chat-bison-001
models/text-bison-001
models/embedding-gecko-001
models/gemini-1.0-pro-vision-latest
models/gemini-pro-vision
models/gemini-1.5-pro-latest
models/gemini-1.5-pro-001
models/gemini-1.5-pro-002
models/gemini-1.5-pro
models/gemini-1.5-flash-latest
models/gemini-1.5-flash-001
models/gemini-1.5-flash-001-tuning
models/gemini-1.5-flash
models/gemini-1.5-flash-002
models/gemini-1.5-flash-8b
models/gemini-1.5-flash-8b-001
models/gemini-1.5-flash-8b-latest
models/gemini-1.5-flash-8b-exp-0827
models/gemini-1.5-flash-8b-exp-0924
models/gemini-2.0-flash-exp
models/gemini-2.0-flash
models/gemini-2.0-flash-001
models/gemini-2.0-flash-lite-001
models/gemini-2.0-flash-lite
models/gemini-2.0-flash-lite-preview-02-05
models/gemini-2.0-flash-lite-preview
models/gemini-2.0-pro-exp
models/gemini-2.0-pro-exp-02-05
models/gemini-exp-1206
models/gemini-2.0-flash-thinking-exp-01-21
models/gemini-2.0-flash-thinking-exp
models/gemini-2.0-flash-thinking-exp-1219
models/learnlm-1.5-pro-experim

In [9]:
print("FAISS Index Size:", index.ntotal)
print("Sample Text Chunk:", text_list[:2])  # Print first 2 chunks


FAISS Index Size: 12
Sample Text Chunk: ["1. Principal – K.S. School of Engineering and Management (KSSEM): Name: Dr. K. Rama Narasimha Position: Principal & Director, K.S. School of Engineering and Management Qualifications: Not publicly available Profile: Dr. K. Rama Narasimha is the Principal and Director of KSSEM. The institution has been shaping and molding professional engineers who serve society in various capacities. Under his leadership, KSSEM continues to focus on academic excellence, research, and holistic student development. Here are the Heads of Departments (HODs) at K.S. School of Engineering and Management (KSSEM): 1. Department of Mechanical Engineering Head: Dr. B. Balaji Qualifications: M.Tech., MISTE., MIE., Ph.D. Profile: Dr. Balaji has been leading the Mechanical Engineering Department since its inception in 2010. Under his guidance, the department offers both undergraduate and postgraduate programs, emphasizing research and practical application in areas like The

In [10]:
user_question = "What are the courses offered at KSSEM?"
retrieved_chunks = retrieve_relevant_chunks(user_question)

print("🔹 Retrieved Chunks:")
for i, chunk in enumerate(retrieved_chunks, 1):
    print(f"{i}. {chunk}")

answer = query_gemini(user_question)
print("\n🤖 Chatbot Response:", answer)


AttributeError: 'Model' object has no attribute 'encode'

In [11]:
import faiss
import numpy as np
import json
from sentence_transformers import SentenceTransformer

# Load FAISS index & text mappings
index = faiss.read_index("dataset/faiss_index_cleaned.bin")
with open("dataset/text_mappings_cleaned.json", "r", encoding="utf-8") as f:
    text_list = json.load(f)

# Load embedding model correctly
model = SentenceTransformer("all-MiniLM-L6-v2")

# Function to retrieve relevant chunks using FAISS
def retrieve_relevant_chunks(query, top_k=3):
    query_vector = np.array([model.encode(query)], dtype=np.float32)
    distances, indices = index.search(query_vector, top_k)
    return [text_list[i] for i in indices[0]]  # Return top-k chunks

# Function to query Gemini API (Ensure this is defined)
def query_gemini(user_query):
    context = "\n\n".join(retrieve_relevant_chunks(user_query))  # Get relevant text
    
    prompt = f"""You are an AI chatbot providing accurate information about an educational institution.  
    Use the context below to answer the user's question:  

    ### Context:  
    {context}  

    ### User Question:  
    {user_query}  

    ### Answer:"""

    model = genai.GenerativeModel("gemini-1.5-pro")
    response = model.generate_content(prompt)
    
    return response.text if response else "❌ I couldn't find an answer. Try rephrasing!"

# Test the function
user_question = "What are the courses offered at KSSEM?"
retrieved_chunks = retrieve_relevant_chunks(user_question)

print("🔹 Retrieved Chunks:")
for i, chunk in enumerate(retrieved_chunks, 1):
    print(f"{i}. {chunk}")

answer = query_gemini(user_question)
print("\n🤖 Chatbot Response:", answer)


🔹 Retrieved Chunks:
1. 1. Principal – K.S. School of Engineering and Management (KSSEM): Name: Dr. K. Rama Narasimha Position: Principal & Director, K.S. School of Engineering and Management Qualifications: Not publicly available Profile: Dr. K. Rama Narasimha is the Principal and Director of KSSEM. The institution has been shaping and molding professional engineers who serve society in various capacities. Under his leadership, KSSEM continues to focus on academic excellence, research, and holistic student development. Here are the Heads of Departments (HODs) at K.S. School of Engineering and Management (KSSEM): 1. Department of Mechanical Engineering Head: Dr. B. Balaji Qualifications: M.Tech., MISTE., MIE., Ph.D. Profile: Dr. Balaji has been leading the Mechanical Engineering Department since its inception in 2010. Under his guidance, the department offers both undergraduate and postgraduate programs, emphasizing research and practical application in areas like Thermal, Design, Manuf

In [12]:
import faiss
import numpy as np
import json
from sentence_transformers import SentenceTransformer

# Load FAISS index & text mappings
index = faiss.read_index("dataset/faiss_index_cleaned.bin")
with open("dataset/text_mappings_cleaned.json", "r", encoding="utf-8") as f:
    text_list = json.load(f)

# Load embedding model correctly
model = SentenceTransformer("all-MiniLM-L6-v2")

# Improved retrieval function
def retrieve_relevant_chunks(query, top_k=3):
    query_vector = np.array([model.encode(query)], dtype=np.float32)
    distances, indices = index.search(query_vector, top_k)
    
    retrieved_texts = [text_list[i] for i in indices[0]]

    # Keyword filtering to improve relevance
    keywords = ["course", "program", "degree", "B.E", "B.Tech", "M.Tech", "MBA"]
    filtered_texts = [text for text in retrieved_texts if any(kw in text.lower() for kw in keywords)]

    # If keyword filtering removes all, return original retrieved texts
    return filtered_texts if filtered_texts else retrieved_texts  

# Function to query Gemini API
def query_gemini(user_query):
    context = "\n\n".join(retrieve_relevant_chunks(user_query))  # Get relevant text
    
    prompt = f"""You are an AI chatbot providing accurate information about an educational institution.  
    Use the context below to answer the user's question:  

    ### Context:  
    {context}  

    ### User Question:  
    {user_query}  

    ### Answer:"""

    model = genai.GenerativeModel("gemini-1.5-pro")
    response = model.generate_content(prompt)
    
    return response.text if response else "❌ I couldn't find an answer. Try rephrasing!"

# Test the function
user_question = "What are the courses offered at KSSEM?"
retrieved_chunks = retrieve_relevant_chunks(user_question)

print("🔹 Retrieved Chunks:")
for i, chunk in enumerate(retrieved_chunks, 1):
    print(f"{i}. {chunk}")

answer = query_gemini(user_question)
print("\n🤖 Chatbot Response:", answer)


🔹 Retrieved Chunks:
1. 1. Principal – K.S. School of Engineering and Management (KSSEM): Name: Dr. K. Rama Narasimha Position: Principal & Director, K.S. School of Engineering and Management Qualifications: Not publicly available Profile: Dr. K. Rama Narasimha is the Principal and Director of KSSEM. The institution has been shaping and molding professional engineers who serve society in various capacities. Under his leadership, KSSEM continues to focus on academic excellence, research, and holistic student development. Here are the Heads of Departments (HODs) at K.S. School of Engineering and Management (KSSEM): 1. Department of Mechanical Engineering Head: Dr. B. Balaji Qualifications: M.Tech., MISTE., MIE., Ph.D. Profile: Dr. Balaji has been leading the Mechanical Engineering Department since its inception in 2010. Under his guidance, the department offers both undergraduate and postgraduate programs, emphasizing research and practical application in areas like Thermal, Design, Manuf

In [13]:
def query_gemini(user_query):
    retrieved_chunks = retrieve_relevant_chunks(user_query)  # Get relevant text
    context = "\n\n".join(retrieved_chunks)  # Convert list to string
    
    if not context:  # If no relevant context is found, handle it
        context = "No relevant information was found in the knowledge base."
    
    prompt = f"""You are an AI chatbot providing accurate information about an educational institution.  
    Use the context below to answer the user's question:  

    ### Context:  
    {context}  

    ### User Question:  
    {user_query}  

    ### Answer:"""
    
    model = genai.GenerativeModel("gemini-1.5-pro-latest")  # Choose a working model
    response = model.generate_content(prompt)
    
    return response.text if response and hasattr(response, "text") else "❌ No response from Gemini API."


In [14]:
user_question = "What are the courses offered at KSSEM?"
answer = query_gemini(user_question)
print("\nChatbot Response:", answer)



Chatbot Response: KSSEM offers the following courses:

*   **Undergraduate Programs:**
    *   Mechanical Engineering (B.E.)
    *   Civil Engineering (B.E.)
    *   Electrical and Electronics Engineering (B.E.)
    *   Computer Science and Engineering (B.E.)
    *   Computer Science and Business Systems (B.E.)

*   **Postgraduate Program:**
    *   Master of Business Administration (MBA)



In [15]:
user_question = "What is fullform of KSSEM?"
answer = query_gemini(user_question)
print("\nChatbot Response:", answer)


Chatbot Response: K.S. School of Engineering and Management



In [16]:
user_question = "Who is principal KSSEM?"
answer = query_gemini(user_question)
print("\nChatbot Response:", answer)


Chatbot Response: Dr. K. Rama Narasimha is the Principal & Director of K.S. School of Engineering and Management (KSSEM).



In [19]:
user_question = "Who is HOD of BE Computer science and engineering at KSSEM?"
answer = query_gemini(user_question)
print("\nChatbot Response:", answer)

ResourceExhausted: 429 Resource has been exhausted (e.g. check quota).

In [18]:
user_question = "who established  KSSEM?"
answer = query_gemini(user_question)
print("\nChatbot Response:", answer)

ResourceExhausted: 429 Resource has been exhausted (e.g. check quota).

In [1]:
import faiss

index = faiss.read_index("dataset/faiss_index_cleaned.bin")
print("Index loaded successfully!")

Index loaded successfully!


In [2]:
import os
print("Current Directory:", os.getcwd())
print("Files in dataset folder:", os.listdir("dataset"))

Current Directory: c:\Users\Rakshitha.N\OneDrive\Desktop\paddu chatbot
Files in dataset folder: ['1. Principal – K.S. School of Engin.txt', 'basic questions college.txt', 'cleaned_chunks.json', 'courses.txt', 'faculty_list_updated.TXT', 'faiss_index_cleaned.bin', 'Here’s the information for each ins.txt', 'ksa.txt', 'kssem.txt', 'ks_polytechnic.txt', 'preprocessed_chunks.json', 'scraped_data.txt', 'scraper.py', 'text_mappings_cleaned.json', 'timings and result.txt', 'TIMINGS.txt']


In [4]:
import os

file_path = "dataset/faiss_index_cleaned.bin"

# Change file permissions to allow read/write access
os.chmod(file_path, 0o777)  # Equivalent to chmod 777 in bash

print("File permissions updated successfully!")

File permissions updated successfully!
