In [1]:
!pip install -q -U google-generativeai langchain langchain-community langchain-google-genai chromadb unstructured

  DEPRECATION: Building 'langdetect' using the legacy setup.py bdist_wheel mechanism, which will be removed in a future version. pip 25.3 will enforce this behaviour change. A possible replacement is to use the standardized build interface by setting the `--use-pep517` option, (possibly combined with `--no-build-isolation`), or adding a `pyproject.toml` file to the source tree of 'langdetect'. Discussion can be found at https://github.com/pypa/pip/issues/6334
ERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
streamlit 1.48.0 requires requests<3,>=2.27, but you have requests 2.26.0 which is incompatible.


## 3. Load and Process Documents 📄

Now, let's load the documents containing information about Kalasalingam University admissions. These could be PDF files, text files, or other document types; in this notebook we first generate a small set of sample `.txt` files and then load those.

We'll use `unstructured` to load the documents and then split them into smaller chunks for better processing by the language model.

In [None]:
import os

# Directory that will hold the generated admissions data files
folder_name = "KalasalingamData_2025"

# Report an existing directory as-is; otherwise create it on this first run
if os.path.exists(folder_name):
    print(f"Folder '{folder_name}' already exists.")
else:
    os.makedirs(folder_name)
    print(f"✅ Folder '{folder_name}' created.")

# --- Content for the data files ---
# Each variable below holds the full text of one sample knowledge-base file.
# The text is Markdown-flavoured: lines starting with '#' inside the strings are
# headings that belong to the file content, not Python comments.
# The strings are written to disk further down with `.strip()` applied, so the
# leading/trailing newlines of each literal do not end up in the files.

# File 1: Fee Structure (B.Tech and Arts & Science first-year fees)
fee_content = """
# Kalasalingam University - Fee Structure 2025-2026

**B.Tech Courses (All Branches):**
- Tuition Fee per year: INR 1,60,000
- Caution Deposit (Refundable): INR 10,000
- Other Fees (Exam, Library, etc.): INR 15,000
- Total First Year Fee: INR 1,85,000

**Arts & Science (B.Sc / B.Com):**
- Tuition Fee per year: INR 50,000
- Total First Year Fee: INR 65,000

**Note:** Fees are subject to revision. Hostel and mess fees are separate.
"""

# File 2: B.Tech CSE Details (duration, eligibility, admission mode, specializations)
cse_details_content = """
# B.Tech - Computer Science and Engineering (CSE) Details 2025

**Duration:** 4 Years (8 Semesters)
**Eligibility:** A pass in 10+2 (or equivalent) with a minimum of 60% aggregate in Mathematics, Physics, and Chemistry.
**Mode of Admission:** Based on scores in KUEE (Kalasalingam University Entrance Exam) or JEE Main.

**Key Specializations Offered:**
- Artificial Intelligence and Machine Learning
- Cybersecurity
- Data Science
- Cloud Computing
"""

# File 3: Hostel Information (facilities plus yearly room and mess fees)
hostel_content = """
# Hostel Information 2025-2026

**Facilities:**
- Separate hostels for boys and girls.
- Both AC and Non-AC rooms are available.
- 24/7 Wi-Fi connectivity.
- In-house laundry service and recreational areas.

**Fees (per year):**
- Non-AC Room (3-person sharing): INR 65,000
- AC Room (3-person sharing): INR 90,000
- Mess Fee (Mandatory for all hostel residents): INR 45,000 per year.
"""

# File 4: Important Dates
# NOTE: the dates below are hard-coded and were written relative to the
# authoring date (July 29, 2025); they do not update when the notebook is re-run.
dates_content = """
# Important Dates - Admissions 2025

**KUEE 2025 (Phase 2):**
- Last Date to Apply: August 10, 2025
- KUEE Online Entrance Exam: August 18, 2025
- Publication of Results: August 22, 2025

**Counseling & Admission:**
- Counseling for Phase 2: August 25 - August 28, 2025
- Last Date for Admission Fee Payment: September 5, 2025
- Classes for First Year Begin: September 15, 2025
"""

# File 5: Frequently Asked Questions (FAQ)
# The phone number in the first answer is a placeholder ("+91-XXXXX-XXXXX").
faq_content = """
# Frequently Asked Questions (FAQ) 2025

**Q: Is there a management quota for admission?**
A: For details regarding direct admission under management quota, please contact the admissions office directly at +91-XXXXX-XXXXX.

**Q: What is the cutoff for CSE based on KUEE rank?**
A: The cutoff varies each year. For Phase 1 admissions, the closing rank was around 2500. Phase 2 cutoffs will be determined after the exam.

**Q: Can I get an education loan?**
A: Yes, the university provides all necessary documentation for students to apply for education loans from nationalized and private banks.

**Q: What is the medium of instruction in the classroom?**
A: The medium of instruction for all engineering and science courses is English.
"""

# --- Map every output filename to the text that goes into it ---
# The numeric prefixes keep the files in a stable, readable order on disk.
files_to_create = {
    "01_fee_structure_2025.txt": fee_content,
    "02_btech_cse_details_2025.txt": cse_details_content,
    "03_hostel_information_2025.txt": hostel_content,
    "04_important_dates_2025.txt": dates_content,
    "05_common_questions_faq.txt": faq_content,
}

# --- Persist each entry inside the data folder (existing files are overwritten) ---
for name, text in files_to_create.items():
    target = os.path.join(folder_name, name)
    with open(target, mode="w", encoding="utf-8") as handle:
        # Drop the blank lines that surround each triple-quoted literal
        handle.write(text.strip())
    print(f"- File '{name}' created.")

print("\n✅ All data files have been generated successfully!")

Folder 'KalasalingamData_2025' already exists.
- File '01_fee_structure_2025.txt' created.
- File '02_btech_cse_details_2025.txt' created.
- File '03_hostel_information_2025.txt' created.
- File '04_important_dates_2025.txt' created.
- File '05_common_questions_faq.txt' created.

✅ All data files have been generated successfully!


In [None]:
!pip install -qU faiss-cpu

In [None]:
#@title Load, Chunk, and Index Your New Files
# Loaders, embeddings and vector stores are provided by the `langchain-community`
# package; importing them through the old `langchain.*` paths is deprecated.
from langchain_community.document_loaders import DirectoryLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import FAISS

# Point to the folder with your newly created files
data_folder_path = "KalasalingamData_2025"

print("Loading documents from your folder...")
# Load all .txt files (searched recursively because of the "**/" glob)
loader = DirectoryLoader(data_folder_path, glob="**/*.txt", show_progress=True)
documents = loader.load()

print(f"Loaded {len(documents)} documents.")

# Fail early with a clear message: an empty document list would otherwise
# surface later as an obscure error while building the index.
if not documents:
    raise FileNotFoundError(
        f"No .txt files were loaded from '{data_folder_path}'. "
        "Run the data-generation cell first or check the folder path."
    )

print("\nSplitting documents into smaller chunks...")
# Chunks of up to 1000 characters, with 150 characters shared between neighbours
text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=150)
docs = text_splitter.split_documents(documents)
print(f"Split into {len(docs)} chunks.")

# Same reasoning as above: there is nothing to index without chunks.
if not docs:
    raise ValueError("The loaded documents produced no text chunks to index.")

print("\nCreating embeddings and indexing in FAISS vector store...")
# This model converts your text chunks into vectors for searching
model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(model_name=model_name)

# Create the vector store (the AI's knowledge base)
vector_store = FAISS.from_documents(docs, embedding=embeddings)

print("\n✅ AI Knowledge Base is built and ready!")

In [None]:
#@title Load an Alternative LLM (TinyLlama) and Create QA Chain
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
# `HuggingFacePipeline` is provided by `langchain-community`; the old
# `langchain.llms` import path is deprecated.
from langchain_community.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA

print("Loading the TinyLlama model... This is a smaller model and should load faster.")

# This is an open-access model that does not require login or agreements.
model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# Load the weights in bfloat16 and let the library pick the device placement.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Create a pipeline to run the model.
# `temperature` only takes effect in sampling mode; without `do_sample=True`
# generation is greedy and the temperature setting is ignored with a warning.
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=1024,
    do_sample=True,
    temperature=0.7,
)

# Wrap the transformers pipeline so LangChain chains can use it as an LLM
llm = HuggingFacePipeline(pipeline=pipe)

# Retriever over the indexed chunks; `k=3` is forwarded as a search argument
top_k_retriever = vector_store.as_retriever(search_kwargs={"k": 3})

# Wire the LLM and the retriever together into the question-answering chain.
# Source documents are returned alongside the answer so they can be inspected.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=top_k_retriever,
    return_source_documents=True,
)

print("\n✅ TinyLlama model is loaded! You can now ask questions.")

In [None]:
#@title Update the AI with a Human-like Personality
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# --- This is our new set of instructions for the AI ---
# `{context}` and `{question}` are the two placeholders filled in by the chain.
prompt_template = """
You are a friendly and helpful admissions assistant for Kalasalingam University.
Use the following context to answer the question.
Answer directly and concisely, as if you were speaking to someone on the phone.
Do not say "Based on the context" or "The context provided shows". Just give the answer.
If you don't know the answer, politely say "I'm sorry, I don't have that specific information right now, but our admissions office can help."

Context: {context}

Question: {question}

Helpful Answer:"""

# --- We create a prompt object from our instructions ---
HUMAN_PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

# --- Re-create the QA chain with our new human-like prompt ---
# Only the prompt is meant to change here, so the retriever keeps the same
# `k=3` setting that was used when the chain was first built.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": HUMAN_PROMPT},  # This is the key change
)

print("✅ AI personality updated! It will now sound more human.")

In [None]:
#@title Update the AI with a Simpler, Direct Prompt
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate

# --- A simpler, more direct set of instructions ---
# `{context}` and `{question}` are the two placeholders filled in by the chain.
prompt_template = """
Use the context below to answer the question. Provide a short, direct answer.

Context: {context}

Question: {question}

Answer:"""

# --- We create a prompt object from our new instructions ---
SIMPLE_PROMPT = PromptTemplate(
    template=prompt_template, input_variables=["context", "question"]
)

# --- Re-create the QA chain with our new simple prompt ---
# Only the prompt is meant to change here, so the retriever keeps the same
# `k=3` setting that was used when the chain was first built.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_store.as_retriever(search_kwargs={"k": 3}),
    return_source_documents=True,
    chain_type_kwargs={"prompt": SIMPLE_PROMPT},  # Using the new simple prompt
)

print("✅ AI instructions have been simplified for more direct answers.")

In [None]:
#@title Start Interactive Chat
# Simple read-answer loop: keeps asking for questions until the user types 'exit'.
while True:
    # Normalise once so that inputs such as " exit " or "Exit" are recognised
    query = input("\n🤔 Ask a question about Kalasalingam University admissions (or type 'exit'): ").strip()
    if not query:
        # Nothing typed: prompt again instead of sending an empty question
        continue
    if query.lower() == 'exit':
        break

    # Get the answer from your AI (`invoke` replaces the deprecated direct call)
    result = qa_chain.invoke({"query": query})

    # Print the neat answer
    print("\n💬 Answer:")
    print(result['result'].strip())

In [None]:
#@title Save the Knowledge Base to Google Drive
import os

print("Saving the knowledge base...")

# Define a path in your Google Drive to save the index
save_path = "/content/drive/MyDrive/Kalasalingam_VectorStore"

# Make sure the destination's parent folder exists before saving. If Google
# Drive is not mounted this folder is missing, and saving anyway would only
# write to the temporary local disk while still reporting success.
drive_folder = os.path.dirname(save_path)
if not os.path.isdir(drive_folder):
    raise FileNotFoundError(
        f"'{drive_folder}' does not exist. Mount Google Drive (or change "
        "save_path) before saving the knowledge base."
    )

# Save the FAISS index
vector_store.save_local(save_path)

print(f"✅ Knowledge base saved successfully to {save_path}")

Saving the knowledge base...
✅ Knowledge base saved successfully to /content/drive/MyDrive/Kalasalingam_VectorStore
