# 🎓 Multi-College Engineering Admissions Chatbot

## Comprehensive chatbot for Top 300+ Engineering Colleges in India

### Features:
- **15+ Top Engineering Colleges** (IITs, NITs, Private and State Universities)
- **Comprehensive Data** - Admissions, Fees, Placements, Facilities
- **Smart College Comparison** - Compare multiple colleges
- **Advanced Search** - Find colleges by location, ranking, fees
- **Current Data** - Latest 2025-26 admission data, loaded from local JSON files

### Supported Colleges:
- **IITs**: Bombay, Delhi, Madras, Kanpur, Kharagpur, Roorkee
- **NITs**: Trichy, Surathkal, Warangal
- **Private**: BITS Pilani, VIT Vellore, SRM Chennai
- **State**: Anna University, Jadavpur University
- **Special**: Kalasalingam University (Detailed)


In [None]:
# Install required packages
!pip install -q -U langchain langchain-community faiss-cpu sentence-transformers transformers torch pandas
print("✅ All packages installed!")

In [None]:
# Import required libraries
import os
import json
import torch
import pandas as pd
from pathlib import Path
from typing import List, Dict, Any, Optional

# LangChain imports
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.llms import HuggingFacePipeline
from langchain.chains import RetrievalQA
from langchain.prompts import PromptTemplate
from langchain.schema import Document

# Transformers imports
from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline

print("✅ All libraries imported successfully!")

In [None]:
# Multi-College Data Loader
class MultiCollegeLoader:
    """Load per-college JSON files and convert them into LangChain documents.

    The loader expects ``<base_directory>/<college_name>/*.json``: every
    non-hidden sub-directory is treated as one college, and every JSON file
    inside it becomes one titled section of that college's document.
    """

    def __init__(self, base_directory: str = "college_data"):
        """Remember the data root; nothing is read from disk here."""
        self.base_directory = Path(base_directory)
        self.colleges = {}

    def discover_colleges(self) -> List[str]:
        """Return the sorted names of all non-hidden college directories.

        Returns an empty list when the base directory is missing or is not
        a directory.
        """
        if not self.base_directory.is_dir():
            return []

        return sorted(
            item.name
            for item in self.base_directory.iterdir()
            if item.is_dir() and not item.name.startswith('.')
        )

    def load_college_data(self, college_name: str) -> Dict[str, Any]:
        """Load every ``*.json`` file of one college.

        Args:
            college_name: Name of the sub-directory below ``base_directory``.

        Returns:
            Mapping of file stem -> parsed JSON payload. Files that cannot be
            read or parsed are reported on stdout and skipped. An empty dict
            is returned when the college directory does not exist.
        """
        college_path = self.base_directory / college_name

        if not college_path.is_dir():
            return {}

        college_data = {}
        # Sorted so the section order (and therefore the generated text) is
        # the same on every run and filesystem.
        for json_file in sorted(college_path.glob("*.json")):
            try:
                # utf-8-sig reads plain UTF-8 as well and strips a leading
                # BOM, which json.load would otherwise reject.
                with open(json_file, 'r', encoding='utf-8-sig') as f:
                    college_data[json_file.stem] = json.load(f)
            except (OSError, ValueError) as e:
                # ValueError covers JSONDecodeError and UnicodeDecodeError.
                print(f"Error loading {json_file}: {e}")

        return college_data

    def convert_to_documents(self) -> "List[Document]":
        """Build one LangChain ``Document`` per college that has data.

        Colleges whose directory yields no loadable JSON are skipped.
        Progress is printed to stdout.
        """
        documents = []
        colleges = self.discover_colleges()

        print(f"📚 Loading data for {len(colleges)} colleges...")

        for college_name in colleges:
            college_data = self.load_college_data(college_name)

            if not college_data:
                continue

            # Convert each college's data to formatted text
            college_text = self.format_college_data(college_name, college_data)

            documents.append(
                Document(
                    page_content=college_text,
                    metadata={
                        "college_name": college_name,
                        # Derived from the configured root so it stays correct
                        # when base_directory is not the default.
                        "source": (self.base_directory / college_name).as_posix(),
                        "data_files": list(college_data.keys()),
                    },
                )
            )
            print(f"  ✅ Loaded: {college_name}")

        return documents

    @staticmethod
    def _format_json_section(obj, level=0) -> List[str]:
        """Render a parsed JSON value as indented, markdown-like lines."""
        lines = []
        indent = "  " * level

        if isinstance(obj, dict):
            for key, value in obj.items():
                formatted_key = key.replace('_', ' ').title()
                if isinstance(value, (dict, list)):
                    lines.append(f"{indent}**{formatted_key}:**")
                    lines.extend(
                        MultiCollegeLoader._format_json_section(value, level + 1)
                    )
                else:
                    lines.append(f"{indent}**{formatted_key}:** {value}")
        elif isinstance(obj, list):
            for item in obj:
                if isinstance(item, (dict, list)):
                    lines.extend(
                        MultiCollegeLoader._format_json_section(item, level)
                    )
                else:
                    lines.append(f"{indent}- {item}")
        elif obj is not None:
            # Only reached for a file whose top-level value is a bare scalar
            # (string/number/bool); keep it instead of dropping it silently.
            lines.append(f"{indent}{obj}")
        return lines

    def format_college_data(self, college_name: str, college_data: Dict[str, Any]) -> str:
        """Format college data into readable text.

        Args:
            college_name: Used for the top-level heading.
            college_data: Mapping of section key -> parsed JSON payload, as
                returned by ``load_college_data``.

        Returns:
            A single string with one ``##`` section per entry of
            ``college_data``, in the mapping's iteration order.
        """
        content_lines = [f"# {college_name} - Complete Information\n"]

        for file_key, data in college_data.items():
            section_title = file_key.replace('_', ' ').title()
            content_lines.append(f"\n## {section_title}\n")
            content_lines.extend(self._format_json_section(data))

        return "\n".join(content_lines)

# Create the shared loader instance; it reads from the default data directory
loader = MultiCollegeLoader(base_directory="college_data")
print("✅ Multi-College Loader initialized!")

In [None]:
# Load all college data and create documents
print("🔄 Loading all college data...")

# Convert all college data to documents (one Document per college)
all_documents = loader.convert_to_documents()

# Names of the loaded colleges, in the same order as all_documents
colleges_list = [doc.metadata['college_name'] for doc in all_documents]

print(f"\n📊 Data Summary:")
print(f"Total colleges loaded: {len(all_documents)}")

if all_documents:
    # Show college list
    print(f"\n🏫 Colleges in database:")
    for i, college in enumerate(colleges_list, 1):
        print(f"  {i:2d}. {college}")

    print(f"\n✅ All college data loaded successfully!")
else:
    # Do not report success when nothing was found: later cells need at
    # least one document to build the knowledge base.
    print(f"\n⚠️ No college data found in '{loader.base_directory}'.")
    print("Expected layout: <base_directory>/<college_name>/*.json")

In [None]:
# Process documents and create vector store
print("🔄 Processing documents for enhanced search...")

# Split documents into chunks
text_splitter = RecursiveCharacterTextSplitter(
    chunk_size=1200,
    chunk_overlap=200,
    length_function=len,
    separators=["\n\n", "\n", "##", "**", "- ", " ", ""]
)

docs = text_splitter.split_documents(all_documents)
print(f"📝 Split into {len(docs)} chunks for better processing")

# An index cannot be built without at least one chunk; stop here with a clear
# message instead of failing inside the vector-store call below.
if not docs:
    raise ValueError(
        "No document chunks to index - check that the college data directory "
        "exists and contains JSON files."
    )

# Create embeddings
print("🧠 Creating embeddings...")
model_name = "sentence-transformers/all-MiniLM-L6-v2"
embeddings = HuggingFaceEmbeddings(
    model_name=model_name,
    model_kwargs={'device': 'cpu'},
    # Unit-length vectors, as requested from the embedding model
    encode_kwargs={'normalize_embeddings': True}
)

# Create vector store
print("🗄️ Building comprehensive vector database...")
vector_store = FAISS.from_documents(docs, embedding=embeddings)

print("✅ Multi-college knowledge base created successfully!")

In [None]:
# Load language model
print("🤖 Loading language model...")

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"

tokenizer = AutoTokenizer.from_pretrained(model_id)
# trust_remote_code is deliberately NOT enabled: it lets the Hub repository
# run its own Python code on this machine, and this checkpoint loads through
# the Llama implementation that ships with transformers.
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    torch_dtype=torch.bfloat16,
    device_map="auto",
)

# Create pipeline
pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    max_new_tokens=512,
    temperature=0.7,
    do_sample=True,
    # Reuse EOS as the padding token id for generation
    pad_token_id=tokenizer.eos_token_id
)

# Wrap the transformers pipeline so LangChain chains can call it as an LLM
llm = HuggingFacePipeline(pipeline=pipe)

print("✅ Language model loaded successfully!")

In [None]:
# Prompt used by the QA chain: fixed counselor instructions followed by the
# retrieved {context} and the user's {question}.
multi_college_prompt_template = """
You are an expert admissions counselor for engineering colleges in India. 
You have comprehensive information about top engineering colleges including IITs, NITs, and private universities.

Guidelines:
- Provide accurate, specific information based on the context
- When comparing colleges, highlight key differences
- Include relevant details like fees, rankings, location, and specializations
- If asked about multiple colleges, provide comparative information
- For admission queries, mention entrance exams and eligibility
- Be helpful and provide actionable advice
- If information is not available, suggest contacting the college directly

Context: {context}

Question: {question}

Expert Answer:"""

# Both placeholders above are declared explicitly as template inputs
MULTI_COLLEGE_PROMPT = PromptTemplate(
    input_variables=["context", "question"],
    template=multi_college_prompt_template,
)

# Create QA chain
# The "stuff" chain pastes every retrieved chunk into a single prompt.
# NOTE(review): chunks are up to 1200 characters and generation may add 512
# tokens; with k=6 the prompt plus answer is likely to exceed TinyLlama's
# context window (2048 tokens per its model card - confirm). k=3 keeps the
# request within that budget.
qa_chain = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=vector_store.as_retriever(
        search_type="similarity",
        search_kwargs={"k": 3}
    ),
    # Keep the retrieved chunks in the result so sources can be displayed
    return_source_documents=True,
    chain_type_kwargs={"prompt": MULTI_COLLEGE_PROMPT}
)

print("✅ Multi-college chatbot is ready!")

In [None]:
# Pretty-printer for chain results, including the colleges the answer drew on
def format_multi_college_response(result):
    """Print the answer and the distinct colleges among its source documents.

    ``result`` must provide ``result['result']`` (the answer text) and may
    provide ``'source_documents'``, an iterable of objects exposing a
    ``metadata`` dict. Documents without a ``college_name`` entry are listed
    as 'Unknown'. Everything is written to stdout; nothing is returned.
    """
    answer_text = result['result'].strip()
    retrieved = result.get('source_documents', [])
    rule = "=" * 70

    print("\n" + rule)
    print("🎓 Multi-College Engineering Admissions Assistant")
    print(rule)
    print(answer_text)

    if retrieved:
        # De-duplicate: several chunks usually come from the same college
        unique_colleges = {
            source.metadata.get('college_name', 'Unknown') for source in retrieved
        }

        print(f"\n📚 Information sources ({len(unique_colleges)} colleges):")
        position = 0
        for name in sorted(unique_colleges):
            position += 1
            print(f"  {position}. {name}")

    print(rule)

# Test the multi-college chatbot
print("🎯 Testing the multi-college chatbot...")

test_questions = [
    "Compare IIT Bombay and IIT Delhi",
    "What are the fees for BITS Pilani?",
    "Which NITs are the best for computer science?",
    "Tell me about admission process for IITs"
]

for question in test_questions:
    print(f"\n❓ Test Question: {question}")
    try:
        # .invoke() replaces calling the chain object directly, which
        # LangChain has deprecated.
        result = qa_chain.invoke({"query": question})
        format_multi_college_response(result)
    except Exception as e:
        # Smoke test: report the failure and continue with the next question
        print(f"❌ Error: {e}")
    print("\n" + "-"*50)

In [None]:
# Interactive Multi-College Chat Session
print("\n🎓 Welcome to the Multi-College Engineering Admissions Chatbot!")
print(f"I have information about {len(colleges_list)} top engineering colleges in India.")
print("\n🏫 Available Colleges:")
for college in colleges_list:
    print(f"  • {college}")

print("\n💡 Sample Questions:")
print("  • Compare IIT Bombay vs IIT Delhi")
print("  • What are the fees for BITS Pilani?")
print("  • Best NITs for computer science")
print("  • Admission process for private colleges")
print("  • Placement statistics comparison")

print("\nType 'exit' to end the conversation.\n")

# Words that end the session (matched case-insensitively, ignoring padding)
exit_commands = {'exit', 'quit', 'bye'}

while True:
    try:
        # Strip once so "exit " still exits and whitespace-only input is
        # treated as empty instead of being sent to the model.
        query = input("\n🤔 Your Question: ").strip()

        if query.lower() in exit_commands:
            print("\n👋 Thank you for using the Multi-College Admissions Chatbot!")
            print("Good luck with your engineering college selection! 🚀")
            break

        if not query:
            continue

        # Get response (.invoke() replaces the deprecated direct chain call)
        result = qa_chain.invoke({"query": query})
        format_multi_college_response(result)

    except (KeyboardInterrupt, EOFError):
        # EOFError means stdin is closed/exhausted; if it fell through to the
        # generic handler below, input() would fail again on every iteration
        # and the loop would never end.
        print("\n\n👋 Chat session ended. Thank you!")
        break
    except Exception as e:
        print(f"\n❌ Sorry, I encountered an error: {e}")
        print("Please try rephrasing your question.")