In [1]:
!pip install pandas nltk scikit-learn




In [2]:
import pandas as pd

# Seed FAQ knowledge base. Each question is written next to its answer so the
# two columns cannot drift out of alignment when entries are added or removed.
_faq_pairs = [
    (
        "How much is the admission fee?",
        "Admission fee is ₹5000.",
    ),
    (
        "How can I apply for a hostel?",
        "Fill the hostel form online at hostel.university.edu.",
    ),
    (
        "When will exams start?",
        "Exams will begin in December as per the academic calendar.",
    ),
    (
        "What are the library timings?",
        "Library is open from 9 AM to 8 PM on weekdays.",
    ),
    (
        "Who is the dean of the university?",
        "Dr. Meera Sharma is the dean of the university.",
    ),
]

# Column-oriented view of the pairs: one list per column, in the same order.
data = {
    "Question": [question for question, _ in _faq_pairs],
    "Answer": [answer for _, answer in _faq_pairs],
}

# Build the table, persist it without the index column, and preview it.
df = pd.DataFrame(data)
df.to_csv("faq_dataset.csv", index=False)
df.head()


Unnamed: 0,Question,Answer
0,How much is the admission fee?,Admission fee is ₹5000.
1,How can I apply for a hostel?,Fill the hostel form online at hostel.universi...
2,When will exams start?,Exams will begin in December as per the academ...
3,What are the library timings?,Library is open from 9 AM to 8 PM on weekdays.
4,Who is the dean of the university?,Dr. Meera Sharma is the dean of the university.


In [4]:
import pandas as pd
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer
import string

# Fetch the NLTK resources used below, quietly and in a fixed order.
# NOTE(review): 'punkt_tab' was added because the tokenizer reported it as
# missing; presumably newer NLTK releases require it - confirm for your version.
for resource in ('punkt', 'stopwords', 'wordnet', 'punkt_tab'):
    nltk.download(resource, quiet=True)

# Reload the FAQ table that the dataset cell wrote to disk.
data = pd.read_csv("faq_dataset.csv")

# Shared helpers read by preprocess(): the English stop-word set and a lemmatizer.
stop_words = set(stopwords.words('english'))
lemmatizer = WordNetLemmatizer()

def preprocess(text):
    """Normalize a piece of text for TF-IDF matching.

    Lowercases ``text``, deletes every character in ``string.punctuation``,
    tokenizes the result with ``nltk.word_tokenize``, drops tokens found in
    ``stop_words`` and lemmatizes the rest.

    Returns:
        The surviving lemmas joined by single spaces (possibly an empty string).
    """
    punctuation_table = str.maketrans('', '', string.punctuation)
    cleaned = text.lower().translate(punctuation_table)

    lemmas = []
    for word in nltk.word_tokenize(cleaned):
        if word in stop_words:
            continue
        lemmas.append(lemmatizer.lemmatize(word))
    return " ".join(lemmas)

# Store a normalized copy of every FAQ question in a new column.
data['processed_question'] = data['Question'].map(preprocess)

# Fit TF-IDF on the normalized questions; X is the resulting document-term
# matrix that chatbot_response() compares incoming queries against.
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(data['processed_question'])

def chatbot_response(query, threshold=0.2):
    """Return the stored answer whose question best matches ``query``.

    The query is normalized with ``preprocess``, projected into the TF-IDF
    space of the fitted ``vectorizer`` and compared with every row of ``X``
    by cosine similarity.

    Args:
        query: Free-text question typed by the user.
        threshold: Minimum cosine similarity the best match must reach to be
            accepted. Defaults to 0.2, the cut-off that was previously
            hard-coded.

    Returns:
        The ``'Answer'`` value of the best-matching row of ``data``, or a
        helpdesk fallback message when the query is blank/None or the best
        similarity is below ``threshold``.
    """
    fallback = "I'm not sure about that. Please contact the university helpdesk."

    # A blank query can never match anything; answer without vectorizing.
    if not query or not query.strip():
        return fallback

    query_vec = vectorizer.transform([preprocess(query)])
    # cosine_similarity returns shape (1, n_questions); take the single row so
    # argmax is an unambiguous row position into `data`.
    scores = cosine_similarity(query_vec, X)[0]
    best = int(scores.argmax())
    if scores[best] < threshold:
        return fallback
    return data.iloc[best]['Answer']

In [5]:
# Interactive chat session: keep answering until the user types an exit
# command or the input stream ends.
EXIT_COMMANDS = {"exit", "quit", "bye"}

while True:
    try:
        query = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted kernel ends the chat cleanly
        # instead of leaving a traceback in the cell output.
        print("\nChatbot: Goodbye!")
        break
    # Strip before matching so inputs such as "bye " or " Exit" also quit.
    if query.strip().lower() in EXIT_COMMANDS:
        print("Chatbot: Goodbye!")
        break
    print("Chatbot:", chatbot_response(query))


You: how can i apply to hostel
Chatbot: Fill the hostel form online at hostel.university.edu.
You: bye
Chatbot: Goodbye!
