In [4]:
# 📌 Install required libraries
# TASK 2: Chatbot for FAQs
!pip install nltk scikit-learn

# 📌 Import libraries
import nltk
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

nltk.download('punkt')
nltk.download('wordnet')
nltk.download('punkt_tab') # Add this line to download the missing resource

from nltk.stem import WordNetLemmatizer

# 📌 Preprocessing Function
lemmatizer = WordNetLemmatizer()

def preprocess(text):
    tokens = nltk.word_tokenize(text.lower())
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word.isalnum()]
    return " ".join(tokens)

# 📌 Example FAQ Dataset (You can expand this)
faqs = {
    "What is AI?": "AI stands for Artificial Intelligence. It enables machines to mimic human intelligence.",
    "What is NLP?": "NLP means Natural Language Processing. It helps computers understand human language.",
    "What is Machine Learning?": "Machine Learning is a subset of AI where systems learn patterns from data.",
    "How to contact support?": "You can reach support by emailing support@codealpha.tech.",
    "What is Python?": "Python is a popular programming language used in AI, data science, and web development.",
    "Who created ChatGPT?": "ChatGPT was developed by OpenAI."
}

# 📌 Preprocess FAQs
questions = list(faqs.keys())
answers = list(faqs.values())
preprocessed_questions = [preprocess(q) for q in questions]

# 📌 TF-IDF Vectorization
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(preprocessed_questions)

# 📌 Chatbot Function
def chatbot_response(user_query):
    user_query_processed = preprocess(user_query)
    user_vec = vectorizer.transform([user_query_processed])

    similarity = cosine_similarity(user_vec, X)
    idx = similarity.argmax()
    confidence = similarity[0][idx]

    if confidence > 0.2:  # Threshold
        return answers[idx]
    else:
        return "🤖 Sorry, I don't understand your question. Can you rephrase?"

# 📌 Interactive Loop (for testing in Colab)
print("Chatbot 🤖: Hello! Ask me anything (type 'bye' to exit)")

while True:
    user_input = input("You: ")
    if user_input.lower() in ["bye", "exit", "quit"]:
        print("Chatbot 🤖: Goodbye!")
        break
    response = chatbot_response(user_input)
    print("Chatbot 🤖:", response)



[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to /root/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package punkt_tab to /root/nltk_data...
[nltk_data]   Package punkt_tab is already up-to-date!


Chatbot 🤖: Hello! Ask me anything (type 'bye' to exit)
You: what is ai?
Chatbot 🤖: AI stands for Artificial Intelligence. It enables machines to mimic human intelligence.
You: what is ml?
Chatbot 🤖: AI stands for Artificial Intelligence. It enables machines to mimic human intelligence.
You: what is nlp?
Chatbot 🤖: NLP means Natural Language Processing. It helps computers understand human language.
You: bye
Chatbot 🤖: Goodbye!
