In [7]:
import nltk
import random
import string
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# Download necessary NLTK resources
nltk.download("punkt")
nltk.download("stopwords")
nltk.download("wordnet")

from nltk.corpus import stopwords
from nltk.stem import WordNetLemmatizer

lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words("english"))

# Predefined corpus (Questions and Answers)
corpus = {
    "hello": "Hello! How can I assist you?",
    "hi": "Hello! How can I assist you?",
    "how are you": "I'm fine, thank you! How about you?",
    "what is your name": "I am a chatbot created to help you.",
    "can you explain machine learning": "Sure! Machine learning is a field of AI that allows computers to learn from data.",
    "tell me a joke": "Why did the scarecrow win an award? Because he was outstanding in his field!",
    "goodbye": "Goodbye! Have a great day!",
    "what can you do": "I can answer general questions, help with AI concepts, and provide information."
}

# Preprocess text
def preprocess_input(text):
    tokens = nltk.word_tokenize(text.lower())
    tokens = [lemmatizer.lemmatize(word) for word in tokens if word not in stop_words and word not in string.punctuation]
    return " ".join(tokens)

# Prepare corpus keys and responses
questions = list(corpus.keys())
responses = list(corpus.values())

# Vectorize corpus
vectorizer = TfidfVectorizer()
X = vectorizer.fit_transform(questions)

# Chatbot response function
def chatbot_response(user_input):
    processed_input = preprocess_input(user_input)
    input_vector = vectorizer.transform([processed_input])

    # Compute similarity with all predefined questions
    similarity_scores = cosine_similarity(input_vector, X)
    best_match_index = np.argmax(similarity_scores)

    # If the similarity score is too low, return a default response
    if similarity_scores[0, best_match_index] < 0.3:
        return "I'm not sure how to respond to that. Can you rephrase?"

    return responses[best_match_index]

# Running the chatbot
print("Chatbot is ready! Type 'exit' to end.")
while True:
    user_input = input("You: ")
    if user_input.lower() == "exit":
        print("Chatbot: Goodbye! Have a great day!")
        break
    print("Chatbot:", chatbot_response(user_input))


[nltk_data] Downloading package punkt to
[nltk_data]     /Users/siddbohidar/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     /Users/siddbohidar/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/siddbohidar/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Chatbot is ready! Type 'exit' to end.


You:  Hi


Chatbot: Hello! How can I assist you?


You:  How are you?


Chatbot: I'm not sure how to respond to that. Can you rephrase?


You:  What is your name


Chatbot: I am a chatbot created to help you.


You:  tell me a joke


Chatbot: Why did the scarecrow win an award? Because he was outstanding in his field!


You:  what can you do"


Chatbot: I'm not sure how to respond to that. Can you rephrase?


You:  what can you do


Chatbot: I'm not sure how to respond to that. Can you rephrase?


You:  can you explain machine learning


Chatbot: Sure! Machine learning is a field of AI that allows computers to learn from data.


You:  goodbye


Chatbot: Goodbye! Have a great day!


You:  exit


Chatbot: Goodbye! Have a great day!
