In [1]:
import nltk
from nltk.stem import WordNetLemmatizer
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import random
import string

# Downloading necessary data for NLTK.
# 'punkt_tab' is requested alongside 'punkt' because recent NLTK releases load
# the models behind word_tokenize/sent_tokenize from that package; with only
# 'punkt' installed those calls fail with a LookupError.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('wordnet')

# Initialize the lemmatizer to reduce words to their base form
lemmatizer = WordNetLemmatizer()

# Predefined text that the chatbot uses to interact. It is later split into
# sentences, and each sentence is a candidate reply.
chatbot_corpus = """
Hey there! I am here to assist you with various queries. You can ask me about general information, help with tasks, or just chat!
Feel free to ask anything or share what you're working on, and I'll be happy to help!
"""

# Helper function to clean and preprocess the text
def clean_input(text):
    """Normalize raw text into a list of lemmatized, punctuation-free tokens.

    Args:
        text: The raw string to preprocess.

    Returns:
        A list of lower-cased tokens, each passed through the module-level
        ``lemmatizer``, with tokens made up solely of punctuation removed.
    """
    punctuation = set(string.punctuation)
    tokens = nltk.word_tokenize(text.lower())
    # Check every character instead of `token in string.punctuation`: that is
    # a substring test, so multi-character punctuation tokens such as "..."
    # or "``" slipped through the filter.
    return [
        lemmatizer.lemmatize(token)
        for token in tokens
        if not all(char in punctuation for char in token)
    ]

# Function to generate a chatbot response based on the input
def generate_response(user_input, threshold=0.4):
    """Return the corpus sentence most similar to ``user_input``.

    The input is preprocessed with ``clean_input``, projected into the fitted
    TF-IDF space and compared against every row of ``corpus_vectors`` using
    cosine similarity.

    Args:
        user_input: Raw text typed by the user.
        threshold: Similarity score the best match must exceed to be
            returned. Defaults to 0.4.

    Returns:
        The best-matching sentence from ``response_corpus``, or a fallback
        message asking the user to rephrase when no sentence scores above
        ``threshold``.
    """
    fallback = "Hmm, I'm not quite sure I understand. Could you rephrase that?"

    query = ' '.join(clean_input(user_input))
    input_vector = tfidf_vectorizer.transform([query]).toarray()  # Transform input to vector

    # A single query yields one row of scores; take that row so the argmax
    # index maps directly onto response_corpus.
    scores = cosine_similarity(input_vector, corpus_vectors)[0]
    best_match_idx = int(np.argmax(scores))

    if scores[best_match_idx] > threshold:
        return response_corpus[best_match_idx]
    return fallback

# Breaking down the corpus into sentences
response_corpus = nltk.sent_tokenize(chatbot_corpus)

# Vectorizing the sentences using TF-IDF to give weight to relevant words.
# Each sentence goes through clean_input first so the vocabulary is built from
# the same lemmatized tokens that generate_response passes to transform();
# otherwise inflected corpus words (e.g. "queries") can never match the
# lemmatized user input ("query"). response_corpus keeps the original
# sentences, so replies are still shown verbatim.
tfidf_vectorizer = TfidfVectorizer()
corpus_vectors = tfidf_vectorizer.fit_transform(
    [' '.join(clean_input(sentence)) for sentence in response_corpus]
).toarray()

# Main loop to run the chatbot interaction
def chatbot():
    """Run the interactive console loop until the user ends the session.

    Reads lines from standard input and prints the reply produced by
    ``generate_response`` for each one. The loop stops when the user types
    ``exit`` (case-insensitive, surrounding whitespace ignored) or when the
    input stream is closed or interrupted.
    """
    farewell = "Chatbot: It was great chatting with you! Goodbye!"
    print("Chatbot: Hi! How can I assist you today? (Type 'exit' to end the conversation)")

    while True:
        try:
            user_input = input("You: ")
        except (EOFError, KeyboardInterrupt):
            # End of input (Ctrl-D, closed pipe) or Ctrl-C: say goodbye
            # instead of surfacing a traceback.
            print()
            print(farewell)
            break

        # Strip so that "exit " or " Exit" still ends the conversation.
        if user_input.strip().lower() == 'exit':
            print(farewell)
            break

        response = generate_response(user_input)
        print(f"Chatbot: {response}")

# Start the chatbot only when this code is executed directly (run as a script
# or as a notebook cell), not when it is imported as a module.
if __name__ == "__main__":
    chatbot()


[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\DELL\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


Chatbot: Hi! How can I assist you today? (Type 'exit' to end the conversation)
You: exit
Chatbot: It was great chatting with you! Goodbye!
