In [1]:
import nltk
import random
import string
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

In [2]:
# Fetch the NLTK data packages this notebook relies on; the call is a no-op
# (apart from the log lines) when the package is already up to date.
# NOTE(review): newer NLTK releases may also need 'punkt_tab' for the
# tokenizers — confirm against the installed NLTK version.
nltk.download('punkt')  # tokenizer models for sent_tokenize / word_tokenize
nltk.download('wordnet')  # WordNet data for WordNetLemmatizer

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\HP\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


True

In [3]:
# Step 1: Define Training Data (Chatbot Knowledge Base)
# One multi-line string holding every reply the bot can give. It is split into
# sentences with nltk.sent_tokenize further down, and chatbot_response returns
# one of those sentences verbatim, so each sentence should read as a
# self-contained answer.
corpus = """Hello! How can I help you today? 
I am a chatbot created to assist with basic queries. 
You can ask me about general topics. 
I can help with programming, weather updates, and small talk.
If you need specific help, please specify your question.
I do not have access to live data, but I can give useful information. 
How are you today? 
Tell me about your favorite hobby.
Goodbye! Have a great day!
"""


In [4]:
# Step 2: Preprocess Text (Tokenization & Lemmatization)
# Break the knowledge base down at two granularities:
#   word_tokens     - every individual word / punctuation token
#   sentence_tokens - whole sentences, i.e. the candidate chatbot replies
word_tokens = nltk.word_tokenize(corpus)
sentence_tokens = nltk.sent_tokenize(corpus)

In [5]:
# Shared WordNet lemmatizer instance, reused by preprocess_text for every token.
lemmatizer = nltk.stem.WordNetLemmatizer()

In [6]:
def preprocess_text(text):
    """Normalise ``text`` for similarity matching.

    The text is lower-cased, split into word tokens, stripped of punctuation
    tokens, and each remaining token is lemmatized with the module-level
    ``lemmatizer``.

    Args:
        text: Raw input string.

    Returns:
        The lemmatized tokens joined by single spaces (``""`` when nothing
        but punctuation or whitespace was supplied).
    """
    tokens = nltk.word_tokenize(text.lower())
    # Test character by character: `token in string.punctuation` would be a
    # substring check, which lets multi-character punctuation tokens such as
    # "...", "--", "``" and "''" slip through.
    lemmas = [
        lemmatizer.lemmatize(token)
        for token in tokens
        if not all(char in string.punctuation for char in token)
    ]
    return " ".join(lemmas)


In [7]:
# Step 3: Define a Function to Generate a Response
def chatbot_response(user_input, threshold=0.3):
    """Return the knowledge-base sentence that best matches ``user_input``.

    The query and every sentence in ``sentence_tokens`` are normalised with
    ``preprocess_text``, vectorised together with TF-IDF, and compared by
    cosine similarity.

    Args:
        user_input: Raw text typed by the user.
        threshold: Minimum cosine similarity the best sentence must reach to
            be returned. Defaults to 0.3.

    Returns:
        The original (un-normalised) sentence from ``sentence_tokens`` with
        the highest similarity, or an apology string when the query is empty,
        the knowledge base is empty, or the best score is below ``threshold``.
    """
    fallback = "I'm sorry, I don't understand that."

    query = preprocess_text(user_input)
    if not query or not sentence_tokens:
        # Nothing to compare: an empty query can never score above zero and
        # an empty knowledge base has no candidate replies.
        return fallback

    # Normalise the corpus exactly like the query so lemmatized query words
    # (e.g. "topics" -> "topic") still line up with the corpus vocabulary.
    # A fresh list is built so the shared ``sentence_tokens`` is never
    # mutated, even if vectorisation raises.
    documents = [preprocess_text(sentence) for sentence in sentence_tokens]
    documents.append(query)

    # Convert text to numerical representation using TF-IDF. The query is
    # part of the fit, so its terms contribute to the IDF weights.
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform(documents)

    # Last row is the query; all preceding rows are the corpus sentences.
    similarity_scores = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1])

    # similarity_scores has a single row, so the flat argmax is the index of
    # the best-matching sentence.
    response_idx = int(np.argmax(similarity_scores))
    confidence = similarity_scores[0, response_idx]

    if confidence < threshold:  # Best match is too weak to be relevant
        return fallback

    return sentence_tokens[response_idx]

# Step 4: Implement Chat Loop
# Greet the user and announce the exit keyword before the interactive loop
# starts reading input.
print("Chatbot: Hello! Type 'bye' to exit.")

Chatbot: Hello! Type 'bye' to exit.


In [8]:
# Interactive loop: read a line, answer it, and stop on an exit keyword.
while True:
    try:
        # Trim surrounding whitespace so inputs like "bye " still exit.
        user_input = input("You: ").strip()
    except (EOFError, KeyboardInterrupt):
        # Closed stdin or an interrupted kernel ends the chat like a normal
        # exit command instead of surfacing a traceback.
        user_input = 'bye'
    if user_input.lower() in ('bye', 'exit', 'quit'):
        print("Chatbot: Goodbye! Have a great day!")
        break
    print("Chatbot:", chatbot_response(user_input))

You: hello
Chatbot: Hello!
You: tell me about
Chatbot: Tell me about your favorite hobby.
You: dancing
Chatbot: I'm sorry, I don't understand that.
You: exit
Chatbot: Goodbye! Have a great day!
