In [5]:
import nltk
import numpy as np
import string
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# --- Step 1: Download NLTK resources (Run once) ---
# Re-running this cell is cheap: nltk.download() reports packages that are
# already present as "up-to-date" instead of fetching them again.
nltk.download('punkt') # For tokenization
# 'punkt_tab' is fetched here as well so that this cell does not depend on a
# separate, later cell having been executed first. Newer NLTK releases appear
# to load the tokenizer tables from this package -- confirm against the
# installed NLTK version.
nltk.download('punkt_tab')
nltk.download('wordnet') # For lemmatization
nltk.download('omw-1.4')

# --- Step 2: The Dataset (Knowledge Base) ---
# Hard-coded (question, answer) pairs; a real application would load these
# from a CSV file or a database instead.
_faq_pairs = [
    ("What is Python?",
     "Python is a high-level, interpreted programming language known for its simplicity."),
    ("How do I install Python?",
     "You can download Python from the official website python.org."),
    ("What are variables?",
     "Variables are containers for storing data values."),
    ("How do I write a comment?",
     "In Python, you can write a comment by starting the line with a # symbol."),
    ("Bye",
     "Goodbye! Have a great day coding."),
]
faqs = dict(_faq_pairs)

# Parallel lists used for matching: known_questions[i] is answered by answers[i].
known_questions = [question for question, _ in _faq_pairs]
answers = [answer for _, answer in _faq_pairs]

# --- Step 3: Preprocessing Function ---
# One shared lemmatizer instance, reused by every preprocess_text() call.
lemmatizer = nltk.stem.WordNetLemmatizer()

def preprocess_text(text):
    """Normalize raw text into a space-separated string of lemmatized tokens.

    Steps, in order: lowercase the text, delete every character found in
    ``string.punctuation``, tokenize with ``nltk.word_tokenize``, and pass
    each token through ``lemmatizer.lemmatize``.

    Args:
        text: The raw string to clean.

    Returns:
        The cleaned tokens joined by single spaces.
    """
    # Delete punctuation in one pass; the characters are removed outright,
    # not replaced by spaces.
    without_punctuation = text.lower().translate(
        str.maketrans("", "", string.punctuation)
    )

    # lemmatize() is called without a part-of-speech argument, so the
    # library default applies (noun, per the NLTK docs -- verify for the
    # installed version); verb forms may therefore pass through unchanged.
    lemmas = map(lemmatizer.lemmatize, nltk.word_tokenize(without_punctuation))

    return " ".join(lemmas)

# Clean the known questions once, up front, so only the user's input has to be
# preprocessed per query.
cleaned_known_questions = list(map(preprocess_text, known_questions))

# --- Step 4: The Chatbot Logic ---
def get_response(user_input, threshold=0.2):
    """Return the stored answer whose question best matches ``user_input``.

    The cleaned input is vectorized with TF-IDF together with the cleaned
    known questions, and compared to each of them by cosine similarity.

    Args:
        user_input: Raw text typed by the user.
        threshold: Minimum cosine similarity the best match must reach for
            its answer to be returned. Defaults to 0.2.

    Returns:
        The answer paired with the most similar known question, or a fixed
        "I don't understand" message when the best score is below
        ``threshold``, when nothing is left of the input after cleaning, or
        when there are no known questions to compare against.
    """
    fallback = "I am sorry, I don't understand that question."

    # Clean the user's input
    cleaned_input = preprocess_text(user_input)

    # Nothing to match (blank / punctuation-only input) or nothing to match
    # against: answer with the fallback without running the vectorizer.
    if not cleaned_input or not cleaned_known_questions:
        return fallback

    # The vectorizer is fitted on the known questions plus the query, so words
    # that appear only in the query still count toward the query vector.
    all_texts = cleaned_known_questions + [cleaned_input]
    tfidf_matrix = TfidfVectorizer().fit_transform(all_texts)

    # The last row is the user's input; every earlier row is a known question.
    # cosine_similarity returns a 1 x n matrix, so take its only row.
    similarity_scores = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1])[0]

    # Index of the most similar known question (also its index in `answers`).
    best_match_index = int(np.argmax(similarity_scores))

    # Threshold: if even the best match is too dissimilar, do not guess.
    if similarity_scores[best_match_index] < threshold:
        return fallback
    return answers[best_match_index]

# --- Step 5: Simple Chat Interface ---
print("Chatbot: Hello! Ask me anything about Python. (Type 'exit' to quit)")

while True:
    try:
        user_text = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # End of input or an interrupted kernel ends the chat the same way
        # typing 'exit' does, instead of surfacing a traceback.
        print("\nChatbot: Bye!")
        break

    # Ignore surrounding whitespace and letter case when checking for the
    # quit command, so inputs like " Exit " also leave the loop.
    if user_text.strip().lower() == 'exit':
        print("Chatbot: Bye!")
        break

    response = get_response(user_text)
    print(f"Chatbot: {response}")

[nltk_data] Downloading package punkt to C:\Users\MR ENGINEER
[nltk_data]     SOLUTION\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to C:\Users\MR ENGINEER
[nltk_data]     SOLUTION\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to C:\Users\MR ENGINEER
[nltk_data]     SOLUTION\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


Chatbot: Hello! Ask me anything about Python. (Type 'exit' to quit)
Chatbot: I am sorry, I don't understand that question.
Chatbot: Python is a high-level, interpreted programming language known for its simplicity.
Chatbot: In Python, you can write a comment by starting the line with a # symbol.
Chatbot: Variables are containers for storing data values.
Chatbot: I am sorry, I don't understand that question.
Chatbot: I am sorry, I don't understand that question.
Chatbot: Bye!


In [3]:
import nltk
# Fetch the 'punkt_tab' tokenizer data into the local nltk_data directory.
# Presumably needed by nltk.word_tokenize on newer NLTK releases -- TODO
# confirm against the installed version. As the last expression of the cell,
# the call's return value is shown as the cell output.
nltk.download('punkt_tab')

[nltk_data] Downloading package punkt_tab to C:\Users\MR ENGINEER
[nltk_data]     SOLUTION\AppData\Roaming\nltk_data...
[nltk_data]   Unzipping tokenizers\punkt_tab.zip.


True

In [4]:
# --- Step 1: Download NLTK resources (Run once) ---
import nltk

# Probe each resource in the local nltk_data search path and download only
# the packages whose lookup raises LookupError.
for resource_path, package in (
    ('tokenizers/punkt_tab', 'punkt_tab'),
    ('tokenizers/punkt', 'punkt'),
    ('corpora/wordnet', 'wordnet'),
):
    try:
        nltk.data.find(resource_path)
    except LookupError:
        nltk.download(package)

# omw-1.4 is requested unconditionally; as the last expression of the cell,
# its return value is what the cell displays.
nltk.download('omw-1.4')

[nltk_data] Downloading package wordnet to C:\Users\MR ENGINEER
[nltk_data]     SOLUTION\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package omw-1.4 to C:\Users\MR ENGINEER
[nltk_data]     SOLUTION\AppData\Roaming\nltk_data...
[nltk_data]   Package omw-1.4 is already up-to-date!


True

In [3]:
import nltk
import numpy as np
import string
import tkinter as tk
from tkinter import scrolledtext
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

# --- Step 1: Download NLTK resources (Safe Method) ---
# Download each package only when it cannot be found locally.
resources = ['punkt_tab', 'punkt', 'wordnet', 'omw-1.4']

# nltk_data category each package is installed under. Probing everything under
# 'tokenizers/' never finds the corpora, so they would be re-requested on
# every run.
_resource_categories = {
    'punkt_tab': 'tokenizers',
    'punkt': 'tokenizers',
    'wordnet': 'corpora',
    'omw-1.4': 'corpora',
}

def _nltk_resource_installed(resource_path):
    """Return True if ``resource_path`` or its ``.zip`` archive can be located."""
    # Some packages may be kept only as a zip archive, in which case a lookup
    # of the bare directory path can fail; probe the archive name as well.
    for candidate in (resource_path, resource_path + '.zip'):
        try:
            nltk.data.find(candidate)
        except LookupError:
            continue
        return True
    return False

for resource in resources:
    if not _nltk_resource_installed(f'{_resource_categories[resource]}/{resource}'):
        nltk.download(resource, quiet=True)

# --- Step 2: The Dataset (Knowledge Base) ---
# Maps every question the bot knows to its canned answer.
faqs = {
    "What is Python?":
        "Python is a high-level, interpreted programming language known for its simplicity.",
    "How do I install Python?":
        "You can download Python from the official website python.org.",
    "What are variables?":
        "Variables are containers for storing data values.",
    "How do I write a comment?":
        "In Python, you can write a comment by starting the line with a # symbol.",
    "Bye":
        "Goodbye! Have a great day coding.",
}

# Index-aligned views of the dict: known_questions[i] pairs with answers[i].
known_questions = [*faqs]
answers = [*faqs.values()]

# --- Step 3: Preprocessing Function ---
# One shared lemmatizer instance, reused by every preprocess_text() call.
lemmatizer = nltk.stem.WordNetLemmatizer()

def preprocess_text(text):
    """Return ``text`` lowercased, punctuation-free, tokenized and lemmatized.

    Every character listed in ``string.punctuation`` is deleted (not replaced
    by a space), the remainder is split with ``nltk.word_tokenize``, each
    token is passed through ``lemmatizer.lemmatize``, and the results are
    joined back together with single spaces.
    """
    kept_chars = filter(lambda ch: ch not in string.punctuation, text.lower())
    words = nltk.word_tokenize("".join(kept_chars))
    # lemmatize() receives only the word, so the library's default part of
    # speech is used for every token.
    return " ".join(map(lemmatizer.lemmatize, words))

# Clean the known questions a single time at startup; only the user's input
# needs preprocessing per message.
cleaned_known_questions = list(map(preprocess_text, known_questions))

# --- Step 4: The Chatbot Logic ---
def get_response(user_input, threshold=0.2):
    """Return the stored answer whose question best matches ``user_input``.

    The cleaned input is vectorized with TF-IDF together with the cleaned
    known questions, and compared to each of them by cosine similarity.

    Args:
        user_input: Raw text typed by the user.
        threshold: Minimum cosine similarity the best match must reach for
            its answer to be returned. Defaults to 0.2.

    Returns:
        The answer paired with the most similar known question, or a fixed
        "I don't understand" message when the best score is below
        ``threshold``, when nothing is left of the input after cleaning, or
        when there are no known questions to compare against.
    """
    fallback = "I am sorry, I don't understand that question."

    cleaned_input = preprocess_text(user_input)

    # Nothing to match (blank / punctuation-only input) or nothing to match
    # against: answer with the fallback without running the vectorizer.
    if not cleaned_input or not cleaned_known_questions:
        return fallback

    # Add user input to the known questions temporarily: the vectorizer is
    # fitted on both, so words that appear only in the query still count
    # toward the query vector.
    all_texts = cleaned_known_questions + [cleaned_input]
    tfidf_matrix = TfidfVectorizer().fit_transform(all_texts)

    # The last row is the user's input; every earlier row is a known question.
    # cosine_similarity returns a 1 x n matrix, so take its only row.
    similarity_scores = cosine_similarity(tfidf_matrix[-1], tfidf_matrix[:-1])[0]

    # Index of the most similar known question (also its index in `answers`).
    best_match_index = int(np.argmax(similarity_scores))

    # If even the best match is too dissimilar, do not guess.
    if similarity_scores[best_match_index] < threshold:
        return fallback
    return answers[best_match_index]

# --- Step 5: The GUI (Graphical User Interface) ---
def send_message():
    """Append the typed message and the bot's reply to the chat window.

    Reads the text from ``entry_box``; blank or whitespace-only input is
    ignored. Otherwise the message and the result of ``get_response`` are
    written to ``chat_window``, the view is scrolled to the end, and the
    entry box is cleared.
    """
    # 1. Get text from the input box
    user_text = entry_box.get()
    if user_text.strip() == "":
        return

    # 2. The chat window is kept read-only; unlock it only while writing.
    chat_window.config(state=tk.NORMAL)
    try:
        chat_window.insert(tk.END, "You: " + user_text + "\n")

        # 3. Get Bot Response
        bot_response = get_response(user_text)
        chat_window.insert(tk.END, "Bot: " + bot_response + "\n\n")

        # 4. Auto-scroll to the bottom
        chat_window.see(tk.END)
    finally:
        # Relock even if producing the response raised, so the transcript
        # never stays editable by the user.
        chat_window.config(state=tk.DISABLED)

    # 5. Clear the input box (skipped when an error propagated above, so the
    # unanswered text stays available to the user)
    entry_box.delete(0, tk.END)

# Top-level window: fixed 400x500 size, not resizable in either direction.
root = tk.Tk()
root.title("Python FAQ Chatbot")
root.geometry("400x500")
root.resizable(False, False)

# Transcript area. Created read-only (DISABLED); send_message() unlocks it
# only while appending text.
chat_window = scrolledtext.ScrolledText(
    root,
    width=50,
    height=20,
    wrap=tk.WORD,
    state=tk.DISABLED,
)
chat_window.pack(padx=10, pady=10)

# Single-line input field; pressing Enter sends the message just like the
# button does. The event object Tk passes to the handler is not needed.
entry_box = tk.Entry(root, width=40)
entry_box.pack(padx=10, pady=5)
entry_box.bind("<Return>", lambda _event: send_message())

# Green "Send" button wired to the same handler.
send_button = tk.Button(
    root,
    text="Send",
    command=send_message,
    bg="#4CAF50",
    fg="white",
)
send_button.pack(pady=5)

# Hand control to Tk's event loop; this call blocks until the window closes.
root.mainloop()