In [11]:
import nltk
from nltk.tokenize import word_tokenize
from nltk.corpus import wordnet
from nltk.stem import WordNetLemmatizer
from nltk.corpus import stopwords
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
import string

# Download required NLTK data.
# 'punkt_tab' is the tokenizer resource that recent NLTK releases (3.9+) load
# for word_tokenize(); 'punkt' is still fetched for older releases.  Packages
# that are already present are reported as up-to-date and not re-downloaded.
nltk.download('punkt')
nltk.download('punkt_tab')
nltk.download('wordnet')
nltk.download('stopwords')

# Initialize lemmatizer and stopwords (shared by the text helpers below)
lemmatizer = WordNetLemmatizer()
stop_words = set(stopwords.words('english'))

# Knowledge base for academic assistance.
#
# Layout: category name -> {"keywords": [...], "questions": [...]}
#   "keywords"  - lowercase single words; get_category() counts how many of
#                 them appear in the user's input to choose a category.
#   "questions" - (question, answer) tuples; find_best_response() compares the
#                 user's input against the question text and returns the
#                 answer paired with the closest question.
knowledge_base = {
    "math": {
        "keywords": ["math", "mathematics", "algebra", "calculus", "equation", "solve"],
        "questions": [
            ("how to solve a quadratic equation", "A quadratic equation ax² + bx + c = 0 is solved using the formula x = [-b ± √(b² - 4ac)] / 2a. For example, for x² + 3x + 2 = 0, a=1, b=3, c=2, so x = [-3 ± √(9 - 8)] / 2 = [-3 ± 1] / 2, giving x = -1 or x = -2."),
            ("what is a derivative", "A derivative represents the rate of change of a function. For f(x) = x², the derivative is f'(x) = 2x, found using the power rule: d/dx(x^n) = nx^(n-1)."),
            ("explain pythagorean theorem", "The Pythagorean theorem states that in a right triangle, a² + b² = c², where c is the hypotenuse. For example, if legs are 3 and 4, then c = √(3² + 4²) = √25 = 5.")
        ]
    },
    "science": {
        "keywords": ["science", "physics", "chemistry", "biology", "force", "atom", "cell"],
        "questions": [
            ("what is newton's first law", "Newton's First Law states that an object at rest stays at rest, and an object in motion stays in motion unless acted upon by an external force. For example, a book on a table remains stationary unless pushed."),
            ("what is photosynthesis", "Photosynthesis is the process by which plants use sunlight, water, and CO₂ to produce glucose and oxygen. The equation is 6CO₂ + 6H₂O → C₆H₁₂O₆ + 6O₂."),
            ("explain atomic structure", "An atom consists of a nucleus (protons and neutrons) and electrons orbiting it. For example, a carbon atom has 6 protons, 6 neutrons, and 6 electrons.")
        ]
    },
    "programming": {
        "keywords": ["programming", "code", "python", "java", "algorithm", "debug"],
        "questions": [
            ("how to write a python loop", "In Python, a for loop iterates over a sequence: `for i in range(5): print(i)` prints 0 to 4. A while loop runs until a condition is false: `x = 0; while x < 5: print(x); x += 1`."),
            ("what is a function in programming", "A function is a reusable block of code that performs a task. In Python, define it with `def`: e.g., `def add(a, b): return a + b` computes the sum of two numbers."),
            ("explain debugging", "Debugging is the process of finding and fixing errors in code. For example, in Python, use print statements or a debugger like pdb to trace variable values and identify issues.")
        ]
    }
}

# Preprocess text: tokenize, lemmatize, remove stopwords and punctuation
def preprocess_text(text):
    """Normalize text for TF-IDF matching.

    Lowercases and tokenizes ``text`` with ``word_tokenize``, drops tokens made
    up solely of punctuation characters and tokens found in ``stop_words``,
    lemmatizes what remains and joins the result with single spaces.

    Args:
        text: Raw input string.

    Returns:
        A space-separated string of lemmatized tokens; the empty string when
        no token survives filtering.
    """
    punctuation = set(string.punctuation)
    kept = []
    for token in word_tokenize(text.lower()):
        # A substring test against string.punctuation only catches tokens that
        # happen to be a contiguous slice of that string, so multi-character
        # tokens such as "``", "''", "--" or "..." used to slip through.
        # Checking every character removes all punctuation-only tokens.
        if all(ch in punctuation for ch in token):
            continue
        if token in stop_words:
            continue
        kept.append(lemmatizer.lemmatize(token))
    return ' '.join(kept)

# Get category based on keywords
def get_category(user_input):
    """Pick the knowledge-base category whose keywords best match the input.

    The input is lowercased and tokenized; each category is scored by how many
    of its keywords occur among the tokens or their lemmas.

    Args:
        user_input: Raw text typed by the user.

    Returns:
        The name of the highest-scoring category, or ``None`` when no keyword
        matches.  On a tie the category that comes first in ``knowledge_base``
        wins, because only a strictly larger score replaces the current best.
    """
    tokens = word_tokenize(user_input.lower())
    # Match on lemmas as well as surface forms so that plurals such as
    # "equations" or "atoms" still hit the singular keywords, consistent with
    # the lemmatization applied in preprocess_text().
    user_tokens = set(tokens) | {lemmatizer.lemmatize(token) for token in tokens}

    best_category = None
    max_overlap = 0
    for category, data in knowledge_base.items():
        overlap = len(user_tokens & set(data["keywords"]))
        if overlap > max_overlap:
            max_overlap = overlap
            best_category = category
    # best_category is still None if no category scored above zero.
    return best_category

# Find best matching question using TF-IDF and cosine similarity
def find_best_response(user_input, category, threshold=0.2):
    """Return the stored answer whose question is most similar to the input.

    The input and the candidate questions are normalized with
    ``preprocess_text``, vectorized together with TF-IDF, and compared by
    cosine similarity.

    Args:
        user_input: Raw text typed by the user.
        category: Key of ``knowledge_base`` to search, or a falsy value when
            the topic could not be identified; in that case the questions of
            every category are searched.
        threshold: Minimum cosine similarity (exclusive) for a question to
            count as a match.

    Returns:
        The answer paired with the best-matching question, or a fallback
        message asking the user to clarify or rephrase when nothing scores
        above ``threshold``.

    Raises:
        KeyError: If ``category`` is truthy but not a key of
            ``knowledge_base``.
    """
    no_topic_message = "Sorry, I couldn't identify the topic. Please clarify or ask about math, science, or programming."

    if category:
        entries = knowledge_base[category]["questions"]
    else:
        # Several stored questions ("what is photosynthesis", "what is a
        # derivative", ...) contain none of the category keywords, so giving
        # up here made them unreachable.  Search every category instead.
        entries = [
            entry
            for data in knowledge_base.values()
            for entry in data["questions"]
        ]

    if not entries:
        # Nothing to compare against; argmax over an empty array would raise.
        if category:
            return f"I understood you're asking about {category}, but I need more details. Could you rephrase your question?"
        return no_topic_message

    questions = [question for question, _ in entries]
    responses = [answer for _, answer in entries]

    # Preprocess user input and questions
    processed_input = preprocess_text(user_input)
    processed_questions = [preprocess_text(q) for q in questions]

    # Compute TF-IDF vectors; row 0 is the user input, the rest are questions
    vectorizer = TfidfVectorizer()
    tfidf_matrix = vectorizer.fit_transform([processed_input] + processed_questions)

    # Calculate cosine similarity between the input and every question
    similarities = cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:]).flatten()
    best_match_idx = int(np.argmax(similarities))

    # Return response if similarity is above threshold
    if similarities[best_match_idx] > threshold:
        return responses[best_match_idx]
    if not category:
        return no_topic_message
    return f"I understood you're asking about {category}, but I need more details. Could you rephrase your question?"

# Main chatbot function
def chatbot():
    """Run the interactive question/answer loop on stdin/stdout.

    Prints a welcome line, then repeatedly reads a line from the user, picks a
    category with ``get_category`` and prints the answer from
    ``find_best_response``.  The loop ends when the user types ``quit`` (any
    letter case, surrounding whitespace ignored) or when input ends.

    Returns:
        None.
    """
    print("Welcome to the Academic Assistant Chatbot! Ask about math, science, or programming. Type 'quit' to exit.")
    while True:
        try:
            user_input = input("You: ").strip()
        except (EOFError, KeyboardInterrupt):
            # Closed input stream or Ctrl-C / kernel interrupt: leave cleanly
            # instead of ending the session with a traceback.
            print()
            print("Goodbye! Happy learning!")
            break

        if user_input.lower() == 'quit':
            print("Goodbye! Happy learning!")
            break
        if not user_input:
            # Blank line: nothing to answer, just prompt again.
            continue

        category = get_category(user_input)
        response = find_best_response(user_input, category)
        print(f"Bot: {response}")

# Run the chatbot only when this file is executed directly (a script run or a
# notebook cell, where __name__ is "__main__"); importing it as a module does
# not start the interactive loop.
if __name__ == "__main__":
    chatbot()

[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package wordnet to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package wordnet is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\User\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


Welcome to the Academic Assistant Chatbot! Ask about math, science, or programming. Type 'quit' to exit.


You:  quit


Goodbye! Happy learning!
