In [2]:
!pip install nltk



In [4]:
import nltk
import string
import numpy as np
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity

nltk.download('punkt')
# Recent NLTK releases load the word_tokenize models from 'punkt_tab' instead
# of 'punkt'; requesting both keeps the notebook working across versions.
nltk.download('punkt_tab')
nltk.download('stopwords')

from nltk.corpus import stopwords
from nltk.tokenize import word_tokenize

[nltk_data] Downloading package punkt to C:\Users\User/nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\User/nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


In [6]:
def preprocess(text):
    """Normalize a piece of text for TF-IDF matching.

    The text is lowercased, stripped of ASCII punctuation, tokenized with
    NLTK's ``word_tokenize`` and filtered against NLTK's English stopword list.

    Args:
        text: The raw input string.

    Returns:
        The remaining tokens joined by single spaces (an empty string when
        no token survives the filtering).
    """
    # Lowercase
    text = text.lower()
    # Remove punctuation
    text = text.translate(str.maketrans("", "", string.punctuation))
    # Tokenize
    tokens = word_tokenize(text)
    # Load the stopword list once per call and keep it in a set: the original
    # re-read the list for every token and scanned it linearly.
    stop_words = set(stopwords.words('english'))
    # Remove stopwords
    filtered_tokens = [word for word in tokens if word not in stop_words]
    return " ".join(filtered_tokens)


In [8]:
# Knowledge base: questions[i] is answered by answers[i]. The two lists are
# matched purely by position, so they must stay the same length and order.
questions = [
    "What is Artificial Intelligence?",
    "Define machine learning.",
    "What is deep learning?",
    "Explain supervised learning.",
    "Explain unsupervised learning.",
    "What is reinforcement learning?",
    "What are neural networks?",
    "What is natural language processing?"
]

answers = [
    "Artificial Intelligence is the simulation of human intelligence in machines.",
    "Machine learning is a subset of AI that enables systems to learn from data.",
    "Deep learning is a type of machine learning using neural networks with many layers.",
    "Supervised learning is a type of ML where the model is trained on labeled data.",
    "Unsupervised learning uses unlabeled data to find hidden patterns or groupings.",
    "Reinforcement learning is an area of ML where agents learn by interacting with their environment.",
    "Neural networks are computing systems inspired by the human brain's network of neurons.",
    "Natural language processing is a field of AI that focuses on the interaction between computers and human language."
]

# Fail fast if an edit leaves the parallel lists misaligned; otherwise a
# lookup by question index could return the wrong answer or raise IndexError.
assert len(questions) == len(answers), "questions and answers must have the same length"


In [11]:
# Build the TF-IDF model from the cleaned-up knowledge-base questions.
vectorizer = TfidfVectorizer()

# Run every question through the same normalization applied to user input.
preprocessed_questions = list(map(preprocess, questions))

# X holds the TF-IDF representation of the questions, fitted and transformed
# in one step.
X = vectorizer.fit_transform(preprocessed_questions)


In [13]:
def get_answer(user_input, threshold=0.3):
    """Return the stored answer whose question best matches ``user_input``.

    The input is normalized with ``preprocess``, projected into the fitted
    TF-IDF space and compared with every known question by cosine similarity.

    Args:
        user_input: The raw question typed by the user.
        threshold: Minimum cosine similarity required to accept the best
            match. Defaults to 0.3, the value previously hard-coded.

    Returns:
        The answer paired with the most similar question, or a fallback
        apology string when the best score is below ``threshold``.
    """
    user_input_processed = preprocess(user_input)
    user_vec = vectorizer.transform([user_input_processed])
    # One query row was passed in, so take that single row of scores
    # explicitly instead of relying on argmax over the flattened 2-D array.
    scores = cosine_similarity(user_vec, X)[0]
    best_index = int(np.argmax(scores))

    # Reject weak matches so unrelated input does not get an arbitrary answer.
    if scores[best_index] < threshold:
        return "I'm sorry, I don't understand the question."
    return answers[best_index]


In [15]:
# Interactive chat loop: read questions until the user asks to leave.
print("Bot: Hello! Ask me a question about AI. (Type 'exit' to quit)")

while True:
    try:
        user_input = input("You: ")
    except (EOFError, KeyboardInterrupt):
        # Interrupting the kernel or closing stdin ends the chat cleanly
        # instead of leaving a traceback in the cell output.
        print("Bot: Goodbye!")
        break
    # Strip surrounding whitespace so inputs such as " exit " still quit.
    if user_input.strip().lower() in ("exit", "quit"):
        print("Bot: Goodbye!")
        break
    response = get_answer(user_input)
    print("Bot:", response)


Bot: Hello! Ask me a question about AI. (Type 'exit' to quit)


You:  What is Artificial Intelligence


Bot: Artificial Intelligence is the simulation of human intelligence in machines.


You:  exit


Bot: Goodbye!
