In [1]:
import numpy as np
import pandas as pd

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity


In [2]:
# Each FAQ entry is stored as a (question, answer) pair so that a question can
# never drift out of alignment with its answer when entries are added, removed,
# or reordered.
FAQ_PAIRS = [
    ("What is NLP?",
     "NLP stands for Natural Language Processing and deals with text and language data."),
    ("What is machine learning?",
     "Machine learning is a technique that allows systems to learn from data."),
    ("What is deep learning?",
     "Deep learning is a subset of machine learning using neural networks with many layers."),
    ("What is artificial intelligence?",
     "Artificial Intelligence enables machines to mimic human intelligence."),
    ("What is Python?",
     "Python is a popular programming language used in data science and AI."),
    ("What is data science?",
     "Data science involves extracting insights from data using statistics and machine learning."),
    ("What is supervised learning?",
     "Supervised learning uses labeled data to train models."),
    ("What is unsupervised learning?",
     "Unsupervised learning finds patterns in unlabeled data."),
    ("What is a neural network?",
     "A neural network is a model inspired by the human brain."),
    ("What is TF-IDF?",
     "TF-IDF converts text into numerical vectors based on importance of words."),
    ("What is CountVectorizer?",
     "CountVectorizer converts text into word frequency vectors."),
    ("What is spam detection?",
     "Spam detection identifies unwanted or fraudulent messages."),
    ("What is sentiment analysis?",
     "Sentiment analysis determines emotional tone in text."),
    ("What is word embedding?",
     "Word embeddings represent words in numerical vector space."),
    ("What is cosine similarity?",
     "Cosine similarity measures similarity between two text vectors."),
]

# Parallel lists used by the later cells: questions[i] is answered by answers[i].
questions = [faq_question for faq_question, _faq_answer in FAQ_PAIRS]
answers = [faq_answer for _faq_question, faq_answer in FAQ_PAIRS]


In [3]:
# Learn the TF-IDF vocabulary from the stored FAQ questions, then encode those
# same questions as the matrix that incoming user queries are compared against.
vectorizer = TfidfVectorizer()
vectorizer.fit(questions)
X = vectorizer.transform(questions)


In [4]:
# Reply used when no stored question resembles the user's query.
_FALLBACK_ANSWER = "Sorry, I don't have an answer for that. Please try rephrasing your question."


def chatbot(user_query, min_similarity=0.0, fallback=_FALLBACK_ANSWER):
    """Return the stored answer whose question best matches ``user_query``.

    The query is encoded with the already-fitted ``vectorizer`` and compared,
    via cosine similarity, against the FAQ question matrix ``X``. The answer at
    the position of the highest score is returned.

    Parameters
    ----------
    user_query : str
        Free-text question from the user.
    min_similarity : float, optional
        The best score must be strictly greater than this value for an answer
        to be returned. The default of 0.0 rejects only queries that have no
        similarity at all to any stored question.
    fallback : str, optional
        Reply returned when the best score does not exceed ``min_similarity``.

    Returns
    -------
    str
        The matching entry of ``answers``, or ``fallback`` when nothing matches.
    """
    query_vec = vectorizer.transform([user_query])
    # cosine_similarity yields one row of scores (one per stored question);
    # flatten it so the argmax position maps directly onto `answers`.
    scores = cosine_similarity(query_vec, X).ravel()
    best_index = int(scores.argmax())

    # If every score is 0 (e.g. an empty query or one sharing no terms with the
    # FAQ), argmax still returns position 0, which would return the first FAQ
    # answer for an unrelated question. Return the fallback instead.
    if scores[best_index] <= min_similarity:
        return fallback

    return answers[best_index]


In [5]:
# Paraphrased query: it is not a stored question, but it shares the term "NLP" with one.
chatbot("Explain NLP")


'NLP stands for Natural Language Processing and deals with text and language data.'

In [7]:
# Exact-match query: identical to one of the stored FAQ questions.
chatbot("What is artificial intelligence?")



'Artificial Intelligence enables machines to mimic human intelligence.'

In [8]:
# Query writes "TF IDF" without the hyphen used in the stored question "What is TF-IDF?";
# presumably the vectorizer's default tokenization treats both spellings as the tokens "tf" and "idf".
chatbot("Tell me about TF IDF")

'TF-IDF converts text into numerical vectors based on importance of words.'

In [9]:
# Paraphrased query: shares the words "supervised" and "learning" with a stored question.
chatbot("Define supervised learning")

'Supervised learning uses labeled data to train models.'

## Task 3: Rule-Based FAQ Chatbot

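A simple FAQ chatbot was developed using TF-IDF vectorization and cosine similarity. Although the task is titled "rule-based", the chatbot contains no hand-written rules: it retrieves the answer whose stored question is most similar to the user's query.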

- Predefined questions and answers were stored.
- User queries were converted into TF-IDF vectors using the vocabulary learned from the stored questions.
- Cosine similarity was used to find the closest matching question.
- The corresponding answer was returned.

This chatbot demonstrates a basic NLP application without using deep learning models.
